LLVM 23.0.0git
SimplifyCFG.cpp
Go to the documentation of this file.
//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Peephole optimize the CFG.
//
//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
20#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/Loads.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DebugInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalValue.h"
48#include "llvm/IR/IRBuilder.h"
49#include "llvm/IR/InstrTypes.h"
50#include "llvm/IR/Instruction.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
56#include "llvm/IR/Metadata.h"
57#include "llvm/IR/Module.h"
58#include "llvm/IR/NoFolder.h"
59#include "llvm/IR/Operator.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/User.h"
65#include "llvm/IR/Value.h"
66#include "llvm/IR/ValueHandle.h"
70#include "llvm/Support/Debug.h"
80#include <algorithm>
81#include <cassert>
82#include <climits>
83#include <cmath>
84#include <cstddef>
85#include <cstdint>
86#include <iterator>
87#include <map>
88#include <optional>
89#include <set>
90#include <tuple>
91#include <utility>
92#include <vector>
93
94using namespace llvm;
95using namespace PatternMatch;
96
97#define DEBUG_TYPE "simplifycfg"
98
99namespace llvm {
100
102 "simplifycfg-require-and-preserve-domtree", cl::Hidden,
103
104 cl::desc(
105 "Temporary development switch used to gradually uplift SimplifyCFG "
106 "into preserving DomTree,"));
107
108// Chosen as 2 so as to be cheap, but still to have enough power to fold
109// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
110// To catch this, we need to fold a compare and a select, hence '2' being the
111// minimum reasonable default.
113 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
114 cl::desc(
115 "Control the amount of phi node folding to perform (default = 2)"));
116
118 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
119 cl::desc("Control the maximal total instruction cost that we are willing "
120 "to speculatively execute to fold a 2-entry PHI node into a "
121 "select (default = 4)"));
122
123static cl::opt<bool>
124 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
125 cl::desc("Hoist common instructions up to the parent block"));
126
128 "simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true),
129 cl::desc("Hoist loads if the target supports conditional faulting"));
130
132 "simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true),
133 cl::desc("Hoist stores if the target supports conditional faulting"));
134
136 "hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6),
137 cl::desc("Control the maximal conditional load/store that we are willing "
138 "to speculatively execute to eliminate conditional branch "
139 "(default = 6)"));
140
142 HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
143 cl::init(20),
144 cl::desc("Allow reordering across at most this many "
145 "instructions when hoisting"));
146
147static cl::opt<bool>
148 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
149 cl::desc("Sink common instructions down to the end block"));
150
152 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
153 cl::desc("Hoist conditional stores if an unconditional store precedes"));
154
156 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
157 cl::desc("Hoist conditional stores even if an unconditional store does not "
158 "precede - hoist multiple conditional stores into a single "
159 "predicated store"));
160
162 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
163 cl::desc("When merging conditional stores, do so even if the resultant "
164 "basic blocks are unlikely to be if-converted as a result"));
165
167 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
168 cl::desc("Allow exactly one expensive instruction to be speculatively "
169 "executed"));
170
172 "max-speculation-depth", cl::Hidden, cl::init(10),
173 cl::desc("Limit maximum recursion depth when calculating costs of "
174 "speculatively executed instructions"));
175
176static cl::opt<int>
177 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
178 cl::init(10),
179 cl::desc("Max size of a block which is still considered "
180 "small enough to thread through"));
181
182// Two is chosen to allow one negation and a logical combine.
184 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
185 cl::init(2),
186 cl::desc("Maximum cost of combining conditions when "
187 "folding branches"));
188
190 "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
191 cl::init(2),
192 cl::desc("Multiplier to apply to threshold when determining whether or not "
193 "to fold branch to common destination when vector operations are "
194 "present"));
195
197 "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
198 cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));
199
201 "max-switch-cases-per-result", cl::Hidden, cl::init(16),
202 cl::desc("Limit cases to analyze when converting a switch to select"));
203
205 "max-jump-threading-live-blocks", cl::Hidden, cl::init(24),
206 cl::desc("Limit number of blocks a define in a threaded block is allowed "
207 "to be live in"));
208
210
211} // end namespace llvm
212
213STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
214STATISTIC(NumLinearMaps,
215 "Number of switch instructions turned into linear mapping");
216STATISTIC(NumLookupTables,
217 "Number of switch instructions turned into lookup tables");
219 NumLookupTablesHoles,
220 "Number of switch instructions turned into lookup tables (holes checked)");
221STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
222STATISTIC(NumFoldValueComparisonIntoPredecessors,
223 "Number of value comparisons folded into predecessor basic blocks");
224STATISTIC(NumFoldBranchToCommonDest,
225 "Number of branches folded into predecessor basic block");
227 NumHoistCommonCode,
228 "Number of common instruction 'blocks' hoisted up to the begin block");
229STATISTIC(NumHoistCommonInstrs,
230 "Number of common instructions hoisted up to the begin block");
231STATISTIC(NumSinkCommonCode,
232 "Number of common instruction 'blocks' sunk down to the end block");
233STATISTIC(NumSinkCommonInstrs,
234 "Number of common instructions sunk down to the end block");
235STATISTIC(NumSpeculations, "Number of speculative executed instructions");
236STATISTIC(NumInvokes,
237 "Number of invokes with empty resume blocks simplified into calls");
238STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
239STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
240
241namespace {
242
243// The first field contains the value that the switch produces when a certain
244// case group is selected, and the second field is a vector containing the
245// cases composing the case group.
246using SwitchCaseResultVectorTy =
248
249// The first field contains the phi node that generates a result of the switch
250// and the second field contains the value generated for a certain case in the
251// switch for that PHI.
252using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
253
254/// ValueEqualityComparisonCase - Represents a case of a switch.
255struct ValueEqualityComparisonCase {
257 BasicBlock *Dest;
258
259 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
260 : Value(Value), Dest(Dest) {}
261
262 bool operator<(ValueEqualityComparisonCase RHS) const {
263 // Comparing pointers is ok as we only rely on the order for uniquing.
264 return Value < RHS.Value;
265 }
266
267 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
268};
269
270class SimplifyCFGOpt {
271 const TargetTransformInfo &TTI;
272 DomTreeUpdater *DTU;
273 const DataLayout &DL;
274 ArrayRef<WeakVH> LoopHeaders;
275 const SimplifyCFGOptions &Options;
276 bool Resimplify;
277
278 Value *isValueEqualityComparison(Instruction *TI);
279 BasicBlock *getValueEqualityComparisonCases(
280 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
281 bool simplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
282 BasicBlock *Pred,
283 IRBuilder<> &Builder);
284 bool performValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
285 Instruction *PTI,
286 IRBuilder<> &Builder);
287 bool foldValueComparisonIntoPredecessors(Instruction *TI,
288 IRBuilder<> &Builder);
289
290 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
291 bool simplifySingleResume(ResumeInst *RI);
292 bool simplifyCommonResume(ResumeInst *RI);
293 bool simplifyCleanupReturn(CleanupReturnInst *RI);
294 bool simplifyUnreachable(UnreachableInst *UI);
295 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
296 bool simplifyDuplicateSwitchArms(SwitchInst *SI, DomTreeUpdater *DTU);
297 bool simplifyIndirectBr(IndirectBrInst *IBI);
298 bool simplifyUncondBranch(UncondBrInst *BI, IRBuilder<> &Builder);
299 bool simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder);
300 bool foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI);
301
302 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
303 IRBuilder<> &Builder);
304 bool tryToSimplifyUncondBranchWithICmpSelectInIt(ICmpInst *ICI,
305 SelectInst *Select,
306 IRBuilder<> &Builder);
307 bool hoistCommonCodeFromSuccessors(Instruction *TI, bool AllInstsEqOnly);
308 bool hoistSuccIdenticalTerminatorToSwitchOrIf(
309 Instruction *TI, Instruction *I1,
310 SmallVectorImpl<Instruction *> &OtherSuccTIs,
311 ArrayRef<BasicBlock *> UniqueSuccessors);
312 bool speculativelyExecuteBB(CondBrInst *BI, BasicBlock *ThenBB);
313 bool simplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
314 BasicBlock *TrueBB, BasicBlock *FalseBB,
315 uint32_t TrueWeight, uint32_t FalseWeight);
316 bool simplifyBranchOnICmpChain(CondBrInst *BI, IRBuilder<> &Builder,
317 const DataLayout &DL);
318 bool simplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
319 bool simplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
320 bool turnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
321 bool simplifyDuplicatePredecessors(BasicBlock *Succ, DomTreeUpdater *DTU);
322
323public:
324 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
325 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
326 const SimplifyCFGOptions &Opts)
327 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
328 assert((!DTU || !DTU->hasPostDomTree()) &&
329 "SimplifyCFG is not yet capable of maintaining validity of a "
330 "PostDomTree, so don't ask for it.");
331 }
332
333 bool simplifyOnce(BasicBlock *BB);
334 bool run(BasicBlock *BB);
335
336 // Helper to set Resimplify and return change indication.
337 bool requestResimplify() {
338 Resimplify = true;
339 return true;
340 }
341};
342
343// we synthesize a || b as select a, true, b
344// we synthesize a && b as select a, b, false
345// this function determines if SI is playing one of those roles.
346[[maybe_unused]] bool
347isSelectInRoleOfConjunctionOrDisjunction(const SelectInst *SI) {
348 return ((isa<ConstantInt>(SI->getTrueValue()) &&
349 (dyn_cast<ConstantInt>(SI->getTrueValue())->isOne())) ||
350 (isa<ConstantInt>(SI->getFalseValue()) &&
351 (dyn_cast<ConstantInt>(SI->getFalseValue())->isNullValue())));
352}
353
354} // end anonymous namespace
355
356/// Return true if all the PHI nodes in the basic block \p BB
357/// receive compatible (identical) incoming values when coming from
358/// all of the predecessor blocks that are specified in \p IncomingBlocks.
359///
360/// Note that if the values aren't exactly identical, but \p EquivalenceSet
361/// is provided, and *both* of the values are present in the set,
362/// then they are considered equal.
364 BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
365 SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
366 assert(IncomingBlocks.size() == 2 &&
367 "Only for a pair of incoming blocks at the time!");
368
369 // FIXME: it is okay if one of the incoming values is an `undef` value,
370 // iff the other incoming value is guaranteed to be a non-poison value.
371 // FIXME: it is okay if one of the incoming values is a `poison` value.
372 return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
373 Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
374 Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
375 if (IV0 == IV1)
376 return true;
377 if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
378 EquivalenceSet->contains(IV1))
379 return true;
380 return false;
381 });
382}
383
384/// Return true if it is safe to merge these two
385/// terminator instructions together.
386static bool
388 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
389 if (SI1 == SI2)
390 return false; // Can't merge with self!
391
392 // It is not safe to merge these two switch instructions if they have a common
393 // successor, and if that successor has a PHI node, and if *that* PHI node has
394 // conflicting incoming values from the two switch blocks.
395 BasicBlock *SI1BB = SI1->getParent();
396 BasicBlock *SI2BB = SI2->getParent();
397
399 bool Fail = false;
400 for (BasicBlock *Succ : successors(SI2BB)) {
401 if (!SI1Succs.count(Succ))
402 continue;
403 if (incomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
404 continue;
405 Fail = true;
406 if (FailBlocks)
407 FailBlocks->insert(Succ);
408 else
409 break;
410 }
411
412 return !Fail;
413}
414
415/// Update PHI nodes in Succ to indicate that there will now be entries in it
416/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
417/// will be the same as those coming in from ExistPred, an existing predecessor
418/// of Succ.
419static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
420 BasicBlock *ExistPred,
421 MemorySSAUpdater *MSSAU = nullptr) {
422 for (PHINode &PN : Succ->phis())
423 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
424 if (MSSAU)
425 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
426 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
427}
428
429/// Compute an abstract "cost" of speculating the given instruction,
430/// which is assumed to be safe to speculate. TCC_Free means cheap,
431/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
432/// expensive.
434 const TargetTransformInfo &TTI) {
435 return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
436}
437
438/// If we have a merge point of an "if condition" as accepted above,
439/// return true if the specified value dominates the block. We don't handle
440/// the true generality of domination here, just a special case which works
441/// well enough for us.
442///
443/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
444/// see if V (which must be an instruction) and its recursive operands
445/// that do not dominate BB have a combined cost lower than Budget and
446/// are non-trapping. If both are true, the instruction is inserted into the
447/// set and true is returned.
448///
449/// The cost for most non-trapping instructions is defined as 1 except for
450/// Select whose cost is 2.
451///
452/// After this function returns, Cost is increased by the cost of
453/// V plus its non-dominating operands. If that cost is greater than
454/// Budget, false is returned and Cost is undefined.
456 Value *V, BasicBlock *BB, Instruction *InsertPt,
457 SmallPtrSetImpl<Instruction *> &AggressiveInsts, InstructionCost &Cost,
459 SmallPtrSetImpl<Instruction *> &ZeroCostInstructions, unsigned Depth = 0) {
460 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
461 // so limit the recursion depth.
462 // TODO: While this recursion limit does prevent pathological behavior, it
463 // would be better to track visited instructions to avoid cycles.
465 return false;
466
468 if (!I) {
469 // Non-instructions dominate all instructions and can be executed
470 // unconditionally.
471 return true;
472 }
473 BasicBlock *PBB = I->getParent();
474
475 // We don't want to allow weird loops that might have the "if condition" in
476 // the bottom of this block.
477 if (PBB == BB)
478 return false;
479
480 // If this instruction is defined in a block that contains an unconditional
481 // branch to BB, then it must be in the 'conditional' part of the "if
482 // statement". If not, it definitely dominates the region.
484 if (!BI || BI->getSuccessor() != BB)
485 return true;
486
487 // If we have seen this instruction before, don't count it again.
488 if (AggressiveInsts.count(I))
489 return true;
490
491 // Okay, it looks like the instruction IS in the "condition". Check to
492 // see if it's a cheap instruction to unconditionally compute, and if it
493 // only uses stuff defined outside of the condition. If so, hoist it out.
494 if (!isSafeToSpeculativelyExecute(I, InsertPt, AC))
495 return false;
496
497 // Overflow arithmetic instruction plus extract value are usually generated
498 // when a division is being replaced. But, in this case, the zero check may
499 // still be kept in the code. In that case it would be worth to hoist these
500 // two instruction out of the basic block. Let's treat this pattern as one
501 // single cheap instruction here!
502 WithOverflowInst *OverflowInst;
503 if (match(I, m_ExtractValue<1>(m_OneUse(m_WithOverflowInst(OverflowInst))))) {
504 ZeroCostInstructions.insert(OverflowInst);
505 Cost += 1;
506 } else if (!ZeroCostInstructions.contains(I))
507 Cost += computeSpeculationCost(I, TTI);
508
509 // Allow exactly one instruction to be speculated regardless of its cost
510 // (as long as it is safe to do so).
511 // This is intended to flatten the CFG even if the instruction is a division
512 // or other expensive operation. The speculation of an expensive instruction
513 // is expected to be undone in CodeGenPrepare if the speculation has not
514 // enabled further IR optimizations.
515 if (Cost > Budget &&
516 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
517 !Cost.isValid()))
518 return false;
519
520 // Okay, we can only really hoist these out if their operands do
521 // not take us over the cost threshold.
522 for (Use &Op : I->operands())
523 if (!dominatesMergePoint(Op, BB, InsertPt, AggressiveInsts, Cost, Budget,
524 TTI, AC, ZeroCostInstructions, Depth + 1))
525 return false;
526 // Okay, it's safe to do this! Remember this instruction.
527 AggressiveInsts.insert(I);
528 return true;
529}
530
531/// Extract ConstantInt from value, looking through IntToPtr
532/// and PointerNullValue. Return NULL if value is not a constant int.
534 // Normal constant int.
536 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
537 return CI;
538
539 // It is not safe to look through inttoptr or ptrtoint when using unstable
540 // pointer types.
541 if (DL.hasUnstableRepresentation(V->getType()))
542 return nullptr;
543
544 // This is some kind of pointer constant. Turn it into a pointer-sized
545 // ConstantInt if possible.
546 IntegerType *IntPtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
547
548 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
550 return ConstantInt::get(IntPtrTy, 0);
551
552 // IntToPtr const int, we can look through this if the semantics of
553 // inttoptr for this address space are a simple (truncating) bitcast.
555 if (CE->getOpcode() == Instruction::IntToPtr)
556 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
557 // The constant is very likely to have the right type already.
558 if (CI->getType() == IntPtrTy)
559 return CI;
560 else
561 return cast<ConstantInt>(
562 ConstantFoldIntegerCast(CI, IntPtrTy, /*isSigned=*/false, DL));
563 }
564 return nullptr;
565}
566
567namespace {
568
569/// Given a chain of or (||) or and (&&) comparison of a value against a
570/// constant, this will try to recover the information required for a switch
571/// structure.
572/// It will depth-first traverse the chain of comparison, seeking for patterns
573/// like %a == 12 or %a < 4 and combine them to produce a set of integer
574/// representing the different cases for the switch.
575/// Note that if the chain is composed of '||' it will build the set of elements
576/// that matches the comparisons (i.e. any of this value validate the chain)
577/// while for a chain of '&&' it will build the set elements that make the test
578/// fail.
579struct ConstantComparesGatherer {
580 const DataLayout &DL;
581
582 /// Value found for the switch comparison
583 Value *CompValue = nullptr;
584
585 /// Extra clause to be checked before the switch
586 Value *Extra = nullptr;
587
588 /// Set of integers to match in switch
590
591 /// Number of comparisons matched in the and/or chain
592 unsigned UsedICmps = 0;
593
594 /// If the elements in Vals matches the comparisons
595 bool IsEq = false;
596
597 // Used to check if the first matched CompValue shall be the Extra check.
598 bool IgnoreFirstMatch = false;
599 bool MultipleMatches = false;
600
601 /// Construct and compute the result for the comparison instruction Cond
602 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
603 gather(Cond);
604 if (CompValue || !MultipleMatches)
605 return;
606 Extra = nullptr;
607 Vals.clear();
608 UsedICmps = 0;
609 IgnoreFirstMatch = true;
610 gather(Cond);
611 }
612
613 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
614 ConstantComparesGatherer &
615 operator=(const ConstantComparesGatherer &) = delete;
616
617private:
618 /// Try to set the current value used for the comparison, it succeeds only if
619 /// it wasn't set before or if the new value is the same as the old one
620 bool setValueOnce(Value *NewVal) {
621 if (IgnoreFirstMatch) {
622 IgnoreFirstMatch = false;
623 return false;
624 }
625 if (CompValue && CompValue != NewVal) {
626 MultipleMatches = true;
627 return false;
628 }
629 CompValue = NewVal;
630 return true;
631 }
632
633 /// Try to match Instruction "I" as a comparison against a constant and
634 /// populates the array Vals with the set of values that match (or do not
635 /// match depending on isEQ).
636 /// Return false on failure. On success, the Value the comparison matched
637 /// against is placed in CompValue.
638 /// If CompValue is already set, the function is expected to fail if a match
639 /// is found but the value compared to is different.
640 bool matchInstruction(Instruction *I, bool isEQ) {
641 if (match(I, m_Not(m_Instruction(I))))
642 isEQ = !isEQ;
643
644 Value *Val;
645 if (match(I, m_NUWTrunc(m_Value(Val)))) {
646 // If we already have a value for the switch, it has to match!
647 if (!setValueOnce(Val))
648 return false;
649 UsedICmps++;
650 Vals.push_back(ConstantInt::get(cast<IntegerType>(Val->getType()), isEQ));
651 return true;
652 }
653 // If this is an icmp against a constant, handle this as one of the cases.
654 ICmpInst *ICI;
655 ConstantInt *C;
656 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
657 (C = getConstantInt(I->getOperand(1), DL)))) {
658 return false;
659 }
660
661 Value *RHSVal;
662 const APInt *RHSC;
663
664 // Pattern match a special case
665 // (x & ~2^z) == y --> x == y || x == y|2^z
666 // This undoes a transformation done by instcombine to fuse 2 compares.
667 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
668 // It's a little bit hard to see why the following transformations are
669 // correct. Here is a CVC3 program to verify them for 64-bit values:
670
671 /*
672 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
673 x : BITVECTOR(64);
674 y : BITVECTOR(64);
675 z : BITVECTOR(64);
676 mask : BITVECTOR(64) = BVSHL(ONE, z);
677 QUERY( (y & ~mask = y) =>
678 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
679 );
680 QUERY( (y | mask = y) =>
681 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
682 );
683 */
684
685 // Please note that each pattern must be a dual implication (<--> or
686 // iff). One directional implication can create spurious matches. If the
687 // implication is only one-way, an unsatisfiable condition on the left
688 // side can imply a satisfiable condition on the right side. Dual
689 // implication ensures that satisfiable conditions are transformed to
690 // other satisfiable conditions and unsatisfiable conditions are
691 // transformed to other unsatisfiable conditions.
692
693 // Here is a concrete example of a unsatisfiable condition on the left
694 // implying a satisfiable condition on the right:
695 //
696 // mask = (1 << z)
697 // (x & ~mask) == y --> (x == y || x == (y | mask))
698 //
699 // Substituting y = 3, z = 0 yields:
700 // (x & -2) == 3 --> (x == 3 || x == 2)
701
702 // Pattern match a special case:
703 /*
704 QUERY( (y & ~mask = y) =>
705 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
706 );
707 */
708 if (match(ICI->getOperand(0),
709 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
710 APInt Mask = ~*RHSC;
711 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
712 // If we already have a value for the switch, it has to match!
713 if (!setValueOnce(RHSVal))
714 return false;
715
716 Vals.push_back(C);
717 Vals.push_back(
718 ConstantInt::get(C->getContext(),
719 C->getValue() | Mask));
720 UsedICmps++;
721 return true;
722 }
723 }
724
725 // Pattern match a special case:
726 /*
727 QUERY( (y | mask = y) =>
728 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
729 );
730 */
731 if (match(ICI->getOperand(0),
732 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
733 APInt Mask = *RHSC;
734 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
735 // If we already have a value for the switch, it has to match!
736 if (!setValueOnce(RHSVal))
737 return false;
738
739 Vals.push_back(C);
740 Vals.push_back(ConstantInt::get(C->getContext(),
741 C->getValue() & ~Mask));
742 UsedICmps++;
743 return true;
744 }
745 }
746
747 // If we already have a value for the switch, it has to match!
748 if (!setValueOnce(ICI->getOperand(0)))
749 return false;
750
751 UsedICmps++;
752 Vals.push_back(C);
753 return true;
754 }
755
756 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
757 ConstantRange Span =
759
760 // Shift the range if the compare is fed by an add. This is the range
761 // compare idiom as emitted by instcombine.
762 Value *CandidateVal = I->getOperand(0);
763 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
764 Span = Span.subtract(*RHSC);
765 CandidateVal = RHSVal;
766 }
767
768 // If this is an and/!= check, then we are looking to build the set of
769 // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
770 // x != 0 && x != 1.
771 if (!isEQ)
772 Span = Span.inverse();
773
774 // If there are a ton of values, we don't want to make a ginormous switch.
775 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
776 return false;
777 }
778
779 // If we already have a value for the switch, it has to match!
780 if (!setValueOnce(CandidateVal))
781 return false;
782
783 // Add all values from the range to the set
784 APInt Tmp = Span.getLower();
785 do
786 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
787 while (++Tmp != Span.getUpper());
788
789 UsedICmps++;
790 return true;
791 }
792
793 /// Given a potentially 'or'd or 'and'd together collection of icmp
794 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
795 /// the value being compared, and stick the list constants into the Vals
796 /// vector.
797 /// One "Extra" case is allowed to differ from the other.
798 void gather(Value *V) {
799 Value *Op0, *Op1;
800 if (match(V, m_LogicalOr(m_Value(Op0), m_Value(Op1))))
801 IsEq = true;
802 else if (match(V, m_LogicalAnd(m_Value(Op0), m_Value(Op1))))
803 IsEq = false;
804 else
805 return;
806 // Keep a stack (SmallVector for efficiency) for depth-first traversal
807 SmallVector<Value *, 8> DFT{Op0, Op1};
808 SmallPtrSet<Value *, 8> Visited{V, Op0, Op1};
809
810 while (!DFT.empty()) {
811 V = DFT.pop_back_val();
812
813 if (Instruction *I = dyn_cast<Instruction>(V)) {
814 // If it is a || (or && depending on isEQ), process the operands.
815 if (IsEq ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
816 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
817 if (Visited.insert(Op1).second)
818 DFT.push_back(Op1);
819 if (Visited.insert(Op0).second)
820 DFT.push_back(Op0);
821
822 continue;
823 }
824
825 // Try to match the current instruction
826 if (matchInstruction(I, IsEq))
827 // Match succeed, continue the loop
828 continue;
829 }
830
831 // One element of the sequence of || (or &&) could not be match as a
832 // comparison against the same value as the others.
833 // We allow only one "Extra" case to be checked before the switch
834 if (!Extra) {
835 Extra = V;
836 continue;
837 }
838 // Failed to parse a proper sequence, abort now
839 CompValue = nullptr;
840 break;
841 }
842 }
843};
844
845} // end anonymous namespace
846
848 MemorySSAUpdater *MSSAU = nullptr) {
849 Instruction *Cond = nullptr;
851 Cond = dyn_cast<Instruction>(SI->getCondition());
852 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
853 Cond = dyn_cast<Instruction>(BI->getCondition());
854 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
855 Cond = dyn_cast<Instruction>(IBI->getAddress());
856 }
857
858 TI->eraseFromParent();
859 if (Cond)
861}
862
863/// Return true if the specified terminator checks
864/// to see if a value is equal to constant integer value.
865Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
866 Value *CV = nullptr;
867 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
868 // Do not permit merging of large switch instructions into their
869 // predecessors unless there is only one predecessor.
870 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
871 CV = SI->getCondition();
872 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(TI))
873 if (BI->getCondition()->hasOneUse()) {
874 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
875 if (ICI->isEquality() && getConstantInt(ICI->getOperand(1), DL))
876 CV = ICI->getOperand(0);
877 } else if (auto *Trunc = dyn_cast<TruncInst>(BI->getCondition())) {
878 if (Trunc->hasNoUnsignedWrap())
879 CV = Trunc->getOperand(0);
880 }
881 }
882
883 // Unwrap any lossless ptrtoint cast (except for unstable pointers).
884 if (CV) {
885 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
886 Value *Ptr = PTII->getPointerOperand();
887 if (DL.hasUnstableRepresentation(Ptr->getType()))
888 return CV;
889 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
890 CV = Ptr;
891 }
892 }
893 return CV;
894}
895
896/// Given a value comparison instruction,
897/// decode all of the 'cases' that it represents and return the 'default' block.
898BasicBlock *SimplifyCFGOpt::getValueEqualityComparisonCases(
899 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
900 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
901 Cases.reserve(SI->getNumCases());
902 for (auto Case : SI->cases())
903 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
904 Case.getCaseSuccessor()));
905 return SI->getDefaultDest();
906 }
907
908 CondBrInst *BI = cast<CondBrInst>(TI);
909 Value *Cond = BI->getCondition();
910 ICmpInst::Predicate Pred;
911 ConstantInt *C;
912 if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
913 Pred = ICI->getPredicate();
914 C = getConstantInt(ICI->getOperand(1), DL);
915 } else {
916 Pred = ICmpInst::ICMP_NE;
917 auto *Trunc = cast<TruncInst>(Cond);
918 C = ConstantInt::get(cast<IntegerType>(Trunc->getOperand(0)->getType()), 0);
919 }
920 BasicBlock *Succ = BI->getSuccessor(Pred == ICmpInst::ICMP_NE);
921 Cases.push_back(ValueEqualityComparisonCase(C, Succ));
922 return BI->getSuccessor(Pred == ICmpInst::ICMP_EQ);
923}
924
925/// Given a vector of value/destination cases, remove every entry
926/// in the list that compares equal to the specified block.
927static void
929 std::vector<ValueEqualityComparisonCase> &Cases) {
 // NOTE(review): llvm::erase drops the elements that compare equal to BB, so
 // this relies on ValueEqualityComparisonCase being comparable against a
 // BasicBlock* -- presumably matching on the case destination; confirm at the
 // type's definition.
930 llvm::erase(Cases, BB);
931}
932
933/// Return true if there are any keys in C1 that exist in C2 as well.
934static bool valuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
935 std::vector<ValueEqualityComparisonCase> &C2) {
936 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
937
938 // Make V1 be smaller than V2.
939 if (V1->size() > V2->size())
940 std::swap(V1, V2);
941
942 if (V1->empty())
943 return false;
944 if (V1->size() == 1) {
945 // Just scan V2.
946 ConstantInt *TheVal = (*V1)[0].Value;
947 for (const ValueEqualityComparisonCase &VECC : *V2)
948 if (TheVal == VECC.Value)
949 return true;
950 }
951
952 // Otherwise, just sort both lists and compare element by element.
953 array_pod_sort(V1->begin(), V1->end());
954 array_pod_sort(V2->begin(), V2->end());
955 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
956 while (i1 != e1 && i2 != e2) {
957 if ((*V1)[i1].Value == (*V2)[i2].Value)
958 return true;
959 if ((*V1)[i1].Value < (*V2)[i2].Value)
960 ++i1;
961 else
962 ++i2;
963 }
964 return false;
965}
966
967/// If TI is known to be a terminator instruction and its block is known to
968/// only have a single predecessor block, check to see if that predecessor is
969/// also a value comparison with the same value, and if that comparison
970/// determines the outcome of this comparison. If so, simplify TI. This does a
971/// very limited form of jump threading.
///
/// \param TI The value-comparison terminator to simplify.
/// \param Pred The block whose terminator is checked against \p TI.
/// \param Builder Used to create the replacement unconditional branch.
/// \returns true if the IR was changed; every `return false` below happens
/// before anything has been modified.
972bool SimplifyCFGOpt::simplifyEqualityComparisonWithOnlyPredecessor(
973 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
974 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
975 if (!PredVal)
976 return false; // Not a value comparison in predecessor.
977
978 Value *ThisVal = isValueEqualityComparison(TI);
979 assert(ThisVal && "This isn't a value comparison!!");
980 if (ThisVal != PredVal)
981 return false; // Different predicates.
982
983 // TODO: Preserve branch weight metadata, similarly to how
984 // foldValueComparisonIntoPredecessors preserves it.
985
986 // Find out information about when control will move from Pred to TI's block.
987 std::vector<ValueEqualityComparisonCase> PredCases;
988 BasicBlock *PredDef =
989 getValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
990 eliminateBlockCases(PredDef, PredCases); // Remove default from cases.
991
992 // Find information about how control leaves this block.
993 std::vector<ValueEqualityComparisonCase> ThisCases;
994 BasicBlock *ThisDef = getValueEqualityComparisonCases(TI, ThisCases);
995 eliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
996
997 // If TI's block is the default block from Pred's comparison, potentially
998 // simplify TI based on this knowledge.
999 if (PredDef == TI->getParent()) {
1000 // If we are here, we know that the value is none of those cases listed in
1001 // PredCases. If there are any cases in ThisCases that are in PredCases, we
1002 // can simplify TI.
1003 if (!valuesOverlap(PredCases, ThisCases))
1004 return false;
1005
1006 if (isa<CondBrInst>(TI)) {
1007 // Okay, one of the successors of this condbr is dead. Convert it to an
1008 // uncond br.
1009 assert(ThisCases.size() == 1 && "Branch can only have one case!");
1010 // Insert the new branch.
1011 Instruction *NI = Builder.CreateBr(ThisDef);
1012 (void)NI;
1013
1014 // Remove PHI node entries for the dead edge.
1015 ThisCases[0].Dest->removePredecessor(PredDef);
1016
1017 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1018 << "Through successor TI: " << *TI << "Leaving: " << *NI
1019 << "\n");
1020
1022
1023 if (DTU)
1024 DTU->applyUpdates(
1025 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
1026
1027 return true;
1028 }
1029
1030 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
1031 // Okay, TI has cases that are statically dead, prune them away.
1032 SmallPtrSet<Constant *, 16> DeadCases;
1033 for (const ValueEqualityComparisonCase &Case : PredCases)
1034 DeadCases.insert(Case.Value);
1035
1036 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1037 << "Through successor TI: " << *TI);
1038
 // Per successor, count the cases that survive pruning; a successor whose
 // count ends at zero lost all of its case edges (used for the DomTree update).
1039 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
 // NOTE(review): the cases are walked back to front, presumably so that
 // removeCase() cannot disturb cases that are still to be visited -- confirm
 // against SwitchInst::removeCase.
1040 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
1041 --i;
1042 auto *Successor = i->getCaseSuccessor();
1043 if (DTU)
1044 ++NumPerSuccessorCases[Successor];
1045 if (DeadCases.count(i->getCaseValue())) {
1046 Successor->removePredecessor(PredDef);
1047 SI.removeCase(i);
1048 if (DTU)
1049 --NumPerSuccessorCases[Successor];
1050 }
1051 }
1052
1053 if (DTU) {
1054 std::vector<DominatorTree::UpdateType> Updates;
1055 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
1056 if (I.second == 0)
1057 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
1058 DTU->applyUpdates(Updates);
1059 }
1060
1061 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
1062 return true;
1063 }
1064
1065 // Otherwise, TI's block must correspond to some matched value. Find out
1066 // which value (or set of values) this is.
1067 ConstantInt *TIV = nullptr;
1068 BasicBlock *TIBB = TI->getParent();
1069 for (const auto &[Value, Dest] : PredCases)
1070 if (Dest == TIBB) {
1071 if (TIV)
1072 return false; // Cannot handle multiple values coming to this block.
1073 TIV = Value;
1074 }
1075 assert(TIV && "No edge from pred to succ?");
1076
1077 // Okay, we found the one constant that our value can be if we get into TI's
1078 // BB. Find out which successor will unconditionally be branched to.
1079 BasicBlock *TheRealDest = nullptr;
1080 for (const auto &[Value, Dest] : ThisCases)
1081 if (Value == TIV) {
1082 TheRealDest = Dest;
1083 break;
1084 }
1085
1086 // If not handled by any explicit cases, it is handled by the default case.
1087 if (!TheRealDest)
1088 TheRealDest = ThisDef;
1089
1090 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
1091
1092 // Remove PHI node entries for dead edges.
 // CheckEdge lets exactly one edge to TheRealDest survive: the first successor
 // equal to it clears CheckEdge, so any further duplicate edges to TheRealDest
 // also have their PHI entries removed (without being recorded as removed
 // successors, since the block stays a successor).
1093 BasicBlock *CheckEdge = TheRealDest;
1094 for (BasicBlock *Succ : successors(TIBB))
1095 if (Succ != CheckEdge) {
1096 if (Succ != TheRealDest)
1097 RemovedSuccs.insert(Succ);
1098 Succ->removePredecessor(TIBB);
1099 } else
1100 CheckEdge = nullptr;
1101
1102 // Insert the new branch.
1103 Instruction *NI = Builder.CreateBr(TheRealDest);
1104 (void)NI;
1105
1106 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
1107 << "Through successor TI: " << *TI << "Leaving: " << *NI
1108 << "\n");
1109
1111 if (DTU) {
1112 SmallVector<DominatorTree::UpdateType, 2> Updates;
1113 Updates.reserve(RemovedSuccs.size());
1114 for (auto *RemovedSucc : RemovedSuccs)
1115 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
1116 DTU->applyUpdates(Updates);
1117 }
1118 return true;
1119}
1120
1121namespace {
1122
1123/// This class implements a stable ordering of constant
1124/// integers that does not depend on their address. This is important for
1125/// applications that sort ConstantInt's to ensure uniqueness.
1126struct ConstantIntOrdering {
1127 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1128 return LHS->getValue().ult(RHS->getValue());
1129 }
1130};
1131
1132} // end anonymous namespace
1133
1135 ConstantInt *const *P2) {
 // Three-way comparison of the two pointed-to constants: 0 when they are the
 // same ConstantInt object, 1 when *P1 is unsigned-less-than *P2, and -1
 // otherwise. Note the sign: the smaller value compares as "greater".
1136 const ConstantInt *LHS = *P1;
1137 const ConstantInt *RHS = *P2;
1138 if (LHS == RHS)
1139 return 0;
1140 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1141}
1142
1143/// Get Weights of a given terminator, the default weight is at the front
1144/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1145/// metadata.
///
/// \p TI must carry !prof metadata (asserted). \p Weights receives the
/// extracted weights; a conditional branch must yield exactly two (asserted).
1147 SmallVectorImpl<uint64_t> &Weights) {
1148 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1149 assert(MD && "Invalid branch-weight metadata");
1150 extractFromBranchWeightMD64(MD, Weights);
1151
1152 // If TI is a conditional branch on an `icmp eq`, the default case is the
1153 // false case, whose weight is the last of the two extracted entries. We swap
1154 // the default weight to be the first entry.
1155 if (CondBrInst *BI = dyn_cast<CondBrInst>(TI)) {
1156 assert(Weights.size() == 2);
1157 auto *ICI = dyn_cast<ICmpInst>(BI->getCondition());
 // A condition that is not an icmp keeps the extracted order unchanged.
1158 if (!ICI)
1159 return;
1160
1161 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1162 std::swap(Weights.front(), Weights.back());
1163 }
1164}
1165
1167 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
 // Clones every non-terminator, non-pseudo-probe instruction of BB in front of
 // PredBlock's terminator, records original -> clone in VMap, and redirects PHI
 // uses that come in from PredBlock to the clones.
1168 Instruction *PTI = PredBlock->getTerminator();
1169
1170 // If we have bonus instructions, clone them into the predecessor block.
1171 // Note that there may be multiple predecessor blocks, so we cannot move
1172 // bonus instructions to a predecessor block.
1173 for (Instruction &BonusInst : *BB) {
1174 if (BonusInst.isTerminator())
1175 continue;
1176
1177 // Skip cloning pseudo probes into the predecessor, as it would overcount
1178 // otherwise.
1179 if (isa<PseudoProbeInst>(BonusInst))
1180 continue;
1181
1182 Instruction *NewBonusInst = BonusInst.clone();
1183
1184 if (!NewBonusInst->getDebugLoc().isSameSourceLocation(PTI->getDebugLoc())) {
1185 // Unless the instruction has the same !dbg location as the original
1186 // branch, drop it. When we fold the bonus instructions we want to make
1187 // sure we reset their debug locations in order to avoid stepping on
1188 // dead code caused by folding dead branches.
1189 NewBonusInst->setDebugLoc(DebugLoc::getDropped());
1190 } else if (const DebugLoc &DL = NewBonusInst->getDebugLoc()) {
1191 mapAtomInstance(DL, VMap);
1192 }
1193
1194 RemapInstruction(NewBonusInst, VMap,
1196
1197 // If we speculated an instruction, we need to drop any metadata that may
1198 // result in undefined behavior, as the metadata might have been valid
1199 // only given the branch precondition.
1200 // Similarly strip attributes on call parameters that may cause UB in
1201 // location the call is moved to.
1202 NewBonusInst->dropUBImplyingAttrsAndMetadata();
1203
1204 NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1205 auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1206 RemapDbgRecordRange(NewBonusInst->getModule(), Range, VMap,
1208
 // The clone takes over the original's name; the original is renamed with an
 // ".old" suffix. Later bonus instructions are remapped through VMap.
1209 NewBonusInst->takeName(&BonusInst);
1210 BonusInst.setName(NewBonusInst->getName() + ".old");
1211 VMap[&BonusInst] = NewBonusInst;
1212
1213 // Update (liveout) uses of bonus instructions,
1214 // now that the bonus instruction has been cloned into predecessor.
1215 // Note that we expect to be in a block-closed SSA form for this to work!
1216 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1217 auto *UI = cast<Instruction>(U.getUser());
1218 auto *PN = dyn_cast<PHINode>(UI);
1219 if (!PN) {
1220 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1221 "If the user is not a PHI node, then it should be in the same "
1222 "block as, and come after, the original bonus instruction.");
1223 continue; // Keep using the original bonus instruction.
1224 }
1225 // Is this the block-closed SSA form PHI node?
1226 if (PN->getIncomingBlock(U) == BB)
1227 continue; // Great, keep using the original bonus instruction.
1228 // The only other alternative is an "use" when coming from
1229 // the predecessor block - here we should refer to the cloned bonus instr.
1230 assert(PN->getIncomingBlock(U) == PredBlock &&
1231 "Not in block-closed SSA form?");
1232 U.set(NewBonusInst);
1233 }
1234 }
1235
1236 // Key Instructions: We may have propagated atom info into the pred. If the
1237 // pred's terminator already has atom info do nothing as merging would drop
1238 // one atom group anyway. If it doesn't, propagate the remapped atom group
1239 // from BB's terminator.
1240 if (auto &PredDL = PTI->getDebugLoc()) {
1241 auto &DL = BB->getTerminator()->getDebugLoc();
1242 if (!PredDL->getAtomGroup() && DL && DL->getAtomGroup() &&
1243 PredDL.isSameSourceLocation(DL)) {
1244 PTI->setDebugLoc(DL);
1245 RemapSourceAtom(PTI, VMap);
1246 }
1247 }
1248}
1249
/// Fold the value comparison done by \p TI into the predecessor terminator
/// \p PTI: a new switch on \p CV is created in PTI's block that combines the
/// cases of both terminators, so control no longer has to pass through TI's
/// block. Branch weights are merged when either terminator carries them.
///
/// \param CV In/out. When it has pointer type it is replaced by a ptrtoint of
/// itself ("magicptr") created before \p PTI.
/// \param Builder Repositioned to \p PTI and used to emit the new switch.
/// \returns true; there is no other return in the visible code.
1250bool SimplifyCFGOpt::performValueComparisonIntoPredecessorFolding(
1251 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1252 BasicBlock *BB = TI->getParent();
1253 BasicBlock *Pred = PTI->getParent();
1254
1256
1257 // Figure out which 'cases' to copy from TI to PTI.
1258 std::vector<ValueEqualityComparisonCase> BBCases;
1259 BasicBlock *BBDefault = getValueEqualityComparisonCases(TI, BBCases);
1260
1261 std::vector<ValueEqualityComparisonCase> PredCases;
1262 BasicBlock *PredDefault = getValueEqualityComparisonCases(PTI, PredCases);
1263
1264 // Based on whether the default edge from PTI goes to BB or not, fill in
1265 // PredCases and PredDefault with the new switch cases we would like to
1266 // build.
1267 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1268
1269 // Update the branch weight metadata along the way
 // Layout of Weights / SuccWeights: index 0 is the default destination, index
 // i + 1 belongs to case i of PredCases / BBCases respectively.
1270 SmallVector<uint64_t, 8> Weights;
1271 bool PredHasWeights = hasBranchWeightMD(*PTI);
1272 bool SuccHasWeights = hasBranchWeightMD(*TI);
1273
1274 if (PredHasWeights) {
1275 getBranchWeights(PTI, Weights);
1276 // branch-weight metadata is inconsistent here.
1277 if (Weights.size() != 1 + PredCases.size())
1278 PredHasWeights = SuccHasWeights = false;
1279 } else if (SuccHasWeights)
1280 // If there are no predecessor weights but there are successor weights,
1281 // populate Weights with 1, which will later be scaled to the sum of
1282 // successor's weights
1283 Weights.assign(1 + PredCases.size(), 1);
1284
1285 SmallVector<uint64_t, 8> SuccWeights;
1286 if (SuccHasWeights) {
1287 getBranchWeights(TI, SuccWeights);
1288 // branch-weight metadata is inconsistent here.
1289 if (SuccWeights.size() != 1 + BBCases.size())
1290 PredHasWeights = SuccHasWeights = false;
1291 } else if (PredHasWeights)
1292 SuccWeights.assign(1 + BBCases.size(), 1);
1293
1294 if (PredDefault == BB) {
1295 // If this is the default destination from PTI, only the edges in TI
1296 // that don't occur in PTI, or that branch to BB will be activated.
1297 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1298 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1299 if (PredCases[i].Dest != BB)
1300 PTIHandled.insert(PredCases[i].Value);
1301 else {
1302 // The default destination is BB, we don't need explicit targets.
 // Swap-with-last removal; i and e are decremented below so the element
 // swapped into slot i is examined on the next iteration.
1303 std::swap(PredCases[i], PredCases.back());
1304
1305 if (PredHasWeights || SuccHasWeights) {
1306 // Increase weight for the default case.
1307 Weights[0] += Weights[i + 1];
1308 std::swap(Weights[i + 1], Weights.back());
1309 Weights.pop_back();
1310 }
1311
1312 PredCases.pop_back();
1313 --i;
1314 --e;
1315 }
1316
1317 // Reconstruct the new switch statement we will be building.
1318 if (PredDefault != BBDefault) {
1319 PredDefault->removePredecessor(Pred);
1320 if (DTU && PredDefault != BB)
1321 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1322 PredDefault = BBDefault;
1323 ++NewSuccessors[BBDefault];
1324 }
1325
1326 unsigned CasesFromPred = Weights.size();
1327 uint64_t ValidTotalSuccWeight = 0;
1328 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1329 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1330 PredCases.push_back(BBCases[i]);
1331 ++NewSuccessors[BBCases[i].Dest];
1332 if (SuccHasWeights || PredHasWeights) {
1333 // The default weight is at index 0, so weight for the ith case
1334 // should be at index i+1. Scale the cases from successor by
1335 // PredDefaultWeight (Weights[0]).
1336 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1337 ValidTotalSuccWeight += SuccWeights[i + 1];
1338 }
1339 }
1340
1341 if (SuccHasWeights || PredHasWeights) {
1342 ValidTotalSuccWeight += SuccWeights[0];
1343 // Scale the cases from predecessor by ValidTotalSuccWeight.
1344 for (unsigned i = 1; i < CasesFromPred; ++i)
1345 Weights[i] *= ValidTotalSuccWeight;
1346 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1347 Weights[0] *= SuccWeights[0];
1348 }
1349 } else {
1350 // If this is not the default destination from PTI, only the edges
1351 // in TI that occur in PTI with a destination of BB will be
1352 // activated.
1353 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1354 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1355 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1356 if (PredCases[i].Dest == BB) {
1357 PTIHandled.insert(PredCases[i].Value);
1358
1359 if (PredHasWeights || SuccHasWeights) {
1360 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1361 std::swap(Weights[i + 1], Weights.back());
1362 Weights.pop_back();
1363 }
1364
1365 std::swap(PredCases[i], PredCases.back());
1366 PredCases.pop_back();
1367 --i;
1368 --e;
1369 }
1370
1371 // Okay, now we know which constants were sent to BB from the
1372 // predecessor. Figure out where they will all go now.
1373 for (const ValueEqualityComparisonCase &Case : BBCases)
1374 if (PTIHandled.count(Case.Value)) {
1375 // If this is one we are capable of getting...
1376 if (PredHasWeights || SuccHasWeights)
1377 Weights.push_back(WeightsForHandled[Case.Value]);
1378 PredCases.push_back(Case);
1379 ++NewSuccessors[Case.Dest];
1380 PTIHandled.erase(Case.Value); // This constant is taken care of
1381 }
1382
1383 // If there are any constants vectored to BB that TI doesn't handle,
1384 // they must go to the default destination of TI.
1385 for (ConstantInt *I : PTIHandled) {
1386 if (PredHasWeights || SuccHasWeights)
1387 Weights.push_back(WeightsForHandled[I]);
1388 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1389 ++NewSuccessors[BBDefault];
1390 }
1391 }
1392
1393 // Okay, at this point, we know which new successor Pred will get. Make
1394 // sure we update the number of entries in the PHI nodes for these
1395 // successors.
1396 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1397 if (DTU) {
1398 SuccsOfPred = {llvm::from_range, successors(Pred)};
1399 Updates.reserve(Updates.size() + NewSuccessors.size());
1400 }
1401 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1402 NewSuccessors) {
 // One addPredecessorToBlock call per new edge from Pred to this successor.
1403 for (auto I : seq(NewSuccessor.second)) {
1404 (void)I;
1405 addPredecessorToBlock(NewSuccessor.first, Pred, BB);
1406 }
1407 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1408 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1409 }
1410
1411 Builder.SetInsertPoint(PTI);
1412 // Convert pointer to int before we switch.
1413 if (CV->getType()->isPointerTy()) {
1414 assert(!DL.hasUnstableRepresentation(CV->getType()) &&
1415 "Should not end up here with unstable pointers");
1416 CV =
1417 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1418 }
1419
1420 // Now that the successors are updated, create the new Switch instruction.
1421 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1422 NewSI->setDebugLoc(PTI->getDebugLoc());
1423 for (ValueEqualityComparisonCase &V : PredCases)
1424 NewSI->addCase(V.Value, V.Dest);
1425
1426 if (PredHasWeights || SuccHasWeights)
1427 setFittedBranchWeights(*NewSI, Weights, /*IsExpected=*/false,
1428 /*ElideAllZero=*/true);
1429
1431
1432 // Okay, last check. If BB is still a successor of the new switch, then we
1433 // must have an infinite loop case. If so, add an infinitely looping block
1434 // to handle the case to preserve the behavior of the code.
1435 BasicBlock *InfLoopBlock = nullptr;
1436 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1437 if (NewSI->getSuccessor(i) == BB) {
1438 if (!InfLoopBlock) {
1439 // Insert it at the end of the function, because it's either cold,
1440 // or it won't matter if it's hot. :)
1441 InfLoopBlock =
1442 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1443 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
1444 if (DTU)
1445 Updates.push_back(
1446 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1447 }
1448 NewSI->setSuccessor(i, InfLoopBlock);
1449 }
1450
1451 if (DTU) {
1452 if (InfLoopBlock)
1453 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1454
1455 Updates.push_back({DominatorTree::Delete, Pred, BB});
1456
1457 DTU->applyUpdates(Updates);
1458 }
1459
1460 ++NumFoldValueComparisonIntoPredecessors;
1461 return true;
1462}
1463
1464/// The specified terminator is a value equality comparison instruction
1465/// (either a switch or a branch on "X == c").
1466/// See if any of the predecessors of the terminator block are value comparisons
1467/// on the same value. If so, and if safe to do so, fold them together.
1468bool SimplifyCFGOpt::foldValueComparisonIntoPredecessors(Instruction *TI,
1469 IRBuilder<> &Builder) {
1470 BasicBlock *BB = TI->getParent();
1471 Value *CV = isValueEqualityComparison(TI); // CondVal
1472 assert(CV && "Not a comparison?");
1473
1474 bool Changed = false;
1475
1476 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1477 while (!Preds.empty()) {
1478 BasicBlock *Pred = Preds.pop_back_val();
1479 Instruction *PTI = Pred->getTerminator();
1480
1481 // Don't try to fold into itself.
1482 if (Pred == BB)
1483 continue;
1484
1485 // See if the predecessor is a comparison with the same value.
1486 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1487 if (PCV != CV)
1488 continue;
1489
1490 SmallSetVector<BasicBlock *, 4> FailBlocks;
1491 if (!safeToMergeTerminators(TI, PTI, &FailBlocks)) {
1492 for (auto *Succ : FailBlocks) {
1493 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1494 return false;
1495 }
1496 }
1497
1498 performValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1499 Changed = true;
1500 }
1501 return Changed;
1502}
1503
1504// If we would need to insert a select that uses the value of this invoke
1505// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1506// need to do this), we can't hoist the invoke, as there is nowhere to put the
1507// select in this case.
1509 Instruction *I1, Instruction *I2) {
 // Scan every PHI of every successor of BB1. Hoisting is rejected as soon as a
 // PHI receives different values from BB1 and BB2 and one of those values is
 // the corresponding instruction itself (I1 from BB1, or I2 from BB2).
1510 for (BasicBlock *Succ : successors(BB1)) {
1511 for (const PHINode &PN : Succ->phis()) {
1512 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1513 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1514 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1515 return false;
1516 }
1517 }
1518 }
1519 return true;
1520}
1521
1522// Get interesting characteristics of instructions that
1523// `hoistCommonCodeFromSuccessors` didn't hoist. They restrict what kind of
1524// instructions can be reordered across.
1530
1532 // Pseudo probes don't constrain reordering of other instructions.
1534 return 0;
 // Accumulate one Skip* bit per property of I that limits what may later be
 // reordered across it; the combined mask is returned.
1535 unsigned Flags = 0;
1536 if (I->mayReadFromMemory())
1537 Flags |= SkipReadMem;
1538 // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1539 // inalloca) across stacksave/stackrestore boundaries.
1540 if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1541 Flags |= SkipSideEffect;
1543 Flags |= SkipImplicitControlFlow;
1544 return Flags;
1545}
1546
1547// Returns true if it is safe to reorder an instruction across preceding
1548// instructions in a basic block.
//
// I is the instruction that would be hoisted. Flags is a bitmask of Skip*
// bits describing the instructions that I would be moved across.
1549static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1550 // Don't reorder a store over a load.
1551 if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1552 return false;
1553
1554 // If we have seen an instruction with side effects, it's unsafe to reorder an
1555 // instruction which reads memory or itself has side effects.
1556 if ((Flags & SkipSideEffect) &&
1557 (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1558 return false;
1559
1560 // Reordering across an instruction which does not necessarily transfer
1561 // control to the next instruction is speculation.
1563 return false;
1564
1565 // Hoisting of llvm.deoptimize is only legal together with the next return
1566 // instruction, which this pass is not always able to do.
1567 if (auto *CB = dyn_cast<CallBase>(I))
1568 if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1569 return false;
1570
1571 // It's also unsafe/illegal to hoist an instruction above its instruction
1572 // operands
 // Only operands defined in I's own block are checked here.
1573 BasicBlock *BB = I->getParent();
1574 for (Value *Op : I->operands()) {
1575 if (auto *J = dyn_cast<Instruction>(Op))
1576 if (J->getParent() == BB)
1577 return false;
1578 }
1579
1580 return true;
1581}
1582
// Forward declaration; the definition is not part of this excerpt.
1583static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1584
1585/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1586/// instructions \p I1 and \p I2 can and should be hoisted.
///
/// Hoisting is refused when the two are calls that disagree on musttail, when
/// the target reports either instruction as unprofitable to hoist, or when
/// either is a call site that cannot be merged or is convergent.
1588 const TargetTransformInfo &TTI) {
1589 // If we're going to hoist a call, make sure that the two instructions
1590 // we're commoning/hoisting are both marked with musttail, or neither of
1591 // them is marked as such. Otherwise, we might end up in a situation where
1592 // we hoist from a block where the terminator is a `ret` to a block where
1593 // the terminator is a `br`, and `musttail` calls expect to be followed by
1594 // a return.
1595 auto *C1 = dyn_cast<CallInst>(I1);
1596 auto *C2 = dyn_cast<CallInst>(I2);
1597 if (C1 && C2)
1598 if (C1->isMustTailCall() != C2->isMustTailCall())
1599 return false;
1600
1601 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1602 return false;
1603
1604 // If any of the two call sites has nomerge or convergent attribute, stop
1605 // hoisting.
1606 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1607 if (CB1->cannotMerge() || CB1->isConvergent())
1608 return false;
1609 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1610 if (CB2->cannotMerge() || CB2->isConvergent())
1611 return false;
1612
1613 return true;
1614}
1615
1616/// Hoists DbgVariableRecords from \p I1 and \p OtherInstrs that are identical
1617/// in lock-step to \p TI. This matches how dbg.* intrinsics are hoisted in
1618/// hoistCommonCodeFromSuccessors. e.g. The input:
1619/// I1 DVRs: { x, z },
1620/// OtherInsts: { I2 DVRs: { x, y, z } }
1621/// would result in hoisting only DbgVariableRecord x.
///
/// Nothing is hoisted if \p I1 or any instruction in \p OtherInsts has no
/// debug records at all.
1623 Instruction *TI, Instruction *I1,
1624 SmallVectorImpl<Instruction *> &OtherInsts) {
1625 if (!I1->hasDbgRecords())
1626 return;
1627 using CurrentAndEndIt =
1628 std::pair<DbgRecord::self_iterator, DbgRecord::self_iterator>;
1629 // Vector of {Current, End} iterators.
1631 Itrs.reserve(OtherInsts.size() + 1);
1632 // Helper lambdas for lock-step checks:
1633 // Return true if this Current == End.
1634 auto atEnd = [](const CurrentAndEndIt &Pair) {
1635 return Pair.first == Pair.second;
1636 };
1637 // Return true if all Current are identical.
 // (Each Current after the first is compared against Itrs[0]'s Current.)
1638 auto allIdentical = [](const SmallVector<CurrentAndEndIt> &Itrs) {
1639 return all_of(make_first_range(ArrayRef(Itrs).drop_front()),
1641 return Itrs[0].first->isIdenticalToWhenDefined(*I);
1642 });
1643 };
1644
1645 // Collect the iterators.
1646 Itrs.push_back(
1647 {I1->getDbgRecordRange().begin(), I1->getDbgRecordRange().end()});
1648 for (Instruction *Other : OtherInsts) {
1649 if (!Other->hasDbgRecords())
1650 return;
1651 Itrs.push_back(
1652 {Other->getDbgRecordRange().begin(), Other->getDbgRecordRange().end()});
1653 }
1654
1655 // Iterate in lock-step until any of the DbgRecord lists are exhausted. If
1656 // the lock-step DbgRecord are identical, hoist all of them to TI.
1657 // This replicates the dbg.* intrinsic behaviour in
1658 // hoistCommonCodeFromSuccessors.
1659 while (none_of(Itrs, atEnd)) {
1660 bool HoistDVRs = allIdentical(Itrs);
1661 for (CurrentAndEndIt &Pair : Itrs) {
1662 // Increment Current iterator now as we may be about to move the
1663 // DbgRecord.
1664 DbgRecord &DR = *Pair.first++;
1665 if (HoistDVRs) {
1666 DR.removeFromParent();
1667 TI->getParent()->insertDbgRecordBefore(&DR, TI->getIterator());
1668 }
1669 }
1670 }
1671}
1672
1674 const Instruction *I2) {
 // Exactly identical instructions trivially qualify.
1675 if (I1->isIdenticalToWhenDefined(I2, /*IntersectAttrs=*/true))
1676 return true;
1677
 // Two compares match when one is the other with both the predicate and the
 // operands swapped. For a pair of compares this check is final: the
 // commutative case below is not tried.
1678 if (auto *Cmp1 = dyn_cast<CmpInst>(I1))
1679 if (auto *Cmp2 = dyn_cast<CmpInst>(I2))
1680 return Cmp1->getPredicate() == Cmp2->getSwappedPredicate() &&
1681 Cmp1->getOperand(0) == Cmp2->getOperand(1) &&
1682 Cmp1->getOperand(1) == Cmp2->getOperand(0);
1683
 // Commutative operations match when their first two operands are swapped and
 // all remaining operands are pairwise equal.
1684 if (I1->isCommutative() && I1->isSameOperationAs(I2)) {
1685 return I1->getOperand(0) == I2->getOperand(1) &&
1686 I1->getOperand(1) == I2->getOperand(0) &&
1687 equal(drop_begin(I1->operands(), 2), drop_begin(I2->operands(), 2));
1688 }
1689
1690 return false;
1691}
1692
1693/// If the target supports conditional faulting,
1694/// we look for the following pattern:
1695/// \code
1696/// BB:
1697/// ...
1698/// %cond = icmp ult %x, %y
1699/// br i1 %cond, label %TrueBB, label %FalseBB
1700/// FalseBB:
1701/// store i32 1, ptr %q, align 4
1702/// ...
1703/// TrueBB:
1704/// %maskedloadstore = load i32, ptr %b, align 4
1705/// store i32 %maskedloadstore, ptr %p, align 4
1706/// ...
1707/// \endcode
1708///
1709/// and transform it into:
1710///
1711/// \code
1712/// BB:
1713/// ...
1714/// %cond = icmp ult %x, %y
1715/// %maskedloadstore = cload i32, ptr %b, %cond
1716/// cstore i32 %maskedloadstore, ptr %p, %cond
1717/// cstore i32 1, ptr %q, ~%cond
1718/// br i1 %cond, label %TrueBB, label %FalseBB
1719/// FalseBB:
1720/// ...
1721/// TrueBB:
1722/// ...
1723/// \endcode
1724///
1725/// where cload/cstore are represented by llvm.masked.load/store intrinsics,
1726/// e.g.
1727///
1728/// \code
1729/// %vcond = bitcast i1 %cond to <1 x i1>
1730/// %v0 = call <1 x i32> @llvm.masked.load.v1i32.p0
1731/// (ptr %b, i32 4, <1 x i1> %vcond, <1 x i32> poison)
1732/// %maskedloadstore = bitcast <1 x i32> %v0 to i32
1733/// call void @llvm.masked.store.v1i32.p0
1734/// (<1 x i32> %v0, ptr %p, i32 4, <1 x i1> %vcond)
1735/// %cond.not = xor i1 %cond, true
1736/// %vcond.not = bitcast i1 %cond.not to <1 x i1>
1737/// call void @llvm.masked.store.v1i32.p0
1738/// (<1 x i32> <i32 1>, ptr %q, i32 4, <1 x i1> %vcond.not)
1739/// \endcode
1740///
1741/// So we need to turn hoisted load/store into cload/cstore.
1742///
1743/// \param BI The branch instruction.
1744/// \param SpeculatedConditionalLoadsStores The load/store instructions that
1745/// will be speculated.
1746/// \param Invert indicates if speculates FalseBB. Only used in triangle CFG.
// NOTE(review): the embedded listing numbers skip 1747 here; the line carrying
// this function's return type and name is missing from this extract, so only
// the parameter list is visible below.
//
// Replaces every instruction in SpeculatedConditionalLoadsStores with a call
// created by CreateMaskedLoad / CreateMaskedStore on a one-element vector,
// guarded by a <1 x i1> mask derived from BI's condition, and erases the
// original instruction.
//
// Sel, when non-null, is the insertion point for the mask in the Invert case
// and enables moving a masked load in front of a user that lives in BI's block.
1748 CondBrInst *BI,
1749 SmallVectorImpl<Instruction *> &SpeculatedConditionalLoadsStores,
1750 std::optional<bool> Invert, Instruction *Sel) {
1751 auto &Context = BI->getParent()->getContext();
1752 auto *VCondTy = FixedVectorType::get(Type::getInt1Ty(Context), 1);
1753 auto *Cond = BI->getCondition();
1754 // Construct the condition if needed.
1755 BasicBlock *BB = BI->getParent();
1756 Value *Mask = nullptr;
1757 Value *MaskFalse = nullptr;
1758 Value *MaskTrue = nullptr;
// Invert set: a single mask is enough; it is the negated condition when
// *Invert is true and the plain condition otherwise.
1759 if (Invert.has_value()) {
1760 IRBuilder<> Builder(Sel ? Sel : SpeculatedConditionalLoadsStores.back());
1761 Mask = Builder.CreateBitCast(
1762 *Invert ? Builder.CreateXor(Cond, ConstantInt::getTrue(Context)) : Cond,
1763 VCondTy);
1764 } else {
// Invert unset: materialize both polarities in front of the branch; each
// instruction picks one of them in the loop below.
1765 IRBuilder<> Builder(BI);
1766 MaskFalse = Builder.CreateBitCast(
1767 Builder.CreateXor(Cond, ConstantInt::getTrue(Context)), VCondTy);
1768 MaskTrue = Builder.CreateBitCast(Cond, VCondTy);
1769 }
// Strips any chain of bitcast instructions and returns the underlying value.
1770 auto PeekThroughBitcasts = [](Value *V) {
1771 while (auto *BitCast = dyn_cast<BitCastInst>(V))
1772 V = BitCast->getOperand(0);
1773 return V;
1774 };
1775 for (auto *I : SpeculatedConditionalLoadsStores) {
// With Invert set the replacement is emitted at I itself; otherwise it is
// emitted in front of the branch.
1776 IRBuilder<> Builder(Invert.has_value() ? I : BI);
// Instructions whose parent is successor 0 are guarded by the true mask,
// all others by the false mask.
1777 if (!Invert.has_value())
1778 Mask = I->getParent() == BI->getSuccessor(0) ? MaskTrue : MaskFalse;
1779 // We currently assume conditional faulting load/store is supported for
1780 // scalar types only when creating new instructions. This can be easily
1781 // extended for vector types in the future.
1782 assert(!getLoadStoreType(I)->isVectorTy() && "not implemented");
1783 auto *Op0 = I->getOperand(0);
1784 CallInst *MaskedLoadStore = nullptr;
1785 if (auto *LI = dyn_cast<LoadInst>(I)) {
1786 // Handle Load.
1787 auto *Ty = I->getType();
1788 PHINode *PN = nullptr;
1789 Value *PassThru = nullptr;
// Users are only inspected in the Invert case: a PHI user supplies the
// pass-through value (its incoming value for BB, recast as a 1-element
// vector of the loaded type).
1790 if (Invert.has_value())
1791 for (User *U : I->users()) {
1792 if ((PN = dyn_cast<PHINode>(U))) {
1793 PassThru = Builder.CreateBitCast(
1794 PeekThroughBitcasts(PN->getIncomingValueForBlock(BB)),
1795 FixedVectorType::get(Ty, 1));
1796 } else if (auto *Ins = cast<Instruction>(U);
1797 Sel && Ins->getParent() == BB) {
1798 // This happens when store or/and a speculative instruction between
1799 // load and store were hoisted to the BB. Make sure the masked load
1800 // inserted before its use.
1801 // We assume there's one of such use.
1802 Builder.SetInsertPoint(Ins);
1803 }
1804 }
// PassThru is still null here when no PHI user was seen above.
1805 MaskedLoadStore = Builder.CreateMaskedLoad(
1806 FixedVectorType::get(Ty, 1), Op0, LI->getAlign(), Mask, PassThru);
// Cast the one-element vector result back to the original scalar type.
1807 Value *NewLoadStore = Builder.CreateBitCast(MaskedLoadStore, Ty);
1808 if (PN)
1809 PN->setIncomingValue(PN->getBasicBlockIndex(BB), NewLoadStore);
1810 I->replaceAllUsesWith(NewLoadStore);
1811 } else {
1812 // Handle Store.
// The stored value (operand 0, looked through bitcasts) is recast as a
// one-element vector; operand 1 is passed on as the store address.
1813 auto *StoredVal = Builder.CreateBitCast(
1814 PeekThroughBitcasts(Op0), FixedVectorType::get(Op0->getType(), 1));
1815 MaskedLoadStore = Builder.CreateMaskedStore(
1816 StoredVal, I->getOperand(1), cast<StoreInst>(I)->getAlign(), Mask);
1817 }
1818 // For non-debug metadata, only !annotation, !range, !nonnull and !align are
1819 // kept when hoisting (see Instruction::dropUBImplyingAttrsAndMetadata).
1820 //
1821 // !nonnull, !align : Not support pointer type, no need to keep.
1822 // !range: Load type is changed from scalar to vector, but the metadata on
1823 // vector specifies a per-element range, so the semantics stay the
1824 // same. Keep it.
1825 // !annotation: Not impact semantics. Keep it.
1826 if (const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range))
1827 MaskedLoadStore->addRangeRetAttr(getConstantRangeFromMetadata(*Ranges));
1828 I->dropUBImplyingAttrsAndUnknownMetadata({LLVMContext::MD_annotation});
1829 // FIXME: DIAssignID is not supported for masked store yet.
1830 // (Verifier::visitDIAssignIDMetadata)
// NOTE(review): the embedded listing numbers skip 1831 here; a statement
// between the FIXME above and the eraseMetadataIf call below is missing from
// this extract.
1832 I->eraseMetadataIf([](unsigned MDKind, MDNode *Node) {
1833 return Node->getMetadataID() == Metadata::DIAssignIDKind;
1834 });
// Copy whatever metadata is left on I onto the new call, then delete I.
1835 MaskedLoadStore->copyMetadata(*I);
1836 I->eraseFromParent();
1837 }
1838}
1839
// NOTE(review): the embedded listing numbers skip 1840 here; the line carrying
// this function's return type, name and first parameter (the instruction `I`
// used below) is missing from this extract.
//
// Returns false unless I is a simple load or a simple store and the matching
// Hoist{Loads,Stores}WithCondFaulting flag is set; for those, the result
// additionally depends on TTI.hasConditionalLoadStoreForType.
1841 const TargetTransformInfo &TTI) {
1842 // Volatile or atomic accesses are not handled.
1843 bool IsStore = false;
1844 if (auto *L = dyn_cast<LoadInst>(I)) {
1845 if (!L->isSimple() || !HoistLoadsWithCondFaulting)
1846 return false;
1847 } else if (auto *S = dyn_cast<StoreInst>(I)) {
1848 if (!S->isSimple() || !HoistStoresWithCondFaulting)
1849 return false;
1850 IsStore = true;
1851 } else
1852 return false;
1853
1854 // llvm.masked.load/store use i32 for alignment while load/store use i64.
1855 // That's why we have the alignment limitation.
1856 // FIXME: Update the prototype of the intrinsics?
// NOTE(review): the embedded listing numbers skip 1858; the right-hand side of
// the `&&` below is missing from this extract. Going by the comment above it
// is presumably an alignment bound check -- confirm against the full source.
1857 return TTI.hasConditionalLoadStoreForType(getLoadStoreType(I), IsStore) &&
1859}
1860
1861/// Hoist any common code in the successor blocks up into the block. This
1862/// function guarantees that BB dominates all successors. If AllInstsEqOnly is
1863/// given, only perform hoisting in case all successor blocks contain matching
1864/// instructions only. In that case, all instructions can be hoisted and the
1865/// original branch will be replaced and selects for PHIs are added.
///
/// \param TI The terminator whose successors are scanned; hoisted
/// instructions are moved in front of it.
/// \param AllInstsEqOnly Restricts the transform as described above.
/// \returns true if anything was hoisted.
1866bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(Instruction *TI,
1867 bool AllInstsEqOnly) {
1868 // This does very trivial matching, with limited scanning, to find identical
1869 // instructions in the two blocks. In particular, we don't want to get into
1870 // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1871 // such, we currently just scan for obviously identical instructions in an
1872 // identical order, possibly separated by the same number of non-identical
1873 // instructions.
1874 BasicBlock *BB = TI->getParent();
1875 unsigned int SuccSize = succ_size(BB);
1876 if (SuccSize < 2)
1877 return false;
1878
1879 // If either of the blocks has its address taken, then we can't do this fold,
1880 // because the code we'd hoist would no longer run when we jump into the block
1881 // by its address.
1882 SmallSetVector<BasicBlock *, 4> UniqueSuccessors(from_range, successors(BB));
1883 for (auto *Succ : UniqueSuccessors) {
1884 if (Succ->hasAddressTaken())
1885 return false;
1886 // Use getUniquePredecessor instead of getSinglePredecessor to support
1887 // multi-cases successors in switch.
1888 if (Succ->getUniquePredecessor())
1889 continue;
1890 // If Succ has >1 predecessors, continue to check if the Succ contains only
1891 // one `unreachable` inst. Since executing `unreachable` inst is UB, we
1892 // can relax the condition based on the assumption that the program would
1893 // never enter Succ and trigger such UB.
1894 if (isa<UnreachableInst>(*Succ->begin()))
1895 continue;
1896 return false;
1897 }
1898 // The second of pair is a SkipFlags bitmask.
1899 using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1900 SmallVector<SuccIterPair, 8> SuccIterPairs;
1901 for (auto *Succ : UniqueSuccessors) {
1902 BasicBlock::iterator SuccItr = Succ->begin();
// A successor that starts with a PHI node is not handled.
1903 if (isa<PHINode>(*SuccItr))
1904 return false;
1905 SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1906 }
1907
1908 if (AllInstsEqOnly) {
1909 // Check if all instructions in the successor blocks match. This allows
1910 // hoisting all instructions and removing the blocks we are hoisting from,
1911 // so does not add any new instructions.
1912
1913 // Check if sizes and terminators of all successors match.
1914 unsigned Size0 = UniqueSuccessors[0]->size();
1915 Instruction *Term0 = UniqueSuccessors[0]->getTerminator();
1916 bool AllSame =
1917 all_of(drop_begin(UniqueSuccessors), [Term0, Size0](BasicBlock *Succ) {
1918 return Succ->getTerminator()->isIdenticalTo(Term0) &&
1919 Succ->size() == Size0;
1920 });
1921 if (!AllSame)
1922 return false;
// Compare the successors instruction by instruction against the first one;
// any mismatch aborts the whole transform.
1923 LockstepReverseIterator<true> LRI(UniqueSuccessors.getArrayRef());
1924 while (LRI.isValid()) {
1925 Instruction *I0 = (*LRI)[0];
1926 if (any_of(*LRI, [I0](Instruction *I) {
1927 return !areIdenticalUpToCommutativity(I0, I);
1928 })) {
1929 return false;
1930 }
1931 --LRI;
1932 }
1933 // Now we know that all instructions in all successors can be hoisted. Let
1934 // the loop below handle the hoisting.
1935 }
1936
1937 // Count how many instructions were not hoisted so far. There's a limit on how
1938 // many instructions we skip, serving as a compilation time control as well as
1939 // preventing excessive increase of life ranges.
1940 unsigned NumSkipped = 0;
1941 // If we find an unreachable instruction at the beginning of a basic block, we
1942 // can still hoist instructions from the rest of the basic blocks.
1943 if (SuccIterPairs.size() > 2) {
1944 erase_if(SuccIterPairs,
1945 [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1946 if (SuccIterPairs.size() < 2)
1947 return false;
1948 }
1949
1950 bool Changed = false;
1951
// Each iteration looks at the current instruction of every remaining
// successor. The first entry of SuccIterPairs is the reference; all other
// entries are compared against it.
1952 for (;;) {
1953 auto *SuccIterPairBegin = SuccIterPairs.begin();
1954 auto &BB1ItrPair = *SuccIterPairBegin++;
1955 auto OtherSuccIterPairRange =
1956 iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1957 auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1958
1959 Instruction *I1 = &*BB1ItrPair.first;
1960
1961 bool AllInstsAreIdentical = true;
1962 bool HasTerminator = I1->isTerminator();
1963 for (auto &SuccIter : OtherSuccIterRange) {
1964 Instruction *I2 = &*SuccIter;
1965 HasTerminator |= I2->isTerminator();
1966 if (AllInstsAreIdentical && (!areIdenticalUpToCommutativity(I1, I2) ||
1967 MMRAMetadata(*I1) != MMRAMetadata(*I2)))
1968 AllInstsAreIdentical = false;
1969 }
1970
// Snapshot the instructions the other successors' iterators currently
// point at.
1971 SmallVector<Instruction *, 8> OtherInsts;
1972 for (auto &SuccIter : OtherSuccIterRange)
1973 OtherInsts.push_back(&*SuccIter);
1974
1975 // If we are hoisting the terminator instruction, don't move one (making a
1976 // broken BB), instead clone it, and remove BI.
1977 if (HasTerminator) {
1978 // Even if BB, which contains only one unreachable instruction, is ignored
1979 // at the beginning of the loop, we can hoist the terminator instruction.
1980 // If any instructions remain in the block, we cannot hoist terminators.
1981 if (NumSkipped || !AllInstsAreIdentical) {
1982 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
1983 return Changed;
1984 }
1985
1986 return hoistSuccIdenticalTerminatorToSwitchOrIf(
1987 TI, I1, OtherInsts, UniqueSuccessors.getArrayRef()) ||
1988 Changed;
1989 }
1990
1991 if (AllInstsAreIdentical) {
1992 unsigned SkipFlagsBB1 = BB1ItrPair.second;
1993 AllInstsAreIdentical =
1994 isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1995 all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1996 Instruction *I2 = &*Pair.first;
1997 unsigned SkipFlagsBB2 = Pair.second;
1998 // Even if the instructions are identical, it may not
1999 // be safe to hoist them if we have skipped over
2000 // instructions with side effects or their operands
2001 // weren't hoisted.
// NOTE(review): the embedded listing numbers skip 2003; the
// right-hand side of the `&&` below is missing from this
// extract.
2002 return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
2004 });
2005 }
2006
2007 if (AllInstsAreIdentical) {
2008 BB1ItrPair.first++;
2009 // For a normal instruction, we just move one to right before the
2010 // branch, then replace all uses of the other with the first. Finally,
2011 // we remove the now redundant second instruction.
2012 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2013 // We've just hoisted DbgVariableRecords; move I1 after them (before TI)
2014 // and leave any that were not hoisted behind (by calling moveBefore
2015 // rather than moveBeforePreserving).
2016 I1->moveBefore(TI->getIterator());
2017 for (auto &SuccIter : OtherSuccIterRange) {
// Advance the stored iterator past I2 before I2 is erased below.
2018 Instruction *I2 = &*SuccIter++;
2019 assert(I2 != I1);
2020 if (!I2->use_empty())
2021 I2->replaceAllUsesWith(I1);
2022 I1->andIRFlags(I2);
2023 if (auto *CB = dyn_cast<CallBase>(I1)) {
2024 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I2));
2025 assert(Success && "We should not be trying to hoist callbases "
2026 "with non-intersectable attributes");
2027 // For NDEBUG Compile.
2028 (void)Success;
2029 }
2030
2031 combineMetadataForCSE(I1, I2, true);
2032 // I1 and I2 are being combined into a single instruction. Its debug
2033 // location is the merged locations of the original instructions.
2034 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
2035 I2->eraseFromParent();
2036 }
// NumHoistCommonCode is bumped only on the first hoist of this call;
// NumHoistCommonInstrs is bumped on every hoist.
2037 if (!Changed)
2038 NumHoistCommonCode += SuccIterPairs.size();
2039 Changed = true;
2040 NumHoistCommonInstrs += SuccIterPairs.size();
2041 } else {
2042 if (NumSkipped >= HoistCommonSkipLimit) {
2043 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherInsts);
2044 return Changed;
2045 }
2046 // We are about to skip over a pair of non-identical instructions. Record
2047 // if any have characteristics that would prevent reordering instructions
2048 // across them.
2049 for (auto &SuccIterPair : SuccIterPairs) {
2050 Instruction *I = &*SuccIterPair.first++;
2051 SuccIterPair.second |= skippedInstrFlags(I);
2052 }
2053 ++NumSkipped;
2054 }
2055 }
2056}
2057
// Clones the successor terminator I1 into TI's block (in front of TI), after
// checking that doing so is allowed, and rewires PHI nodes and dominator-tree
// updates accordingly.
//
// TI: the terminator of the block being hoisted into.
// I1: the terminator of the first successor; this is the one that gets cloned.
// OtherSuccTIs: the matching terminators of the remaining successors.
// UniqueSuccessors: TI's successors without duplicates; used for the
// dominator-tree edge deletions.
// Returns false when one of the checks below rejects the transform, true once
// the clone has been inserted.
2058bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
2059 Instruction *TI, Instruction *I1,
2060 SmallVectorImpl<Instruction *> &OtherSuccTIs,
2061 ArrayRef<BasicBlock *> UniqueSuccessors) {
2062
// BI is null when TI is not a conditional branch.
2063 auto *BI = dyn_cast<CondBrInst>(TI);
2064
2065 bool Changed = false;
2066 BasicBlock *TIParent = TI->getParent();
2067 BasicBlock *BB1 = I1->getParent();
2068
2069 // Use only for an if statement.
2070 auto *I2 = *OtherSuccTIs.begin();
2071 auto *BB2 = I2->getParent();
2072 if (BI) {
2073 assert(OtherSuccTIs.size() == 1);
2074 assert(BI->getSuccessor(0) == I1->getParent());
2075 assert(BI->getSuccessor(1) == I2->getParent());
2076 }
2077
2078 // In the case of an if statement, we try to hoist an invoke.
2079 // FIXME: Can we define a safety predicate for CallBr?
2080 // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
2081 // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
2082 if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
2083 return false;
2084
2085 // TODO: callbr hoisting currently disabled pending further study.
2086 if (isa<CallBrInst>(I1))
2087 return false;
2088
// Legality scan over the PHIs in BB1's successors: wherever the incoming
// value for BB1 differs from that of another successor, the condition below
// decides whether to give up.
2089 for (BasicBlock *Succ : successors(BB1)) {
2090 for (PHINode &PN : Succ->phis()) {
2091 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2092 for (Instruction *OtherSuccTI : OtherSuccTIs) {
2093 Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
2094 if (BB1V == BB2V)
2095 continue;
2096
2097 // In the case of an if statement, check for
2098 // passingValueIsAlwaysUndefined here because we would rather eliminate
2099 // undefined control flow than converting it to a select.
// NOTE(review): the embedded listing numbers skip 2101; the last operand
// of the `||` chain below is missing from this extract.
2100 if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
2102 return false;
2103 }
2104 }
2105 }
2106
2107 // Hoist DbgVariableRecords attached to the terminator to match dbg.*
2108 // intrinsic hoisting behaviour in hoistCommonCodeFromSuccessors.
2109 hoistLockstepIdenticalDbgVariableRecords(TI, I1, OtherSuccTIs);
2110 // Clone the terminator and hoist it into the pred, without any debug info.
2111 Instruction *NT = I1->clone();
2112 NT->insertInto(TIParent, TI->getIterator());
// A value-producing terminator has its uses (and its name) taken over by
// the clone.
2113 if (!NT->getType()->isVoidTy()) {
2114 I1->replaceAllUsesWith(NT);
2115 for (Instruction *OtherSuccTI : OtherSuccTIs)
2116 OtherSuccTI->replaceAllUsesWith(NT);
2117 NT->takeName(I1);
2118 }
2119 Changed = true;
2120 NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
2121
2122 // Ensure terminator gets a debug location, even an unknown one, in case
2123 // it involves inlinable calls.
// NOTE(review): the embedded listing numbers skip 2124; the declaration of
// `Locs` is missing from this extract.
2125 Locs.push_back(I1->getDebugLoc());
2126 for (auto *OtherSuccTI : OtherSuccTIs)
2127 Locs.push_back(OtherSuccTI->getDebugLoc());
2128 NT->setDebugLoc(DebugLoc::getMergedLocations(Locs));
2129
2130 // PHIs created below will adopt NT's merged DebugLoc.
2131 IRBuilder<NoFolder> Builder(NT);
2132
2133 // In the case of an if statement, hoisting one of the terminators from our
2134 // successor is a great thing. Unfortunately, the successors of the if/else
2135 // blocks may have PHI nodes in them. If they do, all PHI entries for BB1/BB2
2136 // must agree for all PHI nodes, so we insert select instruction to compute
2137 // the final result.
2138 if (BI) {
// Cache keyed by the (BB1 value, BB2 value) pair so that PHIs sharing the
// same pair reuse one select.
2139 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
2140 for (BasicBlock *Succ : successors(BB1)) {
2141 for (PHINode &PN : Succ->phis()) {
2142 Value *BB1V = PN.getIncomingValueForBlock(BB1);
2143 Value *BB2V = PN.getIncomingValueForBlock(BB2);
2144 if (BB1V == BB2V)
2145 continue;
2146
2147 // These values do not agree. Insert a select instruction before NT
2148 // that determines the right value.
2149 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
2150 if (!SI) {
2151 // Propagate fast-math-flags from phi node to its replacement select.
// NOTE(review): the embedded listing numbers skip 2152; the start of the
// statement that assigns SI is missing from this extract, only its
// argument list is visible below.
2153 BI->getCondition(), BB1V, BB2V,
2154 isa<FPMathOperator>(PN) ? &PN : nullptr,
2155 BB1V->getName() + "." + BB2V->getName(), BI));
2156 }
2157
2158 // Make the PHI node use the select for all incoming values for BB1/BB2
2159 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
2160 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
2161 PN.setIncomingValue(i, SI);
2162 }
2163 }
2164 }
2165
// NOTE(review): the embedded listing numbers skip 2166; the declaration of
// `Updates` is missing from this extract.
2167
2168 // Update any PHI nodes in our new successors.
2169 for (BasicBlock *Succ : successors(BB1)) {
2170 addPredecessorToBlock(Succ, TIParent, BB1);
2171 if (DTU)
2172 Updates.push_back({DominatorTree::Insert, TIParent, Succ});
2173 }
2174
2175 if (DTU) {
2176 // TI might be a switch with multi-cases destination, so we need to care for
2177 // the duplication of successors.
2178 for (BasicBlock *Succ : UniqueSuccessors)
2179 Updates.push_back({DominatorTree::Delete, TIParent, Succ});
2180 }
2181
// NOTE(review): the embedded listing numbers skip 2182. Nothing visible in
// this function removes the old terminator TI, so the missing line presumably
// does that -- confirm against the full source.
2183 if (DTU)
2184 DTU->applyUpdates(Updates);
2185 return Changed;
2186}
2187
2188// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
2189// into variables.
// NOTE(review): the embedded listing numbers skip 2190; the line carrying this
// function's return type, name and first parameter (the instruction `I` used
// below) is missing from this extract.
//
// Answers whether operand number OpIdx of I may be replaced: for integer
// div/rem only operand 1 is refused; otherwise the answer is false exactly
// when I is an IntrinsicInst.
2191 int OpIdx) {
2192 // Divide/Remainder by constant is typically much cheaper than by variable.
2193 if (I->isIntDivRem())
2194 return OpIdx != 1;
2195 return !isa<IntrinsicInst>(I);
2196}
2197
2198// All instructions in Insts belong to different blocks that all unconditionally
2199// branch to a common successor. Analyze each instruction and return true if it
2200// would be possible to sink them into their successor, creating one common
2201// instruction instead. For every value that would be required to be provided by
2202// PHI node (because an operand varies in each input block), add to PHIOperands.
// NOTE(review): the embedded listing numbers skip 2203-2204; the lines carrying
// this function's return type, name and the `Insts` parameter are missing from
// this extract.
2205 DenseMap<const Use *, SmallVector<Value *, 4>> &PHIOperands) {
2206 // Prune out obviously bad instructions to move. Each instruction must have
2207 // the same number of uses, and we check later that the uses are consistent.
2208 std::optional<unsigned> NumUses;
2209 for (auto *I : Insts) {
2210 // These instructions may change or break semantics if moved.
2211 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
2212 I->getType()->isTokenTy())
2213 return false;
2214
2215 // Do not try to sink an instruction in an infinite loop - it can cause
2216 // this algorithm to infinite loop.
2217 if (I->getParent()->getSingleSuccessor() == I->getParent())
2218 return false;
2219
2220 // Conservatively return false if I is an inline-asm instruction. Sinking
2221 // and merging inline-asm instructions can potentially create arguments
2222 // that cannot satisfy the inline-asm constraints.
2223 // If the instruction has nomerge or convergent attribute, return false.
2224 if (const auto *C = dyn_cast<CallBase>(I))
2225 if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
2226 return false;
2227
// The first instruction fixes the expected use count; every later one
// must match it.
2228 if (!NumUses)
2229 NumUses = I->getNumUses();
2230 else if (NumUses != I->getNumUses())
2231 return false;
2232 }
2233
// Every instruction must be the same operation as the first one and carry
// the same MMRA metadata.
2234 const Instruction *I0 = Insts.front();
2235 const auto I0MMRA = MMRAMetadata(*I0);
2236 for (auto *I : Insts) {
2237 if (!I->isSameOperationAs(I0, Instruction::CompareUsingIntersectedAttrs))
2238 return false;
2239
2240 // Treat MMRAs conservatively. This pass can be quite aggressive and
2241 // could drop a lot of MMRAs otherwise.
2242 if (MMRAMetadata(*I) != I0MMRA)
2243 return false;
2244 }
2245
2246 // Uses must be consistent: If I0 is used in a phi node in the sink target,
2247 // then the other phi operands must match the instructions from Insts. This
2248 // also has to hold true for any phi nodes that would be created as a result
2249 // of sinking. Both of these cases are represented by PhiOperands.
2250 for (const Use &U : I0->uses()) {
2251 auto It = PHIOperands.find(&U);
2252 if (It == PHIOperands.end())
2253 // There may be uses in other blocks when sinking into a loop header.
2254 return false;
2255 if (!equal(Insts, It->second))
2256 return false;
2257 }
2258
2259 // For calls to be sinkable, they must all be indirect, or have same callee.
2260 // I.e. if we have two direct calls to different callees, we don't want to
2261 // turn that into an indirect call. Likewise, if we have an indirect call,
2262 // and a direct call, we don't actually want to have a single indirect call.
2263 if (isa<CallBase>(I0)) {
2264 auto IsIndirectCall = [](const Instruction *I) {
2265 return cast<CallBase>(I)->isIndirectCall();
2266 };
2267 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
2268 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
2269 if (HaveIndirectCalls) {
2270 if (!AllCallsAreIndirect)
2271 return false;
2272 } else {
2273 // All callees must be identical.
2274 Value *Callee = nullptr;
2275 for (const Instruction *I : Insts) {
2276 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
2277 if (!Callee)
2278 Callee = CurrCallee;
2279 else if (Callee != CurrCallee)
2280 return false;
2281 }
2282 }
2283 }
2284
// Walk the operands: any operand position that is not identical across all
// of Insts gets its per-instruction values recorded in PHIOperands, keyed by
// I0's use of that operand.
2285 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
2286 Value *Op = I0->getOperand(OI);
2287 auto SameAsI0 = [&I0, OI](const Instruction *I) {
2288 assert(I->getNumOperands() == I0->getNumOperands());
2289 return I->getOperand(OI) == I0->getOperand(OI);
2290 };
2291 if (!all_of(Insts, SameAsI0)) {
// NOTE(review): the embedded listing numbers skip 2292-2293; the condition
// guarding the `return false` below is missing from this extract.
2294 // We can't create a PHI from this GEP.
2295 return false;
2296 auto &Ops = PHIOperands[&I0->getOperandUse(OI)];
2297 for (auto *I : Insts)
2298 Ops.push_back(I->getOperand(OI));
2299 }
2300 }
2301 return true;
2302}
2303
2304// Assuming canSinkInstructions(Blocks) has returned true, sink the last
2305// instruction of every block in Blocks to their common successor, commoning
2306// into one instruction.
// NOTE(review): the embedded listing numbers skip 2307; the line carrying this
// function's return type, name and the `Blocks` parameter is missing from this
// extract.
// The common successor is taken from the first block's terminator.
2308 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2309
2310 // canSinkInstructions returning true guarantees that every block has at
2311 // least one non-terminator instruction.
// NOTE(review): the embedded listing numbers skip 2312; the declaration of
// `Insts` is missing from this extract.
2313 for (auto *BB : Blocks) {
// Collect the instruction immediately preceding each block's terminator.
2314 Instruction *I = BB->getTerminator();
2315 I = I->getPrevNode();
2316 Insts.push_back(I);
2317 }
2318
2319 // We don't need to do any more checking here; canSinkInstructions should
2320 // have done it all for us.
2321 SmallVector<Value*, 4> NewOperands;
2322 Instruction *I0 = Insts.front();
2323 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2324 // This check is different to that in canSinkInstructions. There, we
2325 // cared about the global view once simplifycfg (and instcombine) have
2326 // completed - it takes into account PHIs that become trivially
2327 // simplifiable. However here we need a more local view; if an operand
2328 // differs we create a PHI and rely on instcombine to clean up the very
2329 // small mess we may make.
2330 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2331 return I->getOperand(O) != I0->getOperand(O);
2332 });
2333 if (!NeedPHI) {
2334 NewOperands.push_back(I0->getOperand(O));
2335 continue;
2336 }
2337
2338 // Create a new PHI in the successor block and populate it.
2339 auto *Op = I0->getOperand(O);
2340 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2341 auto *PN =
2342 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2343 PN->insertBefore(BBEnd->begin());
2344 for (auto *I : Insts)
2345 PN->addIncoming(I->getOperand(O), I->getParent());
2346 NewOperands.push_back(PN);
2347 }
2348
2349 // Arbitrarily use I0 as the new "common" instruction; remap its operands
2350 // and move it to the start of the successor block.
2351 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2352 I0->getOperandUse(O).set(NewOperands[O]);
2353
2354 I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2355
2356 // Update metadata and IR flags, and merge debug locations.
2357 for (auto *I : Insts)
2358 if (I != I0) {
2359 // The debug location for the "common" instruction is the merged locations
2360 // of all the commoned instructions. We start with the original location
2361 // of the "common" instruction and iteratively merge each location in the
2362 // loop below.
2363 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2364 // However, as N-way merge for CallInst is rare, so we use simplified API
2365 // instead of using complex API for N-way merge.
2366 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2367 combineMetadataForCSE(I0, I, true);
2368 I0->andIRFlags(I);
2369 if (auto *CB = dyn_cast<CallBase>(I0)) {
2370 bool Success = CB->tryIntersectAttributes(cast<CallBase>(I));
2371 assert(Success && "We should not be trying to sink callbases "
2372 "with non-intersectable attributes");
2373 // For NDEBUG Compile.
2374 (void)Success;
2375 }
2376 }
2377
// make_early_inc_range is used because each PHI user is erased while I0's
// user list is being walked.
2378 for (User *U : make_early_inc_range(I0->users())) {
2379 // canSinkLastInstruction checked that all instructions are only used by
2380 // phi nodes in a way that allows replacing the phi node with the common
2381 // instruction.
2382 auto *PN = cast<PHINode>(U);
2383 PN->replaceAllUsesWith(I0);
2384 PN->eraseFromParent();
2385 }
2386
2387 // Finally nuke all instructions apart from the common instruction.
2388 for (auto *I : Insts) {
2389 if (I == I0)
2390 continue;
2391 // The remaining uses are debug users, replace those with the common inst.
2392 // In most (all?) cases this just introduces a use-before-def.
2393 assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2394 I->replaceAllUsesWith(I0);
2395 I->eraseFromParent();
2396 }
2397}
2398
2399/// Check whether BB's predecessors end with unconditional branches. If it is
2400/// true, sink any common code from the predecessors to BB.
2402 DomTreeUpdater *DTU) {
2403 // We support two situations:
2404 // (1) all incoming arcs are unconditional
2405 // (2) there are non-unconditional incoming arcs
2406 //
2407 // (2) is very common in switch defaults and
2408 // else-if patterns;
2409 //
2410 // if (a) f(1);
2411 // else if (b) f(2);
2412 //
2413 // produces:
2414 //
2415 // [if]
2416 // / \
2417 // [f(1)] [if]
2418 // | | \
2419 // | | |
2420 // | [f(2)]|
2421 // \ | /
2422 // [ end ]
2423 //
2424 // [end] has two unconditional predecessor arcs and one conditional. The
2425 // conditional refers to the implicit empty 'else' arc. This conditional
2426 // arc can also be caused by an empty default block in a switch.
2427 //
2428 // In this case, we attempt to sink code from all *unconditional* arcs.
2429 // If we can sink instructions from these arcs (determined during the scan
2430 // phase below) we insert a common successor for all unconditional arcs and
2431 // connect that to [end], to enable sinking:
2432 //
2433 // [if]
2434 // / \
2435 // [x(1)] [if]
2436 // | | \
2437 // | | \
2438 // | [x(2)] |
2439 // \ / |
2440 // [sink.split] |
2441 // \ /
2442 // [ end ]
2443 //
2444 SmallVector<BasicBlock*,4> UnconditionalPreds;
2445 bool HaveNonUnconditionalPredecessors = false;
2446 for (auto *PredBB : predecessors(BB)) {
2447 auto *PredBr = dyn_cast<UncondBrInst>(PredBB->getTerminator());
2448 if (PredBr)
2449 UnconditionalPreds.push_back(PredBB);
2450 else
2451 HaveNonUnconditionalPredecessors = true;
2452 }
2453 if (UnconditionalPreds.size() < 2)
2454 return false;
2455
2456 // We take a two-step approach to tail sinking. First we scan from the end of
2457 // each block upwards in lockstep. If the n'th instruction from the end of each
2458 // block can be sunk, those instructions are added to ValuesToSink and we
2459 // carry on. If we can sink an instruction but need to PHI-merge some operands
2460 // (because they're not identical in each instruction) we add these to
2461 // PHIOperands.
2462 // We prepopulate PHIOperands with the phis that already exist in BB.
2464 for (PHINode &PN : BB->phis()) {
2466 for (const Use &U : PN.incoming_values())
2467 IncomingVals.insert({PN.getIncomingBlock(U), &U});
2468 auto &Ops = PHIOperands[IncomingVals[UnconditionalPreds[0]]];
2469 for (BasicBlock *Pred : UnconditionalPreds)
2470 Ops.push_back(*IncomingVals[Pred]);
2471 }
2472
2473 int ScanIdx = 0;
2474 SmallPtrSet<Value*,4> InstructionsToSink;
2475 LockstepReverseIterator<true> LRI(UnconditionalPreds);
2476 while (LRI.isValid() &&
2477 canSinkInstructions(*LRI, PHIOperands)) {
2478 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2479 << "\n");
2480 InstructionsToSink.insert_range(*LRI);
2481 ++ScanIdx;
2482 --LRI;
2483 }
2484
2485 // If no instructions can be sunk, early-return.
2486 if (ScanIdx == 0)
2487 return false;
2488
2489 bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2490
2491 if (!followedByDeoptOrUnreachable) {
2492 // Check whether this is the pointer operand of a load/store.
2493 auto IsMemOperand = [](Use &U) {
2494 auto *I = cast<Instruction>(U.getUser());
2495 if (isa<LoadInst>(I))
2496 return U.getOperandNo() == LoadInst::getPointerOperandIndex();
2497 if (isa<StoreInst>(I))
2498 return U.getOperandNo() == StoreInst::getPointerOperandIndex();
2499 return false;
2500 };
2501
2502 // Okay, we *could* sink last ScanIdx instructions. But how many can we
2503 // actually sink before encountering instruction that is unprofitable to
2504 // sink?
2505 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator<true> &LRI) {
2506 unsigned NumPHIInsts = 0;
2507 for (Use &U : (*LRI)[0]->operands()) {
2508 auto It = PHIOperands.find(&U);
2509 if (It != PHIOperands.end() && !all_of(It->second, [&](Value *V) {
2510 return InstructionsToSink.contains(V);
2511 })) {
2512 ++NumPHIInsts;
2513 // Do not separate a load/store from the gep producing the address.
2514 // The gep can likely be folded into the load/store as an addressing
2515 // mode. Additionally, a load of a gep is easier to analyze than a
2516 // load of a phi.
2517 if (IsMemOperand(U) &&
2518 any_of(It->second, [](Value *V) { return isa<GEPOperator>(V); }))
2519 return false;
2520 // FIXME: this check is overly optimistic. We may end up not sinking
2521 // said instruction, due to the very same profitability check.
2522 // See @creating_too_many_phis in sink-common-code.ll.
2523 }
2524 }
2525 LLVM_DEBUG(dbgs() << "SINK: #phi insts: " << NumPHIInsts << "\n");
2526 return NumPHIInsts <= 1;
2527 };
2528
2529 // We've determined that we are going to sink last ScanIdx instructions,
2530 // and recorded them in InstructionsToSink. Now, some instructions may be
2531 // unprofitable to sink. But that determination depends on the instructions
2532 // that we are going to sink.
2533
2534 // First, forward scan: find the first instruction unprofitable to sink,
2535 // recording all the ones that are profitable to sink.
2536 // FIXME: would it be better, after we detect that not all are profitable.
2537 // to either record the profitable ones, or erase the unprofitable ones?
2538 // Maybe we need to choose (at runtime) the one that will touch least
2539 // instrs?
2540 LRI.reset();
2541 int Idx = 0;
2542 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2543 while (Idx < ScanIdx) {
2544 if (!ProfitableToSinkInstruction(LRI)) {
2545 // Too many PHIs would be created.
2546 LLVM_DEBUG(
2547 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2548 break;
2549 }
2550 InstructionsProfitableToSink.insert_range(*LRI);
2551 --LRI;
2552 ++Idx;
2553 }
2554
2555 // If no instructions can be sunk, early-return.
2556 if (Idx == 0)
2557 return false;
2558
2559 // Did we determine that (only) some instructions are unprofitable to sink?
2560 if (Idx < ScanIdx) {
2561 // Okay, some instructions are unprofitable.
2562 ScanIdx = Idx;
2563 InstructionsToSink = InstructionsProfitableToSink;
2564
2565 // But, that may make other instructions unprofitable, too.
2566 // So, do a backward scan, do any earlier instructions become
2567 // unprofitable?
2568 assert(
2569 !ProfitableToSinkInstruction(LRI) &&
2570 "We already know that the last instruction is unprofitable to sink");
2571 ++LRI;
2572 --Idx;
2573 while (Idx >= 0) {
2574 // If we detect that an instruction becomes unprofitable to sink,
2575 // all earlier instructions won't be sunk either,
2576 // so preemptively keep InstructionsProfitableToSink in sync.
2577 // FIXME: is this the most performant approach?
2578 for (auto *I : *LRI)
2579 InstructionsProfitableToSink.erase(I);
2580 if (!ProfitableToSinkInstruction(LRI)) {
2581 // Everything starting with this instruction won't be sunk.
2582 ScanIdx = Idx;
2583 InstructionsToSink = InstructionsProfitableToSink;
2584 }
2585 ++LRI;
2586 --Idx;
2587 }
2588 }
2589
2590 // If no instructions can be sunk, early-return.
2591 if (ScanIdx == 0)
2592 return false;
2593 }
2594
2595 bool Changed = false;
2596
2597 if (HaveNonUnconditionalPredecessors) {
2598 if (!followedByDeoptOrUnreachable) {
2599 // It is always legal to sink common instructions from unconditional
2600 // predecessors. However, if not all predecessors are unconditional,
2601 // this transformation might be pessimizing. So as a rule of thumb,
2602 // don't do it unless we'd sink at least one non-speculatable instruction.
2603 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2604 LRI.reset();
2605 int Idx = 0;
2606 bool Profitable = false;
2607 while (Idx < ScanIdx) {
2608 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2609 Profitable = true;
2610 break;
2611 }
2612 --LRI;
2613 ++Idx;
2614 }
2615 if (!Profitable)
2616 return false;
2617 }
2618
2619 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2620 // We have a conditional edge and we're going to sink some instructions.
2621 // Insert a new block postdominating all blocks we're going to sink from.
2622 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2623 // Edges couldn't be split.
2624 return false;
2625 Changed = true;
2626 }
2627
2628 // Now that we've analyzed all potential sinking candidates, perform the
2629 // actual sink. We iteratively sink the last non-terminator of the source
2630 // blocks into their common successor unless doing so would require too
2631 // many PHI instructions to be generated (currently only one PHI is allowed
2632 // per sunk instruction).
2633 //
2634 // We can use InstructionsToSink to discount values needing PHI-merging that will
2635 // actually be sunk in a later iteration. This allows us to be more
2636 // aggressive in what we sink. This does allow a false positive where we
2637 // sink presuming a later value will also be sunk, but stop half way through
2638 // and never actually sink it which means we produce more PHIs than intended.
2639 // This is unlikely in practice though.
2640 int SinkIdx = 0;
2641 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2642 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2643 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2644 << "\n");
2645
2646 // Because we've sunk every instruction in turn, the current instruction to
2647 // sink is always at index 0.
2648 LRI.reset();
2649
2650 sinkLastInstruction(UnconditionalPreds);
2651 NumSinkCommonInstrs++;
2652 Changed = true;
2653 }
2654 if (SinkIdx != 0)
2655 ++NumSinkCommonCode;
2656 return Changed;
2657}
2658
2659namespace {
2660
2661struct CompatibleSets {
2662 using SetTy = SmallVector<InvokeInst *, 2>;
2663
2665
2666 static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2667
2668 SetTy &getCompatibleSet(InvokeInst *II);
2669
2670 void insert(InvokeInst *II);
2671};
2672
2673CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2674 // Perform a linear scan over all the existing sets, see if the new `invoke`
2675 // is compatible with any particular set. Since we know that all the `invokes`
2676 // within a set are compatible, only check the first `invoke` in each set.
2677 // WARNING: at worst, this has quadratic complexity.
2678 for (CompatibleSets::SetTy &Set : Sets) {
2679 if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2680 return Set;
2681 }
2682
2683 // Otherwise, we either had no sets yet, or this invoke forms a new set.
2684 return Sets.emplace_back();
2685}
2686
2687void CompatibleSets::insert(InvokeInst *II) {
2688 getCompatibleSet(II).emplace_back(II);
2689}
2690
2691bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2692 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2693
2694 // Can we theoretically merge these `invoke`s?
2695 auto IsIllegalToMerge = [](InvokeInst *II) {
2696 return II->cannotMerge() || II->isInlineAsm();
2697 };
2698 if (any_of(Invokes, IsIllegalToMerge))
2699 return false;
2700
2701 // Either both `invoke`s must be direct,
2702 // or both `invoke`s must be indirect.
2703 auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2704 bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2705 bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2706 if (HaveIndirectCalls) {
2707 if (!AllCallsAreIndirect)
2708 return false;
2709 } else {
2710 // All callees must be identical.
2711 Value *Callee = nullptr;
2712 for (InvokeInst *II : Invokes) {
2713 Value *CurrCallee = II->getCalledOperand();
2714 assert(CurrCallee && "There is always a called operand.");
2715 if (!Callee)
2716 Callee = CurrCallee;
2717 else if (Callee != CurrCallee)
2718 return false;
2719 }
2720 }
2721
2722 // Either both `invoke`s must not have a normal destination,
2723 // or both `invoke`s must have a normal destination.
2724 auto HasNormalDest = [](InvokeInst *II) {
2725 return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2726 };
2727 if (any_of(Invokes, HasNormalDest)) {
2728 // Do not merge `invoke` that does not have a normal destination with one
2729 // that does have a normal destination, even though doing so would be legal.
2730 if (!all_of(Invokes, HasNormalDest))
2731 return false;
2732
2733 // All normal destinations must be identical.
2734 BasicBlock *NormalBB = nullptr;
2735 for (InvokeInst *II : Invokes) {
2736 BasicBlock *CurrNormalBB = II->getNormalDest();
2737 assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2738 if (!NormalBB)
2739 NormalBB = CurrNormalBB;
2740 else if (NormalBB != CurrNormalBB)
2741 return false;
2742 }
2743
2744 // In the normal destination, the incoming values for these two `invoke`s
2745 // must be compatible.
2746 SmallPtrSet<Value *, 16> EquivalenceSet(llvm::from_range, Invokes);
2748 NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2749 &EquivalenceSet))
2750 return false;
2751 }
2752
2753#ifndef NDEBUG
2754 // All unwind destinations must be identical.
2755 // We know that because we have started from said unwind destination.
2756 BasicBlock *UnwindBB = nullptr;
2757 for (InvokeInst *II : Invokes) {
2758 BasicBlock *CurrUnwindBB = II->getUnwindDest();
2759 assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2760 if (!UnwindBB)
2761 UnwindBB = CurrUnwindBB;
2762 else
2763 assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2764 }
2765#endif
2766
2767 // In the unwind destination, the incoming values for these two `invoke`s
2768 // must be compatible.
2770 Invokes.front()->getUnwindDest(),
2771 {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2772 return false;
2773
2774 // Ignoring arguments, these `invoke`s must be identical,
2775 // including operand bundles.
2776 const InvokeInst *II0 = Invokes.front();
2777 for (auto *II : Invokes.drop_front())
2778 if (!II->isSameOperationAs(II0, Instruction::CompareUsingIntersectedAttrs))
2779 return false;
2780
2781 // Can we theoretically form the data operands for the merged `invoke`?
2782 auto IsIllegalToMergeArguments = [](auto Ops) {
2783 Use &U0 = std::get<0>(Ops);
2784 Use &U1 = std::get<1>(Ops);
2785 if (U0 == U1)
2786 return false;
2788 U0.getOperandNo());
2789 };
2790 assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2791 if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2792 IsIllegalToMergeArguments))
2793 return false;
2794
2795 return true;
2796}
2797
2798} // namespace
2799
2800// Merge all invokes in the provided set, all of which are compatible
2801// as per the `CompatibleSets::shouldBelongToSameSet()`.
2803 DomTreeUpdater *DTU) {
2804 assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2805
2807 if (DTU)
2808 Updates.reserve(2 + 3 * Invokes.size());
2809
2810 bool HasNormalDest =
2811 !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2812
2813 // Clone one of the invokes into a new basic block.
2814 // Since they are all compatible, it doesn't matter which invoke is cloned.
2815 InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2816 InvokeInst *II0 = Invokes.front();
2817 BasicBlock *II0BB = II0->getParent();
2818 BasicBlock *InsertBeforeBlock =
2819 II0->getParent()->getIterator()->getNextNode();
2820 Function *Func = II0BB->getParent();
2821 LLVMContext &Ctx = II0->getContext();
2822
2823 BasicBlock *MergedInvokeBB = BasicBlock::Create(
2824 Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2825
2826 auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2827 // NOTE: all invokes have the same attributes, so no handling needed.
2828 MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2829
2830 if (!HasNormalDest) {
2831 // This set does not have a normal destination,
2832 // so just form a new block with unreachable terminator.
2833 BasicBlock *MergedNormalDest = BasicBlock::Create(
2834 Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2835 auto *UI = new UnreachableInst(Ctx, MergedNormalDest);
2836 UI->setDebugLoc(DebugLoc::getTemporary());
2837 MergedInvoke->setNormalDest(MergedNormalDest);
2838 }
2839
2840 // The unwind destination, however, remains identical for all invokes here.
2841
2842 return MergedInvoke;
2843 }();
2844
2845 if (DTU) {
2846 // Predecessor blocks that contained these invokes will now branch to
2847 // the new block that contains the merged invoke, ...
2848 for (InvokeInst *II : Invokes)
2849 Updates.push_back(
2850 {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2851
2852 // ... which has the new `unreachable` block as normal destination,
2853 // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2854 for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2855 Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2856 SuccBBOfMergedInvoke});
2857
2858 // Since predecessor blocks now unconditionally branch to a new block,
2859 // they no longer branch to their original successors.
2860 for (InvokeInst *II : Invokes)
2861 for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2862 Updates.push_back(
2863 {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2864 }
2865
2866 bool IsIndirectCall = Invokes[0]->isIndirectCall();
2867
2868 // Form the merged operands for the merged invoke.
2869 for (Use &U : MergedInvoke->operands()) {
2870 // Only PHI together the indirect callees and data operands.
2871 if (MergedInvoke->isCallee(&U)) {
2872 if (!IsIndirectCall)
2873 continue;
2874 } else if (!MergedInvoke->isDataOperand(&U))
2875 continue;
2876
2877 // Don't create trivial PHI's with all-identical incoming values.
2878 bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2879 return II->getOperand(U.getOperandNo()) != U.get();
2880 });
2881 if (!NeedPHI)
2882 continue;
2883
2884 // Form a PHI out of all the data ops under this index.
2886 U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke->getIterator());
2887 for (InvokeInst *II : Invokes)
2888 PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2889
2890 U.set(PN);
2891 }
2892
2893 // We've ensured that each PHI node has compatible (identical) incoming values
2894 // when coming from each of the `invoke`s in the current merge set,
2895 // so update the PHI nodes accordingly.
2896 for (BasicBlock *Succ : successors(MergedInvoke))
2897 addPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2898 /*ExistPred=*/Invokes.front()->getParent());
2899
2900 // And finally, replace the original `invoke`s with an unconditional branch
2901 // to the block with the merged `invoke`. Also, give that merged `invoke`
2902 // the merged debugloc of all the original `invoke`s.
2903 DILocation *MergedDebugLoc = nullptr;
2904 for (InvokeInst *II : Invokes) {
2905 // Compute the debug location common to all the original `invoke`s.
2906 if (!MergedDebugLoc)
2907 MergedDebugLoc = II->getDebugLoc();
2908 else
2909 MergedDebugLoc =
2910 DebugLoc::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2911
2912 // And replace the old `invoke` with an unconditional branch
2913 // to the block with the merged `invoke`.
2914 for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2915 OrigSuccBB->removePredecessor(II->getParent());
2916 auto *BI = UncondBrInst::Create(MergedInvoke->getParent(), II->getParent());
2917 // The unconditional branch is part of the replacement for the original
2918 // invoke, so should use its DebugLoc.
2919 BI->setDebugLoc(II->getDebugLoc());
2920 bool Success = MergedInvoke->tryIntersectAttributes(II);
2921 assert(Success && "Merged invokes with incompatible attributes");
2922 // For NDEBUG Compile
2923 (void)Success;
2924 II->replaceAllUsesWith(MergedInvoke);
2925 II->eraseFromParent();
2926 ++NumInvokesMerged;
2927 }
2928 MergedInvoke->setDebugLoc(MergedDebugLoc);
2929 ++NumInvokeSetsFormed;
2930
2931 if (DTU)
2932 DTU->applyUpdates(Updates);
2933}
2934
2935/// If this block is a `landingpad` exception handling block, categorize all
2936/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2937/// being "mergeable" together, and then merge invokes in each set together.
2938///
2939/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2940/// [...] [...]
2941/// | |
2942/// [invoke0] [invoke1]
2943/// / \ / \
2944/// [cont0] [landingpad] [cont1]
2945/// to:
2946/// [...] [...]
2947/// \ /
2948/// [invoke]
2949/// / \
2950/// [cont] [landingpad]
2951///
2952/// But of course we can only do that if the invokes share the `landingpad`,
2953/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2954/// and the invoked functions are "compatible".
2957 return false;
2958
2959 bool Changed = false;
2960
2961 // FIXME: generalize to all exception handling blocks?
2962 if (!BB->isLandingPad())
2963 return Changed;
2964
2965 CompatibleSets Grouper;
2966
2967 // Record all the predecessors of this `landingpad`. As per verifier,
2968 // the only allowed predecessor is the unwind edge of an `invoke`.
2969 // We want to group "compatible" `invokes` into the same set to be merged.
2970 for (BasicBlock *PredBB : predecessors(BB))
2971 Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2972
2973 // And now, merge `invoke`s that were grouped together.
2974 for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2975 if (Invokes.size() < 2)
2976 continue;
2977 Changed = true;
2978 mergeCompatibleInvokesImpl(Invokes, DTU);
2979 }
2980
2981 return Changed;
2982}
2983
2984namespace {
2985/// Track ephemeral values, which should be ignored for cost-modelling
2986/// purposes. Requires walking instructions in reverse order.
2987class EphemeralValueTracker {
2988 SmallPtrSet<const Instruction *, 32> EphValues;
2989
2990 bool isEphemeral(const Instruction *I) {
2991 if (isa<AssumeInst>(I))
2992 return true;
2993 return !I->mayHaveSideEffects() && !I->isTerminator() &&
2994 all_of(I->users(), [&](const User *U) {
2995 return EphValues.count(cast<Instruction>(U));
2996 });
2997 }
2998
2999public:
3000 bool track(const Instruction *I) {
3001 if (isEphemeral(I)) {
3002 EphValues.insert(I);
3003 return true;
3004 }
3005 return false;
3006 }
3007
3008 bool contains(const Instruction *I) const { return EphValues.contains(I); }
3009};
3010} // namespace
3011
3012/// Determine if we can hoist a sole store instruction out of a
3013/// conditional block.
3014///
3015/// We are looking for code like the following:
3016/// BrBB:
3017/// store i32 %add, i32* %arrayidx2
3018/// ... // No other stores or function calls (we could be calling a memory
3019/// ... // function).
3020/// %cmp = icmp ult %x, %y
3021/// br i1 %cmp, label %EndBB, label %ThenBB
3022/// ThenBB:
3023/// store i32 %add5, i32* %arrayidx2
3024/// br label EndBB
3025/// EndBB:
3026/// ...
3027/// We are going to transform this into:
3028/// BrBB:
3029/// store i32 %add, i32* %arrayidx2
3030/// ... //
3031/// %cmp = icmp ult %x, %y
3032/// %add.add5 = select i1 %cmp, i32 %add, %add5
3033/// store i32 %add.add5, i32* %arrayidx2
3034/// ...
3035///
3036/// \return The previously stored value (or the previous load) of the same
3037/// pointer if the store can be hoisted into the predecessor; otherwise nullptr.
3039 BasicBlock *StoreBB, BasicBlock *EndBB) {
3040 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
3041 if (!StoreToHoist)
3042 return nullptr;
3043
3044 // Volatile or atomic.
3045 if (!StoreToHoist->isSimple())
3046 return nullptr;
3047
3048 Value *StorePtr = StoreToHoist->getPointerOperand();
3049 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
3050
3051 // Look for a store to the same pointer in BrBB.
3052 unsigned MaxNumInstToLookAt = 9;
3053 // Skip pseudo probe intrinsic calls which are not really killing any memory
3054 // accesses.
3055 for (Instruction &CurI : reverse(*BrBB)) {
3056 if (!MaxNumInstToLookAt)
3057 break;
3058 --MaxNumInstToLookAt;
3059
3060 if (isa<PseudoProbeInst>(CurI))
3061 continue;
3062
3063 // Could be calling an instruction that affects memory like free().
3064 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
3065 return nullptr;
3066
3067 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
3068 // Found the previous store to same location and type. Make sure it is
3069 // simple, to avoid introducing a spurious non-atomic write after an
3070 // atomic write.
3071 if (SI->getPointerOperand() == StorePtr &&
3072 SI->getValueOperand()->getType() == StoreTy && SI->isSimple() &&
3073 SI->getAlign() >= StoreToHoist->getAlign())
3074 // Found the previous store, return its value operand.
3075 return SI->getValueOperand();
3076 return nullptr; // Unknown store.
3077 }
3078
3079 if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
3080 if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
3081 LI->isSimple() && LI->getAlign() >= StoreToHoist->getAlign()) {
3082 Value *Obj = getUnderlyingObject(StorePtr);
3083 bool ExplicitlyDereferenceableOnly;
3084 // The dereferenceability query here is only required to satisfy the
3085 // writable contract, actual dereferenceability is proven by the
3086 // presence of an access. As such, we can ignore frees.
3087 if (isWritableObject(Obj, ExplicitlyDereferenceableOnly) &&
3090 .WithoutRet) &&
3091 (!ExplicitlyDereferenceableOnly ||
3092 isDereferenceablePointer(StorePtr, StoreTy, LI->getDataLayout(),
3093 /*IgnoreFree=*/true))) {
3094 // Found a previous load, return it.
3095 return LI;
3096 }
3097 }
3098 // The load didn't work out, but we may still find a store.
3099 }
3100 }
3101
3102 return nullptr;
3103}
3104
3105/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
3106/// converted to selects.
3108 BasicBlock *EndBB,
3109 unsigned &SpeculatedInstructions,
3110 InstructionCost &Cost,
3111 const TargetTransformInfo &TTI) {
3113 BB->getParent()->hasMinSize()
3116
3117 bool HaveRewritablePHIs = false;
3118 for (PHINode &PN : EndBB->phis()) {
3119 Value *OrigV = PN.getIncomingValueForBlock(BB);
3120 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
3121
3122 // FIXME: Try to remove some of the duplication with
3123 // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
3124 if (ThenV == OrigV)
3125 continue;
3126
3127 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(),
3128 CmpInst::makeCmpResultType(PN.getType()),
3130
3131 // Don't convert to selects if we could remove undefined behavior instead.
3132 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
3134 return false;
3135
3136 HaveRewritablePHIs = true;
3137 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
3138 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
3139 if (!OrigCE && !ThenCE)
3140 continue; // Known cheap (FIXME: Maybe not true for aggregates).
3141
3142 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
3143 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
3144 InstructionCost MaxCost =
3146 if (OrigCost + ThenCost > MaxCost)
3147 return false;
3148
3149 // Account for the cost of an unfolded ConstantExpr which could end up
3150 // getting expanded into Instructions.
3151 // FIXME: This doesn't account for how many operations are combined in the
3152 // constant expression.
3153 ++SpeculatedInstructions;
3154 if (SpeculatedInstructions > 1)
3155 return false;
3156 }
3157
3158 return HaveRewritablePHIs;
3159}
3160
3162 std::optional<bool> Invert,
3163 const TargetTransformInfo &TTI) {
3164 // If the branch is non-unpredictable, and is predicted to *not* branch to
3165 // the `then` block, then avoid speculating it.
3166 if (BI->getMetadata(LLVMContext::MD_unpredictable))
3167 return true;
3168
3169 uint64_t TWeight, FWeight;
3170 if (!extractBranchWeights(*BI, TWeight, FWeight) || (TWeight + FWeight) == 0)
3171 return true;
3172
3173 if (!Invert.has_value())
3174 return false;
3175
3176 uint64_t EndWeight = *Invert ? TWeight : FWeight;
3177 BranchProbability BIEndProb =
3178 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
3179 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3180 return BIEndProb < Likely;
3181}
3182
3183/// Speculate a conditional basic block flattening the CFG.
3184///
3185/// Note that this is a very risky transform currently. Speculating
3186/// instructions like this is most often not desirable. Instead, there is an MI
3187/// pass which can do it with full awareness of the resource constraints.
3188/// However, some cases are "obvious" and we should do directly. An example of
3189/// this is speculating a single, reasonably cheap instruction.
3190///
3191/// There is only one distinct advantage to flattening the CFG at the IR level:
3192/// it makes very common but simplistic optimizations such as are common in
3193/// instcombine and the DAG combiner more powerful by removing CFG edges and
3194/// modeling their effects with easier to reason about SSA value graphs.
3195///
3196///
3197/// An illustration of this transform is turning this IR:
3198/// \code
3199/// BB:
3200/// %cmp = icmp ult %x, %y
3201/// br i1 %cmp, label %EndBB, label %ThenBB
3202/// ThenBB:
3203/// %sub = sub %x, %y
3204/// br label EndBB
3205/// EndBB:
3206/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
3207/// ...
3208/// \endcode
3209///
3210/// Into this IR:
3211/// \code
3212/// BB:
3213/// %cmp = icmp ult %x, %y
3214/// %sub = sub %x, %y
3215/// %cond = select i1 %cmp, 0, %sub
3216/// ...
3217/// \endcode
3218///
3219/// \returns true if the conditional block is removed.
3220bool SimplifyCFGOpt::speculativelyExecuteBB(CondBrInst *BI,
3221 BasicBlock *ThenBB) {
3222 if (!Options.SpeculateBlocks)
3223 return false;
3224
3225 // Be conservative for now. FP select instruction can often be expensive.
3226 Value *BrCond = BI->getCondition();
3227 if (isa<FCmpInst>(BrCond))
3228 return false;
3229
3230 BasicBlock *BB = BI->getParent();
3231 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
3232 InstructionCost Budget =
3234
3235 // If ThenBB is actually on the false edge of the conditional branch, remember
3236 // to swap the select operands later.
3237 bool Invert = false;
3238 if (ThenBB != BI->getSuccessor(0)) {
3239 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
3240 Invert = true;
3241 }
3242 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
3243
3244 if (!isProfitableToSpeculate(BI, Invert, TTI))
3245 return false;
3246
3247 // Keep a count of how many times instructions are used within ThenBB when
3248 // they are candidates for sinking into ThenBB. Specifically:
3249 // - They are defined in BB, and
3250 // - They have no side effects, and
3251 // - All of their uses are in ThenBB.
3252 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
3253
3254 SmallVector<Instruction *, 4> SpeculatedPseudoProbes;
3255
3256 unsigned SpeculatedInstructions = 0;
3257 bool HoistLoadsStores = Options.HoistLoadsStoresWithCondFaulting;
3258 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
3259 Value *SpeculatedStoreValue = nullptr;
3260 StoreInst *SpeculatedStore = nullptr;
3261 EphemeralValueTracker EphTracker;
3262 for (Instruction &I : reverse(drop_end(*ThenBB))) {
3263 // Skip pseudo probes. The consequence is we lose track of the branch
3264 // probability for ThenBB, which is fine since the optimization here takes
3265 // place regardless of the branch probability.
3266 if (isa<PseudoProbeInst>(I)) {
3267 // The probe should be deleted so that it will not be over-counted when
3268 // the samples collected on the non-conditional path are counted towards
3269 // the conditional path. We leave it for the counts inference algorithm to
3270 // figure out a proper count for an unknown probe.
3271 SpeculatedPseudoProbes.push_back(&I);
3272 continue;
3273 }
3274
3275 // Ignore ephemeral values, they will be dropped by the transform.
3276 if (EphTracker.track(&I))
3277 continue;
3278
3279 // Only speculatively execute a single instruction (not counting the
3280 // terminator) for now.
3281 bool IsSafeCheapLoadStore = HoistLoadsStores &&
3283 SpeculatedConditionalLoadsStores.size() <
3285 // Do not count a load/store towards the cost if the target supports
3286 // conditional faulting, because it is cheap to speculate.
3287 if (IsSafeCheapLoadStore)
3288 SpeculatedConditionalLoadsStores.push_back(&I);
3289 else
3290 ++SpeculatedInstructions;
3291
3292 if (SpeculatedInstructions > 1)
3293 return false;
3294
3295 // Don't hoist the instruction if it's unsafe or expensive.
3296 if (!IsSafeCheapLoadStore &&
3298 !(HoistCondStores && !SpeculatedStoreValue &&
3299 (SpeculatedStoreValue =
3300 isSafeToSpeculateStore(&I, BB, ThenBB, EndBB))))
3301 return false;
3302 if (!IsSafeCheapLoadStore && !SpeculatedStoreValue &&
3305 return false;
3306
3307 // Store the store speculation candidate.
3308 if (!SpeculatedStore && SpeculatedStoreValue)
3309 SpeculatedStore = cast<StoreInst>(&I);
3310
3311 // Do not hoist the instruction if any of its operands are defined but not
3312 // used in BB. The transformation will prevent the operand from
3313 // being sunk into the use block.
3314 for (Use &Op : I.operands()) {
3316 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3317 continue; // Not a candidate for sinking.
3318
3319 ++SinkCandidateUseCounts[OpI];
3320 }
3321 }
3322
3323 // Consider any sink candidates which are only used in ThenBB as costs for
3324 // speculation. Note, while we iterate over a DenseMap here, we are summing
3325 // and so iteration order isn't significant.
3326 for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3327 if (Inst->hasNUses(Count)) {
3328 ++SpeculatedInstructions;
3329 if (SpeculatedInstructions > 1)
3330 return false;
3331 }
3332
3333 // Check that we can insert the selects and that it's not too expensive to do
3334 // so.
3335 bool Convert =
3336 SpeculatedStore != nullptr || !SpeculatedConditionalLoadsStores.empty();
3338 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3339 SpeculatedInstructions, Cost, TTI);
3340 if (!Convert || Cost > Budget)
3341 return false;
3342
3343 // If we get here, we can hoist the instruction and if-convert.
3344 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3345
3346 Instruction *Sel = nullptr;
3347 // Insert a select of the value of the speculated store.
3348 if (SpeculatedStoreValue) {
3349 IRBuilder<NoFolder> Builder(BI);
3350 Value *OrigV = SpeculatedStore->getValueOperand();
3351 Value *TrueV = SpeculatedStore->getValueOperand();
3352 Value *FalseV = SpeculatedStoreValue;
3353 if (Invert)
3354 std::swap(TrueV, FalseV);
3355 Value *S = Builder.CreateSelect(
3356 BrCond, TrueV, FalseV, "spec.store.select", BI);
3357 Sel = cast<Instruction>(S);
3358 SpeculatedStore->setOperand(0, S);
3359 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3360 SpeculatedStore->getDebugLoc());
3361 // The value stored is still conditional, but the store itself is now
3362 // unconditionally executed, so we must be sure that any linked dbg.assign
3363 // intrinsics are tracking the new stored value (the result of the
3364 // select). If we don't, and the store were to be removed by another pass
3365 // (e.g. DSE), then we'd eventually end up emitting a location describing
3366 // the conditional value, unconditionally.
3367 //
3368 // === Before this transformation ===
3369 // pred:
3370 // store %one, %x.dest, !DIAssignID !1
3371 // dbg.assign %one, "x", ..., !1, ...
3372 // br %cond if.then
3373 //
3374 // if.then:
3375 // store %two, %x.dest, !DIAssignID !2
3376 // dbg.assign %two, "x", ..., !2, ...
3377 //
3378 // === After this transformation ===
3379 // pred:
3380 // store %one, %x.dest, !DIAssignID !1
3381 // dbg.assign %one, "x", ..., !1
3382 // ...
3383 // %merge = select %cond, %two, %one
3384 // store %merge, %x.dest, !DIAssignID !2
3385 // dbg.assign %merge, "x", ..., !2
3386 for (DbgVariableRecord *DbgAssign :
3387 at::getDVRAssignmentMarkers(SpeculatedStore))
3388 if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3389 DbgAssign->replaceVariableLocationOp(OrigV, S);
3390 }
3391
3392 // Metadata can be dependent on the condition we are hoisting above.
3393 // Strip all UB-implying metadata on the instruction. Drop the debug loc
3394 // to avoid making it appear as if the condition is a constant, which would
3395 // be misleading while debugging.
3396 // Similarly strip attributes that maybe dependent on condition we are
3397 // hoisting above.
3398 for (auto &I : make_early_inc_range(*ThenBB)) {
3399 if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3400 I.dropLocation();
3401 }
3402 I.dropUBImplyingAttrsAndMetadata();
3403
3404 // Drop ephemeral values.
3405 if (EphTracker.contains(&I)) {
3406 I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3407 I.eraseFromParent();
3408 }
3409 }
3410
3411 // Hoist the instructions.
3412 // Drop DbgVariableRecords attached to these instructions.
3413 for (auto &It : *ThenBB)
3414 for (DbgRecord &DR : make_early_inc_range(It.getDbgRecordRange()))
3415 // Drop all records except assign-kind DbgVariableRecords (dbg.assign
3416 // equivalent).
3417 if (DbgVariableRecord *DVR = dyn_cast<DbgVariableRecord>(&DR);
3418 !DVR || !DVR->isDbgAssign())
3419 It.dropOneDbgRecord(&DR);
3420 BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3421 std::prev(ThenBB->end()));
3422
3423 if (!SpeculatedConditionalLoadsStores.empty())
3424 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores, Invert,
3425 Sel);
3426
3427 // Insert selects and rewrite the PHI operands.
3428 IRBuilder<NoFolder> Builder(BI);
3429 for (PHINode &PN : EndBB->phis()) {
3430 unsigned OrigI = PN.getBasicBlockIndex(BB);
3431 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3432 Value *OrigV = PN.getIncomingValue(OrigI);
3433 Value *ThenV = PN.getIncomingValue(ThenI);
3434
3435 // Skip PHIs which are trivial.
3436 if (OrigV == ThenV)
3437 continue;
3438
3439 // Create a select whose true value is the speculatively executed value and
3440 // false value is the pre-existing value. Swap them if the branch
3441 // destinations were inverted.
3442 Value *TrueV = ThenV, *FalseV = OrigV;
3443 if (Invert)
3444 std::swap(TrueV, FalseV);
3445 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3446 PN.setIncomingValue(OrigI, V);
3447 PN.setIncomingValue(ThenI, V);
3448 }
3449
3450 // Remove speculated pseudo probes.
3451 for (Instruction *I : SpeculatedPseudoProbes)
3452 I->eraseFromParent();
3453
3454 ++NumSpeculations;
3455 return true;
3456}
3457
3459
3460// Return false if number of blocks searched is too much.
3461static bool findReaching(BasicBlock *BB, BasicBlock *DefBB,
3462 BlocksSet &ReachesNonLocalUses) {
3463 if (BB == DefBB)
3464 return true;
3465 if (!ReachesNonLocalUses.insert(BB).second)
3466 return true;
3467
3468 if (ReachesNonLocalUses.size() > MaxJumpThreadingLiveBlocks)
3469 return false;
3470 for (BasicBlock *Pred : predecessors(BB))
3471 if (!findReaching(Pred, DefBB, ReachesNonLocalUses))
3472 return false;
3473 return true;
3474}
3475
3476/// Return true if we can thread a branch across this block.
///
/// Every instruction of \p BB is inspected. The result is false when the block
/// contains a call that cannot be duplicated or is convergent, when the number
/// of counted (non-ephemeral, non-PHI) instructions gets past
/// MaxSmallBlockSize, or when a value defined in \p BB is used by a PHI node
/// that also lives in \p BB. Blocks other than \p BB that contain a user of a
/// value defined here are added to \p NonLocalUseBlocks.
3478 BlocksSet &NonLocalUseBlocks) {
3479 int Size = 0;
3480 EphemeralValueTracker EphTracker;
3481
3482 // Walk the block in reverse so that we can identify ephemeral values properly
3483 // (values only feeding assumes).
3484 for (Instruction &I : reverse(*BB)) {
3485 // Can't fold blocks that contain noduplicate or convergent calls.
3486 if (CallInst *CI = dyn_cast<CallInst>(&I))
3487 if (CI->cannotDuplicate() || CI->isConvergent())
3488 return false;
3489
3490 // Ignore ephemeral values which are deleted during codegen.
3491 // We will delete Phis while threading, so Phis should not be accounted in
3492 // block's size.
3493 if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
// Post-increment: the comparison sees the count before this instruction is
// added.
3494 if (Size++ > MaxSmallBlockSize)
3495 return false; // Don't clone large BB's.
3496 }
3497
3498 // Record blocks with non-local uses of values defined in the current basic
3499 // block.
3500 for (User *U : I.users()) {
// NOTE(review): UI is declared on a line that is not visible here; it is
// presumably the user U viewed as an Instruction -- confirm.
3502 BasicBlock *UsedInBB = UI->getParent();
3503 if (UsedInBB == BB) {
// A PHI in this very block that consumes the value makes the block
// ineligible.
3504 if (isa<PHINode>(UI))
3505 return false;
3506 } else
3507 NonLocalUseBlocks.insert(UsedInBB);
3508 }
3509
3510 // Looks ok, continue checking.
3511 }
3512
3513 return true;
3514}
3515
3517 BasicBlock *To) {
// Returns the constant that V is known to have when control flows from From
// into To, or nullptr when nothing is known. The only source of knowledge
// used is a conditional branch in From whose condition is V itself.
3518 // Don't look past the block defining the value, we might get the value from
3519 // a previous loop iteration.
3520 auto *I = dyn_cast<Instruction>(V);
3521 if (I && I->getParent() == To)
3522 return nullptr;
3523
3524 // We know the value if the From block branches on it.
3525 auto *BI = dyn_cast<CondBrInst>(From->getTerminator());
// If both successors are the same block, the edge does not tell us which way
// the condition went, so that case is excluded.
3526 if (BI && BI->getCondition() == V &&
3527 BI->getSuccessor(0) != BI->getSuccessor(1))
// Successor 0 is taken when the condition is true.
3528 return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3529 : ConstantInt::getFalse(BI->getContext());
3530
3531 return nullptr;
3532}
3533
3534/// If we have a conditional branch on something for which we know the constant
3535/// value in predecessors (e.g. a phi node in the current block), thread edges
3536/// from the predecessor to their ultimate destination.
///
/// \returns std::nullopt after one group of predecessor edges has been
/// threaded (the caller is expected to call again), true on the
/// single-incoming-PHI path, and false when nothing was done.
3537static std::optional<bool>
3539 const DataLayout &DL,
3540 AssumptionCache *AC) {
3542 BasicBlock *BB = BI->getParent();
3543 Value *Cond = BI->getCondition();
// NOTE(review): PN and KnownValues are declared on lines that are not
// visible here. From its uses below, KnownValues maps a constant condition
// value to the set of predecessors that deliver it.
3545 if (PN && PN->getParent() == BB) {
3546 // Degenerate case of a single entry PHI.
3547 if (PN->getNumIncomingValues() == 1) {
3549 return true;
3550 }
3551
// Group the predecessors by the constant the PHI carries for them.
3552 for (Use &U : PN->incoming_values())
3553 if (auto *CB = dyn_cast<ConstantInt>(U))
3554 KnownValues[CB].insert(PN->getIncomingBlock(U));
3555 } else {
// The condition is not a PHI of this block: see whether a predecessor's own
// branch on the same condition pins its value on the incoming edge.
3556 for (BasicBlock *Pred : predecessors(BB)) {
3557 if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
3558 KnownValues[CB].insert(Pred);
3559 }
3560 }
3561
3562 if (KnownValues.empty())
3563 return false;
3564
3565 // Now we know that this block has multiple preds and two succs.
3566 // Check that the block is small enough and record which non-local blocks use
3567 // values defined in the block.
3568
3569 BlocksSet NonLocalUseBlocks;
3570 BlocksSet ReachesNonLocalUseBlocks;
3571 if (!blockIsSimpleEnoughToThreadThrough(BB, NonLocalUseBlocks))
3572 return false;
3573
3574 // Jump-threading can only be done to destinations where no values defined
3575 // in BB are live.
3576
3577 // Quickly check if both destinations have uses. If so, jump-threading cannot
3578 // be done.
3579 if (NonLocalUseBlocks.contains(BI->getSuccessor(0)) &&
3580 NonLocalUseBlocks.contains(BI->getSuccessor(1)))
3581 return false;
3582
3583 // Search backward from NonLocalUseBlocks to find which blocks
3584 // reach non-local uses.
3585 for (BasicBlock *UseBB : NonLocalUseBlocks)
3586 // Give up if too many blocks are searched.
3587 if (!findReaching(UseBB, BB, ReachesNonLocalUseBlocks))
3588 return false;
3589
3590 for (const auto &Pair : KnownValues) {
3591 ConstantInt *CB = Pair.first;
3592 ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
// A known value of 1 selects successor 0, a known value of 0 selects
// successor 1.
3593 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
3594
3595 // Okay, we now know that all edges from PredBB should be revectored to
3596 // branch to RealDest.
3597 if (RealDest == BB)
3598 continue; // Skip self loops.
3599
3600 // Skip if the predecessor's terminator is an indirect branch.
3601 if (any_of(PredBBs, [](BasicBlock *PredBB) {
3602 return isa<IndirectBrInst>(PredBB->getTerminator());
3603 }))
3604 continue;
3605
3606 // Only revector to RealDest if no values defined in BB are live.
3607 if (ReachesNonLocalUseBlocks.contains(RealDest))
3608 continue;
3609
3610 LLVM_DEBUG({
3611 dbgs() << "Condition " << *Cond << " in " << BB->getName()
3612 << " has value " << *Pair.first << " in predecessors:\n";
3613 for (const BasicBlock *PredBB : Pair.second)
3614 dbgs() << " " << PredBB->getName() << "\n";
3615 dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
3616 });
3617
3618 // Split the predecessors we are threading into a new edge block. We'll
3619 // clone the instructions into this block, and then redirect it to RealDest.
3620 BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);
3621 if (!EdgeBB)
3622 continue;
3623
3624 // TODO: These just exist to reduce test diff, we can drop them if we like.
3625 EdgeBB->setName(RealDest->getName() + ".critedge");
3626 EdgeBB->moveBefore(RealDest);
3627
3628 // Update PHI nodes.
3629 addPredecessorToBlock(RealDest, EdgeBB, BB);
3630
3631 // BB may have instructions that are being threaded over. Clone these
3632 // instructions into EdgeBB. We know that there will be no uses of the
3633 // cloned instructions outside of EdgeBB.
3634 BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
3635 ValueToValueMapTy TranslateMap; // Track translated values.
// On this edge the branch condition is the known constant.
3636 TranslateMap[Cond] = CB;
3637
3638 // RemoveDIs: track instructions that we optimise away while folding, so
3639 // that we can copy DbgVariableRecords from them later.
3640 BasicBlock::iterator SrcDbgCursor = BB->begin();
3641 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
// PHIs are not cloned; each one is replaced by the value it receives from
// EdgeBB.
3642 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
3643 TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
3644 continue;
3645 }
3646 // Clone the instruction.
3647 Instruction *N = BBI->clone();
3648 // Insert the new instruction into its new home.
3649 N->insertInto(EdgeBB, InsertPt);
3650
3651 if (BBI->hasName())
3652 N->setName(BBI->getName() + ".c");
3653
3654 // Update operands due to translation.
3655 // Key Instructions: Remap all the atom groups.
// Note: this local DL (a DebugLoc) shadows the DataLayout parameter DL
// inside the if statement.
3656 if (const DebugLoc &DL = BBI->getDebugLoc())
3657 mapAtomInstance(DL, TranslateMap);
3658 RemapInstruction(N, TranslateMap,
3660
3661 // Check for trivial simplification.
3662 if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
3663 if (!BBI->use_empty())
3664 TranslateMap[&*BBI] = V;
// A clone with side effects is kept even though its value simplified.
3665 if (!N->mayHaveSideEffects()) {
3666 N->eraseFromParent(); // Instruction folded away, don't need actual
3667 // inst
3668 N = nullptr;
3669 }
3670 } else {
3671 if (!BBI->use_empty())
3672 TranslateMap[&*BBI] = N;
3673 }
3674 if (N) {
3675 // Copy all debug-info attached to instructions from the last we
3676 // successfully clone, up to this instruction (they might have been
3677 // folded away).
3678 for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
3679 N->cloneDebugInfoFrom(&*SrcDbgCursor);
3680 SrcDbgCursor = std::next(BBI);
3681 // Clone debug-info on this instruction too.
3682 N->cloneDebugInfoFrom(&*BBI);
3683
3684 // Register the new instruction with the assumption cache if necessary.
3685 if (auto *Assume = dyn_cast<AssumeInst>(N))
3686 if (AC)
3687 AC->registerAssumption(Assume);
3688 }
3689 }
3690
// Debug info of trailing instructions that were folded away, and of the
// branch itself, is attached to the instruction at InsertPt.
3691 for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
3692 InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
3693 InsertPt->cloneDebugInfoFrom(BI);
3694
// Redirect the edge block from BB to the known destination.
3695 BB->removePredecessor(EdgeBB);
3696 UncondBrInst *EdgeBI = cast<UncondBrInst>(EdgeBB->getTerminator());
3697 EdgeBI->setSuccessor(0, RealDest);
3698 EdgeBI->setDebugLoc(BI->getDebugLoc());
3699
3700 if (DTU) {
3702 Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
3703 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
3704 DTU->applyUpdates(Updates);
3705 }
3706
3707 // For simplicity, we created a separate basic block for the edge. Merge
3708 // it back into the predecessor if possible. This not only avoids
3709 // unnecessary SimplifyCFG iterations, but also makes sure that we don't
3710 // bypass the check for trivial cycles above.
3711 MergeBlockIntoPredecessor(EdgeBB, DTU);
3712
3713 // Signal repeat, simplifying any other constants.
3714 return std::nullopt;
3715 }
3716
3717 return false;
3718}
3719
/// Driver loop: keeps recomputing Result for as long as it comes back as
/// std::nullopt and reports whether any iteration changed something.
3720bool SimplifyCFGOpt::foldCondBranchOnValueKnownInPredecessor(CondBrInst *BI) {
3721 // Note: If BB is a loop header then there is a risk that threading introduces
3722 // a non-canonical loop by moving a back edge. So we avoid this optimization
3723 // for loop headers if NeedCanonicalLoop is set.
3724 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BI->getParent()))
3725 return false;
3726
3727 std::optional<bool> Result;
3728 bool EverChanged = false;
// NOTE(review): the right-hand side of the assignment to Result below is on
// a line that is not visible here.
3729 do {
3730 // Note that std::nullopt means "we changed things, but recurse further."
3731 Result =
// A std::nullopt result counts as a change; otherwise the boolean decides.
3733 EverChanged |= Result == std::nullopt || *Result;
3734 } while (Result == std::nullopt);
3735 return EverChanged;
3736}
3737
3738/// Given a BB that starts with the specified two-entry PHI node,
3739/// see if we can eliminate it.
///
/// When the checks below succeed, the instructions of the conditional
/// block(s) are hoisted into the dominating block, every PHI in BB is turned
/// into a select on the dominating branch condition, and the dominating
/// conditional branch is replaced by a direct branch to BB. The return value
/// is true on every path that modified the IR.
3742 const DataLayout &DL,
3743 bool SpeculateUnpredictables) {
3744 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
3745 // statement", which has a very simple dominance structure. Basically, we
3746 // are trying to find the condition that is being branched on, which
3747 // subsequently causes this merge to happen. We really want control
3748 // dependence information for this check, but simplifycfg can't keep it up
3749 // to date, and this catches most of the cases we care about anyway.
3750 BasicBlock *BB = PN->getParent();
3751
3752 BasicBlock *IfTrue, *IfFalse;
3753 CondBrInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
3754 if (!DomBI)
3755 return false;
3756 Value *IfCond = DomBI->getCondition();
3757 // Don't bother if the branch will be constant folded trivially.
3758 if (isa<ConstantInt>(IfCond))
3759 return false;
3760
3761 BasicBlock *DomBlock = DomBI->getParent();
// IfBlocks (declared on a line not visible here) collects the incoming
// blocks of the PHI that end in an unconditional branch; those are the
// blocks whose contents would be speculated.
3763 llvm::copy_if(PN->blocks(), std::back_inserter(IfBlocks),
3764 [](BasicBlock *IfBlock) {
3765 return isa<UncondBrInst>(IfBlock->getTerminator());
3766 });
3767 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
3768 "Will have either one or two blocks to speculate.");
3769
3770 // If the branch is non-unpredictable, see if we either predictably jump to
3771 // the merge bb (if we have only a single 'then' block), or if we predictably
3772 // jump to one specific 'then' block (if we have two of them).
3773 // It isn't beneficial to speculatively execute the code
3774 // from the block that we know is predictably not entered.
3775 bool IsUnpredictable = DomBI->getMetadata(LLVMContext::MD_unpredictable);
3776 if (!IsUnpredictable) {
3777 uint64_t TWeight, FWeight;
3778 if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
3779 (TWeight + FWeight) != 0) {
3780 BranchProbability BITrueProb =
3781 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
3782 BranchProbability Likely = TTI.getPredictableBranchThreshold();
3783 BranchProbability BIFalseProb = BITrueProb.getCompl();
3784 if (IfBlocks.size() == 1) {
3785 BranchProbability BIBBProb =
3786 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
3787 if (BIBBProb >= Likely)
3788 return false;
3789 } else {
3790 if (BITrueProb >= Likely || BIFalseProb >= Likely)
3791 return false;
3792 }
3793 }
3794 }
3795
3796 // Don't try to fold an unreachable block. For example, the phi node itself
3797 // can't be the candidate if-condition for a select that we want to form.
3798 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
3799 if (IfCondPhiInst->getParent() == BB)
3800 return false;
3801
3802 // Okay, we found that we can merge this two-entry phi node into a select.
3803 // Doing so would require us to fold *all* two entry phi nodes in this block.
3804 // At some point this becomes non-profitable (particularly if the target
3805 // doesn't support cmov's). Only do this transformation if there are two or
3806 // fewer PHI nodes in this block.
// NOTE(review): NumPhis is compared before it is incremented, so the loop
// only bails out when it reaches a fourth PHI; blocks with three PHIs pass.
// Confirm whether the comment above or the code reflects the intent.
3807 unsigned NumPhis = 0;
3808 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
3809 if (NumPhis > 2)
3810 return false;
3811
3812 // Loop over the PHI's seeing if we can promote them all to select
3813 // instructions. While we are at it, keep track of the instructions
3814 // that need to be moved to the dominating block.
3815 SmallPtrSet<Instruction *, 4> AggressiveInsts;
3816 SmallPtrSet<Instruction *, 2> ZeroCostInstructions;
3817 InstructionCost Cost = 0;
3818 InstructionCost Budget =
3820 if (SpeculateUnpredictables && IsUnpredictable)
3821 Budget += TTI.getBranchMispredictPenalty();
3822
3823 bool Changed = false;
3824 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
// Advance the iterator before the PHI can be erased. This PN shadows the
// function parameter of the same name.
3825 PHINode *PN = cast<PHINode>(II++);
3826 if (Value *V = simplifyInstruction(PN, {DL, PN})) {
3827 PN->replaceAllUsesWith(V);
3828 PN->eraseFromParent();
3829 Changed = true;
3830 continue;
3831 }
3832
3833 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, DomBI,
3834 AggressiveInsts, Cost, Budget, TTI, AC,
3835 ZeroCostInstructions) ||
3836 !dominatesMergePoint(PN->getIncomingValue(1), BB, DomBI,
3837 AggressiveInsts, Cost, Budget, TTI, AC,
3838 ZeroCostInstructions))
3839 return Changed;
3840 }
3841
3842 // If we folded the first phi, PN dangles at this point. Refresh it. If
3843 // we ran out of PHIs then we simplified them all.
3844 PN = dyn_cast<PHINode>(BB->begin());
3845 if (!PN)
3846 return true;
3847
3848 // Don't fold i1 branches on PHIs which contain binary operators or
3849 // (possibly inverted) select form of or/ands if their parameters are
3850 // an equality test.
3851 auto IsBinOpOrAndEq = [](Value *V) {
3852 CmpPredicate Pred;
3853 if (match(V, m_CombineOr(
3855 m_BinOp(m_Cmp(Pred, m_Value(), m_Value()), m_Value()),
3856 m_BinOp(m_Value(), m_Cmp(Pred, m_Value(), m_Value()))),
3858 m_Cmp(Pred, m_Value(), m_Value()))))) {
3859 return CmpInst::isEquality(Pred);
3860 }
3861 return false;
3862 };
3863 if (PN->getType()->isIntegerTy(1) &&
3864 (IsBinOpOrAndEq(PN->getIncomingValue(0)) ||
3865 IsBinOpOrAndEq(PN->getIncomingValue(1)) || IsBinOpOrAndEq(IfCond)))
3866 return Changed;
3867
3868 // If all PHI nodes are promotable, check to make sure that all instructions
3869 // in the predecessor blocks can be promoted as well. If not, we won't be able
3870 // to get rid of the control flow, so it's not worth promoting to select
3871 // instructions.
3872 for (BasicBlock *IfBlock : IfBlocks)
3873 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
3874 if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
3875 // This is not an aggressive instruction that we can promote.
3876 // Because of this, we won't be able to get rid of the control flow, so
3877 // the xform is not worth it.
3878 return Changed;
3879 }
3880
3881 // If either of the blocks has its address taken, we can't do this fold.
3882 if (any_of(IfBlocks,
3883 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
3884 return Changed;
3885
3886 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond;
3887 if (IsUnpredictable) dbgs() << " (unpredictable)";
3888 dbgs() << " T: " << IfTrue->getName()
3889 << " F: " << IfFalse->getName() << "\n");
3890
3891 // If we can still promote the PHI nodes after this gauntlet of tests,
3892 // do all of the PHI's now.
3893
3894 // Move all 'aggressive' instructions, which are defined in the
3895 // conditional parts of the if's up to the dominating block.
3896 for (BasicBlock *IfBlock : IfBlocks)
3897 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
3898
3899 IRBuilder<NoFolder> Builder(DomBI);
3900 // Propagate fast-math-flags from phi nodes to replacement selects.
3901 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
3902 // Change the PHI node into a select instruction.
3903 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
3904 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
3905
3906 Value *Sel = Builder.CreateSelectFMF(IfCond, TrueVal, FalseVal,
3907 isa<FPMathOperator>(PN) ? PN : nullptr,
3908 "", DomBI);
3909 PN->replaceAllUsesWith(Sel);
3910 Sel->takeName(PN);
3911 PN->eraseFromParent();
3912 }
3913
3914 // At this point, all IfBlocks are empty, so our if statement
3915 // has been flattened. Change DomBlock to jump directly to our new block to
3916 // avoid other simplifycfg's kicking in on the diamond.
3917 Builder.CreateBr(BB);
3918
// The dominator-tree updates are collected while DomBI still exists (its
// successors are read here) and applied only after it has been erased.
3920 if (DTU) {
3921 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
3922 for (auto *Successor : successors(DomBlock))
3923 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
3924 }
3925
3926 DomBI->eraseFromParent();
3927 if (DTU)
3928 DTU->applyUpdates(Updates);
3929
3930 return true;
3931}
3932
3935 Value *RHS, const Twine &Name = "") {
// Combines LHS and RHS with the and/or opcode Opc. A plain binary operator is
// emitted when impliesPoison(RHS, LHS) holds; otherwise the builder's
// logical and/or form is used. Any opcode other than And/Or is a programming
// error.
3936 // Try to relax logical op to binary op.
3937 if (impliesPoison(RHS, LHS))
3938 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3939 if (Opc == Instruction::And)
3940 return Builder.CreateLogicalAnd(LHS, RHS, Name);
3941 if (Opc == Instruction::Or)
3942 return Builder.CreateLogicalOr(LHS, RHS, Name);
3943 llvm_unreachable("Invalid logical opcode");
3944}
3945
3946/// Return true if either PBI or BI has branch weight available, and store
3947/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3948/// not have branch weight, use 1:1 as its weight.
///
/// When false is returned neither branch had weights; the 1:1 fallback is
/// not applied on that path, so the outputs hold whatever
/// extractBranchWeights left in them.
3950 uint64_t &PredTrueWeight,
3951 uint64_t &PredFalseWeight,
3952 uint64_t &SuccTrueWeight,
3953 uint64_t &SuccFalseWeight) {
3954 bool PredHasWeights =
3955 extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3956 bool SuccHasWeights =
3957 extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3958 if (PredHasWeights || SuccHasWeights) {
// Fill in 1:1 only for the branch that lacks weights.
3959 if (!PredHasWeights)
3960 PredTrueWeight = PredFalseWeight = 1;
3961 if (!SuccHasWeights)
3962 SuccTrueWeight = SuccFalseWeight = 1;
3963 return true;
3964 } else {
3965 return false;
3966 }
3967}
3968
3969/// Determine if the two branches share a common destination and deduce a glue
3970/// that joins the branches' conditions to arrive at the common destination if
3971/// that would be profitable.
///
/// The result, when present, is a tuple of (the shared successor, the opcode
/// used to join the two conditions, whether the predecessor's condition has
/// to be inverted first). std::nullopt is returned when no successor is
/// shared or when the predecessor branch is considered too predictable.
3972static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3974 const TargetTransformInfo *TTI) {
3975 assert(BI && PBI && "Both blocks must end with a conditional branches.");
3976 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3977 "PredBB must be a predecessor of BB.");
3978
3979 // We have the potential to fold the conditions together, but if the
3980 // predecessor branch is predictable, we may not want to merge them.
3981 uint64_t PTWeight, PFWeight;
// PBITrueProb stays default-constructed unless TTI is available, PBI is not
// marked unpredictable and PBI carries usable weights; the isUnknown()
// tests below are what make the fold unconditional in that case.
3982 BranchProbability PBITrueProb, Likely;
3983 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3984 extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3985 (PTWeight + PFWeight) != 0) {
3986 PBITrueProb =
3987 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3988 Likely = TTI->getPredictableBranchThreshold();
3989 }
3990
// The four cases are tried in order; the first shared successor found
// decides the recipe, and a rejected case does not fall through to the next.
3991 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3992 // Speculate the 2nd condition unless the 1st is probably true.
3993 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3994 return {{BI->getSuccessor(0), Instruction::Or, false}};
3995 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3996 // Speculate the 2nd condition unless the 1st is probably false.
3997 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3998 return {{BI->getSuccessor(1), Instruction::And, false}};
3999 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4000 // Speculate the 2nd condition unless the 1st is probably true.
4001 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
4002 return {{BI->getSuccessor(1), Instruction::And, true}};
4003 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4004 // Speculate the 2nd condition unless the 1st is probably false.
4005 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
4006 return {{BI->getSuccessor(0), Instruction::Or, true}};
4007 }
4008 return std::nullopt;
4009}
4010
4012 DomTreeUpdater *DTU,
4013 MemorySSAUpdater *MSSAU,
4014 const TargetTransformInfo *TTI) {
// Folds the block ending in BI into the predecessor branch PBI: PBI is
// retargeted from BB to the successor of BI that is not shared, the branch
// weights of both branches are combined, loop metadata on BI is copied to
// PBI, and PBI's condition becomes the logical and/or of its previous
// condition and the remapped condition of BI. Always returns true.
4015 BasicBlock *BB = BI->getParent();
4016 BasicBlock *PredBlock = PBI->getParent();
4017
4018 // Determine if the two branches share a common destination.
4019 BasicBlock *CommonSucc;
4021 bool InvertPredCond;
4022 std::tie(CommonSucc, Opc, InvertPredCond) =
4024
4025 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
4026
4028 BB->getContext(), ConstantFolder{},
4030 // The builder is used to create instructions to eliminate the branch in
4031 // BB. If BB's terminator has !annotation metadata, add it to the new
4032 // instructions.
4033 I->copyMetadata(*BB->getTerminator(), LLVMContext::MD_annotation);
4034 }));
4035 Builder.SetInsertPoint(PBI);
4036
4037 // If we need to invert the condition in the pred block to match, do so now.
4038 if (InvertPredCond) {
4039 InvertBranch(PBI, Builder);
4040 }
4041
// UniqueSucc is the successor of BI on the same side (true/false) on which
// PBI reaches BB.
4042 BasicBlock *UniqueSucc =
4043 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
4044
4045 // Before cloning instructions, notify the successor basic block that it
4046 // is about to have a new predecessor. This will update PHI nodes,
4047 // which will allow us to update live-out uses of bonus instructions.
4048 addPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
4049
4050 // Try to update branch weights.
4051 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4052 SmallVector<uint64_t, 2> MDWeights;
4053 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4054 SuccTrueWeight, SuccFalseWeight)) {
4055
4056 if (PBI->getSuccessor(0) == BB) {
4057 // PBI: br i1 %x, BB, FalseDest
4058 // BI: br i1 %y, UniqueSucc, FalseDest
4059 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
4060 MDWeights.push_back(PredTrueWeight * SuccTrueWeight);
4061 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
4062 // TrueWeight for PBI * FalseWeight for BI.
4063 // We assume that total weights of a CondBrInst can fit into 32 bits.
4064 // Therefore, we will not have overflow using 64-bit arithmetic.
4065 MDWeights.push_back(PredFalseWeight * (SuccFalseWeight + SuccTrueWeight) +
4066 PredTrueWeight * SuccFalseWeight);
4067 } else {
4068 // PBI: br i1 %x, TrueDest, BB
4069 // BI: br i1 %y, TrueDest, UniqueSucc
4070 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
4071 // FalseWeight for PBI * TrueWeight for BI.
4072 MDWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
4073 PredFalseWeight * SuccTrueWeight);
4074 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
4075 MDWeights.push_back(PredFalseWeight * SuccFalseWeight);
4076 }
4077
4078 setFittedBranchWeights(*PBI, MDWeights, /*IsExpected=*/false,
4079 /*ElideAllZero=*/true);
4080
4081 // TODO: If BB is reachable from all paths through PredBlock, then we
4082 // could replace PBI's branch probabilities with BI's.
4083 } else
// Neither branch had weights: clear any profile metadata on PBI.
4084 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
4085
4086 // Now, update the CFG.
// The boolean index is 0 when successor 0 is BB and 1 otherwise, i.e. the
// slot that currently points at BB.
4087 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
4088
4089 if (DTU)
4090 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
4091 {DominatorTree::Delete, PredBlock, BB}});
4092
4093 // If BI was a loop latch, it may have had associated loop metadata.
4094 // We need to copy it to the new latch, that is, PBI.
4095 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
4096 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
4097
4098 ValueToValueMapTy VMap; // maps original values to cloned values
4100
4101 Module *M = BB->getModule();
4102
4103 PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
4104 for (DbgVariableRecord &DVR :
4106 RemapDbgRecord(M, &DVR, VMap,
4108 }
4109
4110 // Now that the Cond was cloned into the predecessor basic block,
4111 // or/and the two conditions together.
// NOTE(review): the step that fills VMap is on a line not visible here;
// this lookup relies on BI's condition having been mapped by it.
4112 Value *BICond = VMap[BI->getCondition()];
4113 PBI->setCondition(
4114 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
// If the combined condition was emitted as a select, give it the same
// weights that were computed for PBI above.
4116 if (auto *SI = dyn_cast<SelectInst>(PBI->getCondition()))
4117 if (!MDWeights.empty()) {
4118 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4119 setFittedBranchWeights(*SI, {MDWeights[0], MDWeights[1]},
4120 /*IsExpected=*/false, /*ElideAllZero=*/true);
4121 }
4122
4123 ++NumFoldBranchToCommonDest;
4124 return true;
4125}
4126
4127/// Return if an instruction's type or any of its operands' types are a vector
4128/// type.
4129static bool isVectorOp(Instruction &I) {
4130 return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
4131 return U->getType()->isVectorTy();
4132 });
4133}
4134
4135/// If this basic block is simple enough, and if a predecessor branches to us
4136/// and one of our successors, fold the block into the predecessor and use
4137/// logical operations to pick the right destination.
///
/// Returns true when a fold into a predecessor was performed and false
/// otherwise. At most one predecessor is folded per call.
4139 MemorySSAUpdater *MSSAU,
4140 const TargetTransformInfo *TTI,
4141 AssumptionCache *AC,
4142 unsigned BonusInstThreshold) {
4143 BasicBlock *BB = BI->getParent();
4147
4149
// NOTE(review): Cond is declared on a line not visible here; the visible
// part of the test requires it to live in BB and to have exactly one use.
4151 Cond->getParent() != BB || !Cond->hasOneUse())
4152 return false;
4153
4154 // Finally, don't infinitely unroll conditional loops.
4155 if (is_contained(successors(BB), BB))
4156 return false;
4157
4158 // With which predecessors will we want to deal with?
4160 for (BasicBlock *PredBlock : predecessors(BB)) {
4161 CondBrInst *PBI = dyn_cast<CondBrInst>(PredBlock->getTerminator());
4162
4163 // Check that we have two conditional branches. If there is a PHI node in
4164 // the common successor, verify that the same value flows in from both
4165 // blocks.
4166 if (!PBI || !safeToMergeTerminators(BI, PBI))
4167 continue;
4168
4169 // Determine if the two branches share a common destination.
4170 BasicBlock *CommonSucc;
4172 bool InvertPredCond;
4173 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
4174 std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
4175 else
4176 continue;
4177
4178 // Check the cost of inserting the necessary logic before performing the
4179 // transformation.
4180 if (TTI) {
4181 Type *Ty = BI->getCondition()->getType();
4182 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
// Inverting the predecessor condition is charged as an extra xor unless the
// condition is a compare with a single use.
4183 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
4184 !isa<CmpInst>(PBI->getCondition())))
4185 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
4186
4188 continue;
4189 }
4190
4191 // Ok, we do want to deal with this predecessor. Record it.
4192 Preds.emplace_back(PredBlock);
4193 }
4194
4195 // If there aren't any predecessors into which we can fold,
4196 // don't bother checking the cost.
4197 if (Preds.empty())
4198 return false;
4199
4200 // Only allow this transformation if computing the condition doesn't involve
4201 // too many instructions and these involved instructions can be executed
4202 // unconditionally. We denote all involved instructions except the condition
4203 // as "bonus instructions", and only allow this transformation when the
4204 // number of the bonus instructions we'll need to create when cloning into
4205 // each predecessor does not exceed a certain threshold.
4206 unsigned NumBonusInsts = 0;
4207 bool SawVectorOp = false;
4208 const unsigned PredCount = Preds.size();
4209 // Speculated instructions will be inserted before the terminator of the
4210 // predecessor. Only handle the simple case of one predecessor.
4211 const Instruction *CxtI =
4212 PredCount == 1 ? Preds[0]->getTerminator() : nullptr;
4213 for (Instruction &I : *BB) {
4214 // Don't check the branch condition comparison itself.
4215 if (&I == Cond)
4216 continue;
4217 // Ignore the terminator.
4219 continue;
4220 // Pseudo probes aren't speculatable but can be dropped on fold.
4222 continue;
4223 // I must be safe to execute unconditionally.
4224 if (!isSafeToSpeculativelyExecute(&I, CxtI, AC))
4225 return false;
4226 SawVectorOp |= isVectorOp(I);
4227
4228 // Account for the cost of duplicating this instruction into each
4229 // predecessor. Ignore free instructions.
4230 if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
4232 NumBonusInsts += PredCount;
4233
4234 // Early exits once we reach the limit.
// Inside the loop the larger, vector-scaled limit is used because a vector
// op may still show up later; the exact limit is applied after the loop.
4235 if (NumBonusInsts >
4236 BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
4237 return false;
4238 }
4239
// A use is acceptable if it is a PHI operand flowing in from BB, or a
// non-PHI user located in BB after I.
4240 auto IsBCSSAUse = [BB, &I](Use &U) {
4241 auto *UI = cast<Instruction>(U.getUser());
4242 if (auto *PN = dyn_cast<PHINode>(UI))
4243 return PN->getIncomingBlock(U) == BB;
4244 return UI->getParent() == BB && I.comesBefore(UI);
4245 };
4246
4247 // Does this instruction require rewriting of uses?
4248 if (!all_of(I.uses(), IsBCSSAUse))
4249 return false;
4250 }
4251 if (NumBonusInsts >
4252 BonusInstThreshold *
4253 (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
4254 return false;
4255
4256 // Ok, we have the budget. Perform the transformation.
// The loop body returns unconditionally, so only the first recorded
// predecessor is folded in this call.
4257 for (BasicBlock *PredBlock : Preds) {
4258 auto *PBI = cast<CondBrInst>(PredBlock->getTerminator());
4259 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
4260 }
4261 return false;
4262}
4263
4264// If there is only one store in BB1 and BB2, return it, otherwise return
4265// nullptr.
// Either block may be null, in which case it is skipped. nullptr is also the
// result when neither block contains a store.
4267 StoreInst *S = nullptr;
4268 for (auto *BB : {BB1, BB2}) {
4269 if (!BB)
4270 continue;
4271 for (auto &I : *BB)
4272 if (auto *SI = dyn_cast<StoreInst>(&I)) {
4273 if (S)
4274 // Multiple stores seen.
4275 return nullptr;
4276 else
4277 S = SI;
4278 }
4279 }
4280 return S;
4281}
4282
4284 Value *AlternativeV = nullptr) {
// Returns a value through which V can be referenced in BB's single
// successor: an existing matching PHI there, V itself when no PHI is
// needed, or a newly created PHI named "simplifycfg.merge".
4285 // PHI is going to be a PHI node that allows the value V that is defined in
4286 // BB to be referenced in BB's only successor.
4287 //
4288 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
4289 // doesn't matter to us what the other operand is (it'll never get used). We
4290 // could just create a new PHI with an undef incoming value, but that could
4291 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
4292 // other PHI. So here we directly look for some PHI in BB's successor with V
4293 // as an incoming operand. If we find one, we use it, else we create a new
4294 // one.
4295 //
4296 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
4297 // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
4298 // where OtherBB is the single other predecessor of BB's only successor.
4299 PHINode *PHI = nullptr;
// The result is dereferenced without a null check, so BB is expected to
// have exactly one successor.
4300 BasicBlock *Succ = BB->getSingleSuccessor();
4301
4302 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
4303 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
4304 PHI = cast<PHINode>(I);
4305 if (!AlternativeV)
4306 break;
4307
4308 assert(Succ->hasNPredecessors(2));
4309 auto PredI = pred_begin(Succ);
4310 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
4311 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
4312 break;
// This PHI carries V from BB but not AlternativeV from the other
// predecessor; forget it and keep scanning.
4313 PHI = nullptr;
4314 }
4315 if (PHI)
4316 return PHI;
4317
4318 // If V is not an instruction defined in BB, just return it.
4319 if (!AlternativeV &&
4320 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
4321 return V;
4322
4323 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
4324 PHI->insertBefore(Succ->begin());
4325 PHI->addIncoming(V, BB);
// Every other predecessor contributes AlternativeV, or poison when no
// alternative was supplied.
4326 for (BasicBlock *PredBB : predecessors(Succ))
4327 if (PredBB != BB)
4328 PHI->addIncoming(
4329 AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4330 return PHI;
4331}
4332
// Replaces the unique store found in the P blocks (PTB/PFB) and the unique
// store found in the Q blocks (QTB/QFB) with a single store of the merged
// value to Address. The new store is placed in a block split out of PostBB
// and is guarded by the OR of the (possibly inverted) P and Q branch
// conditions. A null PTB or QTB is accepted and treated as "no block".
// Returns true if the stores were merged, false if any legality or
// profitability check below fails.
4334 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4335 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4336 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4337 // For every pointer, there must be exactly two stores, one coming from
4338 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4339 // store (to any address) in PTB,PFB or QTB,QFB.
4340 // FIXME: We could relax this restriction with a bit more work and performance
4341 // testing.
4342 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4343 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4344 if (!PStore || !QStore)
4345 return false;
4346
4347 // Now check the stores are compatible.
4348 if (!QStore->isUnordered() || !PStore->isUnordered() ||
4349 PStore->getOrdering() != QStore->getOrdering() ||
4350 PStore->getSyncScopeID() != QStore->getSyncScopeID() ||
4351 PStore->getValueOperand()->getType() !=
4352 QStore->getValueOperand()->getType())
4353 return false;
4354
4355 // Check that sinking the store won't cause program behavior changes. Sinking
4356 // the store out of the Q blocks won't change any behavior as we're sinking
4357 // from a block to its unconditional successor. But we're moving a store from
4358 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4359 // So we need to check that there are no aliasing loads or stores in
4360 // QBI, QTB and QFB. We also need to check there are no conflicting memory
4361 // operations between PStore and the end of its parent block.
4362 //
4363 // The ideal way to do this is to query AliasAnalysis, but we don't
4364 // preserve AA currently so that is dangerous. Be super safe and just
4365 // check there are no other memory operations at all.
4366 for (auto &I : *QFB->getSinglePredecessor())
4367 if (I.mayReadOrWriteMemory())
4368 return false;
4369 for (auto &I : *QFB)
4370 if (&I != QStore && I.mayReadOrWriteMemory())
4371 return false;
4372 if (QTB)
4373 for (auto &I : *QTB)
4374 if (&I != QStore && I.mayReadOrWriteMemory())
4375 return false;
4376 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4377 I != E; ++I)
4378 if (&*I != PStore && I->mayReadOrWriteMemory())
4379 return false;
4380
4381 // If we're not in aggressive mode, we only optimize if we have some
4382 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4383 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4384 if (!BB)
4385 return true;
4386 // Heuristic: if the block can be if-converted/phi-folded and the
4387 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4388 // thread this store.
4389 InstructionCost Cost = 0;
4390 InstructionCost Budget =
4392 for (auto &I : *BB) {
4393 // Consider terminator instruction to be free.
4394 if (I.isTerminator())
4395 continue;
4396 // If this is one of the stores that we want to speculate out of this BB,
4397 // then don't count its cost, consider it to be free.
// Membership must be tested with is_contained: llvm::find returns an
// iterator, and an iterator into a non-empty range converts to true even
// when the element was not found.
4398 if (auto *S = dyn_cast<StoreInst>(&I))
4399 if (llvm::is_contained(FreeStores, S))
4400 continue;
4401 // Else, we have a white-list of instructions that we are okay speculating.
4403 return false; // Not in white-list - not worthwhile folding.
4404 // And finally, if this is a non-free instruction that we are okay
4405 // speculating, ensure that we consider the speculation budget.
4406 Cost +=
4407 TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4408 if (Cost > Budget)
4409 return false; // Eagerly refuse to fold as soon as we're out of budget.
4410 }
4411 assert(Cost <= Budget &&
4412 "When we run out of budget we will eagerly return from within the "
4413 "per-instruction loop.");
4414 return true;
4415 };
4416
4417 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4419 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4420 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4421 return false;
4422
4423 // If PostBB has more than two predecessors, we need to split it so we can
4424 // sink the store.
4425 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4426 // We know that QFB's only successor is PostBB. And QFB has a single
4427 // predecessor. If QTB exists, then its only successor is also PostBB.
4428 // If QTB does not exist, then QFB's only predecessor has a conditional
4429 // branch to QFB and PostBB.
4430 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4431 BasicBlock *NewBB =
4432 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4433 if (!NewBB)
4434 return false;
4435 PostBB = NewBB;
4436 }
4437
4438 // OK, we're going to sink the stores to PostBB. The store has to be
4439 // conditional though, so first create the predicate.
4440 CondBrInst *PBranch =
4442 CondBrInst *QBranch =
4444 Value *PCond = PBranch->getCondition();
4445 Value *QCond = QBranch->getCondition();
4446
4448 PStore->getParent());
4450 QStore->getParent(), PPHI);
4451
4452 BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4453 IRBuilder<> QB(PostBB, PostBBFirst);
4454 QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4455
// A store that sits in the "false" block executes when its branch condition
// is false, so the predicate for that side must be flipped once more.
4456 InvertPCond ^= (PStore->getParent() != PTB);
4457 InvertQCond ^= (QStore->getParent() != QTB);
4458 Value *PPred = InvertPCond ? QB.CreateNot(PCond) : PCond;
4459 Value *QPred = InvertQCond ? QB.CreateNot(QCond) : QCond;
4460
4461 Value *CombinedPred = QB.CreateOr(PPred, QPred);
4462
4463 BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4464 auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4465 /*Unreachable=*/false,
4466 /*BranchWeights=*/nullptr, DTU);
4467 if (hasBranchWeightMD(*PBranch) && hasBranchWeightMD(*QBranch) &&
4469 SmallVector<uint32_t, 2> PWeights, QWeights;
4470 extractBranchWeights(*PBranch, PWeights);
4471 extractBranchWeights(*QBranch, QWeights);
4472 if (InvertPCond)
4473 std::swap(PWeights[0], PWeights[1]);
4474 if (InvertQCond)
4475 std::swap(QWeights[0], QWeights[1]);
4476 auto CombinedWeights = getDisjunctionWeights(PWeights, QWeights);
4478 {CombinedWeights[0], CombinedWeights[1]},
4479 /*IsExpected=*/false, /*ElideAllZero=*/true);
4480 }
4481
4482 QB.SetInsertPoint(T);
4483 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4484 combineMetadataForCSE(QStore, PStore, true);
4485 SI->copyMetadata(*QStore);
4486 // Update any dbg.assign intrinsics to track the merged value (QPHI) instead
4487 // of the original constant values, likely making these identical.
4488 for (auto *DbgAssign : at::getDVRAssignmentMarkers(SI)) {
4489 if (llvm::is_contained(DbgAssign->location_ops(),
4490 PStore->getValueOperand()))
4491 DbgAssign->replaceVariableLocationOp(PStore->getValueOperand(), QPHI);
4492 if (llvm::is_contained(DbgAssign->location_ops(),
4493 QStore->getValueOperand()))
4494 DbgAssign->replaceVariableLocationOp(QStore->getValueOperand(), QPHI);
4495 }
4496
4497 // Choose the minimum alignment. If we could prove both stores execute, we
4498 // could use biggest one. In this case, though, we only know that one of the
4499 // stores executes. And we don't know it's safe to take the alignment from a
4500 // store that doesn't execute.
4501 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4502
4503 if (QStore->isAtomic())
4504 SI->setAtomic(QStore->getOrdering(), QStore->getSyncScopeID());
4505
4506 QStore->eraseFromParent();
4507 PStore->eraseFromParent();
4508
4509 return true;
4510}
4511
// Recognizes two consecutive diamonds/triangles headed by PBI and QBI,
// collects the addresses stored to in both the P blocks and the Q blocks, and
// calls mergeConditionalStoreToAddress once per common address. Returns true
// if any of those calls reported a change; returns false early when the CFG
// shape does not match.
4513 DomTreeUpdater *DTU, const DataLayout &DL,
4514 const TargetTransformInfo &TTI) {
4515 // The intention here is to find diamonds or triangles (see below) where each
4516 // conditional block contains a store to the same address. Both of these
4517 // stores are conditional, so they can't be unconditionally sunk. But it may
4518 // be profitable to speculatively sink the stores into one merged store at the
4519 // end, and predicate the merged store on the union of the two conditions of
4520 // PBI and QBI.
4521 //
4522 // This can reduce the number of stores executed if both of the conditions are
4523 // true, and can allow the blocks to become small enough to be if-converted.
4524 // This optimization will also chain, so that ladders of test-and-set
4525 // sequences can be if-converted away.
4526 //
4527 // We only deal with simple diamonds or triangles:
4528 //
4529 // PBI or PBI or a combination of the two
4530 // / \ | \
4531 // PTB PFB | PFB
4532 // \ / | /
4533 // QBI QBI
4534 // / \ | \
4535 // QTB QFB | QFB
4536 // \ / | /
4537 // PostBB PostBB
4538 //
4539 // We model triangles as a type of diamond with a nullptr "true" block.
4540 // Triangles are canonicalized so that the fallthrough edge is represented by
4541 // a true condition, as in the diagram above.
4542 BasicBlock *PTB = PBI->getSuccessor(0);
4543 BasicBlock *PFB = PBI->getSuccessor(1);
4544 BasicBlock *QTB = QBI->getSuccessor(0);
4545 BasicBlock *QFB = QBI->getSuccessor(1);
4546 BasicBlock *PostBB = QFB->getSingleSuccessor();
4547
4548 // Make sure we have a good guess for PostBB. If QTB's only successor is
4549 // QFB, then QFB is a better PostBB.
4550 if (QTB->getSingleSuccessor() == QFB)
4551 PostBB = QFB;
4552
4553 // If we couldn't find a good PostBB, stop.
4554 if (!PostBB)
4555 return false;
4556
4557 bool InvertPCond = false, InvertQCond = false;
4558 // Canonicalize fallthroughs to the true branches.
4559 if (PFB == QBI->getParent()) {
4560 std::swap(PFB, PTB);
4561 InvertPCond = true;
4562 }
4563 if (QFB == PostBB) {
4564 std::swap(QFB, QTB);
4565 InvertQCond = true;
4566 }
4567
4568 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4569 // and QFB may not. Model fallthroughs as a nullptr block.
4570 if (PTB == QBI->getParent())
4571 PTB = nullptr;
4572 if (QTB == PostBB)
4573 QTB = nullptr;
4574
4575 // Legality bailouts. We must have at least the non-fallthrough blocks and
4576 // the post-dominating block, and the non-fallthroughs must only have one
4577 // predecessor.
4578 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4579 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4580 };
4581 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4582 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4583 return false;
4584 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4585 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4586 return false;
// QBI's block must be referenced exactly twice.
4587 if (!QBI->getParent()->hasNUses(2))
4588 return false;
4589
4590 // OK, this is a sequence of two diamonds or triangles.
4591 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4592 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4593 for (auto *BB : {PTB, PFB}) {
4594 if (!BB)
4595 continue;
4596 for (auto &I : *BB)
4598 PStoreAddresses.insert(SI->getPointerOperand());
4599 }
4600 for (auto *BB : {QTB, QFB}) {
4601 if (!BB)
4602 continue;
4603 for (auto &I : *BB)
4605 QStoreAddresses.insert(SI->getPointerOperand());
4606 }
4607
4608 set_intersect(PStoreAddresses, QStoreAddresses);
4609 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4610 // clear what it contains.
4611 auto &CommonAddresses = PStoreAddresses;
4612
4613 bool Changed = false;
4614 for (auto *Address : CommonAddresses)
4615 Changed |=
4616 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4617 InvertPCond, InvertQCond, DTU, DL, TTI);
4618 return Changed;
4619}
4620
4621/// If the previous block ended with a widenable branch, determine if reusing
4622/// the target block is profitable and legal. This will have the effect of
4623/// "widening" PBI, but doesn't require us to reason about hoisting safety.
/// Returns true if one of BI's successors was redirected to PBI's successor 1,
/// false otherwise.
4625 DomTreeUpdater *DTU) {
4626 // TODO: This can be generalized in two important ways:
4627 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4628 // values from the PBI edge.
4629 // 2) We can sink side effecting instructions into BI's fallthrough
4630 // successor provided they don't contribute to computation of
4631 // BI's condition.
4632 BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4633 BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4634 if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4635 !BI->getParent()->getSinglePredecessor())
4636 return false;
4637 if (!IfFalseBB->phis().empty())
4638 return false; // TODO
4639 // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4640 // may undo the transform done here.
4641 // TODO: There might be a more fine-grained solution to this.
4642 if (!llvm::succ_empty(IfFalseBB))
4643 return false;
4644 // Use lambda to lazily compute expensive condition after cheap ones.
4645 auto NoSideEffects = [](BasicBlock &BB) {
4646 return llvm::none_of(BB, [](const Instruction &I) {
4647 return I.mayWriteToMemory() || I.mayHaveSideEffects();
4648 });
4649 };
// First try BI's successor 1: if it qualifies, point that edge at IfFalseBB.
4650 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4651 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4652 NoSideEffects(*BI->getParent())) {
4653 auto *OldSuccessor = BI->getSuccessor(1);
4654 OldSuccessor->removePredecessor(BI->getParent());
4655 BI->setSuccessor(1, IfFalseBB);
4656 if (DTU)
4657 DTU->applyUpdates(
4658 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4659 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4660 return true;
4661 }
// Otherwise apply the same redirection to BI's successor 0.
4662 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4663 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4664 NoSideEffects(*BI->getParent())) {
4665 auto *OldSuccessor = BI->getSuccessor(0);
4666 OldSuccessor->removePredecessor(BI->getParent());
4667 BI->setSuccessor(0, IfFalseBB);
4668 if (DTU)
4669 DTU->applyUpdates(
4670 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4671 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4672 return true;
4673 }
4674 return false;
4675}
4676
4677/// If we have a conditional branch as a predecessor of another block,
4678/// this function tries to simplify it. We know
4679/// that PBI and BI are both conditional branches, and BI is in one of the
4680/// successor blocks of PBI - PBI branches to BI.
/// Returns true when one of the simplifications below was applied.
4682 DomTreeUpdater *DTU,
4683 const DataLayout &DL,
4684 const TargetTransformInfo &TTI) {
4685 BasicBlock *BB = BI->getParent();
4686
4687 // If this block ends with a branch instruction, and if there is a
4688 // predecessor that ends on a branch of the same condition, make
4689 // this conditional branch redundant.
4690 if (PBI->getCondition() == BI->getCondition() &&
4691 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4692 // Okay, the outcome of this conditional branch is statically
4693 // knowable. If this block had a single pred, handle specially, otherwise
4694 // foldCondBranchOnValueKnownInPredecessor() will handle it.
4695 if (BB->getSinglePredecessor()) {
4696 // Turn this into a branch on constant.
4697 bool CondIsTrue = PBI->getSuccessor(0) == BB;
4698 BI->setCondition(
4699 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4700 return true; // Nuke the branch on constant.
4701 }
4702 }
4703
4704 // If the previous block ended with a widenable branch, determine if reusing
4705 // the target block is profitable and legal. This will have the effect of
4706 // "widening" PBI, but doesn't require us to reason about hoisting safety.
4707 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4708 return true;
4709
4710 // If both branches are conditional and both contain stores to the same
4711 // address, remove the stores from the conditionals and create a conditional
4712 // merged store at the end.
4713 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4714 return true;
4715
4716 // If this is a conditional branch in an empty block, and if any
4717 // predecessors are a conditional branch to one of our destinations,
4718 // fold the conditions into logical ops and one cond br.
4719
4720 // Ignore dbg intrinsics.
4721 if (&*BB->begin() != BI)
4722 return false;
4723
// Find a destination shared by PBI and BI. PBIOp/BIOp are the successor
// indices through which each branch reaches it.
4724 int PBIOp, BIOp;
4725 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4726 PBIOp = 0;
4727 BIOp = 0;
4728 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4729 PBIOp = 0;
4730 BIOp = 1;
4731 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4732 PBIOp = 1;
4733 BIOp = 0;
4734 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4735 PBIOp = 1;
4736 BIOp = 1;
4737 } else {
4738 return false;
4739 }
4740
4741 // Check to make sure that the other destination of this branch
4742 // isn't BB itself. If so, this is an infinite loop that will
4743 // keep getting unwound.
4744 if (PBI->getSuccessor(PBIOp) == BB)
4745 return false;
4746
4747 // If predecessor's branch probability to BB is too low don't merge branches.
4748 SmallVector<uint32_t, 2> PredWeights;
4749 if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4750 extractBranchWeights(*PBI, PredWeights) &&
4751 (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4752
4754 PredWeights[PBIOp],
4755 static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4756
4757 BranchProbability Likely = TTI.getPredictableBranchThreshold();
4758 if (CommonDestProb >= Likely)
4759 return false;
4760 }
4761
4762 // Do not perform this transformation if it would require
4763 // insertion of a large number of select instructions. For targets
4764 // without predication/cmovs, this is a big pessimization.
4765
4766 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4767 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
// NumPhis is tested before it is incremented, so the loop bails out only
// when CommonDest starts with more than three PHI nodes.
4768 unsigned NumPhis = 0;
4769 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4770 ++II, ++NumPhis) {
4771 if (NumPhis > 2) // Disable this xform.
4772 return false;
4773 }
4774
4775 // Finally, if everything is ok, fold the branches to logical ops.
4776 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4777
4778 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4779 << "AND: " << *BI->getParent());
4780
4782
4783 // If OtherDest *is* BB, then BB is a basic block with a single conditional
4784 // branch in it, where one edge (OtherDest) goes back to itself but the other
4785 // exits. We don't *know* that the program avoids the infinite loop
4786 // (even though that seems likely). If we do this xform naively, we'll end up
4787 // recursively unpeeling the loop. Since we know that (after the xform is
4788 // done) that the block *is* infinite if reached, we just make it an obviously
4789 // infinite loop with no cond branch.
4790 if (OtherDest == BB) {
4791 // Insert it at the end of the function, because it's either cold,
4792 // or it won't matter if it's hot. :)
4793 BasicBlock *InfLoopBlock =
4794 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4795 UncondBrInst::Create(InfLoopBlock, InfLoopBlock);
4796 if (DTU)
4797 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4798 OtherDest = InfLoopBlock;
4799 }
4800
4801 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4802
4803 // BI may have other predecessors. Because of this, we leave
4804 // it alone, but modify PBI.
4805
4806 // Make sure we get to CommonDest on True&True directions.
4807 Value *PBICond = PBI->getCondition();
4808 IRBuilder<NoFolder> Builder(PBI);
4809 if (PBIOp)
4810 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4811
4812 Value *BICond = BI->getCondition();
4813 if (BIOp)
4814 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4815
4816 // Merge the conditions.
4817 Value *Cond =
4818 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4819
4820 // Modify PBI to branch on the new condition to the new dests.
4821 PBI->setCondition(Cond);
4822 PBI->setSuccessor(0, CommonDest);
4823 PBI->setSuccessor(1, OtherDest);
4824
4825 if (DTU) {
4826 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4827 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4828
4829 DTU->applyUpdates(Updates);
4830 }
4831
4832 // Update branch weight for PBI.
4833 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4834 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4835 bool HasWeights =
4836 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4837 SuccTrueWeight, SuccFalseWeight);
4838 if (HasWeights) {
4839 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4840 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4841 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4842 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4843 // The weight to CommonDest should be PredCommon * SuccTotal +
4844 // PredOther * SuccCommon.
4845 // The weight to OtherDest should be PredOther * SuccOther.
4846 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4847 PredOther * SuccCommon,
4848 PredOther * SuccOther};
4849
4850 setFittedBranchWeights(*PBI, NewWeights, /*IsExpected=*/false,
4851 /*ElideAllZero=*/true);
4852 // Cond may be a select instruction with the first operand set to "true", or
4853 // the second to "false" (see how createLogicalOp works for `and` and `or`)
4855 if (auto *SI = dyn_cast<SelectInst>(Cond)) {
4856 assert(isSelectInRoleOfConjunctionOrDisjunction(SI));
4857 // The select is predicated on PBICond
4858 assert(SI->getCondition() == PBICond);
4859 // The corresponding probabilities are what was referred to above as
4860 // PredCommon and PredOther.
4861 setFittedBranchWeights(*SI, {PredCommon, PredOther},
4862 /*IsExpected=*/false, /*ElideAllZero=*/true);
4863 }
4864 }
4865
4866 // OtherDest may have phi nodes. If so, add an entry from PBI's
4867 // block that are identical to the entries for BI's block.
4868 addPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4869
4870 // We know that the CommonDest already had an edge from PBI to
4871 // it. If it has PHIs though, the PHIs may have different
4872 // entries for BB and PBI's BB. If so, insert a select to make
4873 // them agree.
4874 for (PHINode &PN : CommonDest->phis()) {
4875 Value *BIV = PN.getIncomingValueForBlock(BB);
4876 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4877 Value *PBIV = PN.getIncomingValue(PBBIdx);
4878 if (BIV != PBIV) {
4879 // Insert a select in PBI to pick the right value.
4881 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4882 PN.setIncomingValue(PBBIdx, NV);
4883 // The select has the same condition as PBI, in the same BB. The
4884 // probabilities don't change.
4885 if (HasWeights) {
4886 uint64_t TrueWeight = PBIOp ? PredFalseWeight : PredTrueWeight;
4887 uint64_t FalseWeight = PBIOp ? PredTrueWeight : PredFalseWeight;
4888 setFittedBranchWeights(*NV, {TrueWeight, FalseWeight},
4889 /*IsExpected=*/false, /*ElideAllZero=*/true);
4890 }
4891 }
4892 }
4893
4894 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4895 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4896
4897 // This basic block is probably dead. We know it has at least
4898 // one fewer predecessor.
4899 return true;
4900}
4901
4902// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4903// true or to FalseBB if Cond is false.
4904// Takes care of updating the successors and removing the old terminator.
4905// Also makes sure not to introduce new successors by assuming that edges to
4906// non-successor TrueBBs and FalseBBs aren't reachable.
// TrueWeight and FalseWeight are attached as branch weights only when a
// conditional branch is created. Always returns true.
4907bool SimplifyCFGOpt::simplifyTerminatorOnSelect(Instruction *OldTerm,
4908 Value *Cond, BasicBlock *TrueBB,
4909 BasicBlock *FalseBB,
4910 uint32_t TrueWeight,
4911 uint32_t FalseWeight) {
4912 auto *BB = OldTerm->getParent();
4913 // Remove any superfluous successor edges from the CFG.
4914 // First, figure out which successors to preserve.
4915 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4916 // successor.
4917 BasicBlock *KeepEdge1 = TrueBB;
4918 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4919
4920 SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4921
4922 // Then remove the rest.
// KeepEdge1/KeepEdge2 are cleared as their blocks are found among the
// successors, so a null value afterwards means that edge existed.
4923 for (BasicBlock *Succ : successors(OldTerm)) {
4924 // Make sure only to keep exactly one copy of each edge.
4925 if (Succ == KeepEdge1)
4926 KeepEdge1 = nullptr;
4927 else if (Succ == KeepEdge2)
4928 KeepEdge2 = nullptr;
4929 else {
4930 Succ->removePredecessor(BB,
4931 /*KeepOneInputPHIs=*/true);
4932
// Duplicate edges to TrueBB/FalseBB are dropped above but are not recorded
// for the dominator tree update below.
4933 if (Succ != TrueBB && Succ != FalseBB)
4934 RemovedSuccessors.insert(Succ);
4935 }
4936 }
4937
4938 IRBuilder<> Builder(OldTerm);
4939 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4940
4941 // Insert an appropriate new terminator.
4942 if (!KeepEdge1 && !KeepEdge2) {
4943 if (TrueBB == FalseBB) {
4944 // We were only looking for one successor, and it was present.
4945 // Create an unconditional branch to it.
4946 Builder.CreateBr(TrueBB);
4947 } else {
4948 // We found both of the successors we were looking for.
4949 // Create a conditional branch sharing the condition of the select.
4950 CondBrInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4951 setBranchWeights(*NewBI, {TrueWeight, FalseWeight},
4952 /*IsExpected=*/false, /*ElideAllZero=*/true);
4953 }
4954 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4955 // Neither of the selected blocks were successors, so this
4956 // terminator must be unreachable.
4957 new UnreachableInst(OldTerm->getContext(), OldTerm->getIterator());
4958 } else {
4959 // One of the selected values was a successor, but the other wasn't.
4960 // Insert an unconditional branch to the one that was found;
4961 // the edge to the one that wasn't must be unreachable.
4962 if (!KeepEdge1) {
4963 // Only TrueBB was found.
4964 Builder.CreateBr(TrueBB);
4965 } else {
4966 // Only FalseBB was found.
4967 Builder.CreateBr(FalseBB);
4968 }
4969 }
4970
4972
4973 if (DTU) {
4974 SmallVector<DominatorTree::UpdateType, 2> Updates;
4975 Updates.reserve(RemovedSuccessors.size());
4976 for (auto *RemovedSuccessor : RemovedSuccessors)
4977 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4978 DTU->applyUpdates(Updates);
4979 }
4980
4981 return true;
4982}
4983
4984// Replaces
4985// (switch (select cond, X, Y)) on constant X, Y
4986// with a branch - conditional if X and Y lead to distinct BBs,
4987// unconditional otherwise.
4988bool SimplifyCFGOpt::simplifySwitchOnSelect(SwitchInst *SI,
4989 SelectInst *Select) {
4990 // Check for constant integer values in the select.
4991 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4992 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4993 if (!TrueVal || !FalseVal)
4994 return false;
4995
4996 // Find the relevant condition and destinations.
4997 Value *Condition = Select->getCondition();
4998 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4999 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
5000
5001 // Get weight for TrueBB and FalseBB.
5002 uint32_t TrueWeight = 0, FalseWeight = 0;
5003 SmallVector<uint64_t, 8> Weights;
5004 bool HasWeights = hasBranchWeightMD(*SI);
5005 if (HasWeights) {
5006 getBranchWeights(SI, Weights);
5007 if (Weights.size() == 1 + SI->getNumCases()) {
5008 TrueWeight =
5009 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
5010 FalseWeight =
5011 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
5012 }
5013 }
5014
5015 // Perform the actual simplification.
5016 return simplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
5017 FalseWeight);
5018}
5019
5020// Replaces
5021// (indirectbr (select cond, blockaddress(@fn, BlockA),
5022// blockaddress(@fn, BlockB)))
5023// with
5024// (br cond, BlockA, BlockB).
// Returns false without changes unless both select operands are block
// addresses; otherwise returns the result of simplifyTerminatorOnSelect.
5025bool SimplifyCFGOpt::simplifyIndirectBrOnSelect(IndirectBrInst *IBI,
5026 SelectInst *SI) {
5027 // Check that both operands of the select are block addresses.
5028 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
5029 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
5030 if (!TBA || !FBA)
5031 return false;
5032
5033 // Extract the actual blocks.
5034 BasicBlock *TrueBB = TBA->getBasicBlock();
5035 BasicBlock *FalseBB = FBA->getBasicBlock();
5036
5037 // The select's profile becomes the profile of the conditional branch that
5038 // replaces the indirect branch.
// The vector is created with two entries; indices 0 and 1 are read below.
5039 SmallVector<uint32_t> SelectBranchWeights(2);
5041 extractBranchWeights(*SI, SelectBranchWeights);
5042 // Perform the actual simplification.
5043 return simplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB,
5044 SelectBranchWeights[0],
5045 SelectBranchWeights[1]);
5046}
5047
5048/// This is called when we find an icmp instruction
5049/// (a seteq/setne with a constant) as the only instruction in a
5050/// block that ends with an uncond branch. We are looking for a very specific
5051/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
5052/// this case, we merge the first two "or's of icmp" into a switch, but then the
5053/// default value goes to an uncond block with a seteq in it, we get something
5054/// like:
5055///
5056/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
5057/// DEFAULT:
5058/// %tmp = icmp eq i8 %A, 92
5059/// br label %end
5060/// end:
5061/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
5062///
5063/// We prefer to split the edge to 'end' so that there is a true/false entry to
5064/// the PHI, merging the third icmp into the switch.
5065bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
5066 ICmpInst *ICI, IRBuilder<> &Builder) {
5067 // Select == nullptr means we assume that there is a hidden no-op select
5068 // instruction of `_ = select %icmp, true, false` after `%icmp = icmp ...`
5069 return tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, nullptr, Builder);
5070}
5071
5072/// Similar to tryToSimplifyUncondBranchWithICmpInIt, but handle a more generic
5073/// case. This is called when we find an icmp instruction (a seteq/setne with a
5074/// constant) and its following select instruction as the only TWO instructions
5075/// in a block that ends with an uncond branch. We are looking for a very
5076/// specific pattern that occurs when "
5077/// if (A == 1) return C1;
5078/// if (A == 2) return C2;
5079/// if (A < 3) return C3;
5080/// return C4;
5081/// " gets simplified. In this case, we merge the first two "branches of icmp"
5082/// into a switch, but then the default value goes to an uncond block with a lt
5083/// icmp and select in it, as InstCombine can not simplify "A < 3" as "A == 2".
5084/// After SimplifyCFG and other subsequent optimizations (e.g., SCCP), we might
5085/// get something like:
5086///
5087/// case1:
5088/// switch i8 %A, label %DEFAULT [ i8 0, label %end i8 1, label %case2 ]
5089/// case2:
5090/// br label %end
5091/// DEFAULT:
5092/// %tmp = icmp eq i8 %A, 2
5093/// %val = select i1 %tmp, i8 C3, i8 C4
5094/// br label %end
5095/// end:
5096/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ %val, %DEFAULT ]
5097///
5098/// We prefer to split the edge to 'end' so that there are TWO entries of C3/C4
5099/// to the PHI, merging the icmp & select into the switch, as follows:
5100///
5101/// case1:
5102/// switch i8 %A, label %DEFAULT [
5103/// i8 0, label %end
5104/// i8 1, label %case2
5105/// i8 2, label %case3
5106/// ]
5107/// case2:
5108/// br label %end
5109/// case3:
5110/// br label %end
5111/// DEFAULT:
5112/// br label %end
5113/// end:
5114/// _ = phi i8 [ C1, %case1 ], [ C2, %case2 ], [ C3, %case3 ], [ C4, %DEFAULT]
///
/// ICI is the compare and Select is its optional single user. When Select is
/// null, the code treats ICI as if it were followed by a no-op
/// `select ICI, true, false`. Every `return false` below happens before any IR
/// is modified; once IR has been changed the function returns either true or
/// the result of requestResimplify().
5115bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpSelectInIt(
5116 ICmpInst *ICI, SelectInst *Select, IRBuilder<> &Builder) {
5117 BasicBlock *BB = ICI->getParent();
5118
5119 // If the block has any PHIs in it or the icmp/select has multiple uses, it is
5120 // too complex.
5121 /// TODO: support multi-phis in succ BB of select's BB.
5122 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse() ||
5123 (Select && !Select->hasOneUse()))
5124 return false;
5125
5126 // The pattern we're looking for is where our only predecessor is a switch on
5127 // 'V' and this block is the default case for the switch. In this case we can
5128 // fold the compared value into the switch to simplify things.
5129 BasicBlock *Pred = BB->getSinglePredecessor();
5130 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
5131 return false;
5132
5133 Value *IcmpCond;
5134 ConstantInt *NewCaseVal;
5135 CmpPredicate Predicate;
5136
5137 // Match icmp X, C
5138 if (!match(ICI,
5139 m_ICmp(Predicate, m_Value(IcmpCond), m_ConstantInt(NewCaseVal))))
5140 return false;
5141
5142 Value *SelectCond, *SelectTrueVal, *SelectFalseVal;
 // NOTE(review): `User` is assigned in both branches below, but no declaration
 // of it is visible in this listing; confirm against the upstream file.
5144 if (!Select) {
5145 // If Select == nullptr, we can assume that there is a hidden no-op select
5146 // just after icmp
5147 SelectCond = ICI;
5148 SelectTrueVal = Builder.getTrue();
5149 SelectFalseVal = Builder.getFalse();
5150 User = ICI->user_back();
5151 } else {
5152 SelectCond = Select->getCondition();
5153 // Check if the select condition is the same as the icmp condition.
5154 if (SelectCond != ICI)
5155 return false;
5156 SelectTrueVal = Select->getTrueValue();
5157 SelectFalseVal = Select->getFalseValue();
5158 User = Select->user_back();
5159 }
5160
 // The switch in the predecessor must be switching on the very value that the
 // icmp compares against the constant.
5161 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
5162 if (SI->getCondition() != IcmpCond)
5163 return false;
5164
5165 // If BB is reachable on a non-default case, then we simply know the value of
5166 // V in this block. Substitute it and constant fold the icmp instruction
5167 // away.
5168 if (SI->getDefaultDest() != BB) {
5169 ConstantInt *VVal = SI->findCaseDest(BB);
5170 assert(VVal && "Should have a unique destination value");
5171 ICI->setOperand(0, VVal);
5172
 // The icmp now compares two constants; if it simplifies, forward the
 // simplified value to its users and drop the instruction.
5173 if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
5174 ICI->replaceAllUsesWith(V);
5175 ICI->eraseFromParent();
5176 }
5177 // BB is now empty, so it is likely to simplify away.
5178 return requestResimplify();
5179 }
5180
5181 // Ok, the block is reachable from the default dest. If the constant we're
5182 // comparing exists in one of the other edges, then we can constant fold ICI
5183 // and zap it.
5184 if (SI->findCaseValue(NewCaseVal) != SI->case_default()) {
5185 Value *V;
 // NOTE(review): neither arm of this if/else shows a statement in this
 // listing, so the constant assigned to V for the EQ and non-EQ predicates
 // cannot be confirmed from here.
5186 if (Predicate == ICmpInst::ICMP_EQ)
5188 else
5190
5191 ICI->replaceAllUsesWith(V);
5192 ICI->eraseFromParent();
5193 // BB is now empty, so it is likely to simplify away.
5194 return requestResimplify();
5195 }
5196
5197 // The use of the select has to be in the 'end' block, by the only PHI node in
5198 // the block.
5199 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
5200 PHINode *PHIUse = dyn_cast<PHINode>(User);
 // NOTE(review): the last operand of this `||` chain is not visible in this
 // listing; only the "is a PHI" and "is the first instruction of SuccBlock"
 // checks can be confirmed here.
5201 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
5203 return false;
5204
5205 // If the icmp is a SETEQ, then the default dest gets SelectFalseVal, the new
5206 // edge gets SelectTrueVal in the PHI.
5207 Value *DefaultCst = SelectFalseVal;
5208 Value *NewCst = SelectTrueVal;
5209
5210 if (ICI->getPredicate() == ICmpInst::ICMP_NE)
5211 std::swap(DefaultCst, NewCst);
5212
5213 // Replace Select (which is used by the PHI for the default value) with
5214 // SelectFalseVal or SelectTrueVal depending on if ICI is EQ or NE.
5215 if (Select) {
5216 Select->replaceAllUsesWith(DefaultCst);
5217 Select->eraseFromParent();
5218 } else {
5219 ICI->replaceAllUsesWith(DefaultCst);
5220 }
5221 ICI->eraseFromParent();
5222
5223 SmallVector<DominatorTree::UpdateType, 2> Updates;
5224
5225 // Okay, the switch goes to this block on a default value. Add an edge from
5226 // the switch to the merge point on the compared value.
5227 BasicBlock *NewBB =
5228 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
5229 {
5230 SwitchInstProfUpdateWrapper SIW(*SI);
5231 auto W0 = SIW.getSuccessorWeight(0);
 // NOTE(review): the declaration of `NewW` is not visible in this listing.
 // If successor 0 has a weight, half of it (rounded up) is written back to
 // successor 0 and the same value is passed to addCase() for the new case.
5233 if (W0) {
5234 NewW = ((uint64_t(*W0) + 1) >> 1);
5235 SIW.setSuccessorWeight(0, *NewW);
5236 }
5237 SIW.addCase(NewCaseVal, NewBB, NewW);
5238 if (DTU)
5239 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
5240 }
5241
5242 // NewBB branches to the phi block, add the uncond branch and the phi entry.
5243 Builder.SetInsertPoint(NewBB);
5244 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
5245 Builder.CreateBr(SuccBlock);
5246 PHIUse->addIncoming(NewCst, NewBB);
5247 if (DTU) {
5248 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
5249 DTU->applyUpdates(Updates);
5250 }
5251 return true;
5252}
5253
5254/// Check to see if it is branching on an or/and chain of icmp instructions, and
5255/// fold it into a switch instruction if so.
///
/// When the gathered constants form one contiguous range, a range check plus a
/// conditional branch is emitted instead of a switch. Every `return false`
/// below happens before any IR is modified; the function returns true once the
/// branch has been rewritten.
5256bool SimplifyCFGOpt::simplifyBranchOnICmpChain(CondBrInst *BI,
5257 IRBuilder<> &Builder,
5258 const DataLayout &DL) {
 // NOTE(review): the definition of `Cond` is not visible in this listing;
 // presumably it is derived from BI's condition. Confirm against upstream.
5260 if (!Cond)
5261 return false;
5262
5263 // Change br (X == 0 | X == 1), T, F into a switch instruction.
5264 // If this is a bunch of seteq's or'd together, or if it's a bunch of
5265 // 'setne's and'ed together, collect them.
5266
5267 // Try to gather values from a chain of and/or to be turned into a switch
5268 ConstantComparesGatherer ConstantCompare(Cond, DL);
5269 // Unpack the result
5270 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
5271 Value *CompVal = ConstantCompare.CompValue;
5272 unsigned UsedICmps = ConstantCompare.UsedICmps;
5273 Value *ExtraCase = ConstantCompare.Extra;
5274 bool TrueWhenEqual = ConstantCompare.IsEq;
5275
5276 // If the gatherer did not find a common compared value, fail.
5277 if (!CompVal)
5278 return false;
5279
5280 // Avoid turning single icmps into a switch.
5281 if (UsedICmps <= 1)
5282 return false;
5283
5284 // There might be duplicate constants in the list, which the switch
5285 // instruction can't handle, remove them now.
5286 array_pod_sort(Values.begin(), Values.end(), constantIntSortPredicate);
5287 Values.erase(llvm::unique(Values), Values.end());
5288
5289 // If Extra was used, we require at least two switch values to do the
5290 // transformation. A switch with one value is just a conditional branch.
5291 if (ExtraCase && Values.size() < 2)
5292 return false;
5293
5294 SmallVector<uint32_t> BranchWeights;
5295 const bool HasProfile = !ProfcheckDisableMetadataFixes &&
5296 extractBranchWeights(*BI, BranchWeights);
5297
5298 // Figure out which block is which destination.
 // After the optional swap below, BranchWeights[0] belongs to EdgeBB and
 // BranchWeights[1] to DefaultBB.
5299 BasicBlock *DefaultBB = BI->getSuccessor(1);
5300 BasicBlock *EdgeBB = BI->getSuccessor(0);
5301 if (!TrueWhenEqual) {
5302 std::swap(DefaultBB, EdgeBB);
5303 if (HasProfile)
5304 std::swap(BranchWeights[0], BranchWeights[1]);
5305 }
5306
5307 BasicBlock *BB = BI->getParent();
5308
5309 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
5310 << " cases into SWITCH. BB is:\n"
5311 << *BB);
5312
5313 SmallVector<DominatorTree::UpdateType, 2> Updates;
5314
5315 // If there are any extra values that couldn't be folded into the switch
5316 // then we evaluate them with an explicit branch first. Split the block
5317 // right before the condbr to handle it.
5318 if (ExtraCase) {
5319 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
5320 /*MSSAU=*/nullptr, "switch.early.test");
5321
5322 // Remove the uncond branch added to the old block.
5323 Instruction *OldTI = BB->getTerminator();
5324 Builder.SetInsertPoint(OldTI);
5325
5326 // There can be an unintended UB if extra values are Poison. Before the
5327 // transformation, extra values may not be evaluated according to the
5328 // condition, and it will not raise UB. But after transformation, we are
5329 // evaluating extra values before checking the condition, and it will raise
5330 // UB. It can be solved by adding freeze instruction to extra values.
5331 AssumptionCache *AC = Options.AC;
5332
5333 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
5334 ExtraCase = Builder.CreateFreeze(ExtraCase);
5335
5336 // We don't have any info about this condition.
 // NOTE(review): `Br` is not referenced again in the visible listing; a line
 // appears to be missing right after its creation.
5337 auto *Br = TrueWhenEqual ? Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB)
5338 : Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
5340
5341 OldTI->eraseFromParent();
5342
5343 if (DTU)
5344 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
5345
5346 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
5347 // for the edge we just added.
5348 addPredecessorToBlock(EdgeBB, BB, NewBB);
5349
5350 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
5351 << "\nEXTRABB = " << *BB);
 // From here on, BB refers to the split-off block that still holds BI.
5352 BB = NewBB;
5353 }
5354
5355 Builder.SetInsertPoint(BI);
5356 // Convert pointer to int before we switch.
5357 if (CompVal->getType()->isPointerTy()) {
5358 assert(!DL.hasUnstableRepresentation(CompVal->getType()) &&
5359 "Should not end up here with unstable pointers");
5360 CompVal = Builder.CreatePtrToInt(
5361 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
5362 }
5363
5364 // Check if we can represent the values as a contiguous range. If so, we use a
5365 // range check + conditional branch instead of a switch.
 // Values was sorted and deduplicated above; this test reads front() as the
 // largest and back() as the smallest value (presumably a descending sort
 // order from constantIntSortPredicate -- confirm).
5366 if (Values.front()->getValue() - Values.back()->getValue() ==
5367 Values.size() - 1) {
5368 ConstantRange RangeToCheck = ConstantRange::getNonEmpty(
5369 Values.back()->getValue(), Values.front()->getValue() + 1);
5370 APInt Offset, RHS;
5371 ICmpInst::Predicate Pred;
5372 RangeToCheck.getEquivalentICmp(Pred, RHS, Offset);
5373 Value *X = CompVal;
5374 if (!Offset.isZero())
5375 X = Builder.CreateAdd(X, ConstantInt::get(CompVal->getType(), Offset));
5376 Value *Cond =
5377 Builder.CreateICmp(Pred, X, ConstantInt::get(CompVal->getType(), RHS));
5378 CondBrInst *NewBI = Builder.CreateCondBr(Cond, EdgeBB, DefaultBB);
5379 if (HasProfile)
5380 setBranchWeights(*NewBI, BranchWeights, /*IsExpected=*/false);
5381 // We don't need to update PHI nodes since we don't add any new edges.
5382 } else {
5383 // Create the new switch instruction now.
5384 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
5385 if (HasProfile) {
5386 // We know the weight of the default case. We don't know the weight of the
5387 // other cases, but rather than completely lose profiling info, we split
5388 // the remaining probability equally over them.
5389 SmallVector<uint32_t> NewWeights(Values.size() + 1);
5390 NewWeights[0] = BranchWeights[1]; // this is the default, and we swapped
5391 // if !TrueWhenEqual.
5392 for (auto &V : drop_begin(NewWeights))
5393 V = BranchWeights[0] / Values.size();
5394 setBranchWeights(*New, NewWeights, /*IsExpected=*/false);
5395 }
5396
5397 // Add all of the 'cases' to the switch instruction.
5398 for (ConstantInt *Val : Values)
5399 New->addCase(Val, EdgeBB);
5400
5401 // We added edges from BB to EdgeBB. As such, if there were any
5402 // PHI nodes in EdgeBB, they need entries to be added corresponding to
5403 // the number of edges added.
 // One BB -> EdgeBB entry already exists in each PHI, so only
 // Values.size() - 1 additional entries are appended.
5404 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
5405 PHINode *PN = cast<PHINode>(BBI);
5406 Value *InVal = PN->getIncomingValueForBlock(BB);
5407 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
5408 PN->addIncoming(InVal, BB);
5409 }
5410 }
5411
5412 // Erase the old branch instruction.
 // NOTE(review): the statement that performs the erase is not visible in this
 // listing.
5414 if (DTU)
5415 DTU->applyUpdates(Updates);
5416
5417 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
5418 return true;
5419}
5420
5421bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
5422 if (isa<PHINode>(RI->getValue()))
5423 return simplifyCommonResume(RI);
5424 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHIIt()) &&
5425 RI->getValue() == &*RI->getParent()->getFirstNonPHIIt())
5426 // The resume must unwind the exception that caused control to branch here.
5427 return simplifySingleResume(RI);
5428
5429 return false;
5430}
5431
5432// Check if cleanup block is empty: returns true only when every instruction
// in the range R is an intrinsic call whose ID is dbg_declare, dbg_value,
// dbg_label or lifetime_end. Any non-intrinsic instruction, or any other
// intrinsic, makes the result false. An empty range yields true.
// NOTE(review): the signature line of this function is not visible in this
// listing; the name isCleanupBlockEmpty is taken from its call sites.
5434 for (Instruction &I : R) {
5435 auto *II = dyn_cast<IntrinsicInst>(&I);
5436 if (!II)
5437 return false;
5438
5439 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
5440 switch (IntrinsicID) {
5441 case Intrinsic::dbg_declare:
5442 case Intrinsic::dbg_value:
5443 case Intrinsic::dbg_label:
5444 case Intrinsic::lifetime_end:
 // Benign intrinsic: keep scanning.
5445 break;
5446 default:
5447 return false;
5448 }
5449 }
5450 return true;
5451}
5452
5453// Simplify resume that is shared by several landing pads (phi of landing pad).
// Incoming blocks that branch only to the resume block, whose PHI value is
// their own landing pad, and that the emptiness check accepts are collected;
// the unwind edges into them are removed and their branch to the resume block
// is replaced by `unreachable`. Returns true iff at least one such block was
// found.
5454bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
5455 BasicBlock *BB = RI->getParent();
5456
5457 // Check that there are no other instructions except for debug and lifetime
5458 // intrinsics between the phi's and resume instruction.
5459 if (!isCleanupBlockEmpty(make_range(RI->getParent()->getFirstNonPHIIt(),
5460 BB->getTerminator()->getIterator())))
5461 return false;
5462
5463 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
5464 auto *PhiLPInst = cast<PHINode>(RI->getValue());
5465
5466 // Check incoming blocks to see if any of them are trivial.
5467 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
5468 Idx++) {
5469 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
5470 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
5471
5472 // If the block has other successors, we can not delete it because
5473 // it has other dependents.
5474 if (IncomingBB->getUniqueSuccessor() != BB)
5475 continue;
5476
5477 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHIIt());
5478 // Not the landing pad that caused the control to branch here.
5479 if (IncomingValue != LandingPad)
5480 continue;
5481
 // NOTE(review): the opening of this condition is not visible in this
 // listing; presumably it is the same emptiness check used above, applied
 // to the instructions between the landing pad and the terminator.
5483 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5484 TrivialUnwindBlocks.insert(IncomingBB);
5485 }
5486
5487 // If no trivial unwind blocks, don't do any simplifications.
5488 if (TrivialUnwindBlocks.empty())
5489 return false;
5490
5491 // Turn all invokes that unwind here into calls.
5492 for (auto *TrivialBB : TrivialUnwindBlocks) {
5493 // Blocks that will be simplified should be removed from the phi node.
5494 // Note there could be multiple edges to the resume block, and we need
5495 // to remove them all.
5496 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5497 BB->removePredecessor(TrivialBB, true);
5498
 // NOTE(review): the range expression of this loop is not visible in this
 // listing; presumably it walks the predecessors of TrivialBB.
5499 for (BasicBlock *Pred :
5501 removeUnwindEdge(Pred, DTU);
5502 ++NumInvokes;
5503 }
5504
5505 // In each SimplifyCFG run, only the current processed block can be erased.
5506 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5507 // of erasing TrivialBB, we only remove the branch to the common resume
5508 // block so that we can later erase the resume block since it has no
5509 // predecessors.
5510 TrivialBB->getTerminator()->eraseFromParent();
5511 new UnreachableInst(RI->getContext(), TrivialBB);
5512 if (DTU)
5513 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5514 }
5515
5516 // Delete the resume block if all its predecessors have been removed.
5517 if (pred_empty(BB))
5518 DeleteDeadBlock(BB, DTU);
5519
 // The set is known to be non-empty here because of the early return above.
5520 return !TrivialUnwindBlocks.empty();
5521}
5522
5523// Simplify resume that is only used by a single (non-phi) landing pad.
// If the check below passes, the unwind edge of every predecessor is removed
// and the block itself is deleted; the function then returns true.
5524bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5525 BasicBlock *BB = RI->getParent();
5526 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHIIt());
5527 assert(RI->getValue() == LPInst &&
5528 "Resume must unwind the exception that caused control to here");
5529
5530 // Check that there are no other instructions except for debug intrinsics.
 // NOTE(review): the opening of this condition is not visible in this listing;
 // presumably it negates the cleanup-block emptiness check over the
 // instructions between the landing pad and the resume.
5532 make_range<Instruction *>(LPInst->getNextNode(), RI)))
5533 return false;
5534
5535 // Turn all invokes that unwind here into calls and delete the basic block.
 // The early-inc range is used because the predecessor list changes while it
 // is being walked.
5536 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5537 removeUnwindEdge(Pred, DTU);
5538 ++NumInvokes;
5539 }
5540
5541 // The landingpad is now unreachable. Zap it.
5542 DeleteDeadBlock(BB, DTU);
5543 return true;
5544}
5545
// NOTE(review): the signature line of this function is not visible in this
// listing. From the body and from the call `removeEmptyCleanup(RI, DTU)` it
// takes a cleanupret `RI` and a `DTU` that may be null, and returns bool.
5547 // If this is a trivial cleanup pad that executes no instructions, it can be
5548 // eliminated. If the cleanup pad continues to the caller, any predecessor
5549 // that is an EH pad will be updated to continue to the caller and any
5550 // predecessor that terminates with an invoke instruction will have its invoke
5551 // instruction converted to a call instruction. If the cleanup pad being
5552 // simplified does not continue to the caller, each predecessor will be
5553 // updated to continue to the unwind destination of the cleanup pad being
5554 // simplified.
5555 BasicBlock *BB = RI->getParent();
5556 CleanupPadInst *CPInst = RI->getCleanupPad();
5557 if (CPInst->getParent() != BB)
5558 // This isn't an empty cleanup.
5559 return false;
5560
5561 // We cannot kill the pad if it has multiple uses. This typically arises
5562 // from unreachable basic blocks.
5563 if (!CPInst->hasOneUse())
5564 return false;
5565
5566 // Check that there are no other instructions except for benign intrinsics.
 // NOTE(review): the opening of this condition is not visible in this listing;
 // presumably it negates the cleanup-block emptiness check over the
 // instructions between the cleanuppad and the cleanupret.
5568 make_range<Instruction *>(CPInst->getNextNode(), RI)))
5569 return false;
5570
5571 // If the cleanup return we are simplifying unwinds to the caller, this will
5572 // set UnwindDest to nullptr.
5573 BasicBlock *UnwindDest = RI->getUnwindDest();
5574
5575 // We're about to remove BB from the control flow. Before we do, sink any
5576 // PHINodes into the unwind destination. Doing this before changing the
5577 // control flow avoids some potentially slow checks, since we can currently
5578 // be certain that UnwindDest and BB have no common predecessors (since they
5579 // are both EH pads).
5580 if (UnwindDest) {
5581 // First, go through the PHI nodes in UnwindDest and update any nodes that
5582 // reference the block we are removing
5583 for (PHINode &DestPN : UnwindDest->phis()) {
5584 int Idx = DestPN.getBasicBlockIndex(BB);
5585 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
5586 assert(Idx != -1);
5587 // This PHI node has an incoming value that corresponds to a control
5588 // path through the cleanup pad we are removing. If the incoming
5589 // value is in the cleanup pad, it must be a PHINode (because we
5590 // verified above that the block is otherwise empty). Otherwise, the
5591 // value is either a constant or a value that dominates the cleanup
5592 // pad being removed.
5593 //
5594 // Because BB and UnwindDest are both EH pads, all of their
5595 // predecessors must unwind to these blocks, and since no instruction
5596 // can have multiple unwind destinations, there will be no overlap in
5597 // incoming blocks between SrcPN and DestPN.
5598 Value *SrcVal = DestPN.getIncomingValue(Idx);
5599 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
5600
 // Only a PHI that lives in BB itself needs to be looked through; any
 // other value is forwarded unchanged for every predecessor of BB.
5601 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
5602 for (auto *Pred : predecessors(BB)) {
5603 Value *Incoming =
5604 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
5605 DestPN.addIncoming(Incoming, Pred);
5606 }
5607 }
5608
5609 // Sink any remaining PHI nodes directly into UnwindDest.
5610 BasicBlock::iterator InsertPt = UnwindDest->getFirstNonPHIIt();
5611 for (PHINode &PN : make_early_inc_range(BB->phis())) {
5612 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
5613 // If the PHI node has no uses or all of its uses are in this basic
5614 // block (meaning they are debug or lifetime intrinsics), just leave
5615 // it. It will be erased when we erase BB below.
5616 continue;
5617
5618 // Otherwise, sink this PHI node into UnwindDest.
5619 // Any predecessors to UnwindDest which are not already represented
5620 // must be back edges which inherit the value from the path through
5621 // BB. In this case, the PHI value must reference itself.
5622 for (auto *pred : predecessors(UnwindDest))
5623 if (pred != BB)
5624 PN.addIncoming(&PN, pred);
5625 PN.moveBefore(InsertPt);
5626 // Also, add a dummy incoming value for the original BB itself,
5627 // so that the PHI is well-formed until we drop said predecessor.
5628 PN.addIncoming(PoisonValue::get(PN.getType()), BB);
5629 }
5630 }
5631
5632 std::vector<DominatorTree::UpdateType> Updates;
5633
5634 // We use make_early_inc_range here because we will remove all predecessors.
 // NOTE(review): the loop header that introduces `PredBB` is not visible in
 // this listing; presumably it walks the predecessors of BB.
5636 if (UnwindDest == nullptr) {
 // Pending updates are applied before removeUnwindEdge() is handed DTU.
5637 if (DTU) {
5638 DTU->applyUpdates(Updates);
5639 Updates.clear();
5640 }
5641 removeUnwindEdge(PredBB, DTU);
5642 ++NumInvokes;
5643 } else {
 // Retarget the predecessor's terminator from BB to UnwindDest.
5644 BB->removePredecessor(PredBB);
5645 Instruction *TI = PredBB->getTerminator();
5646 TI->replaceUsesOfWith(BB, UnwindDest);
5647 if (DTU) {
5648 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
5649 Updates.push_back({DominatorTree::Delete, PredBB, BB});
5650 }
5651 }
5652 }
5653
5654 if (DTU)
5655 DTU->applyUpdates(Updates);
5656
5657 DeleteDeadBlock(BB, DTU);
5658
5659 return true;
5660}
5661
5662// Try to merge two cleanuppads together.
// Applies when the cleanupret unwinds to a block that has this cleanupret's
// block as its single predecessor and that starts with a cleanuppad. Returns
// true if the merge was performed, false (with no change) otherwise.
// NOTE(review): the signature line of this function is not visible in this
// listing; the name mergeCleanupPad is taken from the call `mergeCleanupPad(RI)`.
5664 // Skip any cleanuprets which unwind to caller, there is nothing to merge
5665 // with.
5666 BasicBlock *UnwindDest = RI->getUnwindDest();
5667 if (!UnwindDest)
5668 return false;
5669
5670 // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5671 // be safe to merge without code duplication.
5672 if (UnwindDest->getSinglePredecessor() != RI->getParent())
5673 return false;
5674
5675 // Verify that our cleanuppad's unwind destination is another cleanuppad.
5676 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5677 if (!SuccessorCleanupPad)
5678 return false;
5679
5680 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5681 // Replace any uses of the successor cleanuppad with the predecessor pad.
5682 // The only cleanuppad uses should be this cleanupret, its cleanupret and
5683 // funclet bundle operands.
5684 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5685 // Remove the old cleanuppad.
5686 SuccessorCleanupPad->eraseFromParent();
5687 // Now, we simply replace the cleanupret with a branch to the unwind
5688 // destination.
5689 UncondBrInst::Create(UnwindDest, RI->getParent());
5690 RI->eraseFromParent();
5691
5692 return true;
5693}
5694
5695bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5696 // It is possible to transiantly have an undef cleanuppad operand because we
5697 // have deleted some, but not all, dead blocks.
5698 // Eventually, this block will be deleted.
5699 if (isa<UndefValue>(RI->getOperand(0)))
5700 return false;
5701
5702 if (mergeCleanupPad(RI))
5703 return true;
5704
5705 if (removeEmptyCleanup(RI, DTU))
5706 return true;
5707
5708 return false;
5709}
5710
5711// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
// Simplify around an `unreachable`: erase removable instructions that
// immediately precede it and, if the unreachable ends up first in its block,
// rewrite every predecessor's terminator so that it no longer targets this
// block. The block is deleted when it has no predecessors left and is not the
// entry block. Returns true if anything changed.
5712bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
5713 BasicBlock *BB = UI->getParent();
5714
5715 bool Changed = false;
5716
5717 // Ensure that any debug-info records that used to occur after the Unreachable
5718 // are moved to in front of it -- otherwise they'll "dangle" at the end of
5719 // the block.
 // NOTE(review): the statement this comment describes is not visible in this
 // listing.
5721
5722 // Debug-info records on the unreachable inst itself should be deleted, as
5723 // below we delete everything past the final executable instruction.
5724 UI->dropDbgRecords();
5725
5726 // If there are any instructions immediately before the unreachable that can
5727 // be removed, do so.
5728 while (UI->getIterator() != BB->begin()) {
 // NOTE(review): the declaration of `BBI` and the condition guarding the
 // `break` below are not visible in this listing; presumably BBI starts at
 // UI and the loop stops at the first instruction that must be kept.
5730 --BBI;
5731
5733 break; // Can not drop any more instructions. We're done here.
5734 // Otherwise, this instruction can be freely erased,
5735 // even if it is not side-effect free.
5736
5737 // Note that deleting EH's here is in fact okay, although it involves a bit
5738 // of subtle reasoning. If this inst is an EH, all the predecessors of this
5739 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
5740 // and we can therefore guarantee this block will be erased.
5741
5742 // If we're deleting this, we're deleting any subsequent debug info, so
5743 // delete DbgRecords.
5744 BBI->dropDbgRecords();
5745
5746 // Delete this instruction (any uses are guaranteed to be dead)
5747 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
5748 BBI->eraseFromParent();
5749 Changed = true;
5750 }
5751
5752 // If the unreachable instruction is the first in the block, take a gander
5753 // at all of the predecessors of this instruction, and simplify them.
5754 if (&BB->front() != UI)
5755 return Changed;
5756
5757 std::vector<DominatorTree::UpdateType> Updates;
5758
 // Snapshot the predecessors into a set: each block is visited once, and the
 // rewrites below change BB's predecessor list.
5759 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
5760 for (BasicBlock *Predecessor : Preds) {
5761 Instruction *TI = Predecessor->getTerminator();
5762 IRBuilder<> Builder(TI);
5763 if (isa<UncondBrInst>(TI)) {
 // An unconditional branch to BB is itself unreachable.
5764 new UnreachableInst(TI->getContext(), TI->getIterator());
5765 TI->eraseFromParent();
5766 Changed = true;
5767 if (DTU)
5768 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5769 } else if (auto *BI = dyn_cast<CondBrInst>(TI)) {
5770 // We could either have a proper unconditional branch,
5771 // or a degenerate conditional branch with matching destinations.
5772 if (BI->getSuccessor(0) == BI->getSuccessor(1)) {
5773 new UnreachableInst(TI->getContext(), TI->getIterator());
5774 TI->eraseFromParent();
5775 Changed = true;
5776 } else {
5777 Value* Cond = BI->getCondition();
5778 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
5779 "The destinations are guaranteed to be different here.");
 // Record, as an assumption, the condition value that avoids BB, then
 // branch unconditionally to the other successor.
5780 CallInst *Assumption;
5781 if (BI->getSuccessor(0) == BB) {
5782 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
5783 Builder.CreateBr(BI->getSuccessor(1));
5784 } else {
5785 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
5786 Assumption = Builder.CreateAssumption(Cond);
5787 Builder.CreateBr(BI->getSuccessor(0));
5788 }
5789 if (Options.AC)
5790 Options.AC->registerAssumption(cast<AssumeInst>(Assumption));
5791
 // NOTE(review): the statement that removes the old conditional branch is
 // not visible in this listing.
5793 Changed = true;
5794 }
5795 if (DTU)
5796 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5797 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
 // Drop every non-default case that targets BB. The end iterator is
 // refreshed after each removal.
5798 SwitchInstProfUpdateWrapper SU(*SI);
5799 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
5800 if (i->getCaseSuccessor() != BB) {
5801 ++i;
5802 continue;
5803 }
5804 BB->removePredecessor(SU->getParent());
5805 i = SU.removeCase(i);
5806 e = SU->case_end();
5807 Changed = true;
5808 }
5809 // Note that the default destination can't be removed!
5810 if (DTU && SI->getDefaultDest() != BB)
5811 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5812 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
5813 if (II->getUnwindDest() == BB) {
 // Pending updates are applied before removeUnwindEdge() is handed DTU.
5814 if (DTU) {
5815 DTU->applyUpdates(Updates);
5816 Updates.clear();
5817 }
 // The instruction returned for the former invoke is marked as not
 // throwing.
5818 auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
5819 if (!CI->doesNotThrow())
5820 CI->setDoesNotThrow();
5821 Changed = true;
5822 }
5823 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
5824 if (CSI->getUnwindDest() == BB) {
5825 if (DTU) {
5826 DTU->applyUpdates(Updates);
5827 Updates.clear();
5828 }
5829 removeUnwindEdge(TI->getParent(), DTU);
5830 Changed = true;
5831 continue;
5832 }
5833
 // Otherwise BB is one of the handlers: remove each handler entry that
 // names it, stepping both iterators back after every removal.
5834 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
5835 E = CSI->handler_end();
5836 I != E; ++I) {
5837 if (*I == BB) {
5838 CSI->removeHandler(I);
5839 --I;
5840 --E;
5841 Changed = true;
5842 }
5843 }
5844 if (DTU)
5845 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5846 if (CSI->getNumHandlers() == 0) {
5847 if (CSI->hasUnwindDest()) {
5848 // Redirect all predecessors of the block containing CatchSwitchInst
5849 // to instead branch to the CatchSwitchInst's unwind destination.
5850 if (DTU) {
5851 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
5852 Updates.push_back({DominatorTree::Insert,
5853 PredecessorOfPredecessor,
5854 CSI->getUnwindDest()});
5855 Updates.push_back({DominatorTree::Delete,
5856 PredecessorOfPredecessor, Predecessor});
5857 }
5858 }
5859 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
5860 } else {
5861 // Rewrite all preds to unwind to caller (or from invoke to call).
5862 if (DTU) {
5863 DTU->applyUpdates(Updates);
5864 Updates.clear();
5865 }
 // Copy the predecessors first; removeUnwindEdge() changes the list.
5866 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
5867 for (BasicBlock *EHPred : EHPreds)
5868 removeUnwindEdge(EHPred, DTU);
5869 }
5870 // The catchswitch is no longer reachable.
5871 new UnreachableInst(CSI->getContext(), CSI->getIterator());
5872 CSI->eraseFromParent();
5873 Changed = true;
5874 }
5875 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
 // CRI is only read by the assert below; the void cast keeps it "used"
 // when asserts are compiled out.
5876 (void)CRI;
5877 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
5878 "Expected to always have an unwind to BB.");
5879 if (DTU)
5880 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
5881 new UnreachableInst(TI->getContext(), TI->getIterator());
5882 TI->eraseFromParent();
5883 Changed = true;
5884 }
5885 }
5886
5887 if (DTU)
5888 DTU->applyUpdates(Updates);
5889
5890 // If this block is now dead, remove it.
5891 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
5892 DeleteDeadBlock(BB, DTU);
5893 return true;
5894 }
5895
5896 return Changed;
5897}
5898
5907
// Decide whether the case values in `Cases` (all going to `Dest`) form one
// contiguous range, or whether they wrap around so that the values NOT in
// `Cases` form one contiguous range for `OtherDest`. Returns the matching
// ContiguousCasesResult, or std::nullopt if neither holds.
// NOTE(review): most of the signature, the ContiguousCasesResult definition
// and a statement before `Min` are not visible in this listing. The name and
// argument order (Condition, Cases, OtherCases, Dest, OtherDest) are taken
// from the call sites in turnSwitchRangeIntoICmp. The code reads
// Cases.front() as the largest and Cases.back() as the smallest value --
// presumably Cases is sorted in descending order first; confirm upstream.
5908static std::optional<ContiguousCasesResult>
5911 BasicBlock *Dest, BasicBlock *OtherDest) {
5912 assert(Cases.size() >= 1);
5913
5915 const APInt &Min = Cases.back()->getValue();
5916 const APInt &Max = Cases.front()->getValue();
5917 APInt Offset = Max - Min;
5918 size_t ContiguousOffset = Cases.size() - 1;
 // Max - Min == count - 1 is the test used for "the cases form exactly the
 // range [Min, Max]".
5919 if (Offset == ContiguousOffset) {
5920 return ContiguousCasesResult{
5921 /*Min=*/Cases.back(),
5922 /*Max=*/Cases.front(),
5923 /*Dest=*/Dest,
5924 /*OtherDest=*/OtherDest,
5925 /*Cases=*/&Cases,
5926 /*OtherCases=*/&OtherCases,
5927 };
5928 }
5929 ConstantRange CR = computeConstantRange(Condition, /*ForSigned=*/false,
5930 SimplifyQuery(Dest->getDataLayout()));
5931 // If this is a wrapping contiguous range, that is, [Min, OtherMin] +
5932 // [OtherMax, Max] (also [OtherMax, OtherMin]), [OtherMin+1, OtherMax-1] is a
5933 // contiguous range for the other destination. N.B. If CR is not a full range,
5934 // Max+1 is not equal to Min. It's not continuous in arithmetic.
5935 if (Max == CR.getUnsignedMax() && Min == CR.getUnsignedMin()) {
5936 assert(Cases.size() >= 2);
 // Find the first adjacent pair that is not consecutive, i.e. the gap
 // between the upper run of cases and the lower run.
5937 auto *It =
5938 std::adjacent_find(Cases.begin(), Cases.end(), [](auto L, auto R) {
5939 return L->getValue() != R->getValue() + 1;
5940 });
5941 if (It == Cases.end())
5942 return std::nullopt;
 // *It is the case just above the gap, *next(It) the case just below it.
5943 auto [OtherMax, OtherMin] = std::make_pair(*It, *std::next(It));
 // The two runs [OtherMax, Max] and [Min, OtherMin] must account for all
 // cases for the result to be returned.
5944 if ((Max - OtherMax->getValue()) + (OtherMin->getValue() - Min) ==
5945 Cases.size() - 2) {
 // Report the gap [OtherMin+1, OtherMax-1] as the contiguous range, with
 // the roles of the two destinations and case lists swapped.
5946 return ContiguousCasesResult{
5947 /*Min=*/cast<ConstantInt>(
5948 ConstantInt::get(OtherMin->getType(), OtherMin->getValue() + 1)),
5949 /*Max=*/
5951 ConstantInt::get(OtherMax->getType(), OtherMax->getValue() - 1)),
5952 /*Dest=*/OtherDest,
5953 /*OtherDest=*/Dest,
5954 /*Cases=*/&OtherCases,
5955 /*OtherCases=*/&Cases,
5956 };
5957 }
5958 }
5959 return std::nullopt;
5960}
5961
// Replace the default destination of `Switch` with a freshly created block
// that contains only an `unreachable`. When RemoveOrigDefaultBlock is true,
// the switch's block is also removed from the original default block's
// predecessors. DTU may be null; if set, it is informed of the edge changes.
// NOTE(review): the first signature line (function name and the `Switch`
// parameter) is not visible in this listing -- presumably this is
// createUnreachableSwitchDefault; confirm against the upstream file.
5963 DomTreeUpdater *DTU,
5964 bool RemoveOrigDefaultBlock = true) {
5965 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5966 auto *BB = Switch->getParent();
5967 auto *OrigDefaultBlock = Switch->getDefaultDest();
5968 if (RemoveOrigDefaultBlock)
5969 OrigDefaultBlock->removePredecessor(BB);
 // The new block is named after BB and placed before the old default block.
5970 BasicBlock *NewDefaultBlock = BasicBlock::Create(
5971 BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5972 OrigDefaultBlock);
 // NOTE(review): `UI` is not referenced again in the visible listing; a line
 // appears to be missing after its creation.
5973 auto *UI = new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5975 Switch->setDefaultDest(&*NewDefaultBlock);
5976 if (DTU) {
 // NOTE(review): the declaration of `Updates` is not visible in this listing.
5978 Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
 // Only report the old edge as deleted if no remaining case still targets
 // the original default block.
5979 if (RemoveOrigDefaultBlock &&
5980 !is_contained(successors(BB), OrigDefaultBlock))
5981 Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5982 DTU->applyUpdates(Updates);
5983 }
5984}
5985
5986 /// Turn a switch into an integer range comparison and branch.
5987 /// Switches with more than 2 destinations are ignored.
5988 /// Switches with 1 destination are also ignored.
///
/// \param SI      the switch to rewrite; it must have more than one case.
/// \param Builder used to emit the replacement compare and branch.
/// \returns true if the switch was replaced and erased, false if it was left
/// untouched.
5989 bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI,
5990 IRBuilder<> &Builder) {
5991 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5992
5993 bool HasDefault = !SI->defaultDestUnreachable();
5994
5995 auto *BB = SI->getParent();
5996 // Partition the cases into two sets with different destinations.
// When the default is reachable it seeds DestA, so DestA's set then also
// stands for "every value without an explicit case".
5997 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5998 BasicBlock *DestB = nullptr;
// NOTE(review): embedded lines 5999-6000 are missing from this listing;
// presumably they declare the `CasesA` / `CasesB` containers used below -
// confirm against upstream.
6001
6002 for (auto Case : SI->cases()) {
6003 BasicBlock *Dest = Case.getCaseSuccessor();
6004 if (!DestA)
6005 DestA = Dest;
6006 if (Dest == DestA) {
6007 CasesA.push_back(Case.getCaseValue());
6008 continue;
6009 }
6010 if (!DestB)
6011 DestB = Dest;
6012 if (Dest == DestB) {
6013 CasesB.push_back(Case.getCaseValue());
6014 continue;
6015 }
6016 return false; // More than two destinations.
6017 }
6018 if (!DestB)
6019 return false; // All destinations are the same and the default is unreachable
6020
6021 assert(DestA && DestB &&
6022 "Single-destination switch should have been folded.");
6023 assert(DestA != DestB);
6024 assert(DestB != SI->getDefaultDest());
6025 assert(!CasesB.empty() && "There must be non-default cases.");
6026 assert(!CasesA.empty() || HasDefault);
6027
6028 // Figure out if one of the sets of cases form a contiguous range.
6029 std::optional<ContiguousCasesResult> ContiguousCases;
6030
6031 // Only one icmp is needed when there is only one case.
6032 if (!HasDefault && CasesA.size() == 1)
6033 ContiguousCases = ContiguousCasesResult{
6034 /*Min=*/CasesA[0],
6035 /*Max=*/CasesA[0],
6036 /*Dest=*/DestA,
6037 /*OtherDest=*/DestB,
6038 /*Cases=*/&CasesA,
6039 /*OtherCases=*/&CasesB,
6040 };
6041 else if (CasesB.size() == 1)
6042 ContiguousCases = ContiguousCasesResult{
6043 /*Min=*/CasesB[0],
6044 /*Max=*/CasesB[0],
6045 /*Dest=*/DestB,
6046 /*OtherDest=*/DestA,
6047 /*Cases=*/&CasesB,
6048 /*OtherCases=*/&CasesA,
6049 };
6050 // Correctness: Cases to the default destination cannot be contiguous cases.
6051 else if (!HasDefault)
6052 ContiguousCases =
6053 findContiguousCases(SI->getCondition(), CasesA, CasesB, DestA, DestB);
6054
// Fall back to testing the B set as the contiguous one (A then becomes the
// "other" side).
6055 if (!ContiguousCases)
6056 ContiguousCases =
6057 findContiguousCases(SI->getCondition(), CasesB, CasesA, DestB, DestA);
6058
6059 if (!ContiguousCases)
6060 return false;
6061
6062 auto [Min, Max, Dest, OtherDest, Cases, OtherCases] = *ContiguousCases;
6063
6064 // Start building the compare and branch.
6065
// NOTE(review): embedded line 6066 is missing from this listing; presumably
// it defines `Offset`, the constant added to the condition below - confirm
// against upstream.
6067 Constant *NumCases = ConstantInt::get(Offset->getType(),
6068 Max->getValue() - Min->getValue() + 1);
6069 Instruction *NewBI;
6070 if (NumCases->isOneValue()) {
6071 assert(Max->getValue() == Min->getValue());
6072 Value *Cmp = Builder.CreateICmpEQ(SI->getCondition(), Min);
6073 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6074 }
6075 // If NumCases overflowed, then all possible values jump to the successor.
6076 else if (NumCases->isNullValue() && !Cases->empty()) {
6077 NewBI = Builder.CreateBr(Dest);
6078 } else {
// General case: add Offset to the condition (skipped when Offset is zero) and
// do a single unsigned compare against the number of values in the range.
6079 Value *Sub = SI->getCondition();
6080 if (!Offset->isNullValue())
6081 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
6082 Value *Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
6083 NewBI = Builder.CreateCondBr(Cmp, Dest, OtherDest);
6084 }
6085
6086 // Update weight for the newly-created conditional branch.
6087 if (hasBranchWeightMD(*SI) && isa<CondBrInst>(NewBI)) {
6088 SmallVector<uint64_t, 8> Weights;
6089 getBranchWeights(SI, Weights);
6090 if (Weights.size() == 1 + SI->getNumCases()) {
// Sum the weights of all switch successors that lead to Dest (true side)
// versus everything else (false side).
6091 uint64_t TrueWeight = 0;
6092 uint64_t FalseWeight = 0;
6093 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
6094 if (SI->getSuccessor(I) == Dest)
6095 TrueWeight += Weights[I];
6096 else
6097 FalseWeight += Weights[I];
6098 }
// Halve both sums until each fits in 32 bits.
6099 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
6100 TrueWeight /= 2;
6101 FalseWeight /= 2;
6102 }
6103 setFittedBranchWeights(*NewBI, {TrueWeight, FalseWeight},
6104 /*IsExpected=*/false, /*ElideAllZero=*/true);
6105 }
6106 }
6107
6108 // Prune obsolete incoming values off the successors' PHI nodes.
// The switch contributed one edge per case (plus one if the block is the
// default destination); the new branch contributes a single edge, so all but
// one incoming entry from this block is removed.
6109 for (auto &PHI : make_early_inc_range(Dest->phis())) {
6110 unsigned PreviousEdges = Cases->size();
6111 if (Dest == SI->getDefaultDest())
6112 ++PreviousEdges;
6113 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
6114 PHI.removeIncomingValue(SI->getParent());
6115 }
6116 for (auto &PHI : make_early_inc_range(OtherDest->phis())) {
6117 unsigned PreviousEdges = OtherCases->size();
6118 if (OtherDest == SI->getDefaultDest())
6119 ++PreviousEdges;
6120 unsigned E = PreviousEdges - 1;
6121 // Remove all incoming values from OtherDest if OtherDest is unreachable.
6122 if (isa<UncondBrInst>(NewBI))
6123 ++E;
6124 for (unsigned I = 0; I != E; ++I)
6125 PHI.removeIncomingValue(SI->getParent());
6126 }
6127
6128 // Clean up the default block - it may have phis or other instructions before
6129 // the unreachable terminator.
6130 if (!HasDefault)
// NOTE(review): embedded line 6131 - the statement controlled by the `if`
// above - is missing from this listing. As printed here, the `if` would
// instead govern the declaration that follows; restore the line from
// upstream.
6132
6133 auto *UnreachableDefault = SI->getDefaultDest();
6134
6135 // Drop the switch.
6136 SI->eraseFromParent();
6137
// With the switch gone, the edge to the unreachable default no longer exists.
6138 if (!HasDefault && DTU)
6139 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
6140
6141 return true;
6142 }
6143
6144 /// Compute masked bits for the condition of a switch
6145 /// and use it to remove dead cases.
///
/// NOTE(review): embedded line 6146 (the first line of the declaration, with
/// the function name and the parameters the body calls `SI` and `DTU`) is
/// missing from this listing; restore it from upstream.
///
/// The visible paths return false when no change is made and true after
/// dead cases have been removed.
6147 AssumptionCache *AC,
6148 const DataLayout &DL) {
6149 Value *Cond = SI->getCondition();
6150 KnownBits Known = computeKnownBits(Cond, DL, AC, SI);
// NOTE(review): embedded line 6151 is missing; presumably it declares
// `KnownValues`, which is filled on the next line - confirm.
6152 bool IsKnownValuesValid = collectPossibleValues(Cond, KnownValues, 4);
6153
6154 // We can also eliminate cases by determining that their values are outside of
6155 // the limited range of the condition based on how many significant (non-sign)
6156 // bits are in the condition value.
6157 unsigned MaxSignificantBitsInCond =
// NOTE(review): embedded line 6158 (the initializer expression of
// MaxSignificantBitsInCond) is missing from this listing.
6159
6160 // Gather dead cases.
// NOTE(review): embedded line 6161 is missing; presumably it declares
// `DeadCases` - confirm.
// When a DTU is supplied, NumPerSuccessorCases counts the live cases per
// successor so edges that lose all of their cases can be reported later;
// UniqueSuccessors records each successor once, in first-seen order.
6162 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
6163 SmallVector<BasicBlock *, 8> UniqueSuccessors;
6164 for (const auto &Case : SI->cases()) {
6165 auto *Successor = Case.getCaseSuccessor();
6166 if (DTU) {
6167 auto [It, Inserted] = NumPerSuccessorCases.try_emplace(Successor);
6168 if (Inserted)
6169 UniqueSuccessors.push_back(Successor);
6170 ++It->second;
6171 }
6172 ConstantInt *CaseC = Case.getCaseValue();
6173 const APInt &CaseVal = CaseC->getValue();
// A case is dead if it sets a bit known to be zero, lacks a bit known to be
// one, needs more significant bits than the condition can have, or is not
// among the collected possible values (when that collection is valid).
6174 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
6175 (CaseVal.getSignificantBits() > MaxSignificantBitsInCond) ||
6176 (IsKnownValuesValid && !KnownValues.contains(CaseC))) {
6177 DeadCases.push_back(CaseC);
6178 if (DTU)
6179 --NumPerSuccessorCases[Successor];
6180 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
6181 << " is dead.\n");
6182 } else if (IsKnownValuesValid)
// Live case: drop it from KnownValues so that only values without a case
// remain there.
6183 KnownValues.erase(CaseC);
6184 }
6185
6186 // If we can prove that the cases must cover all possible values, the
6187 // default destination becomes dead and we can remove it. If we know some
6188 // of the bits in the value, we can use that to more precisely compute the
6189 // number of possible unique case values.
6190 bool HasDefault = !SI->defaultDestUnreachable();
6191 const unsigned NumUnknownBits =
6192 Known.getBitWidth() - (Known.Zero | Known.One).popcount();
6193 assert(NumUnknownBits <= Known.getBitWidth());
6194 if (HasDefault && DeadCases.empty()) {
// Every possible value not matched by a case is undef.
6195 if (IsKnownValuesValid && all_of(KnownValues, IsaPred<UndefValue>)) {
// NOTE(review): embedded line 6196 (the action taken before returning) is
// missing from this listing.
6197 return true;
6198 }
6199
6200 if (NumUnknownBits < 64 /* avoid overflow */) {
6201 uint64_t AllNumCases = 1ULL << NumUnknownBits;
6202 if (SI->getNumCases() == AllNumCases) {
// NOTE(review): embedded line 6203 (the action taken before returning) is
// missing from this listing.
6204 return true;
6205 }
6206 // When only one case value is missing, replace default with that case.
6207 // Eliminating the default branch will provide more opportunities for
6208 // optimization, such as lookup tables.
6209 if (SI->getNumCases() == AllNumCases - 1) {
6210 assert(NumUnknownBits > 1 && "Should be canonicalized to a branch");
6211 IntegerType *CondTy = cast<IntegerType>(Cond->getType());
6212 if (CondTy->getIntegerBitWidth() > 64 ||
6213 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6214 return false;
6215
// XOR all existing case values together; the result is used as the value of
// the one missing case.
6216 uint64_t MissingCaseVal = 0;
6217 for (const auto &Case : SI->cases())
6218 MissingCaseVal ^= Case.getCaseValue()->getValue().getLimitedValue();
6219 auto *MissingCase = cast<ConstantInt>(
6220 ConstantInt::get(Cond->getType(), MissingCaseVal));
// NOTE(review): embedded line 6221 is missing; presumably it declares `SIW`,
// the wrapper used below - confirm.
// The new case goes to the old default destination and receives the weight
// of successor 0.
6222 SIW.addCase(MissingCase, SI->getDefaultDest(),
6223 SIW.getSuccessorWeight(0));
// NOTE(review): embedded line 6224 (the start of the call whose final
// argument appears on the next line) is missing from this listing.
6225 /*RemoveOrigDefaultBlock*/ false);
6226 SIW.setSuccessorWeight(0, 0);
6227 return true;
6228 }
6229 }
6230 }
6231
6232 if (DeadCases.empty())
6233 return false;
6234
// NOTE(review): embedded line 6235 is missing; presumably it declares the
// `SIW` wrapper used in the loop below - confirm.
6236 for (ConstantInt *DeadCase : DeadCases) {
6237 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
6238 assert(CaseI != SI->case_default() &&
6239 "Case was not found. Probably mistake in DeadCases forming.");
6240 // Prune unused values from PHI nodes.
6241 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
6242 SIW.removeCase(CaseI);
6243 }
6244
6245 if (DTU) {
// Report a deleted edge for every successor whose case count dropped to
// zero.
6246 std::vector<DominatorTree::UpdateType> Updates;
6247 for (auto *Successor : UniqueSuccessors)
6248 if (NumPerSuccessorCases[Successor] == 0)
6249 Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
6250 DTU->applyUpdates(Updates);
6251 }
6252
6253 return true;
6254 }
6255
6256 /// If BB would be eligible for simplification by
6257 /// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
6258 /// by an unconditional branch), look at the phi node for BB in the successor
6259 /// block and see if the incoming value is equal to CaseValue. If so, return
6260 /// the phi node, and set PhiIndex to BB's index in the phi node.
///
/// Returns nullptr (leaving *PhiIndex untouched) when BB does not qualify or
/// no phi in the successor receives CaseValue from BB. When several phis
/// match, the first one in the successor's phi order is returned.
///
/// NOTE(review): embedded line 6261 (the first line of the declaration, with
/// the return type, the name and the `CaseValue` parameter) is missing from
/// this listing; restore it from upstream.
6262 BasicBlock *BB, int *PhiIndex) {
6263 if (&*BB->getFirstNonPHIIt() != BB->getTerminator())
6264 return nullptr; // BB must be empty to be a candidate for simplification.
6265 if (!BB->getSinglePredecessor())
6266 return nullptr; // BB must be dominated by the switch.
6267
// NOTE(review): embedded line 6268 is missing; presumably it defines
// `Branch` from BB's terminator - confirm.
6269 if (!Branch)
6270 return nullptr; // Terminator must be unconditional branch.
6271
6272 BasicBlock *Succ = Branch->getSuccessor();
6273
6274 for (PHINode &PHI : Succ->phis()) {
6275 int Idx = PHI.getBasicBlockIndex(BB);
6276 assert(Idx >= 0 && "PHI has no entry for predecessor?");
6277
6278 Value *InValue = PHI.getIncomingValue(Idx);
6279 if (InValue != CaseValue)
6280 continue;
6281
6282 *PhiIndex = Idx;
6283 return &PHI;
6284 }
6285
6286 return nullptr;
6287 }
6288
6289 /// Try to forward the condition of a switch instruction to a phi node
6290 /// dominated by the switch, if that would mean that some of the destination
6291 /// blocks of the switch can be folded away. Return true if a change is made.
///
/// NOTE(review): embedded line 6292 (the declaration line, with the function
/// name and the parameter the body calls `SI`) is missing from this listing;
/// restore it from upstream.
// Maps a phi to the indices of its incoming slots that could be replaced by
// the switch condition.
6293 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
6294
6295 ForwardingNodesMap ForwardingNodes;
6296 BasicBlock *SwitchBlock = SI->getParent();
6297 bool Changed = false;
6298 for (const auto &Case : SI->cases()) {
6299 ConstantInt *CaseValue = Case.getCaseValue();
6300 BasicBlock *CaseDest = Case.getCaseSuccessor();
6301
6302 // Replace phi operands in successor blocks that are using the constant case
6303 // value rather than the switch condition variable:
6304 // switchbb:
6305 // switch i32 %x, label %default [
6306 // i32 17, label %succ
6307 // ...
6308 // succ:
6309 // %r = phi i32 ... [ 17, %switchbb ] ...
6310 // -->
6311 // %r = phi i32 ... [ %x, %switchbb ] ...
6312
6313 for (PHINode &Phi : CaseDest->phis()) {
6314 // This only works if there is exactly 1 incoming edge from the switch to
6315 // a phi. If there is >1, that means multiple cases of the switch map to 1
6316 // value in the phi, and that phi value is not the switch condition. Thus,
6317 // this transform would not make sense (the phi would be invalid because
6318 // a phi can't have different incoming values from the same block).
6319 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
6320 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
6321 count(Phi.blocks(), SwitchBlock) == 1) {
6322 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
6323 Changed = true;
6324 }
6325 }
6326
6327 // Collect phi nodes that are indirectly using this switch's case constants.
6328 int PhiIdx;
6329 if (auto *Phi = findPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
6330 ForwardingNodes[Phi].push_back(PhiIdx);
6331 }
6332
6333 for (auto &ForwardingNode : ForwardingNodes) {
6334 PHINode *Phi = ForwardingNode.first;
6335 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
6336 // Check if it helps to fold PHI.
// Skip a phi with a single candidate slot unless the condition is already
// one of its incoming values.
6337 if (Indexes.size() < 2 && !llvm::is_contained(Phi->incoming_values(), SI->getCondition()))
6338 continue;
6339
6340 for (int Index : Indexes)
6341 Phi->setIncomingValue(Index, SI->getCondition());
6342 Changed = true;
6343 }
6344
6345 return Changed;
6346 }
6347
6348 /// Return true if the backend will be able to handle
6349 /// initializing an array of constants like C.
///
/// NOTE(review): embedded line 6350 (the declaration line, with the name and
/// the `C` / `TTI` parameters) is missing from this listing; restore it from
/// upstream.
6351 if (C->isThreadDependent())
6352 return false;
6353 if (C->isDLLImportDependent())
6354 return false;
6355
// NOTE(review): embedded lines 6356-6357 (the condition guarding the
// `return false` below) are missing from this listing.
6358 return false;
6359
6360 // Globals cannot contain scalable types.
6361 if (C->getType()->isScalableTy())
6362 return false;
6363
// NOTE(review): embedded line 6364 is missing; presumably it opens an `if`
// that binds `CE` and is closed by the brace after the recursive check -
// confirm.
6365 // Pointer casts and in-bounds GEPs will not prohibit the backend from
6366 // materializing the array of constants.
6367 Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
// Reject when stripping changed nothing, or when the stripped constant itself
// is not acceptable (checked recursively).
6368 if (StrippedC == C || !validLookupTableConstant(StrippedC, TTI))
6369 return false;
6370 }
6371
// Finally, defer to the target's own policy for this constant.
6372 if (!TTI.shouldBuildLookupTablesForConstant(C))
6373 return false;
6374
6375 return true;
6376 }
6377
6378 /// If V is a Constant, return it. Otherwise, try to look up
6379 /// its constant value in ConstantPool, returning nullptr if it's not there.
///
/// NOTE(review): embedded lines 6381-6382 (the function name and its
/// parameter list, which introduce `V` and `ConstantPool`) are missing from
/// this listing; restore them from upstream.
6380 static Constant *
6383 if (Constant *C = dyn_cast<Constant>(V))
6384 return C;
6385 return ConstantPool.lookup(V);
6386 }
6387
6388 /// Try to fold instruction I into a constant. This works for
6389 /// simple instructions such as binary operations where both operands are
6390 /// constant or can be replaced by constants from the ConstantPool. Returns the
6391 /// resulting constant on success, nullptr otherwise.
///
/// NOTE(review): embedded lines 6393-6395 are missing from this listing;
/// they hold the name and parameter list and presumably the `if` that binds
/// `Select`, whose closing brace follows the select handling - restore them
/// from upstream.
6392 static Constant *
// Select handling: the condition must resolve to a constant. All-ones picks
// the true operand and zero picks the false operand; any other constant
// condition is not folded.
6396 Constant *A = lookupConstant(Select->getCondition(), ConstantPool);
6397 if (!A)
6398 return nullptr;
6399 if (A->isAllOnesValue())
6400 return lookupConstant(Select->getTrueValue(), ConstantPool);
6401 if (A->isNullValue())
6402 return lookupConstant(Select->getFalseValue(), ConstantPool);
6403 return nullptr;
6404 }
6405
// NOTE(review): embedded line 6406 is missing; presumably it declares the
// `COps` operand list filled below - confirm.
// Every operand must resolve to a constant, otherwise folding is abandoned.
6407 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
6408 if (Constant *A = lookupConstant(I->getOperand(N), ConstantPool))
6409 COps.push_back(A);
6410 else
6411 return nullptr;
6412 }
6413
6414 return ConstantFoldInstOperands(I, COps, DL);
6415 }
6416
6417 /// Try to determine the resulting constant values in phi nodes
6418 /// at the common destination basic block, *CommonDest, for one of the case
6419 /// destinations CaseDest corresponding to value CaseVal (nullptr for the
6420 /// default case), of a switch instruction SI.
///
/// If *CommonDest is null on entry it is set to the block reached for this
/// case. One (phi, constant) pair is appended to Res for each phi in
/// *CommonDest that has an entry for the predecessor this case arrives from.
/// Returns true only if at least one pair ends up in Res; returns false if
/// the case does not reach *CommonDest or a phi input is not an acceptable
/// constant.
///
/// NOTE(review): embedded line 6422 (the function name and its first
/// parameters `SI`, `CaseVal`, `CaseDest`) is missing from this listing;
/// restore it from upstream.
6421 static bool
6423 BasicBlock **CommonDest,
6424 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
6425 const DataLayout &DL, const TargetTransformInfo &TTI) {
6426 // The block from which we enter the common destination.
6427 BasicBlock *Pred = SI->getParent();
6428
6429 // If CaseDest is empty except for some side-effect free instructions through
6430 // which we can constant-propagate the CaseVal, continue to its successor.
// NOTE(review): embedded line 6431 is missing; presumably it declares the
// `ConstantPool` map seeded on the next line - confirm.
6432 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
6433 for (Instruction &I : *CaseDest) {
6434 if (I.isTerminator()) {
6435 // If the terminator is a simple branch, continue to the next block.
6436 if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
6437 return false;
6438 Pred = CaseDest;
6439 CaseDest = I.getSuccessor(0);
6440 } else if (Constant *C = constantFold(&I, DL, ConstantPool)) {
6441 // Instruction is side-effect free and constant.
6442
6443 // If the instruction has uses outside this block or a phi node slot for
6444 // the block, it is not safe to bypass the instruction since it would then
6445 // no longer dominate all its uses.
6446 for (auto &Use : I.uses()) {
6447 User *User = Use.getUser();
// NOTE(review): embedded line 6448 is missing; presumably it is an `if`
// binding a pointer named `I` (shadowing the loop variable) that the next
// line dereferences - confirm.
6449 if (I->getParent() == CaseDest)
6450 continue;
6451 if (PHINode *Phi = dyn_cast<PHINode>(User))
6452 if (Phi->getIncomingBlock(Use) == CaseDest)
6453 continue;
6454 return false;
6455 }
6456
6457 ConstantPool.insert(std::make_pair(&I, C));
6458 } else {
// First instruction that cannot be folded: stop scanning. CaseDest is left
// unchanged, so the common-destination check below is made against this
// block itself.
6459 break;
6460 }
6461 }
6462
6463 // If we did not have a CommonDest before, use the current one.
6464 if (!*CommonDest)
6465 *CommonDest = CaseDest;
6466 // If the destination isn't the common one, abort.
6467 if (CaseDest != *CommonDest)
6468 return false;
6469
6470 // Get the values for this case from phi nodes in the destination block.
6471 for (PHINode &PHI : (*CommonDest)->phis()) {
6472 int Idx = PHI.getBasicBlockIndex(Pred);
6473 if (Idx == -1)
6474 continue;
6475
6476 Constant *ConstVal =
6477 lookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
6478 if (!ConstVal)
6479 return false;
6480
6481 // Be conservative about which kinds of constants we support.
6482 if (!validLookupTableConstant(ConstVal, TTI))
6483 return false;
6484
6485 Res.push_back(std::make_pair(&PHI, ConstVal));
6486 }
6487
6488 return Res.size() > 0;
6489 }
6490
6491// Helper function used to add CaseVal to the list of cases that generate
6492// Result. Returns the updated number of cases that generate this result.
6493static size_t mapCaseToResult(ConstantInt *CaseVal,
6494 SwitchCaseResultVectorTy &UniqueResults,
6495 Constant *Result) {
6496 for (auto &I : UniqueResults) {
6497 if (I.first == Result) {
6498 I.second.push_back(CaseVal);
6499 return I.second.size();
6500 }
6501 }
6502 UniqueResults.push_back(
6503 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
6504 return 1;
6505}
6506
6507 // Helper function that initializes a map containing
6508 // results for the PHI node of the common destination block for a switch
6509 // instruction. Returns false if multiple PHI nodes have been found or if
6510 // there is not a common destination block for the switch.
//
// It also returns false when a single result is shared by more than
// MaxSwitchCasesPerResult cases, when more than MaxUniqueResults distinct
// results are found, or when no default result is found and the default
// destination is reachable. On success DefaultResult holds the default's
// value, or nullptr if there is none.
//
// NOTE(review): embedded line 6511 (the start of the declaration, with the
// name and the `SI` and `PHI` parameters) is missing from this listing;
// restore it from upstream.
6512 BasicBlock *&CommonDest,
6513 SwitchCaseResultVectorTy &UniqueResults,
6514 Constant *&DefaultResult,
6515 const DataLayout &DL,
6516 const TargetTransformInfo &TTI,
6517 uintptr_t MaxUniqueResults) {
6518 for (const auto &I : SI->cases()) {
6519 ConstantInt *CaseVal = I.getCaseValue();
6520
6521 // Resulting value at phi nodes for this case value.
6522 SwitchCaseResultsTy Results;
6523 if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
6524 DL, TTI))
6525 return false;
6526
6527 // Only one value per case is permitted.
6528 if (Results.size() > 1)
6529 return false;
6530
6531 // Add the case->result mapping to UniqueResults.
6532 const size_t NumCasesForResult =
6533 mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
6534
6535 // Early out if there are too many cases for this result.
6536 if (NumCasesForResult > MaxSwitchCasesPerResult)
6537 return false;
6538
6539 // Early out if there are too many unique results.
6540 if (UniqueResults.size() > MaxUniqueResults)
6541 return false;
6542
6543 // Check the PHI consistency.
6544 if (!PHI)
6545 PHI = Results[0].first;
6546 else if (PHI != Results[0].first)
6547 return false;
6548 }
6549 // Find the default result value.
// NOTE(review): embedded line 6550 is missing; presumably it declares the
// `DefaultResults` container filled by the call below - confirm.
// The return value of this call is deliberately not checked; only the number
// of results collected is inspected afterwards.
6551 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
6552 DL, TTI);
6553 // If the default value is not found abort unless the default destination
6554 // is unreachable.
6555 DefaultResult =
6556 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
6557
6558 return DefaultResult || SI->defaultDestUnreachable();
6559 }
6560
6561 // Helper function that checks if it is possible to transform a switch with only
6562 // two cases (or two cases + default) that produces a result into a select.
6563 // TODO: Handle switches with more than 2 cases that map to the same result.
6564 // The branch weights correspond to the provided Condition (i.e. if Condition is
6565 // modified from the original SwitchInst, the caller must adjust the weights)
//
// Returns the value that yields the selected result, or nullptr if none of
// the patterns handled below applies. Instructions are emitted through
// Builder. BranchWeights may be empty, in which case no weights are attached
// to the created selects.
6566 static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
6567 Constant *DefaultResult, Value *Condition,
6568 IRBuilder<> &Builder, const DataLayout &DL,
6569 ArrayRef<uint32_t> BranchWeights) {
6570 // If we are selecting between only two cases transform into a simple
6571 // select or a two-way select if default is possible.
6572 // Example:
6573 // switch (a) { %0 = icmp eq i32 %a, 10
6574 // case 10: return 42; %1 = select i1 %0, i32 42, i32 4
6575 // case 20: return 2; ----> %2 = icmp eq i32 %a, 20
6576 // default: return 4; %3 = select i1 %2, i32 2, i32 %1
6577 // }
6578
6579 const bool HasBranchWeights =
6580 !BranchWeights.empty() && !ProfcheckDisableMetadataFixes;
6581
6582 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
6583 ResultVector[1].second.size() == 1) {
6584 ConstantInt *FirstCase = ResultVector[0].second[0];
6585 ConstantInt *SecondCase = ResultVector[1].second[0];
6586 Value *SelectValue = ResultVector[1].first;
6587 if (DefaultResult) {
6588 Value *ValueCompare =
6589 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
6590 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
6591 DefaultResult, "switch.select");
6592 if (auto *SI = dyn_cast<SelectInst>(SelectValue);
6593 SI && HasBranchWeights) {
6594 // We start with 3 probabilities, where the numerator is the
6595 // corresponding BranchWeights[i], and the denominator is the sum over
6596 // BranchWeights. We want the probability and negative probability of
6597 // Condition == SecondCase.
6598 assert(BranchWeights.size() == 3);
// NOTE(review): embedded line 6599 (the start of the call whose arguments
// follow) is missing from this listing.
6600 *SI, {BranchWeights[2], BranchWeights[0] + BranchWeights[1]},
6601 /*IsExpected=*/false, /*ElideAllZero=*/true);
6602 }
6603 }
6604 Value *ValueCompare =
6605 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
6606 Value *Ret = Builder.CreateSelect(ValueCompare, ResultVector[0].first,
6607 SelectValue, "switch.select");
6608 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6609 // We may have had a DefaultResult. Base the position of the first and
6610 // second's branch weights accordingly. Also the probability that Condition
6611 // != FirstCase needs to take that into account.
6612 assert(BranchWeights.size() >= 2);
// NOTE(review): the two tests below are on `Condition`, not `DefaultResult`
// as the comment above would suggest. Condition has already been used as an
// operand above, so the first case's weight is effectively always read from
// index 1 - confirm this is intended.
6613 size_t FirstCasePos = (Condition != nullptr);
6614 size_t SecondCasePos = FirstCasePos + 1;
6615 uint32_t DefaultCase = (Condition != nullptr) ? BranchWeights[0] : 0;
// NOTE(review): embedded line 6616 (the start of the call whose arguments
// follow) is missing from this listing.
6617 {BranchWeights[FirstCasePos],
6618 DefaultCase + BranchWeights[SecondCasePos]},
6619 /*IsExpected=*/false, /*ElideAllZero=*/true);
6620 }
6621 return Ret;
6622 }
6623
6624 // Handle the degenerate case where two cases have the same result value.
6625 if (ResultVector.size() == 1 && DefaultResult) {
6626 ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
6627 unsigned CaseCount = CaseValues.size();
6628 // n bits group cases map to the same result:
6629 // case 0,4 -> Cond & 0b1..1011 == 0 ? result : default
6630 // case 0,2,4,6 -> Cond & 0b1..1001 == 0 ? result : default
6631 // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
6632 if (isPowerOf2_32(CaseCount)) {
6633 ConstantInt *MinCaseVal = CaseValues[0];
6634 // If there are bits that are set exclusively by CaseValues, we
6635 // can transform the switch into a select if the conjunction of
6636 // all the values uniquely identify CaseValues.
6637 APInt AndMask = APInt::getAllOnes(MinCaseVal->getBitWidth());
6638
6639 // Find the minimum value and compute the and of all the case values.
// The minimum is chosen using a signed comparison.
6640 for (auto *Case : CaseValues) {
6641 if (Case->getValue().slt(MinCaseVal->getValue()))
6642 MinCaseVal = Case;
6643 AndMask &= Case->getValue();
6644 }
6645 KnownBits Known = computeKnownBits(Condition, DL);
6646
6647 if (!AndMask.isZero() && Known.getMaxValue().uge(AndMask)) {
6648 // Compute the number of bits that are free to vary.
6649 unsigned FreeBits = Known.countMaxActiveBits() - AndMask.popcount();
6650
6651 // Check if the number of values covered by the mask is equal
6652 // to the number of cases.
6653 if (FreeBits == Log2_32(CaseCount)) {
6654 Value *And = Builder.CreateAnd(Condition, AndMask);
6655 Value *Cmp = Builder.CreateICmpEQ(
6656 And, Constant::getIntegerValue(And->getType(), AndMask));
6657 Value *Ret =
6658 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6659 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6660 // We know there's a Default case. We base the resulting branch
6661 // weights off its probability.
6662 assert(BranchWeights.size() >= 2);
// NOTE(review): embedded line 6663 (the start of the call whose arguments
// follow) is missing from this listing.
6664 *SI,
6665 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6666 /*IsExpected=*/false, /*ElideAllZero=*/true);
6667 }
6668 return Ret;
6669 }
6670 }
6671
6672 // Mark the bits case number touched.
6673 APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6674 for (auto *Case : CaseValues)
6675 BitMask |= (Case->getValue() - MinCaseVal->getValue());
6676
6677 // Check if cases with the same result can cover all number
6678 // in touched bits.
6679 if (BitMask.popcount() == Log2_32(CaseCount)) {
// Rebase the condition onto the minimum case value so that the bits outside
// BitMask are zero exactly for the case values.
6680 if (!MinCaseVal->isNullValue())
6681 Condition = Builder.CreateSub(Condition, MinCaseVal);
6682 Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6683 Value *Cmp = Builder.CreateICmpEQ(
6684 And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6685 Value *Ret =
6686 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6687 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6688 assert(BranchWeights.size() >= 2);
// NOTE(review): embedded line 6689 (the start of the call whose arguments
// follow) is missing from this listing.
6690 *SI,
6691 {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6692 /*IsExpected=*/false, /*ElideAllZero=*/true);
6693 }
6694 return Ret;
6695 }
6696 }
6697
6698 // Handle the degenerate case where two cases have the same value.
// Emitted form: (Condition == case1 || Condition == case2) ? result : default.
6699 if (CaseValues.size() == 2) {
6700 Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6701 "switch.selectcmp.case1");
6702 Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6703 "switch.selectcmp.case2");
6704 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6705 Value *Ret =
6706 Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6707 if (auto *SI = dyn_cast<SelectInst>(Ret); SI && HasBranchWeights) {
6708 assert(BranchWeights.size() >= 2);
// NOTE(review): embedded line 6709 (the start of the call whose arguments
// follow) is missing from this listing.
6710 *SI, {accumulate(drop_begin(BranchWeights), 0U), BranchWeights[0]},
6711 /*IsExpected=*/false, /*ElideAllZero=*/true);
6712 }
6713 return Ret;
6714 }
6715 }
6716
6717 return nullptr;
6718 }
6719
6720 // Helper function to cleanup a switch instruction that has been converted into
6721 // a select, fixing up PHI nodes and basic blocks.
//
// The switch's block gets an unconditional branch to the phi's block, the phi
// receives SelectValue as its only incoming value from that block, every
// other switch successor loses the switch's block as a predecessor, and the
// switch is erased. When DTU is non-null the edge changes are forwarded to
// it.
//
// NOTE(review): embedded line 6722 (the start of the declaration, with the
// name and the `SI` and `PHI` parameters) is missing from this listing;
// restore it from upstream.
6723 Value *SelectValue,
6724 IRBuilder<> &Builder,
6725 DomTreeUpdater *DTU) {
6726 std::vector<DominatorTree::UpdateType> Updates;
6727
6728 BasicBlock *SelectBB = SI->getParent();
6729 BasicBlock *DestBB = PHI->getParent();
6730
// Only report a new edge if SelectBB was not already a predecessor of DestBB.
6731 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6732 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6733 Builder.CreateBr(DestBB);
6734
6735 // Remove the switch.
6736
// Drop every existing incoming entry from SelectBB, then add the single entry
// carrying the select's value.
6737 PHI->removeIncomingValueIf(
6738 [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6739 PHI->addIncoming(SelectValue, SelectBB);
6740
// RemovedSuccessors ensures each deleted edge is reported to the DTU only
// once even when several cases share a successor.
6741 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6742 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6743 BasicBlock *Succ = SI->getSuccessor(i);
6744
6745 if (Succ == DestBB)
6746 continue;
6747 Succ->removePredecessor(SelectBB);
6748 if (DTU && RemovedSuccessors.insert(Succ).second)
6749 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6750 }
6751 SI->eraseFromParent();
6752 if (DTU)
6753 DTU->applyUpdates(Updates);
6754 }
6755
6756 /// If a switch is only used to initialize one or more phi nodes in a common
6757 /// successor block with only two different constant values, try to replace the
6758 /// switch with a select. Returns true if the fold was made.
///
/// NOTE(review): embedded line 6759 (the start of the declaration, with the
/// name and the `SI` and `Builder` parameters) is missing from this listing;
/// restore it from upstream.
6760 DomTreeUpdater *DTU, const DataLayout &DL,
6761 const TargetTransformInfo &TTI) {
6762 Value *const Cond = SI->getCondition();
6763 PHINode *PHI = nullptr;
6764 BasicBlock *CommonDest = nullptr;
6765 Constant *DefaultResult;
6766 SwitchCaseResultVectorTy UniqueResults;
6767 // Collect all the cases that will deliver the same value from the switch.
6768 if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6769 DL, TTI, /*MaxUniqueResults*/ 2))
6770 return false;
6771
6772 assert(PHI != nullptr && "PHI for value select not found");
6773 Builder.SetInsertPoint(SI);
6774 SmallVector<uint32_t, 4> BranchWeights;
// NOTE(review): embedded lines 6775 and 6777 are missing from this listing;
// presumably 6775 opens the block closed by the brace below and 6777 is the
// initializer of `HasWeights` that fills BranchWeights - confirm.
6776 [[maybe_unused]] auto HasWeights =
6778 assert(!HasWeights == (BranchWeights.empty()));
6779 }
// If weights are present there must be at least one per unique result, plus
// one for the default when it has a result.
6780 assert(BranchWeights.empty() ||
6781 (BranchWeights.size() >=
6782 UniqueResults.size() + (DefaultResult != nullptr)));
6783
6784 Value *SelectValue = foldSwitchToSelect(UniqueResults, DefaultResult, Cond,
6785 Builder, DL, BranchWeights);
6786 if (!SelectValue)
6787 return false;
6788
6789 removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6790 return true;
6791 }
6792
6793namespace {
6794
6795/// This class finds alternatives for switches to ultimately
6796/// replace the switch.
6797class SwitchReplacement {
6798public:
6799 /// Create a helper for optimizations to use as a switch replacement.
6800 /// Find a better representation for the content of Values,
6801 /// using DefaultValue to fill any holes in the table.
6802 SwitchReplacement(
6803 Module &M, uint64_t TableSize, ConstantInt *Offset,
6804 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6805 Constant *DefaultValue, const DataLayout &DL,
6806 const TargetTransformInfo &TTI, const StringRef &FuncName);
6807
6808 /// Build instructions with Builder to retrieve values using Index
6809 /// and replace the switch.
6810 Value *replaceSwitch(Value *Index, IRBuilder<> &Builder, const DataLayout &DL,
6811 Function *Func);
6812
6813 /// Return true if a table with TableSize elements of
6814 /// type ElementType would fit in a target-legal register.
6815 static bool wouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6816 Type *ElementType);
6817
6818 /// Return the default value of the switch.
6819 Constant *getDefaultValue();
6820
6821 /// Return true if the replacement is a lookup table.
6822 bool isLookupTable();
6823
6824 /// Return true if the replacement is a bit map.
6825 bool isBitMap();
6826
6827private:
6828 // Depending on the switch, there are different alternatives.
6829 enum {
6830 // For switches where each case contains the same value, we just have to
6831 // store that single value and return it for each lookup.
6832 SingleValueKind,
6833
6834 // For switches where there is a linear relationship between table index
6835 // and values. We calculate the result with a simple multiplication
6836 // and addition instead of a table lookup.
6837 LinearMapKind,
6838
6839 // For small tables with integer elements, we can pack them into a bitmap
6840 // that fits into a target-legal register. Values are retrieved by
6841 // shift and mask operations.
6842 BitMapKind,
6843
6844 // The table is stored as an array of values. Values are retrieved by load
6845 // instructions from the table.
6846 LookupTableKind
6847 } Kind;
6848
6849 // The default value of the switch.
6850 Constant *DefaultValue;
6851
6852 // The type of the output values.
6853 Type *ValueType;
6854
6855 // For SingleValueKind, this is the single value.
6856 Constant *SingleValue = nullptr;
6857
6858 // For BitMapKind, this is the bitmap.
6859 ConstantInt *BitMap = nullptr;
6860 IntegerType *BitMapElementTy = nullptr;
6861
6862 // For LinearMapKind, these are the constants used to derive the value.
6863 ConstantInt *LinearOffset = nullptr;
6864 ConstantInt *LinearMultiplier = nullptr;
6865 bool LinearMapValWrapped = false;
6866
6867 // For LookupTableKind, this is the table.
6868 Constant *Initializer = nullptr;
6869};
6870
6871} // end anonymous namespace
6872
/// Analyze the (case value, result) pairs feeding one PHI and choose how the
/// switch result will be materialized, recording the choice in Kind. The
/// candidates are tried in this order: a single constant, a linear function
/// of the table index, a bitmap constant, and finally an array initializer
/// for a lookup table.
///
/// \param M            Module whose LLVMContext is used to create constants.
/// \param TableSize    Number of table entries; asserted >= Values.size().
/// \param Offset       Subtracted from each case value to get its table index.
/// \param Values       Non-empty list of (case value, result constant) pairs.
/// \param DefaultValue Fills entries that no case maps to; asserted non-null
///                     when Values.size() < TableSize.
/// \param DL           Forwarded to wouldFitInRegister for the bitmap check.
/// \param TTI          Supplies the minimum lookup-table entry bit width.
/// \param FuncName     Not referenced by the statements visible here.
6873SwitchReplacement::SwitchReplacement(
6874 Module &M, uint64_t TableSize, ConstantInt *Offset,
6875 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6876 Constant *DefaultValue, const DataLayout &DL,
6877 const TargetTransformInfo &TTI, const StringRef &FuncName)
6878 : DefaultValue(DefaultValue) {
6879 assert(Values.size() && "Can't build lookup table without values!");
6880 assert(TableSize >= Values.size() && "Can't fit values in table!");
6881
6882 // If all values in the table are equal, this is that value.
6883 SingleValue = Values.begin()->second;
6884
6885 ValueType = Values.begin()->second->getType();
6886
6887 // Build up the table contents.
6888 SmallVector<Constant *, 64> TableContents(TableSize);
6889 for (const auto &[CaseVal, CaseRes] : Values) {
6890 assert(CaseRes->getType() == ValueType);
6891
6892 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6893 TableContents[Idx] = CaseRes;
6894
 // Poison results never disqualify the single-value form. If the candidate
 // itself is poison it is replaced by the first non-poison result; any
 // other mismatch clears the candidate.
6895 if (SingleValue && !isa<PoisonValue>(CaseRes) && CaseRes != SingleValue)
6896 SingleValue = isa<PoisonValue>(SingleValue) ? CaseRes : nullptr;
6897 }
6898
6899 // Fill in any holes in the table with the default result.
6900 if (Values.size() < TableSize) {
6901 assert(DefaultValue &&
6902 "Need a default value to fill the lookup table holes.");
6903 assert(DefaultValue->getType() == ValueType);
6904 for (uint64_t I = 0; I < TableSize; ++I) {
6905 if (!TableContents[I])
6906 TableContents[I] = DefaultValue;
6907 }
6908
6909 // If the default value is poison, all the holes are poison.
6910 bool DefaultValueIsPoison = isa<PoisonValue>(DefaultValue);
6911
 // A non-poison default that differs from the candidate means the table is
 // not a single value after all.
6912 if (DefaultValue != SingleValue && !DefaultValueIsPoison)
6913 SingleValue = nullptr;
6914 }
6915
6916 // If each element in the table contains the same value, we only need to store
6917 // that single value.
6918 if (SingleValue) {
6919 Kind = SingleValueKind;
6920 return;
6921 }
6922
6923 // Check if we can derive the value with a linear transformation from the
6924 // table index.
 // NOTE(review): the statement that opens the scope closed after the
 // linear-map logic below is not visible in this listing; confirm against the
 // original file.
6926 bool LinearMappingPossible = true;
6927 APInt PrevVal;
6928 APInt DistToPrev;
6929 // When linear map is monotonic and signed overflow doesn't happen on
6930 // maximum index, we can attach nsw on Add and Mul.
6931 bool NonMonotonic = false;
6932 assert(TableSize >= 2 && "Should be a SingleValue table.");
6933 // Check if there is the same distance between two consecutive values.
6934 for (uint64_t I = 0; I < TableSize; ++I) {
6935 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6936
6937 if (!ConstVal && isa<PoisonValue>(TableContents[I])) {
6938 // This is poison, so it's (probably) a lookup table hole.
6939 // To prevent any regressions from before we switched to using poison as
6940 // the default value, holes will fall back to using the first value.
6941 // This can be removed once we add proper handling for poisons in lookup
6942 // tables.
6943 ConstVal = dyn_cast<ConstantInt>(Values[0].second);
6944 }
6945
6946 if (!ConstVal) {
6947 // This is an undef. We could deal with it, but undefs in lookup tables
6948 // are very seldom. It's probably not worth the additional complexity.
6949 LinearMappingPossible = false;
6950 break;
6951 }
6952 const APInt &Val = ConstVal->getValue();
6953 if (I != 0) {
6954 APInt Dist = Val - PrevVal;
6955 if (I == 1) {
6956 DistToPrev = Dist;
6957 } else if (Dist != DistToPrev) {
6958 LinearMappingPossible = false;
6959 break;
6960 }
 // A positive step must strictly increase the signed value and a
 // non-positive step must not increase it; otherwise the sequence wrapped.
6961 NonMonotonic |=
6962 Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6963 }
6964 PrevVal = Val;
6965 }
6966 if (LinearMappingPossible) {
6967 LinearOffset = cast<ConstantInt>(TableContents[0]);
6968 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
 // Note: this local APInt named M shadows the Module parameter M until the
 // end of the enclosing block.
6969 APInt M = LinearMultiplier->getValue();
 // MayWrap stays true when TableSize - 1 is not representable in the
 // multiplier's bit width; otherwise smul_ov reports whether
 // Multiplier * (TableSize - 1) overflows as a signed value.
6970 bool MayWrap = true;
6971 if (isIntN(M.getBitWidth(), TableSize - 1))
6972 (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6973 LinearMapValWrapped = NonMonotonic || MayWrap;
6974 Kind = LinearMapKind;
6975 return;
6976 }
6977 }
6978
6979 // If the type is integer and the table fits in a register, build a bitmap.
6980 if (wouldFitInRegister(DL, TableSize, ValueType)) {
 // NOTE(review): IT is used below but its declaration is not visible in this
 // listing; presumably it is ValueType as an IntegerType - confirm.
 // Entries are packed from the last index down, so entry 0 ends up in the
 // least significant bits.
6982 APInt TableInt(TableSize * IT->getBitWidth(), 0);
6983 for (uint64_t I = TableSize; I > 0; --I) {
6984 TableInt <<= IT->getBitWidth();
6985 // Insert values into the bitmap. Undef values are set to zero.
6986 if (!isa<UndefValue>(TableContents[I - 1])) {
6987 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6988 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6989 }
6990 }
6991 BitMap = ConstantInt::get(M.getContext(), TableInt);
6992 BitMapElementTy = IT;
6993 Kind = BitMapKind;
6994 return;
6995 }
6996
 // For integer tables, try to shrink the stored element type: take the union
 // of all non-undef entries as an unsigned range and truncate every entry to
 // the power-of-two width that holds it (never below the target's minimum).
 // replaceSwitch zero-extends the loaded value back to ValueType.
6997 if (auto *IT = dyn_cast<IntegerType>(ValueType)) {
6998 ConstantRange Range(IT->getBitWidth(), false);
6999 for (Constant *Value : TableContents)
7000 if (!isa<UndefValue>(Value))
7001 Range = Range.unionWith(cast<ConstantInt>(Value)->getValue());
7002 // TODO: handle sign extension as well?
7003 unsigned NeededBitWidth =
7004 std::max(TTI.getMinimumLookupTableEntryBitWidth(),
7005 unsigned(PowerOf2Ceil(Range.getActiveBits())));
7006 if (NeededBitWidth < IT->getBitWidth()) {
7007 IntegerType *DstTy = IntegerType::get(IT->getContext(), NeededBitWidth);
7008 for (Constant *&Value : TableContents)
7009 Value = ConstantFoldCastInstruction(Instruction::Trunc, Value, DstTy);
7010 }
7011 }
7012
7013 // Store the table in an array.
7014 auto *TableTy = ArrayType::get(TableContents[0]->getType(), TableSize);
7015 Initializer = ConstantArray::get(TableTy, TableContents);
7016
7017 Kind = LookupTableKind;
7018}
7019
7020Value *SwitchReplacement::replaceSwitch(Value *Index, IRBuilder<> &Builder,
7021 const DataLayout &DL, Function *Func) {
7022 switch (Kind) {
7023 case SingleValueKind:
7024 return SingleValue;
7025 case LinearMapKind: {
7026 ++NumLinearMaps;
7027 // Derive the result value from the input value.
7028 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
7029 false, "switch.idx.cast");
7030 if (!LinearMultiplier->isOne())
7031 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
7032 /*HasNUW = */ false,
7033 /*HasNSW = */ !LinearMapValWrapped);
7034
7035 if (!LinearOffset->isZero())
7036 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
7037 /*HasNUW = */ false,
7038 /*HasNSW = */ !LinearMapValWrapped);
7039 return Result;
7040 }
7041 case BitMapKind: {
7042 ++NumBitMaps;
7043 // Type of the bitmap (e.g. i59).
7044 IntegerType *MapTy = BitMap->getIntegerType();
7045
7046 // Cast Index to the same type as the bitmap.
7047 // Note: The Index is <= the number of elements in the table, so
7048 // truncating it to the width of the bitmask is safe.
7049 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
7050
7051 // Multiply the shift amount by the element width. NUW/NSW can always be
7052 // set, because wouldFitInRegister guarantees Index * ShiftAmt is in
7053 // BitMap's bit width.
7054 ShiftAmt = Builder.CreateMul(
7055 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
7056 "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
7057
7058 // Shift down.
7059 Value *DownShifted =
7060 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
7061 // Mask off.
7062 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
7063 }
7064 case LookupTableKind: {
7065 ++NumLookupTables;
7066 auto *Table =
7067 new GlobalVariable(*Func->getParent(), Initializer->getType(),
7068 /*isConstant=*/true, GlobalVariable::PrivateLinkage,
7069 Initializer, "switch.table." + Func->getName());
7070 Table->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
7071 // Set the alignment to that of an array items. We will be only loading one
7072 // value out of it.
7073 Table->setAlignment(DL.getPrefTypeAlign(ValueType));
7074 Type *IndexTy = DL.getIndexType(Table->getType());
7075 auto *ArrayTy = cast<ArrayType>(Table->getValueType());
7076
7077 if (Index->getType() != IndexTy) {
7078 unsigned OldBitWidth = Index->getType()->getIntegerBitWidth();
7079 Index = Builder.CreateZExtOrTrunc(Index, IndexTy);
7080 if (auto *Zext = dyn_cast<ZExtInst>(Index))
7081 Zext->setNonNeg(
7082 isUIntN(OldBitWidth - 1, ArrayTy->getNumElements() - 1));
7083 }
7084
7085 Value *GEPIndices[] = {ConstantInt::get(IndexTy, 0), Index};
7086 Value *GEP =
7087 Builder.CreateInBoundsGEP(ArrayTy, Table, GEPIndices, "switch.gep");
7088 Value *Load =
7089 Builder.CreateLoad(ArrayTy->getElementType(), GEP, "switch.load");
7090 if (Load->getType() == ValueType)
7091 return Load;
7092 return Builder.CreateZExt(Load, ValueType, "switch.ext");
7093 }
7094 }
7095 llvm_unreachable("Unknown helper kind!");
7096}
7097
7098bool SwitchReplacement::wouldFitInRegister(const DataLayout &DL,
7099 uint64_t TableSize,
7100 Type *ElementType) {
7101 auto *IT = dyn_cast<IntegerType>(ElementType);
7102 if (!IT)
7103 return false;
7104 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
7105 // are <= 15, we could try to narrow the type.
7106
7107 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
7108 if (TableSize >= UINT_MAX / IT->getBitWidth())
7109 return false;
7110 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
7111}
7112
7114 const DataLayout &DL) {
7115 // Allow any legal type.
7116 if (TTI.isTypeLegal(Ty))
7117 return true;
7118
7119 auto *IT = dyn_cast<IntegerType>(Ty);
7120 if (!IT)
7121 return false;
7122
7123 // Also allow power of 2 integer types that have at least 8 bits and fit in
7124 // a register. These types are common in frontend languages and targets
7125 // usually support loads of these types.
7126 // TODO: We could relax this to any integer that fits in a register and rely
7127 // on ABI alignment and padding in the table to allow the load to be widened.
7128 // Or we could widen the constants and truncate the load.
7129 unsigned BitWidth = IT->getBitWidth();
7130 return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
7131 DL.fitsInLegalInteger(IT->getBitWidth());
7132}
7133
7134Constant *SwitchReplacement::getDefaultValue() { return DefaultValue; }
7135
7136bool SwitchReplacement::isLookupTable() { return Kind == LookupTableKind; }
7137
7138bool SwitchReplacement::isBitMap() { return Kind == BitMapKind; }
7139
/// Decide whether a switch with \p NumCases cases spread over \p CaseRange
/// consecutive values is dense enough to be worth lowering as a table.
///
/// The required density is 40% when optimizing for size (\p OptSize) and 10%
/// otherwise. See also TargetLoweringBase::isSuitableForJumpTable(), which
/// this function was based on.
///
/// \returns true when NumCases / CaseRange meets the threshold. Ranges of
/// UINT64_MAX / 100 or more are always rejected.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange, bool OptSize) {
  const uint64_t MinDensity = OptSize ? 40 : 10;

  if (CaseRange >= UINT64_MAX / 100)
    return false; // Avoid multiplication overflows below.

  // One case (or more) per value in the range is at least 100% dense.
  // Answering here also bounds NumCases below UINT64_MAX / 100, so the
  // NumCases * 100 product below cannot wrap and misreport a dense switch.
  if (NumCases >= CaseRange)
    return true;

  return NumCases * 100 >= CaseRange * MinDensity;
}
7152
7153static bool isSwitchDense(ArrayRef<int64_t> Values, bool OptSize) {
7154 uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
7155 uint64_t Range = Diff + 1;
7156 if (Range < Diff)
7157 return false; // Overflow.
7158
7159 return isSwitchDense(Values.size(), Range, OptSize);
7160}
7161
7162/// Determine whether a lookup table should be built for this switch, based on
7163/// the number of cases, size of the table, and the types of the results.
7164// TODO: We could support larger than legal types by limiting based on the
7165// number of loads required and/or table size. If the constants are small we
7166// could use smaller table entries and extend after the load.
7168 const TargetTransformInfo &TTI,
7169 const DataLayout &DL,
7170 const SmallVector<Type *> &ResultTypes) {
7171 if (SI->getNumCases() > TableSize)
7172 return false; // TableSize overflowed.
7173
7174 bool AllTablesFitInRegister = true;
7175 bool HasIllegalType = false;
7176 for (const auto &Ty : ResultTypes) {
7177 // Saturate this flag to true.
7178 HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
7179
7180 // Saturate this flag to false.
7181 AllTablesFitInRegister =
7182 AllTablesFitInRegister &&
7183 SwitchReplacement::wouldFitInRegister(DL, TableSize, Ty);
7184
7185 // If both flags saturate, we're done. NOTE: This *only* works with
7186 // saturating flags, and all flags have to saturate first due to the
7187 // non-deterministic behavior of iterating over a dense map.
7188 if (HasIllegalType && !AllTablesFitInRegister)
7189 break;
7190 }
7191
7192 // If each table would fit in a register, we should build it anyway.
7193 if (AllTablesFitInRegister)
7194 return true;
7195
7196 // Don't build a table that doesn't fit in-register if it has illegal types.
7197 if (HasIllegalType)
7198 return false;
7199
7200 return isSwitchDense(SI->getNumCases(), TableSize,
7201 SI->getFunction()->hasOptSize());
7202}
7203
7205 ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
7206 bool HasDefaultResults, const SmallVector<Type *> &ResultTypes,
7207 const DataLayout &DL, const TargetTransformInfo &TTI) {
7208 if (MinCaseVal.isNullValue())
7209 return true;
7210 if (MinCaseVal.isNegative() ||
7211 MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
7212 !HasDefaultResults)
7213 return false;
7214 return all_of(ResultTypes, [&](const auto &ResultType) {
7215 return SwitchReplacement::wouldFitInRegister(
7216 DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */, ResultType);
7217 });
7218}
7219
7220/// Try to reuse the switch table index compare. Following pattern:
7221/// \code
7222/// if (idx < tablesize)
7223/// r = table[idx]; // table does not contain default_value
7224/// else
7225/// r = default_value;
7226/// if (r != default_value)
7227/// ...
7228/// \endcode
7229/// Is optimized to:
7230/// \code
7231/// cond = idx < tablesize;
7232/// if (cond)
7233/// r = table[idx];
7234/// else
7235/// r = default_value;
7236/// if (cond)
7237/// ...
7238/// \endcode
7239/// Jump threading will then eliminate the second if(cond).
7241 User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch,
7242 Constant *DefaultValue,
7243 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
7245 if (!CmpInst)
7246 return;
7247
7248 // We require that the compare is in the same block as the phi so that jump
7249 // threading can do its work afterwards.
7250 if (CmpInst->getParent() != PhiBlock)
7251 return;
7252
7254 if (!CmpOp1)
7255 return;
7256
7257 Value *RangeCmp = RangeCheckBranch->getCondition();
7258 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
7259 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
7260
7261 // Check if the compare with the default value is constant true or false.
7262 const DataLayout &DL = PhiBlock->getDataLayout();
7264 CmpInst->getPredicate(), DefaultValue, CmpOp1, DL);
7265 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
7266 return;
7267
7268 // Check if the compare with the case values is distinct from the default
7269 // compare result.
7270 for (auto ValuePair : Values) {
7272 CmpInst->getPredicate(), ValuePair.second, CmpOp1, DL);
7273 if (!CaseConst || CaseConst == DefaultConst ||
7274 (CaseConst != TrueConst && CaseConst != FalseConst))
7275 return;
7276 }
7277
7278 // Check if the branch instruction dominates the phi node. It's a simple
7279 // dominance check, but sufficient for our needs.
7280 // Although this check is invariant in the calling loops, it's better to do it
7281 // at this late stage. Practically we do it at most once for a switch.
7282 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
7283 for (BasicBlock *Pred : predecessors(PhiBlock)) {
7284 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
7285 return;
7286 }
7287
7288 if (DefaultConst == FalseConst) {
7289 // The compare yields the same result. We can replace it.
7290 CmpInst->replaceAllUsesWith(RangeCmp);
7291 ++NumTableCmpReuses;
7292 } else {
7293 // The compare yields the same result, just inverted. We can replace it.
7294 Value *InvertedTableCmp = BinaryOperator::CreateXor(
7295 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
7296 RangeCheckBranch->getIterator());
7297 CmpInst->replaceAllUsesWith(InvertedTableCmp);
7298 ++NumTableCmpReuses;
7299 }
7300}
7301
7302/// If the switch is only used to initialize one or more phi nodes in a common
7303/// successor block with different constant values, replace the switch with
7304/// lookup tables.
7306 DomTreeUpdater *DTU, const DataLayout &DL,
7307 const TargetTransformInfo &TTI,
7308 bool ConvertSwitchToLookupTable) {
7309 assert(SI->getNumCases() > 1 && "Degenerate switch?");
7310
7311 BasicBlock *BB = SI->getParent();
7312 Function *Fn = BB->getParent();
7313
7314 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
7315 // split off a dense part and build a lookup table for that.
7316
7317 // FIXME: This creates arrays of GEPs to constant strings, which means each
7318 // GEP needs a runtime relocation in PIC code. We should just build one big
7319 // string and lookup indices into that.
7320
7321 // Ignore switches with less than three cases. Lookup tables will not make
7322 // them faster, so we don't analyze them.
7323 if (SI->getNumCases() < 3)
7324 return false;
7325
7326 // Figure out the corresponding result for each case value and phi node in the
7327 // common destination, as well as the min and max case values.
7328 assert(!SI->cases().empty());
7329 SwitchInst::CaseIt CI = SI->case_begin();
7330 ConstantInt *MinCaseVal = CI->getCaseValue();
7331 ConstantInt *MaxCaseVal = CI->getCaseValue();
7332
7333 BasicBlock *CommonDest = nullptr;
7334
7335 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
7337
7339 SmallVector<Type *> ResultTypes;
7341
7342 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
7343 ConstantInt *CaseVal = CI->getCaseValue();
7344 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
7345 MinCaseVal = CaseVal;
7346 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
7347 MaxCaseVal = CaseVal;
7348
7349 // Resulting value at phi nodes for this case value.
7351 ResultsTy Results;
7352 if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
7353 Results, DL, TTI))
7354 return false;
7355
7356 // Append the result and result types from this case to the list for each
7357 // phi.
7358 for (const auto &I : Results) {
7359 PHINode *PHI = I.first;
7360 Constant *Value = I.second;
7361 auto [It, Inserted] = ResultLists.try_emplace(PHI);
7362 if (Inserted)
7363 PHIs.push_back(PHI);
7364 It->second.push_back(std::make_pair(CaseVal, Value));
7365 ResultTypes.push_back(PHI->getType());
7366 }
7367 }
7368
7369 // If the table has holes, we need a constant result for the default case
7370 // or a bitmask that fits in a register.
7371 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
7372 bool HasDefaultResults =
7373 getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
7374 DefaultResultsList, DL, TTI);
7375 for (const auto &I : DefaultResultsList) {
7376 PHINode *PHI = I.first;
7377 Constant *Result = I.second;
7378 DefaultResults[PHI] = Result;
7379 }
7380
7381 bool UseSwitchConditionAsTableIndex = shouldUseSwitchConditionAsTableIndex(
7382 *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
7383 uint64_t TableSize;
7384 ConstantInt *TableIndexOffset;
7385 if (UseSwitchConditionAsTableIndex) {
7386 TableSize = MaxCaseVal->getLimitedValue() + 1;
7387 TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
7388 } else {
7389 TableSize =
7390 (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
7391
7392 TableIndexOffset = MinCaseVal;
7393 }
7394
7395 // If the default destination is unreachable, or if the lookup table covers
7396 // all values of the conditional variable, branch directly to the lookup table
7397 // BB. Otherwise, check that the condition is within the case range.
7398 uint64_t NumResults = ResultLists[PHIs[0]].size();
7399 bool DefaultIsReachable = !SI->defaultDestUnreachable();
7400
7401 bool TableHasHoles = (NumResults < TableSize);
7402
7403 // If the table has holes but the default destination doesn't produce any
7404 // constant results, the lookup table entries corresponding to the holes will
7405 // contain poison.
7406 bool AllHolesArePoison = TableHasHoles && !HasDefaultResults;
7407
7408 // If the default destination doesn't produce a constant result but is still
7409 // reachable, and the lookup table has holes, we need to use a mask to
7410 // determine if the current index should load from the lookup table or jump
7411 // to the default case.
7412 // The mask is unnecessary if the table has holes but the default destination
7413 // is unreachable, as in that case the holes must also be unreachable.
7414 bool NeedMask = AllHolesArePoison && DefaultIsReachable;
7415 if (NeedMask) {
7416 // As an extra penalty for the validity test we require more cases.
7417 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
7418 return false;
7419 if (!DL.fitsInLegalInteger(TableSize))
7420 return false;
7421 }
7422
7423 if (!shouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
7424 return false;
7425
7426 // Compute the table index value.
7427 Value *TableIndex;
7428 if (UseSwitchConditionAsTableIndex) {
7429 TableIndex = SI->getCondition();
7430 if (HasDefaultResults) {
7431 // Grow the table to cover all possible index values to avoid the range
7432 // check. It will use the default result to fill in the table hole later,
7433 // so make sure it exists.
7434 ConstantRange CR = computeConstantRange(TableIndex, /*ForSigned=*/false,
7435 SimplifyQuery(DL));
7436 // Growing the table should not have any size impact; this is ensured by
7437 // checking wouldFitInRegister.
7438 // TODO: Consider growing the table also when it doesn't fit in a register
7439 // if no optsize is specified.
7440 const uint64_t UpperBound = CR.getUpper().getLimitedValue();
7441 if (!CR.isUpperWrapped() &&
7442 all_of(ResultTypes, [&](const auto &ResultType) {
7443 return SwitchReplacement::wouldFitInRegister(DL, UpperBound,
7444 ResultType);
7445 })) {
7446 // There may be some case index larger than the UpperBound (unreachable
7447 // case), so make sure the table size does not get smaller.
7448 TableSize = std::max(UpperBound, TableSize);
7449 // The default branch is unreachable after we enlarge the lookup table.
7450 // Adjust DefaultIsReachable to reuse code path.
7451 DefaultIsReachable = false;
7452 }
7453 }
7454 }
7455
7456 // Keep track of the switch replacement for each phi
7458 for (PHINode *PHI : PHIs) {
7459 const auto &ResultList = ResultLists[PHI];
7460
7461 Type *ResultType = ResultList.begin()->second->getType();
7462 // Use any value to fill the lookup table holes.
7463 Constant *DefaultVal =
7464 AllHolesArePoison ? PoisonValue::get(ResultType) : DefaultResults[PHI];
7465 StringRef FuncName = Fn->getName();
7466 SwitchReplacement Replacement(*Fn->getParent(), TableSize, TableIndexOffset,
7467 ResultList, DefaultVal, DL, TTI, FuncName);
7468 PhiToReplacementMap.insert({PHI, Replacement});
7469 }
7470
7471 bool AnyLookupTables = any_of(
7472 PhiToReplacementMap, [](auto &KV) { return KV.second.isLookupTable(); });
7473 bool AnyBitMaps = any_of(PhiToReplacementMap,
7474 [](auto &KV) { return KV.second.isBitMap(); });
7475
7476 // A few conditions prevent the generation of lookup tables:
7477 // 1. The target does not support lookup tables.
7478 // 2. The "no-jump-tables" function attribute is set.
7479 // However, these objections do not apply to other switch replacements, like
7480 // the bitmap, so we only stop here if any of these conditions are met and we
7481 // want to create a LUT. Otherwise, continue with the switch replacement.
7482 if (AnyLookupTables &&
7483 (!TTI.shouldBuildLookupTables() ||
7484 Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
7485 return false;
7486
7487 // In the early optimization pipeline, disable formation of lookup tables,
7488 // bit maps and mask checks, as they may inhibit further optimization.
7489 if (!ConvertSwitchToLookupTable &&
7490 (AnyLookupTables || AnyBitMaps || NeedMask))
7491 return false;
7492
7493 Builder.SetInsertPoint(SI);
7494 // TableIndex is the switch condition - TableIndexOffset if we don't
7495 // use the condition directly
7496 if (!UseSwitchConditionAsTableIndex) {
7497 // If the default is unreachable, all case values are s>= MinCaseVal. Then
7498 // we can try to attach nsw.
7499 bool MayWrap = true;
7500 if (!DefaultIsReachable) {
7501 APInt Res =
7502 MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
7503 (void)Res;
7504 }
7505 TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
7506 "switch.tableidx", /*HasNUW =*/false,
7507 /*HasNSW =*/!MayWrap);
7508 }
7509
7510 std::vector<DominatorTree::UpdateType> Updates;
7511
7512 // Compute the maximum table size representable by the integer type we are
7513 // switching upon.
7514 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
7515 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
7516 assert(MaxTableSize >= TableSize &&
7517 "It is impossible for a switch to have more entries than the max "
7518 "representable value of its input integer type's size.");
7519
7520 // Create the BB that does the lookups.
7521 Module &Mod = *CommonDest->getParent()->getParent();
7522 BasicBlock *LookupBB = BasicBlock::Create(
7523 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
7524
7525 CondBrInst *RangeCheckBranch = nullptr;
7526 CondBrInst *CondBranch = nullptr;
7527
7528 Builder.SetInsertPoint(SI);
7529 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
7530 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7531 Builder.CreateBr(LookupBB);
7532 if (DTU)
7533 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7534 // Note: We call removePredecessor later since we need to be able to get the
7535 // PHI value for the default case in case we're using a bit mask.
7536 } else {
7537 Value *Cmp = Builder.CreateICmpULT(
7538 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
7539 RangeCheckBranch =
7540 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
7541 CondBranch = RangeCheckBranch;
7542 if (DTU)
7543 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
7544 }
7545
7546 // Populate the BB that does the lookups.
7547 Builder.SetInsertPoint(LookupBB);
7548
7549 if (NeedMask) {
7550 // Before doing the lookup, we do the hole check. The LookupBB is therefore
7551 // re-purposed to do the hole check, and we create a new LookupBB.
7552 BasicBlock *MaskBB = LookupBB;
7553 MaskBB->setName("switch.hole_check");
7554 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
7555 CommonDest->getParent(), CommonDest);
7556
7557 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
7558 // unnecessary illegal types.
7559 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
7560 APInt MaskInt(TableSizePowOf2, 0);
7561 APInt One(TableSizePowOf2, 1);
7562 // Build bitmask; fill in a 1 bit for every case.
7563 const ResultListTy &ResultList = ResultLists[PHIs[0]];
7564 for (const auto &Result : ResultList) {
7565 uint64_t Idx = (Result.first->getValue() - TableIndexOffset->getValue())
7566 .getLimitedValue();
7567 MaskInt |= One << Idx;
7568 }
7569 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
7570
7571 // Get the TableIndex'th bit of the bitmask.
7572 // If this bit is 0 (meaning hole) jump to the default destination,
7573 // else continue with table lookup.
7574 IntegerType *MapTy = TableMask->getIntegerType();
7575 Value *MaskIndex =
7576 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
7577 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
7578 Value *LoBit = Builder.CreateTrunc(
7579 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
7580 CondBranch = Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
7581 if (DTU) {
7582 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
7583 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
7584 }
7585 Builder.SetInsertPoint(LookupBB);
7586 addPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
7587 }
7588
7589 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
7590 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
7591 // do not delete PHINodes here.
7592 SI->getDefaultDest()->removePredecessor(BB,
7593 /*KeepOneInputPHIs=*/true);
7594 if (DTU)
7595 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
7596 }
7597
7598 for (PHINode *PHI : PHIs) {
7599 const ResultListTy &ResultList = ResultLists[PHI];
7600 auto Replacement = PhiToReplacementMap.at(PHI);
7601 auto *Result = Replacement.replaceSwitch(TableIndex, Builder, DL, Fn);
7602 // Do a small peephole optimization: re-use the switch table compare if
7603 // possible.
7604 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
7605 BasicBlock *PhiBlock = PHI->getParent();
7606 // Search for compare instructions which use the phi.
7607 for (auto *User : PHI->users()) {
7608 reuseTableCompare(User, PhiBlock, RangeCheckBranch,
7609 Replacement.getDefaultValue(), ResultList);
7610 }
7611 }
7612
7613 PHI->addIncoming(Result, LookupBB);
7614 }
7615
7616 Builder.CreateBr(CommonDest);
7617 if (DTU)
7618 Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
7619
7620 SmallVector<uint32_t> BranchWeights;
7621 const bool HasBranchWeights = CondBranch && !ProfcheckDisableMetadataFixes &&
7622 extractBranchWeights(*SI, BranchWeights);
7623 uint64_t ToLookupWeight = 0;
7624 uint64_t ToDefaultWeight = 0;
7625
7626 // Remove the switch.
7627 SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
7628 for (unsigned I = 0, E = SI->getNumSuccessors(); I < E; ++I) {
7629 BasicBlock *Succ = SI->getSuccessor(I);
7630
7631 if (Succ == SI->getDefaultDest()) {
7632 if (HasBranchWeights)
7633 ToDefaultWeight += BranchWeights[I];
7634 continue;
7635 }
7636 Succ->removePredecessor(BB);
7637 if (DTU && RemovedSuccessors.insert(Succ).second)
7638 Updates.push_back({DominatorTree::Delete, BB, Succ});
7639 if (HasBranchWeights)
7640 ToLookupWeight += BranchWeights[I];
7641 }
7642 SI->eraseFromParent();
7643 if (HasBranchWeights)
7644 setFittedBranchWeights(*CondBranch, {ToLookupWeight, ToDefaultWeight},
7645 /*IsExpected=*/false);
7646 if (DTU)
7647 DTU->applyUpdates(Updates);
7648
7649 if (NeedMask)
7650 ++NumLookupTablesHoles;
7651 return true;
7652}
7653
7654/// Try to transform a switch that has "holes" in it to a contiguous sequence
7655/// of cases.
7656///
7657/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
7658/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
7659///
7660/// This converts a sparse switch into a dense switch which allows better
7661/// lowering and could also allow transforming into a lookup table.
///
/// Returns true if the condition and the case values of the switch were
/// rewritten, and false if the switch was left unchanged.
// NOTE(review): the line that opens this definition (function name and the
// leading parameters; the body uses SI and Builder) and the declaration of
// Values are not present in this listing.
7663 const DataLayout &DL,
7664 const TargetTransformInfo &TTI) {
7665 auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
 // The case values are manipulated as 64-bit integers below, so wider
 // conditions are rejected, as are widths for which
 // DL.fitsInLegalInteger() is false.
7666 if (CondTy->getIntegerBitWidth() > 64 ||
7667 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7668 return false;
7669 // Only bother with this optimization if there are more than 3 switch cases;
7670 // SDAG will only bother creating jump tables for 4 or more cases.
7671 if (SI->getNumCases() < 4)
7672 return false;
7673
7674 // This transform is agnostic to the signedness of the input or case values. We
7675 // can treat the case values as signed or unsigned. We can optimize more common
7676 // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
7677 // as signed.
7679 for (const auto &C : SI->cases())
7680 Values.push_back(C.getCaseValue()->getValue().getSExtValue());
7681 llvm::sort(Values);
7682
7683 // If the switch is already dense, there's nothing useful to do here.
7684 if (isSwitchDense(Values, SI->getFunction()->hasOptSize()))
7685 return false;
7686
7687 // First, transform the values such that they start at zero and ascend.
 // Values is sorted, so Values[0] is the smallest (signed) case value.
7688 int64_t Base = Values[0];
7689 for (auto &V : Values)
7690 V -= (uint64_t)(Base);
7691
7692 // Now we have signed numbers that have been shifted so that, given enough
7693 // precision, there are no negative values. Since the rest of the transform
7694 // is bitwise only, we switch now to an unsigned representation.
7695
7696 // This transform can be done speculatively because it is so cheap - it
7697 // results in a subtract and a single rotate operation being inserted.
7698
7699 // countr_zero(0) returns 64. As Values is guaranteed to have more than
7700 // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
7701 // less than 64.
7702 unsigned Shift = 64;
7703 for (auto &V : Values)
7704 Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
7705 assert(Shift < 64);
7706 if (Shift > 0)
7707 for (auto &V : Values)
7708 V = (int64_t)((uint64_t)V >> Shift);
7709
7710 if (!isSwitchDense(Values, SI->getFunction()->hasOptSize()))
7711 // Transform didn't create a dense switch.
7712 return false;
7713
7714 // The obvious transform is to shift the switch condition right and emit a
7715 // check that the condition actually cleanly divided by GCD, i.e.
7716 // (C & ((1 << Shift) - 1)) == 0
7717 // inserting a new CFG edge to handle the case where it didn't divide cleanly.
7718 //
7719 // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
7720 // shift and puts the shifted-off bits in the uppermost bits. If any of these
7721 // are nonzero then the switch condition will be very large and will hit the
7722 // default case.
7723
7724 auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
7725 Builder.SetInsertPoint(SI);
7726 Value *Sub =
7727 Builder.CreateSub(SI->getCondition(), ConstantInt::getSigned(Ty, Base));
 // The rotate right by Shift is emitted as a funnel shift left of (Sub, Sub)
 // by BitWidth - Shift.
7728 Value *Rot = Builder.CreateIntrinsic(
7729 Ty, Intrinsic::fshl,
7730 {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
7731 SI->replaceUsesOfWith(SI->getCondition(), Rot);
7732
 // Apply the same subtract-and-shift to every case value so that the cases
 // keep matching the rewritten condition.
7733 for (auto Case : SI->cases()) {
7734 auto *Orig = Case.getCaseValue();
7735 auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base, true);
7736 Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
7737 }
7738 return true;
7739}
7740
7741/// Tries to transform the switch when the condition is umin with a constant.
7742/// In that case, the default branch can be replaced by the constant's branch.
7743/// This method also removes dead cases when the simplification cannot replace
7744/// the default branch.
7745///
7746/// For example:
7747/// switch(umin(a, 3)) {
7748/// case 0:
7749/// case 1:
7750/// case 2:
7751/// case 3:
7752/// case 4:
7753/// // ...
7754/// default:
7755/// unreachable
7756/// }
7757///
7758/// Transforms into:
7759///
7760/// switch(a) {
7761/// case 0:
7762/// case 1:
7763/// case 2:
7764/// default:
7765/// // This is case 3
7766/// }
///
/// Returns true if at least one dead case was removed or the default
/// destination was replaced, and false if nothing was changed.
// NOTE(review): the signature line and the declarations of Constant, Updates
// and SIW are not present in this listing.
7768 Value *A;
7770
7771 if (!match(SI->getCondition(), m_UMin(m_Value(A), m_ConstantInt(Constant))))
7772 return false;
7773
7776 BasicBlock *BB = SIW->getParent();
7777
7778 // Dead cases are removed even when the simplification fails.
7779 // A case is dead when its value is higher than the Constant.
7780 for (auto I = SI->case_begin(), E = SI->case_end(); I != E;) {
7781 if (!I->getCaseValue()->getValue().ugt(Constant->getValue())) {
7782 ++I;
7783 continue;
7784 }
7785 BasicBlock *DeadCaseBB = I->getCaseSuccessor();
7786 DeadCaseBB->removePredecessor(BB);
7787 Updates.push_back({DominatorTree::Delete, BB, DeadCaseBB});
 // Continue from the iterator returned by removeCase and refresh the end
 // iterator after every removal.
7788 I = SIW.removeCase(I);
7789 E = SIW->case_end();
7790 }
7791
7792 auto Case = SI->findCaseValue(Constant);
7793 // If the case value is not found, `findCaseValue` returns the default case.
7794 // In this scenario, since there is no explicit `case 3:`, the simplification
7795 // fails. The simplification also fails when the switch's default destination
7796 // is reachable.
7797 if (!SI->defaultDestUnreachable() || Case == SI->case_default()) {
7798 if (DTU)
7799 DTU->applyUpdates(Updates);
 // Removing dead cases above still counts as a change.
7800 return !Updates.empty();
7801 }
7802
 // Make the successor of the Constant case the new default destination, drop
 // that case, and switch directly on the umin operand A.
7803 BasicBlock *Unreachable = SI->getDefaultDest();
7804 SIW.replaceDefaultDest(Case);
7805 SIW.removeCase(Case);
7806 SIW->setCondition(A);
7807
7808 Updates.push_back({DominatorTree::Delete, BB, Unreachable});
7809
7810 if (DTU)
7811 DTU->applyUpdates(Updates);
7812
7813 return true;
7814}
7815
7816/// Tries to transform switch of powers of two to reduce switch range.
7817/// For example, switch like:
7818/// switch (C) { case 1: case 2: case 64: case 128: }
7819/// will be transformed to:
7820/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
7821///
7822/// This transformation allows better lowering and may transform the switch
7823/// instruction into a sequence of bit manipulation and a smaller
7824/// log2(C)-indexed value table (instead of traditionally emitting a load of the
7825/// address of the jump target, and indirectly jump to it).
///
/// Returns true if the switch was rewritten to switch on the trailing-zero
/// count of its condition, and false if it was left unchanged.
// NOTE(review): the line that opens this definition (function name and the
// leading parameters; the body uses SI and Builder) is not present in this
// listing, and neither are the declaration of Values, the initializer of
// HasWeights and the initializer of OrigDenominator.
7827 DomTreeUpdater *DTU,
7828 const DataLayout &DL,
7829 const TargetTransformInfo &TTI) {
7830 Value *Condition = SI->getCondition();
7831 LLVMContext &Context = SI->getContext();
7832 auto *CondTy = cast<IntegerType>(Condition->getType());
7833
 // Case values are read as uint64_t below, so wider conditions are rejected,
 // as are widths for which DL.fitsInLegalInteger() is false.
7834 if (CondTy->getIntegerBitWidth() > 64 ||
7835 !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
7836 return false;
7837
7838 // Ensure trailing zeroes count intrinsic emission is not too expensive.
7839 IntrinsicCostAttributes Attrs(Intrinsic::cttz, CondTy,
7840 {Condition, ConstantInt::getTrue(Context)});
7841 if (TTI.getIntrinsicInstrCost(Attrs, TTI::TCK_SizeAndLatency) >
7842 TTI::TCC_Basic * 2)
7843 return false;
7844
7845 // Only bother with this optimization if there are more than 3 switch cases.
7846 // SDAG will start emitting jump tables for 4 or more cases.
7847 if (SI->getNumCases() < 4)
7848 return false;
7849
7850 // Check that switch cases are powers of two.
7852 for (const auto &Case : SI->cases()) {
7853 uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
7854 if (llvm::has_single_bit(CaseValue))
7855 Values.push_back(CaseValue);
7856 else
7857 return false;
7858 }
7859
7860 // isSwitchDense requires case values to be sorted.
7861 llvm::sort(Values);
 // Density is judged on the rewritten case values: the number of cases
 // against the span between the smallest and the largest trailing-zero count.
7862 if (!isSwitchDense(Values.size(),
7863 llvm::countr_zero(Values.back()) -
7864 llvm::countr_zero(Values.front()) + 1,
7865 SI->getFunction()->hasOptSize()))
7866 // Transform is unable to generate dense switch.
7867 return false;
7868
7869 Builder.SetInsertPoint(SI);
7870
7871 if (!SI->defaultDestUnreachable()) {
7872 // Let non-power-of-two inputs jump to the default case, when the latter is
7873 // reachable.
7874 auto *PopC = Builder.CreateUnaryIntrinsic(Intrinsic::ctpop, Condition);
7875 auto *IsPow2 = Builder.CreateICmpEQ(PopC, ConstantInt::get(CondTy, 1));
7876
 // Split the block in front of the switch and branch on IsPow2: to the
 // switch (now in SplitBB) when it holds, to the default block otherwise.
7877 auto *OrigBB = SI->getParent();
7878 auto *DefaultCaseBB = SI->getDefaultDest();
7879 BasicBlock *SplitBB = SplitBlock(OrigBB, SI, DTU);
7880 auto It = OrigBB->getTerminator()->getIterator();
7881 SmallVector<uint32_t> Weights;
7882 auto HasWeights =
7884 auto *BI = CondBrInst::Create(IsPow2, SplitBB, DefaultCaseBB, It);
7885 if (HasWeights && any_of(Weights, not_equal_to(0))) {
7886 // IsPow2 covers a subset of the cases in which we'd go to the default
7887 // label. The other is those powers of 2 that don't appear in the case
7888 // statement. We don't know the distribution of the values coming in, so
7889 // the safest is to split 50-50 the original probability to `default`.
7890 uint64_t OrigDenominator =
7892 SmallVector<uint64_t> NewWeights(2);
7893 NewWeights[1] = Weights[0] / 2;
7894 NewWeights[0] = OrigDenominator - NewWeights[1];
7895 setFittedBranchWeights(*BI, NewWeights, /*IsExpected=*/false);
7896 // The probability of executing the default block stays constant. It was
7897 // p_d = Weights[0] / OrigDenominator
7898 // we rewrite as W/D
7899 // We want to find the probability of the default branch of the switch
7900 // statement. Let's call it X. We have W/D = W/2D + X * (1-W/2D)
7901 // i.e. the original probability is the probability we go to the default
7902 // branch from the BI branch, or we take the default branch on the SI.
7903 // Meaning X = W / (2D - W), or (W/2) / (D - W/2)
7904 // This matches using W/2 for the default branch probability numerator and
7905 // D-W/2 as the denominator.
7906 Weights[0] = NewWeights[1];
7907 uint64_t CasesDenominator = OrigDenominator - Weights[0];
7908 for (auto &W : drop_begin(Weights))
7909 W = NewWeights[0] * static_cast<double>(W) / CasesDenominator;
7910
7911 setBranchWeights(*SI, Weights, /*IsExpected=*/false);
7912 }
7913 // BI is handling the default case for SI, and so should share its DebugLoc.
7914 BI->setDebugLoc(SI->getDebugLoc());
 // It still refers to the terminator OrigBB had before BI was created in
 // front of it; BI takes its place.
7915 It->eraseFromParent();
7916
7917 addPredecessorToBlock(DefaultCaseBB, OrigBB, SplitBB);
7918 if (DTU)
7919 DTU->applyUpdates({{DominatorTree::Insert, OrigBB, DefaultCaseBB}});
7920 }
7921
7922 // Replace each case with its trailing zeros number.
7923 for (auto &Case : SI->cases()) {
7924 auto *OrigValue = Case.getCaseValue();
7925 Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
7926 OrigValue->getValue().countr_zero()));
7927 }
7928
7929 // Replace condition with its trailing zeros number.
7930 auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
7931 Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
7932
7933 SI->setCondition(ConditionTrailingZeros);
7934
7935 return true;
7936}
7937
7938/// Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have
7939/// the same destination.
///
/// Handled shapes: a switch with two cases (the third result is covered by the
/// default destination), or a switch with three cases and an unreachable
/// default. The compare intrinsic must have no other use.
///
/// Returns true if the switch and the compare intrinsic were replaced by an
/// icmp and a conditional branch, and false if nothing was changed.
// NOTE(review): the line that opens this definition (function name and the
// leading parameters; the body uses SI and Builder) and the declarations of
// Weights and Pred are not present in this listing.
7941 DomTreeUpdater *DTU) {
7942 auto *Cmp = dyn_cast<CmpIntrinsic>(SI->getCondition());
7943 if (!Cmp || !Cmp->hasOneUse())
7944 return false;
7945
7947 bool HasWeights = extractBranchWeights(getBranchWeightMDNode(*SI), Weights);
7948 if (!HasWeights)
7949 Weights.resize(4); // Avoid checking HasWeights everywhere.
7950
7951 // Normalize to [us]cmp == Res ? Succ : OtherSucc.
7952 int64_t Res;
7953 BasicBlock *Succ, *OtherSucc;
7954 uint32_t SuccWeight = 0, OtherSuccWeight = 0;
 // Only set in the three-case form, where the unreachable default loses BB
 // as a predecessor once the switch is gone.
7955 BasicBlock *Unreachable = nullptr;
7956
7957 if (SI->getNumCases() == 2) {
7958 // Find which of 1, 0 or -1 is missing (handled by default dest).
7959 SmallSet<int64_t, 3> Missing;
7960 Missing.insert(1);
7961 Missing.insert(0);
7962 Missing.insert(-1);
7963
7964 Succ = SI->getDefaultDest();
7965 SuccWeight = Weights[0];
7966 OtherSucc = nullptr;
7967 for (auto &Case : SI->cases()) {
7968 std::optional<int64_t> Val =
7969 Case.getCaseValue()->getValue().trySExtValue();
7970 if (!Val)
7971 return false;
 // Bail out on a case value other than 1, 0 or -1.
7972 if (!Missing.erase(*Val))
7973 return false;
 // Both explicit cases must share one destination.
7974 if (OtherSucc && OtherSucc != Case.getCaseSuccessor())
7975 return false;
7976 OtherSucc = Case.getCaseSuccessor();
7977 OtherSuccWeight += Weights[Case.getSuccessorIndex()];
7978 }
7979
7980 assert(Missing.size() == 1 && "Should have one case left");
7981 Res = *Missing.begin();
7982 } else if (SI->getNumCases() == 3 && SI->defaultDestUnreachable()) {
7983 // Normalize so that Succ is taken once and OtherSucc twice.
7984 Unreachable = SI->getDefaultDest();
7985 Succ = OtherSucc = nullptr;
7986 for (auto &Case : SI->cases()) {
7987 BasicBlock *NewSucc = Case.getCaseSuccessor();
7988 uint32_t Weight = Weights[Case.getSuccessorIndex()];
7989 if (!OtherSucc || OtherSucc == NewSucc) {
7990 OtherSucc = NewSucc;
7991 OtherSuccWeight += Weight;
7992 } else if (!Succ) {
7993 Succ = NewSucc;
7994 SuccWeight = Weight;
7995 } else if (Succ == NewSucc) {
 // The destination recorded as Succ turned out to be the one that is
 // taken twice, so the two roles are exchanged.
7996 std::swap(Succ, OtherSucc);
7997 std::swap(SuccWeight, OtherSuccWeight);
7998 } else
7999 return false;
8000 }
 // Validate the case values and pick up the value that leads to Succ.
8001 for (auto &Case : SI->cases()) {
8002 std::optional<int64_t> Val =
8003 Case.getCaseValue()->getValue().trySExtValue();
8004 if (!Val || (Val != 1 && Val != 0 && Val != -1))
8005 return false;
8006 if (Case.getCaseSuccessor() == Succ) {
8007 Res = *Val;
8008 break;
8009 }
8010 }
8011 } else {
8012 return false;
8013 }
8014
8015 // Determine predicate for the missing case.
8017 switch (Res) {
8018 case 1:
8019 Pred = ICmpInst::ICMP_UGT;
8020 break;
8021 case 0:
8022 Pred = ICmpInst::ICMP_EQ;
8023 break;
8024 case -1:
8025 Pred = ICmpInst::ICMP_ULT;
8026 break;
8027 }
 // The unsigned predicates chosen above are converted for a signed compare.
8028 if (Cmp->isSigned())
8029 Pred = ICmpInst::getSignedPredicate(Pred);
8030
8031 MDNode *NewWeights = nullptr;
8032 if (HasWeights)
8033 NewWeights = MDBuilder(SI->getContext())
8034 .createBranchWeights(SuccWeight, OtherSuccWeight);
8035
8036 BasicBlock *BB = SI->getParent();
8037 Builder.SetInsertPoint(SI->getIterator());
8038 Value *ICmp = Builder.CreateICmp(Pred, Cmp->getLHS(), Cmp->getRHS());
8039 Builder.CreateCondBr(ICmp, Succ, OtherSucc, NewWeights,
8040 SI->getMetadata(LLVMContext::MD_unpredictable));
8041 OtherSucc->removePredecessor(BB);
8042 if (Unreachable)
8043 Unreachable->removePredecessor(BB);
8044 SI->eraseFromParent();
8045 Cmp->eraseFromParent();
8046 if (DTU && Unreachable)
8047 DTU->applyUpdates({{DominatorTree::Delete, BB, Unreachable}});
8048 return true;
8049}
8050
8051/// Checking whether two BBs are equal depends on the contents of the
8052/// BasicBlock and the incoming values of their successor PHINodes.
8053/// PHINode::getIncomingValueForBlock is O(|Preds|), so we'd like to avoid
8054/// calling this function on each BasicBlock every time isEqual is called,
8055/// especially since the same BasicBlock may be passed as an argument multiple
8056/// times. To do this, we can precompute a map of PHINode -> Pred BasicBlock ->
8057/// IncomingValue and add it in the Wrapper so isEqual can do O(1) checking
8058/// of the incoming values.
// NOTE(review): the line that opens this struct, its data members and its
// map type aliases are not present in this listing.
8061
8062 // One Phi usually has < 8 incoming values.
8066
8067 // We only merge the identical non-entry BBs with
8068 // - terminator unconditional br to Succ (pending relaxation),
8069 // - does not have address taken / weird control.
 // Returns true only if BB passes every check below.
8070 static bool canBeMerged(const BasicBlock *BB) {
8071 assert(BB && "Expected non-null BB");
8072 // Entry block cannot be eliminated or have predecessors.
8073 if (BB->isEntryBlock())
8074 return false;
8075
8076 // Single successor and must be Succ.
8077 // FIXME: Relax that the terminator is a BranchInst by checking for equality
8078 // on other kinds of terminators. We decide to only support unconditional
8079 // branches for now for compile time reasons.
8080 auto *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
8081 if (!BI)
8082 return false;
8083
8084 // Avoid blocks that are "address-taken" (blockaddress) or have unusual
8085 // uses.
8086 if (BB->hasAddressTaken() || BB->isEHPad())
8087 return false;
8088
8089 // TODO: relax this condition to merge equal blocks with >1 instructions?
8090 // Here, we use a O(1) form of the O(n) comparison of `size() != 1`.
8091 if (&BB->front() != &BB->back())
8092 return false;
8093
8094 // The BB must have at least one predecessor.
8095 if (pred_empty(BB))
8096 return false;
8097
8098 return true;
8099 }
8100};
8101
// NOTE(review): the line that opens this struct and the declaration of BI in
// getHashValue are not present in this listing.
 // Hash a wrapped block by its successor and by the values the successor's
 // PHIs receive from the block.
8103 static unsigned getHashValue(const EqualBBWrapper *EBW) {
8104 BasicBlock *BB = EBW->BB;
8106 assert(BB->size() == 1 && "Expected just a single branch in the BB");
8107
8108 // Since we assume the BB is just a single UncondBrInst with a single
8109 // successor, we hash as the BB and the incoming Values of its successor
8110 // PHIs. Initially, we tried to just use the successor BB as the hash, but
8111 // including the incoming PHI values leads to better performance.
8112 // We also tried to build a map from BB -> Succs.IncomingValues ahead of
8113 // time and passing it in EqualBBWrapper, but this slowed down the average
8114 // compile time without having any impact on the worst case compile time.
8115 BasicBlock *Succ = BI->getSuccessor();
8116 auto PhiValsForBB = map_range(Succ->phis(), [&](PHINode &Phi) {
8117 return (*EBW->PhiPredIVs)[&Phi][BB];
8118 });
8119 return hash_combine(Succ, hash_combine_range(PhiValsForBB));
8120 }
 // Two wrapped blocks are equal when they branch to the same successor and
 // every PHI in that successor receives the same value from both of them.
8121 static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS) {
8122 BasicBlock *A = LHS->BB;
8123 BasicBlock *B = RHS->BB;
8124
8125 // FIXME: we checked that the size of A and B are both 1 in
8126 // mergeIdenticalUncondBBs to make the Case list smaller to
8127 // improve performance. If we decide to support BasicBlocks with more
8128 // than just a single instruction, we need to check that A.size() ==
8129 // B.size() here, and we need to check more than just the BranchInsts
8130 // for equality.
8131
8132 UncondBrInst *ABI = cast<UncondBrInst>(A->getTerminator());
8133 UncondBrInst *BBI = cast<UncondBrInst>(B->getTerminator());
8134 if (ABI->getSuccessor() != BBI->getSuccessor())
8135 return false;
8136
8137 // Need to check that PHIs in successor have matching values.
8138 BasicBlock *Succ = ABI->getSuccessor();
8139 auto IfPhiIVMatch = [&](PHINode &Phi) {
8140 // Replace O(|Pred|) Phi.getIncomingValueForBlock with this O(1) hashmap
8141 // query.
8142 auto &PredIVs = (*LHS->PhiPredIVs)[&Phi];
8143 return PredIVs[A] == PredIVs[B];
8144 };
8145 return all_of(Succ->phis(), IfPhiIVMatch);
8146 }
8147};
8148
8149// Merge identical BBs into one of them.
//
// For every group of candidates that compare equal, the first one seen is
// kept and all edges into the others are redirected to it. The duplicate
// blocks are left in place without predecessors. Returns true if at least
// one block was merged.
// NOTE(review): the line that opens this definition (function name and the
// Candidates parameter) and the declarations of BBs2Merge, Phis, Keep,
// Updates, DeadPreds and LivePreds are not present in this listing.
8151 DomTreeUpdater *DTU) {
8152 if (Candidates.size() < 2)
8153 return false;
8154
8155 // Wrap every candidate and collect the PHINodes of its successor so they
8156 // can be processed into PhiPredIVs. We decide to process
8157 // an entire PHI at once after the loop, opposed to calling
8158 // getIncomingValueForBlock inside this loop, since each call to
8159 // getIncomingValueForBlock is O(|Preds|).
8160 EqualBBWrapper::Phi2IVsMap PhiPredIVs;
8162 BBs2Merge.reserve(Candidates.size());
8164
8165 for (BasicBlock *BB : Candidates) {
8166 BasicBlock *Succ = BB->getSingleSuccessor();
8167 assert(Succ && "Expected unconditional BB");
8168 BBs2Merge.emplace_back(EqualBBWrapper{BB, &PhiPredIVs});
8169 Phis.insert_range(make_pointer_range(Succ->phis()));
8170 }
8171
8172 // Precompute a data structure to improve performance of isEqual for
8173 // EqualBBWrapper.
8174 PhiPredIVs.reserve(Phis.size());
8175 for (PHINode *Phi : Phis) {
8176 auto &IVs =
8177 PhiPredIVs.try_emplace(Phi, Phi->getNumIncomingValues()).first->second;
8178 // Pre-fill all incoming for O(1) lookup as Phi.getIncomingValueForBlock is
8179 // O(|Pred|).
8180 for (auto &IV : Phi->incoming_values())
8181 IVs.insert({Phi->getIncomingBlock(IV), IV.get()});
8182 }
8183
8184 // Group duplicates using DenseSet with custom equality/hashing.
8185 // Build a set such that if the EqualBBWrapper exists in the set and another
8186 // EqualBBWrapper isEqual, then the equivalent EqualBBWrapper which is not in
8187 // the set should be replaced with the one in the set. If the EqualBBWrapper
8188 // is not in the set, then it should be added to the set so other
8189 // EqualBBWrapper can check against it in the same manner. We use
8190 // EqualBBWrapper instead of just BasicBlock because we'd like to pass around
8191 // information to isEqual, getHashValue, and when doing the replacement
8192 // with better performance.
8194 Keep.reserve(BBs2Merge.size());
8195
8197 Updates.reserve(BBs2Merge.size() * 2);
8198
8199 bool MadeChange = false;
8200
8201 // Helper: redirect all edges X -> Dead to X -> Live.
8202 auto RedirectIncomingEdges = [&](BasicBlock *Dead, BasicBlock *Live) {
8205 if (DTU) {
8206 // All predecessors of Dead (except the common predecessors) will be
8207 // moved to Live.
8208 Updates.reserve(Updates.size() + DeadPreds.size() * 2);
8210 predecessors(Live));
8211 for (BasicBlock *PredOfDead : DeadPreds) {
8212 // A common predecessor of Dead and Live already has an edge to Live, so
 // only the deletion of its edge to Dead is recorded.
8213 if (!LivePreds.contains(PredOfDead))
8214 Updates.push_back({DominatorTree::Insert, PredOfDead, Live});
8215 Updates.push_back({DominatorTree::Delete, PredOfDead, Dead});
8216 }
8217 }
8218 LLVM_DEBUG(dbgs() << "Replacing duplicate pred BB ";
8219 Dead->printAsOperand(dbgs()); dbgs() << " with pred ";
8220 Live->printAsOperand(dbgs()); dbgs() << " for ";
8221 Live->getSingleSuccessor()->printAsOperand(dbgs());
8222 dbgs() << "\n");
8223 // Replace successors in all predecessors of Dead.
8224 for (BasicBlock *PredOfDead : DeadPreds) {
8225 Instruction *T = PredOfDead->getTerminator();
8226 T->replaceSuccessorWith(Dead, Live);
8227 }
8228 };
8229
8230 // Try to eliminate duplicate blocks.
8231 for (const auto &EBW : BBs2Merge) {
8232 // EBW is a candidate for simplification. If we find a duplicate BB,
8233 // replace it.
8234 const auto &[It, Inserted] = Keep.insert(&EBW);
8235 if (Inserted)
8236 continue;
8237
8238 // Found duplicate: merge EBW.BB into the canonical block (*It)->BB.
8239 BasicBlock *KeepBB = (*It)->BB;
8240 BasicBlock *DeadBB = EBW.BB;
8241
8242 // Avoid merging a BB with itself.
8243 if (KeepBB == DeadBB)
8244 continue;
8245
8246 // Redirect all edges into DeadBB to KeepBB.
8247 RedirectIncomingEdges(DeadBB, KeepBB);
8248
8249 // Now DeadBB should become unreachable; leave DCE to later,
8250 // but we can try to simplify it if it only branches to Succ.
8251 // (We won't erase here to keep the routine simple and DT-safe.)
8252 assert(pred_empty(DeadBB) && "DeadBB should be unreachable.");
8253 MadeChange = true;
8254 }
8255
8256 if (DTU && !Updates.empty())
8257 DTU->applyUpdates(Updates);
8258
8259 return MadeChange;
8260}
8261
/// Merge identical arms of the switch \p SI by passing the filtered candidate
/// blocks to mergeIdenticalBBs. Returns the result of that call.
// NOTE(review): the lines carrying the initializer of FilteredArms are not
// present in this listing, so the filter applied to the arms cannot be seen
// here.
8262bool SimplifyCFGOpt::simplifyDuplicateSwitchArms(SwitchInst *SI,
8263 DomTreeUpdater *DTU) {
8264 // Collect candidate switch-arms top-down.
8265 SmallSetVector<BasicBlock *, 16> FilteredArms(
8268 return mergeIdenticalBBs(FilteredArms.getArrayRef(), DTU);
8269}
8270
/// Merge identical predecessors of \p BB by passing the filtered candidate
/// blocks to mergeIdenticalBBs. Returns false without doing anything when
/// \p BB is null, has fewer than two predecessors, or is a loop header while
/// canonical loops must be preserved; otherwise returns the result of
/// mergeIdenticalBBs.
// NOTE(review): the lines carrying the initializer of FilteredPreds are not
// present in this listing, so the filter applied to the predecessors cannot
// be seen here.
8271bool SimplifyCFGOpt::simplifyDuplicatePredecessors(BasicBlock *BB,
8272 DomTreeUpdater *DTU) {
8273 // Need at least 2 predecessors to do anything.
8274 if (!BB || !BB->hasNPredecessorsOrMore(2))
8275 return false;
8276
8277 // Compilation time consideration: retain the canonical loop, otherwise, we
8278 // require more time in the later loop canonicalization.
8279 if (Options.NeedCanonicalLoop && is_contained(LoopHeaders, BB))
8280 return false;
8281
8282 // Collect candidate predecessors bottom-up.
8283 SmallSetVector<BasicBlock *, 8> FilteredPreds(
8286 return mergeIdenticalBBs(FilteredPreds.getArrayRef(), DTU);
8287}
8288
8289bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
8290 BasicBlock *BB = SI->getParent();
8291
8292 if (isValueEqualityComparison(SI)) {
8293 // If we only have one predecessor, and if it is a branch on this value,
8294 // see if that predecessor totally determines the outcome of this switch.
8295 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8296 if (simplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
8297 return requestResimplify();
8298
8299 Value *Cond = SI->getCondition();
8300 if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
8301 if (simplifySwitchOnSelect(SI, Select))
8302 return requestResimplify();
8303
8304 // If the block only contains the switch, see if we can fold the block
8305 // away into any preds.
8306 if (SI == &*BB->begin())
8307 if (foldValueComparisonIntoPredecessors(SI, Builder))
8308 return requestResimplify();
8309 }
8310
8311 // Try to transform the switch into an icmp and a branch.
8312 // The conversion from switch to comparison may lose information on
8313 // impossible switch values, so disable it early in the pipeline.
8314 if (Options.ConvertSwitchRangeToICmp && turnSwitchRangeIntoICmp(SI, Builder))
8315 return requestResimplify();
8316
8317 // Remove unreachable cases.
8318 if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
8319 return requestResimplify();
8320
8321 if (simplifySwitchOfCmpIntrinsic(SI, Builder, DTU))
8322 return requestResimplify();
8323
8324 if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
8325 return requestResimplify();
8326
8327 if (Options.ForwardSwitchCondToPhi && forwardSwitchConditionToPHI(SI))
8328 return requestResimplify();
8329
8330 // The conversion of switches to arithmetic or lookup table is disabled in
8331 // the early optimization pipeline, as it may lose information or make the
8332 // resulting code harder to analyze.
8333 if (Options.ConvertSwitchToArithmetic || Options.ConvertSwitchToLookupTable)
8334 if (simplifySwitchLookup(SI, Builder, DTU, DL, TTI,
8335 Options.ConvertSwitchToLookupTable))
8336 return requestResimplify();
8337
8338 if (simplifySwitchOfPowersOfTwo(SI, Builder, DTU, DL, TTI))
8339 return requestResimplify();
8340
8341 if (reduceSwitchRange(SI, Builder, DL, TTI))
8342 return requestResimplify();
8343
8344 if (HoistCommon &&
8345 hoistCommonCodeFromSuccessors(SI, !Options.HoistCommonInsts))
8346 return requestResimplify();
8347
8348 // We can merge identical switch arms early to enhance more aggressive
8349 // optimization on switch.
8350 if (simplifyDuplicateSwitchArms(SI, DTU))
8351 return requestResimplify();
8352
8353 if (simplifySwitchWhenUMin(SI, DTU))
8354 return requestResimplify();
8355
8356 return false;
8357}
8358
/// Simplify the indirect branch \p IBI.
///
/// Destinations that do not have their address taken, or that are listed more
/// than once, are removed. If no destination is left the branch is replaced by
/// an unreachable; if exactly one is left it is replaced by a direct branch
/// (per the comment in that path). Otherwise branch weights, when present, are
/// rewritten so each remaining destination carries the summed weight of all
/// its original entries, and a select address is handed to
/// simplifyIndirectBrOnSelect.
// NOTE(review): one statement in the zero-destination path and the statements
// of the one-destination path are not present in this listing.
8359bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
8360 BasicBlock *BB = IBI->getParent();
8361 bool Changed = false;
8362 SmallVector<uint32_t> BranchWeights;
8363 const bool HasBranchWeights = !ProfcheckDisableMetadataFixes &&
8364 extractBranchWeights(*IBI, BranchWeights);
8365
 // Sum the weights per destination block before any destination is removed.
8366 DenseMap<const BasicBlock *, uint64_t> TargetWeight;
8367 if (HasBranchWeights)
8368 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8369 TargetWeight[IBI->getDestination(I)] += BranchWeights[I];
8370
8371 // Eliminate redundant destinations.
8372 SmallPtrSet<Value *, 8> Succs;
8373 SmallSetVector<BasicBlock *, 8> RemovedSuccs;
8374 for (unsigned I = 0, E = IBI->getNumDestinations(); I != E; ++I) {
8375 BasicBlock *Dest = IBI->getDestination(I);
8376 if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
 // Only a destination without its address taken is recorded as removed;
 // a repeated destination keeps its first entry.
8377 if (!Dest->hasAddressTaken())
8378 RemovedSuccs.insert(Dest);
8379 Dest->removePredecessor(BB);
8380 IBI->removeDestination(I);
 // Step back so the loop increment revisits index I, and shrink the bound
 // to match the removal.
8381 --I;
8382 --E;
8383 Changed = true;
8384 }
8385 }
8386
8387 if (DTU) {
8388 std::vector<DominatorTree::UpdateType> Updates;
8389 Updates.reserve(RemovedSuccs.size());
8390 for (auto *RemovedSucc : RemovedSuccs)
8391 Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
8392 DTU->applyUpdates(Updates);
8393 }
8394
8395 if (IBI->getNumDestinations() == 0) {
8396 // If the indirectbr has no successors, change it to unreachable.
8397 new UnreachableInst(IBI->getContext(), IBI->getIterator());
8399 return true;
8400 }
8401
8402 if (IBI->getNumDestinations() == 1) {
8403 // If the indirectbr has one successor, change it to a direct branch.
8406 return true;
8407 }
8408 if (HasBranchWeights) {
8409 SmallVector<uint64_t> NewBranchWeights(IBI->getNumDestinations());
8410 for (size_t I = 0, E = IBI->getNumDestinations(); I < E; ++I)
8411 NewBranchWeights[I] += TargetWeight.find(IBI->getDestination(I))->second;
8412 setFittedBranchWeights(*IBI, NewBranchWeights, /*IsExpected=*/false);
8413 }
8414 if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
8415 if (simplifyIndirectBrOnSelect(IBI, SI))
8416 return requestResimplify();
8417 }
8418 return Changed;
8419}
8420
8421/// Given a block with only a single landing pad and an unconditional branch,
8422/// try to find another basic block which this one can be merged with. This
8423/// handles cases where we have multiple invokes with unique landing pads, but
8424/// a shared handler.
8425///
8426/// We specifically choose to not worry about merging non-empty blocks
8427/// here. That is a PRE/scheduling problem and is best solved elsewhere. In
8428/// practice, the optimizer produces empty landing pad blocks quite frequently
8429/// when dealing with exception dense code. (see: instcombine, gvn, if-else
8430/// sinking in this file)
8431///
8432/// This is primarily a code size optimization. We need to avoid performing
8433/// any transform which might inhibit optimization (such as our ability to
8434/// specialize a particular handler via tail commoning). We do this by not
8435/// merging any blocks which require us to introduce a phi. Since the same
8436/// values are flowing through both blocks, we don't lose any ability to
8437/// specialize. If anything, we make such specialization more likely.
8438///
8439/// TODO - This transformation could remove entries from a phi in the target
8440/// block when the inputs in the phi are the same for the two blocks being
8441/// merged. In some cases, this could result in removal of the PHI entirely.
///
/// Returns true if the predecessors of BB were redirected to an identical
/// block and BB was terminated with an unreachable, and false otherwise.
// NOTE(review): the line that opens this definition (function name and the
// leading parameters; the body uses LPad and BI) and the declarations of
// LPad2, BI2, UniquePreds and UniqueSuccs are not present in this listing.
8443 BasicBlock *BB, DomTreeUpdater *DTU) {
8444 auto Succ = BB->getUniqueSuccessor();
8445 assert(Succ);
8446 // If there's a phi in the successor block, we'd likely have to introduce
8447 // a phi into the merged landing pad block.
8448 if (isa<PHINode>(*Succ->begin()))
8449 return false;
8450
 // Look for another predecessor of Succ that consists of an identical landing
 // pad followed by an identical branch.
8451 for (BasicBlock *OtherPred : predecessors(Succ)) {
8452 if (BB == OtherPred)
8453 continue;
8454 BasicBlock::iterator I = OtherPred->begin();
8456 if (!LPad2 || !LPad2->isIdenticalTo(LPad))
8457 continue;
8458 ++I;
8460 if (!BI2 || !BI2->isIdenticalTo(BI))
8461 continue;
8462
8463 std::vector<DominatorTree::UpdateType> Updates;
8464
8465 // We've found an identical block. Update our predecessors to take that
8466 // path instead and make ourselves dead.
8468 for (BasicBlock *Pred : UniquePreds) {
8469 InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
8470 assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
8471 "unexpected successor");
8472 II->setUnwindDest(OtherPred);
8473 if (DTU) {
8474 Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
8475 Updates.push_back({DominatorTree::Delete, Pred, BB});
8476 }
8477 }
8478
8480 for (BasicBlock *Succ : UniqueSuccs) {
8481 Succ->removePredecessor(BB);
8482 if (DTU)
8483 Updates.push_back({DominatorTree::Delete, BB, Succ});
8484 }
8485
 // Replace the branch of BB with an unreachable.
8486 IRBuilder<> Builder(BI);
8487 Builder.CreateUnreachable();
8488 BI->eraseFromParent();
8489 if (DTU)
8490 DTU->applyUpdates(Updates);
8491 return true;
8492 }
8493 return false;
8494}
8495
/// Try to simplify the block that ends in the unconditional branch \p BI.
///
/// Three shapes are tried in order: a block whose first inspected instruction
/// is already the terminator, a block that starts with an equality icmp
/// against a constant integer (optionally followed by a select), and a block
/// that starts with a landing pad. Returns true if one of the attempts
/// succeeded, false otherwise.
// NOTE(review): the declaration of the iterator I, which selects the first
// instruction that is inspected, is not present in this listing.
8496bool SimplifyCFGOpt::simplifyUncondBranch(UncondBrInst *BI,
8497 IRBuilder<> &Builder) {
8498 BasicBlock *BB = BI->getParent();
8499 BasicBlock *Succ = BI->getSuccessor(0);
8500
8501 // If the Terminator is the only non-phi instruction, simplify the block.
8502 // If LoopHeaders is not empty, check if the block or its successor is a loop
8503 // header. (This is for early invocations before loop simplify and
8504 // vectorization to keep canonical loop forms for nested loops. These blocks
8505 // can be eliminated when the pass is invoked later in the back-end.)
8506 // Note that if BB has only one predecessor then we do not introduce new
8507 // backedge, so we can eliminate BB.
8508 bool NeedCanonicalLoop =
8509 Options.NeedCanonicalLoop &&
8510 (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
8511 (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
8513 if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
8514 !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
8515 return true;
8516
8517 // If the only instruction in the block is a seteq/setne comparison against a
8518 // constant, try to simplify the block.
8519 if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
8520 if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
8521 ++I;
8522 if (I->isTerminator() &&
8523 tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
8524 return true;
 // Also handle an icmp followed by exactly one select before the terminator.
8525 if (isa<SelectInst>(I) && I->getNextNode()->isTerminator() &&
8526 tryToSimplifyUncondBranchWithICmpSelectInIt(ICI, cast<SelectInst>(I),
8527 Builder))
8528 return true;
8529 }
8530 }
8531
8532 // See if we can merge an empty landing pad block with another which is
8533 // equivalent.
8534 if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
8535 ++I;
8536 if (I->isTerminator() && tryToMergeLandingPad(LPad, BI, BB, DTU))
8537 return true;
8538 }
8539
8540 return false;
8541}
8542
/// Return the block that is the single predecessor of every predecessor of
/// BB. Returns nullptr if some predecessor of BB does not have a single
/// predecessor, if two predecessors disagree on it, or if BB has no
/// predecessors at all.
// NOTE(review): the line that opens this definition (function name and the
// BB parameter) is not present in this listing.
8544 BasicBlock *PredPred = nullptr;
8545 for (auto *P : predecessors(BB)) {
8546 BasicBlock *PPred = P->getSinglePredecessor();
8547 if (!PPred || (PredPred && PredPred != PPred))
8548 return nullptr;
8549 PredPred = PPred;
8550 }
8551 return PredPred;
8552}
8553
8554/// Fold the following pattern:
8555/// bb0:
8556/// br i1 %cond1, label %bb1, label %bb2
8557/// bb1:
8558/// br i1 %cond2, label %bb3, label %bb4
8559/// bb2:
8560/// br i1 %cond2, label %bb4, label %bb3
8561/// bb3:
8562/// ...
8563/// bb4:
8564/// ...
8565/// into
8566/// bb0:
8567/// %cond = xor i1 %cond1, %cond2
8568/// br i1 %cond, label %bb4, label %bb3
8569/// bb3:
8570/// ...
8571/// bb4:
8572/// ...
8573/// NOTE: %cond2 always dominates the terminator of bb0.
8575 BasicBlock *BB = BI->getParent();
8576 BasicBlock *BB1 = BI->getSuccessor(0);
8577 BasicBlock *BB2 = BI->getSuccessor(1);
8578 auto IsSimpleSuccessor = [BB](BasicBlock *Succ, CondBrInst *&SuccBI) {
8579 if (Succ == BB)
8580 return false;
8581 if (&Succ->front() != Succ->getTerminator())
8582 return false;
8583 SuccBI = dyn_cast<CondBrInst>(Succ->getTerminator());
8584 if (!SuccBI)
8585 return false;
8586 BasicBlock *Succ1 = SuccBI->getSuccessor(0);
8587 BasicBlock *Succ2 = SuccBI->getSuccessor(1);
8588 return Succ1 != Succ && Succ2 != Succ && Succ1 != BB && Succ2 != BB &&
8589 !isa<PHINode>(Succ1->front()) && !isa<PHINode>(Succ2->front());
8590 };
8591 CondBrInst *BB1BI, *BB2BI;
8592 if (!IsSimpleSuccessor(BB1, BB1BI) || !IsSimpleSuccessor(BB2, BB2BI))
8593 return false;
8594
8595 if (BB1BI->getCondition() != BB2BI->getCondition() ||
8596 BB1BI->getSuccessor(0) != BB2BI->getSuccessor(1) ||
8597 BB1BI->getSuccessor(1) != BB2BI->getSuccessor(0))
8598 return false;
8599
8600 BasicBlock *BB3 = BB1BI->getSuccessor(0);
8601 BasicBlock *BB4 = BB1BI->getSuccessor(1);
8602 IRBuilder<> Builder(BI);
8603 BI->setCondition(
8604 Builder.CreateXor(BI->getCondition(), BB1BI->getCondition()));
8605 BB1->removePredecessor(BB);
8606 BI->setSuccessor(0, BB4);
8607 BB2->removePredecessor(BB);
8608 BI->setSuccessor(1, BB3);
8609 if (DTU) {
8611 Updates.push_back({DominatorTree::Delete, BB, BB1});
8612 Updates.push_back({DominatorTree::Insert, BB, BB4});
8613 Updates.push_back({DominatorTree::Delete, BB, BB2});
8614 Updates.push_back({DominatorTree::Insert, BB, BB3});
8615
8616 DTU->applyUpdates(Updates);
8617 }
8618 bool HasWeight = false;
8619 uint64_t BBTWeight, BBFWeight;
8620 if (extractBranchWeights(*BI, BBTWeight, BBFWeight))
8621 HasWeight = true;
8622 else
8623 BBTWeight = BBFWeight = 1;
8624 uint64_t BB1TWeight, BB1FWeight;
8625 if (extractBranchWeights(*BB1BI, BB1TWeight, BB1FWeight))
8626 HasWeight = true;
8627 else
8628 BB1TWeight = BB1FWeight = 1;
8629 uint64_t BB2TWeight, BB2FWeight;
8630 if (extractBranchWeights(*BB2BI, BB2TWeight, BB2FWeight))
8631 HasWeight = true;
8632 else
8633 BB2TWeight = BB2FWeight = 1;
8634 if (HasWeight) {
8635 uint64_t Weights[2] = {BBTWeight * BB1FWeight + BBFWeight * BB2TWeight,
8636 BBTWeight * BB1TWeight + BBFWeight * BB2FWeight};
8637 setFittedBranchWeights(*BI, Weights, /*IsExpected=*/false,
8638 /*ElideAllZero=*/true);
8639 }
8640 return true;
8641}
8642
8643bool SimplifyCFGOpt::simplifyCondBranch(CondBrInst *BI, IRBuilder<> &Builder) {
8644 assert(
8646 BI->getSuccessor(0) != BI->getSuccessor(1) &&
8647 "Tautological conditional branch should have been eliminated already.");
8648
8649 BasicBlock *BB = BI->getParent();
8650 if (!Options.SimplifyCondBranch ||
8651 BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
8652 return false;
8653
8654 // Conditional branch
8655 if (isValueEqualityComparison(BI)) {
8656 // If we only have one predecessor, and if it is a branch on this value,
8657 // see if that predecessor totally determines the outcome of this
8658 // switch.
8659 if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
8660 if (simplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
8661 return requestResimplify();
8662
8663 // This block must be empty, except for the setcond inst, if it exists.
8664 // Ignore pseudo intrinsics.
8665 for (auto &I : *BB) {
8666 if (isa<PseudoProbeInst>(I) ||
8667 &I == cast<Instruction>(BI->getCondition()))
8668 continue;
8669 if (&I == BI)
8670 if (foldValueComparisonIntoPredecessors(BI, Builder))
8671 return requestResimplify();
8672 break;
8673 }
8674 }
8675
8676 // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
8677 if (simplifyBranchOnICmpChain(BI, Builder, DL))
8678 return true;
8679
8680 // If this basic block has dominating predecessor blocks and the dominating
8681 // blocks' conditions imply BI's condition, we know the direction of BI.
8682 std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
8683 if (Imp) {
8684 // Turn this into a branch on constant.
8685 auto *OldCond = BI->getCondition();
8686 ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
8687 : ConstantInt::getFalse(BB->getContext());
8688 BI->setCondition(TorF);
8690 return requestResimplify();
8691 }
8692
8693 // If this basic block is ONLY a compare and a branch, and if a predecessor
8694 // branches to us and one of our successors, fold the comparison into the
8695 // predecessor and use logical operations to pick the right destination.
8696 if (Options.SpeculateBlocks &&
8697 foldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI, Options.AC,
8698 Options.BonusInstThreshold))
8699 return requestResimplify();
8700
8701 // We have a conditional branch to two blocks that are only reachable
8702 // from BI. We know that the condbr dominates the two blocks, so see if
8703 // there is any identical code in the "then" and "else" blocks. If so, we
8704 // can hoist it up to the branching block.
8705 if (BI->getSuccessor(0)->getSinglePredecessor()) {
8706 if (BI->getSuccessor(1)->getSinglePredecessor()) {
8707 if (HoistCommon &&
8708 hoistCommonCodeFromSuccessors(BI, !Options.HoistCommonInsts))
8709 return requestResimplify();
8710
8711 if (BI && Options.HoistLoadsStoresWithCondFaulting &&
8712 isProfitableToSpeculate(BI, std::nullopt, TTI)) {
8713 SmallVector<Instruction *, 2> SpeculatedConditionalLoadsStores;
8714 auto CanSpeculateConditionalLoadsStores = [&]() {
8715 for (auto *Succ : successors(BB)) {
8716 for (Instruction &I : *Succ) {
8717 if (I.isTerminator()) {
8718 if (I.getNumSuccessors() > 1)
8719 return false;
8720 continue;
8721 } else if (!isSafeCheapLoadStore(&I, TTI) ||
8722 SpeculatedConditionalLoadsStores.size() ==
8724 return false;
8725 }
8726 SpeculatedConditionalLoadsStores.push_back(&I);
8727 }
8728 }
8729 return !SpeculatedConditionalLoadsStores.empty();
8730 };
8731
8732 if (CanSpeculateConditionalLoadsStores()) {
8733 hoistConditionalLoadsStores(BI, SpeculatedConditionalLoadsStores,
8734 std::nullopt, nullptr);
8735 return requestResimplify();
8736 }
8737 }
8738 } else {
8739 // If Successor #1 has multiple preds, we may be able to conditionally
8740 // execute Successor #0 if it branches to Successor #1.
8741 Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
8742 if (Succ0TI->getNumSuccessors() == 1 &&
8743 Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
8744 if (speculativelyExecuteBB(BI, BI->getSuccessor(0)))
8745 return requestResimplify();
8746 }
8747 } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
8748 // If Successor #0 has multiple preds, we may be able to conditionally
8749 // execute Successor #1 if it branches to Successor #0.
8750 Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
8751 if (Succ1TI->getNumSuccessors() == 1 &&
8752 Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
8753 if (speculativelyExecuteBB(BI, BI->getSuccessor(1)))
8754 return requestResimplify();
8755 }
8756
8757 // If this is a branch on something for which we know the constant value in
8758 // predecessors (e.g. a phi node in the current block), thread control
8759 // through this block.
8760 if (foldCondBranchOnValueKnownInPredecessor(BI))
8761 return requestResimplify();
8762
8763 // Scan predecessor blocks for conditional branches.
8764 for (BasicBlock *Pred : predecessors(BB))
8765 if (CondBrInst *PBI = dyn_cast<CondBrInst>(Pred->getTerminator()))
8766 if (PBI != BI)
8767 if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
8768 return requestResimplify();
8769
8770 // Look for diamond patterns.
8771 if (MergeCondStores)
8772 if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
8773 if (CondBrInst *PBI = dyn_cast<CondBrInst>(PrevBB->getTerminator()))
8774 if (PBI != BI)
8775 if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
8776 return requestResimplify();
8777
8778 // Look for nested conditional branches.
8779 if (mergeNestedCondBranch(BI, DTU))
8780 return requestResimplify();
8781
8782 return false;
8783}
8784
8785/// Check if passing a value to an instruction will cause undefined behavior.
8786static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
8787 assert(V->getType() == I->getType() && "Mismatched types");
8789 if (!C)
8790 return false;
8791
8792 if (I->use_empty())
8793 return false;
8794
8795 if (C->isNullValue() || isa<UndefValue>(C)) {
8796 // Find the first same-block use with a UB-triggering opcode, skipping
8797 // cross-block or before-I uses.
8798 auto FindUse = llvm::find_if(I->uses(), [I](auto &U) {
8799 auto *Use = cast<Instruction>(U.getUser());
8800 // Only same-block uses after I can witness UB at I's program point.
8801 // Self-uses and before-I uses can occur when I is a PHI node.
8802 if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
8803 return false;
8804 // Change this list when we want to add new instructions.
8805 switch (Use->getOpcode()) {
8806 default:
8807 return false;
8808 case Instruction::GetElementPtr:
8809 case Instruction::Ret:
8810 case Instruction::BitCast:
8811 case Instruction::Load:
8812 case Instruction::Store:
8813 case Instruction::Call:
8814 case Instruction::CallBr:
8815 case Instruction::Invoke:
8816 case Instruction::UDiv:
8817 case Instruction::URem:
8818 // Note: signed div/rem of INT_MIN / -1 is also immediate UB, not
8819 // implemented to avoid code complexity as it is unclear how useful such
8820 // logic is.
8821 case Instruction::SDiv:
8822 case Instruction::SRem:
8823 return true;
8824 }
8825 });
8826 if (FindUse == I->use_end())
8827 return false;
8828 auto &Use = *FindUse;
8829 auto *User = cast<Instruction>(Use.getUser());
8830
8831 // Now make sure that there are no instructions in between that can alter
8832 // control flow (eg. calls)
8833 auto InstrRange =
8834 make_range(std::next(I->getIterator()), User->getIterator());
8835 if (any_of(InstrRange, [](Instruction &I) {
8837 }))
8838 return false;
8839
8840 // Look through GEPs. A load from a GEP derived from NULL is still undefined
8842 if (GEP->getPointerOperand() == I) {
8843 // The type of GEP may differ from the type of base pointer.
8844 // Bail out on vector GEPs, as they are not handled by other checks.
8845 if (GEP->getType()->isVectorTy())
8846 return false;
8847 // The current base address is null, there are four cases to consider:
8848 // getelementptr (TY, null, 0) -> null
8849 // getelementptr (TY, null, not zero) -> may be modified
8850 // getelementptr inbounds (TY, null, 0) -> null
8851 // getelementptr inbounds (TY, null, not zero) -> poison iff null is
8852 // undefined?
8853 if (!GEP->hasAllZeroIndices() &&
8854 (!GEP->isInBounds() ||
8855 NullPointerIsDefined(GEP->getFunction(),
8856 GEP->getPointerAddressSpace())))
8857 PtrValueMayBeModified = true;
8858 return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
8859 }
8860
8861 // Look through return.
8862 if (ReturnInst *Ret = dyn_cast<ReturnInst>(User)) {
8863 bool HasNoUndefAttr =
8864 Ret->getFunction()->hasRetAttribute(Attribute::NoUndef);
8865 // Return undefined to a noundef return value is undefined.
8866 if (isa<UndefValue>(C) && HasNoUndefAttr)
8867 return true;
8868 // Return null to a nonnull+noundef return value is undefined.
8869 if (C->isNullValue() && HasNoUndefAttr &&
8870 Ret->getFunction()->hasRetAttribute(Attribute::NonNull)) {
8871 return !PtrValueMayBeModified;
8872 }
8873 }
8874
8875 // Load from null is undefined.
8876 if (LoadInst *LI = dyn_cast<LoadInst>(User))
8877 if (!LI->isVolatile())
8878 return !NullPointerIsDefined(LI->getFunction(),
8879 LI->getPointerAddressSpace());
8880
8881 // Store to null is undefined.
8883 if (!SI->isVolatile())
8884 return (!NullPointerIsDefined(SI->getFunction(),
8885 SI->getPointerAddressSpace())) &&
8886 SI->getPointerOperand() == I;
8887
8888 // llvm.assume(false/undef) always triggers immediate UB.
8889 if (auto *Assume = dyn_cast<AssumeInst>(User)) {
8890 // Ignore assume operand bundles.
8891 if (I == Assume->getArgOperand(0))
8892 return true;
8893 }
8894
8895 if (auto *CB = dyn_cast<CallBase>(User)) {
8896 if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
8897 return false;
8898 // A call to null is undefined.
8899 if (CB->getCalledOperand() == I)
8900 return true;
8901
8902 if (CB->isArgOperand(&Use)) {
8903 unsigned ArgIdx = CB->getArgOperandNo(&Use);
8904 // Passing null to a nonnnull+noundef argument is undefined.
8906 CB->paramHasNonNullAttr(ArgIdx, /*AllowUndefOrPoison=*/false))
8907 return !PtrValueMayBeModified;
8908 // Passing undef to a noundef argument is undefined.
8909 if (isa<UndefValue>(C) && CB->isPassingUndefUB(ArgIdx))
8910 return true;
8911 }
8912 }
8913 // Div/Rem by zero is immediate UB
8914 if (match(User, m_BinOp(m_Value(), m_Specific(I))) && User->isIntDivRem())
8915 return true;
8916 }
8917 return false;
8918}
8919
8920/// If BB has an incoming value that will always trigger undefined behavior
8921/// (eg. null pointer dereference), remove the branch leading here.
8923 DomTreeUpdater *DTU,
8924 AssumptionCache *AC) {
8925 for (PHINode &PHI : BB->phis())
8926 for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
8927 if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
8928 BasicBlock *Predecessor = PHI.getIncomingBlock(i);
8929 Instruction *T = Predecessor->getTerminator();
8930 IRBuilder<> Builder(T);
8931 if (isa<UncondBrInst>(T)) {
8932 BB->removePredecessor(Predecessor);
8933 // Turn unconditional branches into unreachables.
8934 Builder.CreateUnreachable();
8935 T->eraseFromParent();
8936 if (DTU)
8937 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8938 return true;
8939 } else if (CondBrInst *BI = dyn_cast<CondBrInst>(T)) {
8940 BB->removePredecessor(Predecessor);
8941 // Preserve guarding condition in assume, because it might not be
8942 // inferrable from any dominating condition.
8943 Value *Cond = BI->getCondition();
8944 CallInst *Assumption;
8945 if (BI->getSuccessor(0) == BB)
8946 Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
8947 else
8948 Assumption = Builder.CreateAssumption(Cond);
8949 if (AC)
8950 AC->registerAssumption(cast<AssumeInst>(Assumption));
8951 Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
8952 : BI->getSuccessor(0));
8953 BI->eraseFromParent();
8954 if (DTU)
8955 DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
8956 return true;
8957 } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
8958 // Redirect all branches leading to UB into
8959 // a newly created unreachable block.
8960 BasicBlock *Unreachable = BasicBlock::Create(
8961 Predecessor->getContext(), "unreachable", BB->getParent(), BB);
8962 Builder.SetInsertPoint(Unreachable);
8963 // The new block contains only one instruction: Unreachable
8964 Builder.CreateUnreachable();
8965 for (const auto &Case : SI->cases())
8966 if (Case.getCaseSuccessor() == BB) {
8967 BB->removePredecessor(Predecessor);
8968 Case.setSuccessor(Unreachable);
8969 }
8970 if (SI->getDefaultDest() == BB) {
8971 BB->removePredecessor(Predecessor);
8972 SI->setDefaultDest(Unreachable);
8973 }
8974
8975 if (DTU)
8976 DTU->applyUpdates(
8977 { { DominatorTree::Insert, Predecessor, Unreachable },
8978 { DominatorTree::Delete, Predecessor, BB } });
8979 return true;
8980 }
8981 }
8982
8983 return false;
8984}
8985
8986bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
8987 bool Changed = false;
8988
8989 assert(BB && BB->getParent() && "Block not embedded in function!");
8990 assert(BB->getTerminator() && "Degenerate basic block encountered!");
8991
8992 // Remove basic blocks that have no predecessors (except the entry block)...
8993 // or that just have themself as a predecessor. These are unreachable.
8994 if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
8995 BB->getSinglePredecessor() == BB) {
8996 LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
8997 DeleteDeadBlock(BB, DTU);
8998 return true;
8999 }
9000
9001 // Check to see if we can constant propagate this terminator instruction
9002 // away...
9003 Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
9004 /*TLI=*/nullptr, DTU);
9005
9006 // Check for and eliminate duplicate PHI nodes in this block.
9008
9009 // Check for and remove branches that will always cause undefined behavior.
9011 return requestResimplify();
9012
9013 // Merge basic blocks into their predecessor if there is only one distinct
9014 // pred, and if there is only one distinct successor of the predecessor, and
9015 // if there are no PHI nodes.
9016 if (MergeBlockIntoPredecessor(BB, DTU))
9017 return true;
9018
9019 if (SinkCommon && Options.SinkCommonInsts) {
9020 if (sinkCommonCodeFromPredecessors(BB, DTU) ||
9021 mergeCompatibleInvokes(BB, DTU)) {
9022 // sinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
9023 // so we may now how duplicate PHI's.
9024 // Let's rerun EliminateDuplicatePHINodes() first,
9025 // before foldTwoEntryPHINode() potentially converts them into select's,
9026 // after which we'd need a whole EarlyCSE pass run to cleanup them.
9027 return true;
9028 }
9029 // Merge identical predecessors of this block.
9030 if (simplifyDuplicatePredecessors(BB, DTU))
9031 return true;
9032 }
9033
9034 if (Options.SpeculateBlocks &&
9035 !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
9036 // If there is a trivial two-entry PHI node in this basic block, and we can
9037 // eliminate it, do so now.
9038 if (auto *PN = dyn_cast<PHINode>(BB->begin()))
9039 if (PN->getNumIncomingValues() == 2)
9040 if (foldTwoEntryPHINode(PN, TTI, DTU, Options.AC, DL,
9041 Options.SpeculateUnpredictables))
9042 return true;
9043 }
9044
9045 IRBuilder<> Builder(BB);
9047 Builder.SetInsertPoint(Terminator);
9048 switch (Terminator->getOpcode()) {
9049 case Instruction::UncondBr:
9050 Changed |= simplifyUncondBranch(cast<UncondBrInst>(Terminator), Builder);
9051 break;
9052 case Instruction::CondBr:
9053 Changed |= simplifyCondBranch(cast<CondBrInst>(Terminator), Builder);
9054 break;
9055 case Instruction::Resume:
9056 Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
9057 break;
9058 case Instruction::CleanupRet:
9059 Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
9060 break;
9061 case Instruction::Switch:
9062 Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
9063 break;
9064 case Instruction::Unreachable:
9065 Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
9066 break;
9067 case Instruction::IndirectBr:
9068 Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
9069 break;
9070 }
9071
9072 return Changed;
9073}
9074
9075bool SimplifyCFGOpt::run(BasicBlock *BB) {
9076 bool Changed = false;
9077
9078 // Repeated simplify BB as long as resimplification is requested.
9079 do {
9080 Resimplify = false;
9081
9082 // Perform one round of simplifcation. Resimplify flag will be set if
9083 // another iteration is requested.
9084 Changed |= simplifyOnce(BB);
9085 } while (Resimplify);
9086
9087 return Changed;
9088}
9089
9092 ArrayRef<WeakVH> LoopHeaders) {
9093 return SimplifyCFGOpt(TTI, DTU, BB->getDataLayout(), LoopHeaders,
9094 Options)
9095 .run(BB);
9096}
#define Fail
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
aarch64 promote const
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
static MachineBasicBlock * OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ)
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Function Alias Analysis Results
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:854
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
#define DEBUG_TYPE
Hexagon Common GEP
static bool IsIndirectCall(const MachineInstr *MI)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
static constexpr Value * getValue(Ty &ValueOrUse)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file implements a map that provides insertion order iteration.
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
This file contains the declarations for metadata subclasses.
#define T
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
#define P(N)
if(auto Err=PB.parsePassPipeline(MPM, Passes)) return wrap(std MPM run * Mod
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
Provides some synthesis utilities to produce sequences of values.
This file defines generic set operations that may be used on set's of different types,...
This file implements a set that has insertion order iteration characteristics.
static std::optional< ContiguousCasesResult > findContiguousCases(Value *Condition, SmallVectorImpl< ConstantInt * > &Cases, SmallVectorImpl< ConstantInt * > &OtherCases, BasicBlock *Dest, BasicBlock *OtherDest)
static void addPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, BasicBlock *ExistPred, MemorySSAUpdater *MSSAU=nullptr)
Update PHI nodes in Succ to indicate that there will now be entries in it from the 'NewPred' block.
static bool validLookupTableConstant(Constant *C, const TargetTransformInfo &TTI)
Return true if the backend will be able to handle initializing an array of constants like C.
static StoreInst * findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2)
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange, bool OptSize)
static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB, BasicBlock *EndBB, unsigned &SpeculatedInstructions, InstructionCost &Cost, const TargetTransformInfo &TTI)
Estimate the cost of the insertion(s) and check that the PHI nodes can be converted to selects.
static bool simplifySwitchLookup(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI, bool ConvertSwitchToLookupTable)
If the switch is only used to initialize one or more phi nodes in a common successor block with diffe...
static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI, Value *SelectValue, IRBuilder<> &Builder, DomTreeUpdater *DTU)
static bool valuesOverlap(std::vector< ValueEqualityComparisonCase > &C1, std::vector< ValueEqualityComparisonCase > &C2)
Return true if there are any keys in C1 that exist in C2 as well.
static bool isProfitableToSpeculate(const CondBrInst *BI, std::optional< bool > Invert, const TargetTransformInfo &TTI)
static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeCleanupPad(CleanupReturnInst *RI)
static bool isVectorOp(Instruction &I)
Return if an instruction's type or any of its operands' types are a vector type.
static BasicBlock * allPredecessorsComeFromSameSource(BasicBlock *BB)
static void cloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap)
static int constantIntSortPredicate(ConstantInt *const *P1, ConstantInt *const *P2)
static bool getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest, BasicBlock **CommonDest, SmallVectorImpl< std::pair< PHINode *, Constant * > > &Res, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to determine the resulting constant values in phi nodes at the common destination basic block,...
static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified=false)
Check if passing a value to an instruction will cause undefined behavior.
static std::optional< std::tuple< BasicBlock *, Instruction::BinaryOps, bool > > shouldFoldCondBranchesToCommonDestination(CondBrInst *BI, CondBrInst *PBI, const TargetTransformInfo *TTI)
Determine if the two branches share a common destination and deduce a glue that joins the branches' c...
static bool isSafeToHoistInstr(Instruction *I, unsigned Flags)
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, Instruction *I1, Instruction *I2)
static ConstantInt * getConstantInt(Value *V, const DataLayout &DL)
Extract ConstantInt from value, looking through IntToPtr and PointerNullValue.
static bool simplifySwitchOfCmpIntrinsic(SwitchInst *SI, IRBuilderBase &Builder, DomTreeUpdater *DTU)
Fold switch over ucmp/scmp intrinsic to br if two of the switch arms have the same destination.
static bool shouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize, const TargetTransformInfo &TTI, const DataLayout &DL, const SmallVector< Type * > &ResultTypes)
Determine whether a lookup table should be built for this switch, based on the number of cases,...
static Constant * constantFold(Instruction *I, const DataLayout &DL, const SmallDenseMap< Value *, Constant * > &ConstantPool)
Try to fold instruction I into a constant.
static bool areIdenticalUpToCommutativity(const Instruction *I1, const Instruction *I2)
static bool forwardSwitchConditionToPHI(SwitchInst *SI)
Try to forward the condition of a switch instruction to a phi node dominated by the switch,...
static PHINode * findPHIForConditionForwarding(ConstantInt *CaseValue, BasicBlock *BB, int *PhiIndex)
If BB would be eligible for simplification by TryToSimplifyUncondBranchFromEmptyBlock (i....
static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
Tries to transform switch of powers of two to reduce switch range.
static bool isCleanupBlockEmpty(iterator_range< BasicBlock::iterator > R)
static Value * ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB, Value *AlternativeV=nullptr)
static Value * createLogicalOp(IRBuilderBase &Builder, Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="")
static void hoistConditionalLoadsStores(CondBrInst *BI, SmallVectorImpl< Instruction * > &SpeculatedConditionalLoadsStores, std::optional< bool > Invert, Instruction *Sel)
If the target supports conditional faulting, we look for the following pattern:
static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2, const TargetTransformInfo &TTI)
Helper function for hoistCommonCodeFromSuccessors.
static bool reduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder, const DataLayout &DL, const TargetTransformInfo &TTI)
Try to transform a switch that has "holes" in it to a contiguous sequence of cases.
static bool safeToMergeTerminators(Instruction *SI1, Instruction *SI2, SmallSetVector< BasicBlock *, 4 > *FailBlocks=nullptr)
Return true if it is safe to merge these two terminator instructions together.
SkipFlags
@ SkipReadMem
@ SkipSideEffect
@ SkipImplicitControlFlow
static bool incomingValuesAreCompatible(BasicBlock *BB, ArrayRef< BasicBlock * > IncomingBlocks, SmallPtrSetImpl< Value * > *EquivalenceSet=nullptr)
Return true if all the PHI nodes in the basic block BB receive compatible (identical) incoming values...
static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If a switch is only used to initialize one or more phi nodes in a common successor block with only tw...
static void createUnreachableSwitchDefault(SwitchInst *Switch, DomTreeUpdater *DTU, bool RemoveOrigDefaultBlock=true)
static Value * foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector, Constant *DefaultResult, Value *Condition, IRBuilder<> &Builder, const DataLayout &DL, ArrayRef< uint32_t > BranchWeights)
static bool sinkCommonCodeFromPredecessors(BasicBlock *BB, DomTreeUpdater *DTU)
Check whether BB's predecessors end with unconditional branches.
static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI, const DataLayout &DL)
static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL)
Compute masked bits for the condition of a switch and use it to remove dead cases.
static bool blockIsSimpleEnoughToThreadThrough(BasicBlock *BB, BlocksSet &NonLocalUseBlocks)
Return true if we can thread a branch across this block.
static Value * isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, BasicBlock *StoreBB, BasicBlock *EndBB)
Determine if we can hoist sink a sole store instruction out of a conditional block.
static std::optional< bool > foldCondBranchOnValueKnownInPredecessorImpl(CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, AssumptionCache *AC)
If we have a conditional branch on something for which we know the constant value in predecessors (e....
static bool foldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI, DomTreeUpdater *DTU, AssumptionCache *AC, const DataLayout &DL, bool SpeculateUnpredictables)
Given a BB that starts with the specified two-entry PHI node, see if we can eliminate it.
static bool findReaching(BasicBlock *BB, BasicBlock *DefBB, BlocksSet &ReachesNonLocalUses)
static bool extractPredSuccWeights(CondBrInst *PBI, CondBrInst *BI, uint64_t &PredTrueWeight, uint64_t &PredFalseWeight, uint64_t &SuccTrueWeight, uint64_t &SuccFalseWeight)
Return true if either PBI or BI has branch weight available, and store the weights in {Pred|Succ}...
static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, SwitchCaseResultVectorTy &UniqueResults, Constant *&DefaultResult, const DataLayout &DL, const TargetTransformInfo &TTI, uintptr_t MaxUniqueResults)
static bool shouldUseSwitchConditionAsTableIndex(ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal, bool HasDefaultResults, const SmallVector< Type * > &ResultTypes, const DataLayout &DL, const TargetTransformInfo &TTI)
static InstructionCost computeSpeculationCost(const User *I, const TargetTransformInfo &TTI)
Compute an abstract "cost" of speculating the given instruction, which is assumed to be safe to specu...
static bool performBranchToCommonDestFolding(CondBrInst *BI, CondBrInst *PBI, DomTreeUpdater *DTU, MemorySSAUpdater *MSSAU, const TargetTransformInfo *TTI)
SmallPtrSet< BasicBlock *, 8 > BlocksSet
static unsigned skippedInstrFlags(Instruction *I)
static bool mergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU)
If this block is a landingpad exception handling block, categorize all the predecessor invokes into s...
static bool replacingOperandWithVariableIsCheap(const Instruction *I, int OpIdx)
static void eraseTerminatorAndDCECond(Instruction *TI, MemorySSAUpdater *MSSAU=nullptr)
static void eliminateBlockCases(BasicBlock *BB, std::vector< ValueEqualityComparisonCase > &Cases)
Given a vector of bb/value pairs, remove any entries in the list that match the specified block.
static bool mergeConditionalStores(CondBrInst *PBI, CondBrInst *QBI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
static bool mergeNestedCondBranch(CondBrInst *BI, DomTreeUpdater *DTU)
Fold the following pattern: bb0: br i1 cond1, label bb1, label bb2 bb1: br i1 cond2,...
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
static size_t mapCaseToResult(ConstantInt *CaseVal, SwitchCaseResultVectorTy &UniqueResults, Constant *Result)
static bool tryWidenCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU)
If the previous block ended with a widenable branch, determine if reusing the target block is profita...
static void mergeCompatibleInvokesImpl(ArrayRef< InvokeInst * > Invokes, DomTreeUpdater *DTU)
static bool mergeIdenticalBBs(ArrayRef< BasicBlock * > Candidates, DomTreeUpdater *DTU)
static void getBranchWeights(Instruction *TI, SmallVectorImpl< uint64_t > &Weights)
Get Weights of a given terminator, the default weight is at the front of the vector.
static bool tryToMergeLandingPad(LandingPadInst *LPad, UncondBrInst *BI, BasicBlock *BB, DomTreeUpdater *DTU)
Given a block with only a single landing pad and an unconditional branch, try to find another basic bl...
static Constant * lookupConstant(Value *V, const SmallDenseMap< Value *, Constant * > &ConstantPool)
If V is a Constant, return it.
static bool SimplifyCondBranchToCondBranch(CondBrInst *PBI, CondBrInst *BI, DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI)
If we have a conditional branch as a predecessor of another block, this function tries to simplify it...
static bool canSinkInstructions(ArrayRef< Instruction * > Insts, DenseMap< const Use *, SmallVector< Value *, 4 > > &PHIOperands)
static void hoistLockstepIdenticalDbgVariableRecords(Instruction *TI, Instruction *I1, SmallVectorImpl< Instruction * > &OtherInsts)
Hoists DbgVariableRecords from I1 and OtherInsts that are identical in lock-step to TI.
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU)
static bool removeUndefIntroducingPredecessor(BasicBlock *BB, DomTreeUpdater *DTU, AssumptionCache *AC)
If BB has an incoming value that will always trigger undefined behavior (e.g.
static bool simplifySwitchWhenUMin(SwitchInst *SI, DomTreeUpdater *DTU)
Tries to transform the switch when the condition is umin with a constant.
static bool isSafeCheapLoadStore(const Instruction *I, const TargetTransformInfo &TTI)
static ConstantInt * getKnownValueOnEdge(Value *V, BasicBlock *From, BasicBlock *To)
static bool dominatesMergePoint(Value *V, BasicBlock *BB, Instruction *InsertPt, SmallPtrSetImpl< Instruction * > &AggressiveInsts, InstructionCost &Cost, InstructionCost Budget, const TargetTransformInfo &TTI, AssumptionCache *AC, SmallPtrSetImpl< Instruction * > &ZeroCostInstructions, unsigned Depth=0)
If we have a merge point of an "if condition" as accepted above, return true if the specified value d...
static void reuseTableCompare(User *PhiUser, BasicBlock *PhiBlock, CondBrInst *RangeCheckBranch, Constant *DefaultValue, const SmallVectorImpl< std::pair< ConstantInt *, Constant * > > &Values)
Try to reuse the switch table index compare.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Value * RHS
Value * LHS
static const uint32_t IV[8]
Definition blake3_impl.h:83
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1055
unsigned popcount() const
Count the number of bits set.
Definition APInt.h:1693
bool sgt(const APInt &RHS) const
Signed greater than comparison.
Definition APInt.h:1208
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool intersects(const APInt &RHS) const
This operation tests if there are any pairs of corresponding bits between this APInt and RHS that are...
Definition APInt.h:1256
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition APInt.h:1173
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1554
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
Definition APInt.h:357
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
Definition APInt.h:476
LLVM_ABI APInt smul_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1995
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
Definition APInt.h:201
std::optional< int64_t > trySExtValue() const
Get sign extended value if possible.
Definition APInt.h:1597
LLVM_ABI APInt ssub_ov(const APInt &RHS, bool &Overflow) const
Definition APInt.cpp:1976
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & back() const
Get the last element.
Definition ArrayRef.h:150
const T & front() const
Get the first element.
Definition ArrayRef.h:144
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A cache of @llvm.assume calls within a function.
LLVM_ABI void registerAssumption(AssumeInst *CI)
Add an @llvm.assume intrinsic to this function's cache.
LLVM_ABI bool getValueAsBool() const
Return the attribute's value as a boolean.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:530
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing function, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:687
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns an iterator to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI bool hasNPredecessors(unsigned N) const
Return true if this block has exactly N predecessors.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
Definition BasicBlock.h:484
LLVM_ABI const CallInst * getTerminatingDeoptimizeCall() const
Returns the call instruction calling @llvm.experimental.deoptimize prior to the terminating return in...
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI void flushTerminatorDbgRecords()
Eject any debug-info trailing at the end of a block.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this basic block belongs to.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
size_t size() const
Definition BasicBlock.h:482
LLVM_ABI bool isLandingPad() const
Return true if this basic block is a landing pad.
LLVM_ABI bool hasNPredecessorsOrMore(unsigned N) const
Return true if this block has N predecessors or more.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
Definition BasicBlock.h:659
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
BasicBlock * getBasicBlock() const
Definition Constants.h:1125
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
BranchProbability getCompl() const
void addRangeRetAttr(const ConstantRange &CR)
adds the range attribute to the list of attributes.
bool isCallee(Value::const_user_iterator UI) const
Determine whether the passed iterator points to the callee operand's Use.
bool isDataOperand(const Use *U) const
bool tryIntersectAttributes(const CallBase *Other)
Try to intersect the attributes from 'this' CallBase and the 'Other' CallBase.
This class represents a function call, abstracting a target machine's calling convention.
mapped_iterator< op_iterator, DerefFnTy > handler_iterator
CleanupPadInst * getCleanupPad() const
Convenience accessor.
BasicBlock * getUnwindDest() const
This class is the base class for the comparison instructions.
Definition InstrTypes.h:728
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
bool isEquality() const
Determine if this is an equals/not equals predicate.
Definition InstrTypes.h:978
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:828
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
void setCondition(Value *V)
Value * getCondition() const
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
A vector constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition Constants.h:951
A constant value that is initialized with an expression using other constant values.
Definition Constants.h:1316
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
ConstantFP - Floating Point Values [float, double].
Definition Constants.h:420
ConstantFolder - Create constants with minimum, target independent, folding.
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isOne() const
This is just a convenience method to make client code smaller for a common case.
Definition Constants.h:225
bool isNegative() const
Definition Constants.h:214
uint64_t getLimitedValue(uint64_t Limit=~0ULL) const
getLimitedValue - If the value is smaller than the specified limit, return it, otherwise return the l...
Definition Constants.h:269
IntegerType * getIntegerType() const
Variant of the getType() method to always return an IntegerType, which reduces the amount of casting ...
Definition Constants.h:198
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
unsigned getBitWidth() const
getBitWidth - Return the scalar bitwidth of this constant.
Definition Constants.h:162
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
A constant pointer value that points to null.
Definition Constants.h:716
This class represents a range of values.
LLVM_ABI bool getEquivalentICmp(CmpInst::Predicate &Pred, APInt &RHS) const
Set up Pred and RHS such that ConstantRange::makeExactICmpRegion(Pred, RHS) == *this.
LLVM_ABI ConstantRange subtract(const APInt &CI) const
Subtract the specified constant from the endpoints of this constant range.
const APInt & getLower() const
Return the lower value for this range.
LLVM_ABI APInt getUnsignedMin() const
Return the smallest unsigned value contained in the ConstantRange.
LLVM_ABI bool isEmptySet() const
Return true if this set contains no members.
LLVM_ABI bool isSizeLargerThan(uint64_t MaxSize) const
Compare set size of this range with MaxSize.
const APInt & getUpper() const
Return the upper value for this range.
LLVM_ABI bool isUpperWrapped() const
Return true if the exclusive upper bound wraps around the unsigned domain.
static LLVM_ABI ConstantRange makeExactICmpRegion(CmpInst::Predicate Pred, const APInt &Other)
Produce the exact range such that all values in the returned range satisfy the given predicate with a...
LLVM_ABI ConstantRange inverse() const
Return a new range that is the logical not of the current set.
LLVM_ABI APInt getUnsignedMax() const
Return the largest unsigned value contained in the ConstantRange.
static ConstantRange getNonEmpty(APInt Lower, APInt Upper)
Create non-empty constant range with the given bounds.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getIntegerValue(Type *Ty, const APInt &V)
Return the value for an integer or pointer constant, or a vector thereof, with the given scalar value...
bool isNullValue() const
Return true if this is the value that would be returned by getNullValue.
Definition Constant.h:64
LLVM_ABI bool isOneValue() const
Returns true if the value is one.
Definition Constants.cpp:89
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
Base class for non-instruction debug metadata records that have positions within IR.
LLVM_ABI void removeFromParent()
simple_ilist< DbgRecord >::iterator self_iterator
Record of a variable value-assignment, aka a non instruction representation of the dbg....
A debug info location.
Definition DebugLoc.h:124
bool isSameSourceLocation(const DebugLoc &Other) const
Return true if the source locations match, ignoring isImplicitCode and source atom info.
Definition DebugLoc.h:242
static DebugLoc getTemporary()
Definition DebugLoc.h:150
static LLVM_ABI DebugLoc getMergedLocation(DebugLoc LocA, DebugLoc LocB)
When two instructions are combined into a single instruction we also need to combine the original loc...
Definition DebugLoc.cpp:169
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:156
static DebugLoc getDropped()
Definition DebugLoc.h:153
ValueT & at(const_arg_type_t< KeyT > Val)
Return the entry for the specified key, or abort if no such entry exists.
Definition DenseMap.h:270
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
std::pair< iterator, bool > try_emplace(KeyT &&Key, Ts &&...Args)
Definition DenseMap.h:301
unsigned size() const
Definition DenseMap.h:174
iterator end()
Definition DenseMap.h:143
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:286
void reserve(size_type NumEntries)
Grow the densemap so that it can contain at least NumEntries items before resizing again.
Definition DenseMap.h:178
Implements a dense probed hash-table based set.
Definition DenseSet.h:289
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:867
const BasicBlock & getEntryBlock() const
Definition Function.h:783
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition Function.cpp:758
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:685
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Module * getParent()
Get the module that this global value is contained inside of...
This instruction compares its operands according to the predicate given to the constructor.
Predicate getSignedPredicate() const
For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
Value * CreateICmpULT(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2380
Value * CreateZExtOrTrunc(Value *V, Type *DestTy, const Twine &Name="")
Create a ZExt or Trunc from the integer value V to DestTy.
Definition IRBuilder.h:2128
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1216
LLVM_ABI Value * CreateSelectFMF(Value *C, Value *True, Value *False, FMFSource FMFSource, const Twine &Name="", Instruction *MDFrom=nullptr)
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition IRBuilder.h:457
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:176
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2683
void SetCurrentDebugLocation(const DebugLoc &L)
Set location information used by debugging information.
Definition IRBuilder.h:221
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition IRBuilder.h:1532
LLVM_ABI CallInst * CreateAssumption(Value *Cond)
Create an assume intrinsic call that allows the optimizer to assume that the provided condition will ...
Value * CreateInBoundsGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="")
Definition IRBuilder.h:2008
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1210
Value * CreateNot(Value *V, const Twine &Name="")
Definition IRBuilder.h:1854
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1239
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2364
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1906
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2110
StoreInst * CreateStore(Value *Val, Value *Ptr, bool isVolatile=false)
Definition IRBuilder.h:1919
Value * CreateAdd(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1422
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2222
ConstantInt * getFalse()
Get the constant value for i1 false.
Definition IRBuilder.h:462
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2096
Value * CreateIntCast(Value *V, Type *DestTy, bool isSigned, const Twine &Name="")
Definition IRBuilder.h:2305
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:181
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2474
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1592
Value * CreateMul(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1456
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
Indirect Branch Instruction.
BasicBlock * getDestination(unsigned i)
Return the specified destination.
unsigned getNumDestinations() const
return the number of possible destinations in this indirectbr instruction.
LLVM_ABI void removeDestination(unsigned i)
This method removes the specified successor from the indirectbr instruction.
LLVM_ABI void dropUBImplyingAttrsAndMetadata(ArrayRef< unsigned > Keep={})
Drop any attributes or metadata that can cause immediate undefined behavior.
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI iterator_range< simple_ilist< DbgRecord >::iterator > cloneDebugInfoFrom(const Instruction *From, std::optional< simple_ilist< DbgRecord >::iterator > FromHere=std::nullopt, bool InsertAtHead=false)
Clone any debug-info attached to From onto this instruction.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
iterator_range< simple_ilist< DbgRecord >::iterator > getDbgRecordRange() const
Return a range over the DbgRecords attached to this instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that InsertPos ...
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
bool isTerminator() const
LLVM_ABI bool isUsedOutsideOfBlock(const BasicBlock *BB) const LLVM_READONLY
Return true if there are any uses of this instruction in blocks other than the specified block.
@ CompareUsingIntersectedAttrs
Check for equivalence with intersected callbase attrs.
LLVM_ABI bool isIdenticalTo(const Instruction *I) const LLVM_READONLY
Return true if the specified instruction is exactly identical to the current one.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void applyMergedLocation(DebugLoc LocA, DebugLoc LocB)
Merge 2 debug locations and apply it to the Instruction.
LLVM_ABI void dropDbgRecords()
Erase any DbgRecords attached to this instruction.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:348
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
Invoke instruction.
void setNormalDest(BasicBlock *B)
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
The landingpad instruction holds all of the information necessary to generate correct exception handl...
An instruction for reading from memory.
static unsigned getPointerOperandIndex()
Iterates through instructions in a set of blocks in reverse order from the first non-terminator.
LLVM_ABI MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition MDBuilder.cpp:38
Metadata node.
Definition Metadata.h:1069
Helper class to manipulate !mmra metadata nodes.
bool empty() const
Definition MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:126
size_type size() const
Definition MapVector.h:58
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
iterator_range< const_block_iterator > blocks() const
op_range incoming_values()
void setIncomingValue(unsigned i, Value *V)
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Value * getValue() const
Convenience accessor.
Return a value (possibly void), from a function.
This class represents the LLVM 'select' instruction.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:134
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this store instruction.
Align getAlign() const
bool isSimple() const
Value * getValueOperand()
bool isUnordered() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this store instruction.
Value * getPointerOperand()
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
A wrapper class to simplify modification of SwitchInst cases along with their prof branch_weights met...
LLVM_ABI void setSuccessorWeight(unsigned idx, CaseWeightOpt W)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest, CaseWeightOpt W)
Delegate the call to the underlying SwitchInst::addCase() and set the specified branch weight for the...
LLVM_ABI CaseWeightOpt getSuccessorWeight(unsigned idx)
LLVM_ABI void replaceDefaultDest(SwitchInst::CaseIt I)
Replace the default destination by given case.
std::optional< uint32_t > CaseWeightOpt
LLVM_ABI SwitchInst::CaseIt removeCase(SwitchInst::CaseIt I)
Delegate the call to the underlying SwitchInst::removeCase() and remove the corresponding branch weight.
Multiway switch.
CaseIt case_end()
Returns a read/write iterator that points one past the last case in the SwitchInst.
BasicBlock * getSuccessor(unsigned idx) const
void setCondition(Value *V)
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
CaseIteratorImpl< CaseHandle > CaseIt
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
unsigned getNumSuccessors() const
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
TargetCostKind
The kind of cost model.
@ TCK_CodeSize
Instruction code size.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
@ TCC_Free
Expected to fold away in lowering.
@ TCC_Basic
The cost of a typical 'add' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:282
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
Definition Type.cpp:306
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
Unconditional Branch instruction.
void setSuccessor(BasicBlock *NewSucc)
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i=0) const
'undef' values are things that do not have specified contents.
Definition Constants.h:1631
This function has undefined behavior.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI unsigned getOperandNo() const
Return the operand # of this use in its User.
Definition Use.cpp:36
LLVM_ABI void set(Value *Val)
Definition Value.h:874
User * getUser() const
Returns the User that contains this Use.
Definition Use.h:61
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
static constexpr uint64_t MaximumAlignment
Definition Value.h:799
LLVM_ABI Value(Type *Ty, unsigned scid)
Definition Value.cpp:54
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Represents an op.with.overflow intrinsic.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
A range adaptor for a pair of iterators.
Changed
#define UINT64_MAX
Definition DataTypes.h:77
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
match_combine_or< Ty... > m_CombineOr(const Ty &...Ps)
Combine pattern matchers matching any of Ps patterns.
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
auto m_Cmp()
Matches any compare instruction and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
bool match(Val *V, const Pattern &P)
match_bind< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
auto m_UMin(const Opnd0 &Op0, const Opnd1 &Op1)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
auto m_BinOp()
Match an arbitrary binary operation and ignore it.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_Value()
Match an arbitrary value and ignore it.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
ThreeOps_match< decltype(m_Value()), LHS, RHS, Instruction::Select, true > m_c_Select(const LHS &L, const RHS &R)
Match Select(C, LHS, RHS) or Select(C, RHS, LHS)
match_bind< WithOverflowInst > m_WithOverflowInst(WithOverflowInst *&I)
Match a with overflow intrinsic, capturing it if we match.
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
NoWrapTrunc_match< OpTy, TruncInst::NoUnsignedWrap > m_NUWTrunc(const OpTy &Op)
Matches trunc nuw.
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
auto m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
Definition DebugInfo.h:204
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
constexpr double e
@ User
could "use" a pointer
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:315
@ Offset
Definition DWP.cpp:558
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
Definition STLExtras.h:830
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
constexpr auto not_equal_to(T &&Arg)
Functor variant of std::not_equal_to that can be used as a UnaryPredicate in functional algorithms li...
Definition STLExtras.h:2180
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool foldBranchToCommonDest(CondBrInst *BI, llvm::DomTreeUpdater *DTU=nullptr, MemorySSAUpdater *MSSAU=nullptr, const TargetTransformInfo *TTI=nullptr, AssumptionCache *AC=nullptr, unsigned BonusInstThreshold=1)
If this basic block is ONLY a setcc and a branch, and if a predecessor branches to us and one of our ...
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
Definition LoopInfo.cpp:60
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
bool succ_empty(const Instruction *I)
Definition CFG.h:141
LLVM_ABI bool IsBlockFollowedByDeoptOrUnreachable(const BasicBlock *BB)
Check if we can prove that all paths starting from this block converge to a block that either has a @...
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
static cl::opt< unsigned > MaxSwitchCasesPerResult("max-switch-cases-per-result", cl::Hidden, cl::init(16), cl::desc("Limit cases to analyze when converting a switch to select"))
InstructionCost Cost
static cl::opt< bool > SpeculateOneExpensiveInst("speculate-one-expensive-inst", cl::Hidden, cl::init(true), cl::desc("Allow exactly one expensive instruction to be speculatively " "executed"))
@ Dead
Unused definition.
auto pred_end(const MachineBasicBlock *BB)
void set_intersect(S1Ty &S1, const S2Ty &S2)
set_intersect(A, B) - Compute A := A ^ B Identical to set_intersection, except that it works on set<>...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
auto successors(const MachineBasicBlock *BB)
auto accumulate(R &&Range, E &&Init)
Wrapper for std::accumulate.
Definition STLExtras.h:1702
constexpr from_range_t from_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI MDNode * getBranchWeightMDNode(const Instruction &I)
Get the branch weights metadata node.
constexpr bool isUIntN(unsigned N, uint64_t x)
Checks if an unsigned integer fits into the given (dynamic) bit width.
Definition MathExtras.h:243
LLVM_ABI Constant * ConstantFoldCompareInstOperands(unsigned Predicate, Constant *LHS, Constant *RHS, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, const Instruction *I=nullptr)
Attempt to constant fold a compare instruction (icmp/fcmp) with the specified operands.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
static cl::opt< unsigned > MaxSpeculationDepth("max-speculation-depth", cl::Hidden, cl::init(10), cl::desc("Limit maximum recursion depth when calculating costs of " "speculatively executed instructions"))
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1791
static cl::opt< unsigned > PHINodeFoldingThreshold("phi-node-folding-threshold", cl::Hidden, cl::init(2), cl::desc("Control the amount of phi node folding to perform (default = 2)"))
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
static cl::opt< bool > MergeCondStoresAggressively("simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false), cl::desc("When merging conditional stores, do so even if the resultant " "basic blocks are unlikely to be if-converted as a result"))
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
LLVM_ABI ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD)
Parse out a conservative ConstantRange from !range metadata.
auto map_range(ContainerTy &&C, FuncTy F)
Return a range that applies F to the elements of C.
Definition STLExtras.h:365
static cl::opt< unsigned > BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden, cl::init(2), cl::desc("Maximum cost of combining conditions when " "folding branches"))
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
int countr_zero(T Val)
Count the number of 0's from the least significant bit to the most significant, stopping at the first 1.
Definition bit.h:204
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
static cl::opt< bool > SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true), cl::desc("Sink common instructions down to the end block"))
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2200
constexpr bool has_single_bit(T Value) noexcept
Definition bit.h:149
static cl::opt< bool > HoistStoresWithCondFaulting("simplifycfg-hoist-stores-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist stores if the target supports conditional faulting"))
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr detail::StaticCastFunc< To > StaticCastTo
Function objects corresponding to the Cast types defined above.
Definition Casting.h:882
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
LLVM_ABI CondBrInst * GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue, BasicBlock *&IfFalse)
Check whether BB is the merge point of a if-region.
LLVM_ABI bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
BB is known to contain an unconditional branch, and contains no instructions other than PHI nodes,...
Definition Local.cpp:1155
void RemapDbgRecordRange(Module *M, iterator_range< DbgRecordIterator > Range, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecords Range using the value map VM.
LLVM_ABI void InvertBranch(CondBrInst *PBI, IRBuilderBase &Builder)
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI bool impliesPoison(const Value *ValAssumedPoison, const Value *V)
Return true if V is poison given that ValAssumedPoison is already poison.
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
static cl::opt< bool > EnableMergeCompatibleInvokes("simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true), cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"))
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true, unsigned Depth=0)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
LLVM_ABI bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
LLVM_ABI bool collectPossibleValues(const Value *V, SmallPtrSetImpl< const Constant * > &Constants, unsigned MaxCount, bool AllowUndefOrPoison=true)
Enumerates all possible immediate values of V and inserts them into the set Constants.
LLVM_ABI Instruction * removeUnwindEdge(BasicBlock *BB, DomTreeUpdater *DTU=nullptr)
Replace 'BB's terminator with one that does not have an unwind successor block.
Definition Local.cpp:2863
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
auto succ_size(const MachineBasicBlock *BB)
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:551
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
static cl::opt< unsigned > MaxJumpThreadingLiveBlocks("max-jump-threading-live-blocks", cl::Hidden, cl::init(24), cl::desc("Limit number of blocks a define in a threaded block is allowed " "to be live in"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
LLVM_ABI void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition Local.cpp:3105
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:322
static cl::opt< int > MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10), cl::desc("Max size of a block which is still considered " "small enough to thread through"))
LLVM_ABI BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
LLVM_ABI bool isWidenableBranch(const User *U)
Returns true iff U is a widenable branch (that is, extractWidenableCondition returns widenable condit...
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
static cl::opt< unsigned > HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden, cl::init(20), cl::desc("Allow reordering across at most this many " "instructions when hoisting"))
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI cl::opt< bool > RequireAndPreserveDomTree
This function is used to do simplification of a CFG.
static cl::opt< bool > MergeCondStores("simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores even if an unconditional store does not " "precede - hoist multiple conditional stores into a single " "predicated store"))
static cl::opt< unsigned > BranchFoldToCommonDestVectorMultiplier("simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden, cl::init(2), cl::desc("Multiplier to apply to threshold when determining whether or not " "to fold branch to common destination when vector operations are " "present"))
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
LLVM_ABI void hoistAllInstructionsInto(BasicBlock *DomBlock, Instruction *InsertPt, BasicBlock *BB)
Hoist all of the instructions in the IfBlock to the dominant block DomBlock, by moving its instructio...
Definition Local.cpp:3389
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
LLVM_ABI bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition Local.cpp:3896
DWARFExpression::Operation Op
LLVM_ABI bool PointerMayBeCaptured(const Value *V, bool ReturnCaptures, unsigned MaxUsesToExplore=0)
PointerMayBeCaptured - Return true if this pointer value may be captured by the enclosing function (w...
LLVM_ABI bool FoldSingleEntryPHINodes(BasicBlock *BB, MemoryDependenceResults *MemDep=nullptr)
We know that BB has one predecessor.
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Remap the Values used in the DbgRecord DR using the value map VM.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
auto sum_of(R &&Range, E Init=E{0})
Returns the sum of all values in Range with Init initial value.
Definition STLExtras.h:1717
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
static cl::opt< bool > HoistCondStores("simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true), cl::desc("Hoist conditional stores if an unconditional store precedes"))
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
LLVM_ABI bool simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI, DomTreeUpdater *DTU=nullptr, const SimplifyCFGOptions &Options={}, ArrayRef< WeakVH > LoopHeaders={})
auto pred_begin(const MachineBasicBlock *BB)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
auto predecessors(const MachineBasicBlock *BB)
static cl::opt< unsigned > HoistLoadsStoresWithCondFaultingThreshold("hoist-loads-stores-with-cond-faulting-threshold", cl::Hidden, cl::init(6), cl::desc("Control the maximal conditional load/store that we are willing " "to speculatively execute to eliminate conditional branch " "(default = 6)"))
static cl::opt< bool > HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true), cl::desc("Hoist common instructions up to the parent block"))
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
LLVM_ABI unsigned ComputeMaxSignificantBits(const Value *Op, const DataLayout &DL, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Get the upper bound on bit size for this Value Op as a signed integer.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
static cl::opt< unsigned > TwoEntryPHINodeFoldingThreshold("two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4), cl::desc("Control the maximal total instruction cost that we are willing " "to speculatively execute to fold a 2-entry PHI node into a " "select (default = 4)"))
Type * getLoadStoreType(const Value *I)
A helper function that returns the type of a load or store instruction.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
SmallVector< uint64_t, 2 > getDisjunctionWeights(const SmallVector< T1, 2 > &B1, const SmallVector< T2, 2 > &B2)
Get the branch weights of a branch conditioned on b1 || b2, where b1 and b2 are 2 booleans that are t...
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:107
LLVM_ABI Constant * ConstantFoldCastInstruction(unsigned opcode, Constant *V, Type *DestTy)
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
LLVM_ABI std::optional< bool > isImpliedByDomCondition(const Value *Cond, const Instruction *ContextI, const DataLayout &DL)
Return the boolean condition value in the context of the given instruction if it is known based on do...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
void array_pod_sort(IteratorTy Start, IteratorTy End)
array_pod_sort - This sorts an array with the specified start and end extent.
Definition STLExtras.h:1596
LLVM_ABI bool hasBranchWeightMD(const Instruction &I)
Checks if an instruction has Branch Weight Metadata.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition Hashing.h:305
LLVM_ABI bool isDereferenceablePointer(const Value *V, Type *Ty, const SimplifyQuery &Q, bool IgnoreFree=false)
Equivalent to isDereferenceableAndAlignedPointer with an alignment of 1.
Definition Loads.cpp:264
bool equal(L &&LRange, R &&RRange)
Wrapper function around std::equal to detect if pair-wise elements between two ranges are the same.
Definition STLExtras.h:2146
static cl::opt< bool > HoistLoadsWithCondFaulting("simplifycfg-hoist-loads-with-cond-faulting", cl::Hidden, cl::init(true), cl::desc("Hoist loads if the target supports conditional faulting"))
LLVM_ABI Constant * ConstantFoldInstOperands(const Instruction *I, ArrayRef< Constant * > Ops, const DataLayout &DL, const TargetLibraryInfo *TLI=nullptr, bool AllowNonDeterministic=true)
ConstantFoldInstOperands - Attempt to constant fold an instruction with the specified operands.
LLVM_ABI void setFittedBranchWeights(Instruction &I, ArrayRef< uint64_t > Weights, bool IsExpected, bool ElideAllZero=false)
Variant of setBranchWeights where the Weights will be fit first to uint32_t by shifting right.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
LLVM_ABI Constant * ConstantFoldIntegerCast(Constant *C, Type *DestTy, bool IsSigned, const DataLayout &DL)
Constant fold a zext, sext or trunc, depending on IsSigned and whether the DestTy is wider or narrowe...
bool capturesNothing(CaptureComponents CC)
Definition ModRef.h:375
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI bool EliminateDuplicatePHINodes(BasicBlock *BB)
Check for and eliminate duplicate PHI nodes in this block.
Definition Local.cpp:1509
@ Keep
No function return thunk.
Definition CodeGen.h:162
constexpr detail::IsaCheckPredicate< Types... > IsaPred
Function object wrapper for the llvm::isa type check.
Definition Casting.h:866
LLVM_ABI void RemapSourceAtom(Instruction *I, ValueToValueMapTy &VM)
Remap source location atom.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition Hashing.h:285
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
LLVM_ABI void mapAtomInstance(const DebugLoc &DL, ValueToValueMapTy &VMap)
Mark a cloned instruction as a new instance so that its source loc can be updated when remapped.
constexpr uint64_t NextPowerOf2(uint64_t A)
Returns the next power of two (in 64-bits) that is strictly greater than A.
Definition MathExtras.h:373
LLVM_ABI void extractFromBranchWeightMD64(const MDNode *ProfileData, SmallVectorImpl< uint64_t > &Weights)
Faster version of extractBranchWeights() that skips checks and must only be called with "branch_weigh...
LLVM_ABI ConstantRange computeConstantRange(const Value *V, bool ForSigned, const SimplifyQuery &SQ, unsigned Depth=0)
Determine the possible constant range of an integer or vector of integer value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
SmallVectorImpl< ConstantInt * > * Cases
SmallVectorImpl< ConstantInt * > * OtherCases
Checking whether two BBs are equal depends on the contents of the BasicBlock and the incoming values ...
SmallDenseMap< BasicBlock *, Value *, 8 > BB2ValueMap
Phi2IVsMap * PhiPredIVs
DenseMap< PHINode *, BB2ValueMap > Phi2IVsMap
static bool canBeMerged(const BasicBlock *BB)
BasicBlock * BB
static bool isEqual(const EqualBBWrapper *LHS, const EqualBBWrapper *RHS)
static unsigned getHashValue(const EqualBBWrapper *EBW)
An information struct used to provide DenseMap with the various necessary components for a given valu...
unsigned getBitWidth() const
Get the bit width of this value.
Definition KnownBits.h:44
unsigned countMaxActiveBits() const
Returns the maximum number of bits needed to represent all possible unsigned values with these known ...
Definition KnownBits.h:310
APInt getMaxValue() const
Return the maximal unsigned value possible given these KnownBits.
Definition KnownBits.h:146
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:342