LLVM 20.0.0git
ControlHeightReduction.cpp
Go to the documentation of this file.
1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/StringSet.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/PassManager.h"
41
42#include <optional>
43#include <set>
44#include <sstream>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "chr"
49
50#define CHR_DEBUG(X) LLVM_DEBUG(X)
51
52static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
53 cl::desc("Disable CHR for all functions"));
54
55static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
56 cl::desc("Apply CHR for all functions"));
57
59 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
60 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
61
63 "chr-merge-threshold", cl::init(2), cl::Hidden,
64 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
65
67 "chr-module-list", cl::init(""), cl::Hidden,
68 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
69
71 "chr-function-list", cl::init(""), cl::Hidden,
72 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
73
75 "chr-dup-threshold", cl::init(3), cl::Hidden,
76 cl::desc("Max number of duplications by CHR for a region"));
77
80
81static void parseCHRFilterFiles() {
82 if (!CHRModuleList.empty()) {
83 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
84 if (!FileOrErr) {
85 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
86 std::exit(1);
87 }
88 StringRef Buf = FileOrErr->get()->getBuffer();
90 Buf.split(Lines, '\n');
91 for (StringRef Line : Lines) {
92 Line = Line.trim();
93 if (!Line.empty())
94 CHRModules.insert(Line);
95 }
96 }
97 if (!CHRFunctionList.empty()) {
98 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
99 if (!FileOrErr) {
100 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
101 std::exit(1);
102 }
103 StringRef Buf = FileOrErr->get()->getBuffer();
105 Buf.split(Lines, '\n');
106 for (StringRef Line : Lines) {
107 Line = Line.trim();
108 if (!Line.empty())
109 CHRFunctions.insert(Line);
110 }
111 }
112}
113
114namespace {
115
116struct CHRStats {
117 CHRStats() = default;
118 void print(raw_ostream &OS) const {
119 OS << "CHRStats: NumBranches " << NumBranches
120 << " NumBranchesDelta " << NumBranchesDelta
121 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
122 }
123 // The original number of conditional branches / selects
124 uint64_t NumBranches = 0;
125 // The decrease of the number of conditional branches / selects in the hot
126 // paths due to CHR.
127 uint64_t NumBranchesDelta = 0;
128 // NumBranchesDelta weighted by the profile count at the scope entry.
129 uint64_t WeightedNumBranchesDelta = 0;
130};
131
132// RegInfo - some properties of a Region.
133struct RegInfo {
134 RegInfo() = default;
135 RegInfo(Region *RegionIn) : R(RegionIn) {}
136 Region *R = nullptr;
137 bool HasBranch = false;
139};
140
141typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
142
143// CHRScope - a sequence of regions to CHR together. It corresponds to a
144// sequence of conditional blocks. It can have subscopes which correspond to
145// nested conditional blocks. Nested CHRScopes form a tree.
146class CHRScope {
147 public:
148 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
149 assert(RI.R && "Null RegionIn");
150 RegInfos.push_back(RI);
151 }
152
153 Region *getParentRegion() {
154 assert(RegInfos.size() > 0 && "Empty CHRScope");
155 Region *Parent = RegInfos[0].R->getParent();
156 assert(Parent && "Unexpected to call this on the top-level region");
157 return Parent;
158 }
159
160 BasicBlock *getEntryBlock() {
161 assert(RegInfos.size() > 0 && "Empty CHRScope");
162 return RegInfos.front().R->getEntry();
163 }
164
165 BasicBlock *getExitBlock() {
166 assert(RegInfos.size() > 0 && "Empty CHRScope");
167 return RegInfos.back().R->getExit();
168 }
169
170 bool appendable(CHRScope *Next) {
171 // The next scope is appendable only if this scope is directly connected to
172 // it (which implies it post-dominates this scope) and this scope dominates
173 // it (no edge to the next scope outside this scope).
174 BasicBlock *NextEntry = Next->getEntryBlock();
175 if (getExitBlock() != NextEntry)
176 // Not directly connected.
177 return false;
178 Region *LastRegion = RegInfos.back().R;
179 for (BasicBlock *Pred : predecessors(NextEntry))
180 if (!LastRegion->contains(Pred))
181 // There's an edge going into the entry of the next scope from outside
182 // of this scope.
183 return false;
184 return true;
185 }
186
187 void append(CHRScope *Next) {
188 assert(RegInfos.size() > 0 && "Empty CHRScope");
189 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
190 assert(getParentRegion() == Next->getParentRegion() &&
191 "Must be siblings");
192 assert(getExitBlock() == Next->getEntryBlock() &&
193 "Must be adjacent");
194 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
195 Subs.append(Next->Subs.begin(), Next->Subs.end());
196 }
197
198 void addSub(CHRScope *SubIn) {
199#ifndef NDEBUG
200 bool IsChild = false;
201 for (RegInfo &RI : RegInfos)
202 if (RI.R == SubIn->getParentRegion()) {
203 IsChild = true;
204 break;
205 }
206 assert(IsChild && "Must be a child");
207#endif
208 Subs.push_back(SubIn);
209 }
210
211 // Split this scope at the boundary region into two, which will belong to the
212 // tail and returns the tail.
213 CHRScope *split(Region *Boundary) {
214 assert(Boundary && "Boundary null");
215 assert(RegInfos.begin()->R != Boundary &&
216 "Can't be split at beginning");
217 auto BoundaryIt = llvm::find_if(
218 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
219 if (BoundaryIt == RegInfos.end())
220 return nullptr;
221 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
222 DenseSet<Region *> TailRegionSet;
223 for (const RegInfo &RI : TailRegInfos)
224 TailRegionSet.insert(RI.R);
225
226 auto TailIt =
227 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
228 assert(Sub && "null Sub");
229 Region *Parent = Sub->getParentRegion();
230 if (TailRegionSet.count(Parent))
231 return false;
232
233 assert(llvm::any_of(
234 RegInfos,
235 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
236 "Must be in head");
237 return true;
238 });
239 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
240
241 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
242 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
243 RegInfos.erase(BoundaryIt, RegInfos.end());
244 Subs.erase(TailIt, Subs.end());
245 return Scope;
246 }
247
248 bool contains(Instruction *I) const {
249 BasicBlock *Parent = I->getParent();
250 for (const RegInfo &RI : RegInfos)
251 if (RI.R->contains(Parent))
252 return true;
253 return false;
254 }
255
256 void print(raw_ostream &OS) const;
257
258 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
259 SmallVector<CHRScope *, 8> Subs; // Subscopes.
260
261 // The instruction at which to insert the CHR conditional branch (and hoist
262 // the dependent condition values).
263 Instruction *BranchInsertPoint;
264
265 // True-biased and false-biased regions (conditional blocks),
266 // respectively. Used only for the outermost scope and includes regions in
267 // subscopes. The rest are unbiased.
268 DenseSet<Region *> TrueBiasedRegions;
269 DenseSet<Region *> FalseBiasedRegions;
270 // Among the biased regions, the regions that get CHRed.
271 SmallVector<RegInfo, 8> CHRRegions;
272
273 // True-biased and false-biased selects, respectively. Used only for the
274 // outermost scope and includes ones in subscopes.
275 DenseSet<SelectInst *> TrueBiasedSelects;
276 DenseSet<SelectInst *> FalseBiasedSelects;
277
278 // Map from one of the above regions to the instructions to stop
279 // hoisting instructions at through use-def chains.
280 HoistStopMapTy HoistStopMap;
281
282 private:
283 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
284 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
285};
286
287class CHR {
288 public:
289 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
290 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
292 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
293
294 ~CHR() {
295 for (CHRScope *Scope : Scopes) {
296 delete Scope;
297 }
298 }
299
300 bool run();
301
302 private:
303 // See the comments in CHR::run() for the high level flow of the algorithm and
304 // what the following functions do.
305
306 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
307 Region *R = RI.getTopLevelRegion();
308 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
309 Output.push_back(Scope);
310 }
311 }
312 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
314 CHRScope *findScope(Region *R);
315 void checkScopeHoistable(CHRScope *Scope);
316
317 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
319 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
320 CHRScope *Outer,
321 DenseSet<Value *> *OuterConditionValues,
322 Instruction *OuterInsertPoint,
324 DenseSet<Instruction *> &Unhoistables);
325
326 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
327 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
328
329 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
331
332 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
334 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
335
336 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
338
339 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
340 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
341 void cloneScopeBlocks(CHRScope *Scope,
342 BasicBlock *PreEntryBlock,
343 BasicBlock *ExitBlock,
344 Region *LastRegion,
345 ValueToValueMapTy &VMap);
346 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
347 BasicBlock *EntryBlock,
348 BasicBlock *NewEntryBlock,
349 ValueToValueMapTy &VMap);
350 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
351 BranchInst *MergedBR, uint64_t ProfileCount);
352 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
353 Value *&MergedCondition, BranchProbability &CHRBranchBias);
354 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
355 Value *&MergedCondition, BranchProbability &CHRBranchBias);
356 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
357 Instruction *BranchOrSelect, CHRScope *Scope,
358 IRBuilder<> &IRB, Value *&MergedCondition);
359 unsigned getRegionDuplicationCount(const Region *R) {
360 unsigned Count = 0;
361 // Find out how many times region R is cloned. Note that if the parent
362 // of R is cloned, R is also cloned, but R's clone count is not updated
363 // from the clone of the parent. We need to accumlate all the counts
364 // from the ancestors to get the clone count.
365 while (R) {
366 Count += DuplicationCount[R];
367 R = R->getParent();
368 }
369 return Count;
370 }
371
372 Function &F;
374 DominatorTree &DT;
376 RegionInfo &RI;
378 CHRStats Stats;
379
380 // All the true-biased regions in the function
381 DenseSet<Region *> TrueBiasedRegionsGlobal;
382 // All the false-biased regions in the function
383 DenseSet<Region *> FalseBiasedRegionsGlobal;
384 // All the true-biased selects in the function
385 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
386 // All the false-biased selects in the function
387 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
388 // A map from biased regions to their branch bias
390 // A map from biased selects to their branch bias
392 // All the scopes.
394 // This maps records how many times this region is cloned.
395 DenseMap<const Region *, unsigned> DuplicationCount;
396};
397
398} // end anonymous namespace
399
400static inline
402 const CHRStats &Stats) {
403 Stats.print(OS);
404 return OS;
405}
406
407static inline
408raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
409 Scope.print(OS);
410 return OS;
411}
412
414 if (DisableCHR)
415 return false;
416
417 if (ForceCHR)
418 return true;
419
420 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
421 if (CHRModules.count(F.getParent()->getName()))
422 return true;
423 return CHRFunctions.count(F.getName());
424 }
425
426 return PSI.isFunctionEntryHot(&F);
427}
428
429static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
430 CHRStats *Stats) {
431 StringRef FuncName = F.getName();
432 StringRef ModuleName = F.getParent()->getName();
433 (void)(FuncName); // Unused in release build.
434 (void)(ModuleName); // Unused in release build.
435 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
436 << FuncName);
437 if (Stats)
438 CHR_DEBUG(dbgs() << " " << *Stats);
439 CHR_DEBUG(dbgs() << "\n");
440 CHR_DEBUG(F.dump());
441}
442
443void CHRScope::print(raw_ostream &OS) const {
444 assert(RegInfos.size() > 0 && "Empty CHRScope");
445 OS << "CHRScope[";
446 OS << RegInfos.size() << ", Regions[";
447 for (const RegInfo &RI : RegInfos) {
448 OS << RI.R->getNameStr();
449 if (RI.HasBranch)
450 OS << " B";
451 if (RI.Selects.size() > 0)
452 OS << " S" << RI.Selects.size();
453 OS << ", ";
454 }
455 if (RegInfos[0].R->getParent()) {
456 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
457 } else {
458 // top level region
459 OS << "]";
460 }
461 OS << ", Subs[";
462 for (CHRScope *Sub : Subs) {
463 OS << *Sub << ", ";
464 }
465 OS << "]]";
466}
467
468// Return true if the given instruction type can be hoisted by CHR.
470 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
471 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
472 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
473 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
474 isa<InsertValueInst>(I);
475}
476
477// Return true if the given instruction can be hoisted by CHR.
480 return false;
481 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
482}
483
484// Recursively traverse the use-def chains of the given value and return a set
485// of the unhoistable base values defined within the scope (excluding the
486// first-region entry block) or the (hoistable or unhoistable) base values that
487// are defined outside (including the first-region entry block) of the
488// scope. The returned set doesn't include constants.
489static const std::set<Value *> &
491 DenseMap<Value *, std::set<Value *>> &Visited) {
492 auto It = Visited.find(V);
493 if (It != Visited.end()) {
494 return It->second;
495 }
496 std::set<Value *> Result;
497 if (auto *I = dyn_cast<Instruction>(V)) {
498 // We don't stop at a block that's not in the Scope because we would miss
499 // some instructions that are based on the same base values if we stop
500 // there.
501 if (!isHoistable(I, DT)) {
502 Result.insert(I);
503 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
504 }
505 // I is hoistable above the Scope.
506 for (Value *Op : I->operands()) {
507 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
508 Result.insert(OpResult.begin(), OpResult.end());
509 }
510 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
511 }
512 if (isa<Argument>(V)) {
513 Result.insert(V);
514 }
515 // We don't include others like constants because those won't lead to any
516 // chance of folding of conditions (eg two bit checks merged into one check)
517 // after CHR.
518 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
519}
520
521// Return true if V is already hoisted or can be hoisted (along with its
522// operands) above the insert point. When it returns true and HoistStops is
523// non-null, the instructions to stop hoisting at through the use-def chains are
524// inserted into HoistStops.
525static bool
527 DenseSet<Instruction *> &Unhoistables,
528 DenseSet<Instruction *> *HoistStops,
530 assert(InsertPoint && "Null InsertPoint");
531 if (auto *I = dyn_cast<Instruction>(V)) {
532 auto It = Visited.find(I);
533 if (It != Visited.end()) {
534 return It->second;
535 }
536 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
537 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
538 if (Unhoistables.count(I)) {
539 // Don't hoist if they are not to be hoisted.
540 Visited[I] = false;
541 return false;
542 }
543 if (DT.dominates(I, InsertPoint)) {
544 // We are already above the insert point. Stop here.
545 if (HoistStops)
546 HoistStops->insert(I);
547 Visited[I] = true;
548 return true;
549 }
550 // We aren't not above the insert point, check if we can hoist it above the
551 // insert point.
552 if (isHoistable(I, DT)) {
553 // Check operands first.
554 DenseSet<Instruction *> OpsHoistStops;
555 bool AllOpsHoisted = true;
556 for (Value *Op : I->operands()) {
557 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
558 Visited)) {
559 AllOpsHoisted = false;
560 break;
561 }
562 }
563 if (AllOpsHoisted) {
564 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
565 if (HoistStops)
566 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
567 Visited[I] = true;
568 return true;
569 }
570 }
571 Visited[I] = false;
572 return false;
573 }
574 // Non-instructions are considered hoistable.
575 return true;
576}
577
578// Constructs the true and false branch probabilities if the the instruction has
579// valid branch weights. Returns true when this was successful, false otherwise.
581 BranchProbability &TrueProb,
582 BranchProbability &FalseProb) {
583 uint64_t TrueWeight;
584 uint64_t FalseWeight;
585 if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
586 return false;
587 uint64_t SumWeight = TrueWeight + FalseWeight;
588
589 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
590 "Overflow calculating branch probabilities.");
591
592 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
593 if (SumWeight == 0)
594 return false;
595
596 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
597 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
598 return true;
599}
600
603 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
604}
605
606// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
607// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
608// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
609// false.
610template <typename K, typename S, typename M>
611static bool checkBias(K *Key, BranchProbability TrueProb,
612 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
613 M &BiasMap) {
615 if (TrueProb >= Threshold) {
616 TrueSet.insert(Key);
617 BiasMap[Key] = TrueProb;
618 return true;
619 } else if (FalseProb >= Threshold) {
620 FalseSet.insert(Key);
621 BiasMap[Key] = FalseProb;
622 return true;
623 }
624 return false;
625}
626
627// Returns true and insert a region into the right biased set and the map if the
628// branch of the region is biased.
630 DenseSet<Region *> &TrueBiasedRegionsGlobal,
631 DenseSet<Region *> &FalseBiasedRegionsGlobal,
633 if (!BI->isConditional())
634 return false;
635 BranchProbability ThenProb, ElseProb;
636 if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
637 return false;
638 BasicBlock *IfThen = BI->getSuccessor(0);
639 BasicBlock *IfElse = BI->getSuccessor(1);
640 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
641 IfThen != IfElse &&
642 "Invariant from findScopes");
643 if (IfThen == R->getExit()) {
644 // Swap them so that IfThen/ThenProb means going into the conditional code
645 // and IfElse/ElseProb means skipping it.
646 std::swap(IfThen, IfElse);
647 std::swap(ThenProb, ElseProb);
648 }
649 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
650 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
651 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
652 return checkBias(R, ThenProb, ElseProb,
653 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
654 BranchBiasMap);
655}
656
657// Returns true and insert a select into the right biased set and the map if the
658// select is biased.
660 SelectInst *SI, Region *R,
661 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
662 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
664 BranchProbability TrueProb, FalseProb;
665 if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
666 return false;
667 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
668 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
669 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
670 return checkBias(SI, TrueProb, FalseProb,
671 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
672 SelectBiasMap);
673}
674
675// Returns the instruction at which to hoist the dependent condition values and
676// insert the CHR branch for a region. This is the terminator branch in the
677// entry block or the first select in the entry block, if any.
679 Region *R = RI.R;
680 BasicBlock *EntryBB = R->getEntry();
681 // The hoist point is by default the terminator of the entry block, which is
682 // the same as the branch instruction if RI.HasBranch is true.
683 Instruction *HoistPoint = EntryBB->getTerminator();
684 for (SelectInst *SI : RI.Selects) {
685 if (SI->getParent() == EntryBB) {
686 // Pick the first select in Selects in the entry block. Note Selects is
687 // sorted in the instruction order within a block (asserted below).
688 HoistPoint = SI;
689 break;
690 }
691 }
692 assert(HoistPoint && "Null HoistPoint");
693#ifndef NDEBUG
694 // Check that HoistPoint is the first one in Selects in the entry block,
695 // if any.
696 DenseSet<Instruction *> EntryBlockSelectSet;
697 for (SelectInst *SI : RI.Selects) {
698 if (SI->getParent() == EntryBB) {
699 EntryBlockSelectSet.insert(SI);
700 }
701 }
702 for (Instruction &I : *EntryBB) {
703 if (EntryBlockSelectSet.contains(&I)) {
704 assert(&I == HoistPoint &&
705 "HoistPoint must be the first one in Selects");
706 break;
707 }
708 }
709#endif
710 return HoistPoint;
711}
712
713// Find a CHR scope in the given region.
714CHRScope * CHR::findScope(Region *R) {
715 CHRScope *Result = nullptr;
716 BasicBlock *Entry = R->getEntry();
717 BasicBlock *Exit = R->getExit(); // null if top level.
718 assert(Entry && "Entry must not be null");
719 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
720 "Only top level region has a null exit");
721 if (Entry)
722 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
723 else
724 CHR_DEBUG(dbgs() << "Entry null\n");
725 if (Exit)
726 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
727 else
728 CHR_DEBUG(dbgs() << "Exit null\n");
729 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
730 // to this region).
731 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
732 if (EntryInSubregion)
733 return nullptr;
734 // Exclude loops
735 for (BasicBlock *Pred : predecessors(Entry))
736 if (R->contains(Pred))
737 return nullptr;
738 // If any of the basic blocks have address taken, we must skip this region
739 // because we cannot clone basic blocks that have address taken.
740 for (BasicBlock *BB : R->blocks()) {
741 if (BB->hasAddressTaken())
742 return nullptr;
743 // If we encounter llvm.coro.id, skip this region because if the basic block
744 // is cloned, we end up inserting a token type PHI node to the block with
745 // llvm.coro.begin.
746 // FIXME: This could lead to less optimal codegen, because the region is
747 // excluded, it can prevent CHR from merging adjacent regions into bigger
748 // scope and hoisting more branches.
749 for (Instruction &I : *BB)
750 if (auto *II = dyn_cast<IntrinsicInst>(&I))
751 if (II->getIntrinsicID() == Intrinsic::coro_id)
752 return nullptr;
753 }
754
755 if (Exit) {
756 // Try to find an if-then block (check if R is an if-then).
757 // if (cond) {
758 // ...
759 // }
760 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
761 if (BI)
762 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
763 else
764 CHR_DEBUG(dbgs() << "BI null\n");
765 if (BI && BI->isConditional()) {
766 BasicBlock *S0 = BI->getSuccessor(0);
767 BasicBlock *S1 = BI->getSuccessor(1);
768 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
769 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
770 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
771 RegInfo RI(R);
772 RI.HasBranch = checkBiasedBranch(
773 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
774 BranchBiasMap);
775 Result = new CHRScope(RI);
776 Scopes.insert(Result);
777 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
778 ++Stats.NumBranches;
779 if (!RI.HasBranch) {
780 ORE.emit([&]() {
781 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
782 << "Branch not biased";
783 });
784 }
785 }
786 }
787 }
788 {
789 // Try to look for selects in the direct child blocks (as opposed to in
790 // subregions) of R.
791 // ...
792 // if (..) { // Some subregion
793 // ...
794 // }
795 // if (..) { // Some subregion
796 // ...
797 // }
798 // ...
799 // a = cond ? b : c;
800 // ...
802 for (RegionNode *E : R->elements()) {
803 if (E->isSubRegion())
804 continue;
805 // This returns the basic block of E if E is a direct child of R (not a
806 // subregion.)
807 BasicBlock *BB = E->getEntry();
808 // Need to push in the order to make it easier to find the first Select
809 // later.
810 for (Instruction &I : *BB) {
811 if (auto *SI = dyn_cast<SelectInst>(&I)) {
812 Selects.push_back(SI);
813 ++Stats.NumBranches;
814 }
815 }
816 }
817 if (Selects.size() > 0) {
818 auto AddSelects = [&](RegInfo &RI) {
819 for (auto *SI : Selects)
820 if (checkBiasedSelect(SI, RI.R,
821 TrueBiasedSelectsGlobal,
822 FalseBiasedSelectsGlobal,
823 SelectBiasMap))
824 RI.Selects.push_back(SI);
825 else
826 ORE.emit([&]() {
827 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
828 << "Select not biased";
829 });
830 };
831 if (!Result) {
832 CHR_DEBUG(dbgs() << "Found a select-only region\n");
833 RegInfo RI(R);
834 AddSelects(RI);
835 Result = new CHRScope(RI);
836 Scopes.insert(Result);
837 } else {
838 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
839 AddSelects(Result->RegInfos[0]);
840 }
841 }
842 }
843
844 if (Result) {
845 checkScopeHoistable(Result);
846 }
847 return Result;
848}
849
850// Check that any of the branch and the selects in the region could be
851// hoisted above the the CHR branch insert point (the most dominating of
852// them, either the branch (at the end of the first block) or the first
853// select in the first block). If the branch can't be hoisted, drop the
854// selects in the first blocks.
855//
856// For example, for the following scope/region with selects, we want to insert
857// the merged branch right before the first select in the first/entry block by
858// hoisting c1, c2, c3, and c4.
859//
860// // Branch insert point here.
861// a = c1 ? b : c; // Select 1
862// d = c2 ? e : f; // Select 2
863// if (c3) { // Branch
864// ...
865// c4 = foo() // A call.
866// g = c4 ? h : i; // Select 3
867// }
868//
869// But suppose we can't hoist c4 because it's dependent on the preceding
870// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
871// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
872void CHR::checkScopeHoistable(CHRScope *Scope) {
873 RegInfo &RI = Scope->RegInfos[0];
874 Region *R = RI.R;
875 BasicBlock *EntryBB = R->getEntry();
876 auto *Branch = RI.HasBranch ?
877 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
878 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
879 if (RI.HasBranch || !Selects.empty()) {
880 Instruction *InsertPoint = getBranchInsertPoint(RI);
881 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
882 // Avoid a data dependence from a select or a branch to a(nother)
883 // select. Note no instruction can't data-depend on a branch (a branch
884 // instruction doesn't produce a value).
885 DenseSet<Instruction *> Unhoistables;
886 // Initialize Unhoistables with the selects.
887 for (SelectInst *SI : Selects) {
888 Unhoistables.insert(SI);
889 }
890 // Remove Selects that can't be hoisted.
891 for (auto it = Selects.begin(); it != Selects.end(); ) {
892 SelectInst *SI = *it;
893 if (SI == InsertPoint) {
894 ++it;
895 continue;
896 }
898 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
899 DT, Unhoistables, nullptr, Visited);
900 if (!IsHoistable) {
901 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
902 ORE.emit([&]() {
904 "DropUnhoistableSelect", SI)
905 << "Dropped unhoistable select";
906 });
907 it = Selects.erase(it);
908 // Since we are dropping the select here, we also drop it from
909 // Unhoistables.
910 Unhoistables.erase(SI);
911 } else
912 ++it;
913 }
914 // Update InsertPoint after potentially removing selects.
915 InsertPoint = getBranchInsertPoint(RI);
916 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
917 if (RI.HasBranch && InsertPoint != Branch) {
919 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
920 DT, Unhoistables, nullptr, Visited);
921 if (!IsHoistable) {
922 // If the branch isn't hoistable, drop the selects in the entry
923 // block, preferring the branch, which makes the branch the hoist
924 // point.
925 assert(InsertPoint != Branch && "Branch must not be the hoist point");
926 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
927 CHR_DEBUG(
928 for (SelectInst *SI : Selects) {
929 dbgs() << "SI " << *SI << "\n";
930 });
931 for (SelectInst *SI : Selects) {
932 ORE.emit([&]() {
934 "DropSelectUnhoistableBranch", SI)
935 << "Dropped select due to unhoistable branch";
936 });
937 }
938 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
939 return SI->getParent() == EntryBB;
940 });
941 Unhoistables.clear();
942 InsertPoint = Branch;
943 }
944 }
945 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
946#ifndef NDEBUG
947 if (RI.HasBranch) {
948 assert(!DT.dominates(Branch, InsertPoint) &&
949 "Branch can't be already above the hoist point");
951 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
952 DT, Unhoistables, nullptr, Visited) &&
953 "checkHoistValue for branch");
954 }
955 for (auto *SI : Selects) {
956 assert(!DT.dominates(SI, InsertPoint) &&
957 "SI can't be already above the hoist point");
959 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
960 Unhoistables, nullptr, Visited) &&
961 "checkHoistValue for selects");
962 }
963 CHR_DEBUG(dbgs() << "Result\n");
964 if (RI.HasBranch) {
965 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
966 }
967 for (auto *SI : Selects) {
968 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
969 }
970#endif
971 }
972}
973
974// Traverse the region tree, find all nested scopes and merge them if possible.
975CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
977 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
978 CHRScope *Result = findScope(R);
979 // Visit subscopes.
980 CHRScope *ConsecutiveSubscope = nullptr;
982 for (auto It = R->begin(); It != R->end(); ++It) {
983 const std::unique_ptr<Region> &SubR = *It;
984 auto NextIt = std::next(It);
985 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
986 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
987 << "\n");
988 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
989 if (SubCHRScope) {
990 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
991 } else {
992 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
993 }
994 if (SubCHRScope) {
995 if (!ConsecutiveSubscope)
996 ConsecutiveSubscope = SubCHRScope;
997 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
998 Subscopes.push_back(ConsecutiveSubscope);
999 ConsecutiveSubscope = SubCHRScope;
1000 } else
1001 ConsecutiveSubscope->append(SubCHRScope);
1002 } else {
1003 if (ConsecutiveSubscope) {
1004 Subscopes.push_back(ConsecutiveSubscope);
1005 }
1006 ConsecutiveSubscope = nullptr;
1007 }
1008 }
1009 if (ConsecutiveSubscope) {
1010 Subscopes.push_back(ConsecutiveSubscope);
1011 }
1012 for (CHRScope *Sub : Subscopes) {
1013 if (Result) {
1014 // Combine it with the parent.
1015 Result->addSub(Sub);
1016 } else {
1017 // Push Subscopes as they won't be combined with the parent.
1018 Scopes.push_back(Sub);
1019 }
1020 }
1021 return Result;
1022}
1023
1025 DenseSet<Value *> ConditionValues;
1026 if (RI.HasBranch) {
1027 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1028 ConditionValues.insert(BI->getCondition());
1029 }
1030 for (SelectInst *SI : RI.Selects) {
1031 ConditionValues.insert(SI->getCondition());
1032 }
1033 return ConditionValues;
1034}
1035
1036
1037// Determine whether to split a scope depending on the sets of the branch
1038// condition values of the previous region and the current region. We split
1039// (return true) it if 1) the condition values of the inner/lower scope can't be
1040// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1041// values have an empty intersection (because the combined branch conditions
1042// won't probably lead to a simpler combined condition).
1043static bool shouldSplit(Instruction *InsertPoint,
1044 DenseSet<Value *> &PrevConditionValues,
1045 DenseSet<Value *> &ConditionValues,
1046 DominatorTree &DT,
1047 DenseSet<Instruction *> &Unhoistables) {
1048 assert(InsertPoint && "Null InsertPoint");
1049 CHR_DEBUG(
1050 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1051 for (Value *V : PrevConditionValues) {
1052 dbgs() << *V << ", ";
1053 }
1054 dbgs() << " ConditionValues ";
1055 for (Value *V : ConditionValues) {
1056 dbgs() << *V << ", ";
1057 }
1058 dbgs() << "\n");
1059 // If any of Bases isn't hoistable to the hoist point, split.
1060 for (Value *V : ConditionValues) {
1062 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1063 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1064 return true; // Not hoistable, split.
1065 }
1066 }
1067 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1068 // unnecessary splits at scopes with no branch/selects. If
1069 // PrevConditionValues and ConditionValues don't intersect at all, split.
1070 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1071 // Use std::set as DenseSet doesn't work with set_intersection.
1072 std::set<Value *> PrevBases, Bases;
1074 for (Value *V : PrevConditionValues) {
1075 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1076 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1077 }
1078 for (Value *V : ConditionValues) {
1079 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1080 Bases.insert(BaseValues.begin(), BaseValues.end());
1081 }
1082 CHR_DEBUG(
1083 dbgs() << "PrevBases ";
1084 for (Value *V : PrevBases) {
1085 dbgs() << *V << ", ";
1086 }
1087 dbgs() << " Bases ";
1088 for (Value *V : Bases) {
1089 dbgs() << *V << ", ";
1090 }
1091 dbgs() << "\n");
1092 std::vector<Value *> Intersection;
1093 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1094 Bases.end(), std::back_inserter(Intersection));
1095 if (Intersection.empty()) {
1096 // Empty intersection, split.
1097 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1098 return true;
1099 }
1100 }
1101 CHR_DEBUG(dbgs() << "No split\n");
1102 return false; // Don't split.
1103}
1104
1105static void getSelectsInScope(CHRScope *Scope,
1106 DenseSet<Instruction *> &Output) {
1107 for (RegInfo &RI : Scope->RegInfos)
1108 for (SelectInst *SI : RI.Selects)
1109 Output.insert(SI);
1110 for (CHRScope *Sub : Scope->Subs)
1111 getSelectsInScope(Sub, Output);
1112}
1113
1114void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1116 for (CHRScope *Scope : Input) {
1117 assert(!Scope->BranchInsertPoint &&
1118 "BranchInsertPoint must not be set");
1119 DenseSet<Instruction *> Unhoistables;
1120 getSelectsInScope(Scope, Unhoistables);
1121 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1122 }
1123#ifndef NDEBUG
1124 for (CHRScope *Scope : Output) {
1125 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1126 }
1127#endif
1128}
1129
1130SmallVector<CHRScope *, 8> CHR::splitScope(
1131 CHRScope *Scope,
1132 CHRScope *Outer,
1133 DenseSet<Value *> *OuterConditionValues,
1134 Instruction *OuterInsertPoint,
1136 DenseSet<Instruction *> &Unhoistables) {
1137 if (Outer) {
1138 assert(OuterConditionValues && "Null OuterConditionValues");
1139 assert(OuterInsertPoint && "Null OuterInsertPoint");
1140 }
1141 bool PrevSplitFromOuter = true;
1142 DenseSet<Value *> PrevConditionValues;
1143 Instruction *PrevInsertPoint = nullptr;
1145 SmallVector<bool, 8> SplitsSplitFromOuter;
1146 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1147 SmallVector<Instruction *, 8> SplitsInsertPoints;
1148 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1149 for (RegInfo &RI : RegInfos) {
1150 Instruction *InsertPoint = getBranchInsertPoint(RI);
1152 CHR_DEBUG(
1153 dbgs() << "ConditionValues ";
1154 for (Value *V : ConditionValues) {
1155 dbgs() << *V << ", ";
1156 }
1157 dbgs() << "\n");
1158 if (RI.R == RegInfos[0].R) {
1159 // First iteration. Check to see if we should split from the outer.
1160 if (Outer) {
1161 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1162 CHR_DEBUG(dbgs() << "Should split from outer at "
1163 << RI.R->getNameStr() << "\n");
1164 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1165 ConditionValues, DT, Unhoistables)) {
1166 PrevConditionValues = ConditionValues;
1167 PrevInsertPoint = InsertPoint;
1168 ORE.emit([&]() {
1170 "SplitScopeFromOuter",
1171 RI.R->getEntry()->getTerminator())
1172 << "Split scope from outer due to unhoistable branch/select "
1173 << "and/or lack of common condition values";
1174 });
1175 } else {
1176 // Not splitting from the outer. Use the outer bases and insert
1177 // point. Union the bases.
1178 PrevSplitFromOuter = false;
1179 PrevConditionValues = *OuterConditionValues;
1180 PrevConditionValues.insert(ConditionValues.begin(),
1181 ConditionValues.end());
1182 PrevInsertPoint = OuterInsertPoint;
1183 }
1184 } else {
1185 CHR_DEBUG(dbgs() << "Outer null\n");
1186 PrevConditionValues = ConditionValues;
1187 PrevInsertPoint = InsertPoint;
1188 }
1189 } else {
1190 CHR_DEBUG(dbgs() << "Should split from prev at "
1191 << RI.R->getNameStr() << "\n");
1192 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1193 DT, Unhoistables)) {
1194 CHRScope *Tail = Scope->split(RI.R);
1195 Scopes.insert(Tail);
1196 Splits.push_back(Scope);
1197 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1198 SplitsConditionValues.push_back(PrevConditionValues);
1199 SplitsInsertPoints.push_back(PrevInsertPoint);
1200 Scope = Tail;
1201 PrevConditionValues = ConditionValues;
1202 PrevInsertPoint = InsertPoint;
1203 PrevSplitFromOuter = true;
1204 ORE.emit([&]() {
1206 "SplitScopeFromPrev",
1207 RI.R->getEntry()->getTerminator())
1208 << "Split scope from previous due to unhoistable branch/select "
1209 << "and/or lack of common condition values";
1210 });
1211 } else {
1212 // Not splitting. Union the bases. Keep the hoist point.
1213 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1214 }
1215 }
1216 }
1217 Splits.push_back(Scope);
1218 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1219 SplitsConditionValues.push_back(PrevConditionValues);
1220 assert(PrevInsertPoint && "Null PrevInsertPoint");
1221 SplitsInsertPoints.push_back(PrevInsertPoint);
1222 assert(Splits.size() == SplitsConditionValues.size() &&
1223 Splits.size() == SplitsSplitFromOuter.size() &&
1224 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1225 for (size_t I = 0; I < Splits.size(); ++I) {
1226 CHRScope *Split = Splits[I];
1227 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1228 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1230 DenseSet<Instruction *> SplitUnhoistables;
1231 getSelectsInScope(Split, SplitUnhoistables);
1232 for (CHRScope *Sub : Split->Subs) {
1233 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1234 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1235 SplitUnhoistables);
1236 llvm::append_range(NewSubs, SubSplits);
1237 }
1238 Split->Subs = NewSubs;
1239 }
1241 for (size_t I = 0; I < Splits.size(); ++I) {
1242 CHRScope *Split = Splits[I];
1243 if (SplitsSplitFromOuter[I]) {
1244 // Split from the outer.
1245 Output.push_back(Split);
1246 Split->BranchInsertPoint = SplitsInsertPoints[I];
1247 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1248 << "\n");
1249 } else {
1250 // Connected to the outer.
1251 Result.push_back(Split);
1252 }
1253 }
1254 if (!Outer)
1255 assert(Result.empty() &&
1256 "If no outer (top-level), must return no nested ones");
1257 return Result;
1258}
1259
1260void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1261 for (CHRScope *Scope : Scopes) {
1262 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1263 classifyBiasedScopes(Scope, Scope);
1264 CHR_DEBUG(
1265 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1266 dbgs() << "TrueBiasedRegions ";
1267 for (Region *R : Scope->TrueBiasedRegions) {
1268 dbgs() << R->getNameStr() << ", ";
1269 }
1270 dbgs() << "\n";
1271 dbgs() << "FalseBiasedRegions ";
1272 for (Region *R : Scope->FalseBiasedRegions) {
1273 dbgs() << R->getNameStr() << ", ";
1274 }
1275 dbgs() << "\n";
1276 dbgs() << "TrueBiasedSelects ";
1277 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1278 dbgs() << *SI << ", ";
1279 }
1280 dbgs() << "\n";
1281 dbgs() << "FalseBiasedSelects ";
1282 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1283 dbgs() << *SI << ", ";
1284 }
1285 dbgs() << "\n";);
1286 }
1287}
1288
1289void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1290 for (RegInfo &RI : Scope->RegInfos) {
1291 if (RI.HasBranch) {
1292 Region *R = RI.R;
1293 if (TrueBiasedRegionsGlobal.contains(R))
1294 OutermostScope->TrueBiasedRegions.insert(R);
1295 else if (FalseBiasedRegionsGlobal.contains(R))
1296 OutermostScope->FalseBiasedRegions.insert(R);
1297 else
1298 llvm_unreachable("Must be biased");
1299 }
1300 for (SelectInst *SI : RI.Selects) {
1301 if (TrueBiasedSelectsGlobal.contains(SI))
1302 OutermostScope->TrueBiasedSelects.insert(SI);
1303 else if (FalseBiasedSelectsGlobal.contains(SI))
1304 OutermostScope->FalseBiasedSelects.insert(SI);
1305 else
1306 llvm_unreachable("Must be biased");
1307 }
1308 }
1309 for (CHRScope *Sub : Scope->Subs) {
1310 classifyBiasedScopes(Sub, OutermostScope);
1311 }
1312}
1313
1314static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1315 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1316 Scope->FalseBiasedRegions.size() +
1317 Scope->TrueBiasedSelects.size() +
1318 Scope->FalseBiasedSelects.size();
1319 return NumBiased >= CHRMergeThreshold;
1320}
1321
1322void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1324 for (CHRScope *Scope : Input) {
1325 // Filter out the ones with only one region and no subs.
1326 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1327 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1328 << Scope->TrueBiasedRegions.size()
1329 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1330 << " true-selects " << Scope->TrueBiasedSelects.size()
1331 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1332 ORE.emit([&]() {
1334 DEBUG_TYPE,
1335 "DropScopeWithOneBranchOrSelect",
1336 Scope->RegInfos[0].R->getEntry()->getTerminator())
1337 << "Drop scope with < "
1338 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1339 << " biased branch(es) or select(s)";
1340 });
1341 continue;
1342 }
1343 Output.push_back(Scope);
1344 }
1345}
1346
1347void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1349 for (CHRScope *Scope : Input) {
1350 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1351 "Empty");
1352 setCHRRegions(Scope, Scope);
1353 Output.push_back(Scope);
1354 CHR_DEBUG(
1355 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1356 for (auto pair : Scope->HoistStopMap) {
1357 Region *R = pair.first;
1358 dbgs() << "Region " << R->getNameStr() << "\n";
1359 for (Instruction *I : pair.second) {
1360 dbgs() << "HoistStop " << *I << "\n";
1361 }
1362 }
1363 dbgs() << "CHRRegions" << "\n";
1364 for (RegInfo &RI : Scope->CHRRegions) {
1365 dbgs() << RI.R->getNameStr() << "\n";
1366 });
1367 }
1368}
1369
1370void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1371 DenseSet<Instruction *> Unhoistables;
1372 // Put the biased selects in Unhoistables because they should stay where they
1373 // are and constant-folded after CHR (in case one biased select or a branch
1374 // can depend on another biased select.)
1375 for (RegInfo &RI : Scope->RegInfos) {
1376 for (SelectInst *SI : RI.Selects) {
1377 Unhoistables.insert(SI);
1378 }
1379 }
1380 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1381 for (RegInfo &RI : Scope->RegInfos) {
1382 Region *R = RI.R;
1383 DenseSet<Instruction *> HoistStops;
1384 bool IsHoisted = false;
1385 if (RI.HasBranch) {
1386 assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1387 OutermostScope->FalseBiasedRegions.contains(R)) &&
1388 "Must be truthy or falsy");
1389 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1390 // Note checkHoistValue fills in HoistStops.
1392 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1393 Unhoistables, &HoistStops, Visited);
1394 assert(IsHoistable && "Must be hoistable");
1395 (void)(IsHoistable); // Unused in release build
1396 IsHoisted = true;
1397 }
1398 for (SelectInst *SI : RI.Selects) {
1399 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1400 OutermostScope->FalseBiasedSelects.contains(SI)) &&
1401 "Must be true or false biased");
1402 // Note checkHoistValue fills in HoistStops.
1404 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1405 Unhoistables, &HoistStops, Visited);
1406 assert(IsHoistable && "Must be hoistable");
1407 (void)(IsHoistable); // Unused in release build
1408 IsHoisted = true;
1409 }
1410 if (IsHoisted) {
1411 OutermostScope->CHRRegions.push_back(RI);
1412 OutermostScope->HoistStopMap[R] = HoistStops;
1413 }
1414 }
1415 for (CHRScope *Sub : Scope->Subs)
1416 setCHRRegions(Sub, OutermostScope);
1417}
1418
1419static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1420 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1421}
1422
1423void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1425 Output.resize(Input.size());
1426 llvm::copy(Input, Output.begin());
1428}
1429
1430// Return true if V is already hoisted or was hoisted (along with its operands)
1431// to the insert point.
1432static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1433 HoistStopMapTy &HoistStopMap,
1434 DenseSet<Instruction *> &HoistedSet,
1435 DenseSet<PHINode *> &TrivialPHIs,
1436 DominatorTree &DT) {
1437 auto IT = HoistStopMap.find(R);
1438 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1439 DenseSet<Instruction *> &HoistStops = IT->second;
1440 if (auto *I = dyn_cast<Instruction>(V)) {
1441 if (I == HoistPoint)
1442 return;
1443 if (HoistStops.count(I))
1444 return;
1445 if (auto *PN = dyn_cast<PHINode>(I))
1446 if (TrivialPHIs.count(PN))
1447 // The trivial phi inserted by the previous CHR scope could replace a
1448 // non-phi in HoistStops. Note that since this phi is at the exit of a
1449 // previous CHR scope, which dominates this scope, it's safe to stop
1450 // hoisting there.
1451 return;
1452 if (HoistedSet.count(I))
1453 // Already hoisted, return.
1454 return;
1455 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1456 assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1457 assert(DT.getNode(HoistPoint->getParent()) &&
1458 "DT must contain HoistPoint block");
1459 if (DT.dominates(I, HoistPoint))
1460 // We are already above the hoist point. Stop here. This may be necessary
1461 // when multiple scopes would independently hoist the same
1462 // instruction. Since an outer (dominating) scope would hoist it to its
1463 // entry before an inner (dominated) scope would to its entry, the inner
1464 // scope may see the instruction already hoisted, in which case it
1465 // potentially wrong for the inner scope to hoist it and could cause bad
1466 // IR (non-dominating def), but safe to skip hoisting it instead because
1467 // it's already in a block that dominates the inner scope.
1468 return;
1469 for (Value *Op : I->operands()) {
1470 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1471 }
1472 I->moveBefore(HoistPoint);
1473 HoistedSet.insert(I);
1474 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1475 }
1476}
1477
1478// Hoist the dependent condition values of the branches and the selects in the
1479// scope to the insert point.
1480static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1481 DenseSet<PHINode *> &TrivialPHIs,
1482 DominatorTree &DT) {
1483 DenseSet<Instruction *> HoistedSet;
1484 for (const RegInfo &RI : Scope->CHRRegions) {
1485 Region *R = RI.R;
1486 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1487 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1488 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1489 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1490 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1491 HoistedSet, TrivialPHIs, DT);
1492 }
1493 for (SelectInst *SI : RI.Selects) {
1494 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1495 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1496 if (!(IsTrueBiased || IsFalseBiased))
1497 continue;
1498 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1499 HoistedSet, TrivialPHIs, DT);
1500 }
1501 }
1502}
1503
1504// Negate the predicate if an ICmp if it's used only by branches or selects by
1505// swapping the operands of the branches or the selects. Returns true if success.
1507 Instruction *ExcludedUser,
1508 CHRScope *Scope) {
1509 for (User *U : ICmp->users()) {
1510 if (U == ExcludedUser)
1511 continue;
1512 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1513 continue;
1514 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1515 continue;
1516 return false;
1517 }
1518 for (User *U : ICmp->users()) {
1519 if (U == ExcludedUser)
1520 continue;
1521 if (auto *BI = dyn_cast<BranchInst>(U)) {
1522 assert(BI->isConditional() && "Must be conditional");
1523 BI->swapSuccessors();
1524 // Don't need to swap this in terms of
1525 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1526 // mean whehter the branch is likely go into the if-then rather than
1527 // successor0/successor1 and because we can tell which edge is the then or
1528 // the else one by comparing the destination to the region exit block.
1529 continue;
1530 }
1531 if (auto *SI = dyn_cast<SelectInst>(U)) {
1532 // Swap operands
1533 SI->swapValues();
1534 SI->swapProfMetadata();
1535 if (Scope->TrueBiasedSelects.count(SI)) {
1536 assert(!Scope->FalseBiasedSelects.contains(SI) &&
1537 "Must not be already in");
1538 Scope->FalseBiasedSelects.insert(SI);
1539 } else if (Scope->FalseBiasedSelects.count(SI)) {
1540 assert(!Scope->TrueBiasedSelects.contains(SI) &&
1541 "Must not be already in");
1542 Scope->TrueBiasedSelects.insert(SI);
1543 }
1544 continue;
1545 }
1546 llvm_unreachable("Must be a branch or a select");
1547 }
1549 return true;
1550}
1551
1552// A helper for transformScopes. Insert a trivial phi at the scope exit block
1553// for a value that's defined in the scope but used outside it (meaning it's
1554// alive at the exit block).
1555static void insertTrivialPHIs(CHRScope *Scope,
1556 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1557 DenseSet<PHINode *> &TrivialPHIs) {
1558 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1559 for (RegInfo &RI : Scope->RegInfos) {
1560 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1561 // sub-Scopes.
1562 BlocksInScope.insert(BB);
1563 }
1564 }
1565 CHR_DEBUG({
1566 dbgs() << "Inserting redundant phis\n";
1567 for (BasicBlock *BB : BlocksInScope)
1568 dbgs() << "BlockInScope " << BB->getName() << "\n";
1569 });
1570 for (BasicBlock *BB : BlocksInScope) {
1571 for (Instruction &I : *BB) {
1573 for (User *U : I.users()) {
1574 if (auto *UI = dyn_cast<Instruction>(U)) {
1575 if (!BlocksInScope.contains(UI->getParent()) &&
1576 // Unless there's already a phi for I at the exit block.
1577 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1578 CHR_DEBUG(dbgs() << "V " << I << "\n");
1579 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1580 Users.push_back(UI);
1581 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1582 // There's a loop backedge from a block that's dominated by this
1583 // scope to the entry block.
1584 CHR_DEBUG(dbgs() << "V " << I << "\n");
1585 CHR_DEBUG(dbgs()
1586 << "Used at entry block (for a back edge) by a phi user "
1587 << *UI << "\n");
1588 Users.push_back(UI);
1589 }
1590 }
1591 }
1592 if (Users.size() > 0) {
1593 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1594 // ExitBlock. Replace I with the new phi in UI unless UI is another
1595 // phi at ExitBlock.
1596 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1597 PN->insertBefore(ExitBlock->begin());
1598 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1599 PN->addIncoming(&I, Pred);
1600 }
1601 TrivialPHIs.insert(PN);
1602 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1603 for (Instruction *UI : Users) {
1604 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1605 if (UI->getOperand(J) == &I) {
1606 UI->setOperand(J, PN);
1607 }
1608 }
1609 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1610 }
1611 }
1612 }
1613 }
1614}
1615
1616// Assert that all the CHR regions of the scope have a biased branch or select.
1617static void LLVM_ATTRIBUTE_UNUSED
1619#ifndef NDEBUG
1620 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1621 if (Scope->TrueBiasedRegions.count(RI.R) ||
1622 Scope->FalseBiasedRegions.count(RI.R))
1623 return true;
1624 for (SelectInst *SI : RI.Selects)
1625 if (Scope->TrueBiasedSelects.count(SI) ||
1626 Scope->FalseBiasedSelects.count(SI))
1627 return true;
1628 return false;
1629 };
1630 for (RegInfo &RI : Scope->CHRRegions) {
1631 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1632 "Must have biased branch or select");
1633 }
1634#endif
1635}
1636
1637// Assert that all the condition values of the biased branches and selects have
1638// been hoisted to the pre-entry block or outside of the scope.
1640 CHRScope *Scope, BasicBlock *PreEntryBlock) {
1641 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1642 for (RegInfo &RI : Scope->CHRRegions) {
1643 Region *R = RI.R;
1644 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1645 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1646 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1647 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1648 Value *V = BI->getCondition();
1649 CHR_DEBUG(dbgs() << *V << "\n");
1650 if (auto *I = dyn_cast<Instruction>(V)) {
1651 (void)(I); // Unused in release build.
1652 assert((I->getParent() == PreEntryBlock ||
1653 !Scope->contains(I)) &&
1654 "Must have been hoisted to PreEntryBlock or outside the scope");
1655 }
1656 }
1657 for (SelectInst *SI : RI.Selects) {
1658 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1659 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1660 if (!(IsTrueBiased || IsFalseBiased))
1661 continue;
1662 Value *V = SI->getCondition();
1663 CHR_DEBUG(dbgs() << *V << "\n");
1664 if (auto *I = dyn_cast<Instruction>(V)) {
1665 (void)(I); // Unused in release build.
1666 assert((I->getParent() == PreEntryBlock ||
1667 !Scope->contains(I)) &&
1668 "Must have been hoisted to PreEntryBlock or outside the scope");
1669 }
1670 }
1671 }
1672}
1673
1674void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1675 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1676
1677 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1678
1679 for (RegInfo &RI : Scope->RegInfos) {
1680 const Region *R = RI.R;
1681 unsigned Duplication = getRegionDuplicationCount(R);
1682 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
1683 << "\n");
1684 if (Duplication >= CHRDupThreshsold) {
1685 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1686 << " for this region");
1687 ORE.emit([&]() {
1688 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1689 R->getEntry()->getTerminator())
1690 << "Reached the duplication threshold for the region";
1691 });
1692 return;
1693 }
1694 }
1695 for (RegInfo &RI : Scope->RegInfos) {
1696 DuplicationCount[RI.R]++;
1697 }
1698
1699 Region *FirstRegion = Scope->RegInfos[0].R;
1700 BasicBlock *EntryBlock = FirstRegion->getEntry();
1701 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1702 BasicBlock *ExitBlock = LastRegion->getExit();
1703 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1704
1705 if (ExitBlock) {
1706 // Insert a trivial phi at the exit block (where the CHR hot path and the
1707 // cold path merges) for a value that's defined in the scope but used
1708 // outside it (meaning it's alive at the exit block). We will add the
1709 // incoming values for the CHR cold paths to it below. Without this, we'd
1710 // miss updating phi's for such values unless there happens to already be a
1711 // phi for that value there.
1712 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1713 }
1714
1715 // Split the entry block of the first region. The new block becomes the new
1716 // entry block of the first region. The old entry block becomes the block to
1717 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1718 // through the split, we update the entry of the first region after the split,
1719 // and Region only points to the entry and the exit blocks, rather than
1720 // keeping everything in a list or set, the blocks membership and the
1721 // entry/exit blocks of the region are still valid after the split.
1722 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1723 << " at " << *Scope->BranchInsertPoint << "\n");
1724 BasicBlock *NewEntryBlock =
1725 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1726 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1727 "NewEntryBlock's only pred must be EntryBlock");
1728 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1729 BasicBlock *PreEntryBlock = EntryBlock;
1730
1731 ValueToValueMapTy VMap;
1732 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1733 // hot path (originals) and a cold path (clones) and update the PHIs at the
1734 // exit block.
1735 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1736
1737 // Replace the old (placeholder) branch with the new (merged) conditional
1738 // branch.
1739 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1740 NewEntryBlock, VMap);
1741
1742#ifndef NDEBUG
1744#endif
1745
1746 // Hoist the conditional values of the branches/selects.
1747 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1748
1749#ifndef NDEBUG
1750 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1751#endif
1752
1753 // Create the combined branch condition and constant-fold the branches/selects
1754 // in the hot path.
1755 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1756 ProfileCount.value_or(0));
1757}
1758
1759// A helper for transformScopes. Clone the blocks in the scope (excluding the
1760// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1761// at the exit block.
1762void CHR::cloneScopeBlocks(CHRScope *Scope,
1763 BasicBlock *PreEntryBlock,
1764 BasicBlock *ExitBlock,
1765 Region *LastRegion,
1766 ValueToValueMapTy &VMap) {
1767 // Clone all the blocks. The original blocks will be the hot-path
1768 // CHR-optimized code and the cloned blocks will be the original unoptimized
1769 // code. This is so that the block pointers from the
1770 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1771 // which CHR should apply to.
1773 for (RegInfo &RI : Scope->RegInfos)
1774 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1775 // sub-Scopes.
1776 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1777 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1778 NewBlocks.push_back(NewBB);
1779 VMap[BB] = NewBB;
1780
1781 // Unreachable predecessors will not be cloned and will not have an edge
1782 // to the cloned block. As such, also remove them from any phi nodes.
1783 for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1784 PN.removeIncomingValueIf([&](unsigned Idx) {
1785 return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1786 });
1787 }
1788
1789 // Place the cloned blocks right after the original blocks (right before the
1790 // exit block of.)
1791 if (ExitBlock)
1792 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1793 F.end());
1794
1795 // Update the cloned blocks/instructions to refer to themselves.
1796 for (BasicBlock *NewBB : NewBlocks)
1797 for (Instruction &I : *NewBB)
1798 RemapInstruction(&I, VMap,
1800
1801 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1802 // the top-level region but we don't need to add PHIs. The trivial PHIs
1803 // inserted above will be updated here.
1804 if (ExitBlock)
1805 for (PHINode &PN : ExitBlock->phis())
1806 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1807 ++I) {
1808 BasicBlock *Pred = PN.getIncomingBlock(I);
1809 if (LastRegion->contains(Pred)) {
1810 Value *V = PN.getIncomingValue(I);
1811 auto It = VMap.find(V);
1812 if (It != VMap.end()) V = It->second;
1813 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1814 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1815 }
1816 }
1817}
1818
1819// A helper for transformScope. Replace the old (placeholder) branch with the
1820// new (merged) conditional branch.
1821BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1822 BasicBlock *EntryBlock,
1823 BasicBlock *NewEntryBlock,
1824 ValueToValueMapTy &VMap) {
1825 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1826 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1827 "SplitBlock did not work correctly!");
1828 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1829 "NewEntryBlock's only pred must be EntryBlock");
1830 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1831 "NewEntryBlock must have been copied");
1832 OldBR->dropAllReferences();
1833 OldBR->eraseFromParent();
1834 // The true predicate is a placeholder. It will be replaced later in
1835 // fixupBranchesAndSelects().
1836 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1837 cast<BasicBlock>(VMap[NewEntryBlock]),
1838 ConstantInt::getTrue(F.getContext()));
1839 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1840 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1841 "NewEntryBlock's only pred must be EntryBlock");
1842 return NewBR;
1843}
1844
1845// A helper for transformScopes. Create the combined branch condition and
1846// constant-fold the branches/selects in the hot path.
1847void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1848 BasicBlock *PreEntryBlock,
1849 BranchInst *MergedBR,
1851 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1852 BranchProbability CHRBranchBias(1, 1);
1853 uint64_t NumCHRedBranches = 0;
1854 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1855 for (RegInfo &RI : Scope->CHRRegions) {
1856 Region *R = RI.R;
1857 if (RI.HasBranch) {
1858 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1859 ++NumCHRedBranches;
1860 }
1861 for (SelectInst *SI : RI.Selects) {
1862 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1863 ++NumCHRedBranches;
1864 }
1865 }
1866 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1867 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1868 ORE.emit([&]() {
1870 "CHR",
1871 // Refer to the hot (original) path
1872 MergedBR->getSuccessor(0)->getTerminator())
1873 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1874 << " branches or selects";
1875 });
1876 MergedBR->setCondition(MergedCondition);
1877 uint32_t Weights[] = {
1878 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1879 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1880 };
1881 setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1882 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1883 << "\n");
1884}
1885
1886// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1887// and constant-fold a branch in the hot path.
1888void CHR::fixupBranch(Region *R, CHRScope *Scope,
1889 IRBuilder<> &IRB,
1890 Value *&MergedCondition,
1891 BranchProbability &CHRBranchBias) {
1892 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1893 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1894 "Must be truthy or falsy");
1895 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1896 assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1897 BranchProbability Bias = BranchBiasMap[R];
1898 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1899 // Take the min.
1900 if (CHRBranchBias > Bias)
1901 CHRBranchBias = Bias;
1902 BasicBlock *IfThen = BI->getSuccessor(1);
1903 BasicBlock *IfElse = BI->getSuccessor(0);
1904 BasicBlock *RegionExitBlock = R->getExit();
1905 assert(RegionExitBlock && "Null ExitBlock");
1906 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1907 IfThen != IfElse && "Invariant from findScopes");
1908 if (IfThen == RegionExitBlock) {
1909 // Swap them so that IfThen means going into it and IfElse means skipping
1910 // it.
1911 std::swap(IfThen, IfElse);
1912 }
1913 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1914 << " IfElse " << IfElse->getName() << "\n");
1915 Value *Cond = BI->getCondition();
1916 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1917 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1918 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1919 MergedCondition);
1920 // Constant-fold the branch at ClonedEntryBlock.
1921 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1922 "The successor shouldn't change");
1923 Value *NewCondition = ConditionTrue ?
1924 ConstantInt::getTrue(F.getContext()) :
1925 ConstantInt::getFalse(F.getContext());
1926 BI->setCondition(NewCondition);
1927}
1928
1929// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1930// and constant-fold a select in the hot path.
1931void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1932 IRBuilder<> &IRB,
1933 Value *&MergedCondition,
1934 BranchProbability &CHRBranchBias) {
1935 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1936 assert((IsTrueBiased ||
1937 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1938 assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1939 BranchProbability Bias = SelectBiasMap[SI];
1940 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1941 // Take the min.
1942 if (CHRBranchBias > Bias)
1943 CHRBranchBias = Bias;
1944 Value *Cond = SI->getCondition();
1945 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1946 MergedCondition);
1947 Value *NewCondition = IsTrueBiased ?
1948 ConstantInt::getTrue(F.getContext()) :
1949 ConstantInt::getFalse(F.getContext());
1950 SI->setCondition(NewCondition);
1951}
1952
1953// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1954// condition.
1955void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1956 Instruction *BranchOrSelect, CHRScope *Scope,
1957 IRBuilder<> &IRB, Value *&MergedCondition) {
1958 if (!IsTrueBiased) {
1959 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1960 // branch, negate the icmp predicate and swap the branch targets and avoid
1961 // inserting an Xor to negate Cond.
1962 auto *ICmp = dyn_cast<ICmpInst>(Cond);
1963 if (!ICmp ||
1964 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
1965 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
1966 }
1967
1968 // Freeze potentially poisonous conditions.
1970 Cond = IRB.CreateFreeze(Cond);
1971
1972 // Use logical and to avoid propagating poison from later conditions.
1973 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
1974}
1975
1976void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1977 unsigned I = 0;
1978 DenseSet<PHINode *> TrivialPHIs;
1979 for (CHRScope *Scope : CHRScopes) {
1980 transformScopes(Scope, TrivialPHIs);
1981 CHR_DEBUG(
1982 std::ostringstream oss;
1983 oss << " after transformScopes " << I++;
1984 dumpIR(F, oss.str().c_str(), nullptr));
1985 (void)I;
1986 }
1987}
1988
1989static void LLVM_ATTRIBUTE_UNUSED
1990dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1991 dbgs() << Label << " " << Scopes.size() << "\n";
1992 for (CHRScope *Scope : Scopes) {
1993 dbgs() << *Scope << "\n";
1994 }
1995}
1996
1997bool CHR::run() {
1998 if (!shouldApply(F, PSI))
1999 return false;
2000
2001 CHR_DEBUG(dumpIR(F, "before", nullptr));
2002
2003 bool Changed = false;
2004 {
2005 CHR_DEBUG(
2006 dbgs() << "RegionInfo:\n";
2007 RI.print(dbgs()));
2008
2009 // Recursively traverse the region tree and find regions that have biased
2010 // branches and/or selects and create scopes.
2012 findScopes(AllScopes);
2013 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2014
2015 // Split the scopes if 1) the conditional values of the biased
2016 // branches/selects of the inner/lower scope can't be hoisted up to the
2017 // outermost/uppermost scope entry, or 2) the condition values of the biased
2018 // branches/selects in a scope (including subscopes) don't share at least
2019 // one common value.
2020 SmallVector<CHRScope *, 8> SplitScopes;
2021 splitScopes(AllScopes, SplitScopes);
2022 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2023
2024 // After splitting, set the biased regions and selects of a scope (a tree
2025 // root) that include those of the subscopes.
2026 classifyBiasedScopes(SplitScopes);
2027 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2028
2029 // Filter out the scopes that has only one biased region or select (CHR
2030 // isn't useful in such a case).
2031 SmallVector<CHRScope *, 8> FilteredScopes;
2032 filterScopes(SplitScopes, FilteredScopes);
2033 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2034
2035 // Set the regions to be CHR'ed and their hoist stops for each scope.
2037 setCHRRegions(FilteredScopes, SetScopes);
2038 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2039
2040 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2041 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2042 // to the hot path, rather than both hot and cold paths.
2043 SmallVector<CHRScope *, 8> SortedScopes;
2044 sortScopes(SetScopes, SortedScopes);
2045 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2046
2047 CHR_DEBUG(
2048 dbgs() << "RegionInfo:\n";
2049 RI.print(dbgs()));
2050
2051 // Apply the CHR transformation.
2052 if (!SortedScopes.empty()) {
2053 transformScopes(SortedScopes);
2054 Changed = true;
2055 }
2056 }
2057
2058 if (Changed) {
2059 CHR_DEBUG(dumpIR(F, "after", &Stats));
2060 ORE.emit([&]() {
2061 return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2062 << ore::NV("Function", &F) << " "
2063 << "Reduced the number of branches in hot paths by "
2064 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2065 << " (static) and "
2066 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2067 << " (weighted by PGO count)";
2068 });
2069 }
2070
2071 return Changed;
2072}
2073
2074namespace llvm {
2075
2078}
2079
2081 Function &F,
2084 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2085 // If there is no profile summary, we should not do CHR.
2086 if (!PPSI || !PPSI->hasProfileSummary())
2087 return PreservedAnalyses::all();
2088 auto &PSI = *PPSI;
2089 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2090 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2091 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2093 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2094 if (!Changed)
2095 return PreservedAnalyses::all();
2096 return PreservedAnalyses::none();
2097}
2098
2099} // namespace llvm
static const LLT S1
static MachineBasicBlock * split(MachineBasicBlock::iterator I)
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:199
static void insertTrivialPHIs(CHRScope *Scope, BasicBlock *EntryBlock, BasicBlock *ExitBlock, DenseSet< PHINode * > &TrivialPHIs)
static raw_ostream LLVM_ATTRIBUTE_UNUSED & operator<<(raw_ostream &OS, const CHRStats &Stats)
static cl::opt< bool > DisableCHR("disable-chr", cl::init(false), cl::Hidden, cl::desc("Disable CHR for all functions"))
static StringSet CHRFunctions
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2)
static bool checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables, DenseSet< Instruction * > *HoistStops, DenseMap< Instruction *, bool > &Visited)
static cl::opt< unsigned > CHRMergeThreshold("chr-merge-threshold", cl::init(2), cl::Hidden, cl::desc("CHR merges a group of N branches/selects where N >= this value"))
static bool isHoistable(Instruction *I, DominatorTree &DT)
static cl::opt< unsigned > CHRDupThreshsold("chr-dup-threshold", cl::init(3), cl::Hidden, cl::desc("Max number of duplications by CHR for a region"))
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static bool checkBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, S &TrueSet, S &FalseSet, M &BiasMap)
static StringSet CHRModules
static bool checkBiasedSelect(SelectInst *SI, Region *R, DenseSet< SelectInst * > &TrueBiasedSelectsGlobal, DenseSet< SelectInst * > &FalseBiasedSelectsGlobal, DenseMap< SelectInst *, BranchProbability > &SelectBiasMap)
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, HoistStopMapTy &HoistStopMap, DenseSet< Instruction * > &HoistedSet, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope)
static bool shouldApply(Function &F, ProfileSummaryInfo &PSI)
#define CHR_DEBUG(X)
static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, CHRStats *Stats)
static void getSelectsInScope(CHRScope *Scope, DenseSet< Instruction * > &Output)
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool isHoistableInstructionType(Instruction *I)
static DenseSet< Value * > getCHRConditionValuesForRegion(RegInfo &RI)
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, Instruction *ExcludedUser, CHRScope *Scope)
static Instruction * getBranchInsertPoint(RegInfo &RI)
static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(CHRScope *Scope, BasicBlock *PreEntryBlock)
static cl::opt< std::string > CHRModuleList("chr-module-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of modules to apply CHR to"))
#define DEBUG_TYPE
static void LLVM_ATTRIBUTE_UNUSED assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope)
static cl::opt< double > CHRBiasThreshold("chr-bias-threshold", cl::init(0.99), cl::Hidden, cl::desc("CHR considers a branch bias greater than this ratio as biased"))
static void LLVM_ATTRIBUTE_UNUSED dumpScopes(SmallVectorImpl< CHRScope * > &Scopes, const char *Label)
static BranchProbability getCHRBiasThreshold()
static cl::opt< std::string > CHRFunctionList("chr-function-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of functions to apply CHR to"))
static cl::opt< bool > ForceCHR("force-chr", cl::init(false), cl::Hidden, cl::desc("Apply CHR for all functions"))
static bool checkBiasedBranch(BranchInst *BI, Region *R, DenseSet< Region * > &TrueBiasedRegionsGlobal, DenseSet< Region * > &FalseBiasedRegionsGlobal, DenseMap< Region *, BranchProbability > &BranchBiasMap)
static const std::set< Value * > & getBaseValues(Value *V, DominatorTree &DT, DenseMap< Value *, std::set< Value * > > &Visited)
static bool extractBranchProbabilities(Instruction *I, BranchProbability &TrueProb, BranchProbability &FalseProb)
static void parseCHRFilterFiles()
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:850
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Class to represent profile counts.
Definition: Function.h:296
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2555
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1693
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1536
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
The optimization diagnostic interface.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:688
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionEntryHot(const FuncT *F) const
Returns true if F has hot function entry.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition: RegionInfo.h:357
void replaceEntryRecursive(BlockT *NewEntry)
Recursively replace the entry basic block of the region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:362
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition: RegionInfo.h:965
This class represents the LLVM 'select' instruction.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:685
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
iterator_range< user_iterator > users()
Definition: Value.h:421
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:826
@ Exit
Definition: COFF.h:827
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:457
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2020
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2082
auto predecessors(const MachineBasicBlock *BB)
unsigned pred_size(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860