LLVM 19.0.0git
ControlHeightReduction.cpp
Go to the documentation of this file.
1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/StringSet.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/PassManager.h"
41
42#include <optional>
43#include <set>
44#include <sstream>
45
46using namespace llvm;
47
48#define DEBUG_TYPE "chr"
49
50#define CHR_DEBUG(X) LLVM_DEBUG(X)
51
52static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
53 cl::desc("Disable CHR for all functions"));
54
55static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
56 cl::desc("Apply CHR for all functions"));
57
59 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
60 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
61
63 "chr-merge-threshold", cl::init(2), cl::Hidden,
64 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
65
67 "chr-module-list", cl::init(""), cl::Hidden,
68 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
69
71 "chr-function-list", cl::init(""), cl::Hidden,
72 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
73
75 "chr-dup-threshold", cl::init(3), cl::Hidden,
76 cl::desc("Max number of duplications by CHR for a region"));
77
80
81static void parseCHRFilterFiles() {
82 if (!CHRModuleList.empty()) {
83 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
84 if (!FileOrErr) {
85 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
86 std::exit(1);
87 }
88 StringRef Buf = FileOrErr->get()->getBuffer();
90 Buf.split(Lines, '\n');
91 for (StringRef Line : Lines) {
92 Line = Line.trim();
93 if (!Line.empty())
94 CHRModules.insert(Line);
95 }
96 }
97 if (!CHRFunctionList.empty()) {
98 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
99 if (!FileOrErr) {
100 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
101 std::exit(1);
102 }
103 StringRef Buf = FileOrErr->get()->getBuffer();
105 Buf.split(Lines, '\n');
106 for (StringRef Line : Lines) {
107 Line = Line.trim();
108 if (!Line.empty())
109 CHRFunctions.insert(Line);
110 }
111 }
112}
113
114namespace {
115
116struct CHRStats {
117 CHRStats() = default;
118 void print(raw_ostream &OS) const {
119 OS << "CHRStats: NumBranches " << NumBranches
120 << " NumBranchesDelta " << NumBranchesDelta
121 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
122 }
123 // The original number of conditional branches / selects
124 uint64_t NumBranches = 0;
125 // The decrease of the number of conditional branches / selects in the hot
126 // paths due to CHR.
127 uint64_t NumBranchesDelta = 0;
128 // NumBranchesDelta weighted by the profile count at the scope entry.
129 uint64_t WeightedNumBranchesDelta = 0;
130};
131
132// RegInfo - some properties of a Region.
133struct RegInfo {
134 RegInfo() = default;
135 RegInfo(Region *RegionIn) : R(RegionIn) {}
136 Region *R = nullptr;
137 bool HasBranch = false;
139};
140
141typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
142
143// CHRScope - a sequence of regions to CHR together. It corresponds to a
144// sequence of conditional blocks. It can have subscopes which correspond to
145// nested conditional blocks. Nested CHRScopes form a tree.
146class CHRScope {
147 public:
148 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
149 assert(RI.R && "Null RegionIn");
150 RegInfos.push_back(RI);
151 }
152
153 Region *getParentRegion() {
154 assert(RegInfos.size() > 0 && "Empty CHRScope");
155 Region *Parent = RegInfos[0].R->getParent();
156 assert(Parent && "Unexpected to call this on the top-level region");
157 return Parent;
158 }
159
160 BasicBlock *getEntryBlock() {
161 assert(RegInfos.size() > 0 && "Empty CHRScope");
162 return RegInfos.front().R->getEntry();
163 }
164
165 BasicBlock *getExitBlock() {
166 assert(RegInfos.size() > 0 && "Empty CHRScope");
167 return RegInfos.back().R->getExit();
168 }
169
170 bool appendable(CHRScope *Next) {
171 // The next scope is appendable only if this scope is directly connected to
172 // it (which implies it post-dominates this scope) and this scope dominates
173 // it (no edge to the next scope outside this scope).
174 BasicBlock *NextEntry = Next->getEntryBlock();
175 if (getExitBlock() != NextEntry)
176 // Not directly connected.
177 return false;
178 Region *LastRegion = RegInfos.back().R;
179 for (BasicBlock *Pred : predecessors(NextEntry))
180 if (!LastRegion->contains(Pred))
181 // There's an edge going into the entry of the next scope from outside
182 // of this scope.
183 return false;
184 return true;
185 }
186
187 void append(CHRScope *Next) {
188 assert(RegInfos.size() > 0 && "Empty CHRScope");
189 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
190 assert(getParentRegion() == Next->getParentRegion() &&
191 "Must be siblings");
192 assert(getExitBlock() == Next->getEntryBlock() &&
193 "Must be adjacent");
194 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
195 Subs.append(Next->Subs.begin(), Next->Subs.end());
196 }
197
198 void addSub(CHRScope *SubIn) {
199#ifndef NDEBUG
200 bool IsChild = false;
201 for (RegInfo &RI : RegInfos)
202 if (RI.R == SubIn->getParentRegion()) {
203 IsChild = true;
204 break;
205 }
206 assert(IsChild && "Must be a child");
207#endif
208 Subs.push_back(SubIn);
209 }
210
211 // Split this scope at the boundary region into two, which will belong to the
212 // tail and returns the tail.
213 CHRScope *split(Region *Boundary) {
214 assert(Boundary && "Boundary null");
215 assert(RegInfos.begin()->R != Boundary &&
216 "Can't be split at beginning");
217 auto BoundaryIt = llvm::find_if(
218 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
219 if (BoundaryIt == RegInfos.end())
220 return nullptr;
221 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
222 DenseSet<Region *> TailRegionSet;
223 for (const RegInfo &RI : TailRegInfos)
224 TailRegionSet.insert(RI.R);
225
226 auto TailIt =
227 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
228 assert(Sub && "null Sub");
229 Region *Parent = Sub->getParentRegion();
230 if (TailRegionSet.count(Parent))
231 return false;
232
233 assert(llvm::any_of(
234 RegInfos,
235 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
236 "Must be in head");
237 return true;
238 });
239 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
240
241 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
242 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
243 RegInfos.erase(BoundaryIt, RegInfos.end());
244 Subs.erase(TailIt, Subs.end());
245 return Scope;
246 }
247
248 bool contains(Instruction *I) const {
249 BasicBlock *Parent = I->getParent();
250 for (const RegInfo &RI : RegInfos)
251 if (RI.R->contains(Parent))
252 return true;
253 return false;
254 }
255
256 void print(raw_ostream &OS) const;
257
258 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
259 SmallVector<CHRScope *, 8> Subs; // Subscopes.
260
261 // The instruction at which to insert the CHR conditional branch (and hoist
262 // the dependent condition values).
263 Instruction *BranchInsertPoint;
264
265 // True-biased and false-biased regions (conditional blocks),
266 // respectively. Used only for the outermost scope and includes regions in
267 // subscopes. The rest are unbiased.
268 DenseSet<Region *> TrueBiasedRegions;
269 DenseSet<Region *> FalseBiasedRegions;
270 // Among the biased regions, the regions that get CHRed.
271 SmallVector<RegInfo, 8> CHRRegions;
272
273 // True-biased and false-biased selects, respectively. Used only for the
274 // outermost scope and includes ones in subscopes.
275 DenseSet<SelectInst *> TrueBiasedSelects;
276 DenseSet<SelectInst *> FalseBiasedSelects;
277
278 // Map from one of the above regions to the instructions to stop
279 // hoisting instructions at through use-def chains.
280 HoistStopMapTy HoistStopMap;
281
282 private:
283 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
284 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
285 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
286};
287
288class CHR {
289 public:
290 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
291 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
293 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
294
295 ~CHR() {
296 for (CHRScope *Scope : Scopes) {
297 delete Scope;
298 }
299 }
300
301 bool run();
302
303 private:
304 // See the comments in CHR::run() for the high level flow of the algorithm and
305 // what the following functions do.
306
307 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
308 Region *R = RI.getTopLevelRegion();
309 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
310 Output.push_back(Scope);
311 }
312 }
313 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
315 CHRScope *findScope(Region *R);
316 void checkScopeHoistable(CHRScope *Scope);
317
318 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
320 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
321 CHRScope *Outer,
322 DenseSet<Value *> *OuterConditionValues,
323 Instruction *OuterInsertPoint,
325 DenseSet<Instruction *> &Unhoistables);
326
327 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
328 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
329
330 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
332
333 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
335 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
336
337 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
339
340 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
341 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
342 void cloneScopeBlocks(CHRScope *Scope,
343 BasicBlock *PreEntryBlock,
344 BasicBlock *ExitBlock,
345 Region *LastRegion,
346 ValueToValueMapTy &VMap);
347 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
348 BasicBlock *EntryBlock,
349 BasicBlock *NewEntryBlock,
350 ValueToValueMapTy &VMap);
351 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
352 BranchInst *MergedBR, uint64_t ProfileCount);
353 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
354 Value *&MergedCondition, BranchProbability &CHRBranchBias);
355 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
356 Value *&MergedCondition, BranchProbability &CHRBranchBias);
357 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
358 Instruction *BranchOrSelect, CHRScope *Scope,
359 IRBuilder<> &IRB, Value *&MergedCondition);
360 unsigned getRegionDuplicationCount(const Region *R) {
361 unsigned Count = 0;
362 // Find out how many times region R is cloned. Note that if the parent
363 // of R is cloned, R is also cloned, but R's clone count is not updated
364 // from the clone of the parent. We need to accumlate all the counts
365 // from the ancestors to get the clone count.
366 while (R) {
367 Count += DuplicationCount[R];
368 R = R->getParent();
369 }
370 return Count;
371 }
372
373 Function &F;
375 DominatorTree &DT;
377 RegionInfo &RI;
379 CHRStats Stats;
380
381 // All the true-biased regions in the function
382 DenseSet<Region *> TrueBiasedRegionsGlobal;
383 // All the false-biased regions in the function
384 DenseSet<Region *> FalseBiasedRegionsGlobal;
385 // All the true-biased selects in the function
386 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
387 // All the false-biased selects in the function
388 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
389 // A map from biased regions to their branch bias
391 // A map from biased selects to their branch bias
393 // All the scopes.
395 // This maps records how many times this region is cloned.
396 DenseMap<const Region *, unsigned> DuplicationCount;
397};
398
399} // end anonymous namespace
400
401static inline
403 const CHRStats &Stats) {
404 Stats.print(OS);
405 return OS;
406}
407
408static inline
409raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
410 Scope.print(OS);
411 return OS;
412}
413
415 if (DisableCHR)
416 return false;
417
418 if (ForceCHR)
419 return true;
420
421 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
422 if (CHRModules.count(F.getParent()->getName()))
423 return true;
424 return CHRFunctions.count(F.getName());
425 }
426
427 return PSI.isFunctionEntryHot(&F);
428}
429
430static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
431 CHRStats *Stats) {
432 StringRef FuncName = F.getName();
433 StringRef ModuleName = F.getParent()->getName();
434 (void)(FuncName); // Unused in release build.
435 (void)(ModuleName); // Unused in release build.
436 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
437 << FuncName);
438 if (Stats)
439 CHR_DEBUG(dbgs() << " " << *Stats);
440 CHR_DEBUG(dbgs() << "\n");
441 CHR_DEBUG(F.dump());
442}
443
444void CHRScope::print(raw_ostream &OS) const {
445 assert(RegInfos.size() > 0 && "Empty CHRScope");
446 OS << "CHRScope[";
447 OS << RegInfos.size() << ", Regions[";
448 for (const RegInfo &RI : RegInfos) {
449 OS << RI.R->getNameStr();
450 if (RI.HasBranch)
451 OS << " B";
452 if (RI.Selects.size() > 0)
453 OS << " S" << RI.Selects.size();
454 OS << ", ";
455 }
456 if (RegInfos[0].R->getParent()) {
457 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
458 } else {
459 // top level region
460 OS << "]";
461 }
462 OS << ", Subs[";
463 for (CHRScope *Sub : Subs) {
464 OS << *Sub << ", ";
465 }
466 OS << "]]";
467}
468
469// Return true if the given instruction type can be hoisted by CHR.
471 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
472 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
473 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
474 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
475 isa<InsertValueInst>(I);
476}
477
478// Return true if the given instruction can be hoisted by CHR.
481 return false;
482 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
483}
484
485// Recursively traverse the use-def chains of the given value and return a set
486// of the unhoistable base values defined within the scope (excluding the
487// first-region entry block) or the (hoistable or unhoistable) base values that
488// are defined outside (including the first-region entry block) of the
489// scope. The returned set doesn't include constants.
490static const std::set<Value *> &
492 DenseMap<Value *, std::set<Value *>> &Visited) {
493 auto It = Visited.find(V);
494 if (It != Visited.end()) {
495 return It->second;
496 }
497 std::set<Value *> Result;
498 if (auto *I = dyn_cast<Instruction>(V)) {
499 // We don't stop at a block that's not in the Scope because we would miss
500 // some instructions that are based on the same base values if we stop
501 // there.
502 if (!isHoistable(I, DT)) {
503 Result.insert(I);
504 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
505 }
506 // I is hoistable above the Scope.
507 for (Value *Op : I->operands()) {
508 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
509 Result.insert(OpResult.begin(), OpResult.end());
510 }
511 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
512 }
513 if (isa<Argument>(V)) {
514 Result.insert(V);
515 }
516 // We don't include others like constants because those won't lead to any
517 // chance of folding of conditions (eg two bit checks merged into one check)
518 // after CHR.
519 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
520}
521
522// Return true if V is already hoisted or can be hoisted (along with its
523// operands) above the insert point. When it returns true and HoistStops is
524// non-null, the instructions to stop hoisting at through the use-def chains are
525// inserted into HoistStops.
526static bool
528 DenseSet<Instruction *> &Unhoistables,
529 DenseSet<Instruction *> *HoistStops,
531 assert(InsertPoint && "Null InsertPoint");
532 if (auto *I = dyn_cast<Instruction>(V)) {
533 auto It = Visited.find(I);
534 if (It != Visited.end()) {
535 return It->second;
536 }
537 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
538 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
539 if (Unhoistables.count(I)) {
540 // Don't hoist if they are not to be hoisted.
541 Visited[I] = false;
542 return false;
543 }
544 if (DT.dominates(I, InsertPoint)) {
545 // We are already above the insert point. Stop here.
546 if (HoistStops)
547 HoistStops->insert(I);
548 Visited[I] = true;
549 return true;
550 }
551 // We aren't not above the insert point, check if we can hoist it above the
552 // insert point.
553 if (isHoistable(I, DT)) {
554 // Check operands first.
555 DenseSet<Instruction *> OpsHoistStops;
556 bool AllOpsHoisted = true;
557 for (Value *Op : I->operands()) {
558 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
559 Visited)) {
560 AllOpsHoisted = false;
561 break;
562 }
563 }
564 if (AllOpsHoisted) {
565 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
566 if (HoistStops)
567 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
568 Visited[I] = true;
569 return true;
570 }
571 }
572 Visited[I] = false;
573 return false;
574 }
575 // Non-instructions are considered hoistable.
576 return true;
577}
578
579// Constructs the true and false branch probabilities if the the instruction has
580// valid branch weights. Returns true when this was successful, false otherwise.
582 BranchProbability &TrueProb,
583 BranchProbability &FalseProb) {
584 uint64_t TrueWeight;
585 uint64_t FalseWeight;
586 if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
587 return false;
588 uint64_t SumWeight = TrueWeight + FalseWeight;
589
590 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
591 "Overflow calculating branch probabilities.");
592
593 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
594 if (SumWeight == 0)
595 return false;
596
597 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
598 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
599 return true;
600}
601
604 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
605}
606
607// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
608// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
609// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
610// false.
611template <typename K, typename S, typename M>
612static bool checkBias(K *Key, BranchProbability TrueProb,
613 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
614 M &BiasMap) {
616 if (TrueProb >= Threshold) {
617 TrueSet.insert(Key);
618 BiasMap[Key] = TrueProb;
619 return true;
620 } else if (FalseProb >= Threshold) {
621 FalseSet.insert(Key);
622 BiasMap[Key] = FalseProb;
623 return true;
624 }
625 return false;
626}
627
628// Returns true and insert a region into the right biased set and the map if the
629// branch of the region is biased.
631 DenseSet<Region *> &TrueBiasedRegionsGlobal,
632 DenseSet<Region *> &FalseBiasedRegionsGlobal,
634 if (!BI->isConditional())
635 return false;
636 BranchProbability ThenProb, ElseProb;
637 if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
638 return false;
639 BasicBlock *IfThen = BI->getSuccessor(0);
640 BasicBlock *IfElse = BI->getSuccessor(1);
641 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
642 IfThen != IfElse &&
643 "Invariant from findScopes");
644 if (IfThen == R->getExit()) {
645 // Swap them so that IfThen/ThenProb means going into the conditional code
646 // and IfElse/ElseProb means skipping it.
647 std::swap(IfThen, IfElse);
648 std::swap(ThenProb, ElseProb);
649 }
650 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
651 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
652 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
653 return checkBias(R, ThenProb, ElseProb,
654 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
655 BranchBiasMap);
656}
657
658// Returns true and insert a select into the right biased set and the map if the
659// select is biased.
661 SelectInst *SI, Region *R,
662 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
663 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
665 BranchProbability TrueProb, FalseProb;
666 if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
667 return false;
668 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
669 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
670 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
671 return checkBias(SI, TrueProb, FalseProb,
672 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
673 SelectBiasMap);
674}
675
676// Returns the instruction at which to hoist the dependent condition values and
677// insert the CHR branch for a region. This is the terminator branch in the
678// entry block or the first select in the entry block, if any.
680 Region *R = RI.R;
681 BasicBlock *EntryBB = R->getEntry();
682 // The hoist point is by default the terminator of the entry block, which is
683 // the same as the branch instruction if RI.HasBranch is true.
684 Instruction *HoistPoint = EntryBB->getTerminator();
685 for (SelectInst *SI : RI.Selects) {
686 if (SI->getParent() == EntryBB) {
687 // Pick the first select in Selects in the entry block. Note Selects is
688 // sorted in the instruction order within a block (asserted below).
689 HoistPoint = SI;
690 break;
691 }
692 }
693 assert(HoistPoint && "Null HoistPoint");
694#ifndef NDEBUG
695 // Check that HoistPoint is the first one in Selects in the entry block,
696 // if any.
697 DenseSet<Instruction *> EntryBlockSelectSet;
698 for (SelectInst *SI : RI.Selects) {
699 if (SI->getParent() == EntryBB) {
700 EntryBlockSelectSet.insert(SI);
701 }
702 }
703 for (Instruction &I : *EntryBB) {
704 if (EntryBlockSelectSet.contains(&I)) {
705 assert(&I == HoistPoint &&
706 "HoistPoint must be the first one in Selects");
707 break;
708 }
709 }
710#endif
711 return HoistPoint;
712}
713
714// Find a CHR scope in the given region.
715CHRScope * CHR::findScope(Region *R) {
716 CHRScope *Result = nullptr;
717 BasicBlock *Entry = R->getEntry();
718 BasicBlock *Exit = R->getExit(); // null if top level.
719 assert(Entry && "Entry must not be null");
720 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
721 "Only top level region has a null exit");
722 if (Entry)
723 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
724 else
725 CHR_DEBUG(dbgs() << "Entry null\n");
726 if (Exit)
727 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
728 else
729 CHR_DEBUG(dbgs() << "Exit null\n");
730 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
731 // to this region).
732 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
733 if (EntryInSubregion)
734 return nullptr;
735 // Exclude loops
736 for (BasicBlock *Pred : predecessors(Entry))
737 if (R->contains(Pred))
738 return nullptr;
739 // If any of the basic blocks have address taken, we must skip this region
740 // because we cannot clone basic blocks that have address taken.
741 for (BasicBlock *BB : R->blocks()) {
742 if (BB->hasAddressTaken())
743 return nullptr;
744 // If we encounter llvm.coro.id, skip this region because if the basic block
745 // is cloned, we end up inserting a token type PHI node to the block with
746 // llvm.coro.begin.
747 // FIXME: This could lead to less optimal codegen, because the region is
748 // excluded, it can prevent CHR from merging adjacent regions into bigger
749 // scope and hoisting more branches.
750 for (Instruction &I : *BB)
751 if (auto *II = dyn_cast<IntrinsicInst>(&I))
752 if (II->getIntrinsicID() == Intrinsic::coro_id)
753 return nullptr;
754 }
755
756 if (Exit) {
757 // Try to find an if-then block (check if R is an if-then).
758 // if (cond) {
759 // ...
760 // }
761 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
762 if (BI)
763 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
764 else
765 CHR_DEBUG(dbgs() << "BI null\n");
766 if (BI && BI->isConditional()) {
767 BasicBlock *S0 = BI->getSuccessor(0);
768 BasicBlock *S1 = BI->getSuccessor(1);
769 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
770 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
771 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
772 RegInfo RI(R);
773 RI.HasBranch = checkBiasedBranch(
774 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
775 BranchBiasMap);
776 Result = new CHRScope(RI);
777 Scopes.insert(Result);
778 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
779 ++Stats.NumBranches;
780 if (!RI.HasBranch) {
781 ORE.emit([&]() {
782 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
783 << "Branch not biased";
784 });
785 }
786 }
787 }
788 }
789 {
790 // Try to look for selects in the direct child blocks (as opposed to in
791 // subregions) of R.
792 // ...
793 // if (..) { // Some subregion
794 // ...
795 // }
796 // if (..) { // Some subregion
797 // ...
798 // }
799 // ...
800 // a = cond ? b : c;
801 // ...
803 for (RegionNode *E : R->elements()) {
804 if (E->isSubRegion())
805 continue;
806 // This returns the basic block of E if E is a direct child of R (not a
807 // subregion.)
808 BasicBlock *BB = E->getEntry();
809 // Need to push in the order to make it easier to find the first Select
810 // later.
811 for (Instruction &I : *BB) {
812 if (auto *SI = dyn_cast<SelectInst>(&I)) {
813 Selects.push_back(SI);
814 ++Stats.NumBranches;
815 }
816 }
817 }
818 if (Selects.size() > 0) {
819 auto AddSelects = [&](RegInfo &RI) {
820 for (auto *SI : Selects)
821 if (checkBiasedSelect(SI, RI.R,
822 TrueBiasedSelectsGlobal,
823 FalseBiasedSelectsGlobal,
824 SelectBiasMap))
825 RI.Selects.push_back(SI);
826 else
827 ORE.emit([&]() {
828 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
829 << "Select not biased";
830 });
831 };
832 if (!Result) {
833 CHR_DEBUG(dbgs() << "Found a select-only region\n");
834 RegInfo RI(R);
835 AddSelects(RI);
836 Result = new CHRScope(RI);
837 Scopes.insert(Result);
838 } else {
839 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
840 AddSelects(Result->RegInfos[0]);
841 }
842 }
843 }
844
845 if (Result) {
846 checkScopeHoistable(Result);
847 }
848 return Result;
849}
850
851// Check that any of the branch and the selects in the region could be
852// hoisted above the the CHR branch insert point (the most dominating of
853// them, either the branch (at the end of the first block) or the first
854// select in the first block). If the branch can't be hoisted, drop the
855// selects in the first blocks.
856//
857// For example, for the following scope/region with selects, we want to insert
858// the merged branch right before the first select in the first/entry block by
859// hoisting c1, c2, c3, and c4.
860//
861// // Branch insert point here.
862// a = c1 ? b : c; // Select 1
863// d = c2 ? e : f; // Select 2
864// if (c3) { // Branch
865// ...
866// c4 = foo() // A call.
867// g = c4 ? h : i; // Select 3
868// }
869//
870// But suppose we can't hoist c4 because it's dependent on the preceding
871// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
872// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
873void CHR::checkScopeHoistable(CHRScope *Scope) {
874 RegInfo &RI = Scope->RegInfos[0];
875 Region *R = RI.R;
876 BasicBlock *EntryBB = R->getEntry();
877 auto *Branch = RI.HasBranch ?
878 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
879 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
880 if (RI.HasBranch || !Selects.empty()) {
881 Instruction *InsertPoint = getBranchInsertPoint(RI);
882 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
883 // Avoid a data dependence from a select or a branch to a(nother)
884 // select. Note no instruction can't data-depend on a branch (a branch
885 // instruction doesn't produce a value).
886 DenseSet<Instruction *> Unhoistables;
887 // Initialize Unhoistables with the selects.
888 for (SelectInst *SI : Selects) {
889 Unhoistables.insert(SI);
890 }
891 // Remove Selects that can't be hoisted.
892 for (auto it = Selects.begin(); it != Selects.end(); ) {
893 SelectInst *SI = *it;
894 if (SI == InsertPoint) {
895 ++it;
896 continue;
897 }
899 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
900 DT, Unhoistables, nullptr, Visited);
901 if (!IsHoistable) {
902 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
903 ORE.emit([&]() {
905 "DropUnhoistableSelect", SI)
906 << "Dropped unhoistable select";
907 });
908 it = Selects.erase(it);
909 // Since we are dropping the select here, we also drop it from
910 // Unhoistables.
911 Unhoistables.erase(SI);
912 } else
913 ++it;
914 }
915 // Update InsertPoint after potentially removing selects.
916 InsertPoint = getBranchInsertPoint(RI);
917 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
918 if (RI.HasBranch && InsertPoint != Branch) {
920 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
921 DT, Unhoistables, nullptr, Visited);
922 if (!IsHoistable) {
923 // If the branch isn't hoistable, drop the selects in the entry
924 // block, preferring the branch, which makes the branch the hoist
925 // point.
926 assert(InsertPoint != Branch && "Branch must not be the hoist point");
927 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
928 CHR_DEBUG(
929 for (SelectInst *SI : Selects) {
930 dbgs() << "SI " << *SI << "\n";
931 });
932 for (SelectInst *SI : Selects) {
933 ORE.emit([&]() {
935 "DropSelectUnhoistableBranch", SI)
936 << "Dropped select due to unhoistable branch";
937 });
938 }
939 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
940 return SI->getParent() == EntryBB;
941 });
942 Unhoistables.clear();
943 InsertPoint = Branch;
944 }
945 }
946 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
947#ifndef NDEBUG
948 if (RI.HasBranch) {
949 assert(!DT.dominates(Branch, InsertPoint) &&
950 "Branch can't be already above the hoist point");
952 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
953 DT, Unhoistables, nullptr, Visited) &&
954 "checkHoistValue for branch");
955 }
956 for (auto *SI : Selects) {
957 assert(!DT.dominates(SI, InsertPoint) &&
958 "SI can't be already above the hoist point");
960 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
961 Unhoistables, nullptr, Visited) &&
962 "checkHoistValue for selects");
963 }
964 CHR_DEBUG(dbgs() << "Result\n");
965 if (RI.HasBranch) {
966 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
967 }
968 for (auto *SI : Selects) {
969 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
970 }
971#endif
972 }
973}
974
975// Traverse the region tree, find all nested scopes and merge them if possible.
976CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
978 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
979 CHRScope *Result = findScope(R);
980 // Visit subscopes.
981 CHRScope *ConsecutiveSubscope = nullptr;
983 for (auto It = R->begin(); It != R->end(); ++It) {
984 const std::unique_ptr<Region> &SubR = *It;
985 auto NextIt = std::next(It);
986 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
987 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
988 << "\n");
989 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
990 if (SubCHRScope) {
991 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
992 } else {
993 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
994 }
995 if (SubCHRScope) {
996 if (!ConsecutiveSubscope)
997 ConsecutiveSubscope = SubCHRScope;
998 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
999 Subscopes.push_back(ConsecutiveSubscope);
1000 ConsecutiveSubscope = SubCHRScope;
1001 } else
1002 ConsecutiveSubscope->append(SubCHRScope);
1003 } else {
1004 if (ConsecutiveSubscope) {
1005 Subscopes.push_back(ConsecutiveSubscope);
1006 }
1007 ConsecutiveSubscope = nullptr;
1008 }
1009 }
1010 if (ConsecutiveSubscope) {
1011 Subscopes.push_back(ConsecutiveSubscope);
1012 }
1013 for (CHRScope *Sub : Subscopes) {
1014 if (Result) {
1015 // Combine it with the parent.
1016 Result->addSub(Sub);
1017 } else {
1018 // Push Subscopes as they won't be combined with the parent.
1019 Scopes.push_back(Sub);
1020 }
1021 }
1022 return Result;
1023}
1024
1026 DenseSet<Value *> ConditionValues;
1027 if (RI.HasBranch) {
1028 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1029 ConditionValues.insert(BI->getCondition());
1030 }
1031 for (SelectInst *SI : RI.Selects) {
1032 ConditionValues.insert(SI->getCondition());
1033 }
1034 return ConditionValues;
1035}
1036
1037
1038// Determine whether to split a scope depending on the sets of the branch
1039// condition values of the previous region and the current region. We split
1040// (return true) it if 1) the condition values of the inner/lower scope can't be
1041// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1042// values have an empty intersection (because the combined branch conditions
1043// won't probably lead to a simpler combined condition).
1044static bool shouldSplit(Instruction *InsertPoint,
1045 DenseSet<Value *> &PrevConditionValues,
1046 DenseSet<Value *> &ConditionValues,
1047 DominatorTree &DT,
1048 DenseSet<Instruction *> &Unhoistables) {
1049 assert(InsertPoint && "Null InsertPoint");
1050 CHR_DEBUG(
1051 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1052 for (Value *V : PrevConditionValues) {
1053 dbgs() << *V << ", ";
1054 }
1055 dbgs() << " ConditionValues ";
1056 for (Value *V : ConditionValues) {
1057 dbgs() << *V << ", ";
1058 }
1059 dbgs() << "\n");
1060 // If any of Bases isn't hoistable to the hoist point, split.
1061 for (Value *V : ConditionValues) {
1063 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1064 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1065 return true; // Not hoistable, split.
1066 }
1067 }
1068 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1069 // unnecessary splits at scopes with no branch/selects. If
1070 // PrevConditionValues and ConditionValues don't intersect at all, split.
1071 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1072 // Use std::set as DenseSet doesn't work with set_intersection.
1073 std::set<Value *> PrevBases, Bases;
1075 for (Value *V : PrevConditionValues) {
1076 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1077 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1078 }
1079 for (Value *V : ConditionValues) {
1080 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1081 Bases.insert(BaseValues.begin(), BaseValues.end());
1082 }
1083 CHR_DEBUG(
1084 dbgs() << "PrevBases ";
1085 for (Value *V : PrevBases) {
1086 dbgs() << *V << ", ";
1087 }
1088 dbgs() << " Bases ";
1089 for (Value *V : Bases) {
1090 dbgs() << *V << ", ";
1091 }
1092 dbgs() << "\n");
1093 std::vector<Value *> Intersection;
1094 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1095 Bases.end(), std::back_inserter(Intersection));
1096 if (Intersection.empty()) {
1097 // Empty intersection, split.
1098 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1099 return true;
1100 }
1101 }
1102 CHR_DEBUG(dbgs() << "No split\n");
1103 return false; // Don't split.
1104}
1105
1106static void getSelectsInScope(CHRScope *Scope,
1107 DenseSet<Instruction *> &Output) {
1108 for (RegInfo &RI : Scope->RegInfos)
1109 for (SelectInst *SI : RI.Selects)
1110 Output.insert(SI);
1111 for (CHRScope *Sub : Scope->Subs)
1112 getSelectsInScope(Sub, Output);
1113}
1114
1115void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1117 for (CHRScope *Scope : Input) {
1118 assert(!Scope->BranchInsertPoint &&
1119 "BranchInsertPoint must not be set");
1120 DenseSet<Instruction *> Unhoistables;
1121 getSelectsInScope(Scope, Unhoistables);
1122 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1123 }
1124#ifndef NDEBUG
1125 for (CHRScope *Scope : Output) {
1126 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1127 }
1128#endif
1129}
1130
1131SmallVector<CHRScope *, 8> CHR::splitScope(
1132 CHRScope *Scope,
1133 CHRScope *Outer,
1134 DenseSet<Value *> *OuterConditionValues,
1135 Instruction *OuterInsertPoint,
1137 DenseSet<Instruction *> &Unhoistables) {
1138 if (Outer) {
1139 assert(OuterConditionValues && "Null OuterConditionValues");
1140 assert(OuterInsertPoint && "Null OuterInsertPoint");
1141 }
1142 bool PrevSplitFromOuter = true;
1143 DenseSet<Value *> PrevConditionValues;
1144 Instruction *PrevInsertPoint = nullptr;
1146 SmallVector<bool, 8> SplitsSplitFromOuter;
1147 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1148 SmallVector<Instruction *, 8> SplitsInsertPoints;
1149 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1150 for (RegInfo &RI : RegInfos) {
1151 Instruction *InsertPoint = getBranchInsertPoint(RI);
1153 CHR_DEBUG(
1154 dbgs() << "ConditionValues ";
1155 for (Value *V : ConditionValues) {
1156 dbgs() << *V << ", ";
1157 }
1158 dbgs() << "\n");
1159 if (RI.R == RegInfos[0].R) {
1160 // First iteration. Check to see if we should split from the outer.
1161 if (Outer) {
1162 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1163 CHR_DEBUG(dbgs() << "Should split from outer at "
1164 << RI.R->getNameStr() << "\n");
1165 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1166 ConditionValues, DT, Unhoistables)) {
1167 PrevConditionValues = ConditionValues;
1168 PrevInsertPoint = InsertPoint;
1169 ORE.emit([&]() {
1171 "SplitScopeFromOuter",
1172 RI.R->getEntry()->getTerminator())
1173 << "Split scope from outer due to unhoistable branch/select "
1174 << "and/or lack of common condition values";
1175 });
1176 } else {
1177 // Not splitting from the outer. Use the outer bases and insert
1178 // point. Union the bases.
1179 PrevSplitFromOuter = false;
1180 PrevConditionValues = *OuterConditionValues;
1181 PrevConditionValues.insert(ConditionValues.begin(),
1182 ConditionValues.end());
1183 PrevInsertPoint = OuterInsertPoint;
1184 }
1185 } else {
1186 CHR_DEBUG(dbgs() << "Outer null\n");
1187 PrevConditionValues = ConditionValues;
1188 PrevInsertPoint = InsertPoint;
1189 }
1190 } else {
1191 CHR_DEBUG(dbgs() << "Should split from prev at "
1192 << RI.R->getNameStr() << "\n");
1193 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1194 DT, Unhoistables)) {
1195 CHRScope *Tail = Scope->split(RI.R);
1196 Scopes.insert(Tail);
1197 Splits.push_back(Scope);
1198 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1199 SplitsConditionValues.push_back(PrevConditionValues);
1200 SplitsInsertPoints.push_back(PrevInsertPoint);
1201 Scope = Tail;
1202 PrevConditionValues = ConditionValues;
1203 PrevInsertPoint = InsertPoint;
1204 PrevSplitFromOuter = true;
1205 ORE.emit([&]() {
1207 "SplitScopeFromPrev",
1208 RI.R->getEntry()->getTerminator())
1209 << "Split scope from previous due to unhoistable branch/select "
1210 << "and/or lack of common condition values";
1211 });
1212 } else {
1213 // Not splitting. Union the bases. Keep the hoist point.
1214 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1215 }
1216 }
1217 }
1218 Splits.push_back(Scope);
1219 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1220 SplitsConditionValues.push_back(PrevConditionValues);
1221 assert(PrevInsertPoint && "Null PrevInsertPoint");
1222 SplitsInsertPoints.push_back(PrevInsertPoint);
1223 assert(Splits.size() == SplitsConditionValues.size() &&
1224 Splits.size() == SplitsSplitFromOuter.size() &&
1225 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1226 for (size_t I = 0; I < Splits.size(); ++I) {
1227 CHRScope *Split = Splits[I];
1228 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1229 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1231 DenseSet<Instruction *> SplitUnhoistables;
1232 getSelectsInScope(Split, SplitUnhoistables);
1233 for (CHRScope *Sub : Split->Subs) {
1234 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1235 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1236 SplitUnhoistables);
1237 llvm::append_range(NewSubs, SubSplits);
1238 }
1239 Split->Subs = NewSubs;
1240 }
1242 for (size_t I = 0; I < Splits.size(); ++I) {
1243 CHRScope *Split = Splits[I];
1244 if (SplitsSplitFromOuter[I]) {
1245 // Split from the outer.
1246 Output.push_back(Split);
1247 Split->BranchInsertPoint = SplitsInsertPoints[I];
1248 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1249 << "\n");
1250 } else {
1251 // Connected to the outer.
1252 Result.push_back(Split);
1253 }
1254 }
1255 if (!Outer)
1256 assert(Result.empty() &&
1257 "If no outer (top-level), must return no nested ones");
1258 return Result;
1259}
1260
1261void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1262 for (CHRScope *Scope : Scopes) {
1263 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1264 classifyBiasedScopes(Scope, Scope);
1265 CHR_DEBUG(
1266 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1267 dbgs() << "TrueBiasedRegions ";
1268 for (Region *R : Scope->TrueBiasedRegions) {
1269 dbgs() << R->getNameStr() << ", ";
1270 }
1271 dbgs() << "\n";
1272 dbgs() << "FalseBiasedRegions ";
1273 for (Region *R : Scope->FalseBiasedRegions) {
1274 dbgs() << R->getNameStr() << ", ";
1275 }
1276 dbgs() << "\n";
1277 dbgs() << "TrueBiasedSelects ";
1278 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1279 dbgs() << *SI << ", ";
1280 }
1281 dbgs() << "\n";
1282 dbgs() << "FalseBiasedSelects ";
1283 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1284 dbgs() << *SI << ", ";
1285 }
1286 dbgs() << "\n";);
1287 }
1288}
1289
1290void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1291 for (RegInfo &RI : Scope->RegInfos) {
1292 if (RI.HasBranch) {
1293 Region *R = RI.R;
1294 if (TrueBiasedRegionsGlobal.contains(R))
1295 OutermostScope->TrueBiasedRegions.insert(R);
1296 else if (FalseBiasedRegionsGlobal.contains(R))
1297 OutermostScope->FalseBiasedRegions.insert(R);
1298 else
1299 llvm_unreachable("Must be biased");
1300 }
1301 for (SelectInst *SI : RI.Selects) {
1302 if (TrueBiasedSelectsGlobal.contains(SI))
1303 OutermostScope->TrueBiasedSelects.insert(SI);
1304 else if (FalseBiasedSelectsGlobal.contains(SI))
1305 OutermostScope->FalseBiasedSelects.insert(SI);
1306 else
1307 llvm_unreachable("Must be biased");
1308 }
1309 }
1310 for (CHRScope *Sub : Scope->Subs) {
1311 classifyBiasedScopes(Sub, OutermostScope);
1312 }
1313}
1314
1315static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1316 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1317 Scope->FalseBiasedRegions.size() +
1318 Scope->TrueBiasedSelects.size() +
1319 Scope->FalseBiasedSelects.size();
1320 return NumBiased >= CHRMergeThreshold;
1321}
1322
1323void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1325 for (CHRScope *Scope : Input) {
1326 // Filter out the ones with only one region and no subs.
1327 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1328 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1329 << Scope->TrueBiasedRegions.size()
1330 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1331 << " true-selects " << Scope->TrueBiasedSelects.size()
1332 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1333 ORE.emit([&]() {
1335 DEBUG_TYPE,
1336 "DropScopeWithOneBranchOrSelect",
1337 Scope->RegInfos[0].R->getEntry()->getTerminator())
1338 << "Drop scope with < "
1339 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1340 << " biased branch(es) or select(s)";
1341 });
1342 continue;
1343 }
1344 Output.push_back(Scope);
1345 }
1346}
1347
1348void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1350 for (CHRScope *Scope : Input) {
1351 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1352 "Empty");
1353 setCHRRegions(Scope, Scope);
1354 Output.push_back(Scope);
1355 CHR_DEBUG(
1356 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1357 for (auto pair : Scope->HoistStopMap) {
1358 Region *R = pair.first;
1359 dbgs() << "Region " << R->getNameStr() << "\n";
1360 for (Instruction *I : pair.second) {
1361 dbgs() << "HoistStop " << *I << "\n";
1362 }
1363 }
1364 dbgs() << "CHRRegions" << "\n";
1365 for (RegInfo &RI : Scope->CHRRegions) {
1366 dbgs() << RI.R->getNameStr() << "\n";
1367 });
1368 }
1369}
1370
1371void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1372 DenseSet<Instruction *> Unhoistables;
1373 // Put the biased selects in Unhoistables because they should stay where they
1374 // are and constant-folded after CHR (in case one biased select or a branch
1375 // can depend on another biased select.)
1376 for (RegInfo &RI : Scope->RegInfos) {
1377 for (SelectInst *SI : RI.Selects) {
1378 Unhoistables.insert(SI);
1379 }
1380 }
1381 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1382 for (RegInfo &RI : Scope->RegInfos) {
1383 Region *R = RI.R;
1384 DenseSet<Instruction *> HoistStops;
1385 bool IsHoisted = false;
1386 if (RI.HasBranch) {
1387 assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1388 OutermostScope->FalseBiasedRegions.contains(R)) &&
1389 "Must be truthy or falsy");
1390 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1391 // Note checkHoistValue fills in HoistStops.
1393 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1394 Unhoistables, &HoistStops, Visited);
1395 assert(IsHoistable && "Must be hoistable");
1396 (void)(IsHoistable); // Unused in release build
1397 IsHoisted = true;
1398 }
1399 for (SelectInst *SI : RI.Selects) {
1400 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1401 OutermostScope->FalseBiasedSelects.contains(SI)) &&
1402 "Must be true or false biased");
1403 // Note checkHoistValue fills in HoistStops.
1405 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1406 Unhoistables, &HoistStops, Visited);
1407 assert(IsHoistable && "Must be hoistable");
1408 (void)(IsHoistable); // Unused in release build
1409 IsHoisted = true;
1410 }
1411 if (IsHoisted) {
1412 OutermostScope->CHRRegions.push_back(RI);
1413 OutermostScope->HoistStopMap[R] = HoistStops;
1414 }
1415 }
1416 for (CHRScope *Sub : Scope->Subs)
1417 setCHRRegions(Sub, OutermostScope);
1418}
1419
1420static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1421 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1422}
1423
1424void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1426 Output.resize(Input.size());
1427 llvm::copy(Input, Output.begin());
1429}
1430
1431// Return true if V is already hoisted or was hoisted (along with its operands)
1432// to the insert point.
1433static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1434 HoistStopMapTy &HoistStopMap,
1435 DenseSet<Instruction *> &HoistedSet,
1436 DenseSet<PHINode *> &TrivialPHIs,
1437 DominatorTree &DT) {
1438 auto IT = HoistStopMap.find(R);
1439 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1440 DenseSet<Instruction *> &HoistStops = IT->second;
1441 if (auto *I = dyn_cast<Instruction>(V)) {
1442 if (I == HoistPoint)
1443 return;
1444 if (HoistStops.count(I))
1445 return;
1446 if (auto *PN = dyn_cast<PHINode>(I))
1447 if (TrivialPHIs.count(PN))
1448 // The trivial phi inserted by the previous CHR scope could replace a
1449 // non-phi in HoistStops. Note that since this phi is at the exit of a
1450 // previous CHR scope, which dominates this scope, it's safe to stop
1451 // hoisting there.
1452 return;
1453 if (HoistedSet.count(I))
1454 // Already hoisted, return.
1455 return;
1456 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1457 assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1458 assert(DT.getNode(HoistPoint->getParent()) &&
1459 "DT must contain HoistPoint block");
1460 if (DT.dominates(I, HoistPoint))
1461 // We are already above the hoist point. Stop here. This may be necessary
1462 // when multiple scopes would independently hoist the same
1463 // instruction. Since an outer (dominating) scope would hoist it to its
1464 // entry before an inner (dominated) scope would to its entry, the inner
1465 // scope may see the instruction already hoisted, in which case it
1466 // potentially wrong for the inner scope to hoist it and could cause bad
1467 // IR (non-dominating def), but safe to skip hoisting it instead because
1468 // it's already in a block that dominates the inner scope.
1469 return;
1470 for (Value *Op : I->operands()) {
1471 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1472 }
1473 I->moveBefore(HoistPoint);
1474 HoistedSet.insert(I);
1475 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1476 }
1477}
1478
1479// Hoist the dependent condition values of the branches and the selects in the
1480// scope to the insert point.
1481static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1482 DenseSet<PHINode *> &TrivialPHIs,
1483 DominatorTree &DT) {
1484 DenseSet<Instruction *> HoistedSet;
1485 for (const RegInfo &RI : Scope->CHRRegions) {
1486 Region *R = RI.R;
1487 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1488 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1489 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1490 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1491 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1492 HoistedSet, TrivialPHIs, DT);
1493 }
1494 for (SelectInst *SI : RI.Selects) {
1495 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1496 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1497 if (!(IsTrueBiased || IsFalseBiased))
1498 continue;
1499 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1500 HoistedSet, TrivialPHIs, DT);
1501 }
1502 }
1503}
1504
1505// Negate the predicate if an ICmp if it's used only by branches or selects by
1506// swapping the operands of the branches or the selects. Returns true if success.
1508 Instruction *ExcludedUser,
1509 CHRScope *Scope) {
1510 for (User *U : ICmp->users()) {
1511 if (U == ExcludedUser)
1512 continue;
1513 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1514 continue;
1515 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1516 continue;
1517 return false;
1518 }
1519 for (User *U : ICmp->users()) {
1520 if (U == ExcludedUser)
1521 continue;
1522 if (auto *BI = dyn_cast<BranchInst>(U)) {
1523 assert(BI->isConditional() && "Must be conditional");
1524 BI->swapSuccessors();
1525 // Don't need to swap this in terms of
1526 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1527 // mean whehter the branch is likely go into the if-then rather than
1528 // successor0/successor1 and because we can tell which edge is the then or
1529 // the else one by comparing the destination to the region exit block.
1530 continue;
1531 }
1532 if (auto *SI = dyn_cast<SelectInst>(U)) {
1533 // Swap operands
1534 SI->swapValues();
1535 SI->swapProfMetadata();
1536 if (Scope->TrueBiasedSelects.count(SI)) {
1537 assert(!Scope->FalseBiasedSelects.contains(SI) &&
1538 "Must not be already in");
1539 Scope->FalseBiasedSelects.insert(SI);
1540 } else if (Scope->FalseBiasedSelects.count(SI)) {
1541 assert(!Scope->TrueBiasedSelects.contains(SI) &&
1542 "Must not be already in");
1543 Scope->TrueBiasedSelects.insert(SI);
1544 }
1545 continue;
1546 }
1547 llvm_unreachable("Must be a branch or a select");
1548 }
1550 return true;
1551}
1552
1553// A helper for transformScopes. Insert a trivial phi at the scope exit block
1554// for a value that's defined in the scope but used outside it (meaning it's
1555// alive at the exit block).
1556static void insertTrivialPHIs(CHRScope *Scope,
1557 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1558 DenseSet<PHINode *> &TrivialPHIs) {
1559 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1560 for (RegInfo &RI : Scope->RegInfos) {
1561 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1562 // sub-Scopes.
1563 BlocksInScope.insert(BB);
1564 }
1565 }
1566 CHR_DEBUG({
1567 dbgs() << "Inserting redundant phis\n";
1568 for (BasicBlock *BB : BlocksInScope)
1569 dbgs() << "BlockInScope " << BB->getName() << "\n";
1570 });
1571 for (BasicBlock *BB : BlocksInScope) {
1572 for (Instruction &I : *BB) {
1574 for (User *U : I.users()) {
1575 if (auto *UI = dyn_cast<Instruction>(U)) {
1576 if (!BlocksInScope.contains(UI->getParent()) &&
1577 // Unless there's already a phi for I at the exit block.
1578 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1579 CHR_DEBUG(dbgs() << "V " << I << "\n");
1580 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1581 Users.push_back(UI);
1582 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1583 // There's a loop backedge from a block that's dominated by this
1584 // scope to the entry block.
1585 CHR_DEBUG(dbgs() << "V " << I << "\n");
1586 CHR_DEBUG(dbgs()
1587 << "Used at entry block (for a back edge) by a phi user "
1588 << *UI << "\n");
1589 Users.push_back(UI);
1590 }
1591 }
1592 }
1593 if (Users.size() > 0) {
1594 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1595 // ExitBlock. Replace I with the new phi in UI unless UI is another
1596 // phi at ExitBlock.
1597 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1598 PN->insertBefore(ExitBlock->begin());
1599 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1600 PN->addIncoming(&I, Pred);
1601 }
1602 TrivialPHIs.insert(PN);
1603 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1604 for (Instruction *UI : Users) {
1605 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1606 if (UI->getOperand(J) == &I) {
1607 UI->setOperand(J, PN);
1608 }
1609 }
1610 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1611 }
1612 }
1613 }
1614 }
1615}
1616
1617// Assert that all the CHR regions of the scope have a biased branch or select.
1618static void LLVM_ATTRIBUTE_UNUSED
1620#ifndef NDEBUG
1621 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1622 if (Scope->TrueBiasedRegions.count(RI.R) ||
1623 Scope->FalseBiasedRegions.count(RI.R))
1624 return true;
1625 for (SelectInst *SI : RI.Selects)
1626 if (Scope->TrueBiasedSelects.count(SI) ||
1627 Scope->FalseBiasedSelects.count(SI))
1628 return true;
1629 return false;
1630 };
1631 for (RegInfo &RI : Scope->CHRRegions) {
1632 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1633 "Must have biased branch or select");
1634 }
1635#endif
1636}
1637
1638// Assert that all the condition values of the biased branches and selects have
1639// been hoisted to the pre-entry block or outside of the scope.
1641 CHRScope *Scope, BasicBlock *PreEntryBlock) {
1642 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1643 for (RegInfo &RI : Scope->CHRRegions) {
1644 Region *R = RI.R;
1645 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1646 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1647 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1648 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1649 Value *V = BI->getCondition();
1650 CHR_DEBUG(dbgs() << *V << "\n");
1651 if (auto *I = dyn_cast<Instruction>(V)) {
1652 (void)(I); // Unused in release build.
1653 assert((I->getParent() == PreEntryBlock ||
1654 !Scope->contains(I)) &&
1655 "Must have been hoisted to PreEntryBlock or outside the scope");
1656 }
1657 }
1658 for (SelectInst *SI : RI.Selects) {
1659 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1660 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1661 if (!(IsTrueBiased || IsFalseBiased))
1662 continue;
1663 Value *V = SI->getCondition();
1664 CHR_DEBUG(dbgs() << *V << "\n");
1665 if (auto *I = dyn_cast<Instruction>(V)) {
1666 (void)(I); // Unused in release build.
1667 assert((I->getParent() == PreEntryBlock ||
1668 !Scope->contains(I)) &&
1669 "Must have been hoisted to PreEntryBlock or outside the scope");
1670 }
1671 }
1672 }
1673}
1674
1675void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1676 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1677
1678 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1679
1680 for (RegInfo &RI : Scope->RegInfos) {
1681 const Region *R = RI.R;
1682 unsigned Duplication = getRegionDuplicationCount(R);
1683 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
1684 << "\n");
1685 if (Duplication >= CHRDupThreshsold) {
1686 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1687 << " for this region");
1688 ORE.emit([&]() {
1689 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1690 R->getEntry()->getTerminator())
1691 << "Reached the duplication threshold for the region";
1692 });
1693 return;
1694 }
1695 }
1696 for (RegInfo &RI : Scope->RegInfos) {
1697 DuplicationCount[RI.R]++;
1698 }
1699
1700 Region *FirstRegion = Scope->RegInfos[0].R;
1701 BasicBlock *EntryBlock = FirstRegion->getEntry();
1702 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1703 BasicBlock *ExitBlock = LastRegion->getExit();
1704 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1705
1706 if (ExitBlock) {
1707 // Insert a trivial phi at the exit block (where the CHR hot path and the
1708 // cold path merges) for a value that's defined in the scope but used
1709 // outside it (meaning it's alive at the exit block). We will add the
1710 // incoming values for the CHR cold paths to it below. Without this, we'd
1711 // miss updating phi's for such values unless there happens to already be a
1712 // phi for that value there.
1713 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1714 }
1715
1716 // Split the entry block of the first region. The new block becomes the new
1717 // entry block of the first region. The old entry block becomes the block to
1718 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1719 // through the split, we update the entry of the first region after the split,
1720 // and Region only points to the entry and the exit blocks, rather than
1721 // keeping everything in a list or set, the blocks membership and the
1722 // entry/exit blocks of the region are still valid after the split.
1723 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1724 << " at " << *Scope->BranchInsertPoint << "\n");
1725 BasicBlock *NewEntryBlock =
1726 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1727 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1728 "NewEntryBlock's only pred must be EntryBlock");
1729 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1730 BasicBlock *PreEntryBlock = EntryBlock;
1731
1732 ValueToValueMapTy VMap;
1733 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1734 // hot path (originals) and a cold path (clones) and update the PHIs at the
1735 // exit block.
1736 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1737
1738 // Replace the old (placeholder) branch with the new (merged) conditional
1739 // branch.
1740 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1741 NewEntryBlock, VMap);
1742
1743#ifndef NDEBUG
1745#endif
1746
1747 // Hoist the conditional values of the branches/selects.
1748 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1749
1750#ifndef NDEBUG
1751 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1752#endif
1753
1754 // Create the combined branch condition and constant-fold the branches/selects
1755 // in the hot path.
1756 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1757 ProfileCount.value_or(0));
1758}
1759
1760// A helper for transformScopes. Clone the blocks in the scope (excluding the
1761// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1762// at the exit block.
1763void CHR::cloneScopeBlocks(CHRScope *Scope,
1764 BasicBlock *PreEntryBlock,
1765 BasicBlock *ExitBlock,
1766 Region *LastRegion,
1767 ValueToValueMapTy &VMap) {
1768 // Clone all the blocks. The original blocks will be the hot-path
1769 // CHR-optimized code and the cloned blocks will be the original unoptimized
1770 // code. This is so that the block pointers from the
1771 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1772 // which CHR should apply to.
1774 for (RegInfo &RI : Scope->RegInfos)
1775 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1776 // sub-Scopes.
1777 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1778 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1779 NewBlocks.push_back(NewBB);
1780 VMap[BB] = NewBB;
1781
1782 // Unreachable predecessors will not be cloned and will not have an edge
1783 // to the cloned block. As such, also remove them from any phi nodes.
1784 for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1785 PN.removeIncomingValueIf([&](unsigned Idx) {
1786 return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1787 });
1788 }
1789
1790 // Place the cloned blocks right after the original blocks (right before the
1791 // exit block of.)
1792 if (ExitBlock)
1793 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1794 F.end());
1795
1796 // Update the cloned blocks/instructions to refer to themselves.
1797 for (BasicBlock *NewBB : NewBlocks)
1798 for (Instruction &I : *NewBB)
1799 RemapInstruction(&I, VMap,
1801
1802 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1803 // the top-level region but we don't need to add PHIs. The trivial PHIs
1804 // inserted above will be updated here.
1805 if (ExitBlock)
1806 for (PHINode &PN : ExitBlock->phis())
1807 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1808 ++I) {
1809 BasicBlock *Pred = PN.getIncomingBlock(I);
1810 if (LastRegion->contains(Pred)) {
1811 Value *V = PN.getIncomingValue(I);
1812 auto It = VMap.find(V);
1813 if (It != VMap.end()) V = It->second;
1814 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1815 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1816 }
1817 }
1818}
1819
1820// A helper for transformScope. Replace the old (placeholder) branch with the
1821// new (merged) conditional branch.
1822BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1823 BasicBlock *EntryBlock,
1824 BasicBlock *NewEntryBlock,
1825 ValueToValueMapTy &VMap) {
1826 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1827 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1828 "SplitBlock did not work correctly!");
1829 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1830 "NewEntryBlock's only pred must be EntryBlock");
1831 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1832 "NewEntryBlock must have been copied");
1833 OldBR->dropAllReferences();
1834 OldBR->eraseFromParent();
1835 // The true predicate is a placeholder. It will be replaced later in
1836 // fixupBranchesAndSelects().
1837 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1838 cast<BasicBlock>(VMap[NewEntryBlock]),
1839 ConstantInt::getTrue(F.getContext()));
1840 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1841 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1842 "NewEntryBlock's only pred must be EntryBlock");
1843 return NewBR;
1844}
1845
1846// A helper for transformScopes. Create the combined branch condition and
1847// constant-fold the branches/selects in the hot path.
1848void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1849 BasicBlock *PreEntryBlock,
1850 BranchInst *MergedBR,
1852 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1853 BranchProbability CHRBranchBias(1, 1);
1854 uint64_t NumCHRedBranches = 0;
1855 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1856 for (RegInfo &RI : Scope->CHRRegions) {
1857 Region *R = RI.R;
1858 if (RI.HasBranch) {
1859 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1860 ++NumCHRedBranches;
1861 }
1862 for (SelectInst *SI : RI.Selects) {
1863 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1864 ++NumCHRedBranches;
1865 }
1866 }
1867 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1868 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1869 ORE.emit([&]() {
1871 "CHR",
1872 // Refer to the hot (original) path
1873 MergedBR->getSuccessor(0)->getTerminator())
1874 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1875 << " branches or selects";
1876 });
1877 MergedBR->setCondition(MergedCondition);
1878 uint32_t Weights[] = {
1879 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1880 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1881 };
1882 setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1883 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1884 << "\n");
1885}
1886
1887// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1888// and constant-fold a branch in the hot path.
1889void CHR::fixupBranch(Region *R, CHRScope *Scope,
1890 IRBuilder<> &IRB,
1891 Value *&MergedCondition,
1892 BranchProbability &CHRBranchBias) {
1893 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1894 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1895 "Must be truthy or falsy");
1896 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1897 assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1898 BranchProbability Bias = BranchBiasMap[R];
1899 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1900 // Take the min.
1901 if (CHRBranchBias > Bias)
1902 CHRBranchBias = Bias;
1903 BasicBlock *IfThen = BI->getSuccessor(1);
1904 BasicBlock *IfElse = BI->getSuccessor(0);
1905 BasicBlock *RegionExitBlock = R->getExit();
1906 assert(RegionExitBlock && "Null ExitBlock");
1907 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1908 IfThen != IfElse && "Invariant from findScopes");
1909 if (IfThen == RegionExitBlock) {
1910 // Swap them so that IfThen means going into it and IfElse means skipping
1911 // it.
1912 std::swap(IfThen, IfElse);
1913 }
1914 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1915 << " IfElse " << IfElse->getName() << "\n");
1916 Value *Cond = BI->getCondition();
1917 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1918 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1919 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1920 MergedCondition);
1921 // Constant-fold the branch at ClonedEntryBlock.
1922 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1923 "The successor shouldn't change");
1924 Value *NewCondition = ConditionTrue ?
1925 ConstantInt::getTrue(F.getContext()) :
1926 ConstantInt::getFalse(F.getContext());
1927 BI->setCondition(NewCondition);
1928}
1929
1930// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1931// and constant-fold a select in the hot path.
1932void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1933 IRBuilder<> &IRB,
1934 Value *&MergedCondition,
1935 BranchProbability &CHRBranchBias) {
1936 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1937 assert((IsTrueBiased ||
1938 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1939 assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1940 BranchProbability Bias = SelectBiasMap[SI];
1941 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1942 // Take the min.
1943 if (CHRBranchBias > Bias)
1944 CHRBranchBias = Bias;
1945 Value *Cond = SI->getCondition();
1946 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1947 MergedCondition);
1948 Value *NewCondition = IsTrueBiased ?
1949 ConstantInt::getTrue(F.getContext()) :
1950 ConstantInt::getFalse(F.getContext());
1951 SI->setCondition(NewCondition);
1952}
1953
1954// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1955// condition.
1956void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1957 Instruction *BranchOrSelect, CHRScope *Scope,
1958 IRBuilder<> &IRB, Value *&MergedCondition) {
1959 if (!IsTrueBiased) {
1960 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1961 // branch, negate the icmp predicate and swap the branch targets and avoid
1962 // inserting an Xor to negate Cond.
1963 auto *ICmp = dyn_cast<ICmpInst>(Cond);
1964 if (!ICmp ||
1965 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
1966 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
1967 }
1968
1969 // Freeze potentially poisonous conditions.
1971 Cond = IRB.CreateFreeze(Cond);
1972
1973 // Use logical and to avoid propagating poison from later conditions.
1974 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
1975}
1976
1977void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1978 unsigned I = 0;
1979 DenseSet<PHINode *> TrivialPHIs;
1980 for (CHRScope *Scope : CHRScopes) {
1981 transformScopes(Scope, TrivialPHIs);
1982 CHR_DEBUG(
1983 std::ostringstream oss;
1984 oss << " after transformScopes " << I++;
1985 dumpIR(F, oss.str().c_str(), nullptr));
1986 (void)I;
1987 }
1988}
1989
1990static void LLVM_ATTRIBUTE_UNUSED
1991dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1992 dbgs() << Label << " " << Scopes.size() << "\n";
1993 for (CHRScope *Scope : Scopes) {
1994 dbgs() << *Scope << "\n";
1995 }
1996}
1997
1998bool CHR::run() {
1999 if (!shouldApply(F, PSI))
2000 return false;
2001
2002 CHR_DEBUG(dumpIR(F, "before", nullptr));
2003
2004 bool Changed = false;
2005 {
2006 CHR_DEBUG(
2007 dbgs() << "RegionInfo:\n";
2008 RI.print(dbgs()));
2009
2010 // Recursively traverse the region tree and find regions that have biased
2011 // branches and/or selects and create scopes.
2013 findScopes(AllScopes);
2014 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2015
2016 // Split the scopes if 1) the conditional values of the biased
2017 // branches/selects of the inner/lower scope can't be hoisted up to the
2018 // outermost/uppermost scope entry, or 2) the condition values of the biased
2019 // branches/selects in a scope (including subscopes) don't share at least
2020 // one common value.
2021 SmallVector<CHRScope *, 8> SplitScopes;
2022 splitScopes(AllScopes, SplitScopes);
2023 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2024
2025 // After splitting, set the biased regions and selects of a scope (a tree
2026 // root) that include those of the subscopes.
2027 classifyBiasedScopes(SplitScopes);
2028 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2029
2030 // Filter out the scopes that has only one biased region or select (CHR
2031 // isn't useful in such a case).
2032 SmallVector<CHRScope *, 8> FilteredScopes;
2033 filterScopes(SplitScopes, FilteredScopes);
2034 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2035
2036 // Set the regions to be CHR'ed and their hoist stops for each scope.
2038 setCHRRegions(FilteredScopes, SetScopes);
2039 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2040
2041 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2042 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2043 // to the hot path, rather than both hot and cold paths.
2044 SmallVector<CHRScope *, 8> SortedScopes;
2045 sortScopes(SetScopes, SortedScopes);
2046 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2047
2048 CHR_DEBUG(
2049 dbgs() << "RegionInfo:\n";
2050 RI.print(dbgs()));
2051
2052 // Apply the CHR transformation.
2053 if (!SortedScopes.empty()) {
2054 transformScopes(SortedScopes);
2055 Changed = true;
2056 }
2057 }
2058
2059 if (Changed) {
2060 CHR_DEBUG(dumpIR(F, "after", &Stats));
2061 ORE.emit([&]() {
2062 return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2063 << ore::NV("Function", &F) << " "
2064 << "Reduced the number of branches in hot paths by "
2065 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2066 << " (static) and "
2067 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2068 << " (weighted by PGO count)";
2069 });
2070 }
2071
2072 return Changed;
2073}
2074
2075namespace llvm {
2076
2079}
2080
2082 Function &F,
2085 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2086 // If there is no profile summary, we should not do CHR.
2087 if (!PPSI || !PPSI->hasProfileSummary())
2088 return PreservedAnalyses::all();
2089 auto &PSI = *PPSI;
2090 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2091 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2092 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2094 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2095 if (!Changed)
2096 return PreservedAnalyses::all();
2097 return PreservedAnalyses::none();
2098}
2099
2100} // namespace llvm
static const LLT S1
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
static void insertTrivialPHIs(CHRScope *Scope, BasicBlock *EntryBlock, BasicBlock *ExitBlock, DenseSet< PHINode * > &TrivialPHIs)
static raw_ostream LLVM_ATTRIBUTE_UNUSED & operator<<(raw_ostream &OS, const CHRStats &Stats)
static cl::opt< bool > DisableCHR("disable-chr", cl::init(false), cl::Hidden, cl::desc("Disable CHR for all functions"))
static StringSet CHRFunctions
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2)
static bool checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables, DenseSet< Instruction * > *HoistStops, DenseMap< Instruction *, bool > &Visited)
static cl::opt< unsigned > CHRMergeThreshold("chr-merge-threshold", cl::init(2), cl::Hidden, cl::desc("CHR merges a group of N branches/selects where N >= this value"))
static bool isHoistable(Instruction *I, DominatorTree &DT)
static cl::opt< unsigned > CHRDupThreshsold("chr-dup-threshold", cl::init(3), cl::Hidden, cl::desc("Max number of duplications by CHR for a region"))
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static bool checkBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, S &TrueSet, S &FalseSet, M &BiasMap)
static StringSet CHRModules
static bool checkBiasedSelect(SelectInst *SI, Region *R, DenseSet< SelectInst * > &TrueBiasedSelectsGlobal, DenseSet< SelectInst * > &FalseBiasedSelectsGlobal, DenseMap< SelectInst *, BranchProbability > &SelectBiasMap)
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, HoistStopMapTy &HoistStopMap, DenseSet< Instruction * > &HoistedSet, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope)
static bool shouldApply(Function &F, ProfileSummaryInfo &PSI)
#define CHR_DEBUG(X)
static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, CHRStats *Stats)
static void getSelectsInScope(CHRScope *Scope, DenseSet< Instruction * > &Output)
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool isHoistableInstructionType(Instruction *I)
static DenseSet< Value * > getCHRConditionValuesForRegion(RegInfo &RI)
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, Instruction *ExcludedUser, CHRScope *Scope)
static Instruction * getBranchInsertPoint(RegInfo &RI)
static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(CHRScope *Scope, BasicBlock *PreEntryBlock)
static cl::opt< std::string > CHRModuleList("chr-module-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of modules to apply CHR to"))
#define DEBUG_TYPE
static void LLVM_ATTRIBUTE_UNUSED assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope)
static cl::opt< double > CHRBiasThreshold("chr-bias-threshold", cl::init(0.99), cl::Hidden, cl::desc("CHR considers a branch bias greater than this ratio as biased"))
static void LLVM_ATTRIBUTE_UNUSED dumpScopes(SmallVectorImpl< CHRScope * > &Scopes, const char *Label)
static BranchProbability getCHRBiasThreshold()
static cl::opt< std::string > CHRFunctionList("chr-function-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of functions to apply CHR to"))
static cl::opt< bool > ForceCHR("force-chr", cl::init(false), cl::Hidden, cl::desc("Apply CHR for all functions"))
static bool checkBiasedBranch(BranchInst *BI, Region *R, DenseSet< Region * > &TrueBiasedRegionsGlobal, DenseSet< Region * > &FalseBiasedRegionsGlobal, DenseMap< Region *, BranchProbability > &BranchBiasMap)
static const std::set< Value * > & getBaseValues(Value *V, DominatorTree &DT, DenseMap< Value *, std::set< Value * > > &Visited)
static bool extractBranchProbabilities(Instruction *I, BranchProbability &TrueProb, BranchProbability &FalseProb)
static void parseCHRFilterFiles()
static Error split(StringRef Str, char Separator, std::pair< StringRef, StringRef > &Split)
Checked version of split, to ensure mandatory subparts.
Definition: DataLayout.cpp:235
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:451
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:438
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:507
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:457
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:229
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:850
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:871
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Class to represent profile counts.
Definition: Function.h:289
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2532
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1673
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1516
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2663
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
The optimization diagnostic interface.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:688
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionEntryHot(const FuncT *F) const
Returns true if F has hot function entry.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition: RegionInfo.h:359
void replaceEntryRecursive(BlockT *NewEntry)
Recursively replace the entry basic block of the region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:364
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:322
Analysis pass that exposes the RegionInfo for a function.
Definition: RegionInfo.h:967
This class represents the LLVM 'select' instruction.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:693
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
void dropAllReferences()
Drop all references to operands.
Definition: User.h:299
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
iterator_range< user_iterator > users()
Definition: Value.h:421
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:206
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:185
bool erase(const ValueT &V)
Definition: DenseSet.h:101
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:97
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:811
@ Exit
Definition: COFF.h:812
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:457
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2067
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, DebugInfoFinder *DIFinder=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
auto predecessors(const MachineBasicBlock *BB)
unsigned pred_size(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860