LLVM 20.0.0git
ControlHeightReduction.cpp
Go to the documentation of this file.
1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/StringSet.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/PassManager.h"
40
41#include <optional>
42#include <set>
43#include <sstream>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "chr"
48
49#define CHR_DEBUG(X) LLVM_DEBUG(X)
50
51static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
52 cl::desc("Disable CHR for all functions"));
53
54static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
55 cl::desc("Apply CHR for all functions"));
56
58 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
59 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
60
62 "chr-merge-threshold", cl::init(2), cl::Hidden,
63 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
64
66 "chr-module-list", cl::init(""), cl::Hidden,
67 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
68
70 "chr-function-list", cl::init(""), cl::Hidden,
71 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
72
74 "chr-dup-threshold", cl::init(3), cl::Hidden,
75 cl::desc("Max number of duplications by CHR for a region"));
76
79
80static void parseCHRFilterFiles() {
81 if (!CHRModuleList.empty()) {
82 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
83 if (!FileOrErr) {
84 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
85 std::exit(1);
86 }
87 StringRef Buf = FileOrErr->get()->getBuffer();
89 Buf.split(Lines, '\n');
90 for (StringRef Line : Lines) {
91 Line = Line.trim();
92 if (!Line.empty())
93 CHRModules.insert(Line);
94 }
95 }
96 if (!CHRFunctionList.empty()) {
97 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
98 if (!FileOrErr) {
99 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
100 std::exit(1);
101 }
102 StringRef Buf = FileOrErr->get()->getBuffer();
104 Buf.split(Lines, '\n');
105 for (StringRef Line : Lines) {
106 Line = Line.trim();
107 if (!Line.empty())
108 CHRFunctions.insert(Line);
109 }
110 }
111}
112
113namespace {
114
115struct CHRStats {
116 CHRStats() = default;
117 void print(raw_ostream &OS) const {
118 OS << "CHRStats: NumBranches " << NumBranches
119 << " NumBranchesDelta " << NumBranchesDelta
120 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
121 }
122 // The original number of conditional branches / selects
123 uint64_t NumBranches = 0;
124 // The decrease of the number of conditional branches / selects in the hot
125 // paths due to CHR.
126 uint64_t NumBranchesDelta = 0;
127 // NumBranchesDelta weighted by the profile count at the scope entry.
128 uint64_t WeightedNumBranchesDelta = 0;
129};
130
131// RegInfo - some properties of a Region.
132struct RegInfo {
133 RegInfo() = default;
134 RegInfo(Region *RegionIn) : R(RegionIn) {}
135 Region *R = nullptr;
136 bool HasBranch = false;
138};
139
140typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
141
142// CHRScope - a sequence of regions to CHR together. It corresponds to a
143// sequence of conditional blocks. It can have subscopes which correspond to
144// nested conditional blocks. Nested CHRScopes form a tree.
145class CHRScope {
146 public:
147 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
148 assert(RI.R && "Null RegionIn");
149 RegInfos.push_back(RI);
150 }
151
152 Region *getParentRegion() {
153 assert(RegInfos.size() > 0 && "Empty CHRScope");
154 Region *Parent = RegInfos[0].R->getParent();
155 assert(Parent && "Unexpected to call this on the top-level region");
156 return Parent;
157 }
158
159 BasicBlock *getEntryBlock() {
160 assert(RegInfos.size() > 0 && "Empty CHRScope");
161 return RegInfos.front().R->getEntry();
162 }
163
164 BasicBlock *getExitBlock() {
165 assert(RegInfos.size() > 0 && "Empty CHRScope");
166 return RegInfos.back().R->getExit();
167 }
168
169 bool appendable(CHRScope *Next) {
170 // The next scope is appendable only if this scope is directly connected to
171 // it (which implies it post-dominates this scope) and this scope dominates
172 // it (no edge to the next scope outside this scope).
173 BasicBlock *NextEntry = Next->getEntryBlock();
174 if (getExitBlock() != NextEntry)
175 // Not directly connected.
176 return false;
177 Region *LastRegion = RegInfos.back().R;
178 for (BasicBlock *Pred : predecessors(NextEntry))
179 if (!LastRegion->contains(Pred))
180 // There's an edge going into the entry of the next scope from outside
181 // of this scope.
182 return false;
183 return true;
184 }
185
186 void append(CHRScope *Next) {
187 assert(RegInfos.size() > 0 && "Empty CHRScope");
188 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
189 assert(getParentRegion() == Next->getParentRegion() &&
190 "Must be siblings");
191 assert(getExitBlock() == Next->getEntryBlock() &&
192 "Must be adjacent");
193 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
194 Subs.append(Next->Subs.begin(), Next->Subs.end());
195 }
196
197 void addSub(CHRScope *SubIn) {
198#ifndef NDEBUG
199 bool IsChild = false;
200 for (RegInfo &RI : RegInfos)
201 if (RI.R == SubIn->getParentRegion()) {
202 IsChild = true;
203 break;
204 }
205 assert(IsChild && "Must be a child");
206#endif
207 Subs.push_back(SubIn);
208 }
209
210 // Split this scope at the boundary region into two, which will belong to the
211 // tail and returns the tail.
212 CHRScope *split(Region *Boundary) {
213 assert(Boundary && "Boundary null");
214 assert(RegInfos.begin()->R != Boundary &&
215 "Can't be split at beginning");
216 auto BoundaryIt = llvm::find_if(
217 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
218 if (BoundaryIt == RegInfos.end())
219 return nullptr;
220 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
221 DenseSet<Region *> TailRegionSet;
222 for (const RegInfo &RI : TailRegInfos)
223 TailRegionSet.insert(RI.R);
224
225 auto TailIt =
226 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
227 assert(Sub && "null Sub");
228 Region *Parent = Sub->getParentRegion();
229 if (TailRegionSet.count(Parent))
230 return false;
231
232 assert(llvm::any_of(
233 RegInfos,
234 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
235 "Must be in head");
236 return true;
237 });
238 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
239
240 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
241 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
242 RegInfos.erase(BoundaryIt, RegInfos.end());
243 Subs.erase(TailIt, Subs.end());
244 return Scope;
245 }
246
247 bool contains(Instruction *I) const {
248 BasicBlock *Parent = I->getParent();
249 for (const RegInfo &RI : RegInfos)
250 if (RI.R->contains(Parent))
251 return true;
252 return false;
253 }
254
255 void print(raw_ostream &OS) const;
256
257 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
258 SmallVector<CHRScope *, 8> Subs; // Subscopes.
259
260 // The instruction at which to insert the CHR conditional branch (and hoist
261 // the dependent condition values).
262 Instruction *BranchInsertPoint;
263
264 // True-biased and false-biased regions (conditional blocks),
265 // respectively. Used only for the outermost scope and includes regions in
266 // subscopes. The rest are unbiased.
267 DenseSet<Region *> TrueBiasedRegions;
268 DenseSet<Region *> FalseBiasedRegions;
269 // Among the biased regions, the regions that get CHRed.
270 SmallVector<RegInfo, 8> CHRRegions;
271
272 // True-biased and false-biased selects, respectively. Used only for the
273 // outermost scope and includes ones in subscopes.
274 DenseSet<SelectInst *> TrueBiasedSelects;
275 DenseSet<SelectInst *> FalseBiasedSelects;
276
277 // Map from one of the above regions to the instructions to stop
278 // hoisting instructions at through use-def chains.
279 HoistStopMapTy HoistStopMap;
280
281 private:
282 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
283 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
284};
285
286class CHR {
287 public:
288 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
289 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
291 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
292
293 ~CHR() {
294 for (CHRScope *Scope : Scopes) {
295 delete Scope;
296 }
297 }
298
299 bool run();
300
301 private:
302 // See the comments in CHR::run() for the high level flow of the algorithm and
303 // what the following functions do.
304
305 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
306 Region *R = RI.getTopLevelRegion();
307 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
308 Output.push_back(Scope);
309 }
310 }
311 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
313 CHRScope *findScope(Region *R);
314 void checkScopeHoistable(CHRScope *Scope);
315
316 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
318 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
319 CHRScope *Outer,
320 DenseSet<Value *> *OuterConditionValues,
321 Instruction *OuterInsertPoint,
323 DenseSet<Instruction *> &Unhoistables);
324
325 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
326 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
327
328 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
330
331 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
333 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
334
335 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
337
338 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
339 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
340 void cloneScopeBlocks(CHRScope *Scope,
341 BasicBlock *PreEntryBlock,
342 BasicBlock *ExitBlock,
343 Region *LastRegion,
344 ValueToValueMapTy &VMap);
345 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
346 BasicBlock *EntryBlock,
347 BasicBlock *NewEntryBlock,
348 ValueToValueMapTy &VMap);
349 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
350 BranchInst *MergedBR, uint64_t ProfileCount);
351 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
352 Value *&MergedCondition, BranchProbability &CHRBranchBias);
353 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
354 Value *&MergedCondition, BranchProbability &CHRBranchBias);
355 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
356 Instruction *BranchOrSelect, CHRScope *Scope,
357 IRBuilder<> &IRB, Value *&MergedCondition);
358 unsigned getRegionDuplicationCount(const Region *R) {
359 unsigned Count = 0;
360 // Find out how many times region R is cloned. Note that if the parent
361 // of R is cloned, R is also cloned, but R's clone count is not updated
362 // from the clone of the parent. We need to accumlate all the counts
363 // from the ancestors to get the clone count.
364 while (R) {
365 Count += DuplicationCount[R];
366 R = R->getParent();
367 }
368 return Count;
369 }
370
371 Function &F;
373 DominatorTree &DT;
375 RegionInfo &RI;
377 CHRStats Stats;
378
379 // All the true-biased regions in the function
380 DenseSet<Region *> TrueBiasedRegionsGlobal;
381 // All the false-biased regions in the function
382 DenseSet<Region *> FalseBiasedRegionsGlobal;
383 // All the true-biased selects in the function
384 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
385 // All the false-biased selects in the function
386 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
387 // A map from biased regions to their branch bias
389 // A map from biased selects to their branch bias
391 // All the scopes.
393 // This maps records how many times this region is cloned.
394 DenseMap<const Region *, unsigned> DuplicationCount;
395};
396
397} // end anonymous namespace
398
399static inline
401 const CHRStats &Stats) {
402 Stats.print(OS);
403 return OS;
404}
405
406static inline
407raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
408 Scope.print(OS);
409 return OS;
410}
411
413 if (DisableCHR)
414 return false;
415
416 if (ForceCHR)
417 return true;
418
419 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
420 if (CHRModules.count(F.getParent()->getName()))
421 return true;
422 return CHRFunctions.count(F.getName());
423 }
424
425 return PSI.isFunctionEntryHot(&F);
426}
427
428static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
429 CHRStats *Stats) {
430 StringRef FuncName = F.getName();
431 StringRef ModuleName = F.getParent()->getName();
432 (void)(FuncName); // Unused in release build.
433 (void)(ModuleName); // Unused in release build.
434 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
435 << FuncName);
436 if (Stats)
437 CHR_DEBUG(dbgs() << " " << *Stats);
438 CHR_DEBUG(dbgs() << "\n");
439 CHR_DEBUG(F.dump());
440}
441
442void CHRScope::print(raw_ostream &OS) const {
443 assert(RegInfos.size() > 0 && "Empty CHRScope");
444 OS << "CHRScope[";
445 OS << RegInfos.size() << ", Regions[";
446 for (const RegInfo &RI : RegInfos) {
447 OS << RI.R->getNameStr();
448 if (RI.HasBranch)
449 OS << " B";
450 if (RI.Selects.size() > 0)
451 OS << " S" << RI.Selects.size();
452 OS << ", ";
453 }
454 if (RegInfos[0].R->getParent()) {
455 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
456 } else {
457 // top level region
458 OS << "]";
459 }
460 OS << ", Subs[";
461 for (CHRScope *Sub : Subs) {
462 OS << *Sub << ", ";
463 }
464 OS << "]]";
465}
466
467// Return true if the given instruction type can be hoisted by CHR.
469 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
470 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
471 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
472 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
473 isa<InsertValueInst>(I);
474}
475
476// Return true if the given instruction can be hoisted by CHR.
479 return false;
480 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
481}
482
483// Recursively traverse the use-def chains of the given value and return a set
484// of the unhoistable base values defined within the scope (excluding the
485// first-region entry block) or the (hoistable or unhoistable) base values that
486// are defined outside (including the first-region entry block) of the
487// scope. The returned set doesn't include constants.
488static const std::set<Value *> &
490 DenseMap<Value *, std::set<Value *>> &Visited) {
491 auto It = Visited.find(V);
492 if (It != Visited.end()) {
493 return It->second;
494 }
495 std::set<Value *> Result;
496 if (auto *I = dyn_cast<Instruction>(V)) {
497 // We don't stop at a block that's not in the Scope because we would miss
498 // some instructions that are based on the same base values if we stop
499 // there.
500 if (!isHoistable(I, DT)) {
501 Result.insert(I);
502 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
503 }
504 // I is hoistable above the Scope.
505 for (Value *Op : I->operands()) {
506 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
507 Result.insert(OpResult.begin(), OpResult.end());
508 }
509 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
510 }
511 if (isa<Argument>(V)) {
512 Result.insert(V);
513 }
514 // We don't include others like constants because those won't lead to any
515 // chance of folding of conditions (eg two bit checks merged into one check)
516 // after CHR.
517 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
518}
519
520// Return true if V is already hoisted or can be hoisted (along with its
521// operands) above the insert point. When it returns true and HoistStops is
522// non-null, the instructions to stop hoisting at through the use-def chains are
523// inserted into HoistStops.
524static bool
526 DenseSet<Instruction *> &Unhoistables,
527 DenseSet<Instruction *> *HoistStops,
529 assert(InsertPoint && "Null InsertPoint");
530 if (auto *I = dyn_cast<Instruction>(V)) {
531 auto It = Visited.find(I);
532 if (It != Visited.end()) {
533 return It->second;
534 }
535 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
536 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
537 if (Unhoistables.count(I)) {
538 // Don't hoist if they are not to be hoisted.
539 Visited[I] = false;
540 return false;
541 }
542 if (DT.dominates(I, InsertPoint)) {
543 // We are already above the insert point. Stop here.
544 if (HoistStops)
545 HoistStops->insert(I);
546 Visited[I] = true;
547 return true;
548 }
549 // We aren't not above the insert point, check if we can hoist it above the
550 // insert point.
551 if (isHoistable(I, DT)) {
552 // Check operands first.
553 DenseSet<Instruction *> OpsHoistStops;
554 bool AllOpsHoisted = true;
555 for (Value *Op : I->operands()) {
556 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
557 Visited)) {
558 AllOpsHoisted = false;
559 break;
560 }
561 }
562 if (AllOpsHoisted) {
563 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
564 if (HoistStops)
565 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
566 Visited[I] = true;
567 return true;
568 }
569 }
570 Visited[I] = false;
571 return false;
572 }
573 // Non-instructions are considered hoistable.
574 return true;
575}
576
577// Constructs the true and false branch probabilities if the the instruction has
578// valid branch weights. Returns true when this was successful, false otherwise.
580 BranchProbability &TrueProb,
581 BranchProbability &FalseProb) {
582 uint64_t TrueWeight;
583 uint64_t FalseWeight;
584 if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
585 return false;
586 uint64_t SumWeight = TrueWeight + FalseWeight;
587
588 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
589 "Overflow calculating branch probabilities.");
590
591 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
592 if (SumWeight == 0)
593 return false;
594
595 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
596 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
597 return true;
598}
599
602 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
603}
604
605// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
606// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
607// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
608// false.
609template <typename K, typename S, typename M>
610static bool checkBias(K *Key, BranchProbability TrueProb,
611 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
612 M &BiasMap) {
614 if (TrueProb >= Threshold) {
615 TrueSet.insert(Key);
616 BiasMap[Key] = TrueProb;
617 return true;
618 } else if (FalseProb >= Threshold) {
619 FalseSet.insert(Key);
620 BiasMap[Key] = FalseProb;
621 return true;
622 }
623 return false;
624}
625
626// Returns true and insert a region into the right biased set and the map if the
627// branch of the region is biased.
629 DenseSet<Region *> &TrueBiasedRegionsGlobal,
630 DenseSet<Region *> &FalseBiasedRegionsGlobal,
632 if (!BI->isConditional())
633 return false;
634 BranchProbability ThenProb, ElseProb;
635 if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
636 return false;
637 BasicBlock *IfThen = BI->getSuccessor(0);
638 BasicBlock *IfElse = BI->getSuccessor(1);
639 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
640 IfThen != IfElse &&
641 "Invariant from findScopes");
642 if (IfThen == R->getExit()) {
643 // Swap them so that IfThen/ThenProb means going into the conditional code
644 // and IfElse/ElseProb means skipping it.
645 std::swap(IfThen, IfElse);
646 std::swap(ThenProb, ElseProb);
647 }
648 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
649 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
650 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
651 return checkBias(R, ThenProb, ElseProb,
652 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
653 BranchBiasMap);
654}
655
656// Returns true and insert a select into the right biased set and the map if the
657// select is biased.
659 SelectInst *SI, Region *R,
660 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
661 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
663 BranchProbability TrueProb, FalseProb;
664 if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
665 return false;
666 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
667 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
668 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
669 return checkBias(SI, TrueProb, FalseProb,
670 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
671 SelectBiasMap);
672}
673
674// Returns the instruction at which to hoist the dependent condition values and
675// insert the CHR branch for a region. This is the terminator branch in the
676// entry block or the first select in the entry block, if any.
678 Region *R = RI.R;
679 BasicBlock *EntryBB = R->getEntry();
680 // The hoist point is by default the terminator of the entry block, which is
681 // the same as the branch instruction if RI.HasBranch is true.
682 Instruction *HoistPoint = EntryBB->getTerminator();
683 for (SelectInst *SI : RI.Selects) {
684 if (SI->getParent() == EntryBB) {
685 // Pick the first select in Selects in the entry block. Note Selects is
686 // sorted in the instruction order within a block (asserted below).
687 HoistPoint = SI;
688 break;
689 }
690 }
691 assert(HoistPoint && "Null HoistPoint");
692#ifndef NDEBUG
693 // Check that HoistPoint is the first one in Selects in the entry block,
694 // if any.
695 DenseSet<Instruction *> EntryBlockSelectSet;
696 for (SelectInst *SI : RI.Selects) {
697 if (SI->getParent() == EntryBB) {
698 EntryBlockSelectSet.insert(SI);
699 }
700 }
701 for (Instruction &I : *EntryBB) {
702 if (EntryBlockSelectSet.contains(&I)) {
703 assert(&I == HoistPoint &&
704 "HoistPoint must be the first one in Selects");
705 break;
706 }
707 }
708#endif
709 return HoistPoint;
710}
711
712// Find a CHR scope in the given region.
713CHRScope * CHR::findScope(Region *R) {
714 CHRScope *Result = nullptr;
715 BasicBlock *Entry = R->getEntry();
716 BasicBlock *Exit = R->getExit(); // null if top level.
717 assert(Entry && "Entry must not be null");
718 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
719 "Only top level region has a null exit");
720 if (Entry)
721 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
722 else
723 CHR_DEBUG(dbgs() << "Entry null\n");
724 if (Exit)
725 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
726 else
727 CHR_DEBUG(dbgs() << "Exit null\n");
728 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
729 // to this region).
730 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
731 if (EntryInSubregion)
732 return nullptr;
733 // Exclude loops
734 for (BasicBlock *Pred : predecessors(Entry))
735 if (R->contains(Pred))
736 return nullptr;
737 // If any of the basic blocks have address taken, we must skip this region
738 // because we cannot clone basic blocks that have address taken.
739 for (BasicBlock *BB : R->blocks()) {
740 if (BB->hasAddressTaken())
741 return nullptr;
742 // If we encounter llvm.coro.id, skip this region because if the basic block
743 // is cloned, we end up inserting a token type PHI node to the block with
744 // llvm.coro.begin.
745 // FIXME: This could lead to less optimal codegen, because the region is
746 // excluded, it can prevent CHR from merging adjacent regions into bigger
747 // scope and hoisting more branches.
748 for (Instruction &I : *BB)
749 if (auto *II = dyn_cast<IntrinsicInst>(&I))
750 if (II->getIntrinsicID() == Intrinsic::coro_id)
751 return nullptr;
752 }
753
754 if (Exit) {
755 // Try to find an if-then block (check if R is an if-then).
756 // if (cond) {
757 // ...
758 // }
759 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
760 if (BI)
761 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
762 else
763 CHR_DEBUG(dbgs() << "BI null\n");
764 if (BI && BI->isConditional()) {
765 BasicBlock *S0 = BI->getSuccessor(0);
766 BasicBlock *S1 = BI->getSuccessor(1);
767 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
768 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
769 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
770 RegInfo RI(R);
771 RI.HasBranch = checkBiasedBranch(
772 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
773 BranchBiasMap);
774 Result = new CHRScope(RI);
775 Scopes.insert(Result);
776 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
777 ++Stats.NumBranches;
778 if (!RI.HasBranch) {
779 ORE.emit([&]() {
780 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
781 << "Branch not biased";
782 });
783 }
784 }
785 }
786 }
787 {
788 // Try to look for selects in the direct child blocks (as opposed to in
789 // subregions) of R.
790 // ...
791 // if (..) { // Some subregion
792 // ...
793 // }
794 // if (..) { // Some subregion
795 // ...
796 // }
797 // ...
798 // a = cond ? b : c;
799 // ...
801 for (RegionNode *E : R->elements()) {
802 if (E->isSubRegion())
803 continue;
804 // This returns the basic block of E if E is a direct child of R (not a
805 // subregion.)
806 BasicBlock *BB = E->getEntry();
807 // Need to push in the order to make it easier to find the first Select
808 // later.
809 for (Instruction &I : *BB) {
810 if (auto *SI = dyn_cast<SelectInst>(&I)) {
811 Selects.push_back(SI);
812 ++Stats.NumBranches;
813 }
814 }
815 }
816 if (Selects.size() > 0) {
817 auto AddSelects = [&](RegInfo &RI) {
818 for (auto *SI : Selects)
819 if (checkBiasedSelect(SI, RI.R,
820 TrueBiasedSelectsGlobal,
821 FalseBiasedSelectsGlobal,
822 SelectBiasMap))
823 RI.Selects.push_back(SI);
824 else
825 ORE.emit([&]() {
826 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
827 << "Select not biased";
828 });
829 };
830 if (!Result) {
831 CHR_DEBUG(dbgs() << "Found a select-only region\n");
832 RegInfo RI(R);
833 AddSelects(RI);
834 Result = new CHRScope(RI);
835 Scopes.insert(Result);
836 } else {
837 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
838 AddSelects(Result->RegInfos[0]);
839 }
840 }
841 }
842
843 if (Result) {
844 checkScopeHoistable(Result);
845 }
846 return Result;
847}
848
849// Check that any of the branch and the selects in the region could be
850// hoisted above the the CHR branch insert point (the most dominating of
851// them, either the branch (at the end of the first block) or the first
852// select in the first block). If the branch can't be hoisted, drop the
853// selects in the first blocks.
854//
855// For example, for the following scope/region with selects, we want to insert
856// the merged branch right before the first select in the first/entry block by
857// hoisting c1, c2, c3, and c4.
858//
859// // Branch insert point here.
860// a = c1 ? b : c; // Select 1
861// d = c2 ? e : f; // Select 2
862// if (c3) { // Branch
863// ...
864// c4 = foo() // A call.
865// g = c4 ? h : i; // Select 3
866// }
867//
868// But suppose we can't hoist c4 because it's dependent on the preceding
869// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
870// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
871void CHR::checkScopeHoistable(CHRScope *Scope) {
872 RegInfo &RI = Scope->RegInfos[0];
873 Region *R = RI.R;
874 BasicBlock *EntryBB = R->getEntry();
875 auto *Branch = RI.HasBranch ?
876 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
877 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
878 if (RI.HasBranch || !Selects.empty()) {
879 Instruction *InsertPoint = getBranchInsertPoint(RI);
880 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
881 // Avoid a data dependence from a select or a branch to a(nother)
882 // select. Note no instruction can't data-depend on a branch (a branch
883 // instruction doesn't produce a value).
884 DenseSet<Instruction *> Unhoistables;
885 // Initialize Unhoistables with the selects.
886 for (SelectInst *SI : Selects) {
887 Unhoistables.insert(SI);
888 }
889 // Remove Selects that can't be hoisted.
890 for (auto it = Selects.begin(); it != Selects.end(); ) {
891 SelectInst *SI = *it;
892 if (SI == InsertPoint) {
893 ++it;
894 continue;
895 }
897 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
898 DT, Unhoistables, nullptr, Visited);
899 if (!IsHoistable) {
900 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
901 ORE.emit([&]() {
903 "DropUnhoistableSelect", SI)
904 << "Dropped unhoistable select";
905 });
906 it = Selects.erase(it);
907 // Since we are dropping the select here, we also drop it from
908 // Unhoistables.
909 Unhoistables.erase(SI);
910 } else
911 ++it;
912 }
913 // Update InsertPoint after potentially removing selects.
914 InsertPoint = getBranchInsertPoint(RI);
915 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
916 if (RI.HasBranch && InsertPoint != Branch) {
918 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
919 DT, Unhoistables, nullptr, Visited);
920 if (!IsHoistable) {
921 // If the branch isn't hoistable, drop the selects in the entry
922 // block, preferring the branch, which makes the branch the hoist
923 // point.
924 assert(InsertPoint != Branch && "Branch must not be the hoist point");
925 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
926 CHR_DEBUG(
927 for (SelectInst *SI : Selects) {
928 dbgs() << "SI " << *SI << "\n";
929 });
930 for (SelectInst *SI : Selects) {
931 ORE.emit([&]() {
933 "DropSelectUnhoistableBranch", SI)
934 << "Dropped select due to unhoistable branch";
935 });
936 }
937 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
938 return SI->getParent() == EntryBB;
939 });
940 Unhoistables.clear();
941 InsertPoint = Branch;
942 }
943 }
944 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
945#ifndef NDEBUG
946 if (RI.HasBranch) {
947 assert(!DT.dominates(Branch, InsertPoint) &&
948 "Branch can't be already above the hoist point");
950 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
951 DT, Unhoistables, nullptr, Visited) &&
952 "checkHoistValue for branch");
953 }
954 for (auto *SI : Selects) {
955 assert(!DT.dominates(SI, InsertPoint) &&
956 "SI can't be already above the hoist point");
958 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
959 Unhoistables, nullptr, Visited) &&
960 "checkHoistValue for selects");
961 }
962 CHR_DEBUG(dbgs() << "Result\n");
963 if (RI.HasBranch) {
964 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
965 }
966 for (auto *SI : Selects) {
967 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
968 }
969#endif
970 }
971}
972
973// Traverse the region tree, find all nested scopes and merge them if possible.
974CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
976 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
977 CHRScope *Result = findScope(R);
978 // Visit subscopes.
979 CHRScope *ConsecutiveSubscope = nullptr;
981 for (auto It = R->begin(); It != R->end(); ++It) {
982 const std::unique_ptr<Region> &SubR = *It;
983 auto NextIt = std::next(It);
984 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
985 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
986 << "\n");
987 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
988 if (SubCHRScope) {
989 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
990 } else {
991 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
992 }
993 if (SubCHRScope) {
994 if (!ConsecutiveSubscope)
995 ConsecutiveSubscope = SubCHRScope;
996 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
997 Subscopes.push_back(ConsecutiveSubscope);
998 ConsecutiveSubscope = SubCHRScope;
999 } else
1000 ConsecutiveSubscope->append(SubCHRScope);
1001 } else {
1002 if (ConsecutiveSubscope) {
1003 Subscopes.push_back(ConsecutiveSubscope);
1004 }
1005 ConsecutiveSubscope = nullptr;
1006 }
1007 }
1008 if (ConsecutiveSubscope) {
1009 Subscopes.push_back(ConsecutiveSubscope);
1010 }
1011 for (CHRScope *Sub : Subscopes) {
1012 if (Result) {
1013 // Combine it with the parent.
1014 Result->addSub(Sub);
1015 } else {
1016 // Push Subscopes as they won't be combined with the parent.
1017 Scopes.push_back(Sub);
1018 }
1019 }
1020 return Result;
1021}
1022
1024 DenseSet<Value *> ConditionValues;
1025 if (RI.HasBranch) {
1026 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1027 ConditionValues.insert(BI->getCondition());
1028 }
1029 for (SelectInst *SI : RI.Selects) {
1030 ConditionValues.insert(SI->getCondition());
1031 }
1032 return ConditionValues;
1033}
1034
1035
1036// Determine whether to split a scope depending on the sets of the branch
1037// condition values of the previous region and the current region. We split
1038// (return true) it if 1) the condition values of the inner/lower scope can't be
1039// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1040// values have an empty intersection (because the combined branch conditions
1041// won't probably lead to a simpler combined condition).
1042static bool shouldSplit(Instruction *InsertPoint,
1043 DenseSet<Value *> &PrevConditionValues,
1044 DenseSet<Value *> &ConditionValues,
1045 DominatorTree &DT,
1046 DenseSet<Instruction *> &Unhoistables) {
1047 assert(InsertPoint && "Null InsertPoint");
1048 CHR_DEBUG(
1049 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1050 for (Value *V : PrevConditionValues) {
1051 dbgs() << *V << ", ";
1052 }
1053 dbgs() << " ConditionValues ";
1054 for (Value *V : ConditionValues) {
1055 dbgs() << *V << ", ";
1056 }
1057 dbgs() << "\n");
1058 // If any of Bases isn't hoistable to the hoist point, split.
1059 for (Value *V : ConditionValues) {
1061 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1062 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1063 return true; // Not hoistable, split.
1064 }
1065 }
1066 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1067 // unnecessary splits at scopes with no branch/selects. If
1068 // PrevConditionValues and ConditionValues don't intersect at all, split.
1069 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1070 // Use std::set as DenseSet doesn't work with set_intersection.
1071 std::set<Value *> PrevBases, Bases;
1073 for (Value *V : PrevConditionValues) {
1074 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1075 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1076 }
1077 for (Value *V : ConditionValues) {
1078 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1079 Bases.insert(BaseValues.begin(), BaseValues.end());
1080 }
1081 CHR_DEBUG(
1082 dbgs() << "PrevBases ";
1083 for (Value *V : PrevBases) {
1084 dbgs() << *V << ", ";
1085 }
1086 dbgs() << " Bases ";
1087 for (Value *V : Bases) {
1088 dbgs() << *V << ", ";
1089 }
1090 dbgs() << "\n");
1091 std::vector<Value *> Intersection;
1092 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1093 Bases.end(), std::back_inserter(Intersection));
1094 if (Intersection.empty()) {
1095 // Empty intersection, split.
1096 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1097 return true;
1098 }
1099 }
1100 CHR_DEBUG(dbgs() << "No split\n");
1101 return false; // Don't split.
1102}
1103
1104static void getSelectsInScope(CHRScope *Scope,
1105 DenseSet<Instruction *> &Output) {
1106 for (RegInfo &RI : Scope->RegInfos)
1107 for (SelectInst *SI : RI.Selects)
1108 Output.insert(SI);
1109 for (CHRScope *Sub : Scope->Subs)
1110 getSelectsInScope(Sub, Output);
1111}
1112
1113void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1115 for (CHRScope *Scope : Input) {
1116 assert(!Scope->BranchInsertPoint &&
1117 "BranchInsertPoint must not be set");
1118 DenseSet<Instruction *> Unhoistables;
1119 getSelectsInScope(Scope, Unhoistables);
1120 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1121 }
1122#ifndef NDEBUG
1123 for (CHRScope *Scope : Output) {
1124 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1125 }
1126#endif
1127}
1128
1129SmallVector<CHRScope *, 8> CHR::splitScope(
1130 CHRScope *Scope,
1131 CHRScope *Outer,
1132 DenseSet<Value *> *OuterConditionValues,
1133 Instruction *OuterInsertPoint,
1135 DenseSet<Instruction *> &Unhoistables) {
1136 if (Outer) {
1137 assert(OuterConditionValues && "Null OuterConditionValues");
1138 assert(OuterInsertPoint && "Null OuterInsertPoint");
1139 }
1140 bool PrevSplitFromOuter = true;
1141 DenseSet<Value *> PrevConditionValues;
1142 Instruction *PrevInsertPoint = nullptr;
1144 SmallVector<bool, 8> SplitsSplitFromOuter;
1145 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1146 SmallVector<Instruction *, 8> SplitsInsertPoints;
1147 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1148 for (RegInfo &RI : RegInfos) {
1149 Instruction *InsertPoint = getBranchInsertPoint(RI);
1151 CHR_DEBUG(
1152 dbgs() << "ConditionValues ";
1153 for (Value *V : ConditionValues) {
1154 dbgs() << *V << ", ";
1155 }
1156 dbgs() << "\n");
1157 if (RI.R == RegInfos[0].R) {
1158 // First iteration. Check to see if we should split from the outer.
1159 if (Outer) {
1160 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1161 CHR_DEBUG(dbgs() << "Should split from outer at "
1162 << RI.R->getNameStr() << "\n");
1163 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1164 ConditionValues, DT, Unhoistables)) {
1165 PrevConditionValues = ConditionValues;
1166 PrevInsertPoint = InsertPoint;
1167 ORE.emit([&]() {
1169 "SplitScopeFromOuter",
1170 RI.R->getEntry()->getTerminator())
1171 << "Split scope from outer due to unhoistable branch/select "
1172 << "and/or lack of common condition values";
1173 });
1174 } else {
1175 // Not splitting from the outer. Use the outer bases and insert
1176 // point. Union the bases.
1177 PrevSplitFromOuter = false;
1178 PrevConditionValues = *OuterConditionValues;
1179 PrevConditionValues.insert(ConditionValues.begin(),
1180 ConditionValues.end());
1181 PrevInsertPoint = OuterInsertPoint;
1182 }
1183 } else {
1184 CHR_DEBUG(dbgs() << "Outer null\n");
1185 PrevConditionValues = ConditionValues;
1186 PrevInsertPoint = InsertPoint;
1187 }
1188 } else {
1189 CHR_DEBUG(dbgs() << "Should split from prev at "
1190 << RI.R->getNameStr() << "\n");
1191 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1192 DT, Unhoistables)) {
1193 CHRScope *Tail = Scope->split(RI.R);
1194 Scopes.insert(Tail);
1195 Splits.push_back(Scope);
1196 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1197 SplitsConditionValues.push_back(PrevConditionValues);
1198 SplitsInsertPoints.push_back(PrevInsertPoint);
1199 Scope = Tail;
1200 PrevConditionValues = ConditionValues;
1201 PrevInsertPoint = InsertPoint;
1202 PrevSplitFromOuter = true;
1203 ORE.emit([&]() {
1205 "SplitScopeFromPrev",
1206 RI.R->getEntry()->getTerminator())
1207 << "Split scope from previous due to unhoistable branch/select "
1208 << "and/or lack of common condition values";
1209 });
1210 } else {
1211 // Not splitting. Union the bases. Keep the hoist point.
1212 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1213 }
1214 }
1215 }
1216 Splits.push_back(Scope);
1217 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1218 SplitsConditionValues.push_back(PrevConditionValues);
1219 assert(PrevInsertPoint && "Null PrevInsertPoint");
1220 SplitsInsertPoints.push_back(PrevInsertPoint);
1221 assert(Splits.size() == SplitsConditionValues.size() &&
1222 Splits.size() == SplitsSplitFromOuter.size() &&
1223 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1224 for (size_t I = 0; I < Splits.size(); ++I) {
1225 CHRScope *Split = Splits[I];
1226 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1227 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1229 DenseSet<Instruction *> SplitUnhoistables;
1230 getSelectsInScope(Split, SplitUnhoistables);
1231 for (CHRScope *Sub : Split->Subs) {
1232 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1233 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1234 SplitUnhoistables);
1235 llvm::append_range(NewSubs, SubSplits);
1236 }
1237 Split->Subs = NewSubs;
1238 }
1240 for (size_t I = 0; I < Splits.size(); ++I) {
1241 CHRScope *Split = Splits[I];
1242 if (SplitsSplitFromOuter[I]) {
1243 // Split from the outer.
1244 Output.push_back(Split);
1245 Split->BranchInsertPoint = SplitsInsertPoints[I];
1246 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1247 << "\n");
1248 } else {
1249 // Connected to the outer.
1250 Result.push_back(Split);
1251 }
1252 }
1253 if (!Outer)
1254 assert(Result.empty() &&
1255 "If no outer (top-level), must return no nested ones");
1256 return Result;
1257}
1258
1259void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1260 for (CHRScope *Scope : Scopes) {
1261 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1262 classifyBiasedScopes(Scope, Scope);
1263 CHR_DEBUG(
1264 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1265 dbgs() << "TrueBiasedRegions ";
1266 for (Region *R : Scope->TrueBiasedRegions) {
1267 dbgs() << R->getNameStr() << ", ";
1268 }
1269 dbgs() << "\n";
1270 dbgs() << "FalseBiasedRegions ";
1271 for (Region *R : Scope->FalseBiasedRegions) {
1272 dbgs() << R->getNameStr() << ", ";
1273 }
1274 dbgs() << "\n";
1275 dbgs() << "TrueBiasedSelects ";
1276 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1277 dbgs() << *SI << ", ";
1278 }
1279 dbgs() << "\n";
1280 dbgs() << "FalseBiasedSelects ";
1281 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1282 dbgs() << *SI << ", ";
1283 }
1284 dbgs() << "\n";);
1285 }
1286}
1287
1288void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1289 for (RegInfo &RI : Scope->RegInfos) {
1290 if (RI.HasBranch) {
1291 Region *R = RI.R;
1292 if (TrueBiasedRegionsGlobal.contains(R))
1293 OutermostScope->TrueBiasedRegions.insert(R);
1294 else if (FalseBiasedRegionsGlobal.contains(R))
1295 OutermostScope->FalseBiasedRegions.insert(R);
1296 else
1297 llvm_unreachable("Must be biased");
1298 }
1299 for (SelectInst *SI : RI.Selects) {
1300 if (TrueBiasedSelectsGlobal.contains(SI))
1301 OutermostScope->TrueBiasedSelects.insert(SI);
1302 else if (FalseBiasedSelectsGlobal.contains(SI))
1303 OutermostScope->FalseBiasedSelects.insert(SI);
1304 else
1305 llvm_unreachable("Must be biased");
1306 }
1307 }
1308 for (CHRScope *Sub : Scope->Subs) {
1309 classifyBiasedScopes(Sub, OutermostScope);
1310 }
1311}
1312
1313static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1314 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1315 Scope->FalseBiasedRegions.size() +
1316 Scope->TrueBiasedSelects.size() +
1317 Scope->FalseBiasedSelects.size();
1318 return NumBiased >= CHRMergeThreshold;
1319}
1320
1321void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1323 for (CHRScope *Scope : Input) {
1324 // Filter out the ones with only one region and no subs.
1325 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1326 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1327 << Scope->TrueBiasedRegions.size()
1328 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1329 << " true-selects " << Scope->TrueBiasedSelects.size()
1330 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1331 ORE.emit([&]() {
1333 DEBUG_TYPE,
1334 "DropScopeWithOneBranchOrSelect",
1335 Scope->RegInfos[0].R->getEntry()->getTerminator())
1336 << "Drop scope with < "
1337 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1338 << " biased branch(es) or select(s)";
1339 });
1340 continue;
1341 }
1342 Output.push_back(Scope);
1343 }
1344}
1345
1346void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1348 for (CHRScope *Scope : Input) {
1349 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1350 "Empty");
1351 setCHRRegions(Scope, Scope);
1352 Output.push_back(Scope);
1353 CHR_DEBUG(
1354 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1355 for (auto pair : Scope->HoistStopMap) {
1356 Region *R = pair.first;
1357 dbgs() << "Region " << R->getNameStr() << "\n";
1358 for (Instruction *I : pair.second) {
1359 dbgs() << "HoistStop " << *I << "\n";
1360 }
1361 }
1362 dbgs() << "CHRRegions" << "\n";
1363 for (RegInfo &RI : Scope->CHRRegions) {
1364 dbgs() << RI.R->getNameStr() << "\n";
1365 });
1366 }
1367}
1368
1369void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1370 DenseSet<Instruction *> Unhoistables;
1371 // Put the biased selects in Unhoistables because they should stay where they
1372 // are and constant-folded after CHR (in case one biased select or a branch
1373 // can depend on another biased select.)
1374 for (RegInfo &RI : Scope->RegInfos) {
1375 for (SelectInst *SI : RI.Selects) {
1376 Unhoistables.insert(SI);
1377 }
1378 }
1379 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1380 for (RegInfo &RI : Scope->RegInfos) {
1381 Region *R = RI.R;
1382 DenseSet<Instruction *> HoistStops;
1383 bool IsHoisted = false;
1384 if (RI.HasBranch) {
1385 assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1386 OutermostScope->FalseBiasedRegions.contains(R)) &&
1387 "Must be truthy or falsy");
1388 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1389 // Note checkHoistValue fills in HoistStops.
1391 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1392 Unhoistables, &HoistStops, Visited);
1393 assert(IsHoistable && "Must be hoistable");
1394 (void)(IsHoistable); // Unused in release build
1395 IsHoisted = true;
1396 }
1397 for (SelectInst *SI : RI.Selects) {
1398 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1399 OutermostScope->FalseBiasedSelects.contains(SI)) &&
1400 "Must be true or false biased");
1401 // Note checkHoistValue fills in HoistStops.
1403 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1404 Unhoistables, &HoistStops, Visited);
1405 assert(IsHoistable && "Must be hoistable");
1406 (void)(IsHoistable); // Unused in release build
1407 IsHoisted = true;
1408 }
1409 if (IsHoisted) {
1410 OutermostScope->CHRRegions.push_back(RI);
1411 OutermostScope->HoistStopMap[R] = HoistStops;
1412 }
1413 }
1414 for (CHRScope *Sub : Scope->Subs)
1415 setCHRRegions(Sub, OutermostScope);
1416}
1417
1418static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1419 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1420}
1421
1422void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1424 Output.resize(Input.size());
1425 llvm::copy(Input, Output.begin());
1427}
1428
1429// Return true if V is already hoisted or was hoisted (along with its operands)
1430// to the insert point.
1431static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1432 HoistStopMapTy &HoistStopMap,
1433 DenseSet<Instruction *> &HoistedSet,
1434 DenseSet<PHINode *> &TrivialPHIs,
1435 DominatorTree &DT) {
1436 auto IT = HoistStopMap.find(R);
1437 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1438 DenseSet<Instruction *> &HoistStops = IT->second;
1439 if (auto *I = dyn_cast<Instruction>(V)) {
1440 if (I == HoistPoint)
1441 return;
1442 if (HoistStops.count(I))
1443 return;
1444 if (auto *PN = dyn_cast<PHINode>(I))
1445 if (TrivialPHIs.count(PN))
1446 // The trivial phi inserted by the previous CHR scope could replace a
1447 // non-phi in HoistStops. Note that since this phi is at the exit of a
1448 // previous CHR scope, which dominates this scope, it's safe to stop
1449 // hoisting there.
1450 return;
1451 if (HoistedSet.count(I))
1452 // Already hoisted, return.
1453 return;
1454 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1455 assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1456 assert(DT.getNode(HoistPoint->getParent()) &&
1457 "DT must contain HoistPoint block");
1458 if (DT.dominates(I, HoistPoint))
1459 // We are already above the hoist point. Stop here. This may be necessary
1460 // when multiple scopes would independently hoist the same
1461 // instruction. Since an outer (dominating) scope would hoist it to its
1462 // entry before an inner (dominated) scope would to its entry, the inner
1463 // scope may see the instruction already hoisted, in which case it
1464 // potentially wrong for the inner scope to hoist it and could cause bad
1465 // IR (non-dominating def), but safe to skip hoisting it instead because
1466 // it's already in a block that dominates the inner scope.
1467 return;
1468 for (Value *Op : I->operands()) {
1469 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1470 }
1471 I->moveBefore(HoistPoint);
1472 HoistedSet.insert(I);
1473 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1474 }
1475}
1476
1477// Hoist the dependent condition values of the branches and the selects in the
1478// scope to the insert point.
1479static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1480 DenseSet<PHINode *> &TrivialPHIs,
1481 DominatorTree &DT) {
1482 DenseSet<Instruction *> HoistedSet;
1483 for (const RegInfo &RI : Scope->CHRRegions) {
1484 Region *R = RI.R;
1485 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1486 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1487 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1488 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1489 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1490 HoistedSet, TrivialPHIs, DT);
1491 }
1492 for (SelectInst *SI : RI.Selects) {
1493 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1494 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1495 if (!(IsTrueBiased || IsFalseBiased))
1496 continue;
1497 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1498 HoistedSet, TrivialPHIs, DT);
1499 }
1500 }
1501}
1502
1503// Negate the predicate if an ICmp if it's used only by branches or selects by
1504// swapping the operands of the branches or the selects. Returns true if success.
1506 Instruction *ExcludedUser,
1507 CHRScope *Scope) {
1508 for (User *U : ICmp->users()) {
1509 if (U == ExcludedUser)
1510 continue;
1511 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1512 continue;
1513 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1514 continue;
1515 return false;
1516 }
1517 for (User *U : ICmp->users()) {
1518 if (U == ExcludedUser)
1519 continue;
1520 if (auto *BI = dyn_cast<BranchInst>(U)) {
1521 assert(BI->isConditional() && "Must be conditional");
1522 BI->swapSuccessors();
1523 // Don't need to swap this in terms of
1524 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1525 // mean whehter the branch is likely go into the if-then rather than
1526 // successor0/successor1 and because we can tell which edge is the then or
1527 // the else one by comparing the destination to the region exit block.
1528 continue;
1529 }
1530 if (auto *SI = dyn_cast<SelectInst>(U)) {
1531 // Swap operands
1532 SI->swapValues();
1533 SI->swapProfMetadata();
1534 if (Scope->TrueBiasedSelects.count(SI)) {
1535 assert(!Scope->FalseBiasedSelects.contains(SI) &&
1536 "Must not be already in");
1537 Scope->FalseBiasedSelects.insert(SI);
1538 } else if (Scope->FalseBiasedSelects.count(SI)) {
1539 assert(!Scope->TrueBiasedSelects.contains(SI) &&
1540 "Must not be already in");
1541 Scope->TrueBiasedSelects.insert(SI);
1542 }
1543 continue;
1544 }
1545 llvm_unreachable("Must be a branch or a select");
1546 }
1548 return true;
1549}
1550
1551// A helper for transformScopes. Insert a trivial phi at the scope exit block
1552// for a value that's defined in the scope but used outside it (meaning it's
1553// alive at the exit block).
1554static void insertTrivialPHIs(CHRScope *Scope,
1555 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1556 DenseSet<PHINode *> &TrivialPHIs) {
1557 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1558 for (RegInfo &RI : Scope->RegInfos) {
1559 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1560 // sub-Scopes.
1561 BlocksInScope.insert(BB);
1562 }
1563 }
1564 CHR_DEBUG({
1565 dbgs() << "Inserting redundant phis\n";
1566 for (BasicBlock *BB : BlocksInScope)
1567 dbgs() << "BlockInScope " << BB->getName() << "\n";
1568 });
1569 for (BasicBlock *BB : BlocksInScope) {
1570 for (Instruction &I : *BB) {
1572 for (User *U : I.users()) {
1573 if (auto *UI = dyn_cast<Instruction>(U)) {
1574 if (!BlocksInScope.contains(UI->getParent()) &&
1575 // Unless there's already a phi for I at the exit block.
1576 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1577 CHR_DEBUG(dbgs() << "V " << I << "\n");
1578 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1579 Users.push_back(UI);
1580 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1581 // There's a loop backedge from a block that's dominated by this
1582 // scope to the entry block.
1583 CHR_DEBUG(dbgs() << "V " << I << "\n");
1584 CHR_DEBUG(dbgs()
1585 << "Used at entry block (for a back edge) by a phi user "
1586 << *UI << "\n");
1587 Users.push_back(UI);
1588 }
1589 }
1590 }
1591 if (Users.size() > 0) {
1592 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1593 // ExitBlock. Replace I with the new phi in UI unless UI is another
1594 // phi at ExitBlock.
1595 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1596 PN->insertBefore(ExitBlock->begin());
1597 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1598 PN->addIncoming(&I, Pred);
1599 }
1600 TrivialPHIs.insert(PN);
1601 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1602 for (Instruction *UI : Users) {
1603 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1604 if (UI->getOperand(J) == &I) {
1605 UI->setOperand(J, PN);
1606 }
1607 }
1608 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1609 }
1610 }
1611 }
1612 }
1613}
1614
1615// Assert that all the CHR regions of the scope have a biased branch or select.
1616static void LLVM_ATTRIBUTE_UNUSED
1618#ifndef NDEBUG
1619 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1620 if (Scope->TrueBiasedRegions.count(RI.R) ||
1621 Scope->FalseBiasedRegions.count(RI.R))
1622 return true;
1623 for (SelectInst *SI : RI.Selects)
1624 if (Scope->TrueBiasedSelects.count(SI) ||
1625 Scope->FalseBiasedSelects.count(SI))
1626 return true;
1627 return false;
1628 };
1629 for (RegInfo &RI : Scope->CHRRegions) {
1630 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1631 "Must have biased branch or select");
1632 }
1633#endif
1634}
1635
1636// Assert that all the condition values of the biased branches and selects have
1637// been hoisted to the pre-entry block or outside of the scope.
1639 CHRScope *Scope, BasicBlock *PreEntryBlock) {
1640 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1641 for (RegInfo &RI : Scope->CHRRegions) {
1642 Region *R = RI.R;
1643 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1644 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1645 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1646 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1647 Value *V = BI->getCondition();
1648 CHR_DEBUG(dbgs() << *V << "\n");
1649 if (auto *I = dyn_cast<Instruction>(V)) {
1650 (void)(I); // Unused in release build.
1651 assert((I->getParent() == PreEntryBlock ||
1652 !Scope->contains(I)) &&
1653 "Must have been hoisted to PreEntryBlock or outside the scope");
1654 }
1655 }
1656 for (SelectInst *SI : RI.Selects) {
1657 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1658 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1659 if (!(IsTrueBiased || IsFalseBiased))
1660 continue;
1661 Value *V = SI->getCondition();
1662 CHR_DEBUG(dbgs() << *V << "\n");
1663 if (auto *I = dyn_cast<Instruction>(V)) {
1664 (void)(I); // Unused in release build.
1665 assert((I->getParent() == PreEntryBlock ||
1666 !Scope->contains(I)) &&
1667 "Must have been hoisted to PreEntryBlock or outside the scope");
1668 }
1669 }
1670 }
1671}
1672
1673void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1674 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1675
1676 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1677
1678 for (RegInfo &RI : Scope->RegInfos) {
1679 const Region *R = RI.R;
1680 unsigned Duplication = getRegionDuplicationCount(R);
1681 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
1682 << "\n");
1684 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1685 << " for this region");
1686 ORE.emit([&]() {
1687 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1688 R->getEntry()->getTerminator())
1689 << "Reached the duplication threshold for the region";
1690 });
1691 return;
1692 }
1693 }
1694 for (RegInfo &RI : Scope->RegInfos) {
1695 DuplicationCount[RI.R]++;
1696 }
1697
1698 Region *FirstRegion = Scope->RegInfos[0].R;
1699 BasicBlock *EntryBlock = FirstRegion->getEntry();
1700 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1701 BasicBlock *ExitBlock = LastRegion->getExit();
1702 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1703
1704 if (ExitBlock) {
1705 // Insert a trivial phi at the exit block (where the CHR hot path and the
1706 // cold path merges) for a value that's defined in the scope but used
1707 // outside it (meaning it's alive at the exit block). We will add the
1708 // incoming values for the CHR cold paths to it below. Without this, we'd
1709 // miss updating phi's for such values unless there happens to already be a
1710 // phi for that value there.
1711 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1712 }
1713
1714 // Split the entry block of the first region. The new block becomes the new
1715 // entry block of the first region. The old entry block becomes the block to
1716 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1717 // through the split, we update the entry of the first region after the split,
1718 // and Region only points to the entry and the exit blocks, rather than
1719 // keeping everything in a list or set, the blocks membership and the
1720 // entry/exit blocks of the region are still valid after the split.
1721 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1722 << " at " << *Scope->BranchInsertPoint << "\n");
1723 BasicBlock *NewEntryBlock =
1724 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1725 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1726 "NewEntryBlock's only pred must be EntryBlock");
1727 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1728 BasicBlock *PreEntryBlock = EntryBlock;
1729
1730 ValueToValueMapTy VMap;
1731 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1732 // hot path (originals) and a cold path (clones) and update the PHIs at the
1733 // exit block.
1734 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1735
1736 // Replace the old (placeholder) branch with the new (merged) conditional
1737 // branch.
1738 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1739 NewEntryBlock, VMap);
1740
1741#ifndef NDEBUG
1743#endif
1744
1745 // Hoist the conditional values of the branches/selects.
1746 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1747
1748#ifndef NDEBUG
1749 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1750#endif
1751
1752 // Create the combined branch condition and constant-fold the branches/selects
1753 // in the hot path.
1754 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1755 ProfileCount.value_or(0));
1756}
1757
1758// A helper for transformScopes. Clone the blocks in the scope (excluding the
1759// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1760// at the exit block.
1761void CHR::cloneScopeBlocks(CHRScope *Scope,
1762 BasicBlock *PreEntryBlock,
1763 BasicBlock *ExitBlock,
1764 Region *LastRegion,
1765 ValueToValueMapTy &VMap) {
1766 // Clone all the blocks. The original blocks will be the hot-path
1767 // CHR-optimized code and the cloned blocks will be the original unoptimized
1768 // code. This is so that the block pointers from the
1769 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1770 // which CHR should apply to.
1772 for (RegInfo &RI : Scope->RegInfos)
1773 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1774 // sub-Scopes.
1775 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1776 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1777 NewBlocks.push_back(NewBB);
1778 VMap[BB] = NewBB;
1779
1780 // Unreachable predecessors will not be cloned and will not have an edge
1781 // to the cloned block. As such, also remove them from any phi nodes.
1782 for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1783 PN.removeIncomingValueIf([&](unsigned Idx) {
1784 return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1785 });
1786 }
1787
1788 // Place the cloned blocks right after the original blocks (right before the
1789 // exit block of.)
1790 if (ExitBlock)
1791 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1792 F.end());
1793
1794 // Update the cloned blocks/instructions to refer to themselves.
1795 for (BasicBlock *NewBB : NewBlocks)
1796 for (Instruction &I : *NewBB)
1797 RemapInstruction(&I, VMap,
1799
1800 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1801 // the top-level region but we don't need to add PHIs. The trivial PHIs
1802 // inserted above will be updated here.
1803 if (ExitBlock)
1804 for (PHINode &PN : ExitBlock->phis())
1805 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1806 ++I) {
1807 BasicBlock *Pred = PN.getIncomingBlock(I);
1808 if (LastRegion->contains(Pred)) {
1809 Value *V = PN.getIncomingValue(I);
1810 auto It = VMap.find(V);
1811 if (It != VMap.end()) V = It->second;
1812 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1813 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1814 }
1815 }
1816}
1817
1818// A helper for transformScope. Replace the old (placeholder) branch with the
1819// new (merged) conditional branch.
1820BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1821 BasicBlock *EntryBlock,
1822 BasicBlock *NewEntryBlock,
1823 ValueToValueMapTy &VMap) {
1824 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1825 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1826 "SplitBlock did not work correctly!");
1827 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1828 "NewEntryBlock's only pred must be EntryBlock");
1829 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1830 "NewEntryBlock must have been copied");
1831 OldBR->dropAllReferences();
1832 OldBR->eraseFromParent();
1833 // The true predicate is a placeholder. It will be replaced later in
1834 // fixupBranchesAndSelects().
1835 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1836 cast<BasicBlock>(VMap[NewEntryBlock]),
1837 ConstantInt::getTrue(F.getContext()));
1838 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1839 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1840 "NewEntryBlock's only pred must be EntryBlock");
1841 return NewBR;
1842}
1843
1844// A helper for transformScopes. Create the combined branch condition and
1845// constant-fold the branches/selects in the hot path.
1846void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1847 BasicBlock *PreEntryBlock,
1848 BranchInst *MergedBR,
1850 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1851 BranchProbability CHRBranchBias(1, 1);
1852 uint64_t NumCHRedBranches = 0;
1853 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1854 for (RegInfo &RI : Scope->CHRRegions) {
1855 Region *R = RI.R;
1856 if (RI.HasBranch) {
1857 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1858 ++NumCHRedBranches;
1859 }
1860 for (SelectInst *SI : RI.Selects) {
1861 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1862 ++NumCHRedBranches;
1863 }
1864 }
1865 assert(NumCHRedBranches > 0);
1866 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1867 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1868 ORE.emit([&]() {
1870 "CHR",
1871 // Refer to the hot (original) path
1872 MergedBR->getSuccessor(0)->getTerminator())
1873 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1874 << " branches or selects";
1875 });
1876 MergedBR->setCondition(MergedCondition);
1877 uint32_t Weights[] = {
1878 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1879 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1880 };
1881 setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1882 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1883 << "\n");
1884}
1885
1886// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1887// and constant-fold a branch in the hot path.
1888void CHR::fixupBranch(Region *R, CHRScope *Scope,
1889 IRBuilder<> &IRB,
1890 Value *&MergedCondition,
1891 BranchProbability &CHRBranchBias) {
1892 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1893 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1894 "Must be truthy or falsy");
1895 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1896 assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1897 BranchProbability Bias = BranchBiasMap[R];
1898 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1899 // Take the min.
1900 if (CHRBranchBias > Bias)
1901 CHRBranchBias = Bias;
1902 BasicBlock *IfThen = BI->getSuccessor(1);
1903 BasicBlock *IfElse = BI->getSuccessor(0);
1904 BasicBlock *RegionExitBlock = R->getExit();
1905 assert(RegionExitBlock && "Null ExitBlock");
1906 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1907 IfThen != IfElse && "Invariant from findScopes");
1908 if (IfThen == RegionExitBlock) {
1909 // Swap them so that IfThen means going into it and IfElse means skipping
1910 // it.
1911 std::swap(IfThen, IfElse);
1912 }
1913 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1914 << " IfElse " << IfElse->getName() << "\n");
1915 Value *Cond = BI->getCondition();
1916 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1917 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1918 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1919 MergedCondition);
1920 // Constant-fold the branch at ClonedEntryBlock.
1921 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1922 "The successor shouldn't change");
1923 Value *NewCondition = ConditionTrue ?
1924 ConstantInt::getTrue(F.getContext()) :
1925 ConstantInt::getFalse(F.getContext());
1926 BI->setCondition(NewCondition);
1927}
1928
1929// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1930// and constant-fold a select in the hot path.
1931void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1932 IRBuilder<> &IRB,
1933 Value *&MergedCondition,
1934 BranchProbability &CHRBranchBias) {
1935 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1936 assert((IsTrueBiased ||
1937 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1938 assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1939 BranchProbability Bias = SelectBiasMap[SI];
1940 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1941 // Take the min.
1942 if (CHRBranchBias > Bias)
1943 CHRBranchBias = Bias;
1944 Value *Cond = SI->getCondition();
1945 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1946 MergedCondition);
1947 Value *NewCondition = IsTrueBiased ?
1948 ConstantInt::getTrue(F.getContext()) :
1949 ConstantInt::getFalse(F.getContext());
1950 SI->setCondition(NewCondition);
1951}
1952
1953// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1954// condition.
1955void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1956 Instruction *BranchOrSelect, CHRScope *Scope,
1957 IRBuilder<> &IRB, Value *&MergedCondition) {
1958 if (!IsTrueBiased) {
1959 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1960 // branch, negate the icmp predicate and swap the branch targets and avoid
1961 // inserting an Xor to negate Cond.
1962 auto *ICmp = dyn_cast<ICmpInst>(Cond);
1963 if (!ICmp ||
1964 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
1965 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
1966 }
1967
1968 // Freeze potentially poisonous conditions.
1970 Cond = IRB.CreateFreeze(Cond);
1971
1972 // Use logical and to avoid propagating poison from later conditions.
1973 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
1974}
1975
1976void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1977 unsigned I = 0;
1978 DenseSet<PHINode *> TrivialPHIs;
1979 for (CHRScope *Scope : CHRScopes) {
1980 transformScopes(Scope, TrivialPHIs);
1981 CHR_DEBUG(
1982 std::ostringstream oss;
1983 oss << " after transformScopes " << I++;
1984 dumpIR(F, oss.str().c_str(), nullptr));
1985 (void)I;
1986 }
1987}
1988
1989static void LLVM_ATTRIBUTE_UNUSED
1990dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1991 dbgs() << Label << " " << Scopes.size() << "\n";
1992 for (CHRScope *Scope : Scopes) {
1993 dbgs() << *Scope << "\n";
1994 }
1995}
1996
1997bool CHR::run() {
1998 if (!shouldApply(F, PSI))
1999 return false;
2000
2001 CHR_DEBUG(dumpIR(F, "before", nullptr));
2002
2003 bool Changed = false;
2004 {
2005 CHR_DEBUG(
2006 dbgs() << "RegionInfo:\n";
2007 RI.print(dbgs()));
2008
2009 // Recursively traverse the region tree and find regions that have biased
2010 // branches and/or selects and create scopes.
2012 findScopes(AllScopes);
2013 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2014
2015 // Split the scopes if 1) the conditional values of the biased
2016 // branches/selects of the inner/lower scope can't be hoisted up to the
2017 // outermost/uppermost scope entry, or 2) the condition values of the biased
2018 // branches/selects in a scope (including subscopes) don't share at least
2019 // one common value.
2020 SmallVector<CHRScope *, 8> SplitScopes;
2021 splitScopes(AllScopes, SplitScopes);
2022 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2023
2024 // After splitting, set the biased regions and selects of a scope (a tree
2025 // root) that include those of the subscopes.
2026 classifyBiasedScopes(SplitScopes);
2027 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2028
2029 // Filter out the scopes that has only one biased region or select (CHR
2030 // isn't useful in such a case).
2031 SmallVector<CHRScope *, 8> FilteredScopes;
2032 filterScopes(SplitScopes, FilteredScopes);
2033 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2034
2035 // Set the regions to be CHR'ed and their hoist stops for each scope.
2037 setCHRRegions(FilteredScopes, SetScopes);
2038 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2039
2040 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2041 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2042 // to the hot path, rather than both hot and cold paths.
2043 SmallVector<CHRScope *, 8> SortedScopes;
2044 sortScopes(SetScopes, SortedScopes);
2045 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2046
2047 CHR_DEBUG(
2048 dbgs() << "RegionInfo:\n";
2049 RI.print(dbgs()));
2050
2051 // Apply the CHR transformation.
2052 if (!SortedScopes.empty()) {
2053 transformScopes(SortedScopes);
2054 Changed = true;
2055 }
2056 }
2057
2058 if (Changed) {
2059 CHR_DEBUG(dumpIR(F, "after", &Stats));
2060 ORE.emit([&]() {
2061 return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2062 << ore::NV("Function", &F) << " "
2063 << "Reduced the number of branches in hot paths by "
2064 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2065 << " (static) and "
2066 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2067 << " (weighted by PGO count)";
2068 });
2069 }
2070
2071 return Changed;
2072}
2073
2074namespace llvm {
2075
2078}
2079
2081 Function &F,
2084 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2085 // If there is no profile summary, we should not do CHR.
2086 if (!PPSI || !PPSI->hasProfileSummary())
2087 return PreservedAnalyses::all();
2088 auto &PSI = *PPSI;
2089 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2090 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2091 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2093 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2094 if (!Changed)
2095 return PreservedAnalyses::all();
2096 return PreservedAnalyses::none();
2097}
2098
2099} // namespace llvm
static const LLT S1
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:282
static void insertTrivialPHIs(CHRScope *Scope, BasicBlock *EntryBlock, BasicBlock *ExitBlock, DenseSet< PHINode * > &TrivialPHIs)
static raw_ostream LLVM_ATTRIBUTE_UNUSED & operator<<(raw_ostream &OS, const CHRStats &Stats)
static cl::opt< bool > DisableCHR("disable-chr", cl::init(false), cl::Hidden, cl::desc("Disable CHR for all functions"))
static StringSet CHRFunctions
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2)
static bool checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables, DenseSet< Instruction * > *HoistStops, DenseMap< Instruction *, bool > &Visited)
static cl::opt< unsigned > CHRMergeThreshold("chr-merge-threshold", cl::init(2), cl::Hidden, cl::desc("CHR merges a group of N branches/selects where N >= this value"))
static bool isHoistable(Instruction *I, DominatorTree &DT)
static cl::opt< unsigned > CHRDupThreshsold("chr-dup-threshold", cl::init(3), cl::Hidden, cl::desc("Max number of duplications by CHR for a region"))
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static bool checkBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, S &TrueSet, S &FalseSet, M &BiasMap)
static StringSet CHRModules
static bool checkBiasedSelect(SelectInst *SI, Region *R, DenseSet< SelectInst * > &TrueBiasedSelectsGlobal, DenseSet< SelectInst * > &FalseBiasedSelectsGlobal, DenseMap< SelectInst *, BranchProbability > &SelectBiasMap)
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, HoistStopMapTy &HoistStopMap, DenseSet< Instruction * > &HoistedSet, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope)
static bool shouldApply(Function &F, ProfileSummaryInfo &PSI)
#define CHR_DEBUG(X)
static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, CHRStats *Stats)
static void getSelectsInScope(CHRScope *Scope, DenseSet< Instruction * > &Output)
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool isHoistableInstructionType(Instruction *I)
static DenseSet< Value * > getCHRConditionValuesForRegion(RegInfo &RI)
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, Instruction *ExcludedUser, CHRScope *Scope)
static Instruction * getBranchInsertPoint(RegInfo &RI)
static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(CHRScope *Scope, BasicBlock *PreEntryBlock)
static cl::opt< std::string > CHRModuleList("chr-module-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of modules to apply CHR to"))
#define DEBUG_TYPE
static void LLVM_ATTRIBUTE_UNUSED assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope)
static cl::opt< double > CHRBiasThreshold("chr-bias-threshold", cl::init(0.99), cl::Hidden, cl::desc("CHR considers a branch bias greater than this ratio as biased"))
static void LLVM_ATTRIBUTE_UNUSED dumpScopes(SmallVectorImpl< CHRScope * > &Scopes, const char *Label)
static BranchProbability getCHRBiasThreshold()
static cl::opt< std::string > CHRFunctionList("chr-function-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of functions to apply CHR to"))
static cl::opt< bool > ForceCHR("force-chr", cl::init(false), cl::Hidden, cl::desc("Apply CHR for all functions"))
static bool checkBiasedBranch(BranchInst *BI, Region *R, DenseSet< Region * > &TrueBiasedRegionsGlobal, DenseSet< Region * > &FalseBiasedRegionsGlobal, DenseMap< Region *, BranchProbability > &BranchBiasMap)
static const std::set< Value * > & getBaseValues(Value *V, DominatorTree &DT, DenseMap< Value *, std::set< Value * > > &Visited)
static bool extractBranchProbabilities(Instruction *I, BranchProbability &TrueProb, BranchProbability &FalseProb)
static void parseCHRFilterFiles()
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
Definition: IVUsers.cpp:48
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
block placement Basic Block Placement Stats
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
early Early Tail Duplication
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition: InstrTypes.h:766
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition: InstrTypes.h:787
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:278
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Class to represent profile counts.
Definition: Function.h:292
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2566
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="")
Definition: IRBuilder.h:1699
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1542
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
The optimization diagnostic interface.
Diagnostic information for missed-optimization remarks.
Diagnostic information for applied optimization remarks.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:692
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionEntryHot(const FuncT *F) const
Returns true if F has hot function entry.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition: RegionInfo.h:357
void replaceEntryRecursive(BlockT *NewEntry)
Recursively replace the entry basic block of the region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionT * getParent() const
Get the parent of the Region.
Definition: RegionInfo.h:362
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition: RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition: RegionInfo.h:965
This class represents the LLVM 'select' instruction.
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:276
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition: StringRef.h:700
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:38
void dropAllReferences()
Drop all references to operands.
Definition: User.h:345
iterator find(const KeyT &Val)
Definition: ValueMap.h:155
iterator end()
Definition: ValueMap.h:135
LLVM Value Representation.
Definition: Value.h:74
iterator_range< user_iterator > users()
Definition: Value.h:421
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
std::pair< iterator, bool > insert(const ValueT &V)
Definition: DenseSet.h:213
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition: DenseSet.h:193
bool erase(const ValueT &V)
Definition: DenseSet.h:97
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:95
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:844
@ Exit
Definition: COFF.h:845
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:456
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:2037
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
auto pred_size(const MachineBasicBlock *BB)
void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition: ValueMapper.h:94
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition: ValueMapper.h:76
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
Definition: ValueMapper.h:263
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1841
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1766
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
auto predecessors(const MachineBasicBlock *BB)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860