LLVM 23.0.0git
ControlHeightReduction.cpp
Go to the documentation of this file.
1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/StringSet.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/PassManager.h"
40
41#include <optional>
42#include <set>
43#include <sstream>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "chr"
48
49#define CHR_DEBUG(X) LLVM_DEBUG(X)
50
51static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
52 cl::desc("Disable CHR for all functions"));
53
54static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
55 cl::desc("Apply CHR for all functions"));
56
58 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
59 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
60
62 "chr-merge-threshold", cl::init(2), cl::Hidden,
63 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
64
66 "chr-module-list", cl::init(""), cl::Hidden,
67 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
68
70 "chr-function-list", cl::init(""), cl::Hidden,
71 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
72
74 "chr-dup-threshold", cl::init(3), cl::Hidden,
75 cl::desc("Max number of duplications by CHR for a region"));
76
79
80static void parseCHRFilterFiles() {
81 if (!CHRModuleList.empty()) {
82 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
83 if (!FileOrErr) {
84 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
85 std::exit(1);
86 }
87 StringRef Buf = FileOrErr->get()->getBuffer();
89 Buf.split(Lines, '\n');
90 for (StringRef Line : Lines) {
91 Line = Line.trim();
92 if (!Line.empty())
93 CHRModules.insert(Line);
94 }
95 }
96 if (!CHRFunctionList.empty()) {
97 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
98 if (!FileOrErr) {
99 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
100 std::exit(1);
101 }
102 StringRef Buf = FileOrErr->get()->getBuffer();
104 Buf.split(Lines, '\n');
105 for (StringRef Line : Lines) {
106 Line = Line.trim();
107 if (!Line.empty())
108 CHRFunctions.insert(Line);
109 }
110 }
111}
112
113namespace {
114
115struct CHRStats {
116 CHRStats() = default;
117 void print(raw_ostream &OS) const {
118 OS << "CHRStats: NumBranches " << NumBranches
119 << " NumBranchesDelta " << NumBranchesDelta
120 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
121 }
122 // The original number of conditional branches / selects
123 uint64_t NumBranches = 0;
124 // The decrease of the number of conditional branches / selects in the hot
125 // paths due to CHR.
126 uint64_t NumBranchesDelta = 0;
127 // NumBranchesDelta weighted by the profile count at the scope entry.
128 uint64_t WeightedNumBranchesDelta = 0;
129};
130
131// RegInfo - some properties of a Region.
132struct RegInfo {
133 RegInfo() = default;
134 RegInfo(Region *RegionIn) : R(RegionIn) {}
135 Region *R = nullptr;
136 bool HasBranch = false;
138};
139
140typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
141
142// CHRScope - a sequence of regions to CHR together. It corresponds to a
143// sequence of conditional blocks. It can have subscopes which correspond to
144// nested conditional blocks. Nested CHRScopes form a tree.
145class CHRScope {
146 public:
147 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
148 assert(RI.R && "Null RegionIn");
149 RegInfos.push_back(RI);
150 }
151
152 Region *getParentRegion() {
153 assert(RegInfos.size() > 0 && "Empty CHRScope");
154 Region *Parent = RegInfos[0].R->getParent();
155 assert(Parent && "Unexpected to call this on the top-level region");
156 return Parent;
157 }
158
159 BasicBlock *getEntryBlock() {
160 assert(RegInfos.size() > 0 && "Empty CHRScope");
161 return RegInfos.front().R->getEntry();
162 }
163
164 BasicBlock *getExitBlock() {
165 assert(RegInfos.size() > 0 && "Empty CHRScope");
166 return RegInfos.back().R->getExit();
167 }
168
169 bool appendable(CHRScope *Next) {
170 // The next scope is appendable only if this scope is directly connected to
171 // it (which implies it post-dominates this scope) and this scope dominates
172 // it (no edge to the next scope outside this scope).
173 BasicBlock *NextEntry = Next->getEntryBlock();
174 if (getExitBlock() != NextEntry)
175 // Not directly connected.
176 return false;
177 Region *LastRegion = RegInfos.back().R;
178 for (BasicBlock *Pred : predecessors(NextEntry))
179 if (!LastRegion->contains(Pred))
180 // There's an edge going into the entry of the next scope from outside
181 // of this scope.
182 return false;
183 return true;
184 }
185
186 void append(CHRScope *Next) {
187 assert(RegInfos.size() > 0 && "Empty CHRScope");
188 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
189 assert(getParentRegion() == Next->getParentRegion() &&
190 "Must be siblings");
191 assert(getExitBlock() == Next->getEntryBlock() &&
192 "Must be adjacent");
193 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
194 Subs.append(Next->Subs.begin(), Next->Subs.end());
195 }
196
197 void addSub(CHRScope *SubIn) {
198#ifndef NDEBUG
199 bool IsChild = false;
200 for (RegInfo &RI : RegInfos)
201 if (RI.R == SubIn->getParentRegion()) {
202 IsChild = true;
203 break;
204 }
205 assert(IsChild && "Must be a child");
206#endif
207 Subs.push_back(SubIn);
208 }
209
210 // Split this scope at the boundary region into two, which will belong to the
211 // tail and returns the tail.
212 CHRScope *split(Region *Boundary) {
213 assert(Boundary && "Boundary null");
214 assert(RegInfos.begin()->R != Boundary &&
215 "Can't be split at beginning");
216 auto BoundaryIt = llvm::find_if(
217 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
218 if (BoundaryIt == RegInfos.end())
219 return nullptr;
220 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
221 DenseSet<Region *> TailRegionSet;
222 for (const RegInfo &RI : TailRegInfos)
223 TailRegionSet.insert(RI.R);
224
225 auto TailIt =
226 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
227 assert(Sub && "null Sub");
228 Region *Parent = Sub->getParentRegion();
229 if (TailRegionSet.count(Parent))
230 return false;
231
232 assert(llvm::any_of(
233 RegInfos,
234 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
235 "Must be in head");
236 return true;
237 });
238 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
239
240 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
241 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
242 RegInfos.erase(BoundaryIt, RegInfos.end());
243 Subs.erase(TailIt, Subs.end());
244 return Scope;
245 }
246
247 bool contains(Instruction *I) const {
248 BasicBlock *Parent = I->getParent();
249 for (const RegInfo &RI : RegInfos)
250 if (RI.R->contains(Parent))
251 return true;
252 return false;
253 }
254
255 void print(raw_ostream &OS) const;
256
257 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
258 SmallVector<CHRScope *, 8> Subs; // Subscopes.
259
260 // The instruction at which to insert the CHR conditional branch (and hoist
261 // the dependent condition values).
262 Instruction *BranchInsertPoint;
263
264 // True-biased and false-biased regions (conditional blocks),
265 // respectively. Used only for the outermost scope and includes regions in
266 // subscopes. The rest are unbiased.
267 DenseSet<Region *> TrueBiasedRegions;
268 DenseSet<Region *> FalseBiasedRegions;
269 // Among the biased regions, the regions that get CHRed.
270 SmallVector<RegInfo, 8> CHRRegions;
271
272 // True-biased and false-biased selects, respectively. Used only for the
273 // outermost scope and includes ones in subscopes.
274 DenseSet<SelectInst *> TrueBiasedSelects;
275 DenseSet<SelectInst *> FalseBiasedSelects;
276
277 // Map from one of the above regions to the instructions to stop
278 // hoisting instructions at through use-def chains.
279 HoistStopMapTy HoistStopMap;
280
281 private:
282 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
283 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
284};
285
286class CHR {
287 public:
288 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
289 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
290 OptimizationRemarkEmitter &OREin)
291 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
292
293 ~CHR() {
294 for (CHRScope *Scope : Scopes) {
295 delete Scope;
296 }
297 }
298
299 bool run();
300
301 private:
302 // See the comments in CHR::run() for the high level flow of the algorithm and
303 // what the following functions do.
304
305 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
306 Region *R = RI.getTopLevelRegion();
307 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
308 Output.push_back(Scope);
309 }
310 }
311 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
312 SmallVectorImpl<CHRScope *> &Scopes);
313 CHRScope *findScope(Region *R);
314 void checkScopeHoistable(CHRScope *Scope);
315
316 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
317 SmallVectorImpl<CHRScope *> &Output);
318 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
319 CHRScope *Outer,
320 DenseSet<Value *> *OuterConditionValues,
321 Instruction *OuterInsertPoint,
322 SmallVectorImpl<CHRScope *> &Output,
323 DenseSet<Instruction *> &Unhoistables);
324
325 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
326 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
327
328 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
329 SmallVectorImpl<CHRScope *> &Output);
330
331 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
332 SmallVectorImpl<CHRScope *> &Output);
333 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
334
335 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
336 SmallVectorImpl<CHRScope *> &Output);
337
338 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
339 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
340 void cloneScopeBlocks(CHRScope *Scope,
341 BasicBlock *PreEntryBlock,
342 BasicBlock *ExitBlock,
343 Region *LastRegion,
344 ValueToValueMapTy &VMap);
345 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
346 BasicBlock *EntryBlock,
347 BasicBlock *NewEntryBlock,
348 ValueToValueMapTy &VMap);
349 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
350 BranchInst *MergedBR, uint64_t ProfileCount);
351 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
352 Value *&MergedCondition, BranchProbability &CHRBranchBias);
353 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
354 Value *&MergedCondition, BranchProbability &CHRBranchBias);
355 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
356 Instruction *BranchOrSelect, CHRScope *Scope,
357 IRBuilder<> &IRB, Value *&MergedCondition);
358 unsigned getRegionDuplicationCount(const Region *R) {
359 unsigned Count = 0;
360 // Find out how many times region R is cloned. Note that if the parent
361 // of R is cloned, R is also cloned, but R's clone count is not updated
362 // from the clone of the parent. We need to accumulate all the counts
363 // from the ancestors to get the clone count.
364 while (R) {
365 Count += DuplicationCount[R];
366 R = R->getParent();
367 }
368 return Count;
369 }
370
371 Function &F;
372 BlockFrequencyInfo &BFI;
373 DominatorTree &DT;
374 ProfileSummaryInfo &PSI;
375 RegionInfo &RI;
376 OptimizationRemarkEmitter &ORE;
377 CHRStats Stats;
378
379 // All the true-biased regions in the function
380 DenseSet<Region *> TrueBiasedRegionsGlobal;
381 // All the false-biased regions in the function
382 DenseSet<Region *> FalseBiasedRegionsGlobal;
383 // All the true-biased selects in the function
384 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
385 // All the false-biased selects in the function
386 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
387 // A map from biased regions to their branch bias
388 DenseMap<Region *, BranchProbability> BranchBiasMap;
389 // A map from biased selects to their branch bias
390 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
391 // All the scopes.
392 DenseSet<CHRScope *> Scopes;
393 // This maps records how many times this region is cloned.
394 DenseMap<const Region *, unsigned> DuplicationCount;
395};
396
397} // end anonymous namespace
398
399[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS,
400 const CHRStats &Stats) {
401 Stats.print(OS);
402 return OS;
403}
404
405static inline
406raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
407 Scope.print(OS);
408 return OS;
409}
410
412 if (DisableCHR)
413 return false;
414
415 if (ForceCHR)
416 return true;
417
418 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
419 if (CHRModules.count(F.getParent()->getName()))
420 return true;
421 return CHRFunctions.count(F.getName());
422 }
423
424 return PSI.isFunctionEntryHot(&F);
425}
426
427[[maybe_unused]] static void dumpIR(Function &F, const char *Label,
428 CHRStats *Stats) {
429 StringRef FuncName = F.getName();
430 StringRef ModuleName = F.getParent()->getName();
431 (void)(FuncName); // Unused in release build.
432 (void)(ModuleName); // Unused in release build.
433 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
434 << FuncName);
435 if (Stats)
436 CHR_DEBUG(dbgs() << " " << *Stats);
437 CHR_DEBUG(dbgs() << "\n");
438 CHR_DEBUG(F.dump());
439}
440
441void CHRScope::print(raw_ostream &OS) const {
442 assert(RegInfos.size() > 0 && "Empty CHRScope");
443 OS << "CHRScope[";
444 OS << RegInfos.size() << ", Regions[";
445 for (const RegInfo &RI : RegInfos) {
446 OS << RI.R->getNameStr();
447 if (RI.HasBranch)
448 OS << " B";
449 if (RI.Selects.size() > 0)
450 OS << " S" << RI.Selects.size();
451 OS << ", ";
452 }
453 if (RegInfos[0].R->getParent()) {
454 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
455 } else {
456 // top level region
457 OS << "]";
458 }
459 OS << ", Subs[";
460 for (CHRScope *Sub : Subs) {
461 OS << *Sub << ", ";
462 }
463 OS << "]]";
464}
465
466// Return true if the given instruction type can be hoisted by CHR.
474
475// Return true if the given instruction can be hoisted by CHR.
478 return false;
479 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
480}
481
482// Recursively traverse the use-def chains of the given value and return a set
483// of the unhoistable base values defined within the scope (excluding the
484// first-region entry block) or the (hoistable or unhoistable) base values that
485// are defined outside (including the first-region entry block) of the
486// scope. The returned set doesn't include constants.
487static const std::set<Value *> &
489 DenseMap<Value *, std::set<Value *>> &Visited) {
490 auto It = Visited.find(V);
491 if (It != Visited.end()) {
492 return It->second;
493 }
494 std::set<Value *> Result;
495 if (auto *I = dyn_cast<Instruction>(V)) {
496 // We don't stop at a block that's not in the Scope because we would miss
497 // some instructions that are based on the same base values if we stop
498 // there.
499 if (!isHoistable(I, DT)) {
500 Result.insert(I);
501 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
502 }
503 // I is hoistable above the Scope.
504 for (Value *Op : I->operands()) {
505 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
506 Result.insert(OpResult.begin(), OpResult.end());
507 }
508 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
509 }
510 if (isa<Argument>(V)) {
511 Result.insert(V);
512 }
513 // We don't include others like constants because those won't lead to any
514 // chance of folding of conditions (eg two bit checks merged into one check)
515 // after CHR.
516 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
517}
518
519// Return true if V is already hoisted or can be hoisted (along with its
520// operands) above the insert point. When it returns true and HoistStops is
521// non-null, the instructions to stop hoisting at through the use-def chains are
522// inserted into HoistStops.
523static bool
525 DenseSet<Instruction *> &Unhoistables,
526 DenseSet<Instruction *> *HoistStops,
528 assert(InsertPoint && "Null InsertPoint");
529 if (auto *I = dyn_cast<Instruction>(V)) {
530 auto It = Visited.find(I);
531 if (It != Visited.end()) {
532 return It->second;
533 }
534 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
535 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
536 if (Unhoistables.count(I)) {
537 // Don't hoist if they are not to be hoisted.
538 Visited[I] = false;
539 return false;
540 }
541 if (DT.dominates(I, InsertPoint)) {
542 // We are already above the insert point. Stop here.
543 if (HoistStops)
544 HoistStops->insert(I);
545 Visited[I] = true;
546 return true;
547 }
548 // We aren't not above the insert point, check if we can hoist it above the
549 // insert point.
550 if (isHoistable(I, DT)) {
551 // Check operands first.
552 DenseSet<Instruction *> OpsHoistStops;
553 bool AllOpsHoisted = true;
554 for (Value *Op : I->operands()) {
555 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
556 Visited)) {
557 AllOpsHoisted = false;
558 break;
559 }
560 }
561 if (AllOpsHoisted) {
562 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
563 if (HoistStops)
564 HoistStops->insert_range(OpsHoistStops);
565 Visited[I] = true;
566 return true;
567 }
568 }
569 Visited[I] = false;
570 return false;
571 }
572 // Non-instructions are considered hoistable.
573 return true;
574}
575
576// Constructs the true and false branch probabilities if the the instruction has
577// valid branch weights. Returns true when this was successful, false otherwise.
579 BranchProbability &TrueProb,
580 BranchProbability &FalseProb) {
581 uint64_t TrueWeight;
582 uint64_t FalseWeight;
583 if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
584 return false;
585 uint64_t SumWeight = TrueWeight + FalseWeight;
586
587 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
588 "Overflow calculating branch probabilities.");
589
590 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
591 if (SumWeight == 0)
592 return false;
593
594 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
595 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
596 return true;
597}
598
601 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
602}
603
604// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
605// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
606// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
607// false.
608template <typename K, typename S, typename M>
609static bool checkBias(K *Key, BranchProbability TrueProb,
610 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
611 M &BiasMap) {
613 if (TrueProb >= Threshold) {
614 TrueSet.insert(Key);
615 BiasMap[Key] = TrueProb;
616 return true;
617 } else if (FalseProb >= Threshold) {
618 FalseSet.insert(Key);
619 BiasMap[Key] = FalseProb;
620 return true;
621 }
622 return false;
623}
624
625// Returns true and insert a region into the right biased set and the map if the
626// branch of the region is biased.
628 DenseSet<Region *> &TrueBiasedRegionsGlobal,
629 DenseSet<Region *> &FalseBiasedRegionsGlobal,
631 if (!BI->isConditional())
632 return false;
633 BranchProbability ThenProb, ElseProb;
634 if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
635 return false;
636 BasicBlock *IfThen = BI->getSuccessor(0);
637 BasicBlock *IfElse = BI->getSuccessor(1);
638 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
639 IfThen != IfElse &&
640 "Invariant from findScopes");
641 if (IfThen == R->getExit()) {
642 // Swap them so that IfThen/ThenProb means going into the conditional code
643 // and IfElse/ElseProb means skipping it.
644 std::swap(IfThen, IfElse);
645 std::swap(ThenProb, ElseProb);
646 }
647 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
648 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
649 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
650 return checkBias(R, ThenProb, ElseProb,
651 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
652 BranchBiasMap);
653}
654
655// Returns true and insert a select into the right biased set and the map if the
656// select is biased.
658 SelectInst *SI, Region *R,
659 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
660 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
662 BranchProbability TrueProb, FalseProb;
663 if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
664 return false;
665 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
666 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
667 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
668 return checkBias(SI, TrueProb, FalseProb,
669 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
670 SelectBiasMap);
671}
672
673// Returns the instruction at which to hoist the dependent condition values and
674// insert the CHR branch for a region. This is the terminator branch in the
675// entry block or the first select in the entry block, if any.
677 Region *R = RI.R;
678 BasicBlock *EntryBB = R->getEntry();
679 // The hoist point is by default the terminator of the entry block, which is
680 // the same as the branch instruction if RI.HasBranch is true.
681 Instruction *HoistPoint = EntryBB->getTerminator();
682 for (SelectInst *SI : RI.Selects) {
683 if (SI->getParent() == EntryBB) {
684 // Pick the first select in Selects in the entry block. Note Selects is
685 // sorted in the instruction order within a block (asserted below).
686 HoistPoint = SI;
687 break;
688 }
689 }
690 assert(HoistPoint && "Null HoistPoint");
691#ifndef NDEBUG
692 // Check that HoistPoint is the first one in Selects in the entry block,
693 // if any.
694 DenseSet<Instruction *> EntryBlockSelectSet;
695 for (SelectInst *SI : RI.Selects) {
696 if (SI->getParent() == EntryBB) {
697 EntryBlockSelectSet.insert(SI);
698 }
699 }
700 for (Instruction &I : *EntryBB) {
701 if (EntryBlockSelectSet.contains(&I)) {
702 assert(&I == HoistPoint &&
703 "HoistPoint must be the first one in Selects");
704 break;
705 }
706 }
707#endif
708 return HoistPoint;
709}
710
711// Find a CHR scope in the given region.
712CHRScope * CHR::findScope(Region *R) {
713 CHRScope *Result = nullptr;
714 BasicBlock *Entry = R->getEntry();
715 BasicBlock *Exit = R->getExit(); // null if top level.
716 assert(Entry && "Entry must not be null");
717 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
718 "Only top level region has a null exit");
719 if (Entry)
720 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
721 else
722 CHR_DEBUG(dbgs() << "Entry null\n");
723 if (Exit)
724 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
725 else
726 CHR_DEBUG(dbgs() << "Exit null\n");
727 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
728 // to this region).
729 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
730 if (EntryInSubregion)
731 return nullptr;
732 // Exclude loops
733 for (BasicBlock *Pred : predecessors(Entry))
734 if (R->contains(Pred))
735 return nullptr;
736 // If any of the basic blocks have address taken, we must skip this region
737 // because we cannot clone basic blocks that have address taken.
738 for (BasicBlock *BB : R->blocks()) {
739 if (BB->hasAddressTaken())
740 return nullptr;
741 // If we encounter llvm.coro.id, skip this region because if the basic block
742 // is cloned, we end up inserting a token type PHI node to the block with
743 // llvm.coro.begin.
744 // FIXME: This could lead to less optimal codegen, because the region is
745 // excluded, it can prevent CHR from merging adjacent regions into bigger
746 // scope and hoisting more branches.
747 for (Instruction &I : *BB) {
748 if (auto *II = dyn_cast<IntrinsicInst>(&I))
749 if (II->getIntrinsicID() == Intrinsic::coro_id)
750 return nullptr;
751 // Can't clone regions containing convergent or noduplicate calls.
752 //
753 // CHR clones a region into hot/cold paths guarded by a merged
754 // speculative branch. On GPU targets, this branch may be divergent
755 // (different threads evaluate it differently), splitting the set of
756 // threads that reach each copy. A convergent call (e.g. a cross-lane
757 // operation like ds_bpermute on AMDGPU) requires a specific set of
758 // threads to be active; when CHR places a copy on the hot path, only
759 // the threads that took the hot branch are active, so the operation
760 // reads stale values from threads that went to the cold path.
761 //
762 // Similarly, noduplicate calls must not be duplicated by definition.
763 //
764 // This matches SimplifyCFG's block-duplication guard.
765 if (auto *CB = dyn_cast<CallBase>(&I)) {
766 if (CB->cannotDuplicate() || CB->isConvergent())
767 return nullptr;
768 }
769 }
770 }
771
772 if (Exit) {
773 // Try to find an if-then block (check if R is an if-then).
774 // if (cond) {
775 // ...
776 // }
777 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
778 if (BI)
779 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
780 else
781 CHR_DEBUG(dbgs() << "BI null\n");
782 if (BI && BI->isConditional()) {
783 BasicBlock *S0 = BI->getSuccessor(0);
784 BasicBlock *S1 = BI->getSuccessor(1);
785 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
786 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
787 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
788 RegInfo RI(R);
789 RI.HasBranch = checkBiasedBranch(
790 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
791 BranchBiasMap);
792 Result = new CHRScope(RI);
793 Scopes.insert(Result);
794 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
795 ++Stats.NumBranches;
796 if (!RI.HasBranch) {
797 ORE.emit([&]() {
798 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
799 << "Branch not biased";
800 });
801 }
802 }
803 }
804 }
805 {
806 // Try to look for selects in the direct child blocks (as opposed to in
807 // subregions) of R.
808 // ...
809 // if (..) { // Some subregion
810 // ...
811 // }
812 // if (..) { // Some subregion
813 // ...
814 // }
815 // ...
816 // a = cond ? b : c;
817 // ...
819 for (RegionNode *E : R->elements()) {
820 if (E->isSubRegion())
821 continue;
822 // This returns the basic block of E if E is a direct child of R (not a
823 // subregion.)
824 BasicBlock *BB = E->getEntry();
825 // Need to push in the order to make it easier to find the first Select
826 // later.
827 for (Instruction &I : *BB) {
828 if (auto *SI = dyn_cast<SelectInst>(&I)) {
829 Selects.push_back(SI);
830 ++Stats.NumBranches;
831 }
832 }
833 }
834 if (Selects.size() > 0) {
835 auto AddSelects = [&](RegInfo &RI) {
836 for (auto *SI : Selects)
837 if (checkBiasedSelect(SI, RI.R,
838 TrueBiasedSelectsGlobal,
839 FalseBiasedSelectsGlobal,
840 SelectBiasMap))
841 RI.Selects.push_back(SI);
842 else
843 ORE.emit([&]() {
844 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
845 << "Select not biased";
846 });
847 };
848 if (!Result) {
849 CHR_DEBUG(dbgs() << "Found a select-only region\n");
850 RegInfo RI(R);
851 AddSelects(RI);
852 Result = new CHRScope(RI);
853 Scopes.insert(Result);
854 } else {
855 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
856 AddSelects(Result->RegInfos[0]);
857 }
858 }
859 }
860
861 if (Result) {
862 checkScopeHoistable(Result);
863 }
864 return Result;
865}
866
867// Check that any of the branch and the selects in the region could be
868// hoisted above the the CHR branch insert point (the most dominating of
869// them, either the branch (at the end of the first block) or the first
870// select in the first block). If the branch can't be hoisted, drop the
871// selects in the first blocks.
872//
873// For example, for the following scope/region with selects, we want to insert
874// the merged branch right before the first select in the first/entry block by
875// hoisting c1, c2, c3, and c4.
876//
877// // Branch insert point here.
878// a = c1 ? b : c; // Select 1
879// d = c2 ? e : f; // Select 2
880// if (c3) { // Branch
881// ...
882// c4 = foo() // A call.
883// g = c4 ? h : i; // Select 3
884// }
885//
886// But suppose we can't hoist c4 because it's dependent on the preceding
887// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
888// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
889void CHR::checkScopeHoistable(CHRScope *Scope) {
890 RegInfo &RI = Scope->RegInfos[0];
891 Region *R = RI.R;
892 BasicBlock *EntryBB = R->getEntry();
893 auto *Branch = RI.HasBranch ?
894 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
895 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
896 if (RI.HasBranch || !Selects.empty()) {
897 Instruction *InsertPoint = getBranchInsertPoint(RI);
898 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
899 // Avoid a data dependence from a select or a branch to a(nother)
900 // select. Note no instruction can't data-depend on a branch (a branch
901 // instruction doesn't produce a value).
902 // Initialize Unhoistables with the selects.
903 DenseSet<Instruction *> Unhoistables(llvm::from_range, Selects);
904 // Remove Selects that can't be hoisted.
905 for (auto it = Selects.begin(); it != Selects.end(); ) {
906 SelectInst *SI = *it;
907 if (SI == InsertPoint) {
908 ++it;
909 continue;
910 }
911 DenseMap<Instruction *, bool> Visited;
912 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
913 DT, Unhoistables, nullptr, Visited);
914 if (!IsHoistable) {
915 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
916 ORE.emit([&]() {
917 return OptimizationRemarkMissed(DEBUG_TYPE,
918 "DropUnhoistableSelect", SI)
919 << "Dropped unhoistable select";
920 });
921 it = Selects.erase(it);
922 // Since we are dropping the select here, we also drop it from
923 // Unhoistables.
924 Unhoistables.erase(SI);
925 } else
926 ++it;
927 }
928 // Update InsertPoint after potentially removing selects.
929 InsertPoint = getBranchInsertPoint(RI);
930 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
931 if (RI.HasBranch && InsertPoint != Branch) {
932 DenseMap<Instruction *, bool> Visited;
933 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
934 DT, Unhoistables, nullptr, Visited);
935 if (!IsHoistable) {
936 // If the branch isn't hoistable, drop the selects in the entry
937 // block, preferring the branch, which makes the branch the hoist
938 // point.
939 assert(InsertPoint != Branch && "Branch must not be the hoist point");
940 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
941 CHR_DEBUG(
942 for (SelectInst *SI : Selects) {
943 dbgs() << "SI " << *SI << "\n";
944 });
945 for (SelectInst *SI : Selects) {
946 ORE.emit([&]() {
947 return OptimizationRemarkMissed(DEBUG_TYPE,
948 "DropSelectUnhoistableBranch", SI)
949 << "Dropped select due to unhoistable branch";
950 });
951 }
952 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
953 return SI->getParent() == EntryBB;
954 });
955 Unhoistables.clear();
956 InsertPoint = Branch;
957 }
958 }
959 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
960#ifndef NDEBUG
961 if (RI.HasBranch) {
962 assert(!DT.dominates(Branch, InsertPoint) &&
963 "Branch can't be already above the hoist point");
964 DenseMap<Instruction *, bool> Visited;
965 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
966 DT, Unhoistables, nullptr, Visited) &&
967 "checkHoistValue for branch");
968 }
969 for (auto *SI : Selects) {
970 assert(!DT.dominates(SI, InsertPoint) &&
971 "SI can't be already above the hoist point");
972 DenseMap<Instruction *, bool> Visited;
973 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
974 Unhoistables, nullptr, Visited) &&
975 "checkHoistValue for selects");
976 }
977 CHR_DEBUG(dbgs() << "Result\n");
978 if (RI.HasBranch) {
979 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
980 }
981 for (auto *SI : Selects) {
982 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
983 }
984#endif
985 }
986}
987
988// Traverse the region tree, find all nested scopes and merge them if possible.
989CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
990 SmallVectorImpl<CHRScope *> &Scopes) {
991 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
992 CHRScope *Result = findScope(R);
993 // Visit subscopes.
994 CHRScope *ConsecutiveSubscope = nullptr;
996 for (auto It = R->begin(); It != R->end(); ++It) {
997 const std::unique_ptr<Region> &SubR = *It;
998 auto NextIt = std::next(It);
999 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
1000 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
1001 << "\n");
1002 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
1003 if (SubCHRScope) {
1004 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
1005 } else {
1006 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
1007 }
1008 if (SubCHRScope) {
1009 if (!ConsecutiveSubscope)
1010 ConsecutiveSubscope = SubCHRScope;
1011 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1012 Subscopes.push_back(ConsecutiveSubscope);
1013 ConsecutiveSubscope = SubCHRScope;
1014 } else
1015 ConsecutiveSubscope->append(SubCHRScope);
1016 } else {
1017 if (ConsecutiveSubscope) {
1018 Subscopes.push_back(ConsecutiveSubscope);
1019 }
1020 ConsecutiveSubscope = nullptr;
1021 }
1022 }
1023 if (ConsecutiveSubscope) {
1024 Subscopes.push_back(ConsecutiveSubscope);
1025 }
1026 for (CHRScope *Sub : Subscopes) {
1027 if (Result) {
1028 // Combine it with the parent.
1029 Result->addSub(Sub);
1030 } else {
1031 // Push Subscopes as they won't be combined with the parent.
1032 Scopes.push_back(Sub);
1033 }
1034 }
1035 return Result;
1036}
1037
1039 DenseSet<Value *> ConditionValues;
1040 if (RI.HasBranch) {
1041 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1042 ConditionValues.insert(BI->getCondition());
1043 }
1044 for (SelectInst *SI : RI.Selects) {
1045 ConditionValues.insert(SI->getCondition());
1046 }
1047 return ConditionValues;
1048}
1049
1050
1051// Determine whether to split a scope depending on the sets of the branch
1052// condition values of the previous region and the current region. We split
1053// (return true) it if 1) the condition values of the inner/lower scope can't be
1054// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1055// values have an empty intersection (because the combined branch conditions
1056// won't probably lead to a simpler combined condition).
1058 DenseSet<Value *> &PrevConditionValues,
1059 DenseSet<Value *> &ConditionValues,
1060 DominatorTree &DT,
1061 DenseSet<Instruction *> &Unhoistables) {
1062 assert(InsertPoint && "Null InsertPoint");
1063 CHR_DEBUG(
1064 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1065 for (Value *V : PrevConditionValues) {
1066 dbgs() << *V << ", ";
1067 }
1068 dbgs() << " ConditionValues ";
1069 for (Value *V : ConditionValues) {
1070 dbgs() << *V << ", ";
1071 }
1072 dbgs() << "\n");
1073 // If any of Bases isn't hoistable to the hoist point, split.
1074 for (Value *V : ConditionValues) {
1076 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1077 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1078 return true; // Not hoistable, split.
1079 }
1080 }
1081 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1082 // unnecessary splits at scopes with no branch/selects. If
1083 // PrevConditionValues and ConditionValues don't intersect at all, split.
1084 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1085 // Use std::set as DenseSet doesn't work with set_intersection.
1086 std::set<Value *> PrevBases, Bases;
1088 for (Value *V : PrevConditionValues) {
1089 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1090 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1091 }
1092 for (Value *V : ConditionValues) {
1093 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1094 Bases.insert(BaseValues.begin(), BaseValues.end());
1095 }
1096 CHR_DEBUG(
1097 dbgs() << "PrevBases ";
1098 for (Value *V : PrevBases) {
1099 dbgs() << *V << ", ";
1100 }
1101 dbgs() << " Bases ";
1102 for (Value *V : Bases) {
1103 dbgs() << *V << ", ";
1104 }
1105 dbgs() << "\n");
1106 std::vector<Value *> Intersection;
1107 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1108 Bases.end(), std::back_inserter(Intersection));
1109 if (Intersection.empty()) {
1110 // Empty intersection, split.
1111 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1112 return true;
1113 }
1114 }
1115 CHR_DEBUG(dbgs() << "No split\n");
1116 return false; // Don't split.
1117}
1118
1119static void getSelectsInScope(CHRScope *Scope,
1120 DenseSet<Instruction *> &Output) {
1121 for (RegInfo &RI : Scope->RegInfos)
1122 Output.insert_range(RI.Selects);
1123 for (CHRScope *Sub : Scope->Subs)
1124 getSelectsInScope(Sub, Output);
1125}
1126
1127void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1128 SmallVectorImpl<CHRScope *> &Output) {
1129 for (CHRScope *Scope : Input) {
1130 assert(!Scope->BranchInsertPoint &&
1131 "BranchInsertPoint must not be set");
1132 DenseSet<Instruction *> Unhoistables;
1133 getSelectsInScope(Scope, Unhoistables);
1134 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1135 }
1136#ifndef NDEBUG
1137 for (CHRScope *Scope : Output) {
1138 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1139 }
1140#endif
1141}
1142
1143SmallVector<CHRScope *, 8> CHR::splitScope(
1144 CHRScope *Scope,
1145 CHRScope *Outer,
1146 DenseSet<Value *> *OuterConditionValues,
1147 Instruction *OuterInsertPoint,
1148 SmallVectorImpl<CHRScope *> &Output,
1149 DenseSet<Instruction *> &Unhoistables) {
1150 if (Outer) {
1151 assert(OuterConditionValues && "Null OuterConditionValues");
1152 assert(OuterInsertPoint && "Null OuterInsertPoint");
1153 }
1154 bool PrevSplitFromOuter = true;
1155 DenseSet<Value *> PrevConditionValues;
1156 Instruction *PrevInsertPoint = nullptr;
1158 SmallVector<bool, 8> SplitsSplitFromOuter;
1159 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1160 SmallVector<Instruction *, 8> SplitsInsertPoints;
1161 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1162 for (RegInfo &RI : RegInfos) {
1163 Instruction *InsertPoint = getBranchInsertPoint(RI);
1164 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1165 CHR_DEBUG(
1166 dbgs() << "ConditionValues ";
1167 for (Value *V : ConditionValues) {
1168 dbgs() << *V << ", ";
1169 }
1170 dbgs() << "\n");
1171 if (RI.R == RegInfos[0].R) {
1172 // First iteration. Check to see if we should split from the outer.
1173 if (Outer) {
1174 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1175 CHR_DEBUG(dbgs() << "Should split from outer at "
1176 << RI.R->getNameStr() << "\n");
1177 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1178 ConditionValues, DT, Unhoistables)) {
1179 PrevConditionValues = ConditionValues;
1180 PrevInsertPoint = InsertPoint;
1181 ORE.emit([&]() {
1182 return OptimizationRemarkMissed(DEBUG_TYPE,
1183 "SplitScopeFromOuter",
1184 RI.R->getEntry()->getTerminator())
1185 << "Split scope from outer due to unhoistable branch/select "
1186 << "and/or lack of common condition values";
1187 });
1188 } else {
1189 // Not splitting from the outer. Use the outer bases and insert
1190 // point. Union the bases.
1191 PrevSplitFromOuter = false;
1192 PrevConditionValues = *OuterConditionValues;
1193 PrevConditionValues.insert_range(ConditionValues);
1194 PrevInsertPoint = OuterInsertPoint;
1195 }
1196 } else {
1197 CHR_DEBUG(dbgs() << "Outer null\n");
1198 PrevConditionValues = ConditionValues;
1199 PrevInsertPoint = InsertPoint;
1200 }
1201 } else {
1202 CHR_DEBUG(dbgs() << "Should split from prev at "
1203 << RI.R->getNameStr() << "\n");
1204 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1205 DT, Unhoistables)) {
1206 CHRScope *Tail = Scope->split(RI.R);
1207 Scopes.insert(Tail);
1208 Splits.push_back(Scope);
1209 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1210 SplitsConditionValues.push_back(PrevConditionValues);
1211 SplitsInsertPoints.push_back(PrevInsertPoint);
1212 Scope = Tail;
1213 PrevConditionValues = ConditionValues;
1214 PrevInsertPoint = InsertPoint;
1215 PrevSplitFromOuter = true;
1216 ORE.emit([&]() {
1217 return OptimizationRemarkMissed(DEBUG_TYPE,
1218 "SplitScopeFromPrev",
1219 RI.R->getEntry()->getTerminator())
1220 << "Split scope from previous due to unhoistable branch/select "
1221 << "and/or lack of common condition values";
1222 });
1223 } else {
1224 // Not splitting. Union the bases. Keep the hoist point.
1225 PrevConditionValues.insert_range(ConditionValues);
1226 }
1227 }
1228 }
1229 Splits.push_back(Scope);
1230 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1231 SplitsConditionValues.push_back(PrevConditionValues);
1232 assert(PrevInsertPoint && "Null PrevInsertPoint");
1233 SplitsInsertPoints.push_back(PrevInsertPoint);
1234 assert(Splits.size() == SplitsConditionValues.size() &&
1235 Splits.size() == SplitsSplitFromOuter.size() &&
1236 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1237 for (size_t I = 0; I < Splits.size(); ++I) {
1238 CHRScope *Split = Splits[I];
1239 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1240 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1242 DenseSet<Instruction *> SplitUnhoistables;
1243 getSelectsInScope(Split, SplitUnhoistables);
1244 for (CHRScope *Sub : Split->Subs) {
1245 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1246 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1247 SplitUnhoistables);
1248 llvm::append_range(NewSubs, SubSplits);
1249 }
1250 Split->Subs = NewSubs;
1251 }
1253 for (size_t I = 0; I < Splits.size(); ++I) {
1254 CHRScope *Split = Splits[I];
1255 if (SplitsSplitFromOuter[I]) {
1256 // Split from the outer.
1257 Output.push_back(Split);
1258 Split->BranchInsertPoint = SplitsInsertPoints[I];
1259 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1260 << "\n");
1261 } else {
1262 // Connected to the outer.
1263 Result.push_back(Split);
1264 }
1265 }
1266 if (!Outer)
1267 assert(Result.empty() &&
1268 "If no outer (top-level), must return no nested ones");
1269 return Result;
1270}
1271
1272void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1273 for (CHRScope *Scope : Scopes) {
1274 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1275 classifyBiasedScopes(Scope, Scope);
1276 CHR_DEBUG(
1277 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1278 dbgs() << "TrueBiasedRegions ";
1279 for (Region *R : Scope->TrueBiasedRegions) {
1280 dbgs() << R->getNameStr() << ", ";
1281 }
1282 dbgs() << "\n";
1283 dbgs() << "FalseBiasedRegions ";
1284 for (Region *R : Scope->FalseBiasedRegions) {
1285 dbgs() << R->getNameStr() << ", ";
1286 }
1287 dbgs() << "\n";
1288 dbgs() << "TrueBiasedSelects ";
1289 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1290 dbgs() << *SI << ", ";
1291 }
1292 dbgs() << "\n";
1293 dbgs() << "FalseBiasedSelects ";
1294 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1295 dbgs() << *SI << ", ";
1296 }
1297 dbgs() << "\n";);
1298 }
1299}
1300
1301void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1302 for (RegInfo &RI : Scope->RegInfos) {
1303 if (RI.HasBranch) {
1304 Region *R = RI.R;
1305 if (TrueBiasedRegionsGlobal.contains(R))
1306 OutermostScope->TrueBiasedRegions.insert(R);
1307 else if (FalseBiasedRegionsGlobal.contains(R))
1308 OutermostScope->FalseBiasedRegions.insert(R);
1309 else
1310 llvm_unreachable("Must be biased");
1311 }
1312 for (SelectInst *SI : RI.Selects) {
1313 if (TrueBiasedSelectsGlobal.contains(SI))
1314 OutermostScope->TrueBiasedSelects.insert(SI);
1315 else if (FalseBiasedSelectsGlobal.contains(SI))
1316 OutermostScope->FalseBiasedSelects.insert(SI);
1317 else
1318 llvm_unreachable("Must be biased");
1319 }
1320 }
1321 for (CHRScope *Sub : Scope->Subs) {
1322 classifyBiasedScopes(Sub, OutermostScope);
1323 }
1324}
1325
1326static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1327 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1328 Scope->FalseBiasedRegions.size() +
1329 Scope->TrueBiasedSelects.size() +
1330 Scope->FalseBiasedSelects.size();
1331 return NumBiased >= CHRMergeThreshold;
1332}
1333
1334void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1335 SmallVectorImpl<CHRScope *> &Output) {
1336 for (CHRScope *Scope : Input) {
1337 // Filter out the ones with only one region and no subs.
1338 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1339 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1340 << Scope->TrueBiasedRegions.size()
1341 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1342 << " true-selects " << Scope->TrueBiasedSelects.size()
1343 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1344 ORE.emit([&]() {
1345 return OptimizationRemarkMissed(
1346 DEBUG_TYPE,
1347 "DropScopeWithOneBranchOrSelect",
1348 Scope->RegInfos[0].R->getEntry()->getTerminator())
1349 << "Drop scope with < "
1350 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1351 << " biased branch(es) or select(s)";
1352 });
1353 continue;
1354 }
1355 Output.push_back(Scope);
1356 }
1357}
1358
1359void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1360 SmallVectorImpl<CHRScope *> &Output) {
1361 for (CHRScope *Scope : Input) {
1362 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1363 "Empty");
1364 setCHRRegions(Scope, Scope);
1365 Output.push_back(Scope);
1366 CHR_DEBUG(
1367 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1368 for (auto pair : Scope->HoistStopMap) {
1369 Region *R = pair.first;
1370 dbgs() << "Region " << R->getNameStr() << "\n";
1371 for (Instruction *I : pair.second) {
1372 dbgs() << "HoistStop " << *I << "\n";
1373 }
1374 }
1375 dbgs() << "CHRRegions" << "\n";
1376 for (RegInfo &RI : Scope->CHRRegions) {
1377 dbgs() << RI.R->getNameStr() << "\n";
1378 });
1379 }
1380}
1381
1382void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1383 DenseSet<Instruction *> Unhoistables;
1384 // Put the biased selects in Unhoistables because they should stay where they
1385 // are and constant-folded after CHR (in case one biased select or a branch
1386 // can depend on another biased select.)
1387 for (RegInfo &RI : Scope->RegInfos)
1388 Unhoistables.insert_range(RI.Selects);
1389 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1390 for (RegInfo &RI : Scope->RegInfos) {
1391 Region *R = RI.R;
1392 DenseSet<Instruction *> HoistStops;
1393 bool IsHoisted = false;
1394 if (RI.HasBranch) {
1395 assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1396 OutermostScope->FalseBiasedRegions.contains(R)) &&
1397 "Must be truthy or falsy");
1398 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1399 // Note checkHoistValue fills in HoistStops.
1400 DenseMap<Instruction *, bool> Visited;
1401 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1402 Unhoistables, &HoistStops, Visited);
1403 assert(IsHoistable && "Must be hoistable");
1404 (void)(IsHoistable); // Unused in release build
1405 IsHoisted = true;
1406 }
1407 for (SelectInst *SI : RI.Selects) {
1408 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1409 OutermostScope->FalseBiasedSelects.contains(SI)) &&
1410 "Must be true or false biased");
1411 // Note checkHoistValue fills in HoistStops.
1412 DenseMap<Instruction *, bool> Visited;
1413 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1414 Unhoistables, &HoistStops, Visited);
1415 assert(IsHoistable && "Must be hoistable");
1416 (void)(IsHoistable); // Unused in release build
1417 IsHoisted = true;
1418 }
1419 if (IsHoisted) {
1420 OutermostScope->CHRRegions.push_back(RI);
1421 OutermostScope->HoistStopMap[R] = HoistStops;
1422 }
1423 }
1424 for (CHRScope *Sub : Scope->Subs)
1425 setCHRRegions(Sub, OutermostScope);
1426}
1427
1428static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1429 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1430}
1431
1432void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1433 SmallVectorImpl<CHRScope *> &Output) {
1434 Output.resize(Input.size());
1435 llvm::copy(Input, Output.begin());
1437}
1438
1439// Return true if V is already hoisted or was hoisted (along with its operands)
1440// to the insert point.
1441static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1442 HoistStopMapTy &HoistStopMap,
1443 DenseSet<Instruction *> &HoistedSet,
1444 DenseSet<PHINode *> &TrivialPHIs,
1445 DominatorTree &DT) {
1446 auto IT = HoistStopMap.find(R);
1447 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1448 DenseSet<Instruction *> &HoistStops = IT->second;
1449 if (auto *I = dyn_cast<Instruction>(V)) {
1450 if (I == HoistPoint)
1451 return;
1452 if (HoistStops.count(I))
1453 return;
1454 if (auto *PN = dyn_cast<PHINode>(I))
1455 if (TrivialPHIs.count(PN))
1456 // The trivial phi inserted by the previous CHR scope could replace a
1457 // non-phi in HoistStops. Note that since this phi is at the exit of a
1458 // previous CHR scope, which dominates this scope, it's safe to stop
1459 // hoisting there.
1460 return;
1461 if (HoistedSet.count(I))
1462 // Already hoisted, return.
1463 return;
1464 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1465 assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1466 assert(DT.getNode(HoistPoint->getParent()) &&
1467 "DT must contain HoistPoint block");
1468 if (DT.dominates(I, HoistPoint))
1469 // We are already above the hoist point. Stop here. This may be necessary
1470 // when multiple scopes would independently hoist the same
1471 // instruction. Since an outer (dominating) scope would hoist it to its
1472 // entry before an inner (dominated) scope would to its entry, the inner
1473 // scope may see the instruction already hoisted, in which case it
1474 // potentially wrong for the inner scope to hoist it and could cause bad
1475 // IR (non-dominating def), but safe to skip hoisting it instead because
1476 // it's already in a block that dominates the inner scope.
1477 return;
1478 for (Value *Op : I->operands()) {
1479 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1480 }
1481 I->moveBefore(HoistPoint->getIterator());
1482 HoistedSet.insert(I);
1483 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1484 }
1485}
1486
1487// Hoist the dependent condition values of the branches and the selects in the
1488// scope to the insert point.
1489static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1490 DenseSet<PHINode *> &TrivialPHIs,
1491 DominatorTree &DT) {
1492 DenseSet<Instruction *> HoistedSet;
1493 for (const RegInfo &RI : Scope->CHRRegions) {
1494 Region *R = RI.R;
1495 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1496 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1497 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1498 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1499 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1500 HoistedSet, TrivialPHIs, DT);
1501 }
1502 for (SelectInst *SI : RI.Selects) {
1503 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1504 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1505 if (!(IsTrueBiased || IsFalseBiased))
1506 continue;
1507 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1508 HoistedSet, TrivialPHIs, DT);
1509 }
1510 }
1511}
1512
1513// Negate the predicate if an ICmp if it's used only by branches or selects by
1514// swapping the operands of the branches or the selects. Returns true if success.
1516 Instruction *ExcludedUser,
1517 CHRScope *Scope) {
1518 for (User *U : ICmp->users()) {
1519 if (U == ExcludedUser)
1520 continue;
1521 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1522 continue;
1523 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1524 continue;
1525 return false;
1526 }
1527 for (User *U : ICmp->users()) {
1528 if (U == ExcludedUser)
1529 continue;
1530 if (auto *BI = dyn_cast<BranchInst>(U)) {
1531 assert(BI->isConditional() && "Must be conditional");
1532 BI->swapSuccessors();
1533 // Don't need to swap this in terms of
1534 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1535 // mean whether the branch is likely go into the if-then rather than
1536 // successor0/successor1 and because we can tell which edge is the then or
1537 // the else one by comparing the destination to the region exit block.
1538 continue;
1539 }
1540 if (auto *SI = dyn_cast<SelectInst>(U)) {
1541 // Swap operands
1542 SI->swapValues();
1543 SI->swapProfMetadata();
1544 if (Scope->TrueBiasedSelects.count(SI)) {
1545 assert(!Scope->FalseBiasedSelects.contains(SI) &&
1546 "Must not be already in");
1547 Scope->FalseBiasedSelects.insert(SI);
1548 } else if (Scope->FalseBiasedSelects.count(SI)) {
1549 assert(!Scope->TrueBiasedSelects.contains(SI) &&
1550 "Must not be already in");
1551 Scope->TrueBiasedSelects.insert(SI);
1552 }
1553 continue;
1554 }
1555 llvm_unreachable("Must be a branch or a select");
1556 }
1558 return true;
1559}
1560
1561// A helper for transformScopes. Insert a trivial phi at the scope exit block
1562// for a value that's defined in the scope but used outside it (meaning it's
1563// alive at the exit block).
1564static void insertTrivialPHIs(CHRScope *Scope,
1565 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1566 DenseSet<PHINode *> &TrivialPHIs) {
1567 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1568 for (RegInfo &RI : Scope->RegInfos) {
1569 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1570 // sub-Scopes.
1571 BlocksInScope.insert(BB);
1572 }
1573 }
1574 CHR_DEBUG({
1575 dbgs() << "Inserting redundant phis\n";
1576 for (BasicBlock *BB : BlocksInScope)
1577 dbgs() << "BlockInScope " << BB->getName() << "\n";
1578 });
1579 for (BasicBlock *BB : BlocksInScope) {
1580 for (Instruction &I : *BB) {
1582 for (User *U : I.users()) {
1583 if (auto *UI = dyn_cast<Instruction>(U)) {
1584 if (!BlocksInScope.contains(UI->getParent()) &&
1585 // Unless there's already a phi for I at the exit block.
1586 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1587 CHR_DEBUG(dbgs() << "V " << I << "\n");
1588 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1589 Users.push_back(UI);
1590 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1591 // There's a loop backedge from a block that's dominated by this
1592 // scope to the entry block.
1593 CHR_DEBUG(dbgs() << "V " << I << "\n");
1594 CHR_DEBUG(dbgs()
1595 << "Used at entry block (for a back edge) by a phi user "
1596 << *UI << "\n");
1597 Users.push_back(UI);
1598 }
1599 }
1600 }
1601 if (Users.size() > 0) {
1602 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1603 // ExitBlock. Replace I with the new phi in UI unless UI is another
1604 // phi at ExitBlock.
1605 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1606 PN->insertBefore(ExitBlock->begin());
1607 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1608 PN->addIncoming(&I, Pred);
1609 }
1610 TrivialPHIs.insert(PN);
1611 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1612 bool FoundLifetimeAnnotation = false;
1613 for (Instruction *UI : Users) {
1614 // If we found a lifetime annotation, remove it, but set a flag
1615 // to ensure that we remove all other lifetime annotations attached
1616 // to the alloca.
1617 if (UI->isLifetimeStartOrEnd()) {
1618 UI->eraseFromParent();
1619 FoundLifetimeAnnotation = true;
1620 continue;
1621 }
1622 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1623 if (UI->getOperand(J) == &I) {
1624 UI->setOperand(J, PN);
1625 }
1626 }
1627 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1628 }
1629 // Erase any leftover lifetime annotations for a dynamic alloca.
1630 if (FoundLifetimeAnnotation) {
1631 for (User *U : make_early_inc_range(I.users())) {
1632 if (auto *UI = dyn_cast<Instruction>(U))
1633 if (UI->isLifetimeStartOrEnd())
1634 UI->eraseFromParent();
1635 }
1636 }
1637 }
1638 }
1639 }
1640}
1641
1642// Assert that all the CHR regions of the scope have a biased branch or select.
1643[[maybe_unused]] static void
1645#ifndef NDEBUG
1646 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1647 if (Scope->TrueBiasedRegions.count(RI.R) ||
1648 Scope->FalseBiasedRegions.count(RI.R))
1649 return true;
1650 for (SelectInst *SI : RI.Selects)
1651 if (Scope->TrueBiasedSelects.count(SI) ||
1652 Scope->FalseBiasedSelects.count(SI))
1653 return true;
1654 return false;
1655 };
1656 for (RegInfo &RI : Scope->CHRRegions) {
1657 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1658 "Must have biased branch or select");
1659 }
1660#endif
1661}
1662
1663// Assert that all the condition values of the biased branches and selects have
1664// been hoisted to the pre-entry block or outside of the scope.
1665[[maybe_unused]] static void
1667 BasicBlock *PreEntryBlock) {
1668 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1669 for (RegInfo &RI : Scope->CHRRegions) {
1670 Region *R = RI.R;
1671 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1672 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1673 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1674 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1675 Value *V = BI->getCondition();
1676 CHR_DEBUG(dbgs() << *V << "\n");
1677 if (auto *I = dyn_cast<Instruction>(V)) {
1678 (void)(I); // Unused in release build.
1679 assert((I->getParent() == PreEntryBlock ||
1680 !Scope->contains(I)) &&
1681 "Must have been hoisted to PreEntryBlock or outside the scope");
1682 }
1683 }
1684 for (SelectInst *SI : RI.Selects) {
1685 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1686 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1687 if (!(IsTrueBiased || IsFalseBiased))
1688 continue;
1689 Value *V = SI->getCondition();
1690 CHR_DEBUG(dbgs() << *V << "\n");
1691 if (auto *I = dyn_cast<Instruction>(V)) {
1692 (void)(I); // Unused in release build.
1693 assert((I->getParent() == PreEntryBlock ||
1694 !Scope->contains(I)) &&
1695 "Must have been hoisted to PreEntryBlock or outside the scope");
1696 }
1697 }
1698 }
1699}
1700
1701void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1702 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1703
1704 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1705
1706 for (RegInfo &RI : Scope->RegInfos) {
1707 const Region *R = RI.R;
1708 unsigned Duplication = getRegionDuplicationCount(R);
1709 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
1710 << "\n");
1712 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1713 << " for this region");
1714 ORE.emit([&]() {
1715 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1716 R->getEntry()->getTerminator())
1717 << "Reached the duplication threshold for the region";
1718 });
1719 return;
1720 }
1721 }
1722 for (RegInfo &RI : Scope->RegInfos) {
1723 DuplicationCount[RI.R]++;
1724 }
1725
1726 Region *FirstRegion = Scope->RegInfos[0].R;
1727 BasicBlock *EntryBlock = FirstRegion->getEntry();
1728 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1729 BasicBlock *ExitBlock = LastRegion->getExit();
1730 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1731
1732 SmallVector<AllocaInst *> StaticAllocas;
1733 for (Instruction &I : *EntryBlock) {
1734 if (auto *AI = dyn_cast<AllocaInst>(&I)) {
1735 if (AI->isStaticAlloca())
1736 StaticAllocas.push_back(AI);
1737 }
1738 }
1739
1740 // Split the entry block of the first region. The new block becomes the new
1741 // entry block of the first region. The old entry block becomes the block to
1742 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1743 // through the split, we update the entry of the first region after the split,
1744 // and Region only points to the entry and the exit blocks, rather than
1745 // keeping everything in a list or set, the blocks membership and the
1746 // entry/exit blocks of the region are still valid after the split.
1747 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1748 << " at " << *Scope->BranchInsertPoint << "\n");
1749 BasicBlock *NewEntryBlock =
1750 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1751 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1752 "NewEntryBlock's only pred must be EntryBlock");
1753 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1754 BasicBlock *PreEntryBlock = EntryBlock;
1755
1756 // Move static allocas into the pre-entry block so they stay static.
1757 for (AllocaInst *AI : StaticAllocas)
1758 AI->moveBefore(EntryBlock->begin());
1759
1760 if (ExitBlock) {
1761 // Insert a trivial phi at the exit block (where the CHR hot path and the
1762 // cold path merges) for a value that's defined in the scope but used
1763 // outside it (meaning it's alive at the exit block). We will add the
1764 // incoming values for the CHR cold paths to it below. Without this, we'd
1765 // miss updating phi's for such values unless there happens to already be a
1766 // phi for that value there.
1767 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1768 }
1769
1770 ValueToValueMapTy VMap;
1771 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1772 // hot path (originals) and a cold path (clones) and update the PHIs at the
1773 // exit block.
1774 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1775
1776 // Replace the old (placeholder) branch with the new (merged) conditional
1777 // branch.
1778 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1779 NewEntryBlock, VMap);
1780
1781#ifndef NDEBUG
1783#endif
1784
1785 // Hoist the conditional values of the branches/selects.
1786 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1787
1788#ifndef NDEBUG
1789 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1790#endif
1791
1792 // Create the combined branch condition and constant-fold the branches/selects
1793 // in the hot path.
1794 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1795 ProfileCount.value_or(0));
1796}
1797
1798// A helper for transformScopes. Clone the blocks in the scope (excluding the
1799// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1800// at the exit block.
1801void CHR::cloneScopeBlocks(CHRScope *Scope,
1802 BasicBlock *PreEntryBlock,
1803 BasicBlock *ExitBlock,
1804 Region *LastRegion,
1805 ValueToValueMapTy &VMap) {
1806 // Clone all the blocks. The original blocks will be the hot-path
1807 // CHR-optimized code and the cloned blocks will be the original unoptimized
1808 // code. This is so that the block pointers from the
1809 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1810 // which CHR should apply to.
1811 SmallVector<BasicBlock*, 8> NewBlocks;
1812 for (RegInfo &RI : Scope->RegInfos)
1813 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1814 // sub-Scopes.
1815 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1816 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1817 NewBlocks.push_back(NewBB);
1818 VMap[BB] = NewBB;
1819
1820 // Unreachable predecessors will not be cloned and will not have an edge
1821 // to the cloned block. As such, also remove them from any phi nodes.
1822 for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1823 PN.removeIncomingValueIf([&](unsigned Idx) {
1824 return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1825 });
1826 }
1827
1828 // Place the cloned blocks right after the original blocks (right before the
1829 // exit block of.)
1830 if (ExitBlock)
1831 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1832 F.end());
1833
1834 // Update the cloned blocks/instructions to refer to themselves.
1835 for (BasicBlock *NewBB : NewBlocks)
1836 for (Instruction &I : *NewBB)
1837 RemapInstruction(&I, VMap,
1839
1840 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1841 // the top-level region but we don't need to add PHIs. The trivial PHIs
1842 // inserted above will be updated here.
1843 if (ExitBlock)
1844 for (PHINode &PN : ExitBlock->phis())
1845 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1846 ++I) {
1847 BasicBlock *Pred = PN.getIncomingBlock(I);
1848 if (LastRegion->contains(Pred)) {
1849 Value *V = PN.getIncomingValue(I);
1850 auto It = VMap.find(V);
1851 if (It != VMap.end()) V = It->second;
1852 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1853 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1854 }
1855 }
1856}
1857
1858// A helper for transformScope. Replace the old (placeholder) branch with the
1859// new (merged) conditional branch.
1860BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1861 BasicBlock *EntryBlock,
1862 BasicBlock *NewEntryBlock,
1863 ValueToValueMapTy &VMap) {
1864 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1865 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1866 "SplitBlock did not work correctly!");
1867 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1868 "NewEntryBlock's only pred must be EntryBlock");
1869 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1870 "NewEntryBlock must have been copied");
1871 OldBR->dropAllReferences();
1872 OldBR->eraseFromParent();
1873 // The true predicate is a placeholder. It will be replaced later in
1874 // fixupBranchesAndSelects().
1875 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1876 cast<BasicBlock>(VMap[NewEntryBlock]),
1877 ConstantInt::getTrue(F.getContext()));
1878 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1879 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1880 "NewEntryBlock's only pred must be EntryBlock");
1881 return NewBR;
1882}
1883
1884// A helper for transformScopes. Create the combined branch condition and
1885// constant-fold the branches/selects in the hot path.
1886void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1887 BasicBlock *PreEntryBlock,
1888 BranchInst *MergedBR,
1889 uint64_t ProfileCount) {
1890 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1891 BranchProbability CHRBranchBias(1, 1);
1892 uint64_t NumCHRedBranches = 0;
1893 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1894 for (RegInfo &RI : Scope->CHRRegions) {
1895 Region *R = RI.R;
1896 if (RI.HasBranch) {
1897 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1898 ++NumCHRedBranches;
1899 }
1900 for (SelectInst *SI : RI.Selects) {
1901 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1902 ++NumCHRedBranches;
1903 }
1904 }
1905 assert(NumCHRedBranches > 0);
1906 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1907 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1908 ORE.emit([&]() {
1909 return OptimizationRemark(DEBUG_TYPE,
1910 "CHR",
1911 // Refer to the hot (original) path
1912 MergedBR->getSuccessor(0)->getTerminator())
1913 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1914 << " branches or selects";
1915 });
1916 MergedBR->setCondition(MergedCondition);
1917 uint32_t Weights[] = {
1918 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1919 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1920 };
1921 setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1922 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1923 << "\n");
1924}
1925
1926// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1927// and constant-fold a branch in the hot path.
1928void CHR::fixupBranch(Region *R, CHRScope *Scope,
1929 IRBuilder<> &IRB,
1930 Value *&MergedCondition,
1931 BranchProbability &CHRBranchBias) {
1932 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1933 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1934 "Must be truthy or falsy");
1935 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1936 assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1937 BranchProbability Bias = BranchBiasMap[R];
1938 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1939 // Take the min.
1940 if (CHRBranchBias > Bias)
1941 CHRBranchBias = Bias;
1942 BasicBlock *IfThen = BI->getSuccessor(1);
1943 BasicBlock *IfElse = BI->getSuccessor(0);
1944 BasicBlock *RegionExitBlock = R->getExit();
1945 assert(RegionExitBlock && "Null ExitBlock");
1946 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1947 IfThen != IfElse && "Invariant from findScopes");
1948 if (IfThen == RegionExitBlock) {
1949 // Swap them so that IfThen means going into it and IfElse means skipping
1950 // it.
1951 std::swap(IfThen, IfElse);
1952 }
1953 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1954 << " IfElse " << IfElse->getName() << "\n");
1955 Value *Cond = BI->getCondition();
1956 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1957 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1958 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1959 MergedCondition);
1960 // Constant-fold the branch at ClonedEntryBlock.
1961 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1962 "The successor shouldn't change");
1963 Value *NewCondition = ConditionTrue ?
1964 ConstantInt::getTrue(F.getContext()) :
1965 ConstantInt::getFalse(F.getContext());
1966 BI->setCondition(NewCondition);
1967}
1968
1969// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1970// and constant-fold a select in the hot path.
1971void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1972 IRBuilder<> &IRB,
1973 Value *&MergedCondition,
1974 BranchProbability &CHRBranchBias) {
1975 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1976 assert((IsTrueBiased ||
1977 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1978 assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1979 BranchProbability Bias = SelectBiasMap[SI];
1980 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1981 // Take the min.
1982 if (CHRBranchBias > Bias)
1983 CHRBranchBias = Bias;
1984 Value *Cond = SI->getCondition();
1985 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1986 MergedCondition);
1987 Value *NewCondition = IsTrueBiased ?
1988 ConstantInt::getTrue(F.getContext()) :
1989 ConstantInt::getFalse(F.getContext());
1990 SI->setCondition(NewCondition);
1991}
1992
1993// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1994// condition.
1995void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1996 Instruction *BranchOrSelect, CHRScope *Scope,
1997 IRBuilder<> &IRB, Value *&MergedCondition) {
1998 if (!IsTrueBiased) {
1999 // If Cond is an icmp and all users of V except for BranchOrSelect is a
2000 // branch, negate the icmp predicate and swap the branch targets and avoid
2001 // inserting an Xor to negate Cond.
2002 auto *ICmp = dyn_cast<ICmpInst>(Cond);
2003 if (!ICmp ||
2004 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
2005 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
2006 }
2007
2008 // Freeze potentially poisonous conditions.
2010 Cond = IRB.CreateFreeze(Cond);
2011
2012 // Use logical and to avoid propagating poison from later conditions.
2013 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
2014 if (auto *MergedInst = dyn_cast<Instruction>(MergedCondition))
2016}
2017
2018void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
2019 unsigned I = 0;
2020 DenseSet<PHINode *> TrivialPHIs;
2021 for (CHRScope *Scope : CHRScopes) {
2022 transformScopes(Scope, TrivialPHIs);
2023 CHR_DEBUG(
2024 std::ostringstream oss;
2025 oss << " after transformScopes " << I++;
2026 dumpIR(F, oss.str().c_str(), nullptr));
2027 (void)I;
2028 }
2029}
2030
2031[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes,
2032 const char *Label) {
2033 dbgs() << Label << " " << Scopes.size() << "\n";
2034 for (CHRScope *Scope : Scopes) {
2035 dbgs() << *Scope << "\n";
2036 }
2037}
2038
2039bool CHR::run() {
2040 if (!shouldApply(F, PSI))
2041 return false;
2042
2043 CHR_DEBUG(dumpIR(F, "before", nullptr));
2044
2045 bool Changed = false;
2046 {
2047 CHR_DEBUG(
2048 dbgs() << "RegionInfo:\n";
2049 RI.print(dbgs()));
2050
2051 // Recursively traverse the region tree and find regions that have biased
2052 // branches and/or selects and create scopes.
2054 findScopes(AllScopes);
2055 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2056
2057 // Split the scopes if 1) the conditional values of the biased
2058 // branches/selects of the inner/lower scope can't be hoisted up to the
2059 // outermost/uppermost scope entry, or 2) the condition values of the biased
2060 // branches/selects in a scope (including subscopes) don't share at least
2061 // one common value.
2062 SmallVector<CHRScope *, 8> SplitScopes;
2063 splitScopes(AllScopes, SplitScopes);
2064 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2065
2066 // After splitting, set the biased regions and selects of a scope (a tree
2067 // root) that include those of the subscopes.
2068 classifyBiasedScopes(SplitScopes);
2069 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2070
2071 // Filter out the scopes that has only one biased region or select (CHR
2072 // isn't useful in such a case).
2073 SmallVector<CHRScope *, 8> FilteredScopes;
2074 filterScopes(SplitScopes, FilteredScopes);
2075 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2076
2077 // Set the regions to be CHR'ed and their hoist stops for each scope.
2079 setCHRRegions(FilteredScopes, SetScopes);
2080 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2081
2082 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2083 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2084 // to the hot path, rather than both hot and cold paths.
2085 SmallVector<CHRScope *, 8> SortedScopes;
2086 sortScopes(SetScopes, SortedScopes);
2087 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2088
2089 CHR_DEBUG(
2090 dbgs() << "RegionInfo:\n";
2091 RI.print(dbgs()));
2092
2093 // Apply the CHR transformation.
2094 if (!SortedScopes.empty()) {
2095 transformScopes(SortedScopes);
2096 Changed = true;
2097 }
2098 }
2099
2100 if (Changed) {
2101 CHR_DEBUG(dumpIR(F, "after", &Stats));
2102 ORE.emit([&]() {
2103 return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2104 << ore::NV("Function", &F) << " "
2105 << "Reduced the number of branches in hot paths by "
2106 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2107 << " (static) and "
2108 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2109 << " (weighted by PGO count)";
2110 });
2111 }
2112
2113 return Changed;
2114}
2115
2119
2121 Function &F,
2123 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2124 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2125 // If there is no profile summary, we should not do CHR.
2126 if (!PPSI || !PPSI->hasProfileSummary())
2127 return PreservedAnalyses::all();
2128 auto &PSI = *PPSI;
2129 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2130 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2131 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2132 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2133 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2134 if (!Changed)
2135 return PreservedAnalyses::all();
2136 return PreservedAnalyses::none();
2137}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void insertTrivialPHIs(CHRScope *Scope, BasicBlock *EntryBlock, BasicBlock *ExitBlock, DenseSet< PHINode * > &TrivialPHIs)
static cl::opt< bool > DisableCHR("disable-chr", cl::init(false), cl::Hidden, cl::desc("Disable CHR for all functions"))
static StringSet CHRFunctions
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2)
static bool checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables, DenseSet< Instruction * > *HoistStops, DenseMap< Instruction *, bool > &Visited)
static cl::opt< unsigned > CHRMergeThreshold("chr-merge-threshold", cl::init(2), cl::Hidden, cl::desc("CHR merges a group of N branches/selects where N >= this value"))
static bool isHoistable(Instruction *I, DominatorTree &DT)
static cl::opt< unsigned > CHRDupThreshsold("chr-dup-threshold", cl::init(3), cl::Hidden, cl::desc("Max number of duplications by CHR for a region"))
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static bool checkBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, S &TrueSet, S &FalseSet, M &BiasMap)
static StringSet CHRModules
static bool checkBiasedSelect(SelectInst *SI, Region *R, DenseSet< SelectInst * > &TrueBiasedSelectsGlobal, DenseSet< SelectInst * > &FalseBiasedSelectsGlobal, DenseMap< SelectInst *, BranchProbability > &SelectBiasMap)
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, HoistStopMapTy &HoistStopMap, DenseSet< Instruction * > &HoistedSet, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope)
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope)
static bool shouldApply(Function &F, ProfileSummaryInfo &PSI)
#define CHR_DEBUG(X)
static void dumpIR(Function &F, const char *Label, CHRStats *Stats)
static void getSelectsInScope(CHRScope *Scope, DenseSet< Instruction * > &Output)
static void dumpScopes(SmallVectorImpl< CHRScope * > &Scopes, const char *Label)
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool isHoistableInstructionType(Instruction *I)
static DenseSet< Value * > getCHRConditionValuesForRegion(RegInfo &RI)
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, Instruction *ExcludedUser, CHRScope *Scope)
static Instruction * getBranchInsertPoint(RegInfo &RI)
static cl::opt< std::string > CHRModuleList("chr-module-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of modules to apply CHR to"))
static void assertBranchOrSelectConditionHoisted(CHRScope *Scope, BasicBlock *PreEntryBlock)
static cl::opt< double > CHRBiasThreshold("chr-bias-threshold", cl::init(0.99), cl::Hidden, cl::desc("CHR considers a branch bias greater than this ratio as biased"))
static BranchProbability getCHRBiasThreshold()
static cl::opt< std::string > CHRFunctionList("chr-function-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of functions to apply CHR to"))
static cl::opt< bool > ForceCHR("force-chr", cl::init(false), cl::Hidden, cl::desc("Apply CHR for all functions"))
static bool checkBiasedBranch(BranchInst *BI, Region *R, DenseSet< Region * > &TrueBiasedRegionsGlobal, DenseSet< Region * > &FalseBiasedRegionsGlobal, DenseMap< Region *, BranchProbability > &BranchBiasMap)
static const std::set< Value * > & getBaseValues(Value *V, DominatorTree &DT, DenseMap< Value *, std::set< Value * > > &Visited)
static bool extractBranchProbabilities(Instruction *I, BranchProbability &TrueProb, BranchProbability &FalseProb)
static void parseCHRFilterFiles()
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
static Value * getCondition(Instruction *I)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
Definition IVUsers.cpp:48
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
block placement Basic Block Placement Stats
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
early Early Tail Duplication
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:483
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:539
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
Conditional or Unconditional Branch instruction.
void setCondition(Value *V)
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition InstrTypes.h:768
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2627
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.h:1728
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1602
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI InstListType::iterator insertInto(BasicBlock *ParentBB, InstListType::iterator It)
Inserts an unlinked instruction into ParentBB at position It and returns the iterator of the inserted...
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionEntryHot(const FuncT *F) const
Returns true if F has hot function entry.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition RegionInfo.h:357
void replaceEntryRecursive(BlockT *NewEntry)
Recursively replace the entry basic block of the region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition RegionInfo.h:965
This class represents the LLVM 'select' instruction.
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
Definition SetVector.h:252
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition StringSet.h:25
void dropAllReferences()
Drop all references to operands.
Definition User.h:324
iterator find(const KeyT &Val)
Definition ValueMap.h:160
iterator end()
Definition ValueMap.h:139
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void insert_range(Range &&R)
Definition DenseSet.h:228
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2116
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Function::ProfileCount ProfileCount
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1885
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872