LLVM 23.0.0git
ControlHeightReduction.cpp
Go to the documentation of this file.
1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
18#include "llvm/ADT/StringSet.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/MDBuilder.h"
31#include "llvm/IR/Module.h"
32#include "llvm/IR/PassManager.h"
40
41#include <optional>
42#include <set>
43#include <sstream>
44
45using namespace llvm;
46
47#define DEBUG_TYPE "chr"
48
49#define CHR_DEBUG(X) LLVM_DEBUG(X)
50
51static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden,
52 cl::desc("Disable CHR for all functions"));
53
54static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
55 cl::desc("Apply CHR for all functions"));
56
58 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
59 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
60
62 "chr-merge-threshold", cl::init(2), cl::Hidden,
63 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
64
66 "chr-module-list", cl::init(""), cl::Hidden,
67 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
68
70 "chr-function-list", cl::init(""), cl::Hidden,
71 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
72
74 "chr-dup-threshold", cl::init(3), cl::Hidden,
75 cl::desc("Max number of duplications by CHR for a region"));
76
79
80static void parseCHRFilterFiles() {
81 if (!CHRModuleList.empty()) {
82 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
83 if (!FileOrErr) {
84 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
85 std::exit(1);
86 }
87 StringRef Buf = FileOrErr->get()->getBuffer();
89 Buf.split(Lines, '\n');
90 for (StringRef Line : Lines) {
91 Line = Line.trim();
92 if (!Line.empty())
93 CHRModules.insert(Line);
94 }
95 }
96 if (!CHRFunctionList.empty()) {
97 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
98 if (!FileOrErr) {
99 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
100 std::exit(1);
101 }
102 StringRef Buf = FileOrErr->get()->getBuffer();
104 Buf.split(Lines, '\n');
105 for (StringRef Line : Lines) {
106 Line = Line.trim();
107 if (!Line.empty())
108 CHRFunctions.insert(Line);
109 }
110 }
111}
112
113namespace {
114
115struct CHRStats {
116 CHRStats() = default;
117 void print(raw_ostream &OS) const {
118 OS << "CHRStats: NumBranches " << NumBranches
119 << " NumBranchesDelta " << NumBranchesDelta
120 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
121 }
122 // The original number of conditional branches / selects
123 uint64_t NumBranches = 0;
124 // The decrease of the number of conditional branches / selects in the hot
125 // paths due to CHR.
126 uint64_t NumBranchesDelta = 0;
127 // NumBranchesDelta weighted by the profile count at the scope entry.
128 uint64_t WeightedNumBranchesDelta = 0;
129};
130
131// RegInfo - some properties of a Region.
132struct RegInfo {
133 RegInfo() = default;
134 RegInfo(Region *RegionIn) : R(RegionIn) {}
135 Region *R = nullptr;
136 bool HasBranch = false;
138};
139
140typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
141
142// CHRScope - a sequence of regions to CHR together. It corresponds to a
143// sequence of conditional blocks. It can have subscopes which correspond to
144// nested conditional blocks. Nested CHRScopes form a tree.
145class CHRScope {
146 public:
147 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
148 assert(RI.R && "Null RegionIn");
149 RegInfos.push_back(RI);
150 }
151
152 Region *getParentRegion() {
153 assert(RegInfos.size() > 0 && "Empty CHRScope");
154 Region *Parent = RegInfos[0].R->getParent();
155 assert(Parent && "Unexpected to call this on the top-level region");
156 return Parent;
157 }
158
159 BasicBlock *getEntryBlock() {
160 assert(RegInfos.size() > 0 && "Empty CHRScope");
161 return RegInfos.front().R->getEntry();
162 }
163
164 BasicBlock *getExitBlock() {
165 assert(RegInfos.size() > 0 && "Empty CHRScope");
166 return RegInfos.back().R->getExit();
167 }
168
169 bool appendable(CHRScope *Next) {
170 // The next scope is appendable only if this scope is directly connected to
171 // it (which implies it post-dominates this scope) and this scope dominates
172 // it (no edge to the next scope outside this scope).
173 BasicBlock *NextEntry = Next->getEntryBlock();
174 if (getExitBlock() != NextEntry)
175 // Not directly connected.
176 return false;
177 Region *LastRegion = RegInfos.back().R;
178 for (BasicBlock *Pred : predecessors(NextEntry))
179 if (!LastRegion->contains(Pred))
180 // There's an edge going into the entry of the next scope from outside
181 // of this scope.
182 return false;
183 return true;
184 }
185
186 void append(CHRScope *Next) {
187 assert(RegInfos.size() > 0 && "Empty CHRScope");
188 assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
189 assert(getParentRegion() == Next->getParentRegion() &&
190 "Must be siblings");
191 assert(getExitBlock() == Next->getEntryBlock() &&
192 "Must be adjacent");
193 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
194 Subs.append(Next->Subs.begin(), Next->Subs.end());
195 }
196
197 void addSub(CHRScope *SubIn) {
198#ifndef NDEBUG
199 bool IsChild = false;
200 for (RegInfo &RI : RegInfos)
201 if (RI.R == SubIn->getParentRegion()) {
202 IsChild = true;
203 break;
204 }
205 assert(IsChild && "Must be a child");
206#endif
207 Subs.push_back(SubIn);
208 }
209
210 // Split this scope at the boundary region into two, which will belong to the
211 // tail and returns the tail.
212 CHRScope *split(Region *Boundary) {
213 assert(Boundary && "Boundary null");
214 assert(RegInfos.begin()->R != Boundary &&
215 "Can't be split at beginning");
216 auto BoundaryIt = llvm::find_if(
217 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
218 if (BoundaryIt == RegInfos.end())
219 return nullptr;
220 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
221 DenseSet<Region *> TailRegionSet;
222 for (const RegInfo &RI : TailRegInfos)
223 TailRegionSet.insert(RI.R);
224
225 auto TailIt =
226 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
227 assert(Sub && "null Sub");
228 Region *Parent = Sub->getParentRegion();
229 if (TailRegionSet.count(Parent))
230 return false;
231
232 assert(llvm::any_of(
233 RegInfos,
234 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
235 "Must be in head");
236 return true;
237 });
238 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
239
240 assert(HoistStopMap.empty() && "MapHoistStops must be empty");
241 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
242 RegInfos.erase(BoundaryIt, RegInfos.end());
243 Subs.erase(TailIt, Subs.end());
244 return Scope;
245 }
246
247 bool contains(Instruction *I) const {
248 BasicBlock *Parent = I->getParent();
249 for (const RegInfo &RI : RegInfos)
250 if (RI.R->contains(Parent))
251 return true;
252 return false;
253 }
254
255 void print(raw_ostream &OS) const;
256
257 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
258 SmallVector<CHRScope *, 8> Subs; // Subscopes.
259
260 // The instruction at which to insert the CHR conditional branch (and hoist
261 // the dependent condition values).
262 Instruction *BranchInsertPoint;
263
264 // True-biased and false-biased regions (conditional blocks),
265 // respectively. Used only for the outermost scope and includes regions in
266 // subscopes. The rest are unbiased.
267 DenseSet<Region *> TrueBiasedRegions;
268 DenseSet<Region *> FalseBiasedRegions;
269 // Among the biased regions, the regions that get CHRed.
270 SmallVector<RegInfo, 8> CHRRegions;
271
272 // True-biased and false-biased selects, respectively. Used only for the
273 // outermost scope and includes ones in subscopes.
274 DenseSet<SelectInst *> TrueBiasedSelects;
275 DenseSet<SelectInst *> FalseBiasedSelects;
276
277 // Map from one of the above regions to the instructions to stop
278 // hoisting instructions at through use-def chains.
279 HoistStopMapTy HoistStopMap;
280
281 private:
282 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
283 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
284};
285
286class CHR {
287 public:
288 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
289 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
290 OptimizationRemarkEmitter &OREin)
291 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
292
293 ~CHR() {
294 for (CHRScope *Scope : Scopes) {
295 delete Scope;
296 }
297 }
298
299 bool run();
300
301 private:
302 // See the comments in CHR::run() for the high level flow of the algorithm and
303 // what the following functions do.
304
305 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
306 Region *R = RI.getTopLevelRegion();
307 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
308 Output.push_back(Scope);
309 }
310 }
311 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
312 SmallVectorImpl<CHRScope *> &Scopes);
313 CHRScope *findScope(Region *R);
314 void checkScopeHoistable(CHRScope *Scope);
315
316 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
317 SmallVectorImpl<CHRScope *> &Output);
318 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
319 CHRScope *Outer,
320 DenseSet<Value *> *OuterConditionValues,
321 Instruction *OuterInsertPoint,
322 SmallVectorImpl<CHRScope *> &Output,
323 DenseSet<Instruction *> &Unhoistables);
324
325 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
326 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
327
328 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
329 SmallVectorImpl<CHRScope *> &Output);
330
331 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
332 SmallVectorImpl<CHRScope *> &Output);
333 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
334
335 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
336 SmallVectorImpl<CHRScope *> &Output);
337
338 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
339 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
340 void cloneScopeBlocks(CHRScope *Scope,
341 BasicBlock *PreEntryBlock,
342 BasicBlock *ExitBlock,
343 Region *LastRegion,
344 ValueToValueMapTy &VMap);
345 CondBrInst *createMergedBranch(BasicBlock *PreEntryBlock,
346 BasicBlock *EntryBlock,
347 BasicBlock *NewEntryBlock,
348 ValueToValueMapTy &VMap);
349 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
350 CondBrInst *MergedBR, uint64_t ProfileCount);
351 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB,
352 Value *&MergedCondition, BranchProbability &CHRBranchBias);
353 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB,
354 Value *&MergedCondition, BranchProbability &CHRBranchBias);
355 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
356 Instruction *BranchOrSelect, CHRScope *Scope,
357 IRBuilder<> &IRB, Value *&MergedCondition);
358 unsigned getRegionDuplicationCount(const Region *R) {
359 unsigned Count = 0;
360 // Find out how many times region R is cloned. Note that if the parent
361 // of R is cloned, R is also cloned, but R's clone count is not updated
362 // from the clone of the parent. We need to accumulate all the counts
363 // from the ancestors to get the clone count.
364 while (R) {
365 Count += DuplicationCount[R];
366 R = R->getParent();
367 }
368 return Count;
369 }
370
371 Function &F;
372 BlockFrequencyInfo &BFI;
373 DominatorTree &DT;
374 ProfileSummaryInfo &PSI;
375 RegionInfo &RI;
376 OptimizationRemarkEmitter &ORE;
377 CHRStats Stats;
378
379 // All the true-biased regions in the function
380 DenseSet<Region *> TrueBiasedRegionsGlobal;
381 // All the false-biased regions in the function
382 DenseSet<Region *> FalseBiasedRegionsGlobal;
383 // All the true-biased selects in the function
384 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
385 // All the false-biased selects in the function
386 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
387 // A map from biased regions to their branch bias
388 DenseMap<Region *, BranchProbability> BranchBiasMap;
389 // A map from biased selects to their branch bias
390 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
391 // All the scopes.
392 DenseSet<CHRScope *> Scopes;
393 // This maps records how many times this region is cloned.
394 DenseMap<const Region *, unsigned> DuplicationCount;
395};
396
397} // end anonymous namespace
398
399[[maybe_unused]] static inline raw_ostream &operator<<(raw_ostream &OS,
400 const CHRStats &Stats) {
401 Stats.print(OS);
402 return OS;
403}
404
405static inline
406raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
407 Scope.print(OS);
408 return OS;
409}
410
412 if (DisableCHR)
413 return false;
414
415 if (ForceCHR)
416 return true;
417
418 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
419 if (CHRModules.count(F.getParent()->getName()))
420 return true;
421 return CHRFunctions.count(F.getName());
422 }
423
424 return PSI.isFunctionEntryHot(&F);
425}
426
427[[maybe_unused]] static void dumpIR(Function &F, const char *Label,
428 CHRStats *Stats) {
429 StringRef FuncName = F.getName();
430 StringRef ModuleName = F.getParent()->getName();
431 (void)(FuncName); // Unused in release build.
432 (void)(ModuleName); // Unused in release build.
433 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
434 << FuncName);
435 if (Stats)
436 CHR_DEBUG(dbgs() << " " << *Stats);
437 CHR_DEBUG(dbgs() << "\n");
438 CHR_DEBUG(F.dump());
439}
440
441void CHRScope::print(raw_ostream &OS) const {
442 assert(RegInfos.size() > 0 && "Empty CHRScope");
443 OS << "CHRScope[";
444 OS << RegInfos.size() << ", Regions[";
445 for (const RegInfo &RI : RegInfos) {
446 OS << RI.R->getNameStr();
447 if (RI.HasBranch)
448 OS << " B";
449 if (RI.Selects.size() > 0)
450 OS << " S" << RI.Selects.size();
451 OS << ", ";
452 }
453 if (RegInfos[0].R->getParent()) {
454 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
455 } else {
456 // top level region
457 OS << "]";
458 }
459 OS << ", Subs[";
460 for (CHRScope *Sub : Subs) {
461 OS << *Sub << ", ";
462 }
463 OS << "]]";
464}
465
466// Return true if the given instruction type can be hoisted by CHR.
474
475// Return true if the given instruction can be hoisted by CHR.
478 return false;
479 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT);
480}
481
482// Recursively traverse the use-def chains of the given value and return a set
483// of the unhoistable base values defined within the scope (excluding the
484// first-region entry block) or the (hoistable or unhoistable) base values that
485// are defined outside (including the first-region entry block) of the
486// scope. The returned set doesn't include constants.
487static const std::set<Value *> &
489 DenseMap<Value *, std::set<Value *>> &Visited) {
490 auto It = Visited.find(V);
491 if (It != Visited.end()) {
492 return It->second;
493 }
494 std::set<Value *> Result;
495 if (auto *I = dyn_cast<Instruction>(V)) {
496 // We don't stop at a block that's not in the Scope because we would miss
497 // some instructions that are based on the same base values if we stop
498 // there.
499 if (!isHoistable(I, DT)) {
500 Result.insert(I);
501 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
502 }
503 // I is hoistable above the Scope.
504 for (Value *Op : I->operands()) {
505 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
506 Result.insert(OpResult.begin(), OpResult.end());
507 }
508 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
509 }
510 if (isa<Argument>(V)) {
511 Result.insert(V);
512 }
513 // We don't include others like constants because those won't lead to any
514 // chance of folding of conditions (eg two bit checks merged into one check)
515 // after CHR.
516 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
517}
518
519// Return true if V is already hoisted or can be hoisted (along with its
520// operands) above the insert point. When it returns true and HoistStops is
521// non-null, the instructions to stop hoisting at through the use-def chains are
522// inserted into HoistStops.
523static bool
525 DenseSet<Instruction *> &Unhoistables,
526 DenseSet<Instruction *> *HoistStops,
528 assert(InsertPoint && "Null InsertPoint");
529 if (auto *I = dyn_cast<Instruction>(V)) {
530 auto It = Visited.find(I);
531 if (It != Visited.end()) {
532 return It->second;
533 }
534 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
535 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
536 if (Unhoistables.count(I)) {
537 // Don't hoist if they are not to be hoisted.
538 Visited[I] = false;
539 return false;
540 }
541 if (DT.dominates(I, InsertPoint)) {
542 // We are already above the insert point. Stop here.
543 if (HoistStops)
544 HoistStops->insert(I);
545 Visited[I] = true;
546 return true;
547 }
548 // We aren't not above the insert point, check if we can hoist it above the
549 // insert point.
550 if (isHoistable(I, DT)) {
551 // Check operands first.
552 DenseSet<Instruction *> OpsHoistStops;
553 bool AllOpsHoisted = true;
554 for (Value *Op : I->operands()) {
555 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
556 Visited)) {
557 AllOpsHoisted = false;
558 break;
559 }
560 }
561 if (AllOpsHoisted) {
562 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
563 if (HoistStops)
564 HoistStops->insert_range(OpsHoistStops);
565 Visited[I] = true;
566 return true;
567 }
568 }
569 Visited[I] = false;
570 return false;
571 }
572 // Non-instructions are considered hoistable.
573 return true;
574}
575
576// Constructs the true and false branch probabilities if the the instruction has
577// valid branch weights. Returns true when this was successful, false otherwise.
579 BranchProbability &TrueProb,
580 BranchProbability &FalseProb) {
581 uint64_t TrueWeight;
582 uint64_t FalseWeight;
583 if (!extractBranchWeights(*I, TrueWeight, FalseWeight))
584 return false;
585 uint64_t SumWeight = TrueWeight + FalseWeight;
586
587 assert(SumWeight >= TrueWeight && SumWeight >= FalseWeight &&
588 "Overflow calculating branch probabilities.");
589
590 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
591 if (SumWeight == 0)
592 return false;
593
594 TrueProb = BranchProbability::getBranchProbability(TrueWeight, SumWeight);
595 FalseProb = BranchProbability::getBranchProbability(FalseWeight, SumWeight);
596 return true;
597}
598
601 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
602}
603
604// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
605// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
606// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
607// false.
608template <typename K, typename S, typename M>
609static bool checkBias(K *Key, BranchProbability TrueProb,
610 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
611 M &BiasMap) {
613 if (TrueProb >= Threshold) {
614 TrueSet.insert(Key);
615 BiasMap[Key] = TrueProb;
616 return true;
617 } else if (FalseProb >= Threshold) {
618 FalseSet.insert(Key);
619 BiasMap[Key] = FalseProb;
620 return true;
621 }
622 return false;
623}
624
625// Returns true and insert a region into the right biased set and the map if the
626// branch of the region is biased.
627static bool
629 DenseSet<Region *> &TrueBiasedRegionsGlobal,
630 DenseSet<Region *> &FalseBiasedRegionsGlobal,
632 BranchProbability ThenProb, ElseProb;
633 if (!extractBranchProbabilities(BI, ThenProb, ElseProb))
634 return false;
635 BasicBlock *IfThen = BI->getSuccessor(0);
636 BasicBlock *IfElse = BI->getSuccessor(1);
637 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
638 IfThen != IfElse &&
639 "Invariant from findScopes");
640 if (IfThen == R->getExit()) {
641 // Swap them so that IfThen/ThenProb means going into the conditional code
642 // and IfElse/ElseProb means skipping it.
643 std::swap(IfThen, IfElse);
644 std::swap(ThenProb, ElseProb);
645 }
646 CHR_DEBUG(dbgs() << "BI " << *BI << " ");
647 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
648 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
649 return checkBias(R, ThenProb, ElseProb,
650 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
651 BranchBiasMap);
652}
653
654// Returns true and insert a select into the right biased set and the map if the
655// select is biased.
657 SelectInst *SI, Region *R,
658 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
659 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
661 BranchProbability TrueProb, FalseProb;
662 if (!extractBranchProbabilities(SI, TrueProb, FalseProb))
663 return false;
664 CHR_DEBUG(dbgs() << "SI " << *SI << " ");
665 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
666 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
667 return checkBias(SI, TrueProb, FalseProb,
668 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
669 SelectBiasMap);
670}
671
672// Returns the instruction at which to hoist the dependent condition values and
673// insert the CHR branch for a region. This is the terminator branch in the
674// entry block or the first select in the entry block, if any.
676 Region *R = RI.R;
677 BasicBlock *EntryBB = R->getEntry();
678 // The hoist point is by default the terminator of the entry block, which is
679 // the same as the branch instruction if RI.HasBranch is true.
680 Instruction *HoistPoint = EntryBB->getTerminator();
681 for (SelectInst *SI : RI.Selects) {
682 if (SI->getParent() == EntryBB) {
683 // Pick the first select in Selects in the entry block. Note Selects is
684 // sorted in the instruction order within a block (asserted below).
685 HoistPoint = SI;
686 break;
687 }
688 }
689 assert(HoistPoint && "Null HoistPoint");
690#ifndef NDEBUG
691 // Check that HoistPoint is the first one in Selects in the entry block,
692 // if any.
693 DenseSet<Instruction *> EntryBlockSelectSet;
694 for (SelectInst *SI : RI.Selects) {
695 if (SI->getParent() == EntryBB) {
696 EntryBlockSelectSet.insert(SI);
697 }
698 }
699 for (Instruction &I : *EntryBB) {
700 if (EntryBlockSelectSet.contains(&I)) {
701 assert(&I == HoistPoint &&
702 "HoistPoint must be the first one in Selects");
703 break;
704 }
705 }
706#endif
707 return HoistPoint;
708}
709
710// Find a CHR scope in the given region.
711CHRScope * CHR::findScope(Region *R) {
712 CHRScope *Result = nullptr;
713 BasicBlock *Entry = R->getEntry();
714 BasicBlock *Exit = R->getExit(); // null if top level.
715 assert(Entry && "Entry must not be null");
716 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
717 "Only top level region has a null exit");
718 if (Entry)
719 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
720 else
721 CHR_DEBUG(dbgs() << "Entry null\n");
722 if (Exit)
723 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
724 else
725 CHR_DEBUG(dbgs() << "Exit null\n");
726 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
727 // to this region).
728 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
729 if (EntryInSubregion)
730 return nullptr;
731 // Exclude loops
732 for (BasicBlock *Pred : predecessors(Entry))
733 if (R->contains(Pred))
734 return nullptr;
735 // If any of the basic blocks have address taken, we must skip this region
736 // because we cannot clone basic blocks that have address taken.
737 for (BasicBlock *BB : R->blocks()) {
738 if (BB->hasAddressTaken())
739 return nullptr;
740 // If we encounter llvm.coro.id, skip this region because if the basic block
741 // is cloned, we end up inserting a token type PHI node to the block with
742 // llvm.coro.begin.
743 // FIXME: This could lead to less optimal codegen, because the region is
744 // excluded, it can prevent CHR from merging adjacent regions into bigger
745 // scope and hoisting more branches.
746 for (Instruction &I : *BB) {
747 if (auto *II = dyn_cast<IntrinsicInst>(&I))
748 if (II->getIntrinsicID() == Intrinsic::coro_id)
749 return nullptr;
750 // Can't clone regions containing convergent or noduplicate calls.
751 //
752 // CHR clones a region into hot/cold paths guarded by a merged
753 // speculative branch. On GPU targets, this branch may be divergent
754 // (different threads evaluate it differently), splitting the set of
755 // threads that reach each copy. A convergent call (e.g. a cross-lane
756 // operation like ds_bpermute on AMDGPU) requires a specific set of
757 // threads to be active; when CHR places a copy on the hot path, only
758 // the threads that took the hot branch are active, so the operation
759 // reads stale values from threads that went to the cold path.
760 //
761 // Similarly, noduplicate calls must not be duplicated by definition.
762 //
763 // This matches SimplifyCFG's block-duplication guard.
764 if (auto *CB = dyn_cast<CallBase>(&I)) {
765 if (CB->cannotDuplicate() || CB->isConvergent())
766 return nullptr;
767 }
768 }
769 }
770
771 if (Exit) {
772 // Try to find an if-then block (check if R is an if-then).
773 // if (cond) {
774 // ...
775 // }
776 if (auto *BI = dyn_cast<CondBrInst>(Entry->getTerminator())) {
777 CHR_DEBUG(dbgs() << "BI conditional\n");
778 BasicBlock *S0 = BI->getSuccessor(0);
779 BasicBlock *S1 = BI->getSuccessor(1);
780 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
781 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
782 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
783 RegInfo RI(R);
784 RI.HasBranch = checkBiasedBranch(
785 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
786 BranchBiasMap);
787 Result = new CHRScope(RI);
788 Scopes.insert(Result);
789 CHR_DEBUG(dbgs() << "Found a region with a branch\n");
790 ++Stats.NumBranches;
791 if (!RI.HasBranch) {
792 ORE.emit([&]() {
793 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
794 << "Branch not biased";
795 });
796 }
797 }
798 }
799 }
800 {
801 // Try to look for selects in the direct child blocks (as opposed to in
802 // subregions) of R.
803 // ...
804 // if (..) { // Some subregion
805 // ...
806 // }
807 // if (..) { // Some subregion
808 // ...
809 // }
810 // ...
811 // a = cond ? b : c;
812 // ...
814 for (RegionNode *E : R->elements()) {
815 if (E->isSubRegion())
816 continue;
817 // This returns the basic block of E if E is a direct child of R (not a
818 // subregion.)
819 BasicBlock *BB = E->getEntry();
820 // Need to push in the order to make it easier to find the first Select
821 // later.
822 for (Instruction &I : *BB) {
823 if (auto *SI = dyn_cast<SelectInst>(&I)) {
824 Selects.push_back(SI);
825 ++Stats.NumBranches;
826 }
827 }
828 }
829 if (Selects.size() > 0) {
830 auto AddSelects = [&](RegInfo &RI) {
831 for (auto *SI : Selects)
832 if (checkBiasedSelect(SI, RI.R,
833 TrueBiasedSelectsGlobal,
834 FalseBiasedSelectsGlobal,
835 SelectBiasMap))
836 RI.Selects.push_back(SI);
837 else
838 ORE.emit([&]() {
839 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
840 << "Select not biased";
841 });
842 };
843 if (!Result) {
844 CHR_DEBUG(dbgs() << "Found a select-only region\n");
845 RegInfo RI(R);
846 AddSelects(RI);
847 Result = new CHRScope(RI);
848 Scopes.insert(Result);
849 } else {
850 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
851 AddSelects(Result->RegInfos[0]);
852 }
853 }
854 }
855
856 if (Result) {
857 checkScopeHoistable(Result);
858 }
859 return Result;
860}
861
862// Check that any of the branch and the selects in the region could be
863// hoisted above the the CHR branch insert point (the most dominating of
864// them, either the branch (at the end of the first block) or the first
865// select in the first block). If the branch can't be hoisted, drop the
866// selects in the first blocks.
867//
868// For example, for the following scope/region with selects, we want to insert
869// the merged branch right before the first select in the first/entry block by
870// hoisting c1, c2, c3, and c4.
871//
872// // Branch insert point here.
873// a = c1 ? b : c; // Select 1
874// d = c2 ? e : f; // Select 2
875// if (c3) { // Branch
876// ...
877// c4 = foo() // A call.
878// g = c4 ? h : i; // Select 3
879// }
880//
881// But suppose we can't hoist c4 because it's dependent on the preceding
882// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
883// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
884void CHR::checkScopeHoistable(CHRScope *Scope) {
885 RegInfo &RI = Scope->RegInfos[0];
886 Region *R = RI.R;
887 BasicBlock *EntryBB = R->getEntry();
888 auto *Branch =
889 RI.HasBranch ? cast<CondBrInst>(EntryBB->getTerminator()) : nullptr;
890 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
891 if (RI.HasBranch || !Selects.empty()) {
892 Instruction *InsertPoint = getBranchInsertPoint(RI);
893 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
894 // Avoid a data dependence from a select or a branch to a(nother)
895 // select. Note no instruction can't data-depend on a branch (a branch
896 // instruction doesn't produce a value).
897 // Initialize Unhoistables with the selects.
898 DenseSet<Instruction *> Unhoistables(llvm::from_range, Selects);
899 // Remove Selects that can't be hoisted.
900 for (auto it = Selects.begin(); it != Selects.end(); ) {
901 SelectInst *SI = *it;
902 if (SI == InsertPoint) {
903 ++it;
904 continue;
905 }
906 DenseMap<Instruction *, bool> Visited;
907 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
908 DT, Unhoistables, nullptr, Visited);
909 if (!IsHoistable) {
910 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
911 ORE.emit([&]() {
912 return OptimizationRemarkMissed(DEBUG_TYPE,
913 "DropUnhoistableSelect", SI)
914 << "Dropped unhoistable select";
915 });
916 it = Selects.erase(it);
917 // Since we are dropping the select here, we also drop it from
918 // Unhoistables.
919 Unhoistables.erase(SI);
920 } else
921 ++it;
922 }
923 // Update InsertPoint after potentially removing selects.
924 InsertPoint = getBranchInsertPoint(RI);
925 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
926 if (RI.HasBranch && InsertPoint != Branch) {
927 DenseMap<Instruction *, bool> Visited;
928 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
929 DT, Unhoistables, nullptr, Visited);
930 if (!IsHoistable) {
931 // If the branch isn't hoistable, drop the selects in the entry
932 // block, preferring the branch, which makes the branch the hoist
933 // point.
934 assert(InsertPoint != Branch && "Branch must not be the hoist point");
935 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
936 CHR_DEBUG(
937 for (SelectInst *SI : Selects) {
938 dbgs() << "SI " << *SI << "\n";
939 });
940 for (SelectInst *SI : Selects) {
941 ORE.emit([&]() {
942 return OptimizationRemarkMissed(DEBUG_TYPE,
943 "DropSelectUnhoistableBranch", SI)
944 << "Dropped select due to unhoistable branch";
945 });
946 }
947 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
948 return SI->getParent() == EntryBB;
949 });
950 Unhoistables.clear();
951 InsertPoint = Branch;
952 }
953 }
954 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
955#ifndef NDEBUG
956 if (RI.HasBranch) {
957 assert(!DT.dominates(Branch, InsertPoint) &&
958 "Branch can't be already above the hoist point");
959 DenseMap<Instruction *, bool> Visited;
960 assert(checkHoistValue(Branch->getCondition(), InsertPoint,
961 DT, Unhoistables, nullptr, Visited) &&
962 "checkHoistValue for branch");
963 }
964 for (auto *SI : Selects) {
965 assert(!DT.dominates(SI, InsertPoint) &&
966 "SI can't be already above the hoist point");
967 DenseMap<Instruction *, bool> Visited;
968 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
969 Unhoistables, nullptr, Visited) &&
970 "checkHoistValue for selects");
971 }
972 CHR_DEBUG(dbgs() << "Result\n");
973 if (RI.HasBranch) {
974 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
975 }
976 for (auto *SI : Selects) {
977 CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
978 }
979#endif
980 }
981}
982
983// Traverse the region tree, find all nested scopes and merge them if possible.
984CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
985 SmallVectorImpl<CHRScope *> &Scopes) {
986 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
987 CHRScope *Result = findScope(R);
988 // Visit subscopes.
989 CHRScope *ConsecutiveSubscope = nullptr;
991 for (auto It = R->begin(); It != R->end(); ++It) {
992 const std::unique_ptr<Region> &SubR = *It;
993 auto NextIt = std::next(It);
994 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
995 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
996 << "\n");
997 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
998 if (SubCHRScope) {
999 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
1000 } else {
1001 CHR_DEBUG(dbgs() << "Subregion Scope null\n");
1002 }
1003 if (SubCHRScope) {
1004 if (!ConsecutiveSubscope)
1005 ConsecutiveSubscope = SubCHRScope;
1006 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1007 Subscopes.push_back(ConsecutiveSubscope);
1008 ConsecutiveSubscope = SubCHRScope;
1009 } else
1010 ConsecutiveSubscope->append(SubCHRScope);
1011 } else {
1012 if (ConsecutiveSubscope) {
1013 Subscopes.push_back(ConsecutiveSubscope);
1014 }
1015 ConsecutiveSubscope = nullptr;
1016 }
1017 }
1018 if (ConsecutiveSubscope) {
1019 Subscopes.push_back(ConsecutiveSubscope);
1020 }
1021 for (CHRScope *Sub : Subscopes) {
1022 if (Result) {
1023 // Combine it with the parent.
1024 Result->addSub(Sub);
1025 } else {
1026 // Push Subscopes as they won't be combined with the parent.
1027 Scopes.push_back(Sub);
1028 }
1029 }
1030 return Result;
1031}
1032
1034 DenseSet<Value *> ConditionValues;
1035 if (RI.HasBranch) {
1036 auto *BI = cast<CondBrInst>(RI.R->getEntry()->getTerminator());
1037 ConditionValues.insert(BI->getCondition());
1038 }
1039 for (SelectInst *SI : RI.Selects) {
1040 ConditionValues.insert(SI->getCondition());
1041 }
1042 return ConditionValues;
1043}
1044
1045
1046// Determine whether to split a scope depending on the sets of the branch
1047// condition values of the previous region and the current region. We split
1048// (return true) it if 1) the condition values of the inner/lower scope can't be
1049// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1050// values have an empty intersection (because the combined branch conditions
1051// won't probably lead to a simpler combined condition).
1053 DenseSet<Value *> &PrevConditionValues,
1054 DenseSet<Value *> &ConditionValues,
1055 DominatorTree &DT,
1056 DenseSet<Instruction *> &Unhoistables) {
1057 assert(InsertPoint && "Null InsertPoint");
1058 CHR_DEBUG(
1059 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1060 for (Value *V : PrevConditionValues) {
1061 dbgs() << *V << ", ";
1062 }
1063 dbgs() << " ConditionValues ";
1064 for (Value *V : ConditionValues) {
1065 dbgs() << *V << ", ";
1066 }
1067 dbgs() << "\n");
1068 // If any of Bases isn't hoistable to the hoist point, split.
1069 for (Value *V : ConditionValues) {
1071 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1072 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1073 return true; // Not hoistable, split.
1074 }
1075 }
1076 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1077 // unnecessary splits at scopes with no branch/selects. If
1078 // PrevConditionValues and ConditionValues don't intersect at all, split.
1079 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1080 // Use std::set as DenseSet doesn't work with set_intersection.
1081 std::set<Value *> PrevBases, Bases;
1083 for (Value *V : PrevConditionValues) {
1084 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1085 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1086 }
1087 for (Value *V : ConditionValues) {
1088 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1089 Bases.insert(BaseValues.begin(), BaseValues.end());
1090 }
1091 CHR_DEBUG(
1092 dbgs() << "PrevBases ";
1093 for (Value *V : PrevBases) {
1094 dbgs() << *V << ", ";
1095 }
1096 dbgs() << " Bases ";
1097 for (Value *V : Bases) {
1098 dbgs() << *V << ", ";
1099 }
1100 dbgs() << "\n");
1101 std::vector<Value *> Intersection;
1102 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1103 Bases.end(), std::back_inserter(Intersection));
1104 if (Intersection.empty()) {
1105 // Empty intersection, split.
1106 CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1107 return true;
1108 }
1109 }
1110 CHR_DEBUG(dbgs() << "No split\n");
1111 return false; // Don't split.
1112}
1113
1114static void getSelectsInScope(CHRScope *Scope,
1115 DenseSet<Instruction *> &Output) {
1116 for (RegInfo &RI : Scope->RegInfos)
1117 Output.insert_range(RI.Selects);
1118 for (CHRScope *Sub : Scope->Subs)
1119 getSelectsInScope(Sub, Output);
1120}
1121
1122void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1123 SmallVectorImpl<CHRScope *> &Output) {
1124 for (CHRScope *Scope : Input) {
1125 assert(!Scope->BranchInsertPoint &&
1126 "BranchInsertPoint must not be set");
1127 DenseSet<Instruction *> Unhoistables;
1128 getSelectsInScope(Scope, Unhoistables);
1129 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1130 }
1131#ifndef NDEBUG
1132 for (CHRScope *Scope : Output) {
1133 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1134 }
1135#endif
1136}
1137
1138SmallVector<CHRScope *, 8> CHR::splitScope(
1139 CHRScope *Scope,
1140 CHRScope *Outer,
1141 DenseSet<Value *> *OuterConditionValues,
1142 Instruction *OuterInsertPoint,
1143 SmallVectorImpl<CHRScope *> &Output,
1144 DenseSet<Instruction *> &Unhoistables) {
1145 if (Outer) {
1146 assert(OuterConditionValues && "Null OuterConditionValues");
1147 assert(OuterInsertPoint && "Null OuterInsertPoint");
1148 }
1149 bool PrevSplitFromOuter = true;
1150 DenseSet<Value *> PrevConditionValues;
1151 Instruction *PrevInsertPoint = nullptr;
1153 SmallVector<bool, 8> SplitsSplitFromOuter;
1154 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1155 SmallVector<Instruction *, 8> SplitsInsertPoints;
1156 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1157 for (RegInfo &RI : RegInfos) {
1158 Instruction *InsertPoint = getBranchInsertPoint(RI);
1159 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1160 CHR_DEBUG(
1161 dbgs() << "ConditionValues ";
1162 for (Value *V : ConditionValues) {
1163 dbgs() << *V << ", ";
1164 }
1165 dbgs() << "\n");
1166 if (RI.R == RegInfos[0].R) {
1167 // First iteration. Check to see if we should split from the outer.
1168 if (Outer) {
1169 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1170 CHR_DEBUG(dbgs() << "Should split from outer at "
1171 << RI.R->getNameStr() << "\n");
1172 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1173 ConditionValues, DT, Unhoistables)) {
1174 PrevConditionValues = ConditionValues;
1175 PrevInsertPoint = InsertPoint;
1176 ORE.emit([&]() {
1177 return OptimizationRemarkMissed(DEBUG_TYPE,
1178 "SplitScopeFromOuter",
1179 RI.R->getEntry()->getTerminator())
1180 << "Split scope from outer due to unhoistable branch/select "
1181 << "and/or lack of common condition values";
1182 });
1183 } else {
1184 // Not splitting from the outer. Use the outer bases and insert
1185 // point. Union the bases.
1186 PrevSplitFromOuter = false;
1187 PrevConditionValues = *OuterConditionValues;
1188 PrevConditionValues.insert_range(ConditionValues);
1189 PrevInsertPoint = OuterInsertPoint;
1190 }
1191 } else {
1192 CHR_DEBUG(dbgs() << "Outer null\n");
1193 PrevConditionValues = ConditionValues;
1194 PrevInsertPoint = InsertPoint;
1195 }
1196 } else {
1197 CHR_DEBUG(dbgs() << "Should split from prev at "
1198 << RI.R->getNameStr() << "\n");
1199 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1200 DT, Unhoistables)) {
1201 CHRScope *Tail = Scope->split(RI.R);
1202 Scopes.insert(Tail);
1203 Splits.push_back(Scope);
1204 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1205 SplitsConditionValues.push_back(PrevConditionValues);
1206 SplitsInsertPoints.push_back(PrevInsertPoint);
1207 Scope = Tail;
1208 PrevConditionValues = ConditionValues;
1209 PrevInsertPoint = InsertPoint;
1210 PrevSplitFromOuter = true;
1211 ORE.emit([&]() {
1212 return OptimizationRemarkMissed(DEBUG_TYPE,
1213 "SplitScopeFromPrev",
1214 RI.R->getEntry()->getTerminator())
1215 << "Split scope from previous due to unhoistable branch/select "
1216 << "and/or lack of common condition values";
1217 });
1218 } else {
1219 // Not splitting. Union the bases. Keep the hoist point.
1220 PrevConditionValues.insert_range(ConditionValues);
1221 }
1222 }
1223 }
1224 Splits.push_back(Scope);
1225 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1226 SplitsConditionValues.push_back(PrevConditionValues);
1227 assert(PrevInsertPoint && "Null PrevInsertPoint");
1228 SplitsInsertPoints.push_back(PrevInsertPoint);
1229 assert(Splits.size() == SplitsConditionValues.size() &&
1230 Splits.size() == SplitsSplitFromOuter.size() &&
1231 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1232 for (size_t I = 0; I < Splits.size(); ++I) {
1233 CHRScope *Split = Splits[I];
1234 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1235 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1237 DenseSet<Instruction *> SplitUnhoistables;
1238 getSelectsInScope(Split, SplitUnhoistables);
1239 for (CHRScope *Sub : Split->Subs) {
1240 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1241 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1242 SplitUnhoistables);
1243 llvm::append_range(NewSubs, SubSplits);
1244 }
1245 Split->Subs = NewSubs;
1246 }
1248 for (size_t I = 0; I < Splits.size(); ++I) {
1249 CHRScope *Split = Splits[I];
1250 if (SplitsSplitFromOuter[I]) {
1251 // Split from the outer.
1252 Output.push_back(Split);
1253 Split->BranchInsertPoint = SplitsInsertPoints[I];
1254 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1255 << "\n");
1256 } else {
1257 // Connected to the outer.
1258 Result.push_back(Split);
1259 }
1260 }
1261 if (!Outer)
1262 assert(Result.empty() &&
1263 "If no outer (top-level), must return no nested ones");
1264 return Result;
1265}
1266
1267void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1268 for (CHRScope *Scope : Scopes) {
1269 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1270 classifyBiasedScopes(Scope, Scope);
1271 CHR_DEBUG(
1272 dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1273 dbgs() << "TrueBiasedRegions ";
1274 for (Region *R : Scope->TrueBiasedRegions) {
1275 dbgs() << R->getNameStr() << ", ";
1276 }
1277 dbgs() << "\n";
1278 dbgs() << "FalseBiasedRegions ";
1279 for (Region *R : Scope->FalseBiasedRegions) {
1280 dbgs() << R->getNameStr() << ", ";
1281 }
1282 dbgs() << "\n";
1283 dbgs() << "TrueBiasedSelects ";
1284 for (SelectInst *SI : Scope->TrueBiasedSelects) {
1285 dbgs() << *SI << ", ";
1286 }
1287 dbgs() << "\n";
1288 dbgs() << "FalseBiasedSelects ";
1289 for (SelectInst *SI : Scope->FalseBiasedSelects) {
1290 dbgs() << *SI << ", ";
1291 }
1292 dbgs() << "\n";);
1293 }
1294}
1295
1296void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1297 for (RegInfo &RI : Scope->RegInfos) {
1298 if (RI.HasBranch) {
1299 Region *R = RI.R;
1300 if (TrueBiasedRegionsGlobal.contains(R))
1301 OutermostScope->TrueBiasedRegions.insert(R);
1302 else if (FalseBiasedRegionsGlobal.contains(R))
1303 OutermostScope->FalseBiasedRegions.insert(R);
1304 else
1305 llvm_unreachable("Must be biased");
1306 }
1307 for (SelectInst *SI : RI.Selects) {
1308 if (TrueBiasedSelectsGlobal.contains(SI))
1309 OutermostScope->TrueBiasedSelects.insert(SI);
1310 else if (FalseBiasedSelectsGlobal.contains(SI))
1311 OutermostScope->FalseBiasedSelects.insert(SI);
1312 else
1313 llvm_unreachable("Must be biased");
1314 }
1315 }
1316 for (CHRScope *Sub : Scope->Subs) {
1317 classifyBiasedScopes(Sub, OutermostScope);
1318 }
1319}
1320
1321static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1322 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1323 Scope->FalseBiasedRegions.size() +
1324 Scope->TrueBiasedSelects.size() +
1325 Scope->FalseBiasedSelects.size();
1326 return NumBiased >= CHRMergeThreshold;
1327}
1328
1329void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1330 SmallVectorImpl<CHRScope *> &Output) {
1331 for (CHRScope *Scope : Input) {
1332 // Filter out the ones with only one region and no subs.
1333 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1334 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1335 << Scope->TrueBiasedRegions.size()
1336 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1337 << " true-selects " << Scope->TrueBiasedSelects.size()
1338 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1339 ORE.emit([&]() {
1340 return OptimizationRemarkMissed(
1341 DEBUG_TYPE,
1342 "DropScopeWithOneBranchOrSelect",
1343 Scope->RegInfos[0].R->getEntry()->getTerminator())
1344 << "Drop scope with < "
1345 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1346 << " biased branch(es) or select(s)";
1347 });
1348 continue;
1349 }
1350 Output.push_back(Scope);
1351 }
1352}
1353
1354void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1355 SmallVectorImpl<CHRScope *> &Output) {
1356 for (CHRScope *Scope : Input) {
1357 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1358 "Empty");
1359 setCHRRegions(Scope, Scope);
1360 Output.push_back(Scope);
1361 CHR_DEBUG(
1362 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1363 for (auto pair : Scope->HoistStopMap) {
1364 Region *R = pair.first;
1365 dbgs() << "Region " << R->getNameStr() << "\n";
1366 for (Instruction *I : pair.second) {
1367 dbgs() << "HoistStop " << *I << "\n";
1368 }
1369 }
1370 dbgs() << "CHRRegions" << "\n";
1371 for (RegInfo &RI : Scope->CHRRegions) {
1372 dbgs() << RI.R->getNameStr() << "\n";
1373 });
1374 }
1375}
1376
1377void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1378 DenseSet<Instruction *> Unhoistables;
1379 // Put the biased selects in Unhoistables because they should stay where they
1380 // are and constant-folded after CHR (in case one biased select or a branch
1381 // can depend on another biased select.)
1382 for (RegInfo &RI : Scope->RegInfos)
1383 Unhoistables.insert_range(RI.Selects);
1384 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1385 for (RegInfo &RI : Scope->RegInfos) {
1386 Region *R = RI.R;
1387 DenseSet<Instruction *> HoistStops;
1388 bool IsHoisted = false;
1389 if (RI.HasBranch) {
1390 assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1391 OutermostScope->FalseBiasedRegions.contains(R)) &&
1392 "Must be truthy or falsy");
1393 auto *BI = cast<CondBrInst>(R->getEntry()->getTerminator());
1394 // Note checkHoistValue fills in HoistStops.
1395 DenseMap<Instruction *, bool> Visited;
1396 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1397 Unhoistables, &HoistStops, Visited);
1398 assert(IsHoistable && "Must be hoistable");
1399 (void)(IsHoistable); // Unused in release build
1400 IsHoisted = true;
1401 }
1402 for (SelectInst *SI : RI.Selects) {
1403 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1404 OutermostScope->FalseBiasedSelects.contains(SI)) &&
1405 "Must be true or false biased");
1406 // Note checkHoistValue fills in HoistStops.
1407 DenseMap<Instruction *, bool> Visited;
1408 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1409 Unhoistables, &HoistStops, Visited);
1410 assert(IsHoistable && "Must be hoistable");
1411 (void)(IsHoistable); // Unused in release build
1412 IsHoisted = true;
1413 }
1414 if (IsHoisted) {
1415 OutermostScope->CHRRegions.push_back(RI);
1416 OutermostScope->HoistStopMap[R] = HoistStops;
1417 }
1418 }
1419 for (CHRScope *Sub : Scope->Subs)
1420 setCHRRegions(Sub, OutermostScope);
1421}
1422
1423static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1424 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1425}
1426
1427void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1428 SmallVectorImpl<CHRScope *> &Output) {
1429 Output.resize(Input.size());
1430 llvm::copy(Input, Output.begin());
1432}
1433
1434// Return true if V is already hoisted or was hoisted (along with its operands)
1435// to the insert point.
1436static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1437 HoistStopMapTy &HoistStopMap,
1438 DenseSet<Instruction *> &HoistedSet,
1439 DenseSet<PHINode *> &TrivialPHIs,
1440 DominatorTree &DT) {
1441 auto IT = HoistStopMap.find(R);
1442 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1443 DenseSet<Instruction *> &HoistStops = IT->second;
1444 if (auto *I = dyn_cast<Instruction>(V)) {
1445 if (I == HoistPoint)
1446 return;
1447 if (HoistStops.count(I))
1448 return;
1449 if (auto *PN = dyn_cast<PHINode>(I))
1450 if (TrivialPHIs.count(PN))
1451 // The trivial phi inserted by the previous CHR scope could replace a
1452 // non-phi in HoistStops. Note that since this phi is at the exit of a
1453 // previous CHR scope, which dominates this scope, it's safe to stop
1454 // hoisting there.
1455 return;
1456 if (HoistedSet.count(I))
1457 // Already hoisted, return.
1458 return;
1459 assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1460 assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1461 assert(DT.getNode(HoistPoint->getParent()) &&
1462 "DT must contain HoistPoint block");
1463 if (DT.dominates(I, HoistPoint))
1464 // We are already above the hoist point. Stop here. This may be necessary
1465 // when multiple scopes would independently hoist the same
1466 // instruction. Since an outer (dominating) scope would hoist it to its
1467 // entry before an inner (dominated) scope would to its entry, the inner
1468 // scope may see the instruction already hoisted, in which case it
1469 // potentially wrong for the inner scope to hoist it and could cause bad
1470 // IR (non-dominating def), but safe to skip hoisting it instead because
1471 // it's already in a block that dominates the inner scope.
1472 return;
1473 for (Value *Op : I->operands()) {
1474 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1475 }
1476 I->moveBefore(HoistPoint->getIterator());
1477 HoistedSet.insert(I);
1478 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1479 }
1480}
1481
1482// Hoist the dependent condition values of the branches and the selects in the
1483// scope to the insert point.
1484static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1485 DenseSet<PHINode *> &TrivialPHIs,
1486 DominatorTree &DT) {
1487 DenseSet<Instruction *> HoistedSet;
1488 for (const RegInfo &RI : Scope->CHRRegions) {
1489 Region *R = RI.R;
1490 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1491 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1492 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1493 auto *BI = cast<CondBrInst>(R->getEntry()->getTerminator());
1494 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1495 HoistedSet, TrivialPHIs, DT);
1496 }
1497 for (SelectInst *SI : RI.Selects) {
1498 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1499 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1500 if (!(IsTrueBiased || IsFalseBiased))
1501 continue;
1502 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1503 HoistedSet, TrivialPHIs, DT);
1504 }
1505 }
1506}
1507
1508// Negate the predicate if an ICmp if it's used only by branches or selects by
1509// swapping the operands of the branches or the selects. Returns true if success.
1511 Instruction *ExcludedUser,
1512 CHRScope *Scope) {
1513 for (User *U : ICmp->users()) {
1514 if (U == ExcludedUser)
1515 continue;
1516 if (isa<CondBrInst>(U))
1517 continue;
1518 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1519 continue;
1520 return false;
1521 }
1522 for (User *U : ICmp->users()) {
1523 if (U == ExcludedUser)
1524 continue;
1525 if (auto *BI = dyn_cast<CondBrInst>(U)) {
1526 BI->swapSuccessors();
1527 // Don't need to swap this in terms of
1528 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1529 // mean whether the branch is likely go into the if-then rather than
1530 // successor0/successor1 and because we can tell which edge is the then or
1531 // the else one by comparing the destination to the region exit block.
1532 continue;
1533 }
1534 if (auto *SI = dyn_cast<SelectInst>(U)) {
1535 // Swap operands
1536 SI->swapValues();
1537 SI->swapProfMetadata();
1538 if (Scope->TrueBiasedSelects.count(SI)) {
1539 assert(!Scope->FalseBiasedSelects.contains(SI) &&
1540 "Must not be already in");
1541 Scope->FalseBiasedSelects.insert(SI);
1542 } else if (Scope->FalseBiasedSelects.count(SI)) {
1543 assert(!Scope->TrueBiasedSelects.contains(SI) &&
1544 "Must not be already in");
1545 Scope->TrueBiasedSelects.insert(SI);
1546 }
1547 continue;
1548 }
1549 llvm_unreachable("Must be a branch or a select");
1550 }
1552 return true;
1553}
1554
1555// A helper for transformScopes. Insert a trivial phi at the scope exit block
1556// for a value that's defined in the scope but used outside it (meaning it's
1557// alive at the exit block).
1558static void insertTrivialPHIs(CHRScope *Scope,
1559 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1560 DenseSet<PHINode *> &TrivialPHIs) {
1561 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1562 for (RegInfo &RI : Scope->RegInfos) {
1563 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1564 // sub-Scopes.
1565 BlocksInScope.insert(BB);
1566 }
1567 }
1568 CHR_DEBUG({
1569 dbgs() << "Inserting redundant phis\n";
1570 for (BasicBlock *BB : BlocksInScope)
1571 dbgs() << "BlockInScope " << BB->getName() << "\n";
1572 });
1573 for (BasicBlock *BB : BlocksInScope) {
1574 for (Instruction &I : *BB) {
1576 for (User *U : I.users()) {
1577 if (auto *UI = dyn_cast<Instruction>(U)) {
1578 if (!BlocksInScope.contains(UI->getParent()) &&
1579 // Unless there's already a phi for I at the exit block.
1580 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1581 CHR_DEBUG(dbgs() << "V " << I << "\n");
1582 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1583 Users.push_back(UI);
1584 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1585 // There's a loop backedge from a block that's dominated by this
1586 // scope to the entry block.
1587 CHR_DEBUG(dbgs() << "V " << I << "\n");
1588 CHR_DEBUG(dbgs()
1589 << "Used at entry block (for a back edge) by a phi user "
1590 << *UI << "\n");
1591 Users.push_back(UI);
1592 }
1593 }
1594 }
1595 if (Users.size() > 0) {
1596 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1597 // ExitBlock. Replace I with the new phi in UI unless UI is another
1598 // phi at ExitBlock.
1599 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "");
1600 PN->insertBefore(ExitBlock->begin());
1601 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1602 PN->addIncoming(&I, Pred);
1603 }
1604 TrivialPHIs.insert(PN);
1605 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1606 bool FoundLifetimeAnnotation = false;
1607 for (Instruction *UI : Users) {
1608 // If we found a lifetime annotation, remove it, but set a flag
1609 // to ensure that we remove all other lifetime annotations attached
1610 // to the alloca.
1611 if (UI->isLifetimeStartOrEnd()) {
1612 UI->eraseFromParent();
1613 FoundLifetimeAnnotation = true;
1614 continue;
1615 }
1616 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1617 if (UI->getOperand(J) == &I) {
1618 UI->setOperand(J, PN);
1619 }
1620 }
1621 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1622 }
1623 // Erase any leftover lifetime annotations for a dynamic alloca.
1624 if (FoundLifetimeAnnotation) {
1625 for (User *U : make_early_inc_range(I.users())) {
1626 if (auto *UI = dyn_cast<Instruction>(U))
1627 if (UI->isLifetimeStartOrEnd())
1628 UI->eraseFromParent();
1629 }
1630 }
1631 }
1632 }
1633 }
1634}
1635
1636// Assert that all the CHR regions of the scope have a biased branch or select.
1637[[maybe_unused]] static void
1639#ifndef NDEBUG
1640 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1641 if (Scope->TrueBiasedRegions.count(RI.R) ||
1642 Scope->FalseBiasedRegions.count(RI.R))
1643 return true;
1644 for (SelectInst *SI : RI.Selects)
1645 if (Scope->TrueBiasedSelects.count(SI) ||
1646 Scope->FalseBiasedSelects.count(SI))
1647 return true;
1648 return false;
1649 };
1650 for (RegInfo &RI : Scope->CHRRegions) {
1651 assert(HasBiasedBranchOrSelect(RI, Scope) &&
1652 "Must have biased branch or select");
1653 }
1654#endif
1655}
1656
1657// Assert that all the condition values of the biased branches and selects have
1658// been hoisted to the pre-entry block or outside of the scope.
1659[[maybe_unused]] static void
1661 BasicBlock *PreEntryBlock) {
1662 CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1663 for (RegInfo &RI : Scope->CHRRegions) {
1664 Region *R = RI.R;
1665 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1666 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1667 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1668 auto *BI = cast<CondBrInst>(R->getEntry()->getTerminator());
1669 Value *V = BI->getCondition();
1670 CHR_DEBUG(dbgs() << *V << "\n");
1671 if (auto *I = dyn_cast<Instruction>(V)) {
1672 (void)(I); // Unused in release build.
1673 assert((I->getParent() == PreEntryBlock ||
1674 !Scope->contains(I)) &&
1675 "Must have been hoisted to PreEntryBlock or outside the scope");
1676 }
1677 }
1678 for (SelectInst *SI : RI.Selects) {
1679 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1680 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1681 if (!(IsTrueBiased || IsFalseBiased))
1682 continue;
1683 Value *V = SI->getCondition();
1684 CHR_DEBUG(dbgs() << *V << "\n");
1685 if (auto *I = dyn_cast<Instruction>(V)) {
1686 (void)(I); // Unused in release build.
1687 assert((I->getParent() == PreEntryBlock ||
1688 !Scope->contains(I)) &&
1689 "Must have been hoisted to PreEntryBlock or outside the scope");
1690 }
1691 }
1692 }
1693}
1694
1695void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1696 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1697
1698 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1699
1700 for (RegInfo &RI : Scope->RegInfos) {
1701 const Region *R = RI.R;
1702 unsigned Duplication = getRegionDuplicationCount(R);
1703 CHR_DEBUG(dbgs() << "Dup count for R=" << R << " is " << Duplication
1704 << "\n");
1706 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication
1707 << " for this region");
1708 ORE.emit([&]() {
1709 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached",
1710 R->getEntry()->getTerminator())
1711 << "Reached the duplication threshold for the region";
1712 });
1713 return;
1714 }
1715 }
1716 for (RegInfo &RI : Scope->RegInfos) {
1717 DuplicationCount[RI.R]++;
1718 }
1719
1720 Region *FirstRegion = Scope->RegInfos[0].R;
1721 BasicBlock *EntryBlock = FirstRegion->getEntry();
1722 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1723 BasicBlock *ExitBlock = LastRegion->getExit();
1724 std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1725
1726 SmallVector<AllocaInst *> StaticAllocas;
1727 for (Instruction &I : *EntryBlock) {
1728 if (auto *AI = dyn_cast<AllocaInst>(&I)) {
1729 if (AI->isStaticAlloca())
1730 StaticAllocas.push_back(AI);
1731 }
1732 }
1733
1734 // Split the entry block of the first region. The new block becomes the new
1735 // entry block of the first region. The old entry block becomes the block to
1736 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1737 // through the split, we update the entry of the first region after the split,
1738 // and Region only points to the entry and the exit blocks, rather than
1739 // keeping everything in a list or set, the blocks membership and the
1740 // entry/exit blocks of the region are still valid after the split.
1741 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1742 << " at " << *Scope->BranchInsertPoint << "\n");
1743 BasicBlock *NewEntryBlock =
1744 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1745 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1746 "NewEntryBlock's only pred must be EntryBlock");
1747 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1748 BasicBlock *PreEntryBlock = EntryBlock;
1749
1750 // Move static allocas into the pre-entry block so they stay static.
1751 for (AllocaInst *AI : StaticAllocas)
1752 AI->moveBefore(EntryBlock->begin());
1753
1754 if (ExitBlock) {
1755 // Insert a trivial phi at the exit block (where the CHR hot path and the
1756 // cold path merges) for a value that's defined in the scope but used
1757 // outside it (meaning it's alive at the exit block). We will add the
1758 // incoming values for the CHR cold paths to it below. Without this, we'd
1759 // miss updating phi's for such values unless there happens to already be a
1760 // phi for that value there.
1761 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1762 }
1763
1764 ValueToValueMapTy VMap;
1765 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1766 // hot path (originals) and a cold path (clones) and update the PHIs at the
1767 // exit block.
1768 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1769
1770 // Replace the old (placeholder) branch with the new (merged) conditional
1771 // branch.
1772 CondBrInst *MergedBr =
1773 createMergedBranch(PreEntryBlock, EntryBlock, NewEntryBlock, VMap);
1774
1775#ifndef NDEBUG
1777#endif
1778
1779 // Hoist the conditional values of the branches/selects.
1780 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1781
1782#ifndef NDEBUG
1783 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1784#endif
1785
1786 // Create the combined branch condition and constant-fold the branches/selects
1787 // in the hot path.
1788 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1789 ProfileCount.value_or(0));
1790}
1791
1792// A helper for transformScopes. Clone the blocks in the scope (excluding the
1793// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1794// at the exit block.
1795void CHR::cloneScopeBlocks(CHRScope *Scope,
1796 BasicBlock *PreEntryBlock,
1797 BasicBlock *ExitBlock,
1798 Region *LastRegion,
1799 ValueToValueMapTy &VMap) {
1800 // Clone all the blocks. The original blocks will be the hot-path
1801 // CHR-optimized code and the cloned blocks will be the original unoptimized
1802 // code. This is so that the block pointers from the
1803 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1804 // which CHR should apply to.
1805 SmallVector<BasicBlock*, 8> NewBlocks;
1806 for (RegInfo &RI : Scope->RegInfos)
1807 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1808 // sub-Scopes.
1809 assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1810 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1811 NewBlocks.push_back(NewBB);
1812 VMap[BB] = NewBB;
1813
1814 // Unreachable predecessors will not be cloned and will not have an edge
1815 // to the cloned block. As such, also remove them from any phi nodes.
1816 for (PHINode &PN : make_early_inc_range(NewBB->phis()))
1817 PN.removeIncomingValueIf([&](unsigned Idx) {
1818 return !DT.isReachableFromEntry(PN.getIncomingBlock(Idx));
1819 });
1820 }
1821
1822 // Place the cloned blocks right after the original blocks (right before the
1823 // exit block of.)
1824 if (ExitBlock)
1825 F.splice(ExitBlock->getIterator(), &F, NewBlocks[0]->getIterator(),
1826 F.end());
1827
1828 // Update the cloned blocks/instructions to refer to themselves.
1829 for (BasicBlock *NewBB : NewBlocks)
1830 for (Instruction &I : *NewBB)
1831 RemapInstruction(&I, VMap,
1833
1834 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1835 // the top-level region but we don't need to add PHIs. The trivial PHIs
1836 // inserted above will be updated here.
1837 if (ExitBlock)
1838 for (PHINode &PN : ExitBlock->phis())
1839 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1840 ++I) {
1841 BasicBlock *Pred = PN.getIncomingBlock(I);
1842 if (LastRegion->contains(Pred)) {
1843 Value *V = PN.getIncomingValue(I);
1844 auto It = VMap.find(V);
1845 if (It != VMap.end()) V = It->second;
1846 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1847 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1848 }
1849 }
1850}
1851
1852// A helper for transformScope. Replace the old (placeholder) branch with the
1853// new (merged) conditional branch.
1854CondBrInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1855 BasicBlock *EntryBlock,
1856 BasicBlock *NewEntryBlock,
1857 ValueToValueMapTy &VMap) {
1858 UncondBrInst *OldBR = cast<UncondBrInst>(PreEntryBlock->getTerminator());
1859 assert(OldBR->getSuccessor() == NewEntryBlock &&
1860 "SplitBlock did not work correctly!");
1861 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1862 "NewEntryBlock's only pred must be EntryBlock");
1863 assert(VMap.find(NewEntryBlock) != VMap.end() &&
1864 "NewEntryBlock must have been copied");
1865 OldBR->dropAllReferences();
1866 OldBR->eraseFromParent();
1867 // The true predicate is a placeholder. It will be replaced later in
1868 // fixupBranchesAndSelects().
1869 CondBrInst *NewBR =
1870 CondBrInst::Create(ConstantInt::getTrue(F.getContext()), NewEntryBlock,
1871 cast<BasicBlock>(VMap[NewEntryBlock]));
1872 NewBR->insertInto(PreEntryBlock, PreEntryBlock->end());
1873 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1874 "NewEntryBlock's only pred must be EntryBlock");
1875 return NewBR;
1876}
1877
1878// A helper for transformScopes. Create the combined branch condition and
1879// constant-fold the branches/selects in the hot path.
1880void CHR::fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock,
1881 CondBrInst *MergedBR, uint64_t ProfileCount) {
1882 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1883 BranchProbability CHRBranchBias(1, 1);
1884 uint64_t NumCHRedBranches = 0;
1885 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1886 for (RegInfo &RI : Scope->CHRRegions) {
1887 Region *R = RI.R;
1888 if (RI.HasBranch) {
1889 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1890 ++NumCHRedBranches;
1891 }
1892 for (SelectInst *SI : RI.Selects) {
1893 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1894 ++NumCHRedBranches;
1895 }
1896 }
1897 assert(NumCHRedBranches > 0);
1898 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1899 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1900 ORE.emit([&]() {
1901 return OptimizationRemark(DEBUG_TYPE,
1902 "CHR",
1903 // Refer to the hot (original) path
1904 MergedBR->getSuccessor(0)->getTerminator())
1905 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1906 << " branches or selects";
1907 });
1908 MergedBR->setCondition(MergedCondition);
1909 uint32_t Weights[] = {
1910 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1911 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1912 };
1913 setBranchWeights(*MergedBR, Weights, /*IsExpected=*/false);
1914 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1915 << "\n");
1916}
1917
1918// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1919// and constant-fold a branch in the hot path.
1920void CHR::fixupBranch(Region *R, CHRScope *Scope,
1921 IRBuilder<> &IRB,
1922 Value *&MergedCondition,
1923 BranchProbability &CHRBranchBias) {
1924 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1925 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1926 "Must be truthy or falsy");
1927 auto *BI = cast<CondBrInst>(R->getEntry()->getTerminator());
1928 assert(BranchBiasMap.contains(R) && "Must be in the bias map");
1929 BranchProbability Bias = BranchBiasMap[R];
1930 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1931 // Take the min.
1932 if (CHRBranchBias > Bias)
1933 CHRBranchBias = Bias;
1934 BasicBlock *IfThen = BI->getSuccessor(1);
1935 BasicBlock *IfElse = BI->getSuccessor(0);
1936 BasicBlock *RegionExitBlock = R->getExit();
1937 assert(RegionExitBlock && "Null ExitBlock");
1938 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1939 IfThen != IfElse && "Invariant from findScopes");
1940 if (IfThen == RegionExitBlock) {
1941 // Swap them so that IfThen means going into it and IfElse means skipping
1942 // it.
1943 std::swap(IfThen, IfElse);
1944 }
1945 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1946 << " IfElse " << IfElse->getName() << "\n");
1947 Value *Cond = BI->getCondition();
1948 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1949 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1950 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1951 MergedCondition);
1952 // Constant-fold the branch at ClonedEntryBlock.
1953 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1954 "The successor shouldn't change");
1955 Value *NewCondition = ConditionTrue ?
1956 ConstantInt::getTrue(F.getContext()) :
1957 ConstantInt::getFalse(F.getContext());
1958 BI->setCondition(NewCondition);
1959}
1960
1961// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1962// and constant-fold a select in the hot path.
1963void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1964 IRBuilder<> &IRB,
1965 Value *&MergedCondition,
1966 BranchProbability &CHRBranchBias) {
1967 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1968 assert((IsTrueBiased ||
1969 Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1970 assert(SelectBiasMap.contains(SI) && "Must be in the bias map");
1971 BranchProbability Bias = SelectBiasMap[SI];
1972 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1973 // Take the min.
1974 if (CHRBranchBias > Bias)
1975 CHRBranchBias = Bias;
1976 Value *Cond = SI->getCondition();
1977 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1978 MergedCondition);
1979 Value *NewCondition = IsTrueBiased ?
1980 ConstantInt::getTrue(F.getContext()) :
1981 ConstantInt::getFalse(F.getContext());
1982 SI->setCondition(NewCondition);
1983}
1984
1985// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1986// condition.
1987void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1988 Instruction *BranchOrSelect, CHRScope *Scope,
1989 IRBuilder<> &IRB, Value *&MergedCondition) {
1990 if (!IsTrueBiased) {
1991 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1992 // branch, negate the icmp predicate and swap the branch targets and avoid
1993 // inserting an Xor to negate Cond.
1994 auto *ICmp = dyn_cast<ICmpInst>(Cond);
1995 if (!ICmp ||
1996 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope))
1997 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond);
1998 }
1999
2000 // Freeze potentially poisonous conditions.
2002 Cond = IRB.CreateFreeze(Cond);
2003
2004 // Use logical and to avoid propagating poison from later conditions.
2005 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond);
2006 if (auto *MergedInst = dyn_cast<Instruction>(MergedCondition))
2008}
2009
2010void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
2011 unsigned I = 0;
2012 DenseSet<PHINode *> TrivialPHIs;
2013 for (CHRScope *Scope : CHRScopes) {
2014 transformScopes(Scope, TrivialPHIs);
2015 CHR_DEBUG(
2016 std::ostringstream oss;
2017 oss << " after transformScopes " << I++;
2018 dumpIR(F, oss.str().c_str(), nullptr));
2019 (void)I;
2020 }
2021}
2022
2023[[maybe_unused]] static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes,
2024 const char *Label) {
2025 dbgs() << Label << " " << Scopes.size() << "\n";
2026 for (CHRScope *Scope : Scopes) {
2027 dbgs() << *Scope << "\n";
2028 }
2029}
2030
2031bool CHR::run() {
2032 if (!shouldApply(F, PSI))
2033 return false;
2034
2035 CHR_DEBUG(dumpIR(F, "before", nullptr));
2036
2037 bool Changed = false;
2038 {
2039 CHR_DEBUG(
2040 dbgs() << "RegionInfo:\n";
2041 RI.print(dbgs()));
2042
2043 // Recursively traverse the region tree and find regions that have biased
2044 // branches and/or selects and create scopes.
2046 findScopes(AllScopes);
2047 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2048
2049 // Split the scopes if 1) the conditional values of the biased
2050 // branches/selects of the inner/lower scope can't be hoisted up to the
2051 // outermost/uppermost scope entry, or 2) the condition values of the biased
2052 // branches/selects in a scope (including subscopes) don't share at least
2053 // one common value.
2054 SmallVector<CHRScope *, 8> SplitScopes;
2055 splitScopes(AllScopes, SplitScopes);
2056 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2057
2058 // After splitting, set the biased regions and selects of a scope (a tree
2059 // root) that include those of the subscopes.
2060 classifyBiasedScopes(SplitScopes);
2061 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2062
2063 // Filter out the scopes that has only one biased region or select (CHR
2064 // isn't useful in such a case).
2065 SmallVector<CHRScope *, 8> FilteredScopes;
2066 filterScopes(SplitScopes, FilteredScopes);
2067 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2068
2069 // Set the regions to be CHR'ed and their hoist stops for each scope.
2071 setCHRRegions(FilteredScopes, SetScopes);
2072 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2073
2074 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2075 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2076 // to the hot path, rather than both hot and cold paths.
2077 SmallVector<CHRScope *, 8> SortedScopes;
2078 sortScopes(SetScopes, SortedScopes);
2079 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2080
2081 CHR_DEBUG(
2082 dbgs() << "RegionInfo:\n";
2083 RI.print(dbgs()));
2084
2085 // Apply the CHR transformation.
2086 if (!SortedScopes.empty()) {
2087 transformScopes(SortedScopes);
2088 Changed = true;
2089 }
2090 }
2091
2092 if (Changed) {
2093 CHR_DEBUG(dumpIR(F, "after", &Stats));
2094 ORE.emit([&]() {
2095 return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2096 << ore::NV("Function", &F) << " "
2097 << "Reduced the number of branches in hot paths by "
2098 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2099 << " (static) and "
2100 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2101 << " (weighted by PGO count)";
2102 });
2103 }
2104
2105 return Changed;
2106}
2107
2111
2113 Function &F,
2115 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2116 auto PPSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2117 // If there is no profile summary, we should not do CHR.
2118 if (!PPSI || !PPSI->hasProfileSummary())
2119 return PreservedAnalyses::all();
2120 auto &PSI = *PPSI;
2121 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2122 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2123 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2124 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2125 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2126 if (!Changed)
2127 return PreservedAnalyses::all();
2128 return PreservedAnalyses::none();
2129}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
constexpr LLT S1
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static uint64_t scale(uint64_t Num, uint32_t N, uint32_t D)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static void insertTrivialPHIs(CHRScope *Scope, BasicBlock *EntryBlock, BasicBlock *ExitBlock, DenseSet< PHINode * > &TrivialPHIs)
static cl::opt< bool > DisableCHR("disable-chr", cl::init(false), cl::Hidden, cl::desc("Disable CHR for all functions"))
static StringSet CHRFunctions
static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2)
static bool checkBiasedBranch(CondBrInst *BI, Region *R, DenseSet< Region * > &TrueBiasedRegionsGlobal, DenseSet< Region * > &FalseBiasedRegionsGlobal, DenseMap< Region *, BranchProbability > &BranchBiasMap)
static bool checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables, DenseSet< Instruction * > *HoistStops, DenseMap< Instruction *, bool > &Visited)
static cl::opt< unsigned > CHRMergeThreshold("chr-merge-threshold", cl::init(2), cl::Hidden, cl::desc("CHR merges a group of N branches/selects where N >= this value"))
static bool isHoistable(Instruction *I, DominatorTree &DT)
static cl::opt< unsigned > CHRDupThreshsold("chr-dup-threshold", cl::init(3), cl::Hidden, cl::desc("Max number of duplications by CHR for a region"))
static bool shouldSplit(Instruction *InsertPoint, DenseSet< Value * > &PrevConditionValues, DenseSet< Value * > &ConditionValues, DominatorTree &DT, DenseSet< Instruction * > &Unhoistables)
static bool checkBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, S &TrueSet, S &FalseSet, M &BiasMap)
static StringSet CHRModules
static bool checkBiasedSelect(SelectInst *SI, Region *R, DenseSet< SelectInst * > &TrueBiasedSelectsGlobal, DenseSet< SelectInst * > &FalseBiasedSelectsGlobal, DenseMap< SelectInst *, BranchProbability > &SelectBiasMap)
static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, HoistStopMapTy &HoistStopMap, DenseSet< Instruction * > &HoistedSet, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope)
static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope)
static bool shouldApply(Function &F, ProfileSummaryInfo &PSI)
#define CHR_DEBUG(X)
static void dumpIR(Function &F, const char *Label, CHRStats *Stats)
static void getSelectsInScope(CHRScope *Scope, DenseSet< Instruction * > &Output)
static void dumpScopes(SmallVectorImpl< CHRScope * > &Scopes, const char *Label)
static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, DenseSet< PHINode * > &TrivialPHIs, DominatorTree &DT)
static bool isHoistableInstructionType(Instruction *I)
static DenseSet< Value * > getCHRConditionValuesForRegion(RegInfo &RI)
static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, Instruction *ExcludedUser, CHRScope *Scope)
static Instruction * getBranchInsertPoint(RegInfo &RI)
static cl::opt< std::string > CHRModuleList("chr-module-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of modules to apply CHR to"))
static void assertBranchOrSelectConditionHoisted(CHRScope *Scope, BasicBlock *PreEntryBlock)
static cl::opt< double > CHRBiasThreshold("chr-bias-threshold", cl::init(0.99), cl::Hidden, cl::desc("CHR considers a branch bias greater than this ratio as biased"))
static BranchProbability getCHRBiasThreshold()
static cl::opt< std::string > CHRFunctionList("chr-function-list", cl::init(""), cl::Hidden, cl::desc("Specify file to retrieve the list of functions to apply CHR to"))
static cl::opt< bool > ForceCHR("force-chr", cl::init(false), cl::Hidden, cl::desc("Apply CHR for all functions"))
static const std::set< Value * > & getBaseValues(Value *V, DominatorTree &DT, DenseMap< Value *, std::set< Value * > > &Visited)
static bool extractBranchProbabilities(Instruction *I, BranchProbability &TrueProb, BranchProbability &FalseProb)
static void parseCHRFilterFiles()
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
static Value * getCondition(Instruction *I)
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
Definition IVUsers.cpp:48
static Constant * getFalse(Type *Ty)
For a boolean type or a vector of boolean type, return false or a vector with every element false.
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
block placement Basic Block Placement Stats
uint64_t IntrinsicInst * II
FunctionAnalysisManager FAM
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file defines the SmallVector class.
StringSet - A set-like wrapper for the StringMap.
early Early Tail Duplication
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:518
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
LLVM_ABI std::optional< uint64_t > getBlockProfileCount(const BasicBlock *BB, bool AllowSynthetic=false) const
Returns the estimated profile count of BB.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
void setPredicate(Predicate P)
Set the predicate for this instruction to the specified value.
Definition InstrTypes.h:768
Predicate getInversePredicate() const
For example, EQ -> NE, UGT -> ULE, SLT -> SGE, OEQ -> UNE, UGT -> OLE, OLT -> UGE,...
Definition InstrTypes.h:789
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
Conditional Branch instruction.
static CondBrInst * Create(Value *Cond, BasicBlock *IfTrue, BasicBlock *IfFalse, InsertPosition InsertBefore=nullptr)
void setCondition(Value *V)
BasicBlock * getSuccessor(unsigned i) const
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
iterator end()
Definition DenseMap.h:81
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Analysis pass which computes a DominatorTree.
Definition Dominators.h:278
DomTreeNodeBase< NodeT > * getNode(const NodeT *BB) const
getNode - return the (Post)DominatorTree node for the specified basic block.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:159
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition IRBuilder.h:2650
Value * CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition IRBuilder.h:1751
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1625
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
static ErrorOr< std::unique_ptr< MemoryBuffer > > getFile(const Twine &Filename, bool IsText=false, bool RequiresNullTerminator=true, bool IsVolatile=false, std::optional< Align > Alignment=std::nullopt)
Open the specified file as a MemoryBuffer, returning a new MemoryBuffer if successful,...
LLVM_ABI void emit(DiagnosticInfoOptimizationBase &OptDiag)
Output the remark via the diagnostic handler and to the optimization record file.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionEntryHot(const FuncT *F) const
Returns true if F has hot function entry.
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition RegionInfo.h:357
void replaceEntryRecursive(BlockT *NewEntry)
Recursively replace the entry basic block of the region.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition RegionInfo.h:965
This class represents the LLVM 'select' instruction.
bool contains(const_arg_type key) const
Check if the SetVector contains the given key.
Definition SetVector.h:252
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
A SetVector that performs no allocations if smaller than a certain size.
Definition SetVector.h:339
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
iterator erase(const_iterator CI)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition StringSet.h:25
BasicBlock * getSuccessor(unsigned i=0) const
iterator find(const KeyT &Val)
Definition ValueMap.h:160
iterator end()
Definition ValueMap.h:139
LLVM Value Representation.
Definition Value.h:75
iterator_range< user_iterator > users()
Definition Value.h:427
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
void insert_range(Range &&R)
Definition DenseSet.h:228
bool contains(const_arg_type_t< ValueT > V) const
Check if the set contains the given element.
Definition DenseSet.h:175
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition DenseSet.h:180
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition COFF.h:862
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
DiagnosticInfoOptimizationBase::Argument NV
Context & getContext() const
Definition BasicBlock.h:99
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition Path.cpp:457
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2116
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
constexpr from_range_t from_range
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
auto pred_size(const MachineBasicBlock *BB)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
@ RF_IgnoreMissingLocals
If this flag is set, the remapper ignores missing function-local entries (Argument,...
Definition ValueMapper.h:98
@ RF_NoModuleLevelChanges
If this flag is set, the remapper knows that only local values within a function (such as an instruct...
Definition ValueMapper.h:80
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
Function::ProfileCount ProfileCount
iterator_range< SplittingIterator > split(StringRef Str, StringRef Separator)
Split the specified string over a separator and return a range-compatible iterable over its partition...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM, RemapFlags Flags=RF_None, ValueMapTypeRemapper *TypeMapper=nullptr, ValueMaterializer *Materializer=nullptr, const MetadataPredicate *IdentityMD=nullptr)
Convert the instruction operands from referencing the current values into those specified by VM.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1885
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
auto predecessors(const MachineBasicBlock *BB)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872