Bug Summary

File:llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
Warning:line 962, column 9
Value stored to 'InsertPoint' is never read

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ControlHeightReduction.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/Instrumentation -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/Instrumentation -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/Instrumentation -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem 
/usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Transforms/Instrumentation -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringSet.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/GlobalsModRef.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/Analysis/ProfileSummaryInfo.h"
23#include "llvm/Analysis/RegionInfo.h"
24#include "llvm/Analysis/RegionIterator.h"
25#include "llvm/Analysis/ValueTracking.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/MDBuilder.h"
30#include "llvm/IR/PassManager.h"
31#include "llvm/InitializePasses.h"
32#include "llvm/Support/BranchProbability.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Transforms/Utils.h"
36#include "llvm/Transforms/Utils/BasicBlockUtils.h"
37#include "llvm/Transforms/Utils/Cloning.h"
38#include "llvm/Transforms/Utils/ValueMapper.h"
39
40#include <set>
41#include <sstream>
42
43using namespace llvm;
44
// Debug category used by LLVM_DEBUG / -debug-only for this pass.
#define DEBUG_TYPE "chr"

// Pass-local debug-output wrapper; forwards its argument to LLVM_DEBUG.
#define CHR_DEBUG(X) LLVM_DEBUG(X)
48
49static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
50 cl::desc("Apply CHR for all functions"));
51
52static cl::opt<double> CHRBiasThreshold(
53 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
54 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
55
56static cl::opt<unsigned> CHRMergeThreshold(
57 "chr-merge-threshold", cl::init(2), cl::Hidden,
58 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
59
60static cl::opt<std::string> CHRModuleList(
61 "chr-module-list", cl::init(""), cl::Hidden,
62 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
63
64static cl::opt<std::string> CHRFunctionList(
65 "chr-function-list", cl::init(""), cl::Hidden,
66 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
67
68static StringSet<> CHRModules;
69static StringSet<> CHRFunctions;
70
71static void parseCHRFilterFiles() {
72 if (!CHRModuleList.empty()) {
73 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
74 if (!FileOrErr) {
75 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
76 std::exit(1);
77 }
78 StringRef Buf = FileOrErr->get()->getBuffer();
79 SmallVector<StringRef, 0> Lines;
80 Buf.split(Lines, '\n');
81 for (StringRef Line : Lines) {
82 Line = Line.trim();
83 if (!Line.empty())
84 CHRModules.insert(Line);
85 }
86 }
87 if (!CHRFunctionList.empty()) {
88 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
89 if (!FileOrErr) {
90 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
91 std::exit(1);
92 }
93 StringRef Buf = FileOrErr->get()->getBuffer();
94 SmallVector<StringRef, 0> Lines;
95 Buf.split(Lines, '\n');
96 for (StringRef Line : Lines) {
97 Line = Line.trim();
98 if (!Line.empty())
99 CHRFunctions.insert(Line);
100 }
101 }
102}
103
104namespace {
105class ControlHeightReductionLegacyPass : public FunctionPass {
106public:
107 static char ID;
108
109 ControlHeightReductionLegacyPass() : FunctionPass(ID) {
110 initializeControlHeightReductionLegacyPassPass(
111 *PassRegistry::getPassRegistry());
112 parseCHRFilterFiles();
113 }
114
115 bool runOnFunction(Function &F) override;
116 void getAnalysisUsage(AnalysisUsage &AU) const override {
117 AU.addRequired<BlockFrequencyInfoWrapperPass>();
118 AU.addRequired<DominatorTreeWrapperPass>();
119 AU.addRequired<ProfileSummaryInfoWrapperPass>();
120 AU.addRequired<RegionInfoPass>();
121 AU.addPreserved<GlobalsAAWrapperPass>();
122 }
123};
124} // end anonymous namespace
125
126char ControlHeightReductionLegacyPass::ID = 0;
127
128INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
129 "chr",static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
130 "Reduce control height in the hot paths",static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
131 false, false)static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
132INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)initializeBlockFrequencyInfoWrapperPassPass(Registry);
133INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
134INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry);
135INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)initializeRegionInfoPassPass(Registry);
136INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
137 "chr",PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
138 "Reduce control height in the hot paths",PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
139 false, false)PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
140
141FunctionPass *llvm::createControlHeightReductionLegacyPass() {
142 return new ControlHeightReductionLegacyPass();
143}
144
145namespace {
146
147struct CHRStats {
148 CHRStats() : NumBranches(0), NumBranchesDelta(0),
149 WeightedNumBranchesDelta(0) {}
150 void print(raw_ostream &OS) const {
151 OS << "CHRStats: NumBranches " << NumBranches
152 << " NumBranchesDelta " << NumBranchesDelta
153 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
154 }
155 uint64_t NumBranches; // The original number of conditional branches /
156 // selects
157 uint64_t NumBranchesDelta; // The decrease of the number of conditional
158 // branches / selects in the hot paths due to CHR.
159 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
160 // count at the scope entry.
161};
162
163// RegInfo - some properties of a Region.
164struct RegInfo {
165 RegInfo() : R(nullptr), HasBranch(false) {}
166 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
167 Region *R;
168 bool HasBranch;
169 SmallVector<SelectInst *, 8> Selects;
170};
171
172typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
173
174// CHRScope - a sequence of regions to CHR together. It corresponds to a
175// sequence of conditional blocks. It can have subscopes which correspond to
176// nested conditional blocks. Nested CHRScopes form a tree.
177class CHRScope {
178 public:
179 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
180 assert(RI.R && "Null RegionIn")(static_cast<void> (0));
181 RegInfos.push_back(RI);
182 }
183
184 Region *getParentRegion() {
185 assert(RegInfos.size() > 0 && "Empty CHRScope")(static_cast<void> (0));
186 Region *Parent = RegInfos[0].R->getParent();
187 assert(Parent && "Unexpected to call this on the top-level region")(static_cast<void> (0));
188 return Parent;
189 }
190
191 BasicBlock *getEntryBlock() {
192 assert(RegInfos.size() > 0 && "Empty CHRScope")(static_cast<void> (0));
193 return RegInfos.front().R->getEntry();
194 }
195
196 BasicBlock *getExitBlock() {
197 assert(RegInfos.size() > 0 && "Empty CHRScope")(static_cast<void> (0));
198 return RegInfos.back().R->getExit();
199 }
200
201 bool appendable(CHRScope *Next) {
202 // The next scope is appendable only if this scope is directly connected to
203 // it (which implies it post-dominates this scope) and this scope dominates
204 // it (no edge to the next scope outside this scope).
205 BasicBlock *NextEntry = Next->getEntryBlock();
206 if (getExitBlock() != NextEntry)
207 // Not directly connected.
208 return false;
209 Region *LastRegion = RegInfos.back().R;
210 for (BasicBlock *Pred : predecessors(NextEntry))
211 if (!LastRegion->contains(Pred))
212 // There's an edge going into the entry of the next scope from outside
213 // of this scope.
214 return false;
215 return true;
216 }
217
218 void append(CHRScope *Next) {
219 assert(RegInfos.size() > 0 && "Empty CHRScope")(static_cast<void> (0));
220 assert(Next->RegInfos.size() > 0 && "Empty CHRScope")(static_cast<void> (0));
221 assert(getParentRegion() == Next->getParentRegion() &&(static_cast<void> (0))
222 "Must be siblings")(static_cast<void> (0));
223 assert(getExitBlock() == Next->getEntryBlock() &&(static_cast<void> (0))
224 "Must be adjacent")(static_cast<void> (0));
225 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
226 Subs.append(Next->Subs.begin(), Next->Subs.end());
227 }
228
229 void addSub(CHRScope *SubIn) {
230#ifndef NDEBUG1
231 bool IsChild = false;
232 for (RegInfo &RI : RegInfos)
233 if (RI.R == SubIn->getParentRegion()) {
234 IsChild = true;
235 break;
236 }
237 assert(IsChild && "Must be a child")(static_cast<void> (0));
238#endif
239 Subs.push_back(SubIn);
240 }
241
242 // Split this scope at the boundary region into two, which will belong to the
243 // tail and returns the tail.
244 CHRScope *split(Region *Boundary) {
245 assert(Boundary && "Boundary null")(static_cast<void> (0));
246 assert(RegInfos.begin()->R != Boundary &&(static_cast<void> (0))
247 "Can't be split at beginning")(static_cast<void> (0));
248 auto BoundaryIt = llvm::find_if(
249 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
250 if (BoundaryIt == RegInfos.end())
251 return nullptr;
252 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
253 DenseSet<Region *> TailRegionSet;
254 for (const RegInfo &RI : TailRegInfos)
255 TailRegionSet.insert(RI.R);
256
257 auto TailIt =
258 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
259 assert(Sub && "null Sub")(static_cast<void> (0));
260 Region *Parent = Sub->getParentRegion();
261 if (TailRegionSet.count(Parent))
262 return false;
263
264 assert(llvm::any_of((static_cast<void> (0))
265 RegInfos,(static_cast<void> (0))
266 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&(static_cast<void> (0))
267 "Must be in head")(static_cast<void> (0));
268 return true;
269 });
270 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
271
272 assert(HoistStopMap.empty() && "MapHoistStops must be empty")(static_cast<void> (0));
273 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
274 RegInfos.erase(BoundaryIt, RegInfos.end());
275 Subs.erase(TailIt, Subs.end());
276 return Scope;
277 }
278
279 bool contains(Instruction *I) const {
280 BasicBlock *Parent = I->getParent();
281 for (const RegInfo &RI : RegInfos)
282 if (RI.R->contains(Parent))
283 return true;
284 return false;
285 }
286
287 void print(raw_ostream &OS) const;
288
289 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
290 SmallVector<CHRScope *, 8> Subs; // Subscopes.
291
292 // The instruction at which to insert the CHR conditional branch (and hoist
293 // the dependent condition values).
294 Instruction *BranchInsertPoint;
295
296 // True-biased and false-biased regions (conditional blocks),
297 // respectively. Used only for the outermost scope and includes regions in
298 // subscopes. The rest are unbiased.
299 DenseSet<Region *> TrueBiasedRegions;
300 DenseSet<Region *> FalseBiasedRegions;
301 // Among the biased regions, the regions that get CHRed.
302 SmallVector<RegInfo, 8> CHRRegions;
303
304 // True-biased and false-biased selects, respectively. Used only for the
305 // outermost scope and includes ones in subscopes.
306 DenseSet<SelectInst *> TrueBiasedSelects;
307 DenseSet<SelectInst *> FalseBiasedSelects;
308
309 // Map from one of the above regions to the instructions to stop
310 // hoisting instructions at through use-def chains.
311 HoistStopMapTy HoistStopMap;
312
313 private:
314 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
315 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
316 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
317};
318
319class CHR {
320 public:
321 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
322 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
323 OptimizationRemarkEmitter &OREin)
324 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
325
326 ~CHR() {
327 for (CHRScope *Scope : Scopes) {
328 delete Scope;
329 }
330 }
331
332 bool run();
333
334 private:
335 // See the comments in CHR::run() for the high level flow of the algorithm and
336 // what the following functions do.
337
338 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
339 Region *R = RI.getTopLevelRegion();
340 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
341 Output.push_back(Scope);
342 }
343 }
344 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
345 SmallVectorImpl<CHRScope *> &Scopes);
346 CHRScope *findScope(Region *R);
347 void checkScopeHoistable(CHRScope *Scope);
348
349 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
350 SmallVectorImpl<CHRScope *> &Output);
351 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
352 CHRScope *Outer,
353 DenseSet<Value *> *OuterConditionValues,
354 Instruction *OuterInsertPoint,
355 SmallVectorImpl<CHRScope *> &Output,
356 DenseSet<Instruction *> &Unhoistables);
357
358 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
359 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
360
361 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
362 SmallVectorImpl<CHRScope *> &Output);
363
364 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
365 SmallVectorImpl<CHRScope *> &Output);
366 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
367
368 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
369 SmallVectorImpl<CHRScope *> &Output);
370
371 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
372 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
373 void cloneScopeBlocks(CHRScope *Scope,
374 BasicBlock *PreEntryBlock,
375 BasicBlock *ExitBlock,
376 Region *LastRegion,
377 ValueToValueMapTy &VMap);
378 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
379 BasicBlock *EntryBlock,
380 BasicBlock *NewEntryBlock,
381 ValueToValueMapTy &VMap);
382 void fixupBranchesAndSelects(CHRScope *Scope,
383 BasicBlock *PreEntryBlock,
384 BranchInst *MergedBR,
385 uint64_t ProfileCount);
386 void fixupBranch(Region *R,
387 CHRScope *Scope,
388 IRBuilder<> &IRB,
389 Value *&MergedCondition, BranchProbability &CHRBranchBias);
390 void fixupSelect(SelectInst* SI,
391 CHRScope *Scope,
392 IRBuilder<> &IRB,
393 Value *&MergedCondition, BranchProbability &CHRBranchBias);
394 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
395 Instruction *BranchOrSelect,
396 CHRScope *Scope,
397 IRBuilder<> &IRB,
398 Value *&MergedCondition);
399
400 Function &F;
401 BlockFrequencyInfo &BFI;
402 DominatorTree &DT;
403 ProfileSummaryInfo &PSI;
404 RegionInfo &RI;
405 OptimizationRemarkEmitter &ORE;
406 CHRStats Stats;
407
408 // All the true-biased regions in the function
409 DenseSet<Region *> TrueBiasedRegionsGlobal;
410 // All the false-biased regions in the function
411 DenseSet<Region *> FalseBiasedRegionsGlobal;
412 // All the true-biased selects in the function
413 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
414 // All the false-biased selects in the function
415 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
416 // A map from biased regions to their branch bias
417 DenseMap<Region *, BranchProbability> BranchBiasMap;
418 // A map from biased selects to their branch bias
419 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
420 // All the scopes.
421 DenseSet<CHRScope *> Scopes;
422};
423
424} // end anonymous namespace
425
426static inline
427raw_ostream LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) &operator<<(raw_ostream &OS,
428 const CHRStats &Stats) {
429 Stats.print(OS);
430 return OS;
431}
432
433static inline
434raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
435 Scope.print(OS);
436 return OS;
437}
438
439static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
440 if (ForceCHR)
441 return true;
442
443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
444 if (CHRModules.count(F.getParent()->getName()))
445 return true;
446 return CHRFunctions.count(F.getName());
447 }
448
449 assert(PSI.hasProfileSummary() && "Empty PSI?")(static_cast<void> (0));
450 return PSI.isFunctionEntryHot(&F);
451}
452
453static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) dumpIR(Function &F, const char *Label,
454 CHRStats *Stats) {
455 StringRef FuncName = F.getName();
456 StringRef ModuleName = F.getParent()->getName();
457 (void)(FuncName); // Unused in release build.
458 (void)(ModuleName); // Unused in release build.
459 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "do { } while (false)
460 << FuncName)do { } while (false);
461 if (Stats)
462 CHR_DEBUG(dbgs() << " " << *Stats)do { } while (false);
463 CHR_DEBUG(dbgs() << "\n")do { } while (false);
464 CHR_DEBUG(F.dump())do { } while (false);
465}
466
467void CHRScope::print(raw_ostream &OS) const {
468 assert(RegInfos.size() > 0 && "Empty CHRScope")(static_cast<void> (0));
469 OS << "CHRScope[";
470 OS << RegInfos.size() << ", Regions[";
471 for (const RegInfo &RI : RegInfos) {
472 OS << RI.R->getNameStr();
473 if (RI.HasBranch)
474 OS << " B";
475 if (RI.Selects.size() > 0)
476 OS << " S" << RI.Selects.size();
477 OS << ", ";
478 }
479 if (RegInfos[0].R->getParent()) {
480 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
481 } else {
482 // top level region
483 OS << "]";
484 }
485 OS << ", Subs[";
486 for (CHRScope *Sub : Subs) {
487 OS << *Sub << ", ";
488 }
489 OS << "]]";
490}
491
492// Return true if the given instruction type can be hoisted by CHR.
493static bool isHoistableInstructionType(Instruction *I) {
494 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
495 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
496 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
497 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
498 isa<InsertValueInst>(I);
499}
500
501// Return true if the given instruction can be hoisted by CHR.
502static bool isHoistable(Instruction *I, DominatorTree &DT) {
503 if (!isHoistableInstructionType(I))
504 return false;
505 return isSafeToSpeculativelyExecute(I, nullptr, &DT);
506}
507
508// Recursively traverse the use-def chains of the given value and return a set
509// of the unhoistable base values defined within the scope (excluding the
510// first-region entry block) or the (hoistable or unhoistable) base values that
511// are defined outside (including the first-region entry block) of the
512// scope. The returned set doesn't include constants.
513static const std::set<Value *> &
514getBaseValues(Value *V, DominatorTree &DT,
515 DenseMap<Value *, std::set<Value *>> &Visited) {
516 auto It = Visited.find(V);
517 if (It != Visited.end()) {
518 return It->second;
519 }
520 std::set<Value *> Result;
521 if (auto *I = dyn_cast<Instruction>(V)) {
522 // We don't stop at a block that's not in the Scope because we would miss
523 // some instructions that are based on the same base values if we stop
524 // there.
525 if (!isHoistable(I, DT)) {
526 Result.insert(I);
527 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
528 }
529 // I is hoistable above the Scope.
530 for (Value *Op : I->operands()) {
531 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
532 Result.insert(OpResult.begin(), OpResult.end());
533 }
534 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
535 }
536 if (isa<Argument>(V)) {
537 Result.insert(V);
538 }
539 // We don't include others like constants because those won't lead to any
540 // chance of folding of conditions (eg two bit checks merged into one check)
541 // after CHR.
542 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
543}
544
545// Return true if V is already hoisted or can be hoisted (along with its
546// operands) above the insert point. When it returns true and HoistStops is
547// non-null, the instructions to stop hoisting at through the use-def chains are
548// inserted into HoistStops.
549static bool
550checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
551 DenseSet<Instruction *> &Unhoistables,
552 DenseSet<Instruction *> *HoistStops,
553 DenseMap<Instruction *, bool> &Visited) {
554 assert(InsertPoint && "Null InsertPoint")(static_cast<void> (0));
555 if (auto *I = dyn_cast<Instruction>(V)) {
556 auto It = Visited.find(I);
557 if (It != Visited.end()) {
558 return It->second;
559 }
560 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block")(static_cast<void> (0));
561 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination")(static_cast<void> (0));
562 if (Unhoistables.count(I)) {
563 // Don't hoist if they are not to be hoisted.
564 Visited[I] = false;
565 return false;
566 }
567 if (DT.dominates(I, InsertPoint)) {
568 // We are already above the insert point. Stop here.
569 if (HoistStops)
570 HoistStops->insert(I);
571 Visited[I] = true;
572 return true;
573 }
574 // We aren't not above the insert point, check if we can hoist it above the
575 // insert point.
576 if (isHoistable(I, DT)) {
577 // Check operands first.
578 DenseSet<Instruction *> OpsHoistStops;
579 bool AllOpsHoisted = true;
580 for (Value *Op : I->operands()) {
581 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
582 Visited)) {
583 AllOpsHoisted = false;
584 break;
585 }
586 }
587 if (AllOpsHoisted) {
588 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n")do { } while (false);
589 if (HoistStops)
590 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
591 Visited[I] = true;
592 return true;
593 }
594 }
595 Visited[I] = false;
596 return false;
597 }
598 // Non-instructions are considered hoistable.
599 return true;
600}
601
602// Returns true and sets the true probability and false probability of an
603// MD_prof metadata if it's well-formed.
604static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
605 BranchProbability &FalseProb) {
606 if (!MD) return false;
607 MDString *MDName = cast<MDString>(MD->getOperand(0));
608 if (MDName->getString() != "branch_weights" ||
609 MD->getNumOperands() != 3)
610 return false;
611 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
612 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
613 if (!TrueWeight || !FalseWeight)
614 return false;
615 uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
616 uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
617 uint64_t SumWt = TrueWt + FalseWt;
618
619 assert(SumWt >= TrueWt && SumWt >= FalseWt &&(static_cast<void> (0))
620 "Overflow calculating branch probabilities.")(static_cast<void> (0));
621
622 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
623 if (SumWt == 0)
624 return false;
625
626 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
627 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
628 return true;
629}
630
631static BranchProbability getCHRBiasThreshold() {
632 return BranchProbability::getBranchProbability(
633 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
634}
635
636// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
637// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
638// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
639// false.
640template <typename K, typename S, typename M>
641static bool checkBias(K *Key, BranchProbability TrueProb,
642 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
643 M &BiasMap) {
644 BranchProbability Threshold = getCHRBiasThreshold();
645 if (TrueProb >= Threshold) {
646 TrueSet.insert(Key);
647 BiasMap[Key] = TrueProb;
648 return true;
649 } else if (FalseProb >= Threshold) {
650 FalseSet.insert(Key);
651 BiasMap[Key] = FalseProb;
652 return true;
653 }
654 return false;
655}
656
657// Returns true and insert a region into the right biased set and the map if the
658// branch of the region is biased.
659static bool checkBiasedBranch(BranchInst *BI, Region *R,
660 DenseSet<Region *> &TrueBiasedRegionsGlobal,
661 DenseSet<Region *> &FalseBiasedRegionsGlobal,
662 DenseMap<Region *, BranchProbability> &BranchBiasMap) {
663 if (!BI->isConditional())
664 return false;
665 BranchProbability ThenProb, ElseProb;
666 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
667 ThenProb, ElseProb))
668 return false;
669 BasicBlock *IfThen = BI->getSuccessor(0);
670 BasicBlock *IfElse = BI->getSuccessor(1);
671 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&(static_cast<void> (0))
672 IfThen != IfElse &&(static_cast<void> (0))
673 "Invariant from findScopes")(static_cast<void> (0));
674 if (IfThen == R->getExit()) {
675 // Swap them so that IfThen/ThenProb means going into the conditional code
676 // and IfElse/ElseProb means skipping it.
677 std::swap(IfThen, IfElse);
678 std::swap(ThenProb, ElseProb);
679 }
680 CHR_DEBUG(dbgs() << "BI " << *BI << " ")do { } while (false);
681 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ")do { } while (false);
682 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n")do { } while (false);
683 return checkBias(R, ThenProb, ElseProb,
684 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
685 BranchBiasMap);
686}
687
688// Returns true and insert a select into the right biased set and the map if the
689// select is biased.
690static bool checkBiasedSelect(
691 SelectInst *SI, Region *R,
692 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
693 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
694 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
695 BranchProbability TrueProb, FalseProb;
696 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
697 TrueProb, FalseProb))
698 return false;
699 CHR_DEBUG(dbgs() << "SI " << *SI << " ")do { } while (false);
700 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ")do { } while (false);
701 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n")do { } while (false);
702 return checkBias(SI, TrueProb, FalseProb,
703 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
704 SelectBiasMap);
705}
706
707// Returns the instruction at which to hoist the dependent condition values and
708// insert the CHR branch for a region. This is the terminator branch in the
709// entry block or the first select in the entry block, if any.
710static Instruction* getBranchInsertPoint(RegInfo &RI) {
711 Region *R = RI.R;
712 BasicBlock *EntryBB = R->getEntry();
713 // The hoist point is by default the terminator of the entry block, which is
714 // the same as the branch instruction if RI.HasBranch is true.
715 Instruction *HoistPoint = EntryBB->getTerminator();
716 for (SelectInst *SI : RI.Selects) {
717 if (SI->getParent() == EntryBB) {
718 // Pick the first select in Selects in the entry block. Note Selects is
719 // sorted in the instruction order within a block (asserted below).
720 HoistPoint = SI;
721 break;
722 }
723 }
724 assert(HoistPoint && "Null HoistPoint")(static_cast<void> (0));
725#ifndef NDEBUG1
726 // Check that HoistPoint is the first one in Selects in the entry block,
727 // if any.
728 DenseSet<Instruction *> EntryBlockSelectSet;
729 for (SelectInst *SI : RI.Selects) {
730 if (SI->getParent() == EntryBB) {
731 EntryBlockSelectSet.insert(SI);
732 }
733 }
734 for (Instruction &I : *EntryBB) {
735 if (EntryBlockSelectSet.contains(&I)) {
736 assert(&I == HoistPoint &&(static_cast<void> (0))
737 "HoistPoint must be the first one in Selects")(static_cast<void> (0));
738 break;
739 }
740 }
741#endif
742 return HoistPoint;
743}
744
745// Find a CHR scope in the given region.
746CHRScope * CHR::findScope(Region *R) {
747 CHRScope *Result = nullptr;
748 BasicBlock *Entry = R->getEntry();
749 BasicBlock *Exit = R->getExit(); // null if top level.
750 assert(Entry && "Entry must not be null")(static_cast<void> (0));
751 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&(static_cast<void> (0))
752 "Only top level region has a null exit")(static_cast<void> (0));
753 if (Entry)
754 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n")do { } while (false);
755 else
756 CHR_DEBUG(dbgs() << "Entry null\n")do { } while (false);
757 if (Exit)
758 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n")do { } while (false);
759 else
760 CHR_DEBUG(dbgs() << "Exit null\n")do { } while (false);
761 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
762 // to this region).
763 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
764 if (EntryInSubregion)
765 return nullptr;
766 // Exclude loops
767 for (BasicBlock *Pred : predecessors(Entry))
768 if (R->contains(Pred))
769 return nullptr;
770 // If any of the basic blocks have address taken, we must skip this region
771 // because we cannot clone basic blocks that have address taken.
772 for (BasicBlock *BB : R->blocks())
773 if (BB->hasAddressTaken())
774 return nullptr;
775 if (Exit) {
776 // Try to find an if-then block (check if R is an if-then).
777 // if (cond) {
778 // ...
779 // }
780 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
781 if (BI)
782 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n")do { } while (false);
783 else
784 CHR_DEBUG(dbgs() << "BI null\n")do { } while (false);
785 if (BI && BI->isConditional()) {
786 BasicBlock *S0 = BI->getSuccessor(0);
787 BasicBlock *S1 = BI->getSuccessor(1);
788 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n")do { } while (false);
789 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n")do { } while (false);
790 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
791 RegInfo RI(R);
792 RI.HasBranch = checkBiasedBranch(
793 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
794 BranchBiasMap);
795 Result = new CHRScope(RI);
796 Scopes.insert(Result);
797 CHR_DEBUG(dbgs() << "Found a region with a branch\n")do { } while (false);
798 ++Stats.NumBranches;
799 if (!RI.HasBranch) {
800 ORE.emit([&]() {
801 return OptimizationRemarkMissed(DEBUG_TYPE"chr", "BranchNotBiased", BI)
802 << "Branch not biased";
803 });
804 }
805 }
806 }
807 }
808 {
809 // Try to look for selects in the direct child blocks (as opposed to in
810 // subregions) of R.
811 // ...
812 // if (..) { // Some subregion
813 // ...
814 // }
815 // if (..) { // Some subregion
816 // ...
817 // }
818 // ...
819 // a = cond ? b : c;
820 // ...
821 SmallVector<SelectInst *, 8> Selects;
822 for (RegionNode *E : R->elements()) {
823 if (E->isSubRegion())
824 continue;
825 // This returns the basic block of E if E is a direct child of R (not a
826 // subregion.)
827 BasicBlock *BB = E->getEntry();
828 // Need to push in the order to make it easier to find the first Select
829 // later.
830 for (Instruction &I : *BB) {
831 if (auto *SI = dyn_cast<SelectInst>(&I)) {
832 Selects.push_back(SI);
833 ++Stats.NumBranches;
834 }
835 }
836 }
837 if (Selects.size() > 0) {
838 auto AddSelects = [&](RegInfo &RI) {
839 for (auto *SI : Selects)
840 if (checkBiasedSelect(SI, RI.R,
841 TrueBiasedSelectsGlobal,
842 FalseBiasedSelectsGlobal,
843 SelectBiasMap))
844 RI.Selects.push_back(SI);
845 else
846 ORE.emit([&]() {
847 return OptimizationRemarkMissed(DEBUG_TYPE"chr", "SelectNotBiased", SI)
848 << "Select not biased";
849 });
850 };
851 if (!Result) {
852 CHR_DEBUG(dbgs() << "Found a select-only region\n")do { } while (false);
853 RegInfo RI(R);
854 AddSelects(RI);
855 Result = new CHRScope(RI);
856 Scopes.insert(Result);
857 } else {
858 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n")do { } while (false);
859 AddSelects(Result->RegInfos[0]);
860 }
861 }
862 }
863
864 if (Result) {
865 checkScopeHoistable(Result);
866 }
867 return Result;
868}
869
870// Check that any of the branch and the selects in the region could be
871// hoisted above the the CHR branch insert point (the most dominating of
872// them, either the branch (at the end of the first block) or the first
873// select in the first block). If the branch can't be hoisted, drop the
874// selects in the first blocks.
875//
876// For example, for the following scope/region with selects, we want to insert
877// the merged branch right before the first select in the first/entry block by
878// hoisting c1, c2, c3, and c4.
879//
880// // Branch insert point here.
881// a = c1 ? b : c; // Select 1
882// d = c2 ? e : f; // Select 2
883// if (c3) { // Branch
884// ...
885// c4 = foo() // A call.
886// g = c4 ? h : i; // Select 3
887// }
888//
889// But suppose we can't hoist c4 because it's dependent on the preceding
890// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
891// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
892void CHR::checkScopeHoistable(CHRScope *Scope) {
893 RegInfo &RI = Scope->RegInfos[0];
894 Region *R = RI.R;
895 BasicBlock *EntryBB = R->getEntry();
896 auto *Branch = RI.HasBranch ?
897 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
898 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
899 if (RI.HasBranch || !Selects.empty()) {
900 Instruction *InsertPoint = getBranchInsertPoint(RI);
901 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n")do { } while (false);
902 // Avoid a data dependence from a select or a branch to a(nother)
903 // select. Note no instruction can't data-depend on a branch (a branch
904 // instruction doesn't produce a value).
905 DenseSet<Instruction *> Unhoistables;
906 // Initialize Unhoistables with the selects.
907 for (SelectInst *SI : Selects) {
908 Unhoistables.insert(SI);
909 }
910 // Remove Selects that can't be hoisted.
911 for (auto it = Selects.begin(); it != Selects.end(); ) {
912 SelectInst *SI = *it;
913 if (SI == InsertPoint) {
914 ++it;
915 continue;
916 }
917 DenseMap<Instruction *, bool> Visited;
918 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
919 DT, Unhoistables, nullptr, Visited);
920 if (!IsHoistable) {
921 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n")do { } while (false);
922 ORE.emit([&]() {
923 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
924 "DropUnhoistableSelect", SI)
925 << "Dropped unhoistable select";
926 });
927 it = Selects.erase(it);
928 // Since we are dropping the select here, we also drop it from
929 // Unhoistables.
930 Unhoistables.erase(SI);
931 } else
932 ++it;
933 }
934 // Update InsertPoint after potentially removing selects.
935 InsertPoint = getBranchInsertPoint(RI);
936 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n")do { } while (false);
937 if (RI.HasBranch && InsertPoint != Branch) {
938 DenseMap<Instruction *, bool> Visited;
939 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
940 DT, Unhoistables, nullptr, Visited);
941 if (!IsHoistable) {
942 // If the branch isn't hoistable, drop the selects in the entry
943 // block, preferring the branch, which makes the branch the hoist
944 // point.
945 assert(InsertPoint != Branch && "Branch must not be the hoist point")(static_cast<void> (0));
946 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n")do { } while (false);
947 CHR_DEBUG(do { } while (false)
948 for (SelectInst *SI : Selects) {do { } while (false)
949 dbgs() << "SI " << *SI << "\n";do { } while (false)
950 })do { } while (false);
951 for (SelectInst *SI : Selects) {
952 ORE.emit([&]() {
953 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
954 "DropSelectUnhoistableBranch", SI)
955 << "Dropped select due to unhoistable branch";
956 });
957 }
958 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
959 return SI->getParent() == EntryBB;
960 });
961 Unhoistables.clear();
962 InsertPoint = Branch;
Value stored to 'InsertPoint' is never read
963 }
964 }
965 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n")do { } while (false);
966#ifndef NDEBUG1
967 if (RI.HasBranch) {
968 assert(!DT.dominates(Branch, InsertPoint) &&(static_cast<void> (0))
969 "Branch can't be already above the hoist point")(static_cast<void> (0));
970 DenseMap<Instruction *, bool> Visited;
971 assert(checkHoistValue(Branch->getCondition(), InsertPoint,(static_cast<void> (0))
972 DT, Unhoistables, nullptr, Visited) &&(static_cast<void> (0))
973 "checkHoistValue for branch")(static_cast<void> (0));
974 }
975 for (auto *SI : Selects) {
976 assert(!DT.dominates(SI, InsertPoint) &&(static_cast<void> (0))
977 "SI can't be already above the hoist point")(static_cast<void> (0));
978 DenseMap<Instruction *, bool> Visited;
979 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,(static_cast<void> (0))
980 Unhoistables, nullptr, Visited) &&(static_cast<void> (0))
981 "checkHoistValue for selects")(static_cast<void> (0));
982 }
983 CHR_DEBUG(dbgs() << "Result\n")do { } while (false);
984 if (RI.HasBranch) {
985 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n")do { } while (false);
986 }
987 for (auto *SI : Selects) {
988 CHR_DEBUG(dbgs() << "SI " << *SI << "\n")do { } while (false);
989 }
990#endif
991 }
992}
993
994// Traverse the region tree, find all nested scopes and merge them if possible.
995CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
996 SmallVectorImpl<CHRScope *> &Scopes) {
997 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n")do { } while (false);
998 CHRScope *Result = findScope(R);
999 // Visit subscopes.
1000 CHRScope *ConsecutiveSubscope = nullptr;
1001 SmallVector<CHRScope *, 8> Subscopes;
1002 for (auto It = R->begin(); It != R->end(); ++It) {
1003 const std::unique_ptr<Region> &SubR = *It;
1004 auto NextIt = std::next(It);
1005 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
1006 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()do { } while (false)
1007 << "\n")do { } while (false);
1008 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
1009 if (SubCHRScope) {
1010 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n")do { } while (false);
1011 } else {
1012 CHR_DEBUG(dbgs() << "Subregion Scope null\n")do { } while (false);
1013 }
1014 if (SubCHRScope) {
1015 if (!ConsecutiveSubscope)
1016 ConsecutiveSubscope = SubCHRScope;
1017 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1018 Subscopes.push_back(ConsecutiveSubscope);
1019 ConsecutiveSubscope = SubCHRScope;
1020 } else
1021 ConsecutiveSubscope->append(SubCHRScope);
1022 } else {
1023 if (ConsecutiveSubscope) {
1024 Subscopes.push_back(ConsecutiveSubscope);
1025 }
1026 ConsecutiveSubscope = nullptr;
1027 }
1028 }
1029 if (ConsecutiveSubscope) {
1030 Subscopes.push_back(ConsecutiveSubscope);
1031 }
1032 for (CHRScope *Sub : Subscopes) {
1033 if (Result) {
1034 // Combine it with the parent.
1035 Result->addSub(Sub);
1036 } else {
1037 // Push Subscopes as they won't be combined with the parent.
1038 Scopes.push_back(Sub);
1039 }
1040 }
1041 return Result;
1042}
1043
1044static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1045 DenseSet<Value *> ConditionValues;
1046 if (RI.HasBranch) {
1047 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1048 ConditionValues.insert(BI->getCondition());
1049 }
1050 for (SelectInst *SI : RI.Selects) {
1051 ConditionValues.insert(SI->getCondition());
1052 }
1053 return ConditionValues;
1054}
1055
1056
1057// Determine whether to split a scope depending on the sets of the branch
1058// condition values of the previous region and the current region. We split
1059// (return true) it if 1) the condition values of the inner/lower scope can't be
1060// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1061// values have an empty intersection (because the combined branch conditions
1062// won't probably lead to a simpler combined condition).
1063static bool shouldSplit(Instruction *InsertPoint,
1064 DenseSet<Value *> &PrevConditionValues,
1065 DenseSet<Value *> &ConditionValues,
1066 DominatorTree &DT,
1067 DenseSet<Instruction *> &Unhoistables) {
1068 assert(InsertPoint && "Null InsertPoint")(static_cast<void> (0));
1069 CHR_DEBUG(do { } while (false)
1070 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";do { } while (false)
1071 for (Value *V : PrevConditionValues) {do { } while (false)
1072 dbgs() << *V << ", ";do { } while (false)
1073 }do { } while (false)
1074 dbgs() << " ConditionValues ";do { } while (false)
1075 for (Value *V : ConditionValues) {do { } while (false)
1076 dbgs() << *V << ", ";do { } while (false)
1077 }do { } while (false)
1078 dbgs() << "\n")do { } while (false);
1079 // If any of Bases isn't hoistable to the hoist point, split.
1080 for (Value *V : ConditionValues) {
1081 DenseMap<Instruction *, bool> Visited;
1082 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1083 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n")do { } while (false);
1084 return true; // Not hoistable, split.
1085 }
1086 }
1087 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1088 // unnecessary splits at scopes with no branch/selects. If
1089 // PrevConditionValues and ConditionValues don't intersect at all, split.
1090 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1091 // Use std::set as DenseSet doesn't work with set_intersection.
1092 std::set<Value *> PrevBases, Bases;
1093 DenseMap<Value *, std::set<Value *>> Visited;
1094 for (Value *V : PrevConditionValues) {
1095 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1096 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1097 }
1098 for (Value *V : ConditionValues) {
1099 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1100 Bases.insert(BaseValues.begin(), BaseValues.end());
1101 }
1102 CHR_DEBUG(do { } while (false)
1103 dbgs() << "PrevBases ";do { } while (false)
1104 for (Value *V : PrevBases) {do { } while (false)
1105 dbgs() << *V << ", ";do { } while (false)
1106 }do { } while (false)
1107 dbgs() << " Bases ";do { } while (false)
1108 for (Value *V : Bases) {do { } while (false)
1109 dbgs() << *V << ", ";do { } while (false)
1110 }do { } while (false)
1111 dbgs() << "\n")do { } while (false);
1112 std::vector<Value *> Intersection;
1113 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1114 Bases.end(), std::back_inserter(Intersection));
1115 if (Intersection.empty()) {
1116 // Empty intersection, split.
1117 CHR_DEBUG(dbgs() << "Split. Intersection empty\n")do { } while (false);
1118 return true;
1119 }
1120 }
1121 CHR_DEBUG(dbgs() << "No split\n")do { } while (false);
1122 return false; // Don't split.
1123}
1124
1125static void getSelectsInScope(CHRScope *Scope,
1126 DenseSet<Instruction *> &Output) {
1127 for (RegInfo &RI : Scope->RegInfos)
1128 for (SelectInst *SI : RI.Selects)
1129 Output.insert(SI);
1130 for (CHRScope *Sub : Scope->Subs)
1131 getSelectsInScope(Sub, Output);
1132}
1133
1134void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1135 SmallVectorImpl<CHRScope *> &Output) {
1136 for (CHRScope *Scope : Input) {
1137 assert(!Scope->BranchInsertPoint &&(static_cast<void> (0))
1138 "BranchInsertPoint must not be set")(static_cast<void> (0));
1139 DenseSet<Instruction *> Unhoistables;
1140 getSelectsInScope(Scope, Unhoistables);
1141 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1142 }
1143#ifndef NDEBUG1
1144 for (CHRScope *Scope : Output) {
1145 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set")(static_cast<void> (0));
1146 }
1147#endif
1148}
1149
1150SmallVector<CHRScope *, 8> CHR::splitScope(
1151 CHRScope *Scope,
1152 CHRScope *Outer,
1153 DenseSet<Value *> *OuterConditionValues,
1154 Instruction *OuterInsertPoint,
1155 SmallVectorImpl<CHRScope *> &Output,
1156 DenseSet<Instruction *> &Unhoistables) {
1157 if (Outer) {
1158 assert(OuterConditionValues && "Null OuterConditionValues")(static_cast<void> (0));
1159 assert(OuterInsertPoint && "Null OuterInsertPoint")(static_cast<void> (0));
1160 }
1161 bool PrevSplitFromOuter = true;
1162 DenseSet<Value *> PrevConditionValues;
1163 Instruction *PrevInsertPoint = nullptr;
1164 SmallVector<CHRScope *, 8> Splits;
1165 SmallVector<bool, 8> SplitsSplitFromOuter;
1166 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1167 SmallVector<Instruction *, 8> SplitsInsertPoints;
1168 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy
1169 for (RegInfo &RI : RegInfos) {
1170 Instruction *InsertPoint = getBranchInsertPoint(RI);
1171 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1172 CHR_DEBUG(do { } while (false)
1173 dbgs() << "ConditionValues ";do { } while (false)
1174 for (Value *V : ConditionValues) {do { } while (false)
1175 dbgs() << *V << ", ";do { } while (false)
1176 }do { } while (false)
1177 dbgs() << "\n")do { } while (false);
1178 if (RI.R == RegInfos[0].R) {
1179 // First iteration. Check to see if we should split from the outer.
1180 if (Outer) {
1181 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n")do { } while (false);
1182 CHR_DEBUG(dbgs() << "Should split from outer at "do { } while (false)
1183 << RI.R->getNameStr() << "\n")do { } while (false);
1184 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1185 ConditionValues, DT, Unhoistables)) {
1186 PrevConditionValues = ConditionValues;
1187 PrevInsertPoint = InsertPoint;
1188 ORE.emit([&]() {
1189 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
1190 "SplitScopeFromOuter",
1191 RI.R->getEntry()->getTerminator())
1192 << "Split scope from outer due to unhoistable branch/select "
1193 << "and/or lack of common condition values";
1194 });
1195 } else {
1196 // Not splitting from the outer. Use the outer bases and insert
1197 // point. Union the bases.
1198 PrevSplitFromOuter = false;
1199 PrevConditionValues = *OuterConditionValues;
1200 PrevConditionValues.insert(ConditionValues.begin(),
1201 ConditionValues.end());
1202 PrevInsertPoint = OuterInsertPoint;
1203 }
1204 } else {
1205 CHR_DEBUG(dbgs() << "Outer null\n")do { } while (false);
1206 PrevConditionValues = ConditionValues;
1207 PrevInsertPoint = InsertPoint;
1208 }
1209 } else {
1210 CHR_DEBUG(dbgs() << "Should split from prev at "do { } while (false)
1211 << RI.R->getNameStr() << "\n")do { } while (false);
1212 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1213 DT, Unhoistables)) {
1214 CHRScope *Tail = Scope->split(RI.R);
1215 Scopes.insert(Tail);
1216 Splits.push_back(Scope);
1217 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1218 SplitsConditionValues.push_back(PrevConditionValues);
1219 SplitsInsertPoints.push_back(PrevInsertPoint);
1220 Scope = Tail;
1221 PrevConditionValues = ConditionValues;
1222 PrevInsertPoint = InsertPoint;
1223 PrevSplitFromOuter = true;
1224 ORE.emit([&]() {
1225 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
1226 "SplitScopeFromPrev",
1227 RI.R->getEntry()->getTerminator())
1228 << "Split scope from previous due to unhoistable branch/select "
1229 << "and/or lack of common condition values";
1230 });
1231 } else {
1232 // Not splitting. Union the bases. Keep the hoist point.
1233 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1234 }
1235 }
1236 }
1237 Splits.push_back(Scope);
1238 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1239 SplitsConditionValues.push_back(PrevConditionValues);
1240 assert(PrevInsertPoint && "Null PrevInsertPoint")(static_cast<void> (0));
1241 SplitsInsertPoints.push_back(PrevInsertPoint);
1242 assert(Splits.size() == SplitsConditionValues.size() &&(static_cast<void> (0))
1243 Splits.size() == SplitsSplitFromOuter.size() &&(static_cast<void> (0))
1244 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes")(static_cast<void> (0));
1245 for (size_t I = 0; I < Splits.size(); ++I) {
1246 CHRScope *Split = Splits[I];
1247 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1248 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1249 SmallVector<CHRScope *, 8> NewSubs;
1250 DenseSet<Instruction *> SplitUnhoistables;
1251 getSelectsInScope(Split, SplitUnhoistables);
1252 for (CHRScope *Sub : Split->Subs) {
1253 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1254 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1255 SplitUnhoistables);
1256 llvm::append_range(NewSubs, SubSplits);
1257 }
1258 Split->Subs = NewSubs;
1259 }
1260 SmallVector<CHRScope *, 8> Result;
1261 for (size_t I = 0; I < Splits.size(); ++I) {
1262 CHRScope *Split = Splits[I];
1263 if (SplitsSplitFromOuter[I]) {
1264 // Split from the outer.
1265 Output.push_back(Split);
1266 Split->BranchInsertPoint = SplitsInsertPoints[I];
1267 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]do { } while (false)
1268 << "\n")do { } while (false);
1269 } else {
1270 // Connected to the outer.
1271 Result.push_back(Split);
1272 }
1273 }
1274 if (!Outer)
1275 assert(Result.empty() &&(static_cast<void> (0))
1276 "If no outer (top-level), must return no nested ones")(static_cast<void> (0));
1277 return Result;
1278}
1279
1280void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1281 for (CHRScope *Scope : Scopes) {
1282 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty")(static_cast<void> (0));
1283 classifyBiasedScopes(Scope, Scope);
1284 CHR_DEBUG(do { } while (false)
1285 dbgs() << "classifyBiasedScopes " << *Scope << "\n";do { } while (false)
1286 dbgs() << "TrueBiasedRegions ";do { } while (false)
1287 for (Region *R : Scope->TrueBiasedRegions) {do { } while (false)
1288 dbgs() << R->getNameStr() << ", ";do { } while (false)
1289 }do { } while (false)
1290 dbgs() << "\n";do { } while (false)
1291 dbgs() << "FalseBiasedRegions ";do { } while (false)
1292 for (Region *R : Scope->FalseBiasedRegions) {do { } while (false)
1293 dbgs() << R->getNameStr() << ", ";do { } while (false)
1294 }do { } while (false)
1295 dbgs() << "\n";do { } while (false)
1296 dbgs() << "TrueBiasedSelects ";do { } while (false)
1297 for (SelectInst *SI : Scope->TrueBiasedSelects) {do { } while (false)
1298 dbgs() << *SI << ", ";do { } while (false)
1299 }do { } while (false)
1300 dbgs() << "\n";do { } while (false)
1301 dbgs() << "FalseBiasedSelects ";do { } while (false)
1302 for (SelectInst *SI : Scope->FalseBiasedSelects) {do { } while (false)
1303 dbgs() << *SI << ", ";do { } while (false)
1304 }do { } while (false)
1305 dbgs() << "\n";)do { } while (false);
1306 }
1307}
1308
1309void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1310 for (RegInfo &RI : Scope->RegInfos) {
1311 if (RI.HasBranch) {
1312 Region *R = RI.R;
1313 if (TrueBiasedRegionsGlobal.contains(R))
1314 OutermostScope->TrueBiasedRegions.insert(R);
1315 else if (FalseBiasedRegionsGlobal.contains(R))
1316 OutermostScope->FalseBiasedRegions.insert(R);
1317 else
1318 llvm_unreachable("Must be biased")__builtin_unreachable();
1319 }
1320 for (SelectInst *SI : RI.Selects) {
1321 if (TrueBiasedSelectsGlobal.contains(SI))
1322 OutermostScope->TrueBiasedSelects.insert(SI);
1323 else if (FalseBiasedSelectsGlobal.contains(SI))
1324 OutermostScope->FalseBiasedSelects.insert(SI);
1325 else
1326 llvm_unreachable("Must be biased")__builtin_unreachable();
1327 }
1328 }
1329 for (CHRScope *Sub : Scope->Subs) {
1330 classifyBiasedScopes(Sub, OutermostScope);
1331 }
1332}
1333
1334static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1335 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1336 Scope->FalseBiasedRegions.size() +
1337 Scope->TrueBiasedSelects.size() +
1338 Scope->FalseBiasedSelects.size();
1339 return NumBiased >= CHRMergeThreshold;
1340}
1341
1342void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1343 SmallVectorImpl<CHRScope *> &Output) {
1344 for (CHRScope *Scope : Input) {
1345 // Filter out the ones with only one region and no subs.
1346 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1347 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "do { } while (false)
1348 << Scope->TrueBiasedRegions.size()do { } while (false)
1349 << " falsy-regions " << Scope->FalseBiasedRegions.size()do { } while (false)
1350 << " true-selects " << Scope->TrueBiasedSelects.size()do { } while (false)
1351 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n")do { } while (false);
1352 ORE.emit([&]() {
1353 return OptimizationRemarkMissed(
1354 DEBUG_TYPE"chr",
1355 "DropScopeWithOneBranchOrSelect",
1356 Scope->RegInfos[0].R->getEntry()->getTerminator())
1357 << "Drop scope with < "
1358 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1359 << " biased branch(es) or select(s)";
1360 });
1361 continue;
1362 }
1363 Output.push_back(Scope);
1364 }
1365}
1366
1367void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1368 SmallVectorImpl<CHRScope *> &Output) {
1369 for (CHRScope *Scope : Input) {
1370 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&(static_cast<void> (0))
1371 "Empty")(static_cast<void> (0));
1372 setCHRRegions(Scope, Scope);
1373 Output.push_back(Scope);
1374 CHR_DEBUG(do { } while (false)
1375 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";do { } while (false)
1376 for (auto pair : Scope->HoistStopMap) {do { } while (false)
1377 Region *R = pair.first;do { } while (false)
1378 dbgs() << "Region " << R->getNameStr() << "\n";do { } while (false)
1379 for (Instruction *I : pair.second) {do { } while (false)
1380 dbgs() << "HoistStop " << *I << "\n";do { } while (false)
1381 }do { } while (false)
1382 }do { } while (false)
1383 dbgs() << "CHRRegions" << "\n";do { } while (false)
1384 for (RegInfo &RI : Scope->CHRRegions) {do { } while (false)
1385 dbgs() << RI.R->getNameStr() << "\n";do { } while (false)
1386 })do { } while (false);
1387 }
1388}
1389
1390void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1391 DenseSet<Instruction *> Unhoistables;
1392 // Put the biased selects in Unhoistables because they should stay where they
1393 // are and constant-folded after CHR (in case one biased select or a branch
1394 // can depend on another biased select.)
1395 for (RegInfo &RI : Scope->RegInfos) {
1396 for (SelectInst *SI : RI.Selects) {
1397 Unhoistables.insert(SI);
1398 }
1399 }
1400 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1401 for (RegInfo &RI : Scope->RegInfos) {
1402 Region *R = RI.R;
1403 DenseSet<Instruction *> HoistStops;
1404 bool IsHoisted = false;
1405 if (RI.HasBranch) {
1406 assert((OutermostScope->TrueBiasedRegions.contains(R) ||(static_cast<void> (0))
1407 OutermostScope->FalseBiasedRegions.contains(R)) &&(static_cast<void> (0))
1408 "Must be truthy or falsy")(static_cast<void> (0));
1409 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1410 // Note checkHoistValue fills in HoistStops.
1411 DenseMap<Instruction *, bool> Visited;
1412 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1413 Unhoistables, &HoistStops, Visited);
1414 assert(IsHoistable && "Must be hoistable")(static_cast<void> (0));
1415 (void)(IsHoistable); // Unused in release build
1416 IsHoisted = true;
1417 }
1418 for (SelectInst *SI : RI.Selects) {
1419 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||(static_cast<void> (0))
1420 OutermostScope->FalseBiasedSelects.contains(SI)) &&(static_cast<void> (0))
1421 "Must be true or false biased")(static_cast<void> (0));
1422 // Note checkHoistValue fills in HoistStops.
1423 DenseMap<Instruction *, bool> Visited;
1424 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1425 Unhoistables, &HoistStops, Visited);
1426 assert(IsHoistable && "Must be hoistable")(static_cast<void> (0));
1427 (void)(IsHoistable); // Unused in release build
1428 IsHoisted = true;
1429 }
1430 if (IsHoisted) {
1431 OutermostScope->CHRRegions.push_back(RI);
1432 OutermostScope->HoistStopMap[R] = HoistStops;
1433 }
1434 }
1435 for (CHRScope *Sub : Scope->Subs)
1436 setCHRRegions(Sub, OutermostScope);
1437}
1438
1439static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1440 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1441}
1442
1443void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1444 SmallVectorImpl<CHRScope *> &Output) {
1445 Output.resize(Input.size());
1446 llvm::copy(Input, Output.begin());
1447 llvm::stable_sort(Output, CHRScopeSorter);
1448}
1449
1450// Return true if V is already hoisted or was hoisted (along with its operands)
1451// to the insert point.
1452static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1453 HoistStopMapTy &HoistStopMap,
1454 DenseSet<Instruction *> &HoistedSet,
1455 DenseSet<PHINode *> &TrivialPHIs,
1456 DominatorTree &DT) {
1457 auto IT = HoistStopMap.find(R);
1458 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map")(static_cast<void> (0));
1459 DenseSet<Instruction *> &HoistStops = IT->second;
1460 if (auto *I = dyn_cast<Instruction>(V)) {
1461 if (I == HoistPoint)
1462 return;
1463 if (HoistStops.count(I))
1464 return;
1465 if (auto *PN = dyn_cast<PHINode>(I))
1466 if (TrivialPHIs.count(PN))
1467 // The trivial phi inserted by the previous CHR scope could replace a
1468 // non-phi in HoistStops. Note that since this phi is at the exit of a
1469 // previous CHR scope, which dominates this scope, it's safe to stop
1470 // hoisting there.
1471 return;
1472 if (HoistedSet.count(I))
1473 // Already hoisted, return.
1474 return;
1475 assert(isHoistableInstructionType(I) && "Unhoistable instruction type")(static_cast<void> (0));
1476 assert(DT.getNode(I->getParent()) && "DT must contain I's block")(static_cast<void> (0));
1477 assert(DT.getNode(HoistPoint->getParent()) &&(static_cast<void> (0))
1478 "DT must contain HoistPoint block")(static_cast<void> (0));
1479 if (DT.dominates(I, HoistPoint))
1480 // We are already above the hoist point. Stop here. This may be necessary
1481 // when multiple scopes would independently hoist the same
1482 // instruction. Since an outer (dominating) scope would hoist it to its
1483 // entry before an inner (dominated) scope would to its entry, the inner
1484 // scope may see the instruction already hoisted, in which case it
1485 // potentially wrong for the inner scope to hoist it and could cause bad
1486 // IR (non-dominating def), but safe to skip hoisting it instead because
1487 // it's already in a block that dominates the inner scope.
1488 return;
1489 for (Value *Op : I->operands()) {
1490 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1491 }
1492 I->moveBefore(HoistPoint);
1493 HoistedSet.insert(I);
1494 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n")do { } while (false);
1495 }
1496}
1497
1498// Hoist the dependent condition values of the branches and the selects in the
1499// scope to the insert point.
1500static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1501 DenseSet<PHINode *> &TrivialPHIs,
1502 DominatorTree &DT) {
1503 DenseSet<Instruction *> HoistedSet;
1504 for (const RegInfo &RI : Scope->CHRRegions) {
1505 Region *R = RI.R;
1506 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1507 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1508 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1509 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1510 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1511 HoistedSet, TrivialPHIs, DT);
1512 }
1513 for (SelectInst *SI : RI.Selects) {
1514 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1515 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1516 if (!(IsTrueBiased || IsFalseBiased))
1517 continue;
1518 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1519 HoistedSet, TrivialPHIs, DT);
1520 }
1521 }
1522}
1523
1524// Negate the predicate if an ICmp if it's used only by branches or selects by
1525// swapping the operands of the branches or the selects. Returns true if success.
1526static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1527 Instruction *ExcludedUser,
1528 CHRScope *Scope) {
1529 for (User *U : ICmp->users()) {
1530 if (U == ExcludedUser)
1531 continue;
1532 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1533 continue;
1534 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1535 continue;
1536 return false;
1537 }
1538 for (User *U : ICmp->users()) {
1539 if (U == ExcludedUser)
1540 continue;
1541 if (auto *BI = dyn_cast<BranchInst>(U)) {
1542 assert(BI->isConditional() && "Must be conditional")(static_cast<void> (0));
1543 BI->swapSuccessors();
1544 // Don't need to swap this in terms of
1545 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1546 // mean whehter the branch is likely go into the if-then rather than
1547 // successor0/successor1 and because we can tell which edge is the then or
1548 // the else one by comparing the destination to the region exit block.
1549 continue;
1550 }
1551 if (auto *SI = dyn_cast<SelectInst>(U)) {
1552 // Swap operands
1553 SI->swapValues();
1554 SI->swapProfMetadata();
1555 if (Scope->TrueBiasedSelects.count(SI)) {
1556 assert(Scope->FalseBiasedSelects.count(SI) == 0 &&(static_cast<void> (0))
1557 "Must not be already in")(static_cast<void> (0));
1558 Scope->FalseBiasedSelects.insert(SI);
1559 } else if (Scope->FalseBiasedSelects.count(SI)) {
1560 assert(Scope->TrueBiasedSelects.count(SI) == 0 &&(static_cast<void> (0))
1561 "Must not be already in")(static_cast<void> (0));
1562 Scope->TrueBiasedSelects.insert(SI);
1563 }
1564 continue;
1565 }
1566 llvm_unreachable("Must be a branch or a select")__builtin_unreachable();
1567 }
1568 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1569 return true;
1570}
1571
1572// A helper for transformScopes. Insert a trivial phi at the scope exit block
1573// for a value that's defined in the scope but used outside it (meaning it's
1574// alive at the exit block).
1575static void insertTrivialPHIs(CHRScope *Scope,
1576 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1577 DenseSet<PHINode *> &TrivialPHIs) {
1578 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1579 for (RegInfo &RI : Scope->RegInfos) {
1580 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1581 // sub-Scopes.
1582 BlocksInScope.insert(BB);
1583 }
1584 }
1585 CHR_DEBUG({do { } while (false)
1586 dbgs() << "Inserting redundant phis\n";do { } while (false)
1587 for (BasicBlock *BB : BlocksInScope)do { } while (false)
1588 dbgs() << "BlockInScope " << BB->getName() << "\n";do { } while (false)
1589 })do { } while (false);
1590 for (BasicBlock *BB : BlocksInScope) {
1591 for (Instruction &I : *BB) {
1592 SmallVector<Instruction *, 8> Users;
1593 for (User *U : I.users()) {
1594 if (auto *UI = dyn_cast<Instruction>(U)) {
1595 if (BlocksInScope.count(UI->getParent()) == 0 &&
1596 // Unless there's already a phi for I at the exit block.
1597 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1598 CHR_DEBUG(dbgs() << "V " << I << "\n")do { } while (false);
1599 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n")do { } while (false);
1600 Users.push_back(UI);
1601 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1602 // There's a loop backedge from a block that's dominated by this
1603 // scope to the entry block.
1604 CHR_DEBUG(dbgs() << "V " << I << "\n")do { } while (false);
1605 CHR_DEBUG(dbgs()do { } while (false)
1606 << "Used at entry block (for a back edge) by a phi user "do { } while (false)
1607 << *UI << "\n")do { } while (false);
1608 Users.push_back(UI);
1609 }
1610 }
1611 }
1612 if (Users.size() > 0) {
1613 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1614 // ExitBlock. Replace I with the new phi in UI unless UI is another
1615 // phi at ExitBlock.
1616 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
1617 &ExitBlock->front());
1618 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1619 PN->addIncoming(&I, Pred);
1620 }
1621 TrivialPHIs.insert(PN);
1622 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n")do { } while (false);
1623 for (Instruction *UI : Users) {
1624 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1625 if (UI->getOperand(J) == &I) {
1626 UI->setOperand(J, PN);
1627 }
1628 }
1629 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n")do { } while (false);
1630 }
1631 }
1632 }
1633 }
1634}
1635
1636// Assert that all the CHR regions of the scope have a biased branch or select.
1637static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__))
1638assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1639#ifndef NDEBUG1
1640 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1641 if (Scope->TrueBiasedRegions.count(RI.R) ||
1642 Scope->FalseBiasedRegions.count(RI.R))
1643 return true;
1644 for (SelectInst *SI : RI.Selects)
1645 if (Scope->TrueBiasedSelects.count(SI) ||
1646 Scope->FalseBiasedSelects.count(SI))
1647 return true;
1648 return false;
1649 };
1650 for (RegInfo &RI : Scope->CHRRegions) {
1651 assert(HasBiasedBranchOrSelect(RI, Scope) &&(static_cast<void> (0))
1652 "Must have biased branch or select")(static_cast<void> (0));
1653 }
1654#endif
1655}
1656
1657// Assert that all the condition values of the biased branches and selects have
1658// been hoisted to the pre-entry block or outside of the scope.
1659static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) assertBranchOrSelectConditionHoisted(
1660 CHRScope *Scope, BasicBlock *PreEntryBlock) {
1661 CHR_DEBUG(dbgs() << "Biased regions condition values \n")do { } while (false);
1662 for (RegInfo &RI : Scope->CHRRegions) {
1663 Region *R = RI.R;
1664 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1665 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1666 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1667 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1668 Value *V = BI->getCondition();
1669 CHR_DEBUG(dbgs() << *V << "\n")do { } while (false);
1670 if (auto *I = dyn_cast<Instruction>(V)) {
1671 (void)(I); // Unused in release build.
1672 assert((I->getParent() == PreEntryBlock ||(static_cast<void> (0))
1673 !Scope->contains(I)) &&(static_cast<void> (0))
1674 "Must have been hoisted to PreEntryBlock or outside the scope")(static_cast<void> (0));
1675 }
1676 }
1677 for (SelectInst *SI : RI.Selects) {
1678 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1679 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1680 if (!(IsTrueBiased || IsFalseBiased))
1681 continue;
1682 Value *V = SI->getCondition();
1683 CHR_DEBUG(dbgs() << *V << "\n")do { } while (false);
1684 if (auto *I = dyn_cast<Instruction>(V)) {
1685 (void)(I); // Unused in release build.
1686 assert((I->getParent() == PreEntryBlock ||(static_cast<void> (0))
1687 !Scope->contains(I)) &&(static_cast<void> (0))
1688 "Must have been hoisted to PreEntryBlock or outside the scope")(static_cast<void> (0));
1689 }
1690 }
1691 }
1692}
1693
1694void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1695 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n")do { } while (false);
1696
1697 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region")(static_cast<void> (0));
1698 Region *FirstRegion = Scope->RegInfos[0].R;
1699 BasicBlock *EntryBlock = FirstRegion->getEntry();
1700 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1701 BasicBlock *ExitBlock = LastRegion->getExit();
1702 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1703
1704 if (ExitBlock) {
1705 // Insert a trivial phi at the exit block (where the CHR hot path and the
1706 // cold path merges) for a value that's defined in the scope but used
1707 // outside it (meaning it's alive at the exit block). We will add the
1708 // incoming values for the CHR cold paths to it below. Without this, we'd
1709 // miss updating phi's for such values unless there happens to already be a
1710 // phi for that value there.
1711 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1712 }
1713
1714 // Split the entry block of the first region. The new block becomes the new
1715 // entry block of the first region. The old entry block becomes the block to
1716 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1717 // through the split, we update the entry of the first region after the split,
1718 // and Region only points to the entry and the exit blocks, rather than
1719 // keeping everything in a list or set, the blocks membership and the
1720 // entry/exit blocks of the region are still valid after the split.
1721 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()do { } while (false)
1722 << " at " << *Scope->BranchInsertPoint << "\n")do { } while (false);
1723 BasicBlock *NewEntryBlock =
1724 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1725 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&(static_cast<void> (0))
1726 "NewEntryBlock's only pred must be EntryBlock")(static_cast<void> (0));
1727 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1728 BasicBlock *PreEntryBlock = EntryBlock;
1729
1730 ValueToValueMapTy VMap;
1731 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1732 // hot path (originals) and a cold path (clones) and update the PHIs at the
1733 // exit block.
1734 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1735
1736 // Replace the old (placeholder) branch with the new (merged) conditional
1737 // branch.
1738 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1739 NewEntryBlock, VMap);
1740
1741#ifndef NDEBUG1
1742 assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1743#endif
1744
1745 // Hoist the conditional values of the branches/selects.
1746 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1747
1748#ifndef NDEBUG1
1749 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1750#endif
1751
1752 // Create the combined branch condition and constant-fold the branches/selects
1753 // in the hot path.
1754 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1755 ProfileCount ? ProfileCount.getValue() : 0);
1756}
1757
1758// A helper for transformScopes. Clone the blocks in the scope (excluding the
1759// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1760// at the exit block.
1761void CHR::cloneScopeBlocks(CHRScope *Scope,
1762 BasicBlock *PreEntryBlock,
1763 BasicBlock *ExitBlock,
1764 Region *LastRegion,
1765 ValueToValueMapTy &VMap) {
1766 // Clone all the blocks. The original blocks will be the hot-path
1767 // CHR-optimized code and the cloned blocks will be the original unoptimized
1768 // code. This is so that the block pointers from the
1769 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1770 // which CHR should apply to.
1771 SmallVector<BasicBlock*, 8> NewBlocks;
1772 for (RegInfo &RI : Scope->RegInfos)
1773 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1774 // sub-Scopes.
1775 assert(BB != PreEntryBlock && "Don't copy the preetntry block")(static_cast<void> (0));
1776 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1777 NewBlocks.push_back(NewBB);
1778 VMap[BB] = NewBB;
1779 }
1780
1781 // Place the cloned blocks right after the original blocks (right before the
1782 // exit block of.)
1783 if (ExitBlock)
1784 F.getBasicBlockList().splice(ExitBlock->getIterator(),
1785 F.getBasicBlockList(),
1786 NewBlocks[0]->getIterator(), F.end());
1787
1788 // Update the cloned blocks/instructions to refer to themselves.
1789 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1790 for (Instruction &I : *NewBlocks[i])
1791 RemapInstruction(&I, VMap,
1792 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1793
1794 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1795 // the top-level region but we don't need to add PHIs. The trivial PHIs
1796 // inserted above will be updated here.
1797 if (ExitBlock)
1798 for (PHINode &PN : ExitBlock->phis())
1799 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1800 ++I) {
1801 BasicBlock *Pred = PN.getIncomingBlock(I);
1802 if (LastRegion->contains(Pred)) {
1803 Value *V = PN.getIncomingValue(I);
1804 auto It = VMap.find(V);
1805 if (It != VMap.end()) V = It->second;
1806 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned")(static_cast<void> (0));
1807 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1808 }
1809 }
1810}
1811
1812// A helper for transformScope. Replace the old (placeholder) branch with the
1813// new (merged) conditional branch.
1814BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1815 BasicBlock *EntryBlock,
1816 BasicBlock *NewEntryBlock,
1817 ValueToValueMapTy &VMap) {
1818 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1819 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&(static_cast<void> (0))
1820 "SplitBlock did not work correctly!")(static_cast<void> (0));
1821 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&(static_cast<void> (0))
1822 "NewEntryBlock's only pred must be EntryBlock")(static_cast<void> (0));
1823 assert(VMap.find(NewEntryBlock) != VMap.end() &&(static_cast<void> (0))
1824 "NewEntryBlock must have been copied")(static_cast<void> (0));
1825 OldBR->dropAllReferences();
1826 OldBR->eraseFromParent();
1827 // The true predicate is a placeholder. It will be replaced later in
1828 // fixupBranchesAndSelects().
1829 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1830 cast<BasicBlock>(VMap[NewEntryBlock]),
1831 ConstantInt::getTrue(F.getContext()));
1832 PreEntryBlock->getInstList().push_back(NewBR);
1833 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&(static_cast<void> (0))
1834 "NewEntryBlock's only pred must be EntryBlock")(static_cast<void> (0));
1835 return NewBR;
1836}
1837
1838// A helper for transformScopes. Create the combined branch condition and
1839// constant-fold the branches/selects in the hot path.
1840void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1841 BasicBlock *PreEntryBlock,
1842 BranchInst *MergedBR,
1843 uint64_t ProfileCount) {
1844 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1845 BranchProbability CHRBranchBias(1, 1);
1846 uint64_t NumCHRedBranches = 0;
1847 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1848 for (RegInfo &RI : Scope->CHRRegions) {
1849 Region *R = RI.R;
1850 if (RI.HasBranch) {
1851 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1852 ++NumCHRedBranches;
1853 }
1854 for (SelectInst *SI : RI.Selects) {
1855 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1856 ++NumCHRedBranches;
1857 }
1858 }
1859 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1860 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1861 ORE.emit([&]() {
1862 return OptimizationRemark(DEBUG_TYPE"chr",
1863 "CHR",
1864 // Refer to the hot (original) path
1865 MergedBR->getSuccessor(0)->getTerminator())
1866 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1867 << " branches or selects";
1868 });
1869 MergedBR->setCondition(MergedCondition);
1870 uint32_t Weights[] = {
1871 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1872 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1873 };
1874 MDBuilder MDB(F.getContext());
1875 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1876 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]do { } while (false)
1877 << "\n")do { } while (false);
1878}
1879
1880// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1881// and constant-fold a branch in the hot path.
1882void CHR::fixupBranch(Region *R, CHRScope *Scope,
1883 IRBuilder<> &IRB,
1884 Value *&MergedCondition,
1885 BranchProbability &CHRBranchBias) {
1886 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1887 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&(static_cast<void> (0))
1888 "Must be truthy or falsy")(static_cast<void> (0));
1889 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1890 assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&(static_cast<void> (0))
1891 "Must be in the bias map")(static_cast<void> (0));
1892 BranchProbability Bias = BranchBiasMap[R];
1893 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased")(static_cast<void> (0));
1894 // Take the min.
1895 if (CHRBranchBias > Bias)
1896 CHRBranchBias = Bias;
1897 BasicBlock *IfThen = BI->getSuccessor(1);
1898 BasicBlock *IfElse = BI->getSuccessor(0);
1899 BasicBlock *RegionExitBlock = R->getExit();
1900 assert(RegionExitBlock && "Null ExitBlock")(static_cast<void> (0));
1901 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&(static_cast<void> (0))
1902 IfThen != IfElse && "Invariant from findScopes")(static_cast<void> (0));
1903 if (IfThen == RegionExitBlock) {
1904 // Swap them so that IfThen means going into it and IfElse means skipping
1905 // it.
1906 std::swap(IfThen, IfElse);
1907 }
1908 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()do { } while (false)
1909 << " IfElse " << IfElse->getName() << "\n")do { } while (false);
1910 Value *Cond = BI->getCondition();
1911 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1912 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1913 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1914 MergedCondition);
1915 // Constant-fold the branch at ClonedEntryBlock.
1916 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&(static_cast<void> (0))
1917 "The successor shouldn't change")(static_cast<void> (0));
1918 Value *NewCondition = ConditionTrue ?
1919 ConstantInt::getTrue(F.getContext()) :
1920 ConstantInt::getFalse(F.getContext());
1921 BI->setCondition(NewCondition);
1922}
1923
1924// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1925// and constant-fold a select in the hot path.
1926void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1927 IRBuilder<> &IRB,
1928 Value *&MergedCondition,
1929 BranchProbability &CHRBranchBias) {
1930 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1931 assert((IsTrueBiased ||(static_cast<void> (0))
1932 Scope->FalseBiasedSelects.count(SI)) && "Must be biased")(static_cast<void> (0));
1933 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&(static_cast<void> (0))
1934 "Must be in the bias map")(static_cast<void> (0));
1935 BranchProbability Bias = SelectBiasMap[SI];
1936 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased")(static_cast<void> (0));
1937 // Take the min.
1938 if (CHRBranchBias > Bias)
1939 CHRBranchBias = Bias;
1940 Value *Cond = SI->getCondition();
1941 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1942 MergedCondition);
1943 Value *NewCondition = IsTrueBiased ?
1944 ConstantInt::getTrue(F.getContext()) :
1945 ConstantInt::getFalse(F.getContext());
1946 SI->setCondition(NewCondition);
1947}
1948
1949// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1950// condition.
1951void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1952 Instruction *BranchOrSelect,
1953 CHRScope *Scope,
1954 IRBuilder<> &IRB,
1955 Value *&MergedCondition) {
1956 if (IsTrueBiased) {
1957 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1958 } else {
1959 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1960 // branch, negate the icmp predicate and swap the branch targets and avoid
1961 // inserting an Xor to negate Cond.
1962 bool Done = false;
1963 if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1964 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1965 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1966 Done = true;
1967 }
1968 if (!Done) {
1969 Value *Negate = IRB.CreateXor(
1970 ConstantInt::getTrue(F.getContext()), Cond);
1971 MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1972 }
1973 }
1974}
1975
1976void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1977 unsigned I = 0;
1978 DenseSet<PHINode *> TrivialPHIs;
1979 for (CHRScope *Scope : CHRScopes) {
1980 transformScopes(Scope, TrivialPHIs);
1981 CHR_DEBUG(do { } while (false)
1982 std::ostringstream oss;do { } while (false)
1983 oss << " after transformScopes " << I++;do { } while (false)
1984 dumpIR(F, oss.str().c_str(), nullptr))do { } while (false);
1985 (void)I;
1986 }
1987}
1988
1989static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__))
1990dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1991 dbgs() << Label << " " << Scopes.size() << "\n";
1992 for (CHRScope *Scope : Scopes) {
1993 dbgs() << *Scope << "\n";
1994 }
1995}
1996
1997bool CHR::run() {
1998 if (!shouldApply(F, PSI))
1999 return false;
2000
2001 CHR_DEBUG(dumpIR(F, "before", nullptr))do { } while (false);
2002
2003 bool Changed = false;
2004 {
2005 CHR_DEBUG(do { } while (false)
2006 dbgs() << "RegionInfo:\n";do { } while (false)
2007 RI.print(dbgs()))do { } while (false);
2008
2009 // Recursively traverse the region tree and find regions that have biased
2010 // branches and/or selects and create scopes.
2011 SmallVector<CHRScope *, 8> AllScopes;
2012 findScopes(AllScopes);
2013 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"))do { } while (false);
2014
2015 // Split the scopes if 1) the conditiona values of the biased
2016 // branches/selects of the inner/lower scope can't be hoisted up to the
2017 // outermost/uppermost scope entry, or 2) the condition values of the biased
2018 // branches/selects in a scope (including subscopes) don't share at least
2019 // one common value.
2020 SmallVector<CHRScope *, 8> SplitScopes;
2021 splitScopes(AllScopes, SplitScopes);
2022 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"))do { } while (false);
2023
2024 // After splitting, set the biased regions and selects of a scope (a tree
2025 // root) that include those of the subscopes.
2026 classifyBiasedScopes(SplitScopes);
2027 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n")do { } while (false);
2028
2029 // Filter out the scopes that has only one biased region or select (CHR
2030 // isn't useful in such a case).
2031 SmallVector<CHRScope *, 8> FilteredScopes;
2032 filterScopes(SplitScopes, FilteredScopes);
2033 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"))do { } while (false);
2034
2035 // Set the regions to be CHR'ed and their hoist stops for each scope.
2036 SmallVector<CHRScope *, 8> SetScopes;
2037 setCHRRegions(FilteredScopes, SetScopes);
2038 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"))do { } while (false);
2039
2040 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2041 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2042 // to the hot path, rather than both hot and cold paths.
2043 SmallVector<CHRScope *, 8> SortedScopes;
2044 sortScopes(SetScopes, SortedScopes);
2045 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"))do { } while (false);
2046
2047 CHR_DEBUG(do { } while (false)
2048 dbgs() << "RegionInfo:\n";do { } while (false)
2049 RI.print(dbgs()))do { } while (false);
2050
2051 // Apply the CHR transformation.
2052 if (!SortedScopes.empty()) {
2053 transformScopes(SortedScopes);
2054 Changed = true;
2055 }
2056 }
2057
2058 if (Changed) {
2059 CHR_DEBUG(dumpIR(F, "after", &Stats))do { } while (false);
2060 ORE.emit([&]() {
2061 return OptimizationRemark(DEBUG_TYPE"chr", "Stats", &F)
2062 << ore::NV("Function", &F) << " "
2063 << "Reduced the number of branches in hot paths by "
2064 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2065 << " (static) and "
2066 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2067 << " (weighted by PGO count)";
2068 });
2069 }
2070
2071 return Changed;
2072}
2073
2074bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
2075 BlockFrequencyInfo &BFI =
2076 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
2077 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2078 ProfileSummaryInfo &PSI =
2079 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2080 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
2081 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
2082 std::make_unique<OptimizationRemarkEmitter>(&F);
2083 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
2084}
2085
2086namespace llvm {
2087
2088ControlHeightReductionPass::ControlHeightReductionPass() {
2089 parseCHRFilterFiles();
2090}
2091
2092PreservedAnalyses ControlHeightReductionPass::run(
2093 Function &F,
2094 FunctionAnalysisManager &FAM) {
2095 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2096 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2097 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2098 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2099 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2100 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2101 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2102 if (!Changed)
2103 return PreservedAnalyses::all();
2104 return PreservedAnalyses::none();
2105}
2106
2107} // namespace llvm