Bug Summary

File: lib/CodeGen/CodeGenPrepare.cpp
Warning: line 2236, column 18
Called C++ object pointer is null

Annotated Source Code

1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass munges the code in the input function to better prepare it for
11// SelectionDAG-based code generation. This works around limitations in its
12// basic-block-at-a-time approach. It should eventually be removed.
13//
14//===----------------------------------------------------------------------===//
15
16#include "llvm/ADT/DenseMap.h"
17#include "llvm/ADT/SetVector.h"
18#include "llvm/ADT/SmallSet.h"
19#include "llvm/ADT/Statistic.h"
20#include "llvm/Analysis/BlockFrequencyInfo.h"
21#include "llvm/Analysis/BranchProbabilityInfo.h"
22#include "llvm/Analysis/CFG.h"
23#include "llvm/Analysis/InstructionSimplify.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/MemoryBuiltins.h"
26#include "llvm/Analysis/ProfileSummaryInfo.h"
27#include "llvm/Analysis/TargetLibraryInfo.h"
28#include "llvm/Analysis/TargetTransformInfo.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/CodeGen/Analysis.h"
31#include "llvm/CodeGen/Passes.h"
32#include "llvm/CodeGen/TargetPassConfig.h"
33#include "llvm/IR/CallSite.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/GetElementPtrTypeIterator.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InlineAsm.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/MDBuilder.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Statepoint.h"
47#include "llvm/IR/ValueHandle.h"
48#include "llvm/IR/ValueMap.h"
49#include "llvm/Pass.h"
50#include "llvm/Support/BranchProbability.h"
51#include "llvm/Support/CommandLine.h"
52#include "llvm/Support/Debug.h"
53#include "llvm/Support/raw_ostream.h"
54#include "llvm/Target/TargetLowering.h"
55#include "llvm/Target/TargetSubtargetInfo.h"
56#include "llvm/Transforms/Utils/BasicBlockUtils.h"
57#include "llvm/Transforms/Utils/BuildLibCalls.h"
58#include "llvm/Transforms/Utils/BypassSlowDivision.h"
59#include "llvm/Transforms/Utils/Cloning.h"
60#include "llvm/Transforms/Utils/Local.h"
61#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
62#include "llvm/Transforms/Utils/ValueMapper.h"
63
64using namespace llvm;
65using namespace llvm::PatternMatch;
66
67#define DEBUG_TYPE "codegenprepare"
68
69STATISTIC(NumBlocksElim, "Number of blocks eliminated");
70STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
71STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
72STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
73 "sunken Cmps");
74STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
75 "of sunken Casts");
76STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
77 "computations were sunk");
78STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
79STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
80STATISTIC(NumAndsAdded,
81 "Number of and mask instructions added to form ext loads");
82STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
83STATISTIC(NumRetsDup, "Number of return instructions duplicated");
84STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
85STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
86STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
87
88STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
89STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
90STATISTIC(NumMemCmpGreaterThanMax,
91 "Number of memcmp calls with size greater than max size");
92STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
93
94static cl::opt<bool> DisableBranchOpts(
95 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
96 cl::desc("Disable branch optimizations in CodeGenPrepare"));
97
98static cl::opt<bool>
99 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
100 cl::desc("Disable GC optimizations in CodeGenPrepare"));
101
102static cl::opt<bool> DisableSelectToBranch(
103 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
104 cl::desc("Disable select to branch conversion."));
105
106static cl::opt<bool> AddrSinkUsingGEPs(
107 "addr-sink-using-gep", cl::Hidden, cl::init(true),
108 cl::desc("Address sinking in CGP using GEPs."));
109
110static cl::opt<bool> EnableAndCmpSinking(
111 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
112 cl::desc("Enable sinkinig and/cmp into branches."));
113
114static cl::opt<bool> DisableStoreExtract(
115 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
116 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
117
118static cl::opt<bool> StressStoreExtract(
119 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
120 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
121
122static cl::opt<bool> DisableExtLdPromotion(
123 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
124 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
125 "CodeGenPrepare"));
126
127static cl::opt<bool> StressExtLdPromotion(
128 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
129 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
130 "optimization in CodeGenPrepare"));
131
132static cl::opt<bool> DisablePreheaderProtect(
133 "disable-preheader-prot", cl::Hidden, cl::init(false),
134 cl::desc("Disable protection against removing loop preheaders"));
135
136static cl::opt<bool> ProfileGuidedSectionPrefix(
137 "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
138 cl::desc("Use profile info to add section prefix for hot/cold functions"));
139
140static cl::opt<unsigned> FreqRatioToSkipMerge(
141 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
142 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
143 "(frequency of destination block) is greater than this ratio"));
144
145static cl::opt<bool> ForceSplitStore(
146 "force-split-store", cl::Hidden, cl::init(false),
147 cl::desc("Force store splitting no matter what the target query says."));
148
149static cl::opt<bool>
150EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
151 cl::desc("Enable merging of redundant sexts when one is dominating"
152 " the other."), cl::init(true));
153
154static cl::opt<unsigned> MemCmpNumLoadsPerBlock(
155 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
156 cl::desc("The number of loads per basic block for inline expansion of "
157 "memcmp that is only being compared against zero."));
158
159namespace {
160typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
161typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
162typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
163typedef SmallVector<Instruction *, 16> SExts;
164typedef DenseMap<Value *, SExts> ValueToSExts;
165class TypePromotionTransaction;
166
167 class CodeGenPrepare : public FunctionPass {
168 const TargetMachine *TM;
169 const TargetSubtargetInfo *SubtargetInfo;
170 const TargetLowering *TLI;
171 const TargetRegisterInfo *TRI;
172 const TargetTransformInfo *TTI;
173 const TargetLibraryInfo *TLInfo;
174 const LoopInfo *LI;
175 std::unique_ptr<BlockFrequencyInfo> BFI;
176 std::unique_ptr<BranchProbabilityInfo> BPI;
177
178 /// As we scan instructions optimizing them, this is the next instruction
179 /// to optimize. Transforms that can invalidate this should update it.
180 BasicBlock::iterator CurInstIterator;
181
182 /// Keeps track of non-local addresses that have been sunk into a block.
183 /// This allows us to avoid inserting duplicate code for blocks with
184 /// multiple load/stores of the same address.
185 ValueMap<Value*, Value*> SunkAddrs;
186
187 /// Keeps track of all instructions inserted for the current function.
188 SetOfInstrs InsertedInsts;
189 /// Keeps track of the type of the related instruction before their
190 /// promotion for the current function.
191 InstrToOrigTy PromotedInsts;
192
193 /// Keep track of instructions removed during promotion.
194 SetOfInstrs RemovedInsts;
195
196 /// Keep track of sext chains based on their initial value.
197 DenseMap<Value *, Instruction *> SeenChainsForSExt;
198
199 /// Keep track of SExt promoted.
200 ValueToSExts ValToSExtendedUses;
201
202 /// True if CFG is modified in any way.
203 bool ModifiedDT;
204
205 /// True if optimizing for size.
206 bool OptSize;
207
208 /// DataLayout for the Function being processed.
209 const DataLayout *DL;
210
211 public:
212 static char ID; // Pass identification, replacement for typeid
213 CodeGenPrepare()
214 : FunctionPass(ID), TM(nullptr), TLI(nullptr), TTI(nullptr),
215 DL(nullptr) {
216 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
217 }
218 bool runOnFunction(Function &F) override;
219
220 StringRef getPassName() const override { return "CodeGen Prepare"; }
221
222 void getAnalysisUsage(AnalysisUsage &AU) const override {
223 // FIXME: When we can selectively preserve passes, preserve the domtree.
224 AU.addRequired<ProfileSummaryInfoWrapperPass>();
225 AU.addRequired<TargetLibraryInfoWrapperPass>();
226 AU.addRequired<TargetTransformInfoWrapperPass>();
227 AU.addRequired<LoopInfoWrapperPass>();
228 }
229
230 private:
231 bool eliminateFallThrough(Function &F);
232 bool eliminateMostlyEmptyBlocks(Function &F);
233 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
234 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
235 void eliminateMostlyEmptyBlock(BasicBlock *BB);
236 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
237 bool isPreheader);
238 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
239 bool optimizeInst(Instruction *I, bool &ModifiedDT);
240 bool optimizeMemoryInst(Instruction *I, Value *Addr,
241 Type *AccessTy, unsigned AS);
242 bool optimizeInlineAsmInst(CallInst *CS);
243 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
244 bool optimizeExt(Instruction *&I);
245 bool optimizeExtUses(Instruction *I);
246 bool optimizeLoadExt(LoadInst *I);
247 bool optimizeSelectInst(SelectInst *SI);
248 bool optimizeShuffleVectorInst(ShuffleVectorInst *SI);
249 bool optimizeSwitchInst(SwitchInst *CI);
250 bool optimizeExtractElementInst(Instruction *Inst);
251 bool dupRetToEnableTailCallOpts(BasicBlock *BB);
252 bool placeDbgValues(Function &F);
253 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
254 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
255 bool tryToPromoteExts(TypePromotionTransaction &TPT,
256 const SmallVectorImpl<Instruction *> &Exts,
257 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
258 unsigned CreatedInstsCost = 0);
259 bool mergeSExts(Function &F);
260 bool performAddressTypePromotion(
261 Instruction *&Inst,
262 bool AllowPromotionWithoutCommonHeader,
263 bool HasPromoted, TypePromotionTransaction &TPT,
264 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
265 bool splitBranchCondition(Function &F);
266 bool simplifyOffsetableRelocate(Instruction &I);
267 bool splitIndirectCriticalEdges(Function &F);
268 };
269}
270
271char CodeGenPrepare::ID = 0;
272INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
273 "Optimize for code generation", false, false)
274INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
275INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
276 "Optimize for code generation", false, false)
277
278FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
279
280bool CodeGenPrepare::runOnFunction(Function &F) {
281 if (skipFunction(F))
282 return false;
283
284 DL = &F.getParent()->getDataLayout();
285
286 bool EverMadeChange = false;
287 // Clear per function information.
288 InsertedInsts.clear();
289 PromotedInsts.clear();
290 BFI.reset();
291 BPI.reset();
292
293 ModifiedDT = false;
294 if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
295 TM = &TPC->getTM<TargetMachine>();
296 SubtargetInfo = TM->getSubtargetImpl(F);
297 TLI = SubtargetInfo->getTargetLowering();
298 TRI = SubtargetInfo->getRegisterInfo();
299 }
300 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
301 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
302 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
303 OptSize = F.optForSize();
304
305 if (ProfileGuidedSectionPrefix) {
306 ProfileSummaryInfo *PSI =
307 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
308 if (PSI->isFunctionHotInCallGraph(&F))
309 F.setSectionPrefix(".hot");
310 else if (PSI->isFunctionColdInCallGraph(&F))
311 F.setSectionPrefix(".unlikely");
312 }
313
314 /// This optimization identifies DIV instructions that can be
315 /// profitably bypassed and carried out with a shorter, faster divide.
316 if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
317 const DenseMap<unsigned int, unsigned int> &BypassWidths =
318 TLI->getBypassSlowDivWidths();
319 BasicBlock* BB = &*F.begin();
320 while (BB != nullptr) {
321 // bypassSlowDivision may create new BBs, but we don't want to reapply the
322 // optimization to those blocks.
323 BasicBlock* Next = BB->getNextNode();
324 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
325 BB = Next;
326 }
327 }
328
329 // Eliminate blocks that contain only PHI nodes and an
330 // unconditional branch.
331 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
332
333 // If llvm.dbg.value is far away from the value, then ISel may not be able
334 // to handle it properly. ISel will drop llvm.dbg.value if it cannot
335 // find a node corresponding to the value.
336 EverMadeChange |= placeDbgValues(F);
337
338 if (!DisableBranchOpts)
339 EverMadeChange |= splitBranchCondition(F);
340
341 // Split some critical edges where one of the sources is an indirect branch,
342 // to help generate sane code for PHIs involving such edges.
343 EverMadeChange |= splitIndirectCriticalEdges(F);
344
345 bool MadeChange = true;
346 while (MadeChange) {
347 MadeChange = false;
348 SeenChainsForSExt.clear();
349 ValToSExtendedUses.clear();
350 RemovedInsts.clear();
351 for (Function::iterator I = F.begin(); I != F.end(); ) {
352 BasicBlock *BB = &*I++;
353 bool ModifiedDTOnIteration = false;
354 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
355
356 // Restart BB iteration if the dominator tree of the Function was changed
357 if (ModifiedDTOnIteration)
358 break;
359 }
360 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
361 MadeChange |= mergeSExts(F);
362
363 // Really free removed instructions during promotion.
364 for (Instruction *I : RemovedInsts)
365 I->deleteValue();
366
367 EverMadeChange |= MadeChange;
368 }
369
370 SunkAddrs.clear();
371
372 if (!DisableBranchOpts) {
373 MadeChange = false;
374 SmallPtrSet<BasicBlock*, 8> WorkList;
375 for (BasicBlock &BB : F) {
376 SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
377 MadeChange |= ConstantFoldTerminator(&BB, true);
378 if (!MadeChange) continue;
379
380 for (SmallVectorImpl<BasicBlock*>::iterator
381 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
382 if (pred_begin(*II) == pred_end(*II))
383 WorkList.insert(*II);
384 }
385
386 // Delete the dead blocks and any of their dead successors.
387 MadeChange |= !WorkList.empty();
388 while (!WorkList.empty()) {
389 BasicBlock *BB = *WorkList.begin();
390 WorkList.erase(BB);
391 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
392
393 DeleteDeadBlock(BB);
394
395 for (SmallVectorImpl<BasicBlock*>::iterator
396 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
397 if (pred_begin(*II) == pred_end(*II))
398 WorkList.insert(*II);
399 }
400
401 // Merge pairs of basic blocks with unconditional branches, connected by
402 // a single edge.
403 if (EverMadeChange || MadeChange)
404 MadeChange |= eliminateFallThrough(F);
405
406 EverMadeChange |= MadeChange;
407 }
408
409 if (!DisableGCOpts) {
410 SmallVector<Instruction *, 2> Statepoints;
411 for (BasicBlock &BB : F)
412 for (Instruction &I : BB)
413 if (isStatepoint(I))
414 Statepoints.push_back(&I);
415 for (auto &I : Statepoints)
416 EverMadeChange |= simplifyOffsetableRelocate(*I);
417 }
418
419 return EverMadeChange;
420}
421
422/// Merge basic blocks which are connected by a single edge, where one of the
423/// basic blocks has a single successor pointing to the other basic block,
424/// which has a single predecessor.
425bool CodeGenPrepare::eliminateFallThrough(Function &F) {
426 bool Changed = false;
427 // Scan all of the blocks in the function, except for the entry block.
428 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
429 BasicBlock *BB = &*I++;
430 // If the destination block has a single pred, then this is a trivial
431 // edge, just collapse it.
432 BasicBlock *SinglePred = BB->getSinglePredecessor();
433
434 // Don't merge if BB's address is taken.
435 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
436
437 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
438 if (Term && !Term->isConditional()) {
439 Changed = true;
440 DEBUG(dbgs() << "To merge:\n" << *SinglePred << "\n\n\n");
441 // Remember if SinglePred was the entry block of the function.
442 // If so, we will need to move BB back to the entry position.
443 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
444 MergeBasicBlockIntoOnlyPred(BB, nullptr);
445
446 if (isEntry && BB != &BB->getParent()->getEntryBlock())
447 BB->moveBefore(&BB->getParent()->getEntryBlock());
448
449 // We have erased a block. Update the iterator.
450 I = BB->getIterator();
451 }
452 }
453 return Changed;
454}
455
456/// Find a destination block from BB if BB is mergeable empty block.
457BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
458 // If this block doesn't end with an uncond branch, ignore it.
459 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
460 if (!BI || !BI->isUnconditional())
461 return nullptr;
462
463 // If the instruction before the branch (skipping debug info) isn't a phi
464 // node, then other stuff is happening here.
465 BasicBlock::iterator BBI = BI->getIterator();
466 if (BBI != BB->begin()) {
467 --BBI;
468 while (isa<DbgInfoIntrinsic>(BBI)) {
469 if (BBI == BB->begin())
470 break;
471 --BBI;
472 }
473 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
474 return nullptr;
475 }
476
477 // Do not break infinite loops.
478 BasicBlock *DestBB = BI->getSuccessor(0);
479 if (DestBB == BB)
480 return nullptr;
481
482 if (!canMergeBlocks(BB, DestBB))
483 DestBB = nullptr;
484
485 return DestBB;
486}
487
488// Return the unique indirectbr predecessor of a block. This may return null
489// even if such a predecessor exists, if it's not useful for splitting.
490// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
491// predecessors of BB.
492static BasicBlock *
493findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
494 // If the block doesn't have any PHIs, we don't care about it, since there's
495 // no point in splitting it.
496 PHINode *PN = dyn_cast<PHINode>(BB->begin());
497 if (!PN)
498 return nullptr;
499
500 // Verify we have exactly one IBR predecessor.
501 // Conservatively bail out if one of the other predecessors is not a "regular"
502 // terminator (that is, not a switch or a br).
503 BasicBlock *IBB = nullptr;
504 for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
505 BasicBlock *PredBB = PN->getIncomingBlock(Pred);
506 TerminatorInst *PredTerm = PredBB->getTerminator();
507 switch (PredTerm->getOpcode()) {
508 case Instruction::IndirectBr:
509 if (IBB)
510 return nullptr;
511 IBB = PredBB;
512 break;
513 case Instruction::Br:
514 case Instruction::Switch:
515 OtherPreds.push_back(PredBB);
516 continue;
517 default:
518 return nullptr;
519 }
520 }
521
522 return IBB;
523}
524
525// Split critical edges where the source of the edge is an indirectbr
526// instruction. This isn't always possible, but we can handle some easy cases.
527// This is useful because MI is unable to split such critical edges,
528// which means it will not be able to sink instructions along those edges.
529// This is especially painful for indirect branches with many successors, where
530// we end up having to prepare all outgoing values in the origin block.
531//
532// Our normal algorithm for splitting critical edges requires us to update
533// the outgoing edges of the edge origin block, but for an indirectbr this
534// is hard, since it would require finding and updating the block addresses
535// the indirect branch uses. But if a block only has a single indirectbr
536// predecessor, with the others being regular branches, we can do it in a
537// different way.
538// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
539// We can split D into D0 and D1, where D0 contains only the PHIs from D,
540// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
541// create the following structure:
542// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
543bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
544 // Check whether the function has any indirectbrs, and collect which blocks
545 // they may jump to. Since most functions don't have indirect branches,
546 // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
547 SmallSetVector<BasicBlock *, 16> Targets;
548 for (auto &BB : F) {
549 auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
550 if (!IBI)
551 continue;
552
553 for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
554 Targets.insert(IBI->getSuccessor(Succ));
555 }
556
557 if (Targets.empty())
558 return false;
559
560 bool Changed = false;
561 for (BasicBlock *Target : Targets) {
562 SmallVector<BasicBlock *, 16> OtherPreds;
563 BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
564 // If we did not find an indirectbr, or if the indirectbr is the only
565 // incoming edge, this isn't the kind of edge we're looking for.
566 if (!IBRPred || OtherPreds.empty())
567 continue;
568
569 // Don't even think about ehpads/landingpads.
570 Instruction *FirstNonPHI = Target->getFirstNonPHI();
571 if (FirstNonPHI->isEHPad() || Target->isLandingPad())
572 continue;
573
574 BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
575 // It's possible Target was its own successor through an indirectbr.
576 // In this case, the indirectbr now comes from BodyBlock.
577 if (IBRPred == Target)
578 IBRPred = BodyBlock;
579
580 // At this point Target only has PHIs, and BodyBlock has the rest of the
581 // block's body. Create a copy of Target that will be used by the "direct"
582 // preds.
583 ValueToValueMapTy VMap;
584 BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
585
586 for (BasicBlock *Pred : OtherPreds) {
587 // If the target is a loop to itself, then the terminator of the split
588 // block needs to be updated.
589 if (Pred == Target)
590 BodyBlock->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
591 else
592 Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
593 }
594
595 // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
596 // they are clones, so the number of PHIs is the same.
597 // (a) Remove the edge coming from IBRPred from the "Direct" PHI
598 // (b) Leave that as the only edge in the "Indirect" PHI.
599 // (c) Merge the two in the body block.
600 BasicBlock::iterator Indirect = Target->begin(),
601 End = Target->getFirstNonPHI()->getIterator();
602 BasicBlock::iterator Direct = DirectSucc->begin();
603 BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
604
605 assert(&*End == Target->getTerminator() &&((&*End == Target->getTerminator() && "Block was expected to only contain PHIs"
) ? static_cast<void> (0) : __assert_fail ("&*End == Target->getTerminator() && \"Block was expected to only contain PHIs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/CodeGen/CodeGenPrepare.cpp"
, 606, __PRETTY_FUNCTION__))
606 "Block was expected to only contain PHIs")((&*End == Target->getTerminator() && "Block was expected to only contain PHIs"
) ? static_cast<void> (0) : __assert_fail ("&*End == Target->getTerminator() && \"Block was expected to only contain PHIs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/CodeGen/CodeGenPrepare.cpp"
, 606, __PRETTY_FUNCTION__))
;
607
608 while (Indirect != End) {
609 PHINode *DirPHI = cast<PHINode>(Direct);
610 PHINode *IndPHI = cast<PHINode>(Indirect);
611
612 // Now, clean up - the direct block shouldn't get the indirect value,
613 // and vice versa.
614 DirPHI->removeIncomingValue(IBRPred);
615 Direct++;
616
617 // Advance the pointer here, to avoid invalidation issues when the old
618 // PHI is erased.
619 Indirect++;
620
621 PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
622 NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
623 IBRPred);
624
625 // Create a PHI in the body block, to merge the direct and indirect
626 // predecessors.
627 PHINode *MergePHI =
628 PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
629 MergePHI->addIncoming(NewIndPHI, Target);
630 MergePHI->addIncoming(DirPHI, DirectSucc);
631
632 IndPHI->replaceAllUsesWith(MergePHI);
633 IndPHI->eraseFromParent();
634 }
635
636 Changed = true;
637 }
638
639 return Changed;
640}
641
642/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
643/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
644/// edges in ways that are non-optimal for isel. Start by eliminating these
645/// blocks so we can split them the way we want them.
646bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
647 SmallPtrSet<BasicBlock *, 16> Preheaders;
648 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
649 while (!LoopList.empty()) {
650 Loop *L = LoopList.pop_back_val();
651 LoopList.insert(LoopList.end(), L->begin(), L->end());
652 if (BasicBlock *Preheader = L->getLoopPreheader())
653 Preheaders.insert(Preheader);
654 }
655
656 bool MadeChange = false;
657 // Note that this intentionally skips the entry block.
658 for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
659 BasicBlock *BB = &*I++;
660 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
661 if (!DestBB ||
662 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
663 continue;
664
665 eliminateMostlyEmptyBlock(BB);
666 MadeChange = true;
667 }
668 return MadeChange;
669}
670
671bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
672 BasicBlock *DestBB,
673 bool isPreheader) {
674 // Do not delete loop preheaders if doing so would create a critical edge.
675 // Loop preheaders can be good locations to spill registers. If the
676 // preheader is deleted and we create a critical edge, registers may be
677 // spilled in the loop body instead.
678 if (!DisablePreheaderProtect && isPreheader &&
679 !(BB->getSinglePredecessor() &&
680 BB->getSinglePredecessor()->getSingleSuccessor()))
681 return false;
682
683 // Try to skip merging if the unique predecessor of BB is terminated by a
684 // switch or indirect branch instruction, and BB is used as an incoming block
685 // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
686 // add COPY instructions in the predecessor of BB instead of BB (if it is not
687 // merged). Note that the critical edge created by merging such blocks won't be
688 // split in MachineSink because the jump table is not analyzable. By keeping
689 // such an empty block (BB), ISel will place COPY instructions in BB, not in the
690 // predecessor of BB.
691 BasicBlock *Pred = BB->getUniquePredecessor();
692 if (!Pred ||
693 !(isa<SwitchInst>(Pred->getTerminator()) ||
694 isa<IndirectBrInst>(Pred->getTerminator())))
695 return true;
696
697 if (BB->getTerminator() != BB->getFirstNonPHI())
698 return true;
699
700 // We use a simple cost heuristic which determines that skipping merging is
701 // profitable if the cost of skipping merging is less than the cost of
702 // merging: Cost(skipping merging) < Cost(merging BB), where the
703 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
704 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
705 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
706 // Freq(Pred) / Freq(BB) > 2.
707 // Note that if there are multiple empty blocks sharing the same incoming
708 // value for the PHIs in the DestBB, we consider them together. In such a
709 // case, Cost(merging BB) will be the sum of their frequencies.
710
711 if (!isa<PHINode>(DestBB->begin()))
712 return true;
713
714 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
715
716 // Find all other incoming blocks from which incoming values of all PHIs in
717 // DestBB are the same as the ones from BB.
718 for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
719 ++PI) {
720 BasicBlock *DestBBPred = *PI;
721 if (DestBBPred == BB)
722 continue;
723
724 bool HasAllSameValue = true;
725 BasicBlock::const_iterator DestBBI = DestBB->begin();
726 while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
727 if (DestPN->getIncomingValueForBlock(BB) !=
728 DestPN->getIncomingValueForBlock(DestBBPred)) {
729 HasAllSameValue = false;
730 break;
731 }
732 }
733 if (HasAllSameValue)
734 SameIncomingValueBBs.insert(DestBBPred);
735 }
736
737 // See if all BB's incoming values are the same as the value from Pred. In
738 // this case, there is no reason to skip merging because COPYs are expected
739 // to be placed in Pred already.
740 if (SameIncomingValueBBs.count(Pred))
741 return true;
742
743 if (!BFI) {
744 Function &F = *BB->getParent();
745 LoopInfo LI{DominatorTree(F)};
746 BPI.reset(new BranchProbabilityInfo(F, LI));
747 BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
748 }
749
750 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
751 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
752
753 for (auto SameValueBB : SameIncomingValueBBs)
754 if (SameValueBB->getUniquePredecessor() == Pred &&
755 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
756 BBFreq += BFI->getBlockFreq(SameValueBB);
757
758 return PredFreq.getFrequency() <=
759 BBFreq.getFrequency() * FreqRatioToSkipMerge;
760}
761
762/// Return true if we can merge BB into DestBB if there is a single
763/// unconditional branch between them, and BB contains no other non-phi
764/// instructions.
765bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
766 const BasicBlock *DestBB) const {
767 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
768 // the successor. If there are more complex conditions (e.g. preheaders),
769 // don't mess around with them.
770 BasicBlock::const_iterator BBI = BB->begin();
771 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
772 for (const User *U : PN->users()) {
773 const Instruction *UI = cast<Instruction>(U);
774 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
775 return false;
776 // If User is inside the DestBB block and it is a PHINode, then check the
777 // incoming value. If the incoming value is not from BB, then this is
778 // a complex condition (e.g. preheaders) we want to avoid here.
779 if (UI->getParent() == DestBB) {
780 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
781 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
782 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
783 if (Insn && Insn->getParent() == BB &&
784 Insn->getParent() != UPN->getIncomingBlock(I))
785 return false;
786 }
787 }
788 }
789 }
790
791 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
792 // and DestBB may have conflicting incoming values for the block. If so, we
793 // can't merge the block.
794 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
795 if (!DestBBPN) return true; // no conflict.
796
797 // Collect the preds of BB.
798 SmallPtrSet<const BasicBlock*, 16> BBPreds;
799 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
800 // It is faster to get preds from a PHI than with pred_iterator.
801 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
802 BBPreds.insert(BBPN->getIncomingBlock(i));
803 } else {
804 BBPreds.insert(pred_begin(BB), pred_end(BB));
805 }
806
807 // Walk the preds of DestBB.
808 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
809 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
810 if (BBPreds.count(Pred)) { // Common predecessor?
811 BBI = DestBB->begin();
812 while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
813 const Value *V1 = PN->getIncomingValueForBlock(Pred);
814 const Value *V2 = PN->getIncomingValueForBlock(BB);
815
816 // If V2 is a phi node in BB, look up what the mapped value will be.
817 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
818 if (V2PN->getParent() == BB)
819 V2 = V2PN->getIncomingValueForBlock(Pred);
820
821 // If there is a conflict, bail out.
822 if (V1 != V2) return false;
823 }
824 }
825 }
826
827 return true;
828}
829
830
831/// Eliminate a basic block that has only phi's and an unconditional branch in
832/// it.
833void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
834 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
835 BasicBlock *DestBB = BI->getSuccessor(0);
836
837 DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
838
839 // If the destination block has a single pred, then this is a trivial edge,
840 // just collapse it.
841 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
842 if (SinglePred != DestBB) {
843 // Remember if SinglePred was the entry block of the function. If so, we
844 // will need to move BB back to the entry position.
845 bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
846 MergeBasicBlockIntoOnlyPred(DestBB, nullptr);
847
848 if (isEntry && BB != &BB->getParent()->getEntryBlock())
849 BB->moveBefore(&BB->getParent()->getEntryBlock());
850
851 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
852 return;
853 }
854 }
855
856 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
857 // to handle the new incoming edges it is about to have.
858 PHINode *PN;
859 for (BasicBlock::iterator BBI = DestBB->begin();
860 (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
861 // Remove the incoming value for BB, and remember it.
862 Value *InVal = PN->removeIncomingValue(BB, false);
863
864 // Two options: either the InVal is a phi node defined in BB or it is some
865 // value that dominates BB.
866 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
867 if (InValPhi && InValPhi->getParent() == BB) {
868 // Add all of the input values of the input PHI as inputs of this phi.
869 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
870 PN->addIncoming(InValPhi->getIncomingValue(i),
871 InValPhi->getIncomingBlock(i));
872 } else {
873 // Otherwise, add one instance of the dominating value for each edge that
874 // we will be adding.
875 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
876 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
877 PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
878 } else {
879 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
880 PN->addIncoming(InVal, *PI);
881 }
882 }
883 }
884
885 // The PHIs are now updated, change everything that refers to BB to use
886 // DestBB and remove BB.
887 BB->replaceAllUsesWith(DestBB);
888 BB->eraseFromParent();
889 ++NumBlocksElim;
890
891 DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
892}
893
894// Computes a map of base pointer relocation instructions to corresponding
895// derived pointer relocation instructions given a vector of all relocate calls
896static void computeBaseDerivedRelocateMap(
897 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
898 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
899 &RelocateInstMap) {
900 // Collect information in two maps: one primarily for locating the base object
901 // while filling the second map; the second map is the final structure holding
902 // a mapping between Base and corresponding Derived relocate calls
903 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
904 for (auto *ThisRelocate : AllRelocateCalls) {
905 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
906 ThisRelocate->getDerivedPtrIndex());
907 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
908 }
909 for (auto &Item : RelocateIdxMap) {
910 std::pair<unsigned, unsigned> Key = Item.first;
911 if (Key.first == Key.second)
912 // Base relocation: nothing to insert
913 continue;
914
915 GCRelocateInst *I = Item.second;
916 auto BaseKey = std::make_pair(Key.first, Key.first);
917
918 // We're iterating over RelocateIdxMap so we cannot modify it.
919 auto MaybeBase = RelocateIdxMap.find(BaseKey);
920 if (MaybeBase == RelocateIdxMap.end())
921 // TODO: We might want to insert a new base object relocate and gep off
922 // that, if there are enough derived object relocates.
923 continue;
924
925 RelocateInstMap[MaybeBase->second].push_back(I);
926 }
927}
928
929// Accepts a GEP and extracts the operands into a vector provided they're all
930// small integer constants
931static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
932 SmallVectorImpl<Value *> &OffsetV) {
933 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
934 // Only accept small constant integer operands
935 auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
936 if (!Op || Op->getZExtValue() > 20)
937 return false;
938 }
939
940 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
941 OffsetV.push_back(GEP->getOperand(i));
942 return true;
943}
944
945// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
946// replace, computes a replacement, and applies it.
947static bool
948simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
949 const SmallVectorImpl<GCRelocateInst *> &Targets) {
950 bool MadeChange = false;
951 for (GCRelocateInst *ToReplace : Targets) {
952 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
953 "Not relocating a derived object of the original base object");
954 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
955 // A duplicate relocate call. TODO: coalesce duplicates.
956 continue;
957 }
958
959 if (RelocatedBase->getParent() != ToReplace->getParent()) {
960 // Base and derived relocates are in different basic blocks.
961 // In this case transform is only valid when base dominates derived
962 // relocate. However it would be too expensive to check dominance
963 // for each such relocate, so we skip the whole transformation.
964 continue;
965 }
966
967 Value *Base = ToReplace->getBasePtr();
968 auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
969 if (!Derived || Derived->getPointerOperand() != Base)
970 continue;
971
972 SmallVector<Value *, 2> OffsetV;
973 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
974 continue;
975
976 // Create a Builder and replace the target callsite with a gep
977 assert(RelocatedBase->getNextNode() &&
978 "Should always have one since it's not a terminator");
979
980 // Insert after RelocatedBase
981 IRBuilder<> Builder(RelocatedBase->getNextNode());
982 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
983
984 // If gc_relocate does not match the actual type, cast it to the right type.
985 // In theory, there must be a bitcast after gc_relocate if the type does not
986 // match, and we should reuse it to get the derived pointer. But it could be
987 // cases like this:
988 // bb1:
989 // ...
990 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
991 // br label %merge
992 //
993 // bb2:
994 // ...
995 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
996 // br label %merge
997 //
998 // merge:
999 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1000 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1001 //
1002 // In this case, we cannot find the bitcast anymore. So we insert a new bitcast
1003 // whether or not there is already one. In this way, we can handle all cases, and
1004 // the extra bitcast should be optimized away in later passes.
1005 Value *ActualRelocatedBase = RelocatedBase;
1006 if (RelocatedBase->getType() != Base->getType()) {
1007 ActualRelocatedBase =
1008 Builder.CreateBitCast(RelocatedBase, Base->getType());
1009 }
1010 Value *Replacement = Builder.CreateGEP(
1011 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
1012 Replacement->takeName(ToReplace);
1013 // If the newly generated derived pointer's type does not match the original derived
1014 // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
1015 Value *ActualReplacement = Replacement;
1016 if (Replacement->getType() != ToReplace->getType()) {
1017 ActualReplacement =
1018 Builder.CreateBitCast(Replacement, ToReplace->getType());
1019 }
1020 ToReplace->replaceAllUsesWith(ActualReplacement);
1021 ToReplace->eraseFromParent();
1022
1023 MadeChange = true;
1024 }
1025 return MadeChange;
1026}
1027
1028// Turns this:
1029//
1030// %base = ...
1031// %ptr = gep %base + 15
1032// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1033// %base' = relocate(%tok, i32 4, i32 4)
1034// %ptr' = relocate(%tok, i32 4, i32 5)
1035// %val = load %ptr'
1036//
1037// into this:
1038//
1039// %base = ...
1040// %ptr = gep %base + 15
1041// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1042// %base' = gc.relocate(%tok, i32 4, i32 4)
1043// %ptr' = gep %base' + 15
1044// %val = load %ptr'
1045bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1046 bool MadeChange = false;
1047 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1048
1049 for (auto *U : I.users())
1050 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1051 // Collect all the relocate calls associated with a statepoint
1052 AllRelocateCalls.push_back(Relocate);
1053
1054 // We need at least one base pointer relocation + one derived pointer
1055 // relocation to mangle
1056 if (AllRelocateCalls.size() < 2)
1057 return false;
1058
1059 // RelocateInstMap is a mapping from the base relocate instruction to the
1060 // corresponding derived relocate instructions
1061 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1062 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1063 if (RelocateInstMap.empty())
1064 return false;
1065
1066 for (auto &Item : RelocateInstMap)
1067 // Item.first is the RelocatedBase to offset against
1068 // Item.second is the vector of Targets to replace
1069 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1070 return MadeChange;
1071}
1072
1073/// SinkCast - Sink the specified cast instruction into its user blocks
1074static bool SinkCast(CastInst *CI) {
1075 BasicBlock *DefBB = CI->getParent();
1076
1077 /// InsertedCasts - Only insert a cast in each block once.
1078 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1079
1080 bool MadeChange = false;
1081 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1082 UI != E; ) {
1083 Use &TheUse = UI.getUse();
1084 Instruction *User = cast<Instruction>(*UI);
1085
1086 // Figure out which BB this cast is used in. For PHI's this is the
1087 // appropriate predecessor block.
1088 BasicBlock *UserBB = User->getParent();
1089 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1090 UserBB = PN->getIncomingBlock(TheUse);
1091 }
1092
1093 // Preincrement use iterator so we don't invalidate it.
1094 ++UI;
1095
1096 // The first insertion point of a block containing an EH pad is after the
1097 // pad. If the pad is the user, we cannot sink the cast past the pad.
1098 if (User->isEHPad())
1099 continue;
1100
1101 // If the block selected to receive the cast is an EH pad that does not
1102 // allow non-PHI instructions before the terminator, we can't sink the
1103 // cast.
1104 if (UserBB->getTerminator()->isEHPad())
1105 continue;
1106
1107 // If this user is in the same block as the cast, don't change the cast.
1108 if (UserBB == DefBB) continue;
1109
1110 // If we have already inserted a cast into this block, use it.
1111 CastInst *&InsertedCast = InsertedCasts[UserBB];
1112
1113 if (!InsertedCast) {
1114 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1115 assert(InsertPt != UserBB->end());
1116 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1117 CI->getType(), "", &*InsertPt);
1118 }
1119
1120 // Replace a use of the cast with a use of the new cast.
1121 TheUse = InsertedCast;
1122 MadeChange = true;
1123 ++NumCastUses;
1124 }
1125
1126 // If we removed all uses, nuke the cast.
1127 if (CI->use_empty()) {
1128 CI->eraseFromParent();
1129 MadeChange = true;
1130 }
1131
1132 return MadeChange;
1133}
1134
1135/// If the specified cast instruction is a noop copy (e.g. it's casting from
1136/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1137/// reduce the number of virtual registers that must be created and coalesced.
1138///
1139/// Return true if any changes are made.
1140///
1141static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1142 const DataLayout &DL) {
1143 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1144 // than sinking only nop casts, but is helpful on some platforms.
1145 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1146 if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
1147 ASC->getDestAddressSpace()))
1148 return false;
1149 }
1150
1151 // If this is a noop copy,
1152 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1153 EVT DstVT = TLI.getValueType(DL, CI->getType());
1154
1155 // This is an fp<->int conversion?
1156 if (SrcVT.isInteger() != DstVT.isInteger())
1157 return false;
1158
1159 // If this is an extension, it will be a zero or sign extension, which
1160 // isn't a noop.
1161 if (SrcVT.bitsLT(DstVT)) return false;
1162
1163 // If these values will be promoted, find out what they will be promoted
1164 // to. This helps us consider truncates on PPC as noop copies when they
1165 // are.
1166 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1167 TargetLowering::TypePromoteInteger)
1168 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1169 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1170 TargetLowering::TypePromoteInteger)
1171 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1172
1173 // If, after promotion, these are the same types, this is a noop copy.
1174 if (SrcVT != DstVT)
1175 return false;
1176
1177 return SinkCast(CI);
1178}
1179
1180/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
1181/// possible.
1182///
1183/// Return true if any changes were made.
1184static bool CombineUAddWithOverflow(CmpInst *CI) {
1185 Value *A, *B;
1186 Instruction *AddI;
1187 if (!match(CI,
1188 m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
1189 return false;
1190
1191 Type *Ty = AddI->getType();
1192 if (!isa<IntegerType>(Ty))
1193 return false;
1194
1195 // We don't want to move around uses of condition values this late, so we
1196 // check if it is legal to create the call to the intrinsic in the basic
1197 // block containing the icmp:
1198
1199 if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
1200 return false;
1201
1202#ifndef NDEBUG
1203 // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
1204 // for now:
1205 if (AddI->hasOneUse())
1206 assert(*AddI->user_begin() == CI && "expected!");
1207#endif
1208
1209 Module *M = CI->getModule();
1210 Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
1211
1212 auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
1213
1214 auto *UAddWithOverflow =
1215 CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
1216 auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
1217 auto *Overflow =
1218 ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
1219
1220 CI->replaceAllUsesWith(Overflow);
1221 AddI->replaceAllUsesWith(UAdd);
1222 CI->eraseFromParent();
1223 AddI->eraseFromParent();
1224 return true;
1225}
1226
1227/// Sink the given CmpInst into user blocks to reduce the number of virtual
1228/// registers that must be created and coalesced. This is a clear win except on
1229/// targets with multiple condition code registers (PowerPC), where it might
1230/// lose; some adjustment may be wanted there.
1231///
1232/// Return true if any changes are made.
1233static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1234 BasicBlock *DefBB = CI->getParent();
1235
1236 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1237 if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
1238 return false;
1239
1240 // Only insert a cmp in each block once.
1241 DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1242
1243 bool MadeChange = false;
1244 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1245 UI != E; ) {
1246 Use &TheUse = UI.getUse();
1247 Instruction *User = cast<Instruction>(*UI);
1248
1249 // Preincrement use iterator so we don't invalidate it.
1250 ++UI;
1251
1252 // Don't bother for PHI nodes.
1253 if (isa<PHINode>(User))
1254 continue;
1255
1256 // Figure out which BB this cmp is used in.
1257 BasicBlock *UserBB = User->getParent();
1258
1259 // If this user is in the same block as the cmp, don't change the cmp.
1260 if (UserBB == DefBB) continue;
1261
1262 // If we have already inserted a cmp into this block, use it.
1263 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1264
1265 if (!InsertedCmp) {
1266 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1267      assert(InsertPt != UserBB->end());
1268 InsertedCmp =
1269 CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
1270 CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
1271 // Propagate the debug info.
1272 InsertedCmp->setDebugLoc(CI->getDebugLoc());
1273 }
1274
1275 // Replace a use of the cmp with a use of the new cmp.
1276 TheUse = InsertedCmp;
1277 MadeChange = true;
1278 ++NumCmpUses;
1279 }
1280
1281 // If we removed all uses, nuke the cmp.
1282 if (CI->use_empty()) {
1283 CI->eraseFromParent();
1284 MadeChange = true;
1285 }
1286
1287 return MadeChange;
1288}
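A minimal sketch of the effect (illustrative IR, not from this report): a compare defined in one block but used only in another is re-created next to its user, so the i1 value no longer has to live across the block boundary:

entry:
  %cmp = icmp eq i32 %x, %y
  br label %use
use:
  %r = select i1 %cmp, i32 %a, i32 %b
==>
entry:
  br label %use
use:
  %cmp1 = icmp eq i32 %x, %y
  %r = select i1 %cmp1, i32 %a, i32 %b

The original %cmp then has no uses left and is erased by the use_empty() check above.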
1289
1290static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1291 if (SinkCmpExpression(CI, TLI))
1292 return true;
1293
1294 if (CombineUAddWithOverflow(CI))
1295 return true;
1296
1297 return false;
1298}
1299
1300/// Duplicate and sink the given 'and' instruction into user blocks where it is
1301/// used in a compare to allow isel to generate better code for targets where
1302/// this operation can be combined.
1303///
1304/// Return true if any changes are made.
1305static bool sinkAndCmp0Expression(Instruction *AndI,
1306 const TargetLowering &TLI,
1307 SetOfInstrs &InsertedInsts) {
1308 // Double-check that we're not trying to optimize an instruction that was
1309 // already optimized by some other part of this pass.
1310  assert(!InsertedInsts.count(AndI) &&
1311         "Attempting to optimize already optimized and instruction");
1312 (void) InsertedInsts;
1313
1314 // Nothing to do for single use in same basic block.
1315 if (AndI->hasOneUse() &&
1316 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1317 return false;
1318
1319 // Try to avoid cases where sinking/duplicating is likely to increase register
1320 // pressure.
1321 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1322 !isa<ConstantInt>(AndI->getOperand(1)) &&
1323 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1324 return false;
1325
1326 for (auto *U : AndI->users()) {
1327 Instruction *User = cast<Instruction>(U);
1328
1329 // Only sink for and mask feeding icmp with 0.
1330 if (!isa<ICmpInst>(User))
1331 return false;
1332
1333 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1334 if (!CmpC || !CmpC->isZero())
1335 return false;
1336 }
1337
1338 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1339 return false;
1340
1341  DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1342  DEBUG(AndI->getParent()->dump());
1343
1344 // Push the 'and' into the same block as the icmp 0. There should only be
1345 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1346 // others, so we don't need to keep track of which BBs we insert into.
1347 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1348 UI != E; ) {
1349 Use &TheUse = UI.getUse();
1350 Instruction *User = cast<Instruction>(*UI);
1351
1352 // Preincrement use iterator so we don't invalidate it.
1353 ++UI;
1354
1355    DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1356
1357 // Keep the 'and' in the same place if the use is already in the same block.
1358 Instruction *InsertPt =
1359 User->getParent() == AndI->getParent() ? AndI : User;
1360 Instruction *InsertedAnd =
1361 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1362 AndI->getOperand(1), "", InsertPt);
1363 // Propagate the debug info.
1364 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1365
1366 // Replace a use of the 'and' with a use of the new 'and'.
1367 TheUse = InsertedAnd;
1368 ++NumAndUses;
1369    DEBUG(User->getParent()->dump());
1370 }
1371
1372 // We removed all uses, nuke the and.
1373 AndI->eraseFromParent();
1374 return true;
1375}
1376
1377/// Check if the candidates could be combined with a shift instruction, which
1378/// includes:
1379/// 1. Truncate instruction
1380/// 2. And instruction and the imm is a mask of the low bits:
1381/// imm & (imm+1) == 0
1382static bool isExtractBitsCandidateUse(Instruction *User) {
1383 if (!isa<TruncInst>(User)) {
1384 if (User->getOpcode() != Instruction::And ||
1385 !isa<ConstantInt>(User->getOperand(1)))
1386 return false;
1387
1388 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1389
1390 if ((Cimm & (Cimm + 1)).getBoolValue())
1391 return false;
1392 }
1393 return true;
1394}
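Worked example of the mask test, using nothing beyond the arithmetic above: for imm = 0x00FF, imm & (imm + 1) = 0x00FF & 0x0100 = 0, so the 'and' is a contiguous low-bit mask and the use qualifies; for imm = 0x00F0, imm & (imm + 1) = 0x00F0 & 0x00F1 = 0x00F0, which is non-zero, so the use is rejected.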
1395
1396/// Sink both shift and truncate instruction to the use of truncate's BB.
1397static bool
1398SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1399 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1400 const TargetLowering &TLI, const DataLayout &DL) {
1401 BasicBlock *UserBB = User->getParent();
1402 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1403 TruncInst *TruncI = dyn_cast<TruncInst>(User);
1404 bool MadeChange = false;
1405
1406 for (Value::user_iterator TruncUI = TruncI->user_begin(),
1407 TruncE = TruncI->user_end();
1408 TruncUI != TruncE;) {
1409
1410 Use &TruncTheUse = TruncUI.getUse();
1411 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1412 // Preincrement use iterator so we don't invalidate it.
1413
1414 ++TruncUI;
1415
1416 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1417 if (!ISDOpcode)
1418 continue;
1419
1420 // If the use is actually a legal node, there will not be an
1421 // implicit truncate.
1422 // FIXME: always querying the result type is just an
1423 // approximation; some nodes' legality is determined by the
1424 // operand or other means. There's no good way to find out though.
1425 if (TLI.isOperationLegalOrCustom(
1426 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1427 continue;
1428
1429 // Don't bother for PHI nodes.
1430 if (isa<PHINode>(TruncUser))
1431 continue;
1432
1433 BasicBlock *TruncUserBB = TruncUser->getParent();
1434
1435 if (UserBB == TruncUserBB)
1436 continue;
1437
1438 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1439 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1440
1441 if (!InsertedShift && !InsertedTrunc) {
1442 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1443      assert(InsertPt != TruncUserBB->end());
1444 // Sink the shift
1445 if (ShiftI->getOpcode() == Instruction::AShr)
1446 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1447 "", &*InsertPt);
1448 else
1449 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1450 "", &*InsertPt);
1451
1452 // Sink the trunc
1453 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1454 TruncInsertPt++;
1455      assert(TruncInsertPt != TruncUserBB->end());
1456
1457 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1458 TruncI->getType(), "", &*TruncInsertPt);
1459
1460 MadeChange = true;
1461
1462 TruncTheUse = InsertedTrunc;
1463 }
1464 }
1465 return MadeChange;
1466}
1467
1468/// Sink the shift *right* instruction into user blocks if the uses could
1469/// potentially be combined with this shift instruction and generate BitExtract
1470/// instruction. It will only be applied if the architecture supports BitExtract
1471/// instruction. Here is an example:
1472/// BB1:
1473/// %x.extract.shift = lshr i64 %arg1, 32
1474/// BB2:
1475/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1476/// ==>
1477///
1478/// BB2:
1479/// %x.extract.shift.1 = lshr i64 %arg1, 32
1480/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1481///
1482/// CodeGen will recognize the pattern in BB2 and generate BitExtract
1483/// instruction.
1484/// Return true if any changes are made.
1485static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1486 const TargetLowering &TLI,
1487 const DataLayout &DL) {
1488 BasicBlock *DefBB = ShiftI->getParent();
1489
1490 /// Only insert instructions in each block once.
1491 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1492
1493 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1494
1495 bool MadeChange = false;
1496 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1497 UI != E;) {
1498 Use &TheUse = UI.getUse();
1499 Instruction *User = cast<Instruction>(*UI);
1500 // Preincrement use iterator so we don't invalidate it.
1501 ++UI;
1502
1503 // Don't bother for PHI nodes.
1504 if (isa<PHINode>(User))
1505 continue;
1506
1507 if (!isExtractBitsCandidateUse(User))
1508 continue;
1509
1510 BasicBlock *UserBB = User->getParent();
1511
1512 if (UserBB == DefBB) {
1513      // If the shift and truncate instructions are in the same BB, the use of
1514      // the truncate (TruncUse) may still introduce another truncate if not
1515      // legal. In this case, we would like to sink both the shift and the truncate
1516      // instruction to the BB of TruncUse.
1517 // for example:
1518 // BB1:
1519 // i64 shift.result = lshr i64 opnd, imm
1520 // trunc.result = trunc shift.result to i16
1521 //
1522 // BB2:
1523 // ----> We will have an implicit truncate here if the architecture does
1524 // not have i16 compare.
1525 // cmp i16 trunc.result, opnd2
1526 //
1527 if (isa<TruncInst>(User) && shiftIsLegal
1528          // If the type of the truncate is legal, no truncate will be
1529 // introduced in other basic blocks.
1530 &&
1531 (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1532 MadeChange =
1533 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1534
1535 continue;
1536 }
1537 // If we have already inserted a shift into this block, use it.
1538 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1539
1540 if (!InsertedShift) {
1541 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1542      assert(InsertPt != UserBB->end());
1543
1544 if (ShiftI->getOpcode() == Instruction::AShr)
1545 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1546 "", &*InsertPt);
1547 else
1548 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1549 "", &*InsertPt);
1550
1551 MadeChange = true;
1552 }
1553
1554 // Replace a use of the shift with a use of the new shift.
1555 TheUse = InsertedShift;
1556 }
1557
1558 // If we removed all uses, nuke the shift.
1559 if (ShiftI->use_empty())
1560 ShiftI->eraseFromParent();
1561
1562 return MadeChange;
1563}
1564
1565/// If counting leading or trailing zeros is an expensive operation and a zero
1566/// input is defined, add a check for zero to avoid calling the intrinsic.
1567///
1568/// We want to transform:
1569/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1570///
1571/// into:
1572/// entry:
1573/// %cmpz = icmp eq i64 %A, 0
1574/// br i1 %cmpz, label %cond.end, label %cond.false
1575/// cond.false:
1576/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1577/// br label %cond.end
1578/// cond.end:
1579/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1580///
1581/// If the transform is performed, return true and set ModifiedDT to true.
1582static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1583 const TargetLowering *TLI,
1584 const DataLayout *DL,
1585 bool &ModifiedDT) {
1586 if (!TLI || !DL)
1587 return false;
1588
1589 // If a zero input is undefined, it doesn't make sense to despeculate that.
1590 if (match(CountZeros->getOperand(1), m_One()))
1591 return false;
1592
1593 // If it's cheap to speculate, there's nothing to do.
1594 auto IntrinsicID = CountZeros->getIntrinsicID();
1595 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1596 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1597 return false;
1598
1599 // Only handle legal scalar cases. Anything else requires too much work.
1600 Type *Ty = CountZeros->getType();
1601 unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1602 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1603 return false;
1604
1605 // The intrinsic will be sunk behind a compare against zero and branch.
1606 BasicBlock *StartBlock = CountZeros->getParent();
1607 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1608
1609 // Create another block after the count zero intrinsic. A PHI will be added
1610 // in this block to select the result of the intrinsic or the bit-width
1611 // constant if the input to the intrinsic is zero.
1612 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1613 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1614
1615 // Set up a builder to create a compare, conditional branch, and PHI.
1616 IRBuilder<> Builder(CountZeros->getContext());
1617 Builder.SetInsertPoint(StartBlock->getTerminator());
1618 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1619
1620 // Replace the unconditional branch that was created by the first split with
1621 // a compare against zero and a conditional branch.
1622 Value *Zero = Constant::getNullValue(Ty);
1623 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1624 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1625 StartBlock->getTerminator()->eraseFromParent();
1626
1627 // Create a PHI in the end block to select either the output of the intrinsic
1628 // or the bit width of the operand.
1629 Builder.SetInsertPoint(&EndBlock->front());
1630 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1631 CountZeros->replaceAllUsesWith(PN);
1632 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1633 PN->addIncoming(BitWidth, StartBlock);
1634 PN->addIncoming(CountZeros, CallBlock);
1635
1636 // We are explicitly handling the zero case, so we can set the intrinsic's
1637 // undefined zero argument to 'true'. This will also prevent reprocessing the
1638 // intrinsic; we only despeculate when a zero input is defined.
1639 CountZeros->setArgOperand(1, Builder.getTrue());
1640 ModifiedDT = true;
1641 return true;
1642}
1643
1644// This class provides helper functions to expand a memcmp library call into an
1645// inline expansion.
1646class MemCmpExpansion {
1647 struct ResultBlock {
1648 BasicBlock *BB;
1649 PHINode *PhiSrc1;
1650 PHINode *PhiSrc2;
1651 ResultBlock();
1652 };
1653
1654 CallInst *CI;
1655 ResultBlock ResBlock;
1656 unsigned MaxLoadSize;
1657 unsigned NumBlocks;
1658 unsigned NumBlocksNonOneByte;
1659 unsigned NumLoadsPerBlock;
1660 std::vector<BasicBlock *> LoadCmpBlocks;
1661 BasicBlock *EndBlock;
1662 PHINode *PhiRes;
1663 bool IsUsedForZeroCmp;
1664 const DataLayout &DL;
1665 IRBuilder<> Builder;
1666
1667 unsigned calculateNumBlocks(unsigned Size);
1668 void createLoadCmpBlocks();
1669 void createResultBlock();
1670 void setupResultBlockPHINodes();
1671 void setupEndBlockPHINodes();
1672 void emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
1673 unsigned GEPIndex);
1674 Value *getCompareLoadPairs(unsigned Index, unsigned Size,
1675 unsigned &NumBytesProcessed);
1676 void emitLoadCompareBlockMultipleLoads(unsigned Index, unsigned Size,
1677 unsigned &NumBytesProcessed);
1678 void emitLoadCompareByteBlock(unsigned Index, unsigned GEPIndex);
1679 void emitMemCmpResultBlock();
1680 Value *getMemCmpExpansionZeroCase(unsigned Size);
1681 Value *getMemCmpEqZeroOneBlock(unsigned Size);
1682 unsigned getLoadSize(unsigned Size);
1683 unsigned getNumLoads(unsigned Size);
1684
1685public:
1686 MemCmpExpansion(CallInst *CI, uint64_t Size, unsigned MaxLoadSize,
1687 unsigned NumLoadsPerBlock, const DataLayout &DL);
1688 Value *getMemCmpExpansion(uint64_t Size);
1689};
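A rough usage sketch, mirroring the expandMemCmp caller later in this file (identifiers here are illustrative, not additions to the file):

  // Assumes SizeVal and MaxLoadSize were already validated and DL is the
  // module's DataLayout (names here are illustrative).
  MemCmpExpansion Expansion(MemCmpCall, SizeVal, MaxLoadSize, NumLoadsPerBlock, DL);
  Value *Res = Expansion.getMemCmpExpansion(SizeVal);   // emits the blocks and result PHI
  MemCmpCall->replaceAllUsesWith(Res);
  MemCmpCall->eraseFromParent();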
1690
1691MemCmpExpansion::ResultBlock::ResultBlock()
1692 : BB(nullptr), PhiSrc1(nullptr), PhiSrc2(nullptr) {}
1693
1694// Initialize the basic block structure required for expansion of memcmp call
1695// with given maximum load size and memcmp size parameter.
1696// This structure includes:
1697// 1. A list of load compare blocks - LoadCmpBlocks.
1698// 2. An EndBlock, split from original instruction point, which is the block to
1699// return from.
1700// 3. ResultBlock, block to branch to for early exit when a
1701// LoadCmpBlock finds a difference.
1702MemCmpExpansion::MemCmpExpansion(CallInst *CI, uint64_t Size,
1703 unsigned MaxLoadSize, unsigned LoadsPerBlock,
1704 const DataLayout &TheDataLayout)
1705 : CI(CI), MaxLoadSize(MaxLoadSize), NumLoadsPerBlock(LoadsPerBlock),
1706 DL(TheDataLayout), Builder(CI) {
1707
1708 // A memcmp with zero-comparison with only one block of load and compare does
1709 // not need to set up any extra blocks. This case could be handled in the DAG,
1710  // but since we have all of the machinery to flexibly expand any memcmp here,
1711 // we choose to handle this case too to avoid fragmented lowering.
1712 IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
1713 NumBlocks = calculateNumBlocks(Size);
1714 if (!IsUsedForZeroCmp || NumBlocks != 1) {
1715 BasicBlock *StartBlock = CI->getParent();
1716 EndBlock = StartBlock->splitBasicBlock(CI, "endblock");
1717 setupEndBlockPHINodes();
1718 createResultBlock();
1719
1720 // If return value of memcmp is not used in a zero equality, we need to
1721 // calculate which source was larger. The calculation requires the
1722 // two loaded source values of each load compare block.
1723 // These will be saved in the phi nodes created by setupResultBlockPHINodes.
1724 if (!IsUsedForZeroCmp)
1725 setupResultBlockPHINodes();
1726
1727 // Create the number of required load compare basic blocks.
1728 createLoadCmpBlocks();
1729
1730 // Update the terminator added by splitBasicBlock to branch to the first
1731 // LoadCmpBlock.
1732 StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
1733 }
1734
1735 Builder.SetCurrentDebugLocation(CI->getDebugLoc());
1736}
1737
1738void MemCmpExpansion::createLoadCmpBlocks() {
1739 for (unsigned i = 0; i < NumBlocks; i++) {
1740 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
1741 EndBlock->getParent(), EndBlock);
1742 LoadCmpBlocks.push_back(BB);
1743 }
1744}
1745
1746void MemCmpExpansion::createResultBlock() {
1747 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
1748 EndBlock->getParent(), EndBlock);
1749}
1750
1751// This function creates the IR instructions for loading and comparing 1 byte.
1752// It loads 1 byte from each source of the memcmp parameters with the given
1753// GEPIndex. It then subtracts the two loaded values and adds this result to the
1754// final phi node for selecting the memcmp result.
1755void MemCmpExpansion::emitLoadCompareByteBlock(unsigned Index,
1756 unsigned GEPIndex) {
1757 Value *Source1 = CI->getArgOperand(0);
1758 Value *Source2 = CI->getArgOperand(1);
1759
1760 Builder.SetInsertPoint(LoadCmpBlocks[Index]);
1761 Type *LoadSizeType = Type::getInt8Ty(CI->getContext());
1762 // Cast source to LoadSizeType*.
1763 if (Source1->getType() != LoadSizeType)
1764 Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
1765 if (Source2->getType() != LoadSizeType)
1766 Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
1767
1768 // Get the base address using the GEPIndex.
1769 if (GEPIndex != 0) {
1770 Source1 = Builder.CreateGEP(LoadSizeType, Source1,
1771 ConstantInt::get(LoadSizeType, GEPIndex));
1772 Source2 = Builder.CreateGEP(LoadSizeType, Source2,
1773 ConstantInt::get(LoadSizeType, GEPIndex));
1774 }
1775
1776 Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
1777 Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
1778
1779 LoadSrc1 = Builder.CreateZExt(LoadSrc1, Type::getInt32Ty(CI->getContext()));
1780 LoadSrc2 = Builder.CreateZExt(LoadSrc2, Type::getInt32Ty(CI->getContext()));
1781 Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
1782
1783 PhiRes->addIncoming(Diff, LoadCmpBlocks[Index]);
1784
1785 if (Index < (LoadCmpBlocks.size() - 1)) {
1786    // Branch to EndBlock for an early exit if a difference is found. Otherwise,
1787    // continue to the next LoadCmpBlock.
1788 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
1789 ConstantInt::get(Diff->getType(), 0));
1790 BranchInst *CmpBr =
1791 BranchInst::Create(EndBlock, LoadCmpBlocks[Index + 1], Cmp);
1792 Builder.Insert(CmpBr);
1793 } else {
1794 // The last block has an unconditional branch to EndBlock.
1795 BranchInst *CmpBr = BranchInst::Create(EndBlock);
1796 Builder.Insert(CmpBr);
1797 }
1798}
1799
1800unsigned MemCmpExpansion::getNumLoads(unsigned Size) {
1801 return (Size / MaxLoadSize) + countPopulation(Size % MaxLoadSize);
1802}
1803
1804unsigned MemCmpExpansion::getLoadSize(unsigned Size) {
1805 return MinAlign(PowerOf2Floor(Size), MaxLoadSize);
1806}
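A quick worked example, assuming MaxLoadSize = 8: for Size = 15, getNumLoads returns 15/8 + popcount(15 % 8) = 1 + 3 = 4 (one 8-byte load plus 4-, 2- and 1-byte loads for the remainder); getLoadSize(15) = MinAlign(PowerOf2Floor(15), 8) = 8, and on the remaining 7, 3 and 1 bytes it yields 4, 2 and 1 respectively.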
1807
1808/// Generate an equality comparison for one or more pairs of loaded values.
1809/// This is used in the case where the memcmp() call is compared equal or not
1810/// equal to zero.
1811Value *MemCmpExpansion::getCompareLoadPairs(unsigned Index, unsigned Size,
1812 unsigned &NumBytesProcessed) {
1813 std::vector<Value *> XorList, OrList;
1814 Value *Diff;
1815
1816 unsigned RemainingBytes = Size - NumBytesProcessed;
1817 unsigned NumLoadsRemaining = getNumLoads(RemainingBytes);
1818 unsigned NumLoads = std::min(NumLoadsRemaining, NumLoadsPerBlock);
1819
1820 // For a single-block expansion, start inserting before the memcmp call.
1821 if (LoadCmpBlocks.empty())
1822 Builder.SetInsertPoint(CI);
1823 else
1824 Builder.SetInsertPoint(LoadCmpBlocks[Index]);
1825
1826 Value *Cmp = nullptr;
1827 for (unsigned i = 0; i < NumLoads; ++i) {
1828 unsigned LoadSize = getLoadSize(RemainingBytes);
1829 unsigned GEPIndex = NumBytesProcessed / LoadSize;
1830 NumBytesProcessed += LoadSize;
1831 RemainingBytes -= LoadSize;
1832
1833 Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
1834 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
1835    assert(LoadSize <= MaxLoadSize && "Unexpected load type");
1836
1837 Value *Source1 = CI->getArgOperand(0);
1838 Value *Source2 = CI->getArgOperand(1);
1839
1840 // Cast source to LoadSizeType*.
1841 if (Source1->getType() != LoadSizeType)
1842 Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
1843 if (Source2->getType() != LoadSizeType)
1844 Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
1845
1846 // Get the base address using the GEPIndex.
1847 if (GEPIndex != 0) {
1848 Source1 = Builder.CreateGEP(LoadSizeType, Source1,
1849 ConstantInt::get(LoadSizeType, GEPIndex));
1850 Source2 = Builder.CreateGEP(LoadSizeType, Source2,
1851 ConstantInt::get(LoadSizeType, GEPIndex));
1852 }
1853
1854 // Get a constant or load a value for each source address.
1855 Value *LoadSrc1 = nullptr;
1856 if (auto *Source1C = dyn_cast<Constant>(Source1))
1857 LoadSrc1 = ConstantFoldLoadFromConstPtr(Source1C, LoadSizeType, DL);
1858 if (!LoadSrc1)
1859 LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
1860
1861 Value *LoadSrc2 = nullptr;
1862 if (auto *Source2C = dyn_cast<Constant>(Source2))
1863 LoadSrc2 = ConstantFoldLoadFromConstPtr(Source2C, LoadSizeType, DL);
1864 if (!LoadSrc2)
1865 LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
1866
1867 if (NumLoads != 1) {
1868 if (LoadSizeType != MaxLoadType) {
1869 LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
1870 LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
1871 }
1872 // If we have multiple loads per block, we need to generate a composite
1873 // comparison using xor+or.
1874 Diff = Builder.CreateXor(LoadSrc1, LoadSrc2);
1875 Diff = Builder.CreateZExt(Diff, MaxLoadType);
1876 XorList.push_back(Diff);
1877 } else {
1878 // If there's only one load per block, we just compare the loaded values.
1879 Cmp = Builder.CreateICmpNE(LoadSrc1, LoadSrc2);
1880 }
1881 }
1882
1883 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
1884 std::vector<Value *> OutList;
1885 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
1886 Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
1887 OutList.push_back(Or);
1888 }
1889 if (InList.size() % 2 != 0)
1890 OutList.push_back(InList.back());
1891 return OutList;
1892 };
1893
1894 if (!Cmp) {
1895 // Pairwise OR the XOR results.
1896 OrList = pairWiseOr(XorList);
1897
1898 // Pairwise OR the OR results until one result left.
1899 while (OrList.size() != 1) {
1900 OrList = pairWiseOr(OrList);
1901 }
1902 Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
1903 }
1904
1905 return Cmp;
1906}
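For example, with three loads in one block the XOR results reduce as pairWiseOr([d0, d1, d2]) -> [d0|d1, d2] -> [(d0|d1)|d2]; the final OR is compared not-equal to zero, so the returned Cmp is true exactly when some loaded pair differed.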
1907
1908void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(
1909 unsigned Index, unsigned Size, unsigned &NumBytesProcessed) {
1910 Value *Cmp = getCompareLoadPairs(Index, Size, NumBytesProcessed);
1911
1912 BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
1913 ? EndBlock
1914 : LoadCmpBlocks[Index + 1];
1915 // Early exit branch if difference found to ResultBlock. Otherwise,
1916 // continue to next LoadCmpBlock or EndBlock.
1917 BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
1918 Builder.Insert(CmpBr);
1919
1920 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
1921 // since early exit to ResultBlock was not taken (no difference was found in
1922 // any of the bytes).
1923 if (Index == LoadCmpBlocks.size() - 1) {
1924 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
1925 PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
1926 }
1927}
1928
1929// This function creates the IR instructions for loading and comparing using the
1930// given LoadSize. It loads the number of bytes specified by LoadSize from each
1931// source of the memcmp parameters. It then does a subtract to see if there was
1932// a difference in the loaded values. If a difference is found, it branches
1933// with an early exit to the ResultBlock for calculating which source was
1934// larger. Otherwise, it falls through to either the next LoadCmpBlock or
1935// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
1936// a special case through emitLoadCompareByteBlock. The special handling can
1937// simply subtract the loaded values and add it to the result phi node.
1938void MemCmpExpansion::emitLoadCompareBlock(unsigned Index, unsigned LoadSize,
1939 unsigned GEPIndex) {
1940 if (LoadSize == 1) {
1941 MemCmpExpansion::emitLoadCompareByteBlock(Index, GEPIndex);
1942 return;
1943 }
1944
1945 Type *LoadSizeType = IntegerType::get(CI->getContext(), LoadSize * 8);
1946 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
1947  assert(LoadSize <= MaxLoadSize && "Unexpected load type");
1948
1949 Value *Source1 = CI->getArgOperand(0);
1950 Value *Source2 = CI->getArgOperand(1);
1951
1952 Builder.SetInsertPoint(LoadCmpBlocks[Index]);
1953 // Cast source to LoadSizeType*.
1954 if (Source1->getType() != LoadSizeType)
1955 Source1 = Builder.CreateBitCast(Source1, LoadSizeType->getPointerTo());
1956 if (Source2->getType() != LoadSizeType)
1957 Source2 = Builder.CreateBitCast(Source2, LoadSizeType->getPointerTo());
1958
1959 // Get the base address using the GEPIndex.
1960 if (GEPIndex != 0) {
1961 Source1 = Builder.CreateGEP(LoadSizeType, Source1,
1962 ConstantInt::get(LoadSizeType, GEPIndex));
1963 Source2 = Builder.CreateGEP(LoadSizeType, Source2,
1964 ConstantInt::get(LoadSizeType, GEPIndex));
1965 }
1966
1967 // Load LoadSizeType from the base address.
1968 Value *LoadSrc1 = Builder.CreateLoad(LoadSizeType, Source1);
1969 Value *LoadSrc2 = Builder.CreateLoad(LoadSizeType, Source2);
1970
1971 if (DL.isLittleEndian()) {
1972 Function *Bswap = Intrinsic::getDeclaration(CI->getModule(),
1973 Intrinsic::bswap, LoadSizeType);
1974 LoadSrc1 = Builder.CreateCall(Bswap, LoadSrc1);
1975 LoadSrc2 = Builder.CreateCall(Bswap, LoadSrc2);
1976 }
1977
1978 if (LoadSizeType != MaxLoadType) {
1979 LoadSrc1 = Builder.CreateZExt(LoadSrc1, MaxLoadType);
1980 LoadSrc2 = Builder.CreateZExt(LoadSrc2, MaxLoadType);
1981 }
1982
1983 // Add the loaded values to the phi nodes for calculating memcmp result only
1984 // if result is not used in a zero equality.
1985 if (!IsUsedForZeroCmp) {
1986 ResBlock.PhiSrc1->addIncoming(LoadSrc1, LoadCmpBlocks[Index]);
1987 ResBlock.PhiSrc2->addIncoming(LoadSrc2, LoadCmpBlocks[Index]);
1988 }
1989
1990 Value *Diff = Builder.CreateSub(LoadSrc1, LoadSrc2);
1991
1992 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
1993 ConstantInt::get(Diff->getType(), 0));
1994 BasicBlock *NextBB = (Index == (LoadCmpBlocks.size() - 1))
1995 ? EndBlock
1996 : LoadCmpBlocks[Index + 1];
1997 // Early exit branch if difference found to ResultBlock. Otherwise, continue
1998 // to next LoadCmpBlock or EndBlock.
1999 BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
2000 Builder.Insert(CmpBr);
2001
2002 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
2003 // since early exit to ResultBlock was not taken (no difference was found in
2004 // any of the bytes).
2005 if (Index == LoadCmpBlocks.size() - 1) {
2006 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
2007 PhiRes->addIncoming(Zero, LoadCmpBlocks[Index]);
2008 }
2009}
2010
2011// This function populates the ResultBlock with a sequence to calculate the
2012// memcmp result. It compares the two loaded source values and returns -1 if
2013// src1 < src2 and 1 if src1 > src2.
2014void MemCmpExpansion::emitMemCmpResultBlock() {
2015 // Special case: if memcmp result is used in a zero equality, result does not
2016 // need to be calculated and can simply return 1.
2017 if (IsUsedForZeroCmp) {
2018 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
2019 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
2020 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
2021 PhiRes->addIncoming(Res, ResBlock.BB);
2022 BranchInst *NewBr = BranchInst::Create(EndBlock);
2023 Builder.Insert(NewBr);
2024 return;
2025 }
2026 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
2027 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
2028
2029 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
2030 ResBlock.PhiSrc2);
2031
2032 Value *Res =
2033 Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
2034 ConstantInt::get(Builder.getInt32Ty(), 1));
2035
2036 BranchInst *NewBr = BranchInst::Create(EndBlock);
2037 Builder.Insert(NewBr);
2038 PhiRes->addIncoming(Res, ResBlock.BB);
2039}
2040
2041unsigned MemCmpExpansion::calculateNumBlocks(unsigned Size) {
2042 unsigned NumBlocks = 0;
2043 bool HaveOneByteLoad = false;
2044 unsigned RemainingSize = Size;
2045 unsigned LoadSize = MaxLoadSize;
2046 while (RemainingSize) {
2047 if (LoadSize == 1)
2048 HaveOneByteLoad = true;
2049 NumBlocks += RemainingSize / LoadSize;
2050 RemainingSize = RemainingSize % LoadSize;
2051 LoadSize = LoadSize / 2;
2052 }
2053 NumBlocksNonOneByte = HaveOneByteLoad ? (NumBlocks - 1) : NumBlocks;
2054
2055 if (IsUsedForZeroCmp)
2056 NumBlocks = NumBlocks / NumLoadsPerBlock +
2057 (NumBlocks % NumLoadsPerBlock != 0 ? 1 : 0);
2058
2059 return NumBlocks;
2060}
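Worked example, assuming MaxLoadSize = 8: Size = 15 decomposes into loads of 8, 4, 2 and 1 bytes, so NumBlocks = 4 and, because a one-byte load is present, NumBlocksNonOneByte = 3. If the result only feeds a zero-equality compare and NumLoadsPerBlock were 2 (an assumed value, not necessarily the option's default), the loads would be packed into 4/2 + 0 = 2 blocks.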
2061
2062void MemCmpExpansion::setupResultBlockPHINodes() {
2063 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
2064 Builder.SetInsertPoint(ResBlock.BB);
2065 ResBlock.PhiSrc1 =
2066 Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src1");
2067 ResBlock.PhiSrc2 =
2068 Builder.CreatePHI(MaxLoadType, NumBlocksNonOneByte, "phi.src2");
2069}
2070
2071void MemCmpExpansion::setupEndBlockPHINodes() {
2072 Builder.SetInsertPoint(&EndBlock->front());
2073 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
2074}
2075
2076Value *MemCmpExpansion::getMemCmpExpansionZeroCase(unsigned Size) {
2077 unsigned NumBytesProcessed = 0;
2078 // This loop populates each of the LoadCmpBlocks with the IR sequence to
2079 // handle multiple loads per block.
2080 for (unsigned i = 0; i < NumBlocks; ++i)
2081 emitLoadCompareBlockMultipleLoads(i, Size, NumBytesProcessed);
2082
2083 emitMemCmpResultBlock();
2084 return PhiRes;
2085}
2086
2087/// A memcmp expansion that compares equality with 0 and only has one block of
2088/// load and compare can bypass the compare, branch, and phi IR that is required
2089/// in the general case.
2090Value *MemCmpExpansion::getMemCmpEqZeroOneBlock(unsigned Size) {
2091 unsigned NumBytesProcessed = 0;
2092 Value *Cmp = getCompareLoadPairs(0, Size, NumBytesProcessed);
2093 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
2094}
2095
2096// This function expands the memcmp call into an inline expansion and returns
2097// the memcmp result.
2098Value *MemCmpExpansion::getMemCmpExpansion(uint64_t Size) {
2099 if (IsUsedForZeroCmp)
2100 return NumBlocks == 1 ? getMemCmpEqZeroOneBlock(Size) :
2101 getMemCmpExpansionZeroCase(Size);
2102
2103 // This loop calls emitLoadCompareBlock for comparing Size bytes of the two
2104 // memcmp sources. It starts with loading using the maximum load size set by
2105 // the target. It processes any remaining bytes using a load size which is the
2106 // next smallest power of 2.
2107 unsigned LoadSize = MaxLoadSize;
2108 unsigned NumBytesToBeProcessed = Size;
2109 unsigned Index = 0;
2110 while (NumBytesToBeProcessed) {
2111 // Calculate how many blocks we can create with the current load size.
2112 unsigned NumBlocks = NumBytesToBeProcessed / LoadSize;
2113 unsigned GEPIndex = (Size - NumBytesToBeProcessed) / LoadSize;
2114 NumBytesToBeProcessed = NumBytesToBeProcessed % LoadSize;
2115
2116 // For each NumBlocks, populate the instruction sequence for loading and
2117 // comparing LoadSize bytes.
2118 while (NumBlocks--) {
2119 emitLoadCompareBlock(Index, LoadSize, GEPIndex);
2120 Index++;
2121 GEPIndex++;
2122 }
2123 // Get the next LoadSize to use.
2124 LoadSize = LoadSize / 2;
2125 }
2126
2127 emitMemCmpResultBlock();
2128 return PhiRes;
2129}
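Tracing this loop for Size = 15 with MaxLoadSize = 8 (the case shown in the comment below): the 8-byte block gets GEPIndex (15-15)/8 = 0, the 4-byte block (15-7)/4 = 2, the 2-byte block (15-3)/2 = 6, and the 1-byte block (15-1)/1 = 14 -- exactly the getelementptr indices 2, 6 and 14 that appear in loadbb1 through loadbb3 of the expansion that follows.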
2130
2131// This function checks to see if an expansion of memcmp can be generated.
2132// It checks for constant compare size that is less than the max inline size.
2133// If an expansion cannot occur, returns false to leave as a library call.
2134// Otherwise, the library call is replaced with a new IR instruction sequence.
2135/// We want to transform:
2136/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
2137/// To:
2138/// loadbb:
2139/// %0 = bitcast i32* %buffer2 to i8*
2140/// %1 = bitcast i32* %buffer1 to i8*
2141/// %2 = bitcast i8* %1 to i64*
2142/// %3 = bitcast i8* %0 to i64*
2143/// %4 = load i64, i64* %2
2144/// %5 = load i64, i64* %3
2145/// %6 = call i64 @llvm.bswap.i64(i64 %4)
2146/// %7 = call i64 @llvm.bswap.i64(i64 %5)
2147/// %8 = sub i64 %6, %7
2148/// %9 = icmp ne i64 %8, 0
2149/// br i1 %9, label %res_block, label %loadbb1
2150/// res_block: ; preds = %loadbb2,
2151/// %loadbb1, %loadbb
2152/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
2153/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
2154/// %10 = icmp ult i64 %phi.src1, %phi.src2
2155/// %11 = select i1 %10, i32 -1, i32 1
2156/// br label %endblock
2157/// loadbb1: ; preds = %loadbb
2158/// %12 = bitcast i32* %buffer2 to i8*
2159/// %13 = bitcast i32* %buffer1 to i8*
2160/// %14 = bitcast i8* %13 to i32*
2161/// %15 = bitcast i8* %12 to i32*
2162/// %16 = getelementptr i32, i32* %14, i32 2
2163/// %17 = getelementptr i32, i32* %15, i32 2
2164/// %18 = load i32, i32* %16
2165/// %19 = load i32, i32* %17
2166/// %20 = call i32 @llvm.bswap.i32(i32 %18)
2167/// %21 = call i32 @llvm.bswap.i32(i32 %19)
2168/// %22 = zext i32 %20 to i64
2169/// %23 = zext i32 %21 to i64
2170/// %24 = sub i64 %22, %23
2171/// %25 = icmp ne i64 %24, 0
2172/// br i1 %25, label %res_block, label %loadbb2
2173/// loadbb2: ; preds = %loadbb1
2174/// %26 = bitcast i32* %buffer2 to i8*
2175/// %27 = bitcast i32* %buffer1 to i8*
2176/// %28 = bitcast i8* %27 to i16*
2177/// %29 = bitcast i8* %26 to i16*
2178/// %30 = getelementptr i16, i16* %28, i16 6
2179/// %31 = getelementptr i16, i16* %29, i16 6
2180/// %32 = load i16, i16* %30
2181/// %33 = load i16, i16* %31
2182/// %34 = call i16 @llvm.bswap.i16(i16 %32)
2183/// %35 = call i16 @llvm.bswap.i16(i16 %33)
2184/// %36 = zext i16 %34 to i64
2185/// %37 = zext i16 %35 to i64
2186/// %38 = sub i64 %36, %37
2187/// %39 = icmp ne i64 %38, 0
2188/// br i1 %39, label %res_block, label %loadbb3
2189/// loadbb3: ; preds = %loadbb2
2190/// %40 = bitcast i32* %buffer2 to i8*
2191/// %41 = bitcast i32* %buffer1 to i8*
2192/// %42 = getelementptr i8, i8* %41, i8 14
2193/// %43 = getelementptr i8, i8* %40, i8 14
2194/// %44 = load i8, i8* %42
2195/// %45 = load i8, i8* %43
2196/// %46 = zext i8 %44 to i32
2197/// %47 = zext i8 %45 to i32
2198/// %48 = sub i32 %46, %47
2199/// br label %endblock
2200/// endblock: ; preds = %res_block,
2201/// %loadbb3
2202/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
2203/// ret i32 %phi.res
2204static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
2205 const TargetLowering *TLI, const DataLayout *DL) {
2206 NumMemCmpCalls++;
2207
2208 // TTI call to check if target would like to expand memcmp. Also, get the
2209 // MaxLoadSize.
2210 unsigned MaxLoadSize;
2211 if (!TTI->expandMemCmp(CI, MaxLoadSize))
13. Assuming the condition is false
14. Taking false branch
2212 return false;
2213
2214 // Early exit from expansion if -Oz.
2215 if (CI->getFunction()->optForMinSize())
15. Assuming the condition is false
16. Taking false branch
2216 return false;
2217
2218 // Early exit from expansion if size is not a constant.
2219 ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
2220 if (!SizeCast) {
17. Assuming 'SizeCast' is non-null
18. Taking false branch
2221 NumMemCmpNotConstant++;
2222 return false;
2223 }
2224
2225 // Early exit from expansion if size greater than max bytes to load.
2226 uint64_t SizeVal = SizeCast->getZExtValue();
2227 unsigned NumLoads = 0;
2228 unsigned RemainingSize = SizeVal;
2229 unsigned LoadSize = MaxLoadSize;
2230 while (RemainingSize) {
19. Loop condition is false. Execution continues on line 2236
2231 NumLoads += RemainingSize / LoadSize;
2232 RemainingSize = RemainingSize % LoadSize;
2233 LoadSize = LoadSize / 2;
2234 }
2235
2236 if (NumLoads > TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize())) {
20. Called C++ object pointer is null
2237 NumMemCmpGreaterThanMax++;
2238 return false;
2239 }
2240
2241 NumMemCmpInlined++;
2242
2243 // MemCmpHelper object creates and sets up basic blocks required for
2244 // expanding memcmp with size SizeVal.
2245 unsigned NumLoadsPerBlock = MemCmpNumLoadsPerBlock;
2246 MemCmpExpansion MemCmpHelper(CI, SizeVal, MaxLoadSize, NumLoadsPerBlock, *DL);
2247
2248 Value *Res = MemCmpHelper.getMemCmpExpansion(SizeVal);
2249
2250 // Replace call with result of expansion and erase call.
2251 CI->replaceAllUsesWith(Res);
2252 CI->eraseFromParent();
2253
2254 return true;
2255}
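This function is where the reported defect fires: on the path above, TLI is assumed to be null at step 1 (line 2263) and is then dereferenced unconditionally at line 2236 (step 20). A minimal defensive sketch, purely illustrative and not necessarily the fix adopted upstream, is an early bail-out; equivalently, the call site at line 2411 could test TLI before invoking expandMemCmp:

  // Hypothetical guard at the top of expandMemCmp (sketch only): refuse to
  // expand when no TargetLowering is available, so a null TLI never reaches
  // the getMaxExpandSizeMemcmp call at line 2236.
  if (!TLI)
    return false;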
2256
2257bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
2258 BasicBlock *BB = CI->getParent();
2259
2260 // Lower inline assembly if we can.
2261  // If we found an inline asm expression, and if the target knows how to
2262 // lower it to normal LLVM code, do so now.
2263 if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1. Assuming pointer value is null
2. Taking false branch
2264 if (TLI->ExpandInlineAsm(CI)) {
2265 // Avoid invalidating the iterator.
2266 CurInstIterator = BB->begin();
2267 // Avoid processing instructions out of order, which could cause
2268 // reuse before a value is defined.
2269 SunkAddrs.clear();
2270 return true;
2271 }
2272 // Sink address computing for memory operands into the block.
2273 if (optimizeInlineAsmInst(CI))
2274 return true;
2275 }
2276
2277 // Align the pointer arguments to this call if the target thinks it's a good
2278 // idea
2279 unsigned MinSize, PrefAlign;
2280 if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2281 for (auto &Arg : CI->arg_operands()) {
2282 // We want to align both objects whose address is used directly and
2283 // objects whose address is used in casts and GEPs, though it only makes
2284 // sense for GEPs if the offset is a multiple of the desired alignment and
2285 // if size - offset meets the size threshold.
2286 if (!Arg->getType()->isPointerTy())
2287 continue;
2288 APInt Offset(DL->getPointerSizeInBits(
2289 cast<PointerType>(Arg->getType())->getAddressSpace()),
2290 0);
2291 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2292 uint64_t Offset2 = Offset.getLimitedValue();
2293 if ((Offset2 & (PrefAlign-1)) != 0)
2294 continue;
2295 AllocaInst *AI;
2296 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
2297 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2298 AI->setAlignment(PrefAlign);
2299 // Global variables can only be aligned if they are defined in this
2300 // object (i.e. they are uniquely initialized in this object), and
2301 // over-aligning global variables that have an explicit section is
2302 // forbidden.
2303 GlobalVariable *GV;
2304 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2305 GV->getPointerAlignment(*DL) < PrefAlign &&
2306 DL->getTypeAllocSize(GV->getValueType()) >=
2307 MinSize + Offset2)
2308 GV->setAlignment(PrefAlign);
2309 }
2310 // If this is a memcpy (or similar) then we may be able to improve the
2311 // alignment
2312 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2313 unsigned Align = getKnownAlignment(MI->getDest(), *DL);
2314 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI))
2315 Align = std::min(Align, getKnownAlignment(MTI->getSource(), *DL));
2316 if (Align > MI->getAlignment())
2317 MI->setAlignment(ConstantInt::get(MI->getAlignmentType(), Align));
2318 }
2319 }
2320
2321 // If we have a cold call site, try to sink addressing computation into the
2322 // cold block. This interacts with our handling for loads and stores to
2323 // ensure that we can fold all uses of a potential addressing computation
2324 // into their uses. TODO: generalize this to work over profiling data
2325 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
3. Assuming the condition is false
2326 for (auto &Arg : CI->arg_operands()) {
2327 if (!Arg->getType()->isPointerTy())
2328 continue;
2329 unsigned AS = Arg->getType()->getPointerAddressSpace();
2330 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
2331 }
2332
2333 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2334 if (II) {
4. Taking false branch
2335 switch (II->getIntrinsicID()) {
2336 default: break;
2337 case Intrinsic::objectsize: {
2338 // Lower all uses of llvm.objectsize.*
2339 ConstantInt *RetVal =
2340 lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
2341 // Substituting this can cause recursive simplifications, which can
2342 // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case
2343 // this
2344 // happens.
2345 Value *CurValue = &*CurInstIterator;
2346 WeakTrackingVH IterHandle(CurValue);
2347
2348 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2349
2350 // If the iterator instruction was recursively deleted, start over at the
2351 // start of the block.
2352 if (IterHandle != CurValue) {
2353 CurInstIterator = BB->begin();
2354 SunkAddrs.clear();
2355 }
2356 return true;
2357 }
2358 case Intrinsic::aarch64_stlxr:
2359 case Intrinsic::aarch64_stxr: {
2360 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2361 if (!ExtVal || !ExtVal->hasOneUse() ||
2362 ExtVal->getParent() == CI->getParent())
2363 return false;
2364 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2365 ExtVal->moveBefore(CI);
2366 // Mark this instruction as "inserted by CGP", so that other
2367 // optimizations don't touch it.
2368 InsertedInsts.insert(ExtVal);
2369 return true;
2370 }
2371 case Intrinsic::invariant_group_barrier:
2372 II->replaceAllUsesWith(II->getArgOperand(0));
2373 II->eraseFromParent();
2374 return true;
2375
2376 case Intrinsic::cttz:
2377 case Intrinsic::ctlz:
2378 // If counting zeros is expensive, try to avoid it.
2379 return despeculateCountZeros(II, TLI, DL, ModifiedDT);
2380 }
2381
2382 if (TLI) {
2383 SmallVector<Value*, 2> PtrOps;
2384 Type *AccessTy;
2385 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2386 while (!PtrOps.empty()) {
2387 Value *PtrVal = PtrOps.pop_back_val();
2388 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2389 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2390 return true;
2391 }
2392 }
2393 }
2394
2395 // From here on out we're working with named functions.
2396 if (!CI->getCalledFunction()) return false;
5. Assuming the condition is false
6. Taking false branch
2397
2398 // Lower all default uses of _chk calls. This is very similar
2399 // to what InstCombineCalls does, but here we are only lowering calls
2400 // to fortified library functions (e.g. __memcpy_chk) that have the default
2401 // "don't know" as the objectsize. Anything else should be left alone.
2402 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2403 if (Value *V = Simplifier.optimizeCall(CI)) {
7. Assuming 'V' is null
8. Taking false branch
2404 CI->replaceAllUsesWith(V);
2405 CI->eraseFromParent();
2406 return true;
2407 }
2408
2409 LibFunc Func;
2410 if (TLInfo->getLibFunc(ImmutableCallSite(CI), Func) &&
9. Assuming the condition is true
2411 Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TLI, DL)) {
10. Assuming 'Func' is equal to LibFunc_memcmp
11. Passing null pointer value via 3rd parameter 'TLI'
12. Calling 'expandMemCmp'
2412 ModifiedDT = true;
2413 return true;
2414 }
2415 return false;
2416}
2417
2418/// Look for opportunities to duplicate return instructions to the predecessor
2419/// to enable tail call optimizations. The case it is currently looking for is:
2420/// @code
2421/// bb0:
2422/// %tmp0 = tail call i32 @f0()
2423/// br label %return
2424/// bb1:
2425/// %tmp1 = tail call i32 @f1()
2426/// br label %return
2427/// bb2:
2428/// %tmp2 = tail call i32 @f2()
2429/// br label %return
2430/// return:
2431/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2432/// ret i32 %retval
2433/// @endcode
2434///
2435/// =>
2436///
2437/// @code
2438/// bb0:
2439/// %tmp0 = tail call i32 @f0()
2440/// ret i32 %tmp0
2441/// bb1:
2442/// %tmp1 = tail call i32 @f1()
2443/// ret i32 %tmp1
2444/// bb2:
2445/// %tmp2 = tail call i32 @f2()
2446/// ret i32 %tmp2
2447/// @endcode
2448bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
2449 if (!TLI)
2450 return false;
2451
2452 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2453 if (!RetI)
2454 return false;
2455
2456 PHINode *PN = nullptr;
2457 BitCastInst *BCI = nullptr;
2458 Value *V = RetI->getReturnValue();
2459 if (V) {
2460 BCI = dyn_cast<BitCastInst>(V);
2461 if (BCI)
2462 V = BCI->getOperand(0);
2463
2464 PN = dyn_cast<PHINode>(V);
2465 if (!PN)
2466 return false;
2467 }
2468
2469 if (PN && PN->getParent() != BB)
2470 return false;
2471
2472 // Make sure there are no instructions between the PHI and return, or that the
2473 // return is the first instruction in the block.
2474 if (PN) {
2475 BasicBlock::iterator BI = BB->begin();
2476 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
2477 if (&*BI == BCI)
2478 // Also skip over the bitcast.
2479 ++BI;
2480 if (&*BI != RetI)
2481 return false;
2482 } else {
2483 BasicBlock::iterator BI = BB->begin();
2484 while (isa<DbgInfoIntrinsic>(BI)) ++BI;
2485 if (&*BI != RetI)
2486 return false;
2487 }
2488
2489 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2490 /// call.
2491 const Function *F = BB->getParent();
2492 SmallVector<CallInst*, 4> TailCalls;
2493 if (PN) {
2494 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2495 CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
2496 // Make sure the phi value is indeed produced by the tail call.
2497 if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
2498 TLI->mayBeEmittedAsTailCall(CI) &&
2499 attributesPermitTailCall(F, CI, RetI, *TLI))
2500 TailCalls.push_back(CI);
2501 }
2502 } else {
2503 SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2504 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
2505 if (!VisitedBBs.insert(*PI).second)
2506 continue;
2507
2508 BasicBlock::InstListType &InstList = (*PI)->getInstList();
2509 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
2510 BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
2511 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
2512 if (RI == RE)
2513 continue;
2514
2515 CallInst *CI = dyn_cast<CallInst>(&*RI);
2516 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2517 attributesPermitTailCall(F, CI, RetI, *TLI))
2518 TailCalls.push_back(CI);
2519 }
2520 }
2521
2522 bool Changed = false;
2523 for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
2524 CallInst *CI = TailCalls[i];
2525 CallSite CS(CI);
2526
2527 // Conservatively require the attributes of the call to match those of the
2528 // return. Ignore noalias because it doesn't affect the call sequence.
2529 AttributeList CalleeAttrs = CS.getAttributes();
2530 if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
2531 .removeAttribute(Attribute::NoAlias) !=
2532 AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
2533 .removeAttribute(Attribute::NoAlias))
2534 continue;
2535
2536 // Make sure the call instruction is followed by an unconditional branch to
2537 // the return block.
2538 BasicBlock *CallBB = CI->getParent();
2539 BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
2540 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2541 continue;
2542
2543 // Duplicate the return into CallBB.
2544 (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
2545 ModifiedDT = Changed = true;
2546 ++NumRetsDup;
2547 }
2548
2549 // If we eliminated all predecessors of the block, delete the block now.
2550 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
2551 BB->eraseFromParent();
2552
2553 return Changed;
2554}
2555
2556//===----------------------------------------------------------------------===//
2557// Memory Optimization
2558//===----------------------------------------------------------------------===//
2559
2560namespace {
2561
2562/// This is an extended version of TargetLowering::AddrMode
2563/// which holds actual Value*'s for register values.
2564struct ExtAddrMode : public TargetLowering::AddrMode {
2565 Value *BaseReg;
2566 Value *ScaledReg;
2567 ExtAddrMode() : BaseReg(nullptr), ScaledReg(nullptr) {}
2568 void print(raw_ostream &OS) const;
2569 void dump() const;
2570
2571 bool operator==(const ExtAddrMode& O) const {
2572 return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
2573 (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
2574 (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
2575 }
2576};
2577
2578#ifndef NDEBUG
2579static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2580 AM.print(OS);
2581 return OS;
2582}
2583#endif
2584
2585void ExtAddrMode::print(raw_ostream &OS) const {
2586 bool NeedPlus = false;
2587 OS << "[";
2588 if (BaseGV) {
2589 OS << (NeedPlus ? " + " : "")
2590 << "GV:";
2591 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2592 NeedPlus = true;
2593 }
2594
2595 if (BaseOffs) {
2596 OS << (NeedPlus ? " + " : "")
2597 << BaseOffs;
2598 NeedPlus = true;
2599 }
2600
2601 if (BaseReg) {
2602 OS << (NeedPlus ? " + " : "")
2603 << "Base:";
2604 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2605 NeedPlus = true;
2606 }
2607 if (Scale) {
2608 OS << (NeedPlus ? " + " : "")
2609 << Scale << "*";
2610 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2611 }
2612
2613 OS << ']';
2614}
2615
2616#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2617LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2618 print(dbgs());
2619 dbgs() << '\n';
2620}
2621#endif
2622
2623/// \brief This class provides transaction based operation on the IR.
2624/// Every change made through this class is recorded in the internal state and
2625/// can be undone (rollback) until commit is called.
2626class TypePromotionTransaction {
2627
2628 /// \brief This represents the common interface of the individual transaction.
2629 /// Each class implements the logic for doing one specific modification on
2630 /// the IR via the TypePromotionTransaction.
2631 class TypePromotionAction {
2632 protected:
2633 /// The Instruction modified.
2634 Instruction *Inst;
2635
2636 public:
2637 /// \brief Constructor of the action.
2638 /// The constructor performs the related action on the IR.
2639 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2640
2641 virtual ~TypePromotionAction() {}
2642
2643 /// \brief Undo the modification done by this action.
2644 /// When this method is called, the IR must be in the same state as it was
2645 /// before this action was applied.
2646 /// \pre Undoing the action works if and only if the IR is in the exact same
2647 /// state as it was directly after this action was applied.
2648 virtual void undo() = 0;
2649
2650 /// \brief Advocate every change made by this action.
2651 /// When the results on the IR of the action are to be kept, it is important
2652 /// to call this function, otherwise hidden information may be kept forever.
2653 virtual void commit() {
2654 // Nothing to be done, this action is not doing anything.
2655 }
2656 };
2657
2658 /// \brief Utility to remember the position of an instruction.
2659 class InsertionHandler {
2660 /// Position of an instruction.
2661 /// Either an instruction:
2662 /// - Is the first in a basic block: BB is used.
2663    ///  - Has a previous instruction: PrevInst is used.
2664 union {
2665 Instruction *PrevInst;
2666 BasicBlock *BB;
2667 } Point;
2668 /// Remember whether or not the instruction had a previous instruction.
2669 bool HasPrevInstruction;
2670
2671 public:
2672 /// \brief Record the position of \p Inst.
2673 InsertionHandler(Instruction *Inst) {
2674 BasicBlock::iterator It = Inst->getIterator();
2675 HasPrevInstruction = (It != (Inst->getParent()->begin()));
2676 if (HasPrevInstruction)
2677 Point.PrevInst = &*--It;
2678 else
2679 Point.BB = Inst->getParent();
2680 }
2681
2682 /// \brief Insert \p Inst at the recorded position.
2683 void insert(Instruction *Inst) {
2684 if (HasPrevInstruction) {
2685 if (Inst->getParent())
2686 Inst->removeFromParent();
2687 Inst->insertAfter(Point.PrevInst);
2688 } else {
2689 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2690 if (Inst->getParent())
2691 Inst->moveBefore(Position);
2692 else
2693 Inst->insertBefore(Position);
2694 }
2695 }
2696 };
2697
2698 /// \brief Move an instruction before another.
2699 class InstructionMoveBefore : public TypePromotionAction {
2700 /// Original position of the instruction.
2701 InsertionHandler Position;
2702
2703 public:
2704 /// \brief Move \p Inst before \p Before.
2705 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2706 : TypePromotionAction(Inst), Position(Inst) {
2707 DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before << "\n");
2708 Inst->moveBefore(Before);
2709 }
2710
2711 /// \brief Move the instruction back to its original position.
2712 void undo() override {
2713 DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2714 Position.insert(Inst);
2715 }
2716 };
2717
2718 /// \brief Set the operand of an instruction with a new value.
2719 class OperandSetter : public TypePromotionAction {
2720 /// Original operand of the instruction.
2721 Value *Origin;
2722 /// Index of the modified instruction.
2723 unsigned Idx;
2724
2725 public:
2726 /// \brief Set \p Idx operand of \p Inst with \p NewVal.
2727 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2728 : TypePromotionAction(Inst), Idx(Idx) {
2729 DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2730 << "for:" << *Inst << "\n"
2731 << "with:" << *NewVal << "\n");
2732 Origin = Inst->getOperand(Idx);
2733 Inst->setOperand(Idx, NewVal);
2734 }
2735
2736 /// \brief Restore the original value of the instruction.
2737 void undo() override {
2738 DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2739 << "for: " << *Inst << "\n"
2740 << "with: " << *Origin << "\n");
2741 Inst->setOperand(Idx, Origin);
2742 }
2743 };
2744
2745 /// \brief Hide the operands of an instruction.
2746 /// Behave as if this instruction were not using any of its operands.
2747 class OperandsHider : public TypePromotionAction {
2748 /// The list of original operands.
2749 SmallVector<Value *, 4> OriginalValues;
2750
2751 public:
2752 /// \brief Remove \p Inst from the uses of the operands of \p Inst.
2753 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2754 DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2755 unsigned NumOpnds = Inst->getNumOperands();
2756 OriginalValues.reserve(NumOpnds);
2757 for (unsigned It = 0; It < NumOpnds; ++It) {
2758 // Save the current operand.
2759 Value *Val = Inst->getOperand(It);
2760 OriginalValues.push_back(Val);
2761 // Set a dummy one.
2762 // We could use OperandSetter here, but that would imply an overhead
2763 // that we are not willing to pay.
2764 Inst->setOperand(It, UndefValue::get(Val->getType()));
2765 }
2766 }
2767
2768 /// \brief Restore the original list of uses.
2769 void undo() override {
2770 DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2771 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2772 Inst->setOperand(It, OriginalValues[It]);
2773 }
2774 };
2775
2776 /// \brief Build a truncate instruction.
2777 class TruncBuilder : public TypePromotionAction {
2778 Value *Val;
2779 public:
2780 /// \brief Build a truncate instruction of \p Opnd producing a \p Ty
2781 /// result.
2782 /// trunc Opnd to Ty.
2783 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2784 IRBuilder<> Builder(Opnd);
2785 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2786 DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2787 }
2788
2789 /// \brief Get the built value.
2790 Value *getBuiltValue() { return Val; }
2791
2792 /// \brief Remove the built instruction.
2793 void undo() override {
2794 DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2795 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2796 IVal->eraseFromParent();
2797 }
2798 };
2799
2800 /// \brief Build a sign extension instruction.
2801 class SExtBuilder : public TypePromotionAction {
2802 Value *Val;
2803 public:
2804 /// \brief Build a sign extension instruction of \p Opnd producing a \p Ty
2805 /// result.
2806 /// sext Opnd to Ty.
2807 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2808 : TypePromotionAction(InsertPt) {
2809 IRBuilder<> Builder(InsertPt);
2810 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2811 DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2812 }
2813
2814 /// \brief Get the built value.
2815 Value *getBuiltValue() { return Val; }
2816
2817 /// \brief Remove the built instruction.
2818 void undo() override {
2819 DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2820 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2821 IVal->eraseFromParent();
2822 }
2823 };
2824
2825 /// \brief Build a zero extension instruction.
2826 class ZExtBuilder : public TypePromotionAction {
2827 Value *Val;
2828 public:
2829 /// \brief Build a zero extension instruction of \p Opnd producing a \p Ty
2830 /// result.
2831 /// zext Opnd to Ty.
2832 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2833 : TypePromotionAction(InsertPt) {
2834 IRBuilder<> Builder(InsertPt);
2835 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2836 DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2837 }
2838
2839 /// \brief Get the built value.
2840 Value *getBuiltValue() { return Val; }
2841
2842 /// \brief Remove the built instruction.
2843 void undo() override {
2844 DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2845 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2846 IVal->eraseFromParent();
2847 }
2848 };
2849
2850 /// \brief Mutate an instruction to another type.
2851 class TypeMutator : public TypePromotionAction {
2852 /// Record the original type.
2853 Type *OrigTy;
2854
2855 public:
2856 /// \brief Mutate the type of \p Inst into \p NewTy.
2857 TypeMutator(Instruction *Inst, Type *NewTy)
2858 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2859 DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2860 << "\n");
2861 Inst->mutateType(NewTy);
2862 }
2863
2864 /// \brief Mutate the instruction back to its original type.
2865 void undo() override {
2866 DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2867 << "\n");
2868 Inst->mutateType(OrigTy);
2869 }
2870 };
2871
2872 /// \brief Replace the uses of an instruction by another instruction.
2873 class UsesReplacer : public TypePromotionAction {
2874 /// Helper structure to keep track of the replaced uses.
2875 struct InstructionAndIdx {
2876 /// The instruction using the instruction.
2877 Instruction *Inst;
2878 /// The operand index at which the replaced instruction is used by Inst.
2879 unsigned Idx;
2880 InstructionAndIdx(Instruction *Inst, unsigned Idx)
2881 : Inst(Inst), Idx(Idx) {}
2882 };
2883
2884 /// Keep track of the original uses (pair Instruction, Index).
2885 SmallVector<InstructionAndIdx, 4> OriginalUses;
2886 typedef SmallVectorImpl<InstructionAndIdx>::iterator use_iterator;
2887
2888 public:
2889 /// \brief Replace all the use of \p Inst by \p New.
2890 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2891 DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2892 << "\n");
2893 // Record the original uses.
2894 for (Use &U : Inst->uses()) {
2895 Instruction *UserI = cast<Instruction>(U.getUser());
2896 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2897 }
2898 // Now, we can replace the uses.
2899 Inst->replaceAllUsesWith(New);
2900 }
2901
2902 /// \brief Reassign the original uses of Inst to Inst.
2903 void undo() override {
2904 DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2905 for (use_iterator UseIt = OriginalUses.begin(),
2906 EndIt = OriginalUses.end();
2907 UseIt != EndIt; ++UseIt) {
2908 UseIt->Inst->setOperand(UseIt->Idx, Inst);
2909 }
2910 }
2911 };
2912
2913 /// \brief Remove an instruction from the IR.
2914 class InstructionRemover : public TypePromotionAction {
2915 /// Original position of the instruction.
2916 InsertionHandler Inserter;
2917 /// Helper structure to hide all the links to the instruction. In other
2918 /// words, this makes the instruction appear as if it had been removed.
2919 OperandsHider Hider;
2920 /// Keep track of the uses replaced, if any.
2921 UsesReplacer *Replacer;
2922 /// Keep track of instructions removed.
2923 SetOfInstrs &RemovedInsts;
2924
2925 public:
2926 /// \brief Remove all references to \p Inst and optionally replace all its
2927 /// uses with New.
2928 /// \p RemovedInsts Keep track of the instructions removed by this Action.
2929 /// \pre If !Inst->use_empty(), then New != nullptr
2930 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2931 Value *New = nullptr)
2932 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2933 Replacer(nullptr), RemovedInsts(RemovedInsts) {
2934 if (New)
2935 Replacer = new UsesReplacer(Inst, New);
2936 DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2937 RemovedInsts.insert(Inst);
2938 /// The instructions removed here will be freed after completing
2939 /// optimizeBlock() for all blocks as we need to keep track of the
2940 /// removed instructions during promotion.
2941 Inst->removeFromParent();
2942 }
2943
2944 ~InstructionRemover() override { delete Replacer; }
2945
2946 /// \brief Resurrect the instruction and reassign it to the proper uses if a
2947 /// new value was provided when building this action.
2948 void undo() override {
2949 DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2950 Inserter.insert(Inst);
2951 if (Replacer)
2952 Replacer->undo();
2953 Hider.undo();
2954 RemovedInsts.erase(Inst);
2955 }
2956 };
2957
2958public:
2959 /// Restoration point.
2960 /// The restoration point is a pointer to an action instead of an iterator
2961 /// because the iterator may be invalidated but not the pointer.
2962 typedef const TypePromotionAction *ConstRestorationPt;
2963
2964 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2965 : RemovedInsts(RemovedInsts) {}
2966
2967 /// Commit every change made in this transaction.
2968 void commit();
2969 /// Undo all the changes made after the given point.
2970 void rollback(ConstRestorationPt Point);
2971 /// Get the current restoration point.
2972 ConstRestorationPt getRestorationPoint() const;
2973
2974 /// \name API for IR modification with state keeping to support rollback.
2975 /// @{
2976 /// Same as Instruction::setOperand.
2977 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2978 /// Same as Instruction::eraseFromParent.
2979 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2980 /// Same as Value::replaceAllUsesWith.
2981 void replaceAllUsesWith(Instruction *Inst, Value *New);
2982 /// Same as Value::mutateType.
2983 void mutateType(Instruction *Inst, Type *NewTy);
2984 /// Same as IRBuilder::createTrunc.
2985 Value *createTrunc(Instruction *Opnd, Type *Ty);
2986 /// Same as IRBuilder::createSExt.
2987 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2988 /// Same as IRBuilder::createZExt.
2989 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2990 /// Same as Instruction::moveBefore.
2991 void moveBefore(Instruction *Inst, Instruction *Before);
2992 /// @}
2993
2994private:
2995 /// The ordered list of actions made so far.
2996 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2997 typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
2998 SetOfInstrs &RemovedInsts;
2999};
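// Illustrative sketch, not part of the original source: how a client is
// expected to drive the transaction declared above. Everything except the
// TypePromotionTransaction API itself (the helper name and its parameters)
// is a hypothetical example.
static void exampleTransactionUsage(SetOfInstrs &RemovedInsts,
                                    Instruction *Inst, Value *NewVal,
                                    bool KeepChanges) {
  TypePromotionTransaction TPT(RemovedInsts);
  // Remember where we are so a failed attempt can be undone later.
  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
      TPT.getRestorationPoint();
  // Route every IR change through the transaction so it gets recorded.
  TPT.setOperand(Inst, 0, NewVal);
  if (KeepChanges)
    TPT.commit();                  // Keep all recorded actions.
  else
    TPT.rollback(LastKnownGood);   // Undo everything made after the point.
}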
3000
3001void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3002 Value *NewVal) {
3003 Actions.push_back(
3004 make_unique<TypePromotionTransaction::OperandSetter>(Inst, Idx, NewVal));
3005}
3006
3007void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3008 Value *NewVal) {
3009 Actions.push_back(
3010 make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
3011 RemovedInsts, NewVal));
3012}
3013
3014void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3015 Value *New) {
3016 Actions.push_back(make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3017}
3018
3019void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3020 Actions.push_back(make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3021}
3022
3023Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
3024 Type *Ty) {
3025 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3026 Value *Val = Ptr->getBuiltValue();
3027 Actions.push_back(std::move(Ptr));
3028 return Val;
3029}
3030
3031Value *TypePromotionTransaction::createSExt(Instruction *Inst,
3032 Value *Opnd, Type *Ty) {
3033 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3034 Value *Val = Ptr->getBuiltValue();
3035 Actions.push_back(std::move(Ptr));
3036 return Val;
3037}
3038
3039Value *TypePromotionTransaction::createZExt(Instruction *Inst,
3040 Value *Opnd, Type *Ty) {
3041 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3042 Value *Val = Ptr->getBuiltValue();
3043 Actions.push_back(std::move(Ptr));
3044 return Val;
3045}
3046
3047void TypePromotionTransaction::moveBefore(Instruction *Inst,
3048 Instruction *Before) {
3049 Actions.push_back(
3050 make_unique<TypePromotionTransaction::InstructionMoveBefore>(Inst, Before));
3051}
3052
3053TypePromotionTransaction::ConstRestorationPt
3054TypePromotionTransaction::getRestorationPoint() const {
3055 return !Actions.empty() ? Actions.back().get() : nullptr;
3056}
3057
3058void TypePromotionTransaction::commit() {
3059 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
3060 ++It)
3061 (*It)->commit();
3062 Actions.clear();
3063}
3064
3065void TypePromotionTransaction::rollback(
3066 TypePromotionTransaction::ConstRestorationPt Point) {
3067 while (!Actions.empty() && Point != Actions.back().get()) {
3068 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3069 Curr->undo();
3070 }
3071}
3072
3073/// \brief A helper class for matching addressing modes.
3074///
3075/// This encapsulates the logic for matching the target-legal addressing modes.
3076class AddressingModeMatcher {
3077 SmallVectorImpl<Instruction*> &AddrModeInsts;
3078 const TargetLowering &TLI;
3079 const TargetRegisterInfo &TRI;
3080 const DataLayout &DL;
3081
3082 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3083 /// the memory instruction that we're computing this address for.
3084 Type *AccessTy;
3085 unsigned AddrSpace;
3086 Instruction *MemoryInst;
3087
3088 /// This is the addressing mode that we're building up. This is
3089 /// part of the return value of this addressing mode matching stuff.
3090 ExtAddrMode &AddrMode;
3091
3092 /// The instructions inserted by other CodeGenPrepare optimizations.
3093 const SetOfInstrs &InsertedInsts;
3094 /// A map from the instructions to their type before promotion.
3095 InstrToOrigTy &PromotedInsts;
3096 /// The ongoing transaction where every action should be registered.
3097 TypePromotionTransaction &TPT;
3098
3099 /// This is set to true when we should not do profitability checks.
3100 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3101 bool IgnoreProfitability;
3102
3103 AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
3104 const TargetLowering &TLI,
3105 const TargetRegisterInfo &TRI,
3106 Type *AT, unsigned AS,
3107 Instruction *MI, ExtAddrMode &AM,
3108 const SetOfInstrs &InsertedInsts,
3109 InstrToOrigTy &PromotedInsts,
3110 TypePromotionTransaction &TPT)
3111 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3112 DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
3113 MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
3114 PromotedInsts(PromotedInsts), TPT(TPT) {
3115 IgnoreProfitability = false;
3116 }
3117public:
3118
3119 /// Find the maximal addressing mode that a load/store of V can fold,
3120 /// given an access type of AccessTy. This returns a list of involved
3121 /// instructions in AddrModeInsts.
3122 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3123 /// optimizations.
3124 /// \p PromotedInsts maps the instructions to their type before promotion.
3125 /// \p TPT The ongoing transaction where every action should be registered.
3126 static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
3127 Instruction *MemoryInst,
3128 SmallVectorImpl<Instruction*> &AddrModeInsts,
3129 const TargetLowering &TLI,
3130 const TargetRegisterInfo &TRI,
3131 const SetOfInstrs &InsertedInsts,
3132 InstrToOrigTy &PromotedInsts,
3133 TypePromotionTransaction &TPT) {
3134 ExtAddrMode Result;
3135
3136 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
3137 AccessTy, AS,
3138 MemoryInst, Result, InsertedInsts,
3139 PromotedInsts, TPT).matchAddr(V, 0);
3140 (void)Success; assert(Success && "Couldn't select *anything*?");
3141 return Result;
3142 }
3143private:
3144 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3145 bool matchAddr(Value *V, unsigned Depth);
3146 bool matchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth,
3147 bool *MovedAway = nullptr);
3148 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3149 ExtAddrMode &AMBefore,
3150 ExtAddrMode &AMAfter);
3151 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3152 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3153 Value *PromotedOperand) const;
3154};
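// Illustrative sketch, not part of the original source: a hypothetical
// caller of AddressingModeMatcher::Match for the address of a load. The
// wrapper name and its parameters are assumptions; only the Match()
// signature above is taken from the code.
static ExtAddrMode exampleMatchLoadAddress(LoadInst *Load,
                                           const TargetLowering &TLI,
                                           const TargetRegisterInfo &TRI,
                                           const SetOfInstrs &InsertedInsts,
                                           InstrToOrigTy &PromotedInsts,
                                           SetOfInstrs &RemovedInsts) {
  SmallVector<Instruction *, 16> AddrModeInsts;
  TypePromotionTransaction TPT(RemovedInsts);
  ExtAddrMode AM = AddressingModeMatcher::Match(
      Load->getPointerOperand(), Load->getType(),
      Load->getPointerAddressSpace(), Load, AddrModeInsts, TLI, TRI,
      InsertedInsts, PromotedInsts, TPT);
  // On return, AddrModeInsts lists the instructions folded into AM.
  return AM;
}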
3155
3156/// Try adding ScaleReg*Scale to the current addressing mode.
3157/// Return true and update AddrMode if this addr mode is legal for the target,
3158/// false if not.
3159bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3160 unsigned Depth) {
3161 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3162 // mode. Just process that directly.
3163 if (Scale == 1)
3164 return matchAddr(ScaleReg, Depth);
3165
3166 // If the scale is 0, it takes nothing to add this.
3167 if (Scale == 0)
3168 return true;
3169
3170 // If we already have a scale of this value, we can add to it, otherwise, we
3171 // need an available scale field.
3172 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3173 return false;
3174
3175 ExtAddrMode TestAddrMode = AddrMode;
3176
3177 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3178 // [A+B + A*7] -> [B+A*8].
3179 TestAddrMode.Scale += Scale;
3180 TestAddrMode.ScaledReg = ScaleReg;
3181
3182 // If the new address isn't legal, bail out.
3183 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3184 return false;
3185
3186 // It was legal, so commit it.
3187 AddrMode = TestAddrMode;
3188
3189 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3190 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3191 // X*Scale + C*Scale to addr mode.
3192 ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3193 if (isa<Instruction>(ScaleReg) && // not a constant expr.
3194 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3195 TestAddrMode.ScaledReg = AddLHS;
3196 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3197
3198 // If this addressing mode is legal, commit it and remember that we folded
3199 // this instruction.
3200 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3201 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3202 AddrMode = TestAddrMode;
3203 return true;
3204 }
3205 }
3206
3207 // Otherwise, not (x+c)*scale, just return what we have.
3208 return true;
3209}
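// Illustrative example, not part of the original source: assuming no scale
// has been matched yet, folding %idx2 = add i64 %idx, 3 with Scale == 4
// takes the (x+c)*scale path above and yields ScaledReg = %idx, Scale = 4
// and BaseOffs increased by 12, provided the target accepts that mode.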
3210
3211/// This is a little filter, which returns true if an addressing computation
3212/// involving I might be folded into a load/store accessing it.
3213/// This doesn't need to be perfect, but needs to accept at least
3214/// the set of instructions that MatchOperationAddr can.
3215static bool MightBeFoldableInst(Instruction *I) {
3216 switch (I->getOpcode()) {
3217 case Instruction::BitCast:
3218 case Instruction::AddrSpaceCast:
3219 // Don't touch identity bitcasts.
3220 if (I->getType() == I->getOperand(0)->getType())
3221 return false;
3222 return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
3223 case Instruction::PtrToInt:
3224 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3225 return true;
3226 case Instruction::IntToPtr:
3227 // We know the input is intptr_t, so this is foldable.
3228 return true;
3229 case Instruction::Add:
3230 return true;
3231 case Instruction::Mul:
3232 case Instruction::Shl:
3233 // Can only handle X*C and X << C.
3234 return isa<ConstantInt>(I->getOperand(1));
3235 case Instruction::GetElementPtr:
3236 return true;
3237 default:
3238 return false;
3239 }
3240}
3241
3242/// \brief Check whether or not \p Val is a legal instruction for \p TLI.
3243/// \note \p Val is assumed to be the product of some type promotion.
3244/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3245/// to be legal, as the non-promoted value would have had the same state.
3246static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3247 const DataLayout &DL, Value *Val) {
3248 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3249 if (!PromotedInst)
3250 return false;
3251 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3252 // If the ISDOpcode is undefined, it was undefined before the promotion.
3253 if (!ISDOpcode)
3254 return true;
3255 // Otherwise, check if the promoted instruction is legal or not.
3256 return TLI.isOperationLegalOrCustom(
3257 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3258}
3259
3260 /// \brief Helper class to perform type promotion.
3261class TypePromotionHelper {
3262 /// \brief Utility function to check whether or not a sign or zero extension
3263 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3264 /// either using the operands of \p Inst or promoting \p Inst.
3265 /// The type of the extension is defined by \p IsSExt.
3266 /// In other words, check if:
3267 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3268 /// #1 Promotion applies:
3269 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3270 /// #2 Operand reuses:
3271 /// ext opnd1 to ConsideredExtType.
3272 /// \p PromotedInsts maps the instructions to their type before promotion.
3273 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3274 const InstrToOrigTy &PromotedInsts, bool IsSExt);
3275
3276 /// \brief Utility function to determine if \p OpIdx should be promoted when
3277 /// promoting \p Inst.
3278 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3279 return !(isa<SelectInst>(Inst) && OpIdx == 0);
3280 }
3281
3282 /// \brief Utility function to promote the operand of \p Ext when this
3283 /// operand is a promotable trunc or sext or zext.
3284 /// \p PromotedInsts maps the instructions to their type before promotion.
3285 /// \p CreatedInstsCost[out] contains the cost of all instructions
3286 /// created to promote the operand of Ext.
3287 /// Newly added extensions are inserted in \p Exts.
3288 /// Newly added truncates are inserted in \p Truncs.
3289 /// Should never be called directly.
3290 /// \return The promoted value which is used instead of Ext.
3291 static Value *promoteOperandForTruncAndAnyExt(
3292 Instruction *Ext, TypePromotionTransaction &TPT,
3293 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3294 SmallVectorImpl<Instruction *> *Exts,
3295 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3296
3297 /// \brief Utility function to promote the operand of \p Ext when this
3298 /// operand is promotable and is not a supported trunc or sext.
3299 /// \p PromotedInsts maps the instructions to their type before promotion.
3300 /// \p CreatedInstsCost[out] contains the cost of all the instructions
3301 /// created to promote the operand of Ext.
3302 /// Newly added extensions are inserted in \p Exts.
3303 /// Newly added truncates are inserted in \p Truncs.
3304 /// Should never be called directly.
3305 /// \return The promoted value which is used instead of Ext.
3306 static Value *promoteOperandForOther(Instruction *Ext,
3307 TypePromotionTransaction &TPT,
3308 InstrToOrigTy &PromotedInsts,
3309 unsigned &CreatedInstsCost,
3310 SmallVectorImpl<Instruction *> *Exts,
3311 SmallVectorImpl<Instruction *> *Truncs,
3312 const TargetLowering &TLI, bool IsSExt);
3313
3314 /// \see promoteOperandForOther.
3315 static Value *signExtendOperandForOther(
3316 Instruction *Ext, TypePromotionTransaction &TPT,
3317 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3318 SmallVectorImpl<Instruction *> *Exts,
3319 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3320 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3321 Exts, Truncs, TLI, true);
3322 }
3323
3324 /// \see promoteOperandForOther.
3325 static Value *zeroExtendOperandForOther(
3326 Instruction *Ext, TypePromotionTransaction &TPT,
3327 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3328 SmallVectorImpl<Instruction *> *Exts,
3329 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3330 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3331 Exts, Truncs, TLI, false);
3332 }
3333
3334public:
3335 /// Type for the utility function that promotes the operand of Ext.
3336 typedef Value *(*Action)(Instruction *Ext, TypePromotionTransaction &TPT,
3337 InstrToOrigTy &PromotedInsts,
3338 unsigned &CreatedInstsCost,
3339 SmallVectorImpl<Instruction *> *Exts,
3340 SmallVectorImpl<Instruction *> *Truncs,
3341 const TargetLowering &TLI);
3342 /// \brief Given a sign/zero extend instruction \p Ext, return the appropriate
3343 /// action to promote the operand of \p Ext instead of using Ext.
3344 /// \return NULL if no promotable action is possible with the current
3345 /// sign extension.
3346 /// \p InsertedInsts keeps track of all the instructions inserted by the
3347 /// other CodeGenPrepare optimizations. This information is important
3348 /// because we do not want to promote these instructions as CodeGenPrepare
3349 /// will reinsert them later. Thus creating an infinite loop: create/remove.
3350 /// \p PromotedInsts maps the instructions to their type before promotion.
3351 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3352 const TargetLowering &TLI,
3353 const InstrToOrigTy &PromotedInsts);
3354};
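// Illustrative sketch, not part of the original source: how the Action
// returned by getAction is meant to be invoked (mirroring the call made in
// matchOperationAddr further below). The wrapper name is an assumption.
static Value *examplePromoteExtOperand(Instruction *Ext,
                                       TypePromotionTransaction &TPT,
                                       InstrToOrigTy &PromotedInsts,
                                       const SetOfInstrs &InsertedInsts,
                                       const TargetLowering &TLI) {
  TypePromotionHelper::Action TPH =
      TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
  if (!TPH)
    return nullptr;                // No way to promote the operand of Ext.
  unsigned CreatedInstsCost = 0;
  return TPH(Ext, TPT, PromotedInsts, CreatedInstsCost,
             /*Exts=*/nullptr, /*Truncs=*/nullptr, TLI);
}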
3355
3356bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3357 Type *ConsideredExtType,
3358 const InstrToOrigTy &PromotedInsts,
3359 bool IsSExt) {
3360 // The promotion helper does not know how to deal with vector types yet.
3361 // To be able to fix that, we would need to fix the places where we
3362 // statically extend, e.g., constants and such.
3363 if (Inst->getType()->isVectorTy())
3364 return false;
3365
3366 // We can always get through zext.
3367 if (isa<ZExtInst>(Inst))
3368 return true;
3369
3370 // sext(sext) is ok too.
3371 if (IsSExt && isa<SExtInst>(Inst))
3372 return true;
3373
3374 // We can get through binary operator, if it is legal. In other words, the
3375 // binary operator must have a nuw or nsw flag.
3376 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3377 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3378 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3379 (IsSExt && BinOp->hasNoSignedWrap())))
3380 return true;
3381
3382 // Check if we can do the following simplification.
3383 // ext(trunc(opnd)) --> ext(opnd)
3384 if (!isa<TruncInst>(Inst))
3385 return false;
3386
3387 Value *OpndVal = Inst->getOperand(0);
3388 // Check if we can use this operand in the extension.
3389 // If the type is larger than the result type of the extension, we cannot.
3390 if (!OpndVal->getType()->isIntegerTy() ||
3391 OpndVal->getType()->getIntegerBitWidth() >
3392 ConsideredExtType->getIntegerBitWidth())
3393 return false;
3394
3395 // If the operand of the truncate is not an instruction, we will not have
3396 // any information on the dropped bits.
3397 // (Actually we could for constant but it is not worth the extra logic).
3398 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3399 if (!Opnd)
3400 return false;
3401
3402 // Check if the source of the type is narrow enough.
3403 // I.e., check that trunc just drops extended bits of the same kind as
3404 // the extension.
3405 // #1 get the type of the operand and check the kind of the extended bits.
3406 const Type *OpndType;
3407 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3408 if (It != PromotedInsts.end() && It->second.getInt() == IsSExt)
3409 OpndType = It->second.getPointer();
3410 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3411 OpndType = Opnd->getOperand(0)->getType();
3412 else
3413 return false;
3414
3415 // #2 check that the truncate just drops extended bits.
3416 return Inst->getType()->getIntegerBitWidth() >=
3417 OpndType->getIntegerBitWidth();
3418}
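// Illustrative example, not part of the original source: for
//   %w = sext i8 %x to i32
//   %t = trunc i32 %w to i16
//   %e = sext i16 %t to i64
// the trunc only drops bits that the earlier sext produced, so
// canGetThrough(%t, i64, ..., /*IsSExt=*/true) returns true.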
3419
3420TypePromotionHelper::Action TypePromotionHelper::getAction(
3421 Instruction *Ext, const SetOfInstrs &InsertedInsts,
3422 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3423 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3424 "Unexpected instruction type");
3425 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3426 Type *ExtTy = Ext->getType();
3427 bool IsSExt = isa<SExtInst>(Ext);
3428 // If the operand of the extension is not an instruction, we cannot
3429 // get through.
3430 // If it is, check whether we can get through it.
3431 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3432 return nullptr;
3433
3434 // Do not promote if the operand has been added by codegenprepare.
3435 // Otherwise, it means we are undoing an optimization that is likely to be
3436 // redone, thus causing potential infinite loop.
3437 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3438 return nullptr;
3439
3440 // SExt or Trunc instructions.
3441 // Return the related handler.
3442 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3443 isa<ZExtInst>(ExtOpnd))
3444 return promoteOperandForTruncAndAnyExt;
3445
3446 // Regular instruction.
3447 // Abort early if we will have to insert non-free instructions.
3448 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3449 return nullptr;
3450 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3451}
3452
3453Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3454 llvm::Instruction *SExt, TypePromotionTransaction &TPT,
3455 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3456 SmallVectorImpl<Instruction *> *Exts,
3457 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3458 // By construction, the operand of SExt is an instruction. Otherwise we cannot
3459 // get through it and this method should not be called.
3460 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3461 Value *ExtVal = SExt;
3462 bool HasMergedNonFreeExt = false;
3463 if (isa<ZExtInst>(SExtOpnd)) {
3464 // Replace s|zext(zext(opnd))
3465 // => zext(opnd).
3466 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3467 Value *ZExt =
3468 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3469 TPT.replaceAllUsesWith(SExt, ZExt);
3470 TPT.eraseInstruction(SExt);
3471 ExtVal = ZExt;
3472 } else {
3473 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3474 // => z|sext(opnd).
3475 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3476 }
3477 CreatedInstsCost = 0;
3478
3479 // Remove dead code.
3480 if (SExtOpnd->use_empty())
3481 TPT.eraseInstruction(SExtOpnd);
3482
3483 // Check if the extension is still needed.
3484 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3485 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3486 if (ExtInst) {
3487 if (Exts)
3488 Exts->push_back(ExtInst);
3489 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3490 }
3491 return ExtVal;
3492 }
3493
3494 // At this point we have: ext ty opnd to ty.
3495 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3496 Value *NextVal = ExtInst->getOperand(0);
3497 TPT.eraseInstruction(ExtInst, NextVal);
3498 return NextVal;
3499}
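// Illustrative example, not part of the original source:
//   %z = zext i16 %a to i32
//   %s = sext i32 %z to i64
// is rewritten by the routine above into a single
//   %promoted = zext i16 %a to i64
// i.e. s|zext(zext(opnd)) collapses to one zext of the original operand.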
3500
3501Value *TypePromotionHelper::promoteOperandForOther(
3502 Instruction *Ext, TypePromotionTransaction &TPT,
3503 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3504 SmallVectorImpl<Instruction *> *Exts,
3505 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
3506 bool IsSExt) {
3507 // By construction, the operand of Ext is an instruction. Otherwise we cannot
3508 // get through it and this method should not be called.
3509 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
3510 CreatedInstsCost = 0;
3511 if (!ExtOpnd->hasOneUse()) {
3512 // ExtOpnd will be promoted.
3513 // All its uses, but Ext, will need to use a truncated value of the
3514 // promoted version.
3515 // Create the truncate now.
3516 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
3517 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
3518 ITrunc->removeFromParent();
3519 // Insert it just after the definition.
3520 ITrunc->insertAfter(ExtOpnd);
3521 if (Truncs)
3522 Truncs->push_back(ITrunc);
3523 }
3524
3525 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
3526 // Restore the operand of Ext (which has been replaced by the previous call
3527 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
3528 TPT.setOperand(Ext, 0, ExtOpnd);
3529 }
3530
3531 // Get through the Instruction:
3532 // 1. Update its type.
3533 // 2. Replace the uses of Ext by Inst.
3534 // 3. Extend each operand that needs to be extended.
3535
3536 // Remember the original type of the instruction before promotion.
3537 // This is useful to know that the high bits are sign extended bits.
3538 PromotedInsts.insert(std::pair<Instruction *, TypeIsSExt>(
3539 ExtOpnd, TypeIsSExt(ExtOpnd->getType(), IsSExt)));
3540 // Step #1.
3541 TPT.mutateType(ExtOpnd, Ext->getType());
3542 // Step #2.
3543 TPT.replaceAllUsesWith(Ext, ExtOpnd);
3544 // Step #3.
3545 Instruction *ExtForOpnd = Ext;
3546
3547 DEBUG(dbgs() << "Propagate Ext to operands\n");
3548 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
3549 ++OpIdx) {
3550 DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
3551 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
3552 !shouldExtOperand(ExtOpnd, OpIdx)) {
3553 DEBUG(dbgs() << "No need to propagate\n");
3554 continue;
3555 }
3556 // Check if we can statically extend the operand.
3557 Value *Opnd = ExtOpnd->getOperand(OpIdx);
3558 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
3559 DEBUG(dbgs() << "Statically extend\n");
3560 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
3561 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
3562 : Cst->getValue().zext(BitWidth);
3563 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
3564 continue;
3565 }
3566 // UndefValue are typed, so we have to statically sign extend them.
3567 if (isa<UndefValue>(Opnd)) {
3568 DEBUG(dbgs() << "Statically extend\n");
3569 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
3570 continue;
3571 }
3572
3573 // Otherwise we have to explicitly sign extend the operand.
3574 // Check if Ext was reused to extend an operand.
3575 if (!ExtForOpnd) {
3576 // If yes, create a new one.
3577 DEBUG(dbgs() << "More operands to ext\n");
3578 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
3579 : TPT.createZExt(Ext, Opnd, Ext->getType());
3580 if (!isa<Instruction>(ValForExtOpnd)) {
3581 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
3582 continue;
3583 }
3584 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
3585 }
3586 if (Exts)
3587 Exts->push_back(ExtForOpnd);
3588 TPT.setOperand(ExtForOpnd, 0, Opnd);
3589
3590 // Move the sign extension before the insertion point.
3591 TPT.moveBefore(ExtForOpnd, ExtOpnd);
3592 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
3593 CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
3594 // If more sext are required, new instructions will have to be created.
3595 ExtForOpnd = nullptr;
3596 }
3597 if (ExtForOpnd == Ext) {
3598 DEBUG(dbgs() << "Extension is useless now\n");
3599 TPT.eraseInstruction(Ext);
3600 }
3601 return ExtOpnd;
3602}
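// Illustrative example, not part of the original source:
//   %add = add nsw i32 %opnd, 1
//   %ext = sext i32 %add to i64
// is rewritten by the routine above into
//   %ext = sext i32 %opnd to i64   ; the original ext, reused for the operand
//   %add = add nsw i64 %ext, 1     ; %add is mutated to the wider type
// with constant operands such as the 1 extended statically.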
3603
3604/// Check whether or not promoting an instruction to a wider type is profitable.
3605/// \p NewCost gives the cost of extension instructions created by the
3606/// promotion.
3607/// \p OldCost gives the cost of extension instructions before the promotion
3608/// plus the number of instructions that have been
3609 /// matched in the addressing mode during the promotion.
3610/// \p PromotedOperand is the value that has been promoted.
3611/// \return True if the promotion is profitable, false otherwise.
3612bool AddressingModeMatcher::isPromotionProfitable(
3613 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
3614 DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n');
3615 // The cost of the new extensions is greater than the cost of the
3616 // old extension plus what we folded.
3617 // This is not profitable.
3618 if (NewCost > OldCost)
3619 return false;
3620 if (NewCost < OldCost)
3621 return true;
3622 // The promotion is neutral but it may help folding the sign extension in
3623 // loads for instance.
3624 // Check that we did not create an illegal instruction.
3625 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
3626}
3627
3628/// Given an instruction or constant expr, see if we can fold the operation
3629/// into the addressing mode. If so, update the addressing mode and return
3630/// true, otherwise return false without modifying AddrMode.
3631 /// If \p MovedAway is not NULL, it indicates whether or
3632/// not AddrInst has to be folded into the addressing mode on success.
3633 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
3634/// because it has been moved away.
3635/// Thus AddrInst must not be added in the matched instructions.
3636/// This state can happen when AddrInst is a sext, since it may be moved away.
3637/// Therefore, AddrInst may not be valid when MovedAway is true and it must
3638/// not be referenced anymore.
3639bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
3640 unsigned Depth,
3641 bool *MovedAway) {
3642 // Avoid exponential behavior on extremely deep expression trees.
3643 if (Depth >= 5) return false;
3644
3645 // By default, all matched instructions stay in place.
3646 if (MovedAway)
3647 *MovedAway = false;
3648
3649 switch (Opcode) {
3650 case Instruction::PtrToInt:
3651 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3652 return matchAddr(AddrInst->getOperand(0), Depth);
3653 case Instruction::IntToPtr: {
3654 auto AS = AddrInst->getType()->getPointerAddressSpace();
3655 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
3656 // This inttoptr is a no-op if the integer type is pointer sized.
3657 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
3658 return matchAddr(AddrInst->getOperand(0), Depth);
3659 return false;
3660 }
3661 case Instruction::BitCast:
3662 // BitCast is always a noop, and we can handle it as long as it is
3663 // int->int or pointer->pointer (we don't want int<->fp or something).
3664 if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
3665 AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
3666 // Don't touch identity bitcasts. These were probably put here by LSR,
3667 // and we don't want to mess around with them. Assume it knows what it
3668 // is doing.
3669 AddrInst->getOperand(0)->getType() != AddrInst->getType())
3670 return matchAddr(AddrInst->getOperand(0), Depth);
3671 return false;
3672 case Instruction::AddrSpaceCast: {
3673 unsigned SrcAS
3674 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
3675 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
3676 if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3677 return matchAddr(AddrInst->getOperand(0), Depth);
3678 return false;
3679 }
3680 case Instruction::Add: {
3681 // Check to see if we can merge in the RHS then the LHS. If so, we win.
3682 ExtAddrMode BackupAddrMode = AddrMode;
3683 unsigned OldSize = AddrModeInsts.size();
3684 // Start a transaction at this point.
3685 // The LHS may match but not the RHS.
3686 // Therefore, we need a higher level restoration point to undo partially
3687 // matched operation.
3688 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3689 TPT.getRestorationPoint();
3690
3691 if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
3692 matchAddr(AddrInst->getOperand(0), Depth+1))
3693 return true;
3694
3695 // Restore the old addr mode info.
3696 AddrMode = BackupAddrMode;
3697 AddrModeInsts.resize(OldSize);
3698 TPT.rollback(LastKnownGood);
3699
3700 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
3701 if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
3702 matchAddr(AddrInst->getOperand(1), Depth+1))
3703 return true;
3704
3705 // Otherwise we definitely can't merge the ADD in.
3706 AddrMode = BackupAddrMode;
3707 AddrModeInsts.resize(OldSize);
3708 TPT.rollback(LastKnownGood);
3709 break;
3710 }
3711 //case Instruction::Or:
3712 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
3713 //break;
3714 case Instruction::Mul:
3715 case Instruction::Shl: {
3716 // Can only handle X*C and X << C.
3717 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
3718 if (!RHS)
3719 return false;
3720 int64_t Scale = RHS->getSExtValue();
3721 if (Opcode == Instruction::Shl)
3722 Scale = 1LL << Scale;
3723
3724 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
3725 }
3726 case Instruction::GetElementPtr: {
3727 // Scan the GEP. We check if it contains constant offsets and at most
3728 // one variable offset.
3729 int VariableOperand = -1;
3730 unsigned VariableScale = 0;
3731
3732 int64_t ConstantOffset = 0;
3733 gep_type_iterator GTI = gep_type_begin(AddrInst);
3734 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
3735 if (StructType *STy = GTI.getStructTypeOrNull()) {
3736 const StructLayout *SL = DL.getStructLayout(STy);
3737 unsigned Idx =
3738 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
3739 ConstantOffset += SL->getElementOffset(Idx);
3740 } else {
3741 uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
3742 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
3743 ConstantOffset += CI->getSExtValue()*TypeSize;
3744 } else if (TypeSize) { // Scales of zero don't do anything.
3745 // We only allow one variable index at the moment.
3746 if (VariableOperand != -1)
3747 return false;
3748
3749 // Remember the variable index.
3750 VariableOperand = i;
3751 VariableScale = TypeSize;
3752 }
3753 }
3754 }
3755
3756 // A common case is for the GEP to only do a constant offset. In this case,
3757 // just add it to the disp field and check validity.
3758 if (VariableOperand == -1) {
3759 AddrMode.BaseOffs += ConstantOffset;
3760 if (ConstantOffset == 0 ||
3761 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
3762 // Check to see if we can fold the base pointer in too.
3763 if (matchAddr(AddrInst->getOperand(0), Depth+1))
3764 return true;
3765 }
3766 AddrMode.BaseOffs -= ConstantOffset;
3767 return false;
3768 }
3769
3770 // Save the valid addressing mode in case we can't match.
3771 ExtAddrMode BackupAddrMode = AddrMode;
3772 unsigned OldSize = AddrModeInsts.size();
3773
3774 // See if the scale and offset amount is valid for this target.
3775 AddrMode.BaseOffs += ConstantOffset;
3776
3777 // Match the base operand of the GEP.
3778 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
3779 // If it couldn't be matched, just stuff the value in a register.
3780 if (AddrMode.HasBaseReg) {
3781 AddrMode = BackupAddrMode;
3782 AddrModeInsts.resize(OldSize);
3783 return false;
3784 }
3785 AddrMode.HasBaseReg = true;
3786 AddrMode.BaseReg = AddrInst->getOperand(0);
3787 }
3788
3789 // Match the remaining variable portion of the GEP.
3790 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
3791 Depth)) {
3792 // If it couldn't be matched, try stuffing the base into a register
3793 // instead of matching it, and retrying the match of the scale.
3794 AddrMode = BackupAddrMode;
3795 AddrModeInsts.resize(OldSize);
3796 if (AddrMode.HasBaseReg)
3797 return false;
3798 AddrMode.HasBaseReg = true;
3799 AddrMode.BaseReg = AddrInst->getOperand(0);
3800 AddrMode.BaseOffs += ConstantOffset;
3801 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
3802 VariableScale, Depth)) {
3803 // If even that didn't work, bail.
3804 AddrMode = BackupAddrMode;
3805 AddrModeInsts.resize(OldSize);
3806 return false;
3807 }
3808 }
3809
3810 return true;
3811 }
3812 case Instruction::SExt:
3813 case Instruction::ZExt: {
3814 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
3815 if (!Ext)
3816 return false;
3817
3818 // Try to move this ext out of the way of the addressing mode.
3819 // Ask for a method for doing so.
3820 TypePromotionHelper::Action TPH =
3821 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
3822 if (!TPH)
3823 return false;
3824
3825 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3826 TPT.getRestorationPoint();
3827 unsigned CreatedInstsCost = 0;
3828 unsigned ExtCost = !TLI.isExtFree(Ext);
3829 Value *PromotedOperand =
3830 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
3831 // SExt has been moved away.
3832 // Thus either it will be rematched later in the recursive calls or it is
3833 // gone. Anyway, we must not fold it into the addressing mode at this point.
3834 // E.g.,
3835 // op = add opnd, 1
3836 // idx = ext op
3837 // addr = gep base, idx
3838 // is now:
3839 // promotedOpnd = ext opnd <- no match here
3840 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
3841 // addr = gep base, op <- match
3842 if (MovedAway)
3843 *MovedAway = true;
3844
3845 assert(PromotedOperand &&
3846 "TypePromotionHelper should have filtered out those cases");
3847
3848 ExtAddrMode BackupAddrMode = AddrMode;
3849 unsigned OldSize = AddrModeInsts.size();
3850
3851 if (!matchAddr(PromotedOperand, Depth) ||
3852 // The total of the new cost is equal to the cost of the created
3853 // instructions.
3854 // The total of the old cost is equal to the cost of the extension plus
3855 // what we have saved in the addressing mode.
3856 !isPromotionProfitable(CreatedInstsCost,
3857 ExtCost + (AddrModeInsts.size() - OldSize),
3858 PromotedOperand)) {
3859 AddrMode = BackupAddrMode;
3860 AddrModeInsts.resize(OldSize);
3861 DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
3862 TPT.rollback(LastKnownGood);
3863 return false;
3864 }
3865 return true;
3866 }
3867 }
3868 return false;
3869}
3870
3871/// If we can, try to add the value of 'Addr' into the current addressing mode.
3872/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
3873/// unmodified. This assumes that Addr is either a pointer type or intptr_t
3874/// for the target.
3875///
3876bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
3877 // Start a transaction at this point that we will rollback if the matching
3878 // fails.
3879 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3880 TPT.getRestorationPoint();
3881 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
3882 // Fold in immediates if legal for the target.
3883 AddrMode.BaseOffs += CI->getSExtValue();
3884 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3885 return true;
3886 AddrMode.BaseOffs -= CI->getSExtValue();
3887 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
3888 // If this is a global variable, try to fold it into the addressing mode.
3889 if (!AddrMode.BaseGV) {
3890 AddrMode.BaseGV = GV;
3891 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3892 return true;
3893 AddrMode.BaseGV = nullptr;
3894 }
3895 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
3896 ExtAddrMode BackupAddrMode = AddrMode;
3897 unsigned OldSize = AddrModeInsts.size();
3898
3899 // Check to see if it is possible to fold this operation.
3900 bool MovedAway = false;
3901 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
3902 // This instruction may have been moved away. If so, there is nothing
3903 // to check here.
3904 if (MovedAway)
3905 return true;
3906 // Okay, it's possible to fold this. Check to see if it is actually
3907 // *profitable* to do so. We use a simple cost model to avoid increasing
3908 // register pressure too much.
3909 if (I->hasOneUse() ||
3910 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
3911 AddrModeInsts.push_back(I);
3912 return true;
3913 }
3914
3915 // It isn't profitable to do this, roll back.
3916 //cerr << "NOT FOLDING: " << *I;
3917 AddrMode = BackupAddrMode;
3918 AddrModeInsts.resize(OldSize);
3919 TPT.rollback(LastKnownGood);
3920 }
3921 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
3922 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
3923 return true;
3924 TPT.rollback(LastKnownGood);
3925 } else if (isa<ConstantPointerNull>(Addr)) {
3926 // Null pointer gets folded without affecting the addressing mode.
3927 return true;
3928 }
3929
3930 // Worst case, the target should support [reg] addressing modes. :)
3931 if (!AddrMode.HasBaseReg) {
3932 AddrMode.HasBaseReg = true;
3933 AddrMode.BaseReg = Addr;
3934 // Still check for legality in case the target supports [imm] but not [i+r].
3935 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3936 return true;
3937 AddrMode.HasBaseReg = false;
3938 AddrMode.BaseReg = nullptr;
3939 }
3940
3941 // If the base register is already taken, see if we can do [r+r].
3942 if (AddrMode.Scale == 0) {
3943 AddrMode.Scale = 1;
3944 AddrMode.ScaledReg = Addr;
3945 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
3946 return true;
3947 AddrMode.Scale = 0;
3948 AddrMode.ScaledReg = nullptr;
3949 }
3950 // Couldn't match.
3951 TPT.rollback(LastKnownGood);
3952 return false;
3953}
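
For reference, a minimal sketch (hypothetical code, not part of this file) of what the matcher above is building: the fields of ExtAddrMode conceptually denote the address BaseGV + BaseOffs + BaseReg + Scale*ScaledReg, and matchAddr first tries to fold Addr as an immediate or BaseGV, then falls back to using it as the BaseReg, and finally as the ScaledReg with Scale == 1.

// Hypothetical illustration only; the field names mirror ExtAddrMode, but the
// struct below is not the one used by CodeGenPrepare.
#include <cstdint>

struct SimpleAddrMode {
  int64_t BaseOffs = 0;   // folded immediate offset
  bool HasBaseReg = false;
  int64_t BaseReg = 0;    // value held by the base register, if any
  int64_t Scale = 0;
  int64_t ScaledReg = 0;  // value held by the scaled register, if any
};

// addr = BaseGV + BaseOffs + BaseReg + Scale * ScaledReg
int64_t evaluate(const SimpleAddrMode &AM, int64_t BaseGVAddr = 0) {
  int64_t Addr = BaseGVAddr + AM.BaseOffs;
  if (AM.HasBaseReg)
    Addr += AM.BaseReg;
  if (AM.Scale != 0)
    Addr += AM.Scale * AM.ScaledReg;
  return Addr;
}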
3954
3955/// Check to see if all uses of OpVal by the specified inline asm call are due
3956/// to memory operands. If so, return true, otherwise return false.
3957static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
3958 const TargetLowering &TLI,
3959 const TargetRegisterInfo &TRI) {
3960 const Function *F = CI->getFunction();
3961 TargetLowering::AsmOperandInfoVector TargetConstraints =
3962 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
3963 ImmutableCallSite(CI));
3964
3965 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
3966 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
3967
3968 // Compute the constraint code and ConstraintType to use.
3969 TLI.ComputeConstraintToUse(OpInfo, SDValue());
3970
3971 // If this asm operand is our Value*, and if it isn't an indirect memory
3972 // operand, we can't fold it!
3973 if (OpInfo.CallOperandVal == OpVal &&
3974 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
3975 !OpInfo.isIndirect))
3976 return false;
3977 }
3978
3979 return true;
3980}
3981
3982/// Recursively walk all the uses of I until we find a memory use.
3983/// If we find an obviously non-foldable instruction, return true.
3984/// Add the ultimately found memory instructions to MemoryUses.
3985static bool FindAllMemoryUses(
3986 Instruction *I,
3987 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
3988 SmallPtrSetImpl<Instruction *> &ConsideredInsts,
3989 const TargetLowering &TLI, const TargetRegisterInfo &TRI) {
3990 // If we already considered this instruction, we're done.
3991 if (!ConsideredInsts.insert(I).second)
3992 return false;
3993
3994 // If this is an obviously unfoldable instruction, bail out.
3995 if (!MightBeFoldableInst(I))
3996 return true;
3997
3998 const bool OptSize = I->getFunction()->optForSize();
3999
4000 // Loop over all the uses, recursively processing them.
4001 for (Use &U : I->uses()) {
4002 Instruction *UserI = cast<Instruction>(U.getUser());
4003
4004 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4005 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4006 continue;
4007 }
4008
4009 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4010 unsigned opNo = U.getOperandNo();
4011 if (opNo != StoreInst::getPointerOperandIndex())
4012 return true; // Storing addr, not into addr.
4013 MemoryUses.push_back(std::make_pair(SI, opNo));
4014 continue;
4015 }
4016
4017 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4018 unsigned opNo = U.getOperandNo();
4019 if (opNo != AtomicRMWInst::getPointerOperandIndex())
4020 return true; // Storing addr, not into addr.
4021 MemoryUses.push_back(std::make_pair(RMW, opNo));
4022 continue;
4023 }
4024
4025 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4026 unsigned opNo = U.getOperandNo();
4027 if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4028 return true; // Storing addr, not into addr.
4029 MemoryUses.push_back(std::make_pair(CmpX, opNo));
4030 continue;
4031 }
4032
4033 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4034 // If this is a cold call, we can sink the addressing calculation into
4035 // the cold path. See optimizeCallInst
4036 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
4037 continue;
4038
4039 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4040 if (!IA) return true;
4041
4042 // If this is a memory operand, we're cool, otherwise bail out.
4043 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4044 return true;
4045 continue;
4046 }
4047
4048 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI))
4049 return true;
4050 }
4051
4052 return false;
4053}
4054
4055/// Return true if Val is already known to be live at the use site that we're
4056/// folding it into. If so, there is no cost to include it in the addressing
4057/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4058/// instruction already.
4059bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4060 Value *KnownLive2) {
4061 // If Val is either of the known-live values, we know it is live!
4062 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4063 return true;
4064
4065 // All values other than instructions and arguments (e.g. constants) are live.
4066 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4067
4068 // If Val is a constant-sized alloca in the entry block, it is live; this is
4069 // true because it is just a reference to the stack/frame pointer, which is
4070 // live for the whole function.
4071 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4072 if (AI->isStaticAlloca())
4073 return true;
4074
4075 // Check to see if this value is already used in the memory instruction's
4076 // block. If so, it's already live into the block at the very least, so we
4077 // can reasonably fold it.
4078 return Val->isUsedInBasicBlock(MemoryInst->getParent());
4079}
4080
4081/// It is possible for the addressing mode of the machine to fold the specified
4082/// instruction into a load or store that ultimately uses it.
4083/// However, the specified instruction has multiple uses.
4084/// Given this, it may actually increase register pressure to fold it
4085/// into the load. For example, consider this code:
4086///
4087/// X = ...
4088/// Y = X+1
4089/// use(Y) -> nonload/store
4090/// Z = Y+1
4091/// load Z
4092///
4093/// In this case, Y has multiple uses, and can be folded into the load of Z
4094/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4095/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4096/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4097/// number of computations either.
4098///
4099/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4100/// X was live across 'load Z' for other reasons, we actually *would* want to
4101/// fold the addressing mode in the Z case. This would make Y die earlier.
4102bool AddressingModeMatcher::
4103isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4104 ExtAddrMode &AMAfter) {
4105 if (IgnoreProfitability) return true;
4106
4107 // AMBefore is the addressing mode before this instruction was folded into it,
4108 // and AMAfter is the addressing mode after the instruction was folded. Get
4109 // the set of registers referenced by AMAfter and subtract out those
4110 // referenced by AMBefore: this is the set of values which folding in this
4111 // address extends the lifetime of.
4112 //
4113 // Note that there are only two potential values being referenced here,
4114 // BaseReg and ScaleReg (global addresses are always available, as are any
4115 // folded immediates).
4116 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4117
4118 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4119 // lifetime wasn't extended by adding this instruction.
4120 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4121 BaseReg = nullptr;
4122 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4123 ScaledReg = nullptr;
4124
4125 // If folding this instruction (and its subexprs) didn't extend any live
4126 // ranges, we're ok with it.
4127 if (!BaseReg && !ScaledReg)
4128 return true;
4129
4130 // If all uses of this instruction can have the address mode sunk into them,
4131 // we can remove the addressing mode and effectively trade one live register
4132 // for another (at worst.) In this context, folding an addressing mode into
4133 // the use is just a particularly nice way of sinking it.
4134 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
4135 SmallPtrSet<Instruction*, 16> ConsideredInsts;
4136 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
4137 return false; // Has a non-memory, non-foldable use!
4138
4139 // Now that we know that all uses of this instruction are part of a chain of
4140 // computation involving only operations that could theoretically be folded
4141 // into a memory use, loop over each of these memory operation uses and see
4142 // if they could *actually* fold the instruction. The assumption is that
4143 // addressing modes are cheap and that duplicating the computation involved
4144 // many times is worthwhile, even on a fastpath. For sinking candidates
4145 // (i.e. cold call sites), this serves as a way to prevent excessive code
4146 // growth, since most architectures have some reasonably small and fast way to
4147 // compute an effective address (e.g., LEA on x86).
4148 SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4149 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4150 Instruction *User = MemoryUses[i].first;
4151 unsigned OpNo = MemoryUses[i].second;
4152
4153 // Get the access type of this use. If the use isn't a pointer, we don't
4154 // know what it accesses.
4155 Value *Address = User->getOperand(OpNo);
4156 PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4157 if (!AddrTy)
4158 return false;
4159 Type *AddressAccessTy = AddrTy->getElementType();
4160 unsigned AS = AddrTy->getAddressSpace();
4161
4162 // Do a match against the root of this address, ignoring profitability. This
4163 // will tell us if the addressing mode for the memory operation will
4164 // *actually* cover the shared instruction.
4165 ExtAddrMode Result;
4166 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4167 TPT.getRestorationPoint();
4168 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
4169 AddressAccessTy, AS,
4170 MemoryInst, Result, InsertedInsts,
4171 PromotedInsts, TPT);
4172 Matcher.IgnoreProfitability = true;
4173 bool Success = Matcher.matchAddr(Address, 0);
4174 (void)Success; assert(Success && "Couldn't select *anything*?");
4175
4176 // The match was to check the profitability, the changes made are not
4177 // part of the original matcher. Therefore, they should be dropped
4178 // otherwise the original matcher will not present the right state.
4179 TPT.rollback(LastKnownGood);
4180
4181 // If the match didn't cover I, then it won't be shared by it.
4182 if (!is_contained(MatchedAddrModeInsts, I))
4183 return false;
4184
4185 MatchedAddrModeInsts.clear();
4186 }
4187
4188 return true;
4189}
4190
4191} // end anonymous namespace
4192
4193/// Return true if the specified values are defined in a
4194/// different basic block than BB.
4195static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4196 if (Instruction *I = dyn_cast<Instruction>(V))
4197 return I->getParent() != BB;
4198 return false;
4199}
4200
4201/// Sink addressing mode computation immediately before MemoryInst if doing so
4202/// can be done without increasing register pressure. The need for the
4203/// register pressure constraint means this can end up being an all or nothing
4204/// decision for all uses of the same addressing computation.
4205///
4206/// Load and Store Instructions often have addressing modes that can do
4207/// significant amounts of computation. As such, instruction selection will try
4208/// to get the load or store to do as much computation as possible for the
4209/// program. The problem is that isel can only see within a single block. As
4210/// such, we sink as much legal addressing mode work into the block as possible.
4211///
4212/// This method is used to optimize both load/store and inline asms with memory
4213/// operands. It's also used to sink addressing computations feeding into cold
4214/// call sites into their (cold) basic block.
4215///
4216/// The motivation for handling sinking into cold blocks is that doing so can
4217/// both enable other address mode sinking (by satisfying the register pressure
4218/// constraint above), and reduce register pressure globally (by removing the
4219/// addressing mode computation from the fast path entirely).
4220bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4221 Type *AccessTy, unsigned AddrSpace) {
4222 Value *Repl = Addr;
4223
4224 // Try to collapse single-value PHI nodes. This is necessary to undo
4225 // unprofitable PRE transformations.
4226 SmallVector<Value*, 8> worklist;
4227 SmallPtrSet<Value*, 16> Visited;
4228 worklist.push_back(Addr);
4229
4230 // Use a worklist to iteratively look through PHI nodes, and ensure that
4231 // the addressing modes obtained from the non-PHI roots of the graph
4232 // are equivalent.
4233 Value *Consensus = nullptr;
4234 unsigned NumUsesConsensus = 0;
4235 bool IsNumUsesConsensusValid = false;
4236 SmallVector<Instruction*, 16> AddrModeInsts;
4237 ExtAddrMode AddrMode;
4238 TypePromotionTransaction TPT(RemovedInsts);
4239 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4240 TPT.getRestorationPoint();
4241 while (!worklist.empty()) {
4242 Value *V = worklist.back();
4243 worklist.pop_back();
4244
4245 // Break use-def graph loops.
4246 if (!Visited.insert(V).second) {
4247 Consensus = nullptr;
4248 break;
4249 }
4250
4251 // For a PHI node, push all of its incoming values.
4252 if (PHINode *P = dyn_cast<PHINode>(V)) {
4253 for (Value *IncValue : P->incoming_values())
4254 worklist.push_back(IncValue);
4255 continue;
4256 }
4257
4258 // For non-PHIs, determine the addressing mode being computed. Note that
4259 // the result may differ depending on what other uses our candidate
4260 // addressing instructions might have.
4261 SmallVector<Instruction*, 16> NewAddrModeInsts;
4262 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4263 V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI,
4264 InsertedInsts, PromotedInsts, TPT);
4265
4266 // This check is broken into two cases with very similar code to avoid using
4267 // getNumUses() as much as possible. Some values have a lot of uses, so
4268 // calling getNumUses() unconditionally caused a significant compile-time
4269 // regression.
4270 if (!Consensus) {
4271 Consensus = V;
4272 AddrMode = NewAddrMode;
4273 AddrModeInsts = NewAddrModeInsts;
4274 continue;
4275 } else if (NewAddrMode == AddrMode) {
4276 if (!IsNumUsesConsensusValid) {
4277 NumUsesConsensus = Consensus->getNumUses();
4278 IsNumUsesConsensusValid = true;
4279 }
4280
4281 // Ensure that the obtained addressing mode is equivalent to that obtained
4282 // for all other roots of the PHI traversal. Also, when choosing one
4283 // such root as representative, select the one with the most uses in order
4284 // to keep the cost modeling heuristics in AddressingModeMatcher
4285 // applicable.
4286 unsigned NumUses = V->getNumUses();
4287 if (NumUses > NumUsesConsensus) {
4288 Consensus = V;
4289 NumUsesConsensus = NumUses;
4290 AddrModeInsts = NewAddrModeInsts;
4291 }
4292 continue;
4293 }
4294
4295 Consensus = nullptr;
4296 break;
4297 }
4298
4299 // If the addressing mode couldn't be determined, or if multiple different
4300 // ones were determined, bail out now.
4301 if (!Consensus) {
4302 TPT.rollback(LastKnownGood);
4303 return false;
4304 }
4305 TPT.commit();
4306
4307 // If all the instructions matched are already in this BB, don't do anything.
4308 if (none_of(AddrModeInsts, [&](Value *V) {
4309 return IsNonLocalValue(V, MemoryInst->getParent());
4310 })) {
4311 DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
4312 return false;
4313 }
4314
4315 // Insert this computation right after this user. Since our caller is
4316 // scanning from the top of the BB to the bottom, reuses of the expr are
4317 // guaranteed to happen later.
4318 IRBuilder<> Builder(MemoryInst);
4319
4320 // Now that we've determined the addressing expression we want to use and know
4321 // that we have to sink it into this block, check to see if we have already
4322 // done this for some other load/store instr in this block. If so, reuse the
4323 // computation.
4324 Value *&SunkAddr = SunkAddrs[Addr];
4325 if (SunkAddr) {
4326 DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
4327              << *MemoryInst << "\n");
4328 if (SunkAddr->getType() != Addr->getType())
4329 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4330 } else if (AddrSinkUsingGEPs ||
4331 (!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
4332 SubtargetInfo->useAA())) {
4333 // By default, we use the GEP-based method when AA is used later. This
4334 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4335 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
4336              << *MemoryInst << "\n");
4337 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4338 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4339
4340 // First, find the pointer.
4341 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4342 ResultPtr = AddrMode.BaseReg;
4343 AddrMode.BaseReg = nullptr;
4344 }
4345
4346 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4347 // We can't add more than one pointer together, nor can we scale a
4348 // pointer (both of which seem meaningless).
4349 if (ResultPtr || AddrMode.Scale != 1)
4350 return false;
4351
4352 ResultPtr = AddrMode.ScaledReg;
4353 AddrMode.Scale = 0;
4354 }
4355
4356 if (AddrMode.BaseGV) {
4357 if (ResultPtr)
4358 return false;
4359
4360 ResultPtr = AddrMode.BaseGV;
4361 }
4362
4363 // If the real base value actually came from an inttoptr, then the matcher
4364 // will look through it and provide only the integer value. In that case,
4365 // use it here.
4366 if (!ResultPtr && AddrMode.BaseReg) {
4367 ResultPtr =
4368 Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), "sunkaddr");
4369 AddrMode.BaseReg = nullptr;
4370 } else if (!ResultPtr && AddrMode.Scale == 1) {
4371 ResultPtr =
4372 Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), "sunkaddr");
4373 AddrMode.Scale = 0;
4374 }
4375
4376 if (!ResultPtr &&
4377 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4378 SunkAddr = Constant::getNullValue(Addr->getType());
4379 } else if (!ResultPtr) {
4380 return false;
4381 } else {
4382 Type *I8PtrTy =
4383 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4384 Type *I8Ty = Builder.getInt8Ty();
4385
4386 // Start with the base register. Do this first so that subsequent address
4387 // matching finds it last, which will prevent it from trying to match it
4388 // as the scaled value in case it happens to be a mul. That would be
4389 // problematic if we've sunk a different mul for the scale, because then
4390 // we'd end up sinking both muls.
4391 if (AddrMode.BaseReg) {
4392 Value *V = AddrMode.BaseReg;
4393 if (V->getType() != IntPtrTy)
4394 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4395
4396 ResultIndex = V;
4397 }
4398
4399 // Add the scale value.
4400 if (AddrMode.Scale) {
4401 Value *V = AddrMode.ScaledReg;
4402 if (V->getType() == IntPtrTy) {
4403 // done.
4404 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4405 cast<IntegerType>(V->getType())->getBitWidth()) {
4406 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4407 } else {
4408 // It is only safe to sign extend the BaseReg if we know that the math
4409 // required to create it did not overflow before we extend it. Since
4410 // the original IR value was tossed in favor of a constant back when
4411 // the AddrMode was created we need to bail out gracefully if widths
4412 // do not match instead of extending it.
4413 Instruction *I = dyn_cast_or_null<Instruction>(ResultIndex);
4414 if (I && (ResultIndex != AddrMode.BaseReg))
4415 I->eraseFromParent();
4416 return false;
4417 }
4418
4419 if (AddrMode.Scale != 1)
4420 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4421 "sunkaddr");
4422 if (ResultIndex)
4423 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4424 else
4425 ResultIndex = V;
4426 }
4427
4428 // Add in the Base Offset if present.
4429 if (AddrMode.BaseOffs) {
4430 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4431 if (ResultIndex) {
4432 // We need to add this separately from the scale above to help with
4433 // SDAG consecutive load/store merging.
4434 if (ResultPtr->getType() != I8PtrTy)
4435 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4436 ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4437 }
4438
4439 ResultIndex = V;
4440 }
4441
4442 if (!ResultIndex) {
4443 SunkAddr = ResultPtr;
4444 } else {
4445 if (ResultPtr->getType() != I8PtrTy)
4446 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4447 SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4448 }
4449
4450 if (SunkAddr->getType() != Addr->getType())
4451 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4452 }
4453 } else {
4454 DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
4455              << *MemoryInst << "\n");
4456 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4457 Value *Result = nullptr;
4458
4459 // Start with the base register. Do this first so that subsequent address
4460 // matching finds it last, which will prevent it from trying to match it
4461 // as the scaled value in case it happens to be a mul. That would be
4462 // problematic if we've sunk a different mul for the scale, because then
4463 // we'd end up sinking both muls.
4464 if (AddrMode.BaseReg) {
4465 Value *V = AddrMode.BaseReg;
4466 if (V->getType()->isPointerTy())
4467 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4468 if (V->getType() != IntPtrTy)
4469 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4470 Result = V;
4471 }
4472
4473 // Add the scale value.
4474 if (AddrMode.Scale) {
4475 Value *V = AddrMode.ScaledReg;
4476 if (V->getType() == IntPtrTy) {
4477 // done.
4478 } else if (V->getType()->isPointerTy()) {
4479 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4480 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4481 cast<IntegerType>(V->getType())->getBitWidth()) {
4482 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4483 } else {
4484 // It is only safe to sign extend the BaseReg if we know that the math
4485 // required to create it did not overflow before we extend it. Since
4486 // the original IR value was tossed in favor of a constant back when
4487 // the AddrMode was created we need to bail out gracefully if widths
4488 // do not match instead of extending it.
4489 Instruction *I = dyn_cast_or_null<Instruction>(Result);
4490 if (I && (Result != AddrMode.BaseReg))
4491 I->eraseFromParent();
4492 return false;
4493 }
4494 if (AddrMode.Scale != 1)
4495 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4496 "sunkaddr");
4497 if (Result)
4498 Result = Builder.CreateAdd(Result, V, "sunkaddr");
4499 else
4500 Result = V;
4501 }
4502
4503 // Add in the BaseGV if present.
4504 if (AddrMode.BaseGV) {
4505 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
4506 if (Result)
4507 Result = Builder.CreateAdd(Result, V, "sunkaddr");
4508 else
4509 Result = V;
4510 }
4511
4512 // Add in the Base Offset if present.
4513 if (AddrMode.BaseOffs) {
4514 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4515 if (Result)
4516 Result = Builder.CreateAdd(Result, V, "sunkaddr");
4517 else
4518 Result = V;
4519 }
4520
4521 if (!Result)
4522 SunkAddr = Constant::getNullValue(Addr->getType());
4523 else
4524 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
4525 }
4526
4527 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
4528
4529 // If we have no uses, recursively delete the value and all dead instructions
4530 // using it.
4531 if (Repl->use_empty()) {
4532 // This can cause recursive deletion, which can invalidate our iterator.
4533 // Use a WeakTrackingVH to hold onto it in case this happens.
4534 Value *CurValue = &*CurInstIterator;
4535 WeakTrackingVH IterHandle(CurValue);
4536 BasicBlock *BB = CurInstIterator->getParent();
4537
4538 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
4539
4540 if (IterHandle != CurValue) {
4541 // If the iterator instruction was recursively deleted, start over at the
4542 // start of the block.
4543 CurInstIterator = BB->begin();
4544 SunkAddrs.clear();
4545 }
4546 }
4547 ++NumMemoryInsts;
4548 return true;
4549}
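
For illustration, a minimal sketch (hypothetical helper, not code from this pass) of the "sunkaddr" shape the GEP-based path above emits for the simplest mode, a pointer base plus a constant offset: an i8 GEP bracketed by pointer casts.

// Hypothetical helper, for illustration only. Emits
//   %sunkaddr = getelementptr i8, i8* <cast of BasePtr>, i64 BaseOffs
// and casts the result back to the original pointer type.
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *emitBasePlusOffset(IRBuilder<> &Builder, Value *BasePtr,
                                 int64_t BaseOffs, Type *OrigPtrTy) {
  unsigned AS = BasePtr->getType()->getPointerAddressSpace();
  Type *I8PtrTy = Builder.getInt8PtrTy(AS);
  Value *Ptr = Builder.CreatePointerCast(BasePtr, I8PtrTy);
  Value *Sunk = Builder.CreateGEP(Builder.getInt8Ty(), Ptr,
                                  Builder.getInt64(BaseOffs), "sunkaddr");
  return Builder.CreatePointerCast(Sunk, OrigPtrTy);
}

Using the i8 GEP form (rather than ptrtoint/inttoptr arithmetic) is what keeps alias analysis effective later, as the comment in the pass notes.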
4550
4551/// If there are any memory operands, use OptimizeMemoryInst to sink their
4552/// address computation into the block when possible / profitable.
4553bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
4554 bool MadeChange = false;
4555
4556 const TargetRegisterInfo *TRI =
4557 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
4558 TargetLowering::AsmOperandInfoVector TargetConstraints =
4559 TLI->ParseConstraints(*DL, TRI, CS);
4560 unsigned ArgNo = 0;
4561 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4562 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4563
4564 // Compute the constraint code and ConstraintType to use.
4565 TLI->ComputeConstraintToUse(OpInfo, SDValue());
4566
4567 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
4568 OpInfo.isIndirect) {
4569 Value *OpVal = CS->getArgOperand(ArgNo++);
4570 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
4571 } else if (OpInfo.Type == InlineAsm::isInput)
4572 ArgNo++;
4573 }
4574
4575 return MadeChange;
4576}
4577
4578/// \brief Check if all the uses of \p Val are equivalent (or free) zero or
4579/// sign extensions.
4580static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
4581 assert(!Val->use_empty() && "Input must have at least one use");
4582 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
4583 bool IsSExt = isa<SExtInst>(FirstUser);
4584 Type *ExtTy = FirstUser->getType();
4585 for (const User *U : Val->users()) {
4586 const Instruction *UI = cast<Instruction>(U);
4587 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
4588 return false;
4589 Type *CurTy = UI->getType();
4590 // Same input and output types: Same instruction after CSE.
4591 if (CurTy == ExtTy)
4592 continue;
4593
4594 // If IsSExt is true, we are in this situation:
4595 // a = Val
4596 // b = sext ty1 a to ty2
4597 // c = sext ty1 a to ty3
4598 // Assuming ty2 is shorter than ty3, this could be turned into:
4599 // a = Val
4600 // b = sext ty1 a to ty2
4601 // c = sext ty2 b to ty3
4602 // However, the last sext is not free.
4603 if (IsSExt)
4604 return false;
4605
4606 // This is a ZExt, maybe this is free to extend from one type to another.
4607 // In that case, we would not account for a different use.
4608 Type *NarrowTy;
4609 Type *LargeTy;
4610 if (ExtTy->getScalarType()->getIntegerBitWidth() >
4611 CurTy->getScalarType()->getIntegerBitWidth()) {
4612 NarrowTy = CurTy;
4613 LargeTy = ExtTy;
4614 } else {
4615 NarrowTy = ExtTy;
4616 LargeTy = CurTy;
4617 }
4618
4619 if (!TLI.isZExtFree(NarrowTy, LargeTy))
4620 return false;
4621 }
4622 // All uses are the same or can be derived from one another for free.
4623 return true;
4624}
4625
4626/// \brief Try to speculatively promote extensions in \p Exts and continue
4627/// promoting through newly promoted operands recursively as far as doing so is
4628/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
4629/// When some promotion happened, \p TPT contains the proper state to revert
4630/// them.
4631///
4632/// \return true if some promotion happened, false otherwise.
4633bool CodeGenPrepare::tryToPromoteExts(
4634 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
4635 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
4636 unsigned CreatedInstsCost) {
4637 bool Promoted = false;
4638
4639 // Iterate over all the extensions to try to promote them.
4640 for (auto I : Exts) {
4641 // Early check if we directly have ext(load).
4642 if (isa<LoadInst>(I->getOperand(0))) {
4643 ProfitablyMovedExts.push_back(I);
4644 continue;
4645 }
4646
4647 // Check whether or not we want to do any promotion. The reason we have
4648 // this check inside the for loop is to catch the case where an extension
4649 // is directly fed by a load, because in such a case the extension can be moved
4650 // up without any promotion on its operands.
4651 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
4652 return false;
4653
4654 // Get the action to perform the promotion.
4655 TypePromotionHelper::Action TPH =
4656 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
4657 // Check if we can promote.
4658 if (!TPH) {
4659 // Save the current extension as we cannot move up through its operand.
4660 ProfitablyMovedExts.push_back(I);
4661 continue;
4662 }
4663
4664 // Save the current state.
4665 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4666 TPT.getRestorationPoint();
4667 SmallVector<Instruction *, 4> NewExts;
4668 unsigned NewCreatedInstsCost = 0;
4669 unsigned ExtCost = !TLI->isExtFree(I);
4670 // Promote.
4671 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
4672 &NewExts, nullptr, *TLI);
4673 assert(PromotedVal &&
4674        "TypePromotionHelper should have filtered out those cases");
4675
4676 // We would be able to merge only one extension into a load.
4677 // Therefore, if we have more than 1 new extension we heuristically
4678 // cut this search path, because it means we degrade the code quality.
4679 // With exactly 2, the transformation is neutral, because we will merge
4680 // one extension but leave one. However, we optimistically keep going,
4681 // because the new extension may be removed too.
4682 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
4683 // FIXME: It would be possible to propagate a negative value instead of
4684 // conservatively ceiling it to 0.
4685 TotalCreatedInstsCost =
4686 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
4687 if (!StressExtLdPromotion &&
4688 (TotalCreatedInstsCost > 1 ||
4689 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
4690 // This promotion is not profitable, rollback to the previous state, and
4691 // save the current extension in ProfitablyMovedExts as the latest
4692 // speculative promotion turned out to be unprofitable.
4693 TPT.rollback(LastKnownGood);
4694 ProfitablyMovedExts.push_back(I);
4695 continue;
4696 }
4697 // Continue promoting NewExts as far as doing so is profitable.
4698 SmallVector<Instruction *, 2> NewlyMovedExts;
4699 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
4700 bool NewPromoted = false;
4701 for (auto ExtInst : NewlyMovedExts) {
4702 Instruction *MovedExt = cast<Instruction>(ExtInst);
4703 Value *ExtOperand = MovedExt->getOperand(0);
4704 // If we have reached a load, we need this extra profitability check
4705 // as it could potentially be merged into an ext(load).
4706 if (isa<LoadInst>(ExtOperand) &&
4707 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
4708 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
4709 continue;
4710
4711 ProfitablyMovedExts.push_back(MovedExt);
4712 NewPromoted = true;
4713 }
4714
4715 // If none of speculative promotions for NewExts is profitable, rollback
4716 // and save the current extension (I) as the last profitable extension.
4717 if (!NewPromoted) {
4718 TPT.rollback(LastKnownGood);
4719 ProfitablyMovedExts.push_back(I);
4720 continue;
4721 }
4722 // The promotion is profitable.
4723 Promoted = true;
4724 }
4725 return Promoted;
4726}
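
A worked example of the cost check above, with invented numbers, to make the heuristic concrete: a speculative promotion is kept only while the instructions it creates, minus the extension it hopes to fold away, cost at most 1.

// Illustration only; the numbers are invented, not taken from a real run.
#include <algorithm>

bool promotionLooksProfitable(long long CreatedInstsCost,   // inherited from caller
                              unsigned NewCreatedInstsCost, // created by this step
                              unsigned ExtCost) {           // 1 if the ext is not free
  long long Total = CreatedInstsCost + NewCreatedInstsCost;
  Total = std::max((long long)0, Total - ExtCost);
  // e.g. CreatedInstsCost = 0, NewCreatedInstsCost = 2, ExtCost = 1 gives Total = 1,
  // which is accepted (assuming the promoted instruction is also legal); a Total of
  // 2 or more triggers the rollback path above unless StressExtLdPromotion is set.
  return Total <= 1;
}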
4727
4728/// Merge redundant sexts when one dominates the other.
4729bool CodeGenPrepare::mergeSExts(Function &F) {
4730 DominatorTree DT(F);
4731 bool Changed = false;
4732 for (auto &Entry : ValToSExtendedUses) {
4733 SExts &Insts = Entry.second;
4734 SExts CurPts;
4735 for (Instruction *Inst : Insts) {
4736 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
4737 Inst->getOperand(0) != Entry.first)
4738 continue;
4739 bool inserted = false;
4740 for (auto &Pt : CurPts) {
4741 if (DT.dominates(Inst, Pt)) {
4742 Pt->replaceAllUsesWith(Inst);
4743 RemovedInsts.insert(Pt);
4744 Pt->removeFromParent();
4745 Pt = Inst;
4746 inserted = true;
4747 Changed = true;
4748 break;
4749 }
4750 if (!DT.dominates(Pt, Inst))
4751 // Give up if we need to merge in a common dominator as the
4752 // experiments show it is not profitable.
4753 continue;
4754 Inst->replaceAllUsesWith(Pt);
4755 RemovedInsts.insert(Inst);
4756 Inst->removeFromParent();
4757 inserted = true;
4758 Changed = true;
4759 break;
4760 }
4761 if (!inserted)
4762 CurPts.push_back(Inst);
4763 }
4764 }
4765 return Changed;
4766}
4767
4768/// Return true, if an ext(load) can be formed from an extension in
4769/// \p MovedExts.
4770bool CodeGenPrepare::canFormExtLd(
4771 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
4772 Instruction *&Inst, bool HasPromoted) {
4773 for (auto *MovedExtInst : MovedExts) {
4774 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
4775 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
4776 Inst = MovedExtInst;
4777 break;
4778 }
4779 }
4780 if (!LI)
4781 return false;
4782
4783 // If they're already in the same block, there's nothing to do.
4784 // Make the cheap checks first if we did not promote.
4785 // If we promoted, we need to check if it is indeed profitable.
4786 if (!HasPromoted && LI->getParent() == Inst->getParent())
4787 return false;
4788
4789 EVT VT = TLI->getValueType(*DL, Inst->getType());
4790 EVT LoadVT = TLI->getValueType(*DL, LI->getType());
4791
4792 // If the load has other users and the truncate is not free, this probably
4793 // isn't worthwhile.
4794 if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
4795 !TLI->isTruncateFree(Inst->getType(), LI->getType()))
4796 return false;
4797
4798 // Check whether the target supports casts folded into loads.
4799 unsigned LType;
4800 if (isa<ZExtInst>(Inst))
4801 LType = ISD::ZEXTLOAD;
4802 else {
4803 assert(isa<SExtInst>(Inst) && "Unexpected ext type!");
4804 LType = ISD::SEXTLOAD;
4805 }
4806
4807 return TLI->isLoadExtLegal(LType, VT, LoadVT);
4808}
4809
4810/// Move a zext or sext fed by a load into the same basic block as the load,
4811/// unless conditions are unfavorable. This allows SelectionDAG to fold the
4812/// extend into the load.
4813///
4814/// E.g.,
4815/// \code
4816/// %ld = load i32* %addr
4817/// %add = add nuw i32 %ld, 4
4818/// %zext = zext i32 %add to i64
4819/// \endcode
4820/// =>
4821/// \code
4822/// %ld = load i32* %addr
4823/// %zext = zext i32 %ld to i64
4824/// %add = add nuw i64 %zext, 4
4825/// \endcode
4826/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
4827/// allows us to match zext(load i32*) to i64.
4828///
4829/// Also, try to promote the computations used to obtain a sign extended
4830/// value used into memory accesses.
4831/// E.g.,
4832/// \code
4833/// a = add nsw i32 b, 3
4834/// d = sext i32 a to i64
4835/// e = getelementptr ..., i64 d
4836/// \endcode
4837/// =>
4838/// \code
4839/// f = sext i32 b to i64
4840/// a = add nsw i64 f, 3
4841/// e = getelementptr ..., i64 a
4842/// \endcode
4843///
4844/// \p Inst[in/out] the extension may be modified during the process if some
4845/// promotions apply.
4846bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
4847 // ExtLoad formation and address type promotion infrastructure requires TLI to
4848 // be effective.
4849 if (!TLI)
4850 return false;
4851
4852 bool AllowPromotionWithoutCommonHeader = false;
4853 /// See if it is an interesting sext operation for the address type
4854 /// promotion before trying to promote it, e.g., the ones with the right
4855 /// type and used in memory accesses.
4856 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
4857 *Inst, AllowPromotionWithoutCommonHeader);
4858 TypePromotionTransaction TPT(RemovedInsts);
4859 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4860 TPT.getRestorationPoint();
4861 SmallVector<Instruction *, 1> Exts;
4862 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
4863 Exts.push_back(Inst);
4864
4865 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
4866
4867 // Look for a load being extended.
4868 LoadInst *LI = nullptr;
4869 Instruction *ExtFedByLoad;
4870
4871 // Try to promote a chain of computation if it allows to form an extended
4872 // load.
4873 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
4874 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
4875 TPT.commit();
4876 // Move the extend into the same block as the load
4877 ExtFedByLoad->removeFromParent();
4878 ExtFedByLoad->insertAfter(LI);
4879 // CGP does not check if the zext would be speculatively executed when moved
4880 // to the same basic block as the load. Preserving its original location
4881 // would pessimize the debugging experience, as well as negatively impact
4882 // the quality of sample pgo. We don't want to use "line 0" as that has a
4883 // size cost in the line-table section and logically the zext can be seen as
4884 // part of the load. Therefore we conservatively reuse the same debug
4885 // location for the load and the zext.
4886 ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
4887 ++NumExtsMoved;
4888 Inst = ExtFedByLoad;
4889 return true;
4890 }
4891
4892 // Continue promoting SExts if the target considers this kind of promotion worthwhile.
4893 if (ATPConsiderable &&
4894 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
4895 HasPromoted, TPT, SpeculativelyMovedExts))
4896 return true;
4897
4898 TPT.rollback(LastKnownGood);
4899 return false;
4900}
4901
4902// Perform address type promotion if doing so is profitable.
4903// If AllowPromotionWithoutCommonHeader == false, we should find other sext
4904// instructions that sign extended the same initial value. However, if
4905// AllowPromotionWithoutCommonHeader == true, we expect that promoting the
4906// extension is profitable by itself.
4907bool CodeGenPrepare::performAddressTypePromotion(
4908 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
4909 bool HasPromoted, TypePromotionTransaction &TPT,
4910 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
4911 bool Promoted = false;
4912 SmallPtrSet<Instruction *, 1> UnhandledExts;
4913 bool AllSeenFirst = true;
4914 for (auto I : SpeculativelyMovedExts) {
4915 Value *HeadOfChain = I->getOperand(0);
4916 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
4917 SeenChainsForSExt.find(HeadOfChain);
4918 // If there is an unhandled SExt which has the same header, try to promote
4919 // it as well.
4920 if (AlreadySeen != SeenChainsForSExt.end()) {
4921 if (AlreadySeen->second != nullptr)
4922 UnhandledExts.insert(AlreadySeen->second);
4923 AllSeenFirst = false;
4924 }
4925 }
4926
4927 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
4928 SpeculativelyMovedExts.size() == 1)) {
4929 TPT.commit();
4930 if (HasPromoted)
4931 Promoted = true;
4932 for (auto I : SpeculativelyMovedExts) {
4933 Value *HeadOfChain = I->getOperand(0);
4934 SeenChainsForSExt[HeadOfChain] = nullptr;
4935 ValToSExtendedUses[HeadOfChain].push_back(I);
4936 }
4937 // Update Inst since promotion happened.
4938 Inst = SpeculativelyMovedExts.pop_back_val();
4939 } else {
4940 // This is the first chain visited from the header, keep the current chain
4941 // as unhandled. Defer promoting it until we encounter another SExt
4942 // chain derived from the same header.
4943 for (auto I : SpeculativelyMovedExts) {
4944 Value *HeadOfChain = I->getOperand(0);
4945 SeenChainsForSExt[HeadOfChain] = Inst;
4946 }
4947 return false;
4948 }
4949
4950 if (!AllSeenFirst && !UnhandledExts.empty())
4951 for (auto VisitedSExt : UnhandledExts) {
4952 if (RemovedInsts.count(VisitedSExt))
4953 continue;
4954 TypePromotionTransaction TPT(RemovedInsts);
4955 SmallVector<Instruction *, 1> Exts;
4956 SmallVector<Instruction *, 2> Chains;
4957 Exts.push_back(VisitedSExt);
4958 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
4959 TPT.commit();
4960 if (HasPromoted)
4961 Promoted = true;
4962 for (auto I : Chains) {
4963 Value *HeadOfChain = I->getOperand(0);
4964 // Mark this as handled.
4965 SeenChainsForSExt[HeadOfChain] = nullptr;
4966 ValToSExtendedUses[HeadOfChain].push_back(I);
4967 }
4968 }
4969 return Promoted;
4970}
4971
4972bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
4973 BasicBlock *DefBB = I->getParent();
4974
4975 // If the result of a {s|z}ext and its source are both live out, rewrite all
4976 // other uses of the source with result of extension.
4977 Value *Src = I->getOperand(0);
4978 if (Src->hasOneUse())
4979 return false;
4980
4981 // Only do this xform if truncating is free.
4982 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
4983 return false;
4984
4985 // Only safe to perform the optimization if the source is also defined in
4986 // this block.
4987 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
4988 return false;
4989
4990 bool DefIsLiveOut = false;
4991 for (User *U : I->users()) {
4992 Instruction *UI = cast<Instruction>(U);
4993
4994 // Figure out which BB this ext is used in.
4995 BasicBlock *UserBB = UI->getParent();
4996 if (UserBB == DefBB) continue;
4997 DefIsLiveOut = true;
4998 break;
4999 }
5000 if (!DefIsLiveOut)
5001 return false;
5002
5003 // Make sure none of the uses are PHI nodes.
5004 for (User *U : Src->users()) {
5005 Instruction *UI = cast<Instruction>(U);
5006 BasicBlock *UserBB = UI->getParent();
5007 if (UserBB == DefBB) continue;
5008 // Be conservative. We don't want this xform to end up introducing
5009 // reloads just before load / store instructions.
5010 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5011 return false;
5012 }
5013
5014 // InsertedTruncs - Only insert one trunc in each block once.
5015 DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5016
5017 bool MadeChange = false;
5018 for (Use &U : Src->uses()) {
5019 Instruction *User = cast<Instruction>(U.getUser());
5020
5021 // Figure out which BB this ext is used in.
5022 BasicBlock *UserBB = User->getParent();
5023 if (UserBB == DefBB) continue;
5024
5025 // Both src and def are live in this block. Rewrite the use.
5026 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5027
5028 if (!InsertedTrunc) {
5029 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5030 assert(InsertPt != UserBB->end());
5031 InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5032 InsertedInsts.insert(InsertedTrunc);
5033 }
5034
5035 // Replace a use of the {s|z}ext source with a use of the result.
5036 U = InsertedTrunc;
5037 ++NumExtUses;
5038 MadeChange = true;
5039 }
5040
5041 return MadeChange;
5042}
5043
5044// Find loads whose uses only use some of the loaded value's bits. Add an "and"
5045// just after the load if the target can fold this into one extload instruction,
5046// with the hope of eliminating some of the other later "and" instructions using
5047// the loaded value. "and"s that are made trivially redundant by the insertion
5048// of the new "and" are removed by this function, while others (e.g. those whose
5049// path from the load goes through a phi) are left for isel to potentially
5050// remove.
5051//
5052// For example:
5053//
5054// b0:
5055// x = load i32
5056// ...
5057// b1:
5058// y = and x, 0xff
5059// z = use y
5060//
5061// becomes:
5062//
5063// b0:
5064// x = load i32
5065// x' = and x, 0xff
5066// ...
5067// b1:
5068// z = use x'
5069//
5070// whereas:
5071//
5072// b0:
5073// x1 = load i32
5074// ...
5075// b1:
5076// x2 = load i32
5077// ...
5078// b2:
5079// x = phi x1, x2
5080// y = and x, 0xff
5081//
5082// becomes (after a call to optimizeLoadExt for each load):
5083//
5084// b0:
5085// x1 = load i32
5086// x1' = and x1, 0xff
5087// ...
5088// b1:
5089// x2 = load i32
5090// x2' = and x2, 0xff
5091// ...
5092// b2:
5093// x = phi x1', x2'
5094// y = and x, 0xff
5095//
5096
5097bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
5098
5099 if (!Load->isSimple() ||
5100 !(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
5101 return false;
5102
5103 // Skip loads we've already transformed.
5104 if (Load->hasOneUse() &&
5105 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
5106 return false;
5107
5108 // Look at all uses of Load, looking through phis, to determine how many bits
5109 // of the loaded value are needed.
5110 SmallVector<Instruction *, 8> WorkList;
5111 SmallPtrSet<Instruction *, 16> Visited;
5112 SmallVector<Instruction *, 8> AndsToMaybeRemove;
5113 for (auto *U : Load->users())
5114 WorkList.push_back(cast<Instruction>(U));
5115
5116 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
5117 unsigned BitWidth = LoadResultVT.getSizeInBits();
5118 APInt DemandBits(BitWidth, 0);
5119 APInt WidestAndBits(BitWidth, 0);
5120
5121 while (!WorkList.empty()) {
5122 Instruction *I = WorkList.back();
5123 WorkList.pop_back();
5124
5125 // Break use-def graph loops.
5126 if (!Visited.insert(I).second)
5127 continue;
5128
5129 // For a PHI node, push all of its users.
5130 if (auto *Phi = dyn_cast<PHINode>(I)) {
5131 for (auto *U : Phi->users())
5132 WorkList.push_back(cast<Instruction>(U));
5133 continue;
5134 }
5135
5136 switch (I->getOpcode()) {
5137 case llvm::Instruction::And: {
5138 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
5139 if (!AndC)
5140 return false;
5141 APInt AndBits = AndC->getValue();
5142 DemandBits |= AndBits;
5143 // Keep track of the widest and mask we see.
5144 if (AndBits.ugt(WidestAndBits))
5145 WidestAndBits = AndBits;
5146 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
5147 AndsToMaybeRemove.push_back(I);
5148 break;
5149 }
5150
5151 case llvm::Instruction::Shl: {
5152 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
5153 if (!ShlC)
5154 return false;
5155 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
5156 DemandBits.setLowBits(BitWidth - ShiftAmt);
5157 break;
5158 }
5159
5160 case llvm::Instruction::Trunc: {
5161 EVT TruncVT = TLI->getValueType(*DL, I->getType());
5162 unsigned TruncBitWidth = TruncVT.getSizeInBits();
5163 DemandBits.setLowBits(TruncBitWidth);
5164 break;
5165 }
5166
5167 default:
5168 return false;
5169 }
5170 }
5171
5172 uint32_t ActiveBits = DemandBits.getActiveBits();
5173 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
5174 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
5175 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
5176 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
5177 // followed by an AND.
5178 // TODO: Look into removing this restriction by fixing backends to either
5179 // return false for isLoadExtLegal for i1 or have them select this pattern to
5180 // a single instruction.
5181 //
5182 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
5183 // mask, since these are the only ands that will be removed by isel.
5184 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
5185 WidestAndBits != DemandBits)
5186 return false;
5187
5188 LLVMContext &Ctx = Load->getType()->getContext();
5189 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
5190 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
5191
5192 // Reject cases that won't be matched as extloads.
5193 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
5194 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
5195 return false;
5196
5197 IRBuilder<> Builder(Load->getNextNode());
5198 auto *NewAnd = dyn_cast<Instruction>(
5199 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
5200 // Mark this instruction as "inserted by CGP", so that other
5201 // optimizations don't touch it.
5202 InsertedInsts.insert(NewAnd);
5203
5204 // Replace all uses of load with new and (except for the use of load in the
5205 // new and itself).
5206 Load->replaceAllUsesWith(NewAnd);
5207 NewAnd->setOperand(0, Load);
5208
5209 // Remove any and instructions that are now redundant.
5210 for (auto *And : AndsToMaybeRemove)
5211 // Check that the and mask is the same as the one we decided to put on the
5212 // new and.
5213 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
5214 And->replaceAllUsesWith(NewAnd);
5215 if (&*CurInstIterator == And)
5216 CurInstIterator = std::next(And->getIterator());
5217 And->eraseFromParent();
5218 ++NumAndUses;
5219 }
5220
5221 ++NumAndsAdded;
5222 return true;
5223}
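
To make the demanded-bits bookkeeping above concrete, here is a small hypothetical walk-through (invented users, not from a real module): an i32 load whose only users are "and x, 0xff" and "shl x, 24".

// Illustration only: reproduces the DemandBits arithmetic above for that case.
#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>
using llvm::APInt;

void demandBitsWalkThrough() {
  const unsigned BitWidth = 32;
  APInt DemandBits(BitWidth, 0);

  APInt AndBits(BitWidth, 0xff);
  DemandBits |= AndBits;                        // the and needs bits 0..7

  uint64_t ShiftAmt = 24;
  DemandBits.setLowBits(BitWidth - ShiftAmt);   // the shl also needs bits 0..7

  assert(DemandBits.getActiveBits() == 8 && DemandBits.isMask(8));
  // With WidestAndBits == DemandBits == 0xff, the pass would insert
  // "and i32 %x, 255" right after the load, provided the i32 -> i8 zextload
  // is legal for the target.
}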
5224
5225/// Check if V (an operand of a select instruction) is an expensive instruction
5226/// that is only used once.
5227static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
5228 auto *I = dyn_cast<Instruction>(V);
5229 // If it's safe to speculatively execute, then it should not have side
5230 // effects; therefore, it's safe to sink and possibly *not* execute.
5231 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
5232 TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
5233}
5234
5235/// Returns true if a SelectInst should be turned into an explicit branch.
5236static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
5237 const TargetLowering *TLI,
5238 SelectInst *SI) {
5239 // If even a predictable select is cheap, then a branch can't be cheaper.
5240 if (!TLI->isPredictableSelectExpensive())
5241 return false;
5242
5243 // FIXME: This should use the same heuristics as IfConversion to determine
5244 // whether a select is better represented as a branch.
5245
5246 // If metadata tells us that the select condition is obviously predictable,
5247 // then we want to replace the select with a branch.
5248 uint64_t TrueWeight, FalseWeight;
5249 if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
5250 uint64_t Max = std::max(TrueWeight, FalseWeight);
5251 uint64_t Sum = TrueWeight + FalseWeight;
5252 if (Sum != 0) {
5253 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
5254 if (Probability > TLI->getPredictableBranchThreshold())
5255 return true;
5256 }
5257 }
5258
5259 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
5260
5261 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
5262 // comparison condition. If the compare has more than one use, there's
5263 // probably another cmov or setcc around, so it's not worth emitting a branch.
5264 if (!Cmp || !Cmp->hasOneUse())
5265 return false;
5266
5267 // If either operand of the select is expensive and only needed on one side
5268 // of the select, we should form a branch.
5269 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
5270 sinkSelectOperand(TTI, SI->getFalseValue()))
5271 return true;
5272
5273 return false;
5274}
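
The profile-metadata check above reduces to a single probability comparison. A hypothetical restatement with invented weights: branch_weights of 1000 and 1 give Max/Sum = 1000/1001, which exceeds any reasonable predictable-branch threshold, so the select is considered worth turning into a branch.

// Restatement of the weight check above, for illustration; the weights and the
// threshold used in the example are invented.
#include "llvm/Support/BranchProbability.h"
#include <algorithm>
#include <cstdint>
using llvm::BranchProbability;

static bool obviouslyPredictable(uint64_t TrueWeight, uint64_t FalseWeight,
                                 BranchProbability Threshold) {
  uint64_t Max = std::max(TrueWeight, FalseWeight);
  uint64_t Sum = TrueWeight + FalseWeight;
  if (Sum == 0)
    return false;
  return BranchProbability::getBranchProbability(Max, Sum) > Threshold;
}
// e.g. obviouslyPredictable(1000, 1, BranchProbability(99, 100)) returns true.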
5275
5276/// If \p isTrue is true, return the true value of \p SI, otherwise return
5277/// false value of \p SI. If the true/false value of \p SI is defined by any
5278/// select instructions in \p Selects, look through the defining select
5279/// instruction until the true/false value is not defined in \p Selects.
5280static Value *getTrueOrFalseValue(
5281 SelectInst *SI, bool isTrue,
5282 const SmallPtrSet<const Instruction *, 2> &Selects) {
5283 Value *V;
5284
5285 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
5286 DefSI = dyn_cast<SelectInst>(V)) {
5287 assert(DefSI->getCondition() == SI->getCondition() &&
5288        "The condition of DefSI does not match with SI");
5289 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
5290 }
5291 return V;
5292}
5293
5294/// If we have a SelectInst that will likely profit from branch prediction,
5295/// turn it into a branch.
5296bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
5297 // Find all consecutive select instructions that share the same condition.
5298 SmallVector<SelectInst *, 2> ASI;
5299 ASI.push_back(SI);
5300 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
5301 It != SI->getParent()->end(); ++It) {
5302 SelectInst *I = dyn_cast<SelectInst>(&*It);
5303 if (I && SI->getCondition() == I->getCondition()) {
5304 ASI.push_back(I);
5305 } else {
5306 break;
5307 }
5308 }
5309
5310 SelectInst *LastSI = ASI.back();
5311  // Increment the current iterator to skip the rest of the select instructions
5312  // because they will either all be lowered to branches or none will be.
5313 CurInstIterator = std::next(LastSI->getIterator());
5314
5315 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
5316
5317  // Can we convert the 'select' to control flow?
5318 if (DisableSelectToBranch || OptSize || !TLI || VectorCond ||
5319 SI->getMetadata(LLVMContext::MD_unpredictable))
5320 return false;
5321
5322 TargetLowering::SelectSupportKind SelectKind;
5323 if (VectorCond)
5324 SelectKind = TargetLowering::VectorMaskSelect;
5325 else if (SI->getType()->isVectorTy())
5326 SelectKind = TargetLowering::ScalarCondVectorVal;
5327 else
5328 SelectKind = TargetLowering::ScalarValSelect;
5329
5330 if (TLI->isSelectSupported(SelectKind) &&
5331 !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
5332 return false;
5333
5334 ModifiedDT = true;
5335
5336 // Transform a sequence like this:
5337 // start:
5338 // %cmp = cmp uge i32 %a, %b
5339 // %sel = select i1 %cmp, i32 %c, i32 %d
5340 //
5341 // Into:
5342 // start:
5343 // %cmp = cmp uge i32 %a, %b
5344 // br i1 %cmp, label %select.true, label %select.false
5345 // select.true:
5346 // br label %select.end
5347 // select.false:
5348 // br label %select.end
5349 // select.end:
5350 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
5351 //
5352 // In addition, we may sink instructions that produce %c or %d from
5353 // the entry block into the destination(s) of the new branch.
5354 // If the true or false blocks do not contain a sunken instruction, that
5355 // block and its branch may be optimized away. In that case, one side of the
5356 // first branch will point directly to select.end, and the corresponding PHI
5357 // predecessor block will be the start block.
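  // For illustration (not part of the original file): if the true operand is
  // an expensive, speculatable, single-use instruction (here an fdiv; the
  // exact cost threshold is target-dependent), it is sunk so it only executes
  // when needed:
  //  start:
  //    %cmp = fcmp ogt float %a, %b
  //    br i1 %cmp, label %select.true.sink, label %select.end
  //  select.true.sink:
  //    %c = fdiv float %x, %y
  //    br label %select.end
  //  select.end:
  //    %sel = phi float [ %c, %select.true.sink ], [ %d, %start ]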
5358
5359 // First, we split the block containing the select into 2 blocks.
5360 BasicBlock *StartBlock = SI->getParent();
5361 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
5362 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
5363
5364 // Delete the unconditional branch that was just created by the split.
5365 StartBlock->getTerminator()->eraseFromParent();
5366
5367 // These are the new basic blocks for the conditional branch.
5368 // At least one will become an actual new basic block.
5369 BasicBlock *TrueBlock = nullptr;
5370 BasicBlock *FalseBlock = nullptr;
5371 BranchInst *TrueBranch = nullptr;
5372 BranchInst *FalseBranch = nullptr;
5373
5374 // Sink expensive instructions into the conditional blocks to avoid executing
5375 // them speculatively.
5376 for (SelectInst *SI : ASI) {
5377 if (sinkSelectOperand(TTI, SI->getTrueValue())) {
5378 if (TrueBlock == nullptr) {
5379 TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
5380 EndBlock->getParent(), EndBlock);
5381 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
5382 }
5383 auto *TrueInst = cast<Instruction>(SI->getTrueValue());
5384 TrueInst->moveBefore(TrueBranch);
5385 }
5386 if (sinkSelectOperand(TTI, SI->getFalseValue())) {
5387 if (FalseBlock == nullptr) {
5388 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
5389 EndBlock->getParent(), EndBlock);
5390 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
5391 }
5392 auto *FalseInst = cast<Instruction>(SI->getFalseValue());
5393 FalseInst->moveBefore(FalseBranch);
5394 }
5395 }
5396
5397 // If there was nothing to sink, then arbitrarily choose the 'false' side
5398 // for a new input value to the PHI.
5399 if (TrueBlock == FalseBlock) {
5400    assert(TrueBlock == nullptr &&
5401           "Unexpected basic block transform while optimizing select");
5402
5403 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
5404 EndBlock->getParent(), EndBlock);
5405 BranchInst::Create(EndBlock, FalseBlock);
5406 }
5407
5408 // Insert the real conditional branch based on the original condition.
5409 // If we did not create a new block for one of the 'true' or 'false' paths
5410 // of the condition, it means that side of the branch goes to the end block
5411 // directly and the path originates from the start block from the point of
5412 // view of the new PHI.
5413 BasicBlock *TT, *FT;
5414 if (TrueBlock == nullptr) {
5415 TT = EndBlock;
5416 FT = FalseBlock;
5417 TrueBlock = StartBlock;
5418 } else if (FalseBlock == nullptr) {
5419 TT = TrueBlock;
5420 FT = EndBlock;
5421 FalseBlock = StartBlock;
5422 } else {
5423 TT = TrueBlock;
5424 FT = FalseBlock;
5425 }
5426 IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
5427
5428 SmallPtrSet<const Instruction *, 2> INS;
5429 INS.insert(ASI.begin(), ASI.end());
5430  // Use a reverse iterator because a later select may use the value of an
5431  // earlier select, and we need to propagate the value through the earlier
5432  // select to get the PHI operand.
5433 for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
5434 SelectInst *SI = *It;
5435 // The select itself is replaced with a PHI Node.
5436 PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
5437 PN->takeName(SI);
5438 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
5439 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
5440
5441 SI->replaceAllUsesWith(PN);
5442 SI->eraseFromParent();
5443 INS.erase(SI);
5444 ++NumSelectsExpanded;
5445 }
5446
5447 // Instruct OptimizeBlock to skip to the next block.
5448 CurInstIterator = StartBlock->end();
5449 return true;
5450}
5451
5452static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
5453 SmallVector<int, 16> Mask(SVI->getShuffleMask());
5454 int SplatElem = -1;
5455 for (unsigned i = 0; i < Mask.size(); ++i) {
5456 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
5457 return false;
5458 SplatElem = Mask[i];
5459 }
5460
5461 return true;
5462}
5463
5464/// Some targets have expensive vector shifts if the lanes aren't all the same
5465/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
5466/// it's often worth sinking a shufflevector splat down to its use so that
5467/// codegen can spot that all lanes are identical.
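// For illustration (not part of the original file): a splat of the scalar
// shift amount defined in one block and used by a shift in another block:
//   entry:
//     %ins   = insertelement <4 x i32> undef, i32 %amt, i32 0
//     %splat = shufflevector <4 x i32> %ins, <4 x i32> undef,
//                            <4 x i32> zeroinitializer
//     br label %use
//   use:
//     %shl = shl <4 x i32> %val, %splat
// Sinking %splat into %use lets instruction selection see that all shift
// lanes are identical and pick a cheaper vector-shift-by-scalar pattern.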
5468bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
5469 BasicBlock *DefBB = SVI->getParent();
5470
5471 // Only do this xform if variable vector shifts are particularly expensive.
5472 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
5473 return false;
5474
5475 // We only expect better codegen by sinking a shuffle if we can recognise a
5476 // constant splat.
5477 if (!isBroadcastShuffle(SVI))
5478 return false;
5479
5480 // InsertedShuffles - Only insert a shuffle in each block once.
5481 DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
5482
5483 bool MadeChange = false;
5484 for (User *U : SVI->users()) {
5485 Instruction *UI = cast<Instruction>(U);
5486
5487    // Figure out which BB this shuffle is used in.
5488 BasicBlock *UserBB = UI->getParent();
5489 if (UserBB == DefBB) continue;
5490
5491 // For now only apply this when the splat is used by a shift instruction.
5492 if (!UI->isShift()) continue;
5493
5494 // Everything checks out, sink the shuffle if the user's block doesn't
5495 // already have a copy.
5496 Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
5497
5498 if (!InsertedShuffle) {
5499 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5500      assert(InsertPt != UserBB->end());
5501 InsertedShuffle =
5502 new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
5503 SVI->getOperand(2), "", &*InsertPt);
5504 }
5505
5506 UI->replaceUsesOfWith(SVI, InsertedShuffle);
5507 MadeChange = true;
5508 }
5509
5510 // If we removed all uses, nuke the shuffle.
5511 if (SVI->use_empty()) {
5512 SVI->eraseFromParent();
5513 MadeChange = true;
5514 }
5515
5516 return MadeChange;
5517}
5518
5519bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
5520 if (!TLI || !DL)
5521 return false;
5522
5523 Value *Cond = SI->getCondition();
5524 Type *OldType = Cond->getType();
5525 LLVMContext &Context = Cond->getContext();
5526 MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
5527 unsigned RegWidth = RegType.getSizeInBits();
5528
5529 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
5530 return false;
5531
5532 // If the register width is greater than the type width, expand the condition
5533 // of the switch instruction and each case constant to the width of the
5534 // register. By widening the type of the switch condition, subsequent
5535 // comparisons (for case comparisons) will not need to be extended to the
5536 // preferred register width, so we will potentially eliminate N-1 extends,
5537 // where N is the number of cases in the switch.
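  // For illustration (not part of the original file): on a target with 32-bit
  // registers, an i8 switch condition is widened once instead of extending
  // every case comparison:
  //   switch i8 %c, label %def [ i8 1, label %a
  //                              i8 2, label %b ]
  // becomes
  //   %c.wide = zext i8 %c to i32
  //   switch i32 %c.wide, label %def [ i32 1, label %a
  //                                    i32 2, label %b ]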
5538 auto *NewType = Type::getIntNTy(Context, RegWidth);
5539
5540 // Zero-extend the switch condition and case constants unless the switch
5541 // condition is a function argument that is already being sign-extended.
5542 // In that case, we can avoid an unnecessary mask/extension by sign-extending
5543 // everything instead.
5544 Instruction::CastOps ExtType = Instruction::ZExt;
5545 if (auto *Arg = dyn_cast<Argument>(Cond))
5546 if (Arg->hasSExtAttr())
5547 ExtType = Instruction::SExt;
5548
5549 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
5550 ExtInst->insertBefore(SI);
5551 SI->setCondition(ExtInst);
5552 for (auto Case : SI->cases()) {
5553 APInt NarrowConst = Case.getCaseValue()->getValue();
5554 APInt WideConst = (ExtType == Instruction::ZExt) ?
5555 NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
5556 Case.setValue(ConstantInt::get(Context, WideConst));
5557 }
5558
5559 return true;
5560}
5561
5562
5563namespace {
5564/// \brief Helper class to promote a scalar operation to a vector one.
5565/// This class is used to move downward extractelement transition.
5566/// E.g.,
5567/// a = vector_op <2 x i32>
5568/// b = extractelement <2 x i32> a, i32 0
5569/// c = scalar_op b
5570/// store c
5571///
5572/// =>
5573/// a = vector_op <2 x i32>
5574/// c = vector_op a (equivalent to scalar_op on the related lane)
5575/// * d = extractelement <2 x i32> c, i32 0
5576/// * store d
5577/// Assuming both the extractelement and the store can be combined, we get
5578/// rid of the transition.
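// For illustration (not part of the original file), assuming the target can
// combine store+extractelement:
//   %a = add <2 x i32> %v, %w
//   %b = extractelement <2 x i32> %a, i32 0
//   %c = mul i32 %b, 7
//   store i32 %c, i32* %p
// is rewritten so the extract sits next to the store:
//   %a = add <2 x i32> %v, %w
//   %c = mul <2 x i32> %a, <i32 7, i32 undef>
//   %d = extractelement <2 x i32> %c, i32 0
//   store i32 %d, i32* %p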
5579class VectorPromoteHelper {
5580 /// DataLayout associated with the current module.
5581 const DataLayout &DL;
5582
5583 /// Used to perform some checks on the legality of vector operations.
5584 const TargetLowering &TLI;
5585
5586  /// Used to estimate the cost of the promoted chain.
5587 const TargetTransformInfo &TTI;
5588
5589 /// The transition being moved downwards.
5590 Instruction *Transition;
5591 /// The sequence of instructions to be promoted.
5592 SmallVector<Instruction *, 4> InstsToBePromoted;
5593 /// Cost of combining a store and an extract.
5594 unsigned StoreExtractCombineCost;
5595 /// Instruction that will be combined with the transition.
5596 Instruction *CombineInst;
5597
5598 /// \brief The instruction that represents the current end of the transition.
5599 /// Since we are faking the promotion until we reach the end of the chain
5600 /// of computation, we need a way to get the current end of the transition.
5601 Instruction *getEndOfTransition() const {
5602 if (InstsToBePromoted.empty())
5603 return Transition;
5604 return InstsToBePromoted.back();
5605 }
5606
5607 /// \brief Return the index of the original value in the transition.
5608 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
5609 /// c, is at index 0.
5610 unsigned getTransitionOriginalValueIdx() const {
5611    assert(isa<ExtractElementInst>(Transition) &&
5612           "Other kind of transitions are not supported yet");
5613 return 0;
5614 }
5615
5616  /// \brief Return the index of the index operand in the transition.
5617 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
5618 /// is at index 1.
5619 unsigned getTransitionIdx() const {
5620    assert(isa<ExtractElementInst>(Transition) &&
5621           "Other kind of transitions are not supported yet");
5622 return 1;
5623 }
5624
5625 /// \brief Get the type of the transition.
5626 /// This is the type of the original value.
5627 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
5628 /// transition is <2 x i32>.
5629 Type *getTransitionType() const {
5630 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
5631 }
5632
5633  /// \brief Promote \p ToBePromoted by moving \p Def downward through it.
5634 /// I.e., we have the following sequence:
5635 /// Def = Transition <ty1> a to <ty2>
5636 /// b = ToBePromoted <ty2> Def, ...
5637 /// =>
5638 /// b = ToBePromoted <ty1> a, ...
5639 /// Def = Transition <ty1> ToBePromoted to <ty2>
5640 void promoteImpl(Instruction *ToBePromoted);
5641
5642 /// \brief Check whether or not it is profitable to promote all the
5643 /// instructions enqueued to be promoted.
5644 bool isProfitableToPromote() {
5645 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
5646 unsigned Index = isa<ConstantInt>(ValIdx)
5647 ? cast<ConstantInt>(ValIdx)->getZExtValue()
5648 : -1;
5649 Type *PromotedType = getTransitionType();
5650
5651 StoreInst *ST = cast<StoreInst>(CombineInst);
5652 unsigned AS = ST->getPointerAddressSpace();
5653 unsigned Align = ST->getAlignment();
5654 // Check if this store is supported.
5655 if (!TLI.allowsMisalignedMemoryAccesses(
5656 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
5657 Align)) {
5658 // If this is not supported, there is no way we can combine
5659 // the extract with the store.
5660 return false;
5661 }
5662
5663 // The scalar chain of computation has to pay for the transition
5664 // scalar to vector.
5665 // The vector chain has to account for the combining cost.
5666 uint64_t ScalarCost =
5667 TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
5668 uint64_t VectorCost = StoreExtractCombineCost;
5669 for (const auto &Inst : InstsToBePromoted) {
5670 // Compute the cost.
5671 // By construction, all instructions being promoted are arithmetic ones.
5672 // Moreover, one argument is a constant that can be viewed as a splat
5673 // constant.
5674 Value *Arg0 = Inst->getOperand(0);
5675 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
5676 isa<ConstantFP>(Arg0);
5677 TargetTransformInfo::OperandValueKind Arg0OVK =
5678 IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
5679 : TargetTransformInfo::OK_AnyValue;
5680 TargetTransformInfo::OperandValueKind Arg1OVK =
5681 !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
5682 : TargetTransformInfo::OK_AnyValue;
5683 ScalarCost += TTI.getArithmeticInstrCost(
5684 Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
5685 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
5686 Arg0OVK, Arg1OVK);
5687 }
5688    DEBUG(dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
5689                 << ScalarCost << "\nVector: " << VectorCost << '\n');
5690 return ScalarCost > VectorCost;
5691 }
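  // For illustration (not part of the original file), with hypothetical costs
  // of 3 for the scalar extractelement, 1 for the store+extract combine, and
  // 1 for a mul in either form, promoting a single mul gives
  //   ScalarCost = 3 + 1 = 4  vs.  VectorCost = 1 + 1 = 2,
  // so the promotion is considered profitable.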
5692
5693 /// \brief Generate a constant vector with \p Val with the same
5694 /// number of elements as the transition.
5695 /// \p UseSplat defines whether or not \p Val should be replicated
5696 /// across the whole vector.
5697 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
5698 /// otherwise we generate a vector with as many undef as possible:
5699 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
5700 /// used at the index of the extract.
5701 Value *getConstantVector(Constant *Val, bool UseSplat) const {
5702    unsigned ExtractIdx = UINT_MAX;
5703 if (!UseSplat) {
5704 // If we cannot determine where the constant must be, we have to
5705 // use a splat constant.
5706 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
5707 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
5708 ExtractIdx = CstVal->getSExtValue();
5709 else
5710 UseSplat = true;
5711 }
5712
5713 unsigned End = getTransitionType()->getVectorNumElements();
5714 if (UseSplat)
5715 return ConstantVector::getSplat(End, Val);
5716
5717 SmallVector<Constant *, 4> ConstVec;
5718 UndefValue *UndefVal = UndefValue::get(Val->getType());
5719 for (unsigned Idx = 0; Idx != End; ++Idx) {
5720 if (Idx == ExtractIdx)
5721 ConstVec.push_back(Val);
5722 else
5723 ConstVec.push_back(UndefVal);
5724 }
5725 return ConstantVector::get(ConstVec);
5726 }
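  // For illustration (not part of the original file): for a <2 x i32>
  // transition extracting index 0 and Val = i32 7, this produces
  // <i32 7, i32 undef> when undef is acceptable, and the splat <i32 7, i32 7>
  // when UseSplat is forced (e.g. because Val would end up on the right-hand
  // side of a division).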
5727
5728  /// \brief Check whether promoting the operand at \p OperandIdx in \p Use to
5729  /// a vector type can trigger undefined behavior.
5730 static bool canCauseUndefinedBehavior(const Instruction *Use,
5731 unsigned OperandIdx) {
5732    // It is not safe to introduce undef when the operand is on the
5733    // right-hand side of a division-like instruction.
5734 if (OperandIdx != 1)
5735 return false;
5736 switch (Use->getOpcode()) {
5737 default:
5738 return false;
5739 case Instruction::SDiv:
5740 case Instruction::UDiv:
5741 case Instruction::SRem:
5742 case Instruction::URem:
5743 return true;
5744 case Instruction::FDiv:
5745 case Instruction::FRem:
5746 return !Use->hasNoNaNs();
5747 }
5748    llvm_unreachable(nullptr);
5749 }
5750
5751public:
5752 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
5753 const TargetTransformInfo &TTI, Instruction *Transition,
5754 unsigned CombineCost)
5755 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
5756 StoreExtractCombineCost(CombineCost), CombineInst(nullptr) {
5757    assert(Transition && "Do not know how to promote null");
5758 }
5759
5760 /// \brief Check if we can promote \p ToBePromoted to \p Type.
5761 bool canPromote(const Instruction *ToBePromoted) const {
5762 // We could support CastInst too.
5763 return isa<BinaryOperator>(ToBePromoted);
5764 }
5765
5766  /// \brief Check if it is profitable to promote \p ToBePromoted
5767  /// by moving the transition downward through it.
5768 bool shouldPromote(const Instruction *ToBePromoted) const {
5769 // Promote only if all the operands can be statically expanded.
5770 // Indeed, we do not want to introduce any new kind of transitions.
5771 for (const Use &U : ToBePromoted->operands()) {
5772 const Value *Val = U.get();
5773 if (Val == getEndOfTransition()) {
5774 // If the use is a division and the transition is on the rhs,
5775 // we cannot promote the operation, otherwise we may create a
5776 // division by zero.
5777 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
5778 return false;
5779 continue;
5780 }
5781 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
5782 !isa<ConstantFP>(Val))
5783 return false;
5784 }
5785 // Check that the resulting operation is legal.
5786 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
5787 if (!ISDOpcode)
5788 return false;
5789 return StressStoreExtract ||
5790 TLI.isOperationLegalOrCustom(
5791 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
5792 }
5793
5794 /// \brief Check whether or not \p Use can be combined
5795 /// with the transition.
5796 /// I.e., is it possible to do Use(Transition) => AnotherUse?
5797 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
5798
5799 /// \brief Record \p ToBePromoted as part of the chain to be promoted.
5800 void enqueueForPromotion(Instruction *ToBePromoted) {
5801 InstsToBePromoted.push_back(ToBePromoted);
5802 }
5803
5804 /// \brief Set the instruction that will be combined with the transition.
5805 void recordCombineInstruction(Instruction *ToBeCombined) {
5806    assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
5807 CombineInst = ToBeCombined;
5808 }
5809
5810  /// \brief Promote all the instructions enqueued for promotion if it is
5811  /// profitable.
5812 /// \return True if the promotion happened, false otherwise.
5813 bool promote() {
5814 // Check if there is something to promote.
5815 // Right now, if we do not have anything to combine with,
5816 // we assume the promotion is not profitable.
5817 if (InstsToBePromoted.empty() || !CombineInst)
5818 return false;
5819
5820 // Check cost.
5821 if (!StressStoreExtract && !isProfitableToPromote())
5822 return false;
5823
5824 // Promote.
5825 for (auto &ToBePromoted : InstsToBePromoted)
5826 promoteImpl(ToBePromoted);
5827 InstsToBePromoted.clear();
5828 return true;
5829 }
5830};
5831} // End of anonymous namespace.
5832
5833void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
5834 // At this point, we know that all the operands of ToBePromoted but Def
5835 // can be statically promoted.
5836 // For Def, we need to use its parameter in ToBePromoted:
5837 // b = ToBePromoted ty1 a
5838 // Def = Transition ty1 b to ty2
5839 // Move the transition down.
5840 // 1. Replace all uses of the promoted operation by the transition.
5841 // = ... b => = ... Def.
5842  assert(ToBePromoted->getType() == Transition->getType() &&
5843         "The type of the result of the transition does not match "
5844         "the final type");
5845 ToBePromoted->replaceAllUsesWith(Transition);
5846 // 2. Update the type of the uses.
5847 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
5848 Type *TransitionTy = getTransitionType();
5849 ToBePromoted->mutateType(TransitionTy);
5850 // 3. Update all the operands of the promoted operation with promoted
5851 // operands.
5852 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
5853 for (Use &U : ToBePromoted->operands()) {
5854 Value *Val = U.get();
5855 Value *NewVal = nullptr;
5856 if (Val == Transition)
5857 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
5858 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
5859 isa<ConstantFP>(Val)) {
5860 // Use a splat constant if it is not safe to use undef.
5861 NewVal = getConstantVector(
5862 cast<Constant>(Val),
5863 isa<UndefValue>(Val) ||
5864 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
5865 } else
5866      llvm_unreachable("Did you modified shouldPromote and forgot to update "
5867                       "this?");
5868 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
5869 }
5870 Transition->removeFromParent();
5871 Transition->insertAfter(ToBePromoted);
5872 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
5873}
5874
5875/// Some targets can do store(extractelement) with one instruction.
5876/// Try to push the extractelement towards the stores when the target
5877/// has this feature and this is profitable.
5878bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
5879  unsigned CombineCost = UINT_MAX;
5880 if (DisableStoreExtract || !TLI ||
5881 (!StressStoreExtract &&
5882 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
5883 Inst->getOperand(1), CombineCost)))
5884 return false;
5885
5886 // At this point we know that Inst is a vector to scalar transition.
5887 // Try to move it down the def-use chain, until:
5888 // - We can combine the transition with its single use
5889 // => we got rid of the transition.
5890 // - We escape the current basic block
5891  //   => we would need to check that we are moving it to a cheaper place and
5892 // we do not do that for now.
5893 BasicBlock *Parent = Inst->getParent();
5894  DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
5895 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
5896 // If the transition has more than one use, assume this is not going to be
5897 // beneficial.
5898 while (Inst->hasOneUse()) {
5899 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
5900    DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
5901
5902 if (ToBePromoted->getParent() != Parent) {
5903      DEBUG(dbgs() << "Instruction to promote is in a different block ("
5904                   << ToBePromoted->getParent()->getName()
5905                   << ") than the transition (" << Parent->getName() << ").\n");
5906 return false;
5907 }
5908
5909 if (VPH.canCombine(ToBePromoted)) {
5910      DEBUG(dbgs() << "Assume " << *Inst << '\n'
5911                   << "will be combined with: " << *ToBePromoted << '\n');
5912 VPH.recordCombineInstruction(ToBePromoted);
5913 bool Changed = VPH.promote();
5914 NumStoreExtractExposed += Changed;
5915 return Changed;
5916 }
5917
5918    DEBUG(dbgs() << "Try promoting.\n");
5919 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
5920 return false;
5921
5922    DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
5923
5924 VPH.enqueueForPromotion(ToBePromoted);
5925 Inst = ToBePromoted;
5926 }
5927 return false;
5928}
5929
5930/// For the store instruction sequence below, the F and I values
5931/// are bundled together as an i64 value before being stored into memory.
5932/// Sometimes it is more efficient to generate separate stores for F and I,
5933/// which can remove the bitwise instructions or sink them to colder places.
5934///
5935/// (store (or (zext (bitcast F to i32) to i64),
5936/// (shl (zext I to i64), 32)), addr) -->
5937/// (store F, addr) and (store I, addr+4)
5938///
5939/// Similarly, splitting for other merged store can also be beneficial, like:
5940/// For pair of {i32, i32}, i64 store --> two i32 stores.
5941/// For pair of {i32, i16}, i64 store --> two i32 stores.
5942/// For pair of {i16, i16}, i32 store --> two i16 stores.
5943/// For pair of {i16, i8}, i32 store --> two i16 stores.
5944/// For pair of {i8, i8}, i16 store --> two i8 stores.
5945///
5946/// We allow each target to determine specifically which kind of splitting is
5947/// supported.
5948///
5949/// The store patterns are commonly seen from the simple code snippet below
5950/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
5951/// void goo(const std::pair<int, float> &);
5952/// hoo() {
5953/// ...
5954/// goo(std::make_pair(tmp, ftmp));
5955/// ...
5956/// }
5957///
5958/// Although we already have similar splitting in DAG Combine, we duplicate
5959/// it in CodeGenPrepare to catch the case in which the pattern spans
5960/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
5961/// during code expansion.
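// For illustration (not part of the original file): for the {float, int} pair
// from the comment above, the merged store
//   %f.i  = bitcast float %f to i32
//   %f.64 = zext i32 %f.i to i64
//   %i.64 = zext i32 %i to i64
//   %hi   = shl i64 %i.64, 32
//   %or   = or i64 %f.64, %hi
//   store i64 %or, i64* %addr
// is split into two half-width stores at %addr and %addr + 4 bytes when the
// target reports that this is cheaper than merging the bits.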
5962static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
5963 const TargetLowering &TLI) {
5964 // Handle simple but common cases only.
5965 Type *StoreType = SI.getValueOperand()->getType();
5966 if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
5967 DL.getTypeSizeInBits(StoreType) == 0)
5968 return false;
5969
5970 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
5971 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
5972 if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
5973 DL.getTypeSizeInBits(SplitStoreType))
5974 return false;
5975
5976  // Match the following patterns:
5977  // (store (or (zext LValue to i64),
5978  //            (shl (zext HValue to i64), HalfValBitSize)), addr)
5979  // or
5980  // (store (or (shl (zext HValue to i64), HalfValBitSize),
5981  //            (zext LValue to i64)), addr)
5982  // Expect both operands of the OR and the first operand of the SHL to have
5983  // only one use.
5984 Value *LValue, *HValue;
5985 if (!match(SI.getValueOperand(),
5986 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
5987 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
5988 m_SpecificInt(HalfValBitSize))))))
5989 return false;
5990
5991  // Check that LValue and HValue are integers whose size is at most HalfValBitSize.
5992 if (!LValue->getType()->isIntegerTy() ||
5993 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
5994 !HValue->getType()->isIntegerTy() ||
5995 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
5996 return false;
5997
5998 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
5999 // as the input of target query.
6000 auto *LBC = dyn_cast<BitCastInst>(LValue);
6001 auto *HBC = dyn_cast<BitCastInst>(HValue);
6002 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
6003 : EVT::getEVT(LValue->getType());
6004 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
6005 : EVT::getEVT(HValue->getType());
6006 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
6007 return false;
6008
6009 // Start to split store.
6010 IRBuilder<> Builder(SI.getContext());
6011 Builder.SetInsertPoint(&SI);
6012
6013  // If LValue/HValue is a bitcast in another BB, create a new one in the
6014  // current BB so it may be merged with the split stores by the DAG combiner.
6015 if (LBC && LBC->getParent() != SI.getParent())
6016 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
6017 if (HBC && HBC->getParent() != SI.getParent())
6018 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
6019
6020 auto CreateSplitStore = [&](Value *V, bool Upper) {
6021 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
6022 Value *Addr = Builder.CreateBitCast(
6023 SI.getOperand(1),
6024 SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
6025 if (Upper)
6026 Addr = Builder.CreateGEP(
6027 SplitStoreType, Addr,
6028 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
6029 Builder.CreateAlignedStore(
6030 V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
6031 };
6032
6033 CreateSplitStore(LValue, false);
6034 CreateSplitStore(HValue, true);
6035
6036 // Delete the old store.
6037 SI.eraseFromParent();
6038 return true;
6039}
6040
6041bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
6042 // Bail out if we inserted the instruction to prevent optimizations from
6043 // stepping on each other's toes.
6044 if (InsertedInsts.count(I))
6045 return false;
6046
6047 if (PHINode *P = dyn_cast<PHINode>(I)) {
6048 // It is possible for very late stage optimizations (such as SimplifyCFG)
6049 // to introduce PHI nodes too late to be cleaned up. If we detect such a
6050 // trivial PHI, go ahead and zap it here.
6051 if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
6052 P->replaceAllUsesWith(V);
6053 P->eraseFromParent();
6054 ++NumPHIsElim;
6055 return true;
6056 }
6057 return false;
6058 }
6059
6060 if (CastInst *CI = dyn_cast<CastInst>(I)) {
6061 // If the source of the cast is a constant, then this should have
6062 // already been constant folded. The only reason NOT to constant fold
6063 // it is if something (e.g. LSR) was careful to place the constant
6064 // evaluation in a block other than then one that uses it (e.g. to hoist
6065 // the address of globals out of a loop). If this is the case, we don't
6066 // want to forward-subst the cast.
6067 if (isa<Constant>(CI->getOperand(0)))
6068 return false;
6069
6070 if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
6071 return true;
6072
6073 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
6074 /// Sink a zext or sext into its user blocks if the target type doesn't
6075 /// fit in one register
6076 if (TLI &&
6077 TLI->getTypeAction(CI->getContext(),
6078 TLI->getValueType(*DL, CI->getType())) ==
6079 TargetLowering::TypeExpandInteger) {
6080 return SinkCast(CI);
6081 } else {
6082 bool MadeChange = optimizeExt(I);
6083 return MadeChange | optimizeExtUses(I);
6084 }
6085 }
6086 return false;
6087 }
6088
6089 if (CmpInst *CI = dyn_cast<CmpInst>(I))
6090 if (!TLI || !TLI->hasMultipleConditionRegisters())
6091 return OptimizeCmpExpression(CI, TLI);
6092
6093 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
6094 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
6095 if (TLI) {
6096 bool Modified = optimizeLoadExt(LI);
6097 unsigned AS = LI->getPointerAddressSpace();
6098 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
6099 return Modified;
6100 }
6101 return false;
6102 }
6103
6104 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
6105 if (TLI && splitMergedValStore(*SI, *DL, *TLI))
6106 return true;
6107 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
6108 if (TLI) {
6109 unsigned AS = SI->getPointerAddressSpace();
6110 return optimizeMemoryInst(I, SI->getOperand(1),
6111 SI->getOperand(0)->getType(), AS);
6112 }
6113 return false;
6114 }
6115
6116 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
6117 unsigned AS = RMW->getPointerAddressSpace();
6118 return optimizeMemoryInst(I, RMW->getPointerOperand(),
6119 RMW->getType(), AS);
6120 }
6121
6122 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
6123 unsigned AS = CmpX->getPointerAddressSpace();
6124 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
6125 CmpX->getCompareOperand()->getType(), AS);
6126 }
6127
6128 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
6129
6130 if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
6131 EnableAndCmpSinking && TLI)
6132 return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
6133
6134 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
6135 BinOp->getOpcode() == Instruction::LShr)) {
6136 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
6137 if (TLI && CI && TLI->hasExtractBitsInsn())
6138 return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
6139
6140 return false;
6141 }
6142
6143 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
6144 if (GEPI->hasAllZeroIndices()) {
6145 /// The GEP operand must be a pointer, so must its result -> BitCast
6146 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
6147 GEPI->getName(), GEPI);
6148 GEPI->replaceAllUsesWith(NC);
6149 GEPI->eraseFromParent();
6150 ++NumGEPsElim;
6151 optimizeInst(NC, ModifiedDT);
6152 return true;
6153 }
6154 return false;
6155 }
6156
6157 if (CallInst *CI = dyn_cast<CallInst>(I))
6158 return optimizeCallInst(CI, ModifiedDT);
6159
6160 if (SelectInst *SI = dyn_cast<SelectInst>(I))
6161 return optimizeSelectInst(SI);
6162
6163 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
6164 return optimizeShuffleVectorInst(SVI);
6165
6166 if (auto *Switch = dyn_cast<SwitchInst>(I))
6167 return optimizeSwitchInst(Switch);
6168
6169 if (isa<ExtractElementInst>(I))
6170 return optimizeExtractElementInst(I);
6171
6172 return false;
6173}
6174
6175/// Given an OR instruction, check to see if this is a bitreverse
6176/// idiom. If so, insert the new intrinsic and return true.
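// For illustration (not part of the original file): when the shift-and-mask
// chain that reverses the bits of an i32 is recognized (the matched idiom
// itself is omitted here for brevity), it is replaced with a single call to
// the intrinsic:
//   %rev = call i32 @llvm.bitreverse.i32(i32 %x)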
6177static bool makeBitReverse(Instruction &I, const DataLayout &DL,
6178 const TargetLowering &TLI) {
6179 if (!I.getType()->isIntegerTy() ||
6180 !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
6181 TLI.getValueType(DL, I.getType(), true)))
6182 return false;
6183
6184 SmallVector<Instruction*, 4> Insts;
6185 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
6186 return false;
6187 Instruction *LastInst = Insts.back();
6188 I.replaceAllUsesWith(LastInst);
6189 RecursivelyDeleteTriviallyDeadInstructions(&I);
6190 return true;
6191}
6192
6193// In this pass we look for GEP and cast instructions that are used
6194// across basic blocks and rewrite them to improve basic-block-at-a-time
6195// selection.
6196bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
6197 SunkAddrs.clear();
6198 bool MadeChange = false;
6199
6200 CurInstIterator = BB.begin();
6201 while (CurInstIterator != BB.end()) {
6202 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
6203 if (ModifiedDT)
6204 return true;
6205 }
6206
6207 bool MadeBitReverse = true;
6208 while (TLI && MadeBitReverse) {
6209 MadeBitReverse = false;
6210 for (auto &I : reverse(BB)) {
6211 if (makeBitReverse(I, *DL, *TLI)) {
6212 MadeBitReverse = MadeChange = true;
6213 ModifiedDT = true;
6214 break;
6215 }
6216 }
6217 }
6218 MadeChange |= dupRetToEnableTailCallOpts(&BB);
6219
6220 return MadeChange;
6221}
6222
6223// If llvm.dbg.value is far away from the value, then ISel may not be able to
6224// handle it properly. ISel will drop llvm.dbg.value if it cannot find a node
6225// corresponding to the value.
6226bool CodeGenPrepare::placeDbgValues(Function &F) {
6227 bool MadeChange = false;
6228 for (BasicBlock &BB : F) {
6229 Instruction *PrevNonDbgInst = nullptr;
6230 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
6231 Instruction *Insn = &*BI++;
6232 DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
6233 // Leave dbg.values that refer to an alloca alone. These
6234      // intrinsics describe the address of a variable (= the alloca)
6235 // being taken. They should not be moved next to the alloca
6236 // (and to the beginning of the scope), but rather stay close to
6237 // where said address is used.
6238 if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
6239 PrevNonDbgInst = Insn;
6240 continue;
6241 }
6242
6243 Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
6244 if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
6245 // If VI is a phi in a block with an EHPad terminator, we can't insert
6246 // after it.
6247 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
6248 continue;
6249        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
6250 DVI->removeFromParent();
6251 if (isa<PHINode>(VI))
6252 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
6253 else
6254 DVI->insertAfter(VI);
6255 MadeChange = true;
6256 ++NumDbgValueMoved;
6257 }
6258 }
6259 }
6260 return MadeChange;
6261}
6262
6263/// \brief Scale down both weights to fit into uint32_t.
6264static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
6265 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
6266  uint32_t Scale = (NewMax / UINT32_MAX) + 1;
6267 NewTrue = NewTrue / Scale;
6268 NewFalse = NewFalse / Scale;
6269}
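// For illustration (not part of the original file): with NewTrue =
// 6,000,000,000 and NewFalse = 2,000,000,000, NewMax = 6,000,000,000 and
// Scale = 6,000,000,000 / 4,294,967,295 + 1 = 2, so the weights become
// 3,000,000,000 and 1,000,000,000, both of which fit in uint32_t while
// preserving the 3:1 ratio.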
6270
6271/// \brief Some targets prefer to split a conditional branch like:
6272/// \code
6273/// %0 = icmp ne i32 %a, 0
6274/// %1 = icmp ne i32 %b, 0
6275/// %or.cond = or i1 %0, %1
6276/// br i1 %or.cond, label %TrueBB, label %FalseBB
6277/// \endcode
6278/// into multiple branch instructions like:
6279/// \code
6280/// bb1:
6281/// %0 = icmp ne i32 %a, 0
6282/// br i1 %0, label %TrueBB, label %bb2
6283/// bb2:
6284/// %1 = icmp ne i32 %b, 0
6285/// br i1 %1, label %TrueBB, label %FalseBB
6286/// \endcode
6287/// This usually allows instruction selection to do even further optimizations
6288/// and combine the compare with the branch instruction. Currently this is
6289/// applied for targets which have "cheap" jump instructions.
6290///
6291/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
6292///
6293bool CodeGenPrepare::splitBranchCondition(Function &F) {
6294 if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
6295 return false;
6296
6297 bool MadeChange = false;
6298 for (auto &BB : F) {
6299 // Does this BB end with the following?
6300 // %cond1 = icmp|fcmp|binary instruction ...
6301 // %cond2 = icmp|fcmp|binary instruction ...
6302 // %cond.or = or|and i1 %cond1, cond2
6303    // br i1 %cond.or, label %dest1, label %dest2
6304 BinaryOperator *LogicOp;
6305 BasicBlock *TBB, *FBB;
6306 if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
6307 continue;
6308
6309 auto *Br1 = cast<BranchInst>(BB.getTerminator());
6310 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
6311 continue;
6312
6313 unsigned Opc;
6314 Value *Cond1, *Cond2;
6315 if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
6316 m_OneUse(m_Value(Cond2)))))
6317 Opc = Instruction::And;
6318 else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
6319 m_OneUse(m_Value(Cond2)))))
6320 Opc = Instruction::Or;
6321 else
6322 continue;
6323
6324 if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
6325 !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
6326 continue;
6327
6328    DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
6329
6330 // Create a new BB.
6331 auto TmpBB =
6332 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
6333 BB.getParent(), BB.getNextNode());
6334
6335    // Update the original basic block by making the branch instruction use the
6336    // first condition directly and removing the no-longer-needed and/or instruction.
6337 Br1->setCondition(Cond1);
6338 LogicOp->eraseFromParent();
6339
6340    // Depending on the condition we have to replace either the true or the
6341    // false successor of the original branch instruction.
6342 if (Opc == Instruction::And)
6343 Br1->setSuccessor(0, TmpBB);
6344 else
6345 Br1->setSuccessor(1, TmpBB);
6346
6347 // Fill in the new basic block.
6348 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
6349 if (auto *I = dyn_cast<Instruction>(Cond2)) {
6350 I->removeFromParent();
6351 I->insertBefore(Br2);
6352 }
6353
6354    // Update PHI nodes in both successors. The original BB needs to be
6355    // replaced in one successor's PHI nodes, because the branch now comes from
6356    // the newly generated BB (TmpBB). In the other successor we need to add one
6357    // incoming edge to the PHI nodes, because both branch instructions now
6358    // target the same successor. Depending on the original branch condition
6359    // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
6360    // we perform the correct update for the PHI nodes.
6361    // This doesn't change the successor order of the newly created branch
6362    // instruction (or any other instruction).
6363 if (Opc == Instruction::Or)
6364 std::swap(TBB, FBB);
6365
6366 // Replace the old BB with the new BB.
6367 for (auto &I : *TBB) {
6368 PHINode *PN = dyn_cast<PHINode>(&I);
6369 if (!PN)
6370 break;
6371 int i;
6372 while ((i = PN->getBasicBlockIndex(&BB)) >= 0)
6373 PN->setIncomingBlock(i, TmpBB);
6374 }
6375
6376    // Add another incoming edge from the new BB.
6377 for (auto &I : *FBB) {
6378 PHINode *PN = dyn_cast<PHINode>(&I);
6379 if (!PN)
6380 break;
6381 auto *Val = PN->getIncomingValueForBlock(&BB);
6382 PN->addIncoming(Val, TmpBB);
6383 }
6384
6385 // Update the branch weights (from SelectionDAGBuilder::
6386 // FindMergedConditions).
6387 if (Opc == Instruction::Or) {
6388 // Codegen X | Y as:
6389 // BB1:
6390 // jmp_if_X TBB
6391 // jmp TmpBB
6392 // TmpBB:
6393 // jmp_if_Y TBB
6394 // jmp FBB
6395 //
6396
6397      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
6398      // The requirement is that
6399      //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
6400      //     = TrueProb for original BB.
6401      // Assuming the original weights are A and B, one choice is to set BB1's
6402      // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
6403      // assumes that
6404      //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
6405      // Another choice is to assume TrueProb for BB1 equals TrueProb for
6406      // TmpBB, but the math is more complicated.
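      // For illustration (not part of the original file): with original
      // weights A = 3 (true) and B = 1 (false), Br1 gets {A, A + 2B} = {3, 5}
      // and Br2 gets {A, 2B} = {3, 2}. Then TrueProb for BB1 plus
      // FalseProb(BB1) * TrueProb(TmpBB) = 3/8 + (5/8)*(3/5) = 3/4, which
      // matches A / (A + B) for the original branch.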
6407 uint64_t TrueWeight, FalseWeight;
6408 if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
6409 uint64_t NewTrueWeight = TrueWeight;
6410 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
6411 scaleWeights(NewTrueWeight, NewFalseWeight);
6412 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
6413        Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
6414
6415 NewTrueWeight = TrueWeight;
6416 NewFalseWeight = 2 * FalseWeight;
6417 scaleWeights(NewTrueWeight, NewFalseWeight);
6418 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
6419        Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
6420 }
6421 } else {
6422 // Codegen X & Y as:
6423 // BB1:
6424 // jmp_if_X TmpBB
6425 // jmp FBB
6426 // TmpBB:
6427 // jmp_if_Y TBB
6428 // jmp FBB
6429 //
6430 // This requires creation of TmpBB after CurBB.
6431
6432 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
6433 // The requirement is that
6434 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
6435      //     = FalseProb for original BB.
6436      // Assuming the original weights are A and B, one choice is to set BB1's
6437 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
6438 // assumes that
6439 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
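      // For illustration (not part of the original file): with original
      // weights A = 3 (true) and B = 1 (false), Br1 gets {2A + B, B} = {7, 1}
      // and Br2 gets {2A, B} = {6, 1}. Then FalseProb for BB1 plus
      // TrueProb(BB1) * FalseProb(TmpBB) = 1/8 + (7/8)*(1/7) = 1/4, which
      // matches B / (A + B) for the original branch.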
6440 uint64_t TrueWeight, FalseWeight;
6441 if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
6442 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
6443 uint64_t NewFalseWeight = FalseWeight;
6444 scaleWeights(NewTrueWeight, NewFalseWeight);
6445 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
6446        Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
6447
6448 NewTrueWeight = 2 * TrueWeight;
6449 NewFalseWeight = FalseWeight;
6450 scaleWeights(NewTrueWeight, NewFalseWeight);
6451 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
6452        Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
                         .createBranchWeights(NewTrueWeight, NewFalseWeight));
6453 }
6454 }
6455
6456 // Note: No point in getting fancy here, since the DT info is never
6457 // available to CodeGenPrepare.
6458 ModifiedDT = true;
6459
6460 MadeChange = true;
6461
6462    DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
6463          TmpBB->dump());
6464 }
6465 return MadeChange;
6466}