Line data Source code
1 : //===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This pass munges the code in the input function to better prepare it for
11 : // SelectionDAG-based code generation. This works around limitations in its
12 : // basic-block-at-a-time approach. It should eventually be removed.
13 : //
14 : //===----------------------------------------------------------------------===//
15 :
16 : #include "llvm/ADT/APInt.h"
17 : #include "llvm/ADT/ArrayRef.h"
18 : #include "llvm/ADT/DenseMap.h"
19 : #include "llvm/ADT/PointerIntPair.h"
20 : #include "llvm/ADT/STLExtras.h"
21 : #include "llvm/ADT/SmallPtrSet.h"
22 : #include "llvm/ADT/SmallVector.h"
23 : #include "llvm/ADT/Statistic.h"
24 : #include "llvm/Analysis/BlockFrequencyInfo.h"
25 : #include "llvm/Analysis/BranchProbabilityInfo.h"
26 : #include "llvm/Analysis/ConstantFolding.h"
27 : #include "llvm/Analysis/InstructionSimplify.h"
28 : #include "llvm/Analysis/LoopInfo.h"
29 : #include "llvm/Analysis/MemoryBuiltins.h"
30 : #include "llvm/Analysis/ProfileSummaryInfo.h"
31 : #include "llvm/Analysis/TargetLibraryInfo.h"
32 : #include "llvm/Analysis/TargetTransformInfo.h"
33 : #include "llvm/Transforms/Utils/Local.h"
34 : #include "llvm/Analysis/ValueTracking.h"
35 : #include "llvm/CodeGen/Analysis.h"
36 : #include "llvm/CodeGen/ISDOpcodes.h"
37 : #include "llvm/CodeGen/SelectionDAGNodes.h"
38 : #include "llvm/CodeGen/TargetLowering.h"
39 : #include "llvm/CodeGen/TargetPassConfig.h"
40 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
41 : #include "llvm/CodeGen/ValueTypes.h"
42 : #include "llvm/Config/llvm-config.h"
43 : #include "llvm/IR/Argument.h"
44 : #include "llvm/IR/Attributes.h"
45 : #include "llvm/IR/BasicBlock.h"
46 : #include "llvm/IR/CallSite.h"
47 : #include "llvm/IR/Constant.h"
48 : #include "llvm/IR/Constants.h"
49 : #include "llvm/IR/DataLayout.h"
50 : #include "llvm/IR/DerivedTypes.h"
51 : #include "llvm/IR/Dominators.h"
52 : #include "llvm/IR/Function.h"
53 : #include "llvm/IR/GetElementPtrTypeIterator.h"
54 : #include "llvm/IR/GlobalValue.h"
55 : #include "llvm/IR/GlobalVariable.h"
56 : #include "llvm/IR/IRBuilder.h"
57 : #include "llvm/IR/InlineAsm.h"
58 : #include "llvm/IR/InstrTypes.h"
59 : #include "llvm/IR/Instruction.h"
60 : #include "llvm/IR/Instructions.h"
61 : #include "llvm/IR/IntrinsicInst.h"
62 : #include "llvm/IR/Intrinsics.h"
63 : #include "llvm/IR/LLVMContext.h"
64 : #include "llvm/IR/MDBuilder.h"
65 : #include "llvm/IR/Module.h"
66 : #include "llvm/IR/Operator.h"
67 : #include "llvm/IR/PatternMatch.h"
68 : #include "llvm/IR/Statepoint.h"
69 : #include "llvm/IR/Type.h"
70 : #include "llvm/IR/Use.h"
71 : #include "llvm/IR/User.h"
72 : #include "llvm/IR/Value.h"
73 : #include "llvm/IR/ValueHandle.h"
74 : #include "llvm/IR/ValueMap.h"
75 : #include "llvm/Pass.h"
76 : #include "llvm/Support/BlockFrequency.h"
77 : #include "llvm/Support/BranchProbability.h"
78 : #include "llvm/Support/Casting.h"
79 : #include "llvm/Support/CommandLine.h"
80 : #include "llvm/Support/Compiler.h"
81 : #include "llvm/Support/Debug.h"
82 : #include "llvm/Support/ErrorHandling.h"
83 : #include "llvm/Support/MachineValueType.h"
84 : #include "llvm/Support/MathExtras.h"
85 : #include "llvm/Support/raw_ostream.h"
86 : #include "llvm/Target/TargetMachine.h"
87 : #include "llvm/Target/TargetOptions.h"
88 : #include "llvm/Transforms/Utils/BasicBlockUtils.h"
89 : #include "llvm/Transforms/Utils/BypassSlowDivision.h"
90 : #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
91 : #include <algorithm>
92 : #include <cassert>
93 : #include <cstdint>
94 : #include <iterator>
95 : #include <limits>
96 : #include <memory>
97 : #include <utility>
98 : #include <vector>
99 :
100 : using namespace llvm;
101 : using namespace llvm::PatternMatch;
102 :
103 : #define DEBUG_TYPE "codegenprepare"
104 :
105 : STATISTIC(NumBlocksElim, "Number of blocks eliminated");
106 : STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
107 : STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
108 : STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
109 : "sunken Cmps");
110 : STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
111 : "of sunken Casts");
112 : STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
113 : "computations were sunk");
114 : STATISTIC(NumMemoryInstsPhiCreated,
115 : "Number of phis created when address "
116 : "computations were sunk to memory instructions");
117 : STATISTIC(NumMemoryInstsSelectCreated,
118 : "Number of select created when address "
119 : "computations were sunk to memory instructions");
120 : STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
121 : STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
122 : STATISTIC(NumAndsAdded,
123 : "Number of and mask instructions added to form ext loads");
124 : STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
125 : STATISTIC(NumRetsDup, "Number of return instructions duplicated");
126 : STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
127 : STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
128 : STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
129 :
130 : static cl::opt<bool> DisableBranchOpts(
131 : "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
132 : cl::desc("Disable branch optimizations in CodeGenPrepare"));
133 :
134 : static cl::opt<bool>
135 : DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
136 : cl::desc("Disable GC optimizations in CodeGenPrepare"));
137 :
138 : static cl::opt<bool> DisableSelectToBranch(
139 : "disable-cgp-select2branch", cl::Hidden, cl::init(false),
140 : cl::desc("Disable select to branch conversion."));
141 :
142 : static cl::opt<bool> AddrSinkUsingGEPs(
143 : "addr-sink-using-gep", cl::Hidden, cl::init(true),
144 : cl::desc("Address sinking in CGP using GEPs."));
145 :
146 : static cl::opt<bool> EnableAndCmpSinking(
147 : "enable-andcmp-sinking", cl::Hidden, cl::init(true),
148 : cl::desc("Enable sinkinig and/cmp into branches."));
149 :
150 : static cl::opt<bool> DisableStoreExtract(
151 : "disable-cgp-store-extract", cl::Hidden, cl::init(false),
152 : cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
153 :
154 : static cl::opt<bool> StressStoreExtract(
155 : "stress-cgp-store-extract", cl::Hidden, cl::init(false),
156 : cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
157 :
158 : static cl::opt<bool> DisableExtLdPromotion(
159 : "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
160 : cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
161 : "CodeGenPrepare"));
162 :
163 : static cl::opt<bool> StressExtLdPromotion(
164 : "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
165 : cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
166 : "optimization in CodeGenPrepare"));
167 :
168 : static cl::opt<bool> DisablePreheaderProtect(
169 : "disable-preheader-prot", cl::Hidden, cl::init(false),
170 : cl::desc("Disable protection against removing loop preheaders"));
171 :
172 : static cl::opt<bool> ProfileGuidedSectionPrefix(
173 : "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
174 : cl::desc("Use profile info to add section prefix for hot/cold functions"));
175 :
176 : static cl::opt<unsigned> FreqRatioToSkipMerge(
177 : "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
178 : cl::desc("Skip merging empty blocks if (frequency of empty block) / "
179 : "(frequency of destination block) is greater than this ratio"));
180 :
181 : static cl::opt<bool> ForceSplitStore(
182 : "force-split-store", cl::Hidden, cl::init(false),
183 : cl::desc("Force store splitting no matter what the target query says."));
184 :
185 : static cl::opt<bool>
186 : EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
187 : cl::desc("Enable merging of redundant sexts when one is dominating"
188 : " the other."), cl::init(true));
189 :
190 : static cl::opt<bool> DisableComplexAddrModes(
191 : "disable-complex-addr-modes", cl::Hidden, cl::init(false),
192 : cl::desc("Disables combining addressing modes with different parts "
193 : "in optimizeMemoryInst."));
194 :
195 : static cl::opt<bool>
196 : AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
197 : cl::desc("Allow creation of Phis in Address sinking."));
198 :
199 : static cl::opt<bool>
200 : AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
201 : cl::desc("Allow creation of selects in Address sinking."));
202 :
203 : static cl::opt<bool> AddrSinkCombineBaseReg(
204 : "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
205 : cl::desc("Allow combining of BaseReg field in Address sinking."));
206 :
207 : static cl::opt<bool> AddrSinkCombineBaseGV(
208 : "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
209 : cl::desc("Allow combining of BaseGV field in Address sinking."));
210 :
211 : static cl::opt<bool> AddrSinkCombineBaseOffs(
212 : "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
213 : cl::desc("Allow combining of BaseOffs field in Address sinking."));
214 :
215 : static cl::opt<bool> AddrSinkCombineScaledReg(
216 : "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
217 : cl::desc("Allow combining of ScaledReg field in Address sinking."));
218 :
219 : static cl::opt<bool>
220 : EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
221 : cl::init(true),
222 : cl::desc("Enable splitting large offset of GEP."));
223 :
224 : namespace {
225 :
226 : enum ExtType {
227 : ZeroExtension, // Zero extension has been seen.
228 : SignExtension, // Sign extension has been seen.
229 : BothExtension // This extension type is used if we saw sext after
230 : // ZeroExtension had been set, or if we saw zext after
231 : // SignExtension had been set. It makes the type
232 : // information of a promoted instruction invalid.
233 : };
234 :
235 : using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
236 : using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
237 : using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
238 : using SExts = SmallVector<Instruction *, 16>;
239 : using ValueToSExts = DenseMap<Value *, SExts>;
240 :
241 : class TypePromotionTransaction;
242 :
243 : class CodeGenPrepare : public FunctionPass {
244 : const TargetMachine *TM = nullptr;
245 : const TargetSubtargetInfo *SubtargetInfo;
246 : const TargetLowering *TLI = nullptr;
247 : const TargetRegisterInfo *TRI;
248 : const TargetTransformInfo *TTI = nullptr;
249 : const TargetLibraryInfo *TLInfo;
250 : const LoopInfo *LI;
251 : std::unique_ptr<BlockFrequencyInfo> BFI;
252 : std::unique_ptr<BranchProbabilityInfo> BPI;
253 :
254 : /// As we scan instructions optimizing them, this is the next instruction
255 : /// to optimize. Transforms that can invalidate this should update it.
256 : BasicBlock::iterator CurInstIterator;
257 :
258 : /// Keeps track of non-local addresses that have been sunk into a block.
259 : /// This allows us to avoid inserting duplicate code for blocks with
260 : /// multiple load/stores of the same address. The usage of WeakTrackingVH
261 : /// enables SunkAddrs to be treated as a cache whose entries can be
262 : /// invalidated if a sunken address computation has been erased.
263 : ValueMap<Value*, WeakTrackingVH> SunkAddrs;
264 :
265 : /// Keeps track of all instructions inserted for the current function.
266 : SetOfInstrs InsertedInsts;
267 :
268 : /// Keeps track of the types of the related instructions before their
269 : /// promotion for the current function.
270 : InstrToOrigTy PromotedInsts;
271 :
272 : /// Keep track of instructions removed during promotion.
273 : SetOfInstrs RemovedInsts;
274 :
275 : /// Keep track of sext chains based on their initial value.
276 : DenseMap<Value *, Instruction *> SeenChainsForSExt;
277 :
278 : /// Keep track of GEPs accessing the same data structures such as structs or
279 : /// arrays that are candidates to be split later because of their large
280 : /// size.
281 : MapVector<
282 : AssertingVH<Value>,
283 : SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
284 : LargeOffsetGEPMap;
285 :
286 : /// Keep track of the new GEP bases created by splitting GEPs with large offsets.
287 : SmallSet<AssertingVH<Value>, 2> NewGEPBases;
288 :
289 : /// Map serial numbers to Large offset GEPs.
290 : DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
291 :
292 : /// Keep track of promoted SExt instructions.
293 : ValueToSExts ValToSExtendedUses;
294 :
295 : /// True if CFG is modified in any way.
296 : bool ModifiedDT;
297 :
298 : /// True if optimizing for size.
299 : bool OptSize;
300 :
301 : /// DataLayout for the Function being processed.
302 : const DataLayout *DL = nullptr;
303 :
304 : public:
305 : static char ID; // Pass identification, replacement for typeid
306 :
307 60894 : CodeGenPrepare() : FunctionPass(ID) {
308 20298 : initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
309 20298 : }
310 :
311 : bool runOnFunction(Function &F) override;
312 :
313 31 : StringRef getPassName() const override { return "CodeGen Prepare"; }
314 :
315 20165 : void getAnalysisUsage(AnalysisUsage &AU) const override {
316 : // FIXME: When we can selectively preserve passes, preserve the domtree.
317 : AU.addRequired<ProfileSummaryInfoWrapperPass>();
318 : AU.addRequired<TargetLibraryInfoWrapperPass>();
319 : AU.addRequired<TargetTransformInfoWrapperPass>();
320 : AU.addRequired<LoopInfoWrapperPass>();
321 20165 : }
322 :
323 : private:
324 : bool eliminateFallThrough(Function &F);
325 : bool eliminateMostlyEmptyBlocks(Function &F);
326 : BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
327 : bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
328 : void eliminateMostlyEmptyBlock(BasicBlock *BB);
329 : bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
330 : bool isPreheader);
331 : bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
332 : bool optimizeInst(Instruction *I, bool &ModifiedDT);
333 : bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
334 : Type *AccessTy, unsigned AddrSpace);
335 : bool optimizeInlineAsmInst(CallInst *CS);
336 : bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
337 : bool optimizeExt(Instruction *&I);
338 : bool optimizeExtUses(Instruction *I);
339 : bool optimizeLoadExt(LoadInst *Load);
340 : bool optimizeSelectInst(SelectInst *SI);
341 : bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
342 : bool optimizeSwitchInst(SwitchInst *SI);
343 : bool optimizeExtractElementInst(Instruction *Inst);
344 : bool dupRetToEnableTailCallOpts(BasicBlock *BB);
345 : bool placeDbgValues(Function &F);
346 : bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
347 : LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
348 : bool tryToPromoteExts(TypePromotionTransaction &TPT,
349 : const SmallVectorImpl<Instruction *> &Exts,
350 : SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
351 : unsigned CreatedInstsCost = 0);
352 : bool mergeSExts(Function &F);
353 : bool splitLargeGEPOffsets();
354 : bool performAddressTypePromotion(
355 : Instruction *&Inst,
356 : bool AllowPromotionWithoutCommonHeader,
357 : bool HasPromoted, TypePromotionTransaction &TPT,
358 : SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
359 : bool splitBranchCondition(Function &F);
360 : bool simplifyOffsetableRelocate(Instruction &I);
361 : };
362 :
363 : } // end anonymous namespace
364 :
365 : char CodeGenPrepare::ID = 0;
366 :
367 39044 : INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
368 : "Optimize for code generation", false, false)
369 39044 : INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
370 123448 : INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
371 : "Optimize for code generation", false, false)
372 :
373 20202 : FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
374 :
375 198715 : bool CodeGenPrepare::runOnFunction(Function &F) {
376 198715 : if (skipFunction(F))
377 : return false;
378 :
379 198528 : DL = &F.getParent()->getDataLayout();
380 :
381 : bool EverMadeChange = false;
382 : // Clear per function information.
383 198528 : InsertedInsts.clear();
384 198528 : PromotedInsts.clear();
385 :
386 198528 : ModifiedDT = false;
387 198528 : if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
388 198518 : TM = &TPC->getTM<TargetMachine>();
389 198518 : SubtargetInfo = TM->getSubtargetImpl(F);
390 198518 : TLI = SubtargetInfo->getTargetLowering();
391 198518 : TRI = SubtargetInfo->getRegisterInfo();
392 : }
393 198528 : TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
394 198528 : TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
395 198528 : LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
396 198528 : BPI.reset(new BranchProbabilityInfo(F, *LI));
397 198528 : BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
398 198528 : OptSize = F.optForSize();
399 :
400 : ProfileSummaryInfo *PSI =
401 198528 : getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
402 198528 : if (ProfileGuidedSectionPrefix) {
403 198528 : if (PSI->isFunctionHotInCallGraph(&F, *BFI))
404 13 : F.setSectionPrefix(".hot");
405 198515 : else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
406 6 : F.setSectionPrefix(".unlikely");
407 : }
408 :
409 : /// This optimization identifies DIV instructions that can be
410 : /// profitably bypassed and carried out with a shorter, faster divide.
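// Illustrative sketch (not taken from the original source): on a target whose
// bypass-width map contains an entry such as {64 -> 32}, a 'udiv i64' whose
// operands happen to fit in 32 bits at run time is routed, behind a runtime
// check inserted by bypassSlowDivision, to a cheaper 32-bit divide.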
411 198528 : if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
412 : TLI->isSlowDivBypassed()) {
413 : const DenseMap<unsigned int, unsigned int> &BypassWidths =
414 : TLI->getBypassSlowDivWidths();
415 : BasicBlock* BB = &*F.begin();
416 23763 : while (BB != nullptr) {
417 : // bypassSlowDivision may create new BBs, but we don't want to reapply the
418 : // optimization to those blocks.
419 : BasicBlock* Next = BB->getNextNode();
420 12301 : EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
421 : BB = Next;
422 : }
423 : }
424 :
425 : // Eliminate blocks that contain only PHI nodes and an
426 : // unconditional branch.
427 198528 : EverMadeChange |= eliminateMostlyEmptyBlocks(F);
428 :
429 198528 : if (!DisableBranchOpts)
430 198465 : EverMadeChange |= splitBranchCondition(F);
431 :
432 : // Split some critical edges where one of the sources is an indirect branch,
433 : // to help generate sane code for PHIs involving such edges.
434 198528 : EverMadeChange |= SplitIndirectBrCriticalEdges(F);
435 :
436 : bool MadeChange = true;
437 408828 : while (MadeChange) {
438 : MadeChange = false;
439 210300 : SeenChainsForSExt.clear();
440 210300 : ValToSExtendedUses.clear();
441 210300 : RemovedInsts.clear();
442 : LargeOffsetGEPMap.clear();
443 210300 : LargeOffsetGEPID.clear();
444 850560 : for (Function::iterator I = F.begin(); I != F.end(); ) {
445 : BasicBlock *BB = &*I++;
446 640297 : bool ModifiedDTOnIteration = false;
447 640297 : MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
448 :
449 : // Restart BB iteration if the dominator tree of the Function was changed
450 640297 : if (ModifiedDTOnIteration)
451 : break;
452 : }
453 210300 : if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
454 36 : MadeChange |= mergeSExts(F);
455 210300 : if (!LargeOffsetGEPMap.empty())
456 11 : MadeChange |= splitLargeGEPOffsets();
457 :
458 : // Really free removed instructions during promotion.
459 210688 : for (Instruction *I : RemovedInsts)
460 388 : I->deleteValue();
461 :
462 210300 : EverMadeChange |= MadeChange;
463 : }
464 :
465 : SunkAddrs.clear();
466 :
467 198528 : if (!DisableBranchOpts) {
468 : MadeChange = false;
469 : // Use a set vector to get deterministic iteration order. The order the
470 : // blocks are removed may affect whether or not PHI nodes in successors
471 : // are removed.
472 : SmallSetVector<BasicBlock*, 8> WorkList;
473 617057 : for (BasicBlock &BB : F) {
474 418592 : SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
475 418592 : MadeChange |= ConstantFoldTerminator(&BB, true);
476 418592 : if (!MadeChange) continue;
477 :
478 7281 : for (SmallVectorImpl<BasicBlock*>::iterator
479 11842 : II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
480 7281 : if (pred_begin(*II) == pred_end(*II))
481 1080 : WorkList.insert(*II);
482 : }
483 :
484 : // Delete the dead blocks and any of their dead successors.
485 198465 : MadeChange |= !WorkList.empty();
486 200149 : while (!WorkList.empty()) {
487 : BasicBlock *BB = WorkList.pop_back_val();
488 1684 : SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
489 :
490 1684 : DeleteDeadBlock(BB);
491 :
492 1382 : for (SmallVectorImpl<BasicBlock*>::iterator
493 3066 : II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
494 1382 : if (pred_begin(*II) == pred_end(*II))
495 775 : WorkList.insert(*II);
496 : }
497 :
498 : // Merge pairs of basic blocks with unconditional branches, connected by
499 : // a single edge.
500 198465 : if (EverMadeChange || MadeChange)
501 12219 : MadeChange |= eliminateFallThrough(F);
502 :
503 198465 : EverMadeChange |= MadeChange;
504 : }
505 :
506 198528 : if (!DisableGCOpts) {
507 : SmallVector<Instruction *, 2> Statepoints;
508 614070 : for (BasicBlock &BB : F)
509 3533670 : for (Instruction &I : BB)
510 3118128 : if (isStatepoint(I))
511 76 : Statepoints.push_back(&I);
512 198604 : for (auto &I : Statepoints)
513 76 : EverMadeChange |= simplifyOffsetableRelocate(*I);
514 : }
515 :
516 : // Do this last to clean up use-before-def scenarios introduced by other
517 : // preparatory transforms.
518 198528 : EverMadeChange |= placeDbgValues(F);
519 :
520 198528 : return EverMadeChange;
521 : }
522 :
523 : /// Merge basic blocks which are connected by a single edge, where one of the
524 : /// basic blocks has a single successor pointing to the other basic block,
525 : /// which has a single predecessor.
526 0 : bool CodeGenPrepare::eliminateFallThrough(Function &F) {
527 : bool Changed = false;
528 : // Scan all of the blocks in the function, except for the entry block.
529 : // Use a temporary array to avoid iterator being invalidated when
530 : // deleting blocks.
531 0 : SmallVector<WeakTrackingVH, 16> Blocks;
532 0 : for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
533 0 : Blocks.push_back(&Block);
534 :
535 0 : for (auto &Block : Blocks) {
536 : auto *BB = cast_or_null<BasicBlock>(Block);
537 0 : if (!BB)
538 0 : continue;
539 : // If the destination block has a single pred, then this is a trivial
540 : // edge, just collapse it.
541 : BasicBlock *SinglePred = BB->getSinglePredecessor();
542 :
543 : // Don't merge if BB's address is taken.
544 0 : if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
545 :
546 : BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
547 0 : if (Term && !Term->isConditional()) {
548 : Changed = true;
549 : LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
550 :
551 : // Merge BB into SinglePred and delete it.
552 0 : MergeBlockIntoPredecessor(BB);
553 : }
554 : }
555 0 : return Changed;
556 : }
557 :
558 : /// Find a destination block from BB if BB is a mergeable empty block.
559 0 : BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
560 : // If this block doesn't end with an uncond branch, ignore it.
561 : BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
562 0 : if (!BI || !BI->isUnconditional())
563 0 : return nullptr;
564 :
565 : // If the instruction before the branch (skipping debug info) isn't a phi
566 : // node, then other stuff is happening here.
567 0 : BasicBlock::iterator BBI = BI->getIterator();
568 0 : if (BBI != BB->begin()) {
569 : --BBI;
570 0 : while (isa<DbgInfoIntrinsic>(BBI)) {
571 0 : if (BBI == BB->begin())
572 : break;
573 : --BBI;
574 : }
575 0 : if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
576 0 : return nullptr;
577 : }
578 :
579 : // Do not break infinite loops.
580 : BasicBlock *DestBB = BI->getSuccessor(0);
581 0 : if (DestBB == BB)
582 0 : return nullptr;
583 :
584 0 : if (!canMergeBlocks(BB, DestBB))
585 : DestBB = nullptr;
586 :
587 : return DestBB;
588 : }
589 :
590 : /// Eliminate blocks that contain only PHI nodes, debug info directives, and an
591 : /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
592 : /// edges in ways that are non-optimal for isel. Start by eliminating these
593 : /// blocks so we can split them the way we want them.
594 198528 : bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
595 : SmallPtrSet<BasicBlock *, 16> Preheaders;
596 198528 : SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
597 206410 : while (!LoopList.empty()) {
598 : Loop *L = LoopList.pop_back_val();
599 7882 : LoopList.insert(LoopList.end(), L->begin(), L->end());
600 7882 : if (BasicBlock *Preheader = L->getLoopPreheader())
601 7745 : Preheaders.insert(Preheader);
602 : }
603 :
604 : bool MadeChange = false;
605 : // Copy blocks into a temporary array to avoid iterator invalidation issues
606 : // as we remove them.
607 : // Note that this intentionally skips the entry block.
608 198528 : SmallVector<WeakTrackingVH, 16> Blocks;
609 423062 : for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
610 673602 : Blocks.push_back(&Block);
611 :
612 423062 : for (auto &Block : Blocks) {
613 : BasicBlock *BB = cast_or_null<BasicBlock>(Block);
614 : if (!BB)
615 : continue;
616 224534 : BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
617 231116 : if (!DestBB ||
618 6582 : !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
619 219978 : continue;
620 :
621 4556 : eliminateMostlyEmptyBlock(BB);
622 : MadeChange = true;
623 : }
624 198528 : return MadeChange;
625 : }
626 :
627 6582 : bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
628 : BasicBlock *DestBB,
629 : bool isPreheader) {
630 : // Do not delete loop preheaders if doing so would create a critical edge.
631 : // Loop preheaders can be good locations to spill registers. If the
632 : // preheader is deleted and we create a critical edge, registers may be
633 : // spilled in the loop body instead.
634 8564 : if (!DisablePreheaderProtect && isPreheader &&
635 3845 : !(BB->getSinglePredecessor() &&
636 1863 : BB->getSinglePredecessor()->getSingleSuccessor()))
637 1976 : return false;
638 :
639 : // Try to skip merging if the unique predecessor of BB is terminated by a
640 : // switch or indirect branch instruction, and BB is used as an incoming block
641 : // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
642 : // add COPY instructions in the predecessor of BB instead of BB (if it is not
643 : // merged). Note that the critical edge created by merging such blocks won't be
644 : // split in MachineSink because the jump table is not analyzable. By keeping
645 : // such an empty block (BB), ISel will place COPY instructions in BB, not in the
646 : // predecessor of BB.
647 4606 : BasicBlock *Pred = BB->getUniquePredecessor();
648 4606 : if (!Pred ||
649 3451 : !(isa<SwitchInst>(Pred->getTerminator()) ||
650 : isa<IndirectBrInst>(Pred->getTerminator())))
651 : return true;
652 :
653 423 : if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
654 : return true;
655 :
656 : // We use a simple cost heuristic which determines that skipping merging is
657 : // profitable if the cost of skipping merging is less than the cost of
658 : // merging: Cost(skipping merging) < Cost(merging BB), where the
659 : // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
660 : // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
661 : // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
662 : // Freq(Pred) / Freq(BB) > 2.
663 : // Note that if there are multiple empty blocks sharing the same incoming
664 : // value for the PHIs in the DestBB, we consider them together. In such
665 : // case, Cost(merging BB) will be the sum of their frequencies.
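// Worked example with made-up frequencies: with the default
// FreqRatioToSkipMerge of 2, if Freq(Pred) = 60 and Freq(BB) = 20 then
// 60 / 20 = 3 > 2, so skipping the merge is considered profitable and BB is
// kept; if instead Freq(BB) = 40 then 60 / 40 = 1.5 <= 2 and merging remains
// the preferred option.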
666 :
667 423 : if (!isa<PHINode>(DestBB->begin()))
668 : return true;
669 :
670 : SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
671 :
672 : // Find all other incoming blocks from which incoming values of all PHIs in
673 : // DestBB are the same as the ones from BB.
674 324 : for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
675 : ++PI) {
676 255 : BasicBlock *DestBBPred = *PI;
677 255 : if (DestBBPred == BB)
678 69 : continue;
679 :
680 372 : if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
681 : return DestPN.getIncomingValueForBlock(BB) ==
682 : DestPN.getIncomingValueForBlock(DestBBPred);
683 : }))
684 36 : SameIncomingValueBBs.insert(DestBBPred);
685 : }
686 :
687 : // See if all of BB's incoming values are the same as the value from Pred. In
688 : // this case, there is no reason to skip merging because COPYs are expected to
689 : // be placed in Pred already.
690 69 : if (SameIncomingValueBBs.count(Pred))
691 : return true;
692 :
693 63 : BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
694 63 : BlockFrequency BBFreq = BFI->getBlockFreq(BB);
695 :
696 78 : for (auto SameValueBB : SameIncomingValueBBs)
697 22 : if (SameValueBB->getUniquePredecessor() == Pred &&
698 7 : DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
699 4 : BBFreq += BFI->getBlockFreq(SameValueBB);
700 :
701 63 : return PredFreq.getFrequency() <=
702 126 : BBFreq.getFrequency() * FreqRatioToSkipMerge;
703 : }
704 :
705 : /// Return true if we can merge BB into DestBB if there is a single
706 : /// unconditional branch between them, and BB contains no other non-phi
707 : /// instructions.
708 0 : bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
709 : const BasicBlock *DestBB) const {
710 : // We only want to eliminate blocks whose phi nodes are used by phi nodes in
711 : // the successor. If there are more complex conditions (e.g. preheaders),
712 : // don't mess around with them.
713 0 : for (const PHINode &PN : BB->phis()) {
714 0 : for (const User *U : PN.users()) {
715 : const Instruction *UI = cast<Instruction>(U);
716 0 : if (UI->getParent() != DestBB || !isa<PHINode>(UI))
717 0 : return false;
718 : // If User is inside the DestBB block and it is a PHINode then check the
719 : // incoming value. If the incoming value is not from BB then this is
720 : // a complex condition (e.g. preheaders) we want to avoid here.
721 : if (UI->getParent() == DestBB) {
722 : if (const PHINode *UPN = dyn_cast<PHINode>(UI))
723 0 : for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
724 : Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
725 0 : if (Insn && Insn->getParent() == BB &&
726 : Insn->getParent() != UPN->getIncomingBlock(I))
727 0 : return false;
728 : }
729 : }
730 : }
731 : }
732 :
733 : // If BB and DestBB contain any common predecessors, then the phi nodes in BB
734 : // and DestBB may have conflicting incoming values for the block. If so, we
735 : // can't merge the block.
736 : const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
737 0 : if (!DestBBPN) return true; // no conflict.
738 :
739 : // Collect the preds of BB.
740 : SmallPtrSet<const BasicBlock*, 16> BBPreds;
741 : if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
742 : // It is faster to get preds from a PHI than with pred_iterator.
743 0 : for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
744 0 : BBPreds.insert(BBPN->getIncomingBlock(i));
745 : } else {
746 0 : BBPreds.insert(pred_begin(BB), pred_end(BB));
747 : }
748 :
749 : // Walk the preds of DestBB.
750 0 : for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
751 : BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
752 0 : if (BBPreds.count(Pred)) { // Common predecessor?
753 0 : for (const PHINode &PN : DestBB->phis()) {
754 0 : const Value *V1 = PN.getIncomingValueForBlock(Pred);
755 0 : const Value *V2 = PN.getIncomingValueForBlock(BB);
756 :
757 : // If V2 is a phi node in BB, look up what the mapped value will be.
758 : if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
759 0 : if (V2PN->getParent() == BB)
760 0 : V2 = V2PN->getIncomingValueForBlock(Pred);
761 :
762 : // If there is a conflict, bail out.
763 0 : if (V1 != V2) return false;
764 : }
765 : }
766 : }
767 :
768 : return true;
769 : }
770 :
771 : /// Eliminate a basic block that has only phi's and an unconditional branch in
772 : /// it.
773 0 : void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
774 : BranchInst *BI = cast<BranchInst>(BB->getTerminator());
775 : BasicBlock *DestBB = BI->getSuccessor(0);
776 :
777 : LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
778 : << *BB << *DestBB);
779 :
780 : // If the destination block has a single pred, then this is a trivial edge,
781 : // just collapse it.
782 0 : if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
783 0 : if (SinglePred != DestBB) {
784 : assert(SinglePred == BB &&
785 : "Single predecessor not the same as predecessor");
786 : // Merge DestBB into SinglePred/BB and delete it.
787 0 : MergeBlockIntoPredecessor(DestBB);
788 : // Note: BB(=SinglePred) will not be deleted on this path.
789 : // DestBB(=its single successor) is the one that was deleted.
790 : LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
791 0 : return;
792 : }
793 : }
794 :
795 : // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
796 : // to handle the new incoming edges it is about to have.
797 0 : for (PHINode &PN : DestBB->phis()) {
798 : // Remove the incoming value for BB, and remember it.
799 : Value *InVal = PN.removeIncomingValue(BB, false);
800 :
801 : // Two options: either the InVal is a phi node defined in BB or it is some
802 : // value that dominates BB.
803 : PHINode *InValPhi = dyn_cast<PHINode>(InVal);
804 0 : if (InValPhi && InValPhi->getParent() == BB) {
805 : // Add all of the input values of the input PHI as inputs of this phi.
806 0 : for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
807 0 : PN.addIncoming(InValPhi->getIncomingValue(i),
808 : InValPhi->getIncomingBlock(i));
809 : } else {
810 : // Otherwise, add one instance of the dominating value for each edge that
811 : // we will be adding.
812 : if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
813 0 : for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
814 0 : PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
815 : } else {
816 0 : for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
817 0 : PN.addIncoming(InVal, *PI);
818 : }
819 : }
820 : }
821 :
822 : // The PHIs are now updated, change everything that refers to BB to use
823 : // DestBB and remove BB.
824 0 : BB->replaceAllUsesWith(DestBB);
825 0 : BB->eraseFromParent();
826 : ++NumBlocksElim;
827 :
828 : LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
829 : }
830 :
831 : // Computes a map of base pointer relocation instructions to corresponding
832 : // derived pointer relocation instructions given a vector of all relocate calls
833 25 : static void computeBaseDerivedRelocateMap(
834 : const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
835 : DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
836 : &RelocateInstMap) {
837 : // Collect information in two maps: one primarily for locating the base object
838 : // while filling the second map; the second map is the final structure holding
839 : // a mapping between Base and corresponding Derived relocate calls
840 : DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
841 87 : for (auto *ThisRelocate : AllRelocateCalls) {
842 : auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
843 : ThisRelocate->getDerivedPtrIndex());
844 62 : RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
845 : }
846 112 : for (auto &Item : RelocateIdxMap) {
847 62 : std::pair<unsigned, unsigned> Key = Item.first;
848 62 : if (Key.first == Key.second)
849 : // Base relocation: nothing to insert
850 36 : continue;
851 :
852 28 : GCRelocateInst *I = Item.second;
853 28 : auto BaseKey = std::make_pair(Key.first, Key.first);
854 :
855 : // We're iterating over RelocateIdxMap so we cannot modify it.
856 28 : auto MaybeBase = RelocateIdxMap.find(BaseKey);
857 28 : if (MaybeBase == RelocateIdxMap.end())
858 : // TODO: We might want to insert a new base object relocate and gep off
859 : // that, if there are enough derived object relocates.
860 : continue;
861 :
862 26 : RelocateInstMap[MaybeBase->second].push_back(I);
863 : }
864 25 : }
865 :
866 : // Accepts a GEP and extracts the operands into a vector provided they're all
867 : // small integer constants
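// Rough illustration (types and values invented): for a GEP such as
//   %d = getelementptr %struct.S, %struct.S* %base, i32 0, i32 5
// OffsetV becomes {0, 5}; an index larger than the threshold of 20 used
// below (e.g. i32 100), or any non-constant index, rejects the GEP.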
868 11 : static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
869 : SmallVectorImpl<Value *> &OffsetV) {
870 21 : for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
871 : // Only accept small constant integer operands
872 : auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
873 11 : if (!Op || Op->getZExtValue() > 20)
874 : return false;
875 : }
876 :
877 17 : for (unsigned i = 1; i < GEP->getNumOperands(); i++)
878 9 : OffsetV.push_back(GEP->getOperand(i));
879 : return true;
880 : }
881 :
882 : // Takes a RelocatedBase (base pointer relocation instruction) and Targets to
883 : // replace, computes a replacement, and applies it.
884 : static bool
885 18 : simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
886 : const SmallVectorImpl<GCRelocateInst *> &Targets) {
887 : bool MadeChange = false;
888 : // We must ensure that the relocation of the derived pointer is defined after
889 : // the relocation of the base pointer. If we find a relocation corresponding to
890 : // the base that is defined earlier than the relocation of the base, then we
891 : // move the relocation of the base right before the found relocation. We only
892 : // consider relocations in the same basic block as the relocation of the base;
893 : // relocations from other basic blocks are skipped by this optimization.
894 36 : for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
895 128 : &*R != RelocatedBase; ++R)
896 12 : if (auto RI = dyn_cast<GCRelocateInst>(R))
897 12 : if (RI->getStatepoint() == RelocatedBase->getStatepoint())
898 6 : if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
899 6 : RelocatedBase->moveBefore(RI);
900 6 : break;
901 : }
902 :
903 44 : for (GCRelocateInst *ToReplace : Targets) {
904 : assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
905 : "Not relocating a derived object of the original base object");
906 26 : if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
907 : // A duplicate relocate call. TODO: coalesce duplicates.
908 18 : continue;
909 : }
910 :
911 26 : if (RelocatedBase->getParent() != ToReplace->getParent()) {
912 : // Base and derived relocates are in different basic blocks.
913 : // In this case the transform is only valid when the base dominates the
914 : // derived relocate. However, it would be too expensive to check dominance
915 : // for each such relocate, so we skip the whole transformation.
916 : continue;
917 : }
918 :
919 25 : Value *Base = ToReplace->getBasePtr();
920 25 : auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
921 11 : if (!Derived || Derived->getPointerOperand() != Base)
922 : continue;
923 :
924 : SmallVector<Value *, 2> OffsetV;
925 11 : if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
926 : continue;
927 :
928 : // Create a Builder and replace the target callsite with a gep
929 : assert(RelocatedBase->getNextNode() &&
930 : "Should always have one since it's not a terminator");
931 :
932 : // Insert after RelocatedBase
933 8 : IRBuilder<> Builder(RelocatedBase->getNextNode());
934 8 : Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
935 :
936 : // If gc_relocate does not match the actual type, cast it to the right type.
937 : // In theory, there must be a bitcast after gc_relocate if the type does not
938 : // match, and we should reuse it to get the derived pointer. But there could
939 : // be cases like this:
940 : // bb1:
941 : // ...
942 : // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
943 : // br label %merge
944 : //
945 : // bb2:
946 : // ...
947 : // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
948 : // br label %merge
949 : //
950 : // merge:
951 : // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
952 : // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
953 : //
954 : // In this case, we can no longer find the bitcast, so we insert a new bitcast
955 : // whether or not one already exists. In this way we can handle all cases, and
956 : // the extra bitcast should be optimized away in later passes.
957 : Value *ActualRelocatedBase = RelocatedBase;
958 8 : if (RelocatedBase->getType() != Base->getType()) {
959 : ActualRelocatedBase =
960 0 : Builder.CreateBitCast(RelocatedBase, Base->getType());
961 : }
962 8 : Value *Replacement = Builder.CreateGEP(
963 : Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
964 8 : Replacement->takeName(ToReplace);
965 : // If the newly generated derived pointer's type does not match the original derived
966 : // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
967 : Value *ActualReplacement = Replacement;
968 8 : if (Replacement->getType() != ToReplace->getType()) {
969 : ActualReplacement =
970 0 : Builder.CreateBitCast(Replacement, ToReplace->getType());
971 : }
972 8 : ToReplace->replaceAllUsesWith(ActualReplacement);
973 8 : ToReplace->eraseFromParent();
974 :
975 : MadeChange = true;
976 : }
977 18 : return MadeChange;
978 : }
979 :
980 : // Turns this:
981 : //
982 : // %base = ...
983 : // %ptr = gep %base + 15
984 : // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
985 : // %base' = relocate(%tok, i32 4, i32 4)
986 : // %ptr' = relocate(%tok, i32 4, i32 5)
987 : // %val = load %ptr'
988 : //
989 : // into this:
990 : //
991 : // %base = ...
992 : // %ptr = gep %base + 15
993 : // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
994 : // %base' = gc.relocate(%tok, i32 4, i32 4)
995 : // %ptr' = gep %base' + 15
996 : // %val = load %ptr'
997 0 : bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
998 : bool MadeChange = false;
999 : SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1000 :
1001 0 : for (auto *U : I.users())
1002 0 : if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1003 : // Collect all the relocate calls associated with a statepoint
1004 0 : AllRelocateCalls.push_back(Relocate);
1005 :
1006 : // We need at least one base pointer relocation + one derived pointer
1007 : // relocation to mangle
1008 0 : if (AllRelocateCalls.size() < 2)
1009 0 : return false;
1010 :
1011 : // RelocateInstMap is a mapping from the base relocate instruction to the
1012 : // corresponding derived relocate instructions
1013 : DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1014 0 : computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1015 0 : if (RelocateInstMap.empty())
1016 0 : return false;
1017 :
1018 0 : for (auto &Item : RelocateInstMap)
1019 : // Item.first is the RelocatedBase to offset against
1020 : // Item.second is the vector of Targets to replace
1021 0 : MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1022 0 : return MadeChange;
1023 : }
1024 :
1025 : /// SinkCast - Sink the specified cast instruction into its user blocks
1026 395765 : static bool SinkCast(CastInst *CI) {
1027 395765 : BasicBlock *DefBB = CI->getParent();
1028 :
1029 : /// InsertedCasts - Only insert a cast in each block once.
1030 : DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1031 :
1032 : bool MadeChange = false;
1033 : for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1034 878043 : UI != E; ) {
1035 : Use &TheUse = UI.getUse();
1036 : Instruction *User = cast<Instruction>(*UI);
1037 :
1038 : // Figure out which BB this cast is used in. For PHIs this is the
1039 : // appropriate predecessor block.
1040 482278 : BasicBlock *UserBB = User->getParent();
1041 : if (PHINode *PN = dyn_cast<PHINode>(User)) {
1042 9773 : UserBB = PN->getIncomingBlock(TheUse);
1043 : }
1044 :
1045 : // Preincrement use iterator so we don't invalidate it.
1046 : ++UI;
1047 :
1048 : // The first insertion point of a block containing an EH pad is after the
1049 : // pad. If the pad is the user, we cannot sink the cast past the pad.
1050 : if (User->isEHPad())
1051 405788 : continue;
1052 :
1053 : // If the block selected to receive the cast is an EH pad that does not
1054 : // allow non-PHI instructions before the terminator, we can't sink the
1055 : // cast.
1056 482277 : if (UserBB->getTerminator()->isEHPad())
1057 : continue;
1058 :
1059 : // If this user is in the same block as the cast, don't change the cast.
1060 482277 : if (UserBB == DefBB) continue;
1061 :
1062 : // If we have already inserted a cast into this block, use it.
1063 : CastInst *&InsertedCast = InsertedCasts[UserBB];
1064 :
1065 76490 : if (!InsertedCast) {
1066 74360 : BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1067 : assert(InsertPt != UserBB->end());
1068 74360 : InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1069 : CI->getType(), "", &*InsertPt);
1070 74360 : InsertedCast->setDebugLoc(CI->getDebugLoc());
1071 : }
1072 :
1073 : // Replace a use of the cast with a use of the new cast.
1074 76490 : TheUse = InsertedCast;
1075 : MadeChange = true;
1076 : ++NumCastUses;
1077 : }
1078 :
1079 : // If we removed all uses, nuke the cast.
1080 395765 : if (CI->use_empty()) {
1081 10018 : salvageDebugInfo(*CI);
1082 10018 : CI->eraseFromParent();
1083 : MadeChange = true;
1084 : }
1085 :
1086 395765 : return MadeChange;
1087 : }
1088 :
1089 : /// If the specified cast instruction is a noop copy (e.g. it's casting from
1090 : /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1091 : /// reduce the number of virtual registers that must be created and coalesced.
1092 : ///
1093 : /// Return true if any changes are made.
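/// A minimal sketch of the transformation (illustrative IR, not a real test):
///   BB1:
///     %p = bitcast i8* %arg to i32*
///   BB2:
///     %v = load i32, i32* %p
/// becomes
///   BB2:
///     %p.sunk = bitcast i8* %arg to i32*
///     %v = load i32, i32* %p.sunk
/// so the no-op copy is materialized only in the block that uses it.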
1094 499999 : static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1095 : const DataLayout &DL) {
1096 : // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1097 : // than sinking only nop casts, but is helpful on some platforms.
1098 : if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1099 294 : if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
1100 294 : ASC->getDestAddressSpace()))
1101 : return false;
1102 : }
1103 :
1104 : // If this is a noop copy,
1105 499846 : EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1106 499846 : EVT DstVT = TLI.getValueType(DL, CI->getType());
1107 :
1108 : // This is an fp<->int conversion?
1109 499846 : if (SrcVT.isInteger() != DstVT.isInteger())
1110 : return false;
1111 :
1112 : // If this is an extension, it will be a zero or sign extension, which
1113 : // isn't a noop.
1114 479061 : if (SrcVT.bitsLT(DstVT)) return false;
1115 :
1116 : // If these values will be promoted, find out what they will be promoted
1117 : // to. This helps us consider truncates on PPC as noop copies when they
1118 : // are.
1119 861874 : if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1120 : TargetLowering::TypePromoteInteger)
1121 2640 : SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1122 861874 : if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1123 : TargetLowering::TypePromoteInteger)
1124 13186 : DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1125 :
1126 : // If, after promotion, these are the same types, this is a noop copy.
1127 430999 : if (SrcVT != DstVT)
1128 : return false;
1129 :
1130 392121 : return SinkCast(CI);
1131 : }
1132 :
1133 : /// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
1134 : /// possible.
1135 : ///
1136 : /// Return true if any changes were made.
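/// A rough sketch of the rewrite (illustrative IR only):
///   %add = add i64 %a, %b
///   %cmp = icmp ult i64 %add, %a
/// becomes
///   %uadd.overflow = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
///   %uadd = extractvalue { i64, i1 } %uadd.overflow, 0
///   %overflow = extractvalue { i64, i1 } %uadd.overflow, 1
/// with uses of %add and %cmp replaced by %uadd and %overflow.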
1137 143764 : static bool CombineUAddWithOverflow(CmpInst *CI) {
1138 : Value *A, *B;
1139 : Instruction *AddI;
1140 143764 : if (!match(CI,
1141 143764 : m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
1142 : return false;
1143 :
1144 428 : Type *Ty = AddI->getType();
1145 428 : if (!isa<IntegerType>(Ty))
1146 : return false;
1147 :
1148 : // We don't want to move around uses of condition values this late, so we
1149 : // check if it is legal to create the call to the intrinsic in the basic
1150 : // block containing the icmp:
1151 :
1152 99 : if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
1153 : return false;
1154 :
1155 : #ifndef NDEBUG
1156 : // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
1157 : // for now:
1158 : if (AddI->hasOneUse())
1159 : assert(*AddI->user_begin() == CI && "expected!");
1160 : #endif
1161 :
1162 81 : Module *M = CI->getModule();
1163 81 : Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
1164 :
1165 81 : auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
1166 :
1167 : DebugLoc Loc = CI->getDebugLoc();
1168 : auto *UAddWithOverflow =
1169 81 : CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
1170 81 : UAddWithOverflow->setDebugLoc(Loc);
1171 162 : auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
1172 81 : UAdd->setDebugLoc(Loc);
1173 : auto *Overflow =
1174 162 : ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
1175 81 : Overflow->setDebugLoc(Loc);
1176 :
1177 81 : CI->replaceAllUsesWith(Overflow);
1178 81 : AddI->replaceAllUsesWith(UAdd);
1179 81 : CI->eraseFromParent();
1180 81 : AddI->eraseFromParent();
1181 : return true;
1182 : }
1183 :
1184 : /// Sink the given CmpInst into user blocks to reduce the number of virtual
1185 : /// registers that must be created and coalesced. This is a clear win except on
1186 : /// targets with multiple condition code registers (PowerPC), where it might
1187 : /// lose; some adjustment may be wanted there.
1188 : ///
1189 : /// Return true if any changes are made.
1190 145212 : static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1191 145212 : BasicBlock *DefBB = CI->getParent();
1192 :
1193 : // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1194 145212 : if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
1195 : return false;
1196 :
1197 : // Only insert a cmp in each block once.
1198 : DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1199 :
1200 : bool MadeChange = false;
1201 : for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1202 292652 : UI != E; ) {
1203 : Use &TheUse = UI.getUse();
1204 : Instruction *User = cast<Instruction>(*UI);
1205 :
1206 : // Preincrement use iterator so we don't invalidate it.
1207 : ++UI;
1208 :
1209 : // Don't bother for PHI nodes.
1210 147485 : if (isa<PHINode>(User))
1211 145726 : continue;
1212 :
1213 : // Figure out which BB this cmp is used in.
1214 146696 : BasicBlock *UserBB = User->getParent();
1215 :
1216 : // If this user is in the same block as the cmp, don't change the cmp.
1217 146696 : if (UserBB == DefBB) continue;
1218 :
1219 : // If we have already inserted a cmp into this block, use it.
1220 : CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1221 :
1222 1759 : if (!InsertedCmp) {
1223 1639 : BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1224 : assert(InsertPt != UserBB->end());
1225 1639 : InsertedCmp =
1226 1639 : CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
1227 : CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
1228 : // Propagate the debug info.
1229 1639 : InsertedCmp->setDebugLoc(CI->getDebugLoc());
1230 : }
1231 :
1232 : // Replace a use of the cmp with a use of the new cmp.
1233 1759 : TheUse = InsertedCmp;
1234 : MadeChange = true;
1235 : ++NumCmpUses;
1236 : }
1237 :
1238 : // If we removed all uses, nuke the cmp.
1239 145167 : if (CI->use_empty()) {
1240 949 : CI->eraseFromParent();
1241 : MadeChange = true;
1242 : }
1243 :
1244 : return MadeChange;
1245 : }
1246 :
1247 145212 : static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
1248 145212 : if (SinkCmpExpression(CI, TLI))
1249 : return true;
1250 :
1251 143764 : if (CombineUAddWithOverflow(CI))
1252 81 : return true;
1253 :
1254 : return false;
1255 : }
1256 :
1257 : /// Duplicate and sink the given 'and' instruction into user blocks where it is
1258 : /// used in a compare to allow isel to generate better code for targets where
1259 : /// this operation can be combined.
1260 : ///
1261 : /// Return true if any changes are made.
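/// An illustrative sketch of the pattern this targets (invented IR):
///   BB1:
///     %m = and i32 %x, 255
///   BB2:
///     %c = icmp eq i32 %m, 0
/// A copy of the 'and' is placed next to the icmp in BB2 so the backend can
/// fold the mask and the compare-with-zero into a single test instruction,
/// when the target reports this folding as beneficial.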
1262 0 : static bool sinkAndCmp0Expression(Instruction *AndI,
1263 : const TargetLowering &TLI,
1264 : SetOfInstrs &InsertedInsts) {
1265 : // Double-check that we're not trying to optimize an instruction that was
1266 : // already optimized by some other part of this pass.
1267 : assert(!InsertedInsts.count(AndI) &&
1268 : "Attempting to optimize already optimized and instruction");
1269 : (void) InsertedInsts;
1270 :
1271 : // Nothing to do for single use in same basic block.
1272 0 : if (AndI->hasOneUse() &&
1273 0 : AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1274 0 : return false;
1275 :
1276 : // Try to avoid cases where sinking/duplicating is likely to increase register
1277 : // pressure.
1278 0 : if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1279 0 : !isa<ConstantInt>(AndI->getOperand(1)) &&
1280 0 : AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1281 0 : return false;
1282 :
1283 0 : for (auto *U : AndI->users()) {
1284 : Instruction *User = cast<Instruction>(U);
1285 :
1286 : // Only sink for and mask feeding icmp with 0.
1287 0 : if (!isa<ICmpInst>(User))
1288 0 : return false;
1289 :
1290 0 : auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1291 0 : if (!CmpC || !CmpC->isZero())
1292 0 : return false;
1293 : }
1294 :
1295 0 : if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1296 0 : return false;
1297 :
1298 : LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1299 : LLVM_DEBUG(AndI->getParent()->dump());
1300 :
1301 : // Push the 'and' into the same block as the icmp 0. There should only be
1302 : // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1303 : // others, so we don't need to keep track of which BBs we insert into.
1304 : for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1305 0 : UI != E; ) {
1306 : Use &TheUse = UI.getUse();
1307 : Instruction *User = cast<Instruction>(*UI);
1308 :
1309 : // Preincrement use iterator so we don't invalidate it.
1310 : ++UI;
1311 :
1312 : LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1313 :
1314 : // Keep the 'and' in the same place if the use is already in the same block.
1315 : Instruction *InsertPt =
1316 0 : User->getParent() == AndI->getParent() ? AndI : User;
1317 : Instruction *InsertedAnd =
1318 0 : BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1319 : AndI->getOperand(1), "", InsertPt);
1320 : // Propagate the debug info.
1321 0 : InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1322 :
1323 : // Replace a use of the 'and' with a use of the new 'and'.
1324 : TheUse = InsertedAnd;
1325 : ++NumAndUses;
1326 : LLVM_DEBUG(User->getParent()->dump());
1327 : }
1328 :
1329 : // We removed all uses, nuke the and.
1330 0 : AndI->eraseFromParent();
1331 0 : return true;
1332 : }
1333 :
1334 : /// Check if the candidates could be combined with a shift instruction, which
1335 : /// includes:
1336 : /// 1. Truncate instruction
1337 : /// 2. An 'and' instruction whose immediate is a mask of the low bits:
1338 : /// imm & (imm+1) == 0
1339 2432 : static bool isExtractBitsCandidateUse(Instruction *User) {
1340 2432 : if (!isa<TruncInst>(User)) {
1341 1969 : if (User->getOpcode() != Instruction::And ||
1342 270 : !isa<ConstantInt>(User->getOperand(1)))
1343 : return false;
1344 :
1345 : const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1346 :
1347 260 : if ((Cimm & (Cimm + 1)).getBoolValue())
1348 15 : return false;
1349 : }
1350 : return true;
1351 : }
1352 :
1353 : /// Sink both the shift and the truncate instructions into the truncate's user BB.
1354 : static bool
1355 99 : SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1356 : DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1357 : const TargetLowering &TLI, const DataLayout &DL) {
1358 99 : BasicBlock *UserBB = User->getParent();
1359 : DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1360 : TruncInst *TruncI = dyn_cast<TruncInst>(User);
1361 : bool MadeChange = false;
1362 :
1363 : for (Value::user_iterator TruncUI = TruncI->user_begin(),
1364 : TruncE = TruncI->user_end();
1365 218 : TruncUI != TruncE;) {
1366 :
1367 : Use &TruncTheUse = TruncUI.getUse();
1368 : Instruction *TruncUser = cast<Instruction>(*TruncUI);
1369 : // Preincrement use iterator so we don't invalidate it.
1370 :
1371 : ++TruncUI;
1372 :
1373 238 : int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1374 119 : if (!ISDOpcode)
1375 116 : continue;
1376 :
1377 : // If the use is actually a legal node, there will not be an
1378 : // implicit truncate.
1379 : // FIXME: always querying the result type is just an
1380 : // approximation; some nodes' legality is determined by the
1381 : // operand or other means. There's no good way to find out though.
1382 103 : if (TLI.isOperationLegalOrCustom(
1383 : ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1384 : continue;
1385 :
1386 : // Don't bother for PHI nodes.
1387 92 : if (isa<PHINode>(TruncUser))
1388 : continue;
1389 :
1390 92 : BasicBlock *TruncUserBB = TruncUser->getParent();
1391 :
1392 92 : if (UserBB == TruncUserBB)
1393 : continue;
1394 :
1395 : BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1396 : CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1397 :
1398 3 : if (!InsertedShift && !InsertedTrunc) {
1399 3 : BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1400 : assert(InsertPt != TruncUserBB->end());
1401 : // Sink the shift
1402 3 : if (ShiftI->getOpcode() == Instruction::AShr)
1403 0 : InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1404 : "", &*InsertPt);
1405 : else
1406 3 : InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1407 : "", &*InsertPt);
1408 3 : InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1409 :
1410 : // Sink the trunc
1411 3 : BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1412 : TruncInsertPt++;
1413 : assert(TruncInsertPt != TruncUserBB->end());
1414 :
1415 6 : InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1416 : TruncI->getType(), "", &*TruncInsertPt);
1417 3 : InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1418 :
1419 : MadeChange = true;
1420 :
1421 3 : TruncTheUse = InsertedTrunc;
1422 : }
1423 : }
1424 99 : return MadeChange;
1425 : }
1426 :
1427 : /// Sink the shift *right* instruction into user blocks if the uses could
1428 : /// potentially be combined with this shift instruction to generate a
1429 : /// BitExtract instruction. This is only applied if the architecture supports
1430 : /// BitExtract instructions. Here is an example:
1431 : /// BB1:
1432 : /// %x.extract.shift = lshr i64 %arg1, 32
1433 : /// BB2:
1434 : /// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1435 : /// ==>
1436 : ///
1437 : /// BB2:
1438 : /// %x.extract.shift.1 = lshr i64 %arg1, 32
1439 : /// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1440 : ///
1441 : /// CodeGen will recognize the pattern in BB2 and generate BitExtract
1442 : /// instruction.
1443 : /// Return true if any changes are made.
1444 1951 : static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1445 : const TargetLowering &TLI,
1446 : const DataLayout &DL) {
1447 1951 : BasicBlock *DefBB = ShiftI->getParent();
1448 :
1449 : /// Only insert instructions in each block once.
1450 : DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1451 :
1452 1951 : bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1453 :
1454 : bool MadeChange = false;
1455 : for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1456 4388 : UI != E;) {
1457 : Use &TheUse = UI.getUse();
1458 : Instruction *User = cast<Instruction>(*UI);
1459 : // Preincrement use iterator so we don't invalidate it.
1460 : ++UI;
1461 :
1462 : // Don't bother for PHI nodes.
1463 2437 : if (isa<PHINode>(User))
1464 2381 : continue;
1465 :
1466 2432 : if (!isExtractBitsCandidateUse(User))
1467 : continue;
1468 :
1469 978 : BasicBlock *UserBB = User->getParent();
1470 :
1471 978 : if (UserBB == DefBB) {
1472 : // If the shift and truncate instruction are in the same BB. The use of
1473 : // the truncate(TruncUse) may still introduce another truncate if not
1474 : // legal. In this case, we would like to sink both shift and truncate
1475 : // instruction to the BB of TruncUse.
1476 : // for example:
1477 : // BB1:
1478 : // i64 shift.result = lshr i64 opnd, imm
1479 : // trunc.result = trunc shift.result to i16
1480 : //
1481 : // BB2:
1482 : // ----> We will have an implicit truncate here if the architecture does
1483 : // not have an i16 compare.
1484 : // cmp i16 trunc.result, opnd2
1485 : //
1486 729 : if (isa<TruncInst>(User) && shiftIsLegal
1487 : // If the type of the truncate is legal, no truncate will be
1488 : // introduced in other basic blocks.
1489 922 : &&
1490 692 : (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1491 : MadeChange =
1492 99 : SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1493 :
1494 922 : continue;
1495 : }
1496 : // If we have already inserted a shift into this block, use it.
1497 : BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1498 :
1499 56 : if (!InsertedShift) {
1500 56 : BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1501 : assert(InsertPt != UserBB->end());
1502 :
1503 56 : if (ShiftI->getOpcode() == Instruction::AShr)
1504 8 : InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1505 : "", &*InsertPt);
1506 : else
1507 48 : InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1508 : "", &*InsertPt);
1509 112 : InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1510 :
1511 : MadeChange = true;
1512 : }
1513 :
1514 : // Replace a use of the shift with a use of the new shift.
1515 56 : TheUse = InsertedShift;
1516 : }
1517 :
1518 : // If we removed all uses, nuke the shift.
1519 1951 : if (ShiftI->use_empty()) {
1520 26 : salvageDebugInfo(*ShiftI);
1521 26 : ShiftI->eraseFromParent();
1522 : }
1523 :
1524 1951 : return MadeChange;
1525 : }
1526 :
1527 : /// If counting leading or trailing zeros is an expensive operation and a zero
1528 : /// input is defined, add a check for zero to avoid calling the intrinsic.
1529 : ///
1530 : /// We want to transform:
1531 : /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1532 : ///
1533 : /// into:
1534 : /// entry:
1535 : /// %cmpz = icmp eq i64 %A, 0
1536 : /// br i1 %cmpz, label %cond.end, label %cond.false
1537 : /// cond.false:
1538 : /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1539 : /// br label %cond.end
1540 : /// cond.end:
1541 : /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1542 : ///
1543 : /// If the transform is performed, return true and set ModifiedDT to true.
1544 1827 : static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1545 : const TargetLowering *TLI,
1546 : const DataLayout *DL,
1547 : bool &ModifiedDT) {
1548 1827 : if (!TLI || !DL)
1549 : return false;
1550 :
1551 : // If a zero input is undefined, it doesn't make sense to despeculate that.
1552 3654 : if (match(CountZeros->getOperand(1), m_One()))
1553 : return false;
1554 :
1555 : // If it's cheap to speculate, there's nothing to do.
1556 : auto IntrinsicID = CountZeros->getIntrinsicID();
1557 814 : if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1558 457 : (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1559 305 : return false;
1560 :
1561 : // Only handle legal scalar cases. Anything else requires too much work.
1562 509 : Type *Ty = CountZeros->getType();
1563 509 : unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1564 509 : if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1565 478 : return false;
1566 :
1567 : // The intrinsic will be sunk behind a compare against zero and branch.
1568 31 : BasicBlock *StartBlock = CountZeros->getParent();
1569 31 : BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1570 :
1571 : // Create another block after the count zero intrinsic. A PHI will be added
1572 : // in this block to select the result of the intrinsic or the bit-width
1573 : // constant if the input to the intrinsic is zero.
1574 31 : BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1575 31 : BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1576 :
1577 : // Set up a builder to create a compare, conditional branch, and PHI.
1578 31 : IRBuilder<> Builder(CountZeros->getContext());
1579 31 : Builder.SetInsertPoint(StartBlock->getTerminator());
1580 31 : Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1581 :
1582 : // Replace the unconditional branch that was created by the first split with
1583 : // a compare against zero and a conditional branch.
1584 31 : Value *Zero = Constant::getNullValue(Ty);
1585 31 : Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1586 31 : Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1587 31 : StartBlock->getTerminator()->eraseFromParent();
1588 :
1589 : // Create a PHI in the end block to select either the output of the intrinsic
1590 : // or the bit width of the operand.
1591 31 : Builder.SetInsertPoint(&EndBlock->front());
1592 31 : PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1593 31 : CountZeros->replaceAllUsesWith(PN);
1594 62 : Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1595 31 : PN->addIncoming(BitWidth, StartBlock);
1596 31 : PN->addIncoming(CountZeros, CallBlock);
1597 :
1598 : // We are explicitly handling the zero case, so we can set the intrinsic's
1599 : // undefined zero argument to 'true'. This will also prevent reprocessing the
1600 : // intrinsic; we only despeculate when a zero input is defined.
1601 31 : CountZeros->setArgOperand(1, Builder.getTrue());
1602 31 : ModifiedDT = true;
1603 : return true;
1604 : }
1605 :
1606 666803 : bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1607 666803 : BasicBlock *BB = CI->getParent();
1608 :
1609 : // Lower inline assembly if we can.
1610 : // If we found an inline asm expression, and if the target knows how to
1611 : // lower it to normal LLVM code, do so now.
1612 666803 : if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1613 14152 : if (TLI->ExpandInlineAsm(CI)) {
1614 : // Avoid invalidating the iterator.
1615 49 : CurInstIterator = BB->begin();
1616 : // Avoid processing instructions out of order, which could cause
1617 : // reuse before a value is defined.
1618 : SunkAddrs.clear();
1619 49 : return true;
1620 : }
1621 : // Sink address computing for memory operands into the block.
1622 14103 : if (optimizeInlineAsmInst(CI))
1623 : return true;
1624 : }
1625 :
1626 : // Align the pointer arguments to this call if the target thinks it's a good
1627 : // idea
1628 : unsigned MinSize, PrefAlign;
1629 666739 : if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1630 3320 : for (auto &Arg : CI->arg_operands()) {
1631 : // We want to align both objects whose address is used directly and
1632 : // objects whose address is used in casts and GEPs, though it only makes
1633 : // sense for GEPs if the offset is a multiple of the desired alignment and
1634 : // if size - offset meets the size threshold.
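: // Hedged example of the intent (hypothetical values MinSize = 32,
: // PrefAlign = 16): passing "getelementptr i8, i8* %buf, i64 16" of a
: // 64-byte alloca qualifies, since the offset 16 is a multiple of 16 and
: // 64 >= 32 + 16, so the alloca's alignment is raised to 16.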
1635 5312 : if (!Arg->getType()->isPointerTy())
1636 1548 : continue;
1637 1154 : APInt Offset(DL->getIndexSizeInBits(
1638 : cast<PointerType>(Arg->getType())->getAddressSpace()),
1639 : 0);
1640 1154 : Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1641 : uint64_t Offset2 = Offset.getLimitedValue();
1642 1154 : if ((Offset2 & (PrefAlign-1)) != 0)
1643 : continue;
1644 : AllocaInst *AI;
1645 149 : if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1646 138 : DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1647 60 : AI->setAlignment(PrefAlign);
1648 : // Global variables can only be aligned if they are defined in this
1649 : // object (i.e. they are uniquely initialized in this object), and
1650 : // over-aligning global variables that have an explicit section is
1651 : // forbidden.
1652 : GlobalVariable *GV;
1653 214 : if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1654 82 : GV->getPointerAlignment(*DL) < PrefAlign &&
1655 67 : DL->getTypeAllocSize(GV->getValueType()) >=
1656 67 : MinSize + Offset2)
1657 39 : GV->setAlignment(PrefAlign);
1658 : }
1659 : // If this is a memcpy (or similar) then we may be able to improve the
1660 : // alignment
1661 : if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1662 664 : unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1663 664 : if (DestAlign > MI->getDestAlignment())
1664 454 : MI->setDestAlignment(DestAlign);
1665 : if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1666 490 : unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1667 490 : if (SrcAlign > MTI->getSourceAlignment())
1668 329 : MTI->setSourceAlignment(SrcAlign);
1669 : }
1670 : }
1671 : }
1672 :
1673 : // If we have a cold call site, try to sink addressing computation into the
1674 : // cold block. This interacts with our handling for loads and stores to
1675 : // ensure that we can fold a potential addressing computation into all of
1676 : // its uses. TODO: generalize this to work over profiling data.
1677 666739 : if (!OptSize && CI->hasFnAttr(Attribute::Cold))
1678 137 : for (auto &Arg : CI->arg_operands()) {
1679 250 : if (!Arg->getType()->isPointerTy())
1680 : continue;
1681 : unsigned AS = Arg->getType()->getPointerAddressSpace();
1682 113 : return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1683 : }
1684 :
1685 : IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1686 : if (II) {
1687 502386 : switch (II->getIntrinsicID()) {
1688 : default: break;
1689 34 : case Intrinsic::objectsize: {
1690 : // Lower all uses of llvm.objectsize.*
1691 : ConstantInt *RetVal =
1692 34 : lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
1693 : // Substituting this can cause recursive simplifications, which can
1694 : // invalidate our iterator. Use a WeakTrackingVH to hold onto it in case
1695 : // this happens.
1697 34 : Value *CurValue = &*CurInstIterator;
1698 : WeakTrackingVH IterHandle(CurValue);
1699 :
1700 34 : replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1701 :
1702 : // If the iterator instruction was recursively deleted, start over at the
1703 : // start of the block.
1704 34 : if (IterHandle != CurValue) {
1705 4 : CurInstIterator = BB->begin();
1706 : SunkAddrs.clear();
1707 : }
1708 : return true;
1709 : }
1710 114 : case Intrinsic::aarch64_stlxr:
1711 : case Intrinsic::aarch64_stxr: {
1712 114 : ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1713 87 : if (!ExtVal || !ExtVal->hasOneUse() ||
1714 87 : ExtVal->getParent() == CI->getParent())
1715 : return false;
1716 : // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1717 1 : ExtVal->moveBefore(CI);
1718 : // Mark this instruction as "inserted by CGP", so that other
1719 : // optimizations don't touch it.
1720 1 : InsertedInsts.insert(ExtVal);
1721 1 : return true;
1722 : }
1723 4 : case Intrinsic::launder_invariant_group:
1724 : case Intrinsic::strip_invariant_group:
1725 4 : II->replaceAllUsesWith(II->getArgOperand(0));
1726 4 : II->eraseFromParent();
1727 4 : return true;
1728 :
1729 1827 : case Intrinsic::cttz:
1730 : case Intrinsic::ctlz:
1731 : // If counting zeros is expensive, try to avoid it.
1732 1827 : return despeculateCountZeros(II, TLI, DL, ModifiedDT);
1733 : }
1734 :
1735 500407 : if (TLI) {
1736 : SmallVector<Value*, 2> PtrOps;
1737 : Type *AccessTy;
1738 500406 : if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
1739 522 : while (!PtrOps.empty()) {
1740 : Value *PtrVal = PtrOps.pop_back_val();
1741 269 : unsigned AS = PtrVal->getType()->getPointerAddressSpace();
1742 269 : if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
1743 : return true;
1744 : }
1745 : }
1746 : }
1747 :
1748 : // From here on out we're working with named functions.
1749 : if (!CI->getCalledFunction()) return false;
1750 :
1751 : // Lower all default uses of _chk calls. This is very similar
1752 : // to what InstCombineCalls does, but here we are only lowering calls
1753 : // to fortified library functions (e.g. __memcpy_chk) that have the default
1754 : // "don't know" as the objectsize. Anything else should be left alone.
1755 635068 : FortifiedLibCallSimplifier Simplifier(TLInfo, true);
1756 635068 : if (Value *V = Simplifier.optimizeCall(CI)) {
1757 38 : CI->replaceAllUsesWith(V);
1758 38 : CI->eraseFromParent();
1759 38 : return true;
1760 : }
1761 :
1762 : return false;
1763 : }
1764 :
1765 : /// Look for opportunities to duplicate return instructions to the predecessor
1766 : /// to enable tail call optimizations. The case it is currently looking for is:
1767 : /// @code
1768 : /// bb0:
1769 : /// %tmp0 = tail call i32 @f0()
1770 : /// br label %return
1771 : /// bb1:
1772 : /// %tmp1 = tail call i32 @f1()
1773 : /// br label %return
1774 : /// bb2:
1775 : /// %tmp2 = tail call i32 @f2()
1776 : /// br label %return
1777 : /// return:
1778 : /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
1779 : /// ret i32 %retval
1780 : /// @endcode
1781 : ///
1782 : /// =>
1783 : ///
1784 : /// @code
1785 : /// bb0:
1786 : /// %tmp0 = tail call i32 @f0()
1787 : /// ret i32 %tmp0
1788 : /// bb1:
1789 : /// %tmp1 = tail call i32 @f1()
1790 : /// ret i32 %tmp1
1791 : /// bb2:
1792 : /// %tmp2 = tail call i32 @f2()
1793 : /// ret i32 %tmp2
1794 : /// @endcode
1795 0 : bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
1796 0 : if (!TLI)
1797 0 : return false;
1798 :
1799 : ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
1800 : if (!RetI)
1801 0 : return false;
1802 :
1803 : PHINode *PN = nullptr;
1804 : BitCastInst *BCI = nullptr;
1805 : Value *V = RetI->getReturnValue();
1806 : if (V) {
1807 : BCI = dyn_cast<BitCastInst>(V);
1808 : if (BCI)
1809 : V = BCI->getOperand(0);
1810 :
1811 : PN = dyn_cast<PHINode>(V);
1812 : if (!PN)
1813 0 : return false;
1814 : }
1815 :
1816 0 : if (PN && PN->getParent() != BB)
1817 0 : return false;
1818 :
1819 : // Make sure there are no instructions between the PHI and return, or that the
1820 : // return is the first instruction in the block.
1821 0 : if (PN) {
1822 0 : BasicBlock::iterator BI = BB->begin();
1823 0 : do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
1824 0 : if (&*BI == BCI)
1825 : // Also skip over the bitcast.
1826 : ++BI;
1827 0 : if (&*BI != RetI)
1828 0 : return false;
1829 : } else {
1830 0 : BasicBlock::iterator BI = BB->begin();
1831 0 : while (isa<DbgInfoIntrinsic>(BI)) ++BI;
1832 0 : if (&*BI != RetI)
1833 0 : return false;
1834 : }
1835 :
1836 : /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
1837 : /// call.
1838 0 : const Function *F = BB->getParent();
1839 : SmallVector<CallInst*, 4> TailCalls;
1840 0 : if (PN) {
1841 0 : for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
1842 0 : CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
1843 : // Make sure the phi value is indeed produced by the tail call.
1844 0 : if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
1845 0 : TLI->mayBeEmittedAsTailCall(CI) &&
1846 0 : attributesPermitTailCall(F, CI, RetI, *TLI))
1847 0 : TailCalls.push_back(CI);
1848 : }
1849 : } else {
1850 : SmallPtrSet<BasicBlock*, 4> VisitedBBs;
1851 0 : for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
1852 0 : if (!VisitedBBs.insert(*PI).second)
1853 0 : continue;
1854 :
1855 : BasicBlock::InstListType &InstList = (*PI)->getInstList();
1856 : BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
1857 : BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
1858 0 : do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
1859 0 : if (RI == RE)
1860 0 : continue;
1861 :
1862 0 : CallInst *CI = dyn_cast<CallInst>(&*RI);
1863 0 : if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
1864 0 : attributesPermitTailCall(F, CI, RetI, *TLI))
1865 0 : TailCalls.push_back(CI);
1866 : }
1867 : }
1868 :
1869 : bool Changed = false;
1870 0 : for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
1871 0 : CallInst *CI = TailCalls[i];
1872 : CallSite CS(CI);
1873 :
1874 : // Make sure the call instruction is followed by an unconditional branch to
1875 : // the return block.
1876 0 : BasicBlock *CallBB = CI->getParent();
1877 : BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
1878 0 : if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
1879 0 : continue;
1880 :
1881 : // Duplicate the return into CallBB.
1882 0 : (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
1883 0 : ModifiedDT = Changed = true;
1884 : ++NumRetsDup;
1885 : }
1886 :
1887 : // If we eliminated all predecessors of the block, delete the block now.
1888 0 : if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
1889 0 : BB->eraseFromParent();
1890 :
1891 : return Changed;
1892 : }
1893 :
1894 : //===----------------------------------------------------------------------===//
1895 : // Memory Optimization
1896 : //===----------------------------------------------------------------------===//
1897 :
1898 : namespace {
1899 :
1900 : /// This is an extended version of TargetLowering::AddrMode
1901 : /// which holds actual Value*'s for register values.
1902 : struct ExtAddrMode : public TargetLowering::AddrMode {
1903 : Value *BaseReg = nullptr;
1904 : Value *ScaledReg = nullptr;
1905 : Value *OriginalValue = nullptr;
1906 :
1907 : enum FieldName {
1908 : NoField = 0x00,
1909 : BaseRegField = 0x01,
1910 : BaseGVField = 0x02,
1911 : BaseOffsField = 0x04,
1912 : ScaledRegField = 0x08,
1913 : ScaleField = 0x10,
1914 : MultipleFields = 0xff
1915 : };
1916 :
1917 : ExtAddrMode() = default;
1918 :
1919 : void print(raw_ostream &OS) const;
1920 : void dump() const;
1921 :
1922 201084 : FieldName compare(const ExtAddrMode &other) {
1923 : // First check that the types are the same on each field, as differing types
1924 : // is something we can't cope with later on.
1925 201084 : if (BaseReg && other.BaseReg &&
1926 111802 : BaseReg->getType() != other.BaseReg->getType())
1927 : return MultipleFields;
1928 199443 : if (BaseGV && other.BaseGV &&
1929 : BaseGV->getType() != other.BaseGV->getType())
1930 : return MultipleFields;
1931 199401 : if (ScaledReg && other.ScaledReg &&
1932 52 : ScaledReg->getType() != other.ScaledReg->getType())
1933 : return MultipleFields;
1934 :
1935 : // Check each field to see if it differs.
1936 : unsigned Result = NoField;
1937 199401 : if (BaseReg != other.BaseReg)
1938 : Result |= BaseRegField;
1939 199401 : if (BaseGV != other.BaseGV)
1940 154 : Result |= BaseGVField;
1941 199401 : if (BaseOffs != other.BaseOffs)
1942 196514 : Result |= BaseOffsField;
1943 199401 : if (ScaledReg != other.ScaledReg)
1944 513 : Result |= ScaledRegField;
1945 : // Don't count 0 as being a different scale, because that actually means
1946 : // unscaled (which will already be counted by having no ScaledReg).
1947 199401 : if (Scale && other.Scale && Scale != other.Scale)
1948 1 : Result |= ScaleField;
1949 :
1950 199401 : if (countPopulation(Result) > 1)
1951 : return MultipleFields;
1952 : else
1953 196440 : return static_cast<FieldName>(Result);
1954 : }
1955 :
1956 : // An AddrMode is trivial if it involves no calculation i.e. it is just a base
1957 : // with no offset.
1958 : bool isTrivial() {
1959 : // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
1960 : // trivial if at most one of these terms is nonzero, except that BaseGV and
1961 : // BaseReg both being zero actually means a null pointer value, which we
1962 : // consider to be 'non-zero' here.
1963 1720320 : return !BaseOffs && !Scale && !(BaseGV && BaseReg);
1964 : }
1965 :
1966 342487 : Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
1967 342487 : switch (Field) {
1968 : default:
1969 : return nullptr;
1970 712 : case BaseRegField:
1971 712 : return BaseReg;
1972 160 : case BaseGVField:
1973 160 : return BaseGV;
1974 84 : case ScaledRegField:
1975 84 : return ScaledReg;
1976 341531 : case BaseOffsField:
1977 341531 : return ConstantInt::get(IntPtrTy, BaseOffs);
1978 : }
1979 : }
1980 :
1981 73 : void SetCombinedField(FieldName Field, Value *V,
1982 : const SmallVectorImpl<ExtAddrMode> &AddrModes) {
1983 73 : switch (Field) {
1984 0 : default:
1985 0 : llvm_unreachable("Unhandled fields are expected to be rejected earlier");
1986 : break;
1987 49 : case ExtAddrMode::BaseRegField:
1988 49 : BaseReg = V;
1989 49 : break;
1990 0 : case ExtAddrMode::BaseGVField:
1991 : // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
1992 : // in the BaseReg field.
1993 : assert(BaseReg == nullptr);
1994 0 : BaseReg = V;
1995 0 : BaseGV = nullptr;
1996 0 : break;
1997 5 : case ExtAddrMode::ScaledRegField:
1998 5 : ScaledReg = V;
1999 : // If we have a mix of scaled and unscaled addrmodes then we want scale
2000 : // to be the scale and not zero.
2001 5 : if (!Scale)
2002 4 : for (const ExtAddrMode &AM : AddrModes)
2003 4 : if (AM.Scale) {
2004 2 : Scale = AM.Scale;
2005 2 : break;
2006 : }
2007 : break;
2008 19 : case ExtAddrMode::BaseOffsField:
2009 : // The offset is no longer a constant, so it goes in ScaledReg with a
2010 : // scale of 1.
2011 : assert(ScaledReg == nullptr);
2012 19 : ScaledReg = V;
2013 19 : Scale = 1;
2014 19 : BaseOffs = 0;
2015 19 : break;
2016 : }
2017 73 : }
2018 : };
2019 :
2020 : } // end anonymous namespace
2021 :
2022 : #ifndef NDEBUG
2023 : static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2024 : AM.print(OS);
2025 : return OS;
2026 : }
2027 : #endif
2028 :
2029 : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2030 : void ExtAddrMode::print(raw_ostream &OS) const {
2031 : bool NeedPlus = false;
2032 : OS << "[";
2033 : if (BaseGV) {
2034 : OS << (NeedPlus ? " + " : "")
2035 : << "GV:";
2036 : BaseGV->printAsOperand(OS, /*PrintType=*/false);
2037 : NeedPlus = true;
2038 : }
2039 :
2040 : if (BaseOffs) {
2041 : OS << (NeedPlus ? " + " : "")
2042 : << BaseOffs;
2043 : NeedPlus = true;
2044 : }
2045 :
2046 : if (BaseReg) {
2047 : OS << (NeedPlus ? " + " : "")
2048 : << "Base:";
2049 : BaseReg->printAsOperand(OS, /*PrintType=*/false);
2050 : NeedPlus = true;
2051 : }
2052 : if (Scale) {
2053 : OS << (NeedPlus ? " + " : "")
2054 : << Scale << "*";
2055 : ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2056 : }
2057 :
2058 : OS << ']';
2059 : }
2060 :
2061 : LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2062 : print(dbgs());
2063 : dbgs() << '\n';
2064 : }
2065 : #endif
2066 :
2067 : namespace {
2068 :
2069 : /// This class provides transaction based operation on the IR.
2070 : /// Every change made through this class is recorded in the internal state and
2071 : /// can be undone (rollback) until commit is called.
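: /// A minimal usage sketch (the profitability check named
: /// ProfitabilityCheckPassed is hypothetical):
: ///   TypePromotionTransaction TPT(RemovedInsts);
: ///   auto RestorePt = TPT.getRestorationPoint();
: ///   TPT.setOperand(Inst, 0, NewVal);   // recorded, reversible action
: ///   if (ProfitabilityCheckPassed)
: ///     TPT.commit();                    // keep the changes
: ///   else
: ///     TPT.rollback(RestorePt);         // undo everything back to RestorePt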
2072 1758953 : class TypePromotionTransaction {
2073 : /// This represents the common interface of the individual transaction.
2074 : /// Each class implements the logic for doing one specific modification on
2075 : /// the IR via the TypePromotionTransaction.
2076 : class TypePromotionAction {
2077 : protected:
2078 : /// The Instruction modified.
2079 : Instruction *Inst;
2080 :
2081 : public:
2082 : /// Constructor of the action.
2083 : /// The constructor performs the related action on the IR.
2084 16151 : TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2085 :
2086 : virtual ~TypePromotionAction() = default;
2087 :
2088 : /// Undo the modification done by this action.
2089 : /// When this method is called, the IR must be in the same state as it was
2090 : /// before this action was applied.
2091 : /// \pre Undoing the action works if and only if the IR is in the exact same
2092 : /// state as it was directly after this action was applied.
2093 : virtual void undo() = 0;
2094 :
2095 : /// Advocate every change made by this action.
2096 : /// When the results on the IR of the action are to be kept, it is important
2097 : /// to call this function, otherwise hidden information may be kept forever.
2098 0 : virtual void commit() {
2099 : // Nothing to be done, this action is not doing anything.
2100 0 : }
2101 : };
2102 :
2103 : /// Utility to remember the position of an instruction.
2104 : class InsertionHandler {
2105 : /// Position of an instruction.
2106 : /// Either an instruction:
2107 : /// - Is the first in a basic block: BB is used.
2108 : /// - Has a previous instruction: PrevInst is used.
2109 : union {
2110 : Instruction *PrevInst;
2111 : BasicBlock *BB;
2112 : } Point;
2113 :
2114 : /// Remember whether or not the instruction had a previous instruction.
2115 : bool HasPrevInstruction;
2116 :
2117 : public:
2118 : /// Record the position of \p Inst.
2119 3008 : InsertionHandler(Instruction *Inst) {
2120 3008 : BasicBlock::iterator It = Inst->getIterator();
2121 6016 : HasPrevInstruction = (It != (Inst->getParent()->begin()));
2122 3008 : if (HasPrevInstruction)
2123 2907 : Point.PrevInst = &*--It;
2124 : else
2125 101 : Point.BB = Inst->getParent();
2126 : }
2127 :
2128 : /// Insert \p Inst at the recorded position.
2129 0 : void insert(Instruction *Inst) {
2130 0 : if (HasPrevInstruction) {
2131 0 : if (Inst->getParent())
2132 0 : Inst->removeFromParent();
2133 0 : Inst->insertAfter(Point.PrevInst);
2134 : } else {
2135 0 : Instruction *Position = &*Point.BB->getFirstInsertionPt();
2136 0 : if (Inst->getParent())
2137 0 : Inst->moveBefore(Position);
2138 : else
2139 0 : Inst->insertBefore(Position);
2140 : }
2141 0 : }
2142 : };
2143 :
2144 : /// Move an instruction before another.
2145 0 : class InstructionMoveBefore : public TypePromotionAction {
2146 : /// Original position of the instruction.
2147 : InsertionHandler Position;
2148 :
2149 : public:
2150 : /// Move \p Inst before \p Before.
2151 2513 : InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2152 2513 : : TypePromotionAction(Inst), Position(Inst) {
2153 : LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2154 : << "\n");
2155 2513 : Inst->moveBefore(Before);
2156 2513 : }
2157 :
2158 : /// Move the instruction back to its original position.
2159 1125 : void undo() override {
2160 : LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2161 1125 : Position.insert(Inst);
2162 1125 : }
2163 : };
2164 :
2165 : /// Set the operand of an instruction with a new value.
2166 0 : class OperandSetter : public TypePromotionAction {
2167 : /// Original operand of the instruction.
2168 : Value *Origin;
2169 :
2170 : /// Index of the modified instruction.
2171 : unsigned Idx;
2172 :
2173 : public:
2174 : /// Set \p Idx operand of \p Inst with \p NewVal.
2175 7083 : OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2176 14166 : : TypePromotionAction(Inst), Idx(Idx) {
2177 : LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2178 : << "for:" << *Inst << "\n"
2179 : << "with:" << *NewVal << "\n");
2180 7083 : Origin = Inst->getOperand(Idx);
2181 7083 : Inst->setOperand(Idx, NewVal);
2182 7083 : }
2183 :
2184 : /// Restore the original value of the instruction.
2185 3144 : void undo() override {
2186 : LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2187 : << "for: " << *Inst << "\n"
2188 : << "with: " << *Origin << "\n");
2189 3144 : Inst->setOperand(Idx, Origin);
2190 3144 : }
2191 : };
2192 :
2193 : /// Hide the operands of an instruction.
2194 : /// Do as if this instruction was not using any of its operands.
2195 495 : class OperandsHider : public TypePromotionAction {
2196 : /// The list of original operands.
2197 : SmallVector<Value *, 4> OriginalValues;
2198 :
2199 : public:
2200 : /// Remove \p Inst from the uses of the operands of \p Inst.
2201 495 : OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2202 : LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2203 : unsigned NumOpnds = Inst->getNumOperands();
2204 495 : OriginalValues.reserve(NumOpnds);
2205 990 : for (unsigned It = 0; It < NumOpnds; ++It) {
2206 : // Save the current operand.
2207 495 : Value *Val = Inst->getOperand(It);
2208 495 : OriginalValues.push_back(Val);
2209 : // Set a dummy one.
2210 : // We could use OperandSetter here, but that would imply an overhead
2211 : // that we are not willing to pay.
2212 495 : Inst->setOperand(It, UndefValue::get(Val->getType()));
2213 : }
2214 495 : }
2215 :
2216 : /// Restore the original list of uses.
2217 160 : void undo() override {
2218 : LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2219 320 : for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2220 320 : Inst->setOperand(It, OriginalValues[It]);
2221 160 : }
2222 : };
2223 :
2224 : /// Build a truncate instruction.
2225 0 : class TruncBuilder : public TypePromotionAction {
2226 : Value *Val;
2227 :
2228 : public:
2229 : /// Build a truncate instruction of \p Opnd producing a \p Ty
2230 : /// result.
2231 : /// trunc Opnd to Ty.
2232 173 : TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2233 173 : IRBuilder<> Builder(Opnd);
2234 173 : Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2235 : LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2236 173 : }
2237 :
2238 : /// Get the built value.
2239 0 : Value *getBuiltValue() { return Val; }
2240 :
2241 : /// Remove the built instruction.
2242 79 : void undo() override {
2243 : LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2244 79 : if (Instruction *IVal = dyn_cast<Instruction>(Val))
2245 79 : IVal->eraseFromParent();
2246 79 : }
2247 : };
2248 :
2249 : /// Build a sign extension instruction.
2250 0 : class SExtBuilder : public TypePromotionAction {
2251 : Value *Val;
2252 :
2253 : public:
2254 : /// Build a sign extension instruction of \p Opnd producing a \p Ty
2255 : /// result.
2256 : /// sext Opnd to Ty.
2257 103 : SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2258 103 : : TypePromotionAction(InsertPt) {
2259 103 : IRBuilder<> Builder(InsertPt);
2260 103 : Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2261 : LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2262 103 : }
2263 :
2264 : /// Get the built value.
2265 0 : Value *getBuiltValue() { return Val; }
2266 :
2267 : /// Remove the built instruction.
2268 60 : void undo() override {
2269 : LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2270 60 : if (Instruction *IVal = dyn_cast<Instruction>(Val))
2271 60 : IVal->eraseFromParent();
2272 60 : }
2273 : };
2274 :
2275 : /// Build a zero extension instruction.
2276 0 : class ZExtBuilder : public TypePromotionAction {
2277 : Value *Val;
2278 :
2279 : public:
2280 : /// Build a zero extension instruction of \p Opnd producing a \p Ty
2281 : /// result.
2282 : /// zext Opnd to Ty.
2283 494 : ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2284 494 : : TypePromotionAction(InsertPt) {
2285 494 : IRBuilder<> Builder(InsertPt);
2286 494 : Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2287 : LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2288 494 : }
2289 :
2290 : /// Get the built value.
2291 0 : Value *getBuiltValue() { return Val; }
2292 :
2293 : /// Remove the built instruction.
2294 162 : void undo() override {
2295 : LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2296 162 : if (Instruction *IVal = dyn_cast<Instruction>(Val))
2297 161 : IVal->eraseFromParent();
2298 162 : }
2299 : };
2300 :
2301 : /// Mutate an instruction to another type.
2302 0 : class TypeMutator : public TypePromotionAction {
2303 : /// Record the original type.
2304 : Type *OrigTy;
2305 :
2306 : public:
2307 : /// Mutate the type of \p Inst into \p NewTy.
2308 : TypeMutator(Instruction *Inst, Type *NewTy)
2309 2169 : : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2310 : LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2311 : << "\n");
2312 : Inst->mutateType(NewTy);
2313 : }
2314 :
2315 : /// Mutate the instruction back to its original type.
2316 946 : void undo() override {
2317 : LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2318 : << "\n");
2319 946 : Inst->mutateType(OrigTy);
2320 946 : }
2321 : };
2322 :
2323 : /// Replace the uses of an instruction by another instruction.
2324 37 : class UsesReplacer : public TypePromotionAction {
2325 : /// Helper structure to keep track of the replaced uses.
2326 : struct InstructionAndIdx {
2327 : /// The instruction using the instruction.
2328 : Instruction *Inst;
2329 :
2330 : /// The index where this instruction is used for Inst.
2331 : unsigned Idx;
2332 :
2333 : InstructionAndIdx(Instruction *Inst, unsigned Idx)
2334 3562 : : Inst(Inst), Idx(Idx) {}
2335 : };
2336 :
2337 : /// Keep track of the original uses (pair Instruction, Index).
2338 : SmallVector<InstructionAndIdx, 4> OriginalUses;
2339 :
2340 : using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2341 :
2342 : public:
2343 : /// Replace all the use of \p Inst by \p New.
2344 2626 : UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2345 : LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2346 : << "\n");
2347 : // Record the original uses.
2348 6188 : for (Use &U : Inst->uses()) {
2349 3562 : Instruction *UserI = cast<Instruction>(U.getUser());
2350 3562 : OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2351 : }
2352 : // Now, we can replace the uses.
2353 2626 : Inst->replaceAllUsesWith(New);
2354 2626 : }
2355 :
2356 : /// Reassign the original uses of Inst to Inst.
2357 1105 : void undo() override {
2358 : LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2359 1434 : for (use_iterator UseIt = OriginalUses.begin(),
2360 : EndIt = OriginalUses.end();
2361 2539 : UseIt != EndIt; ++UseIt) {
2362 1434 : UseIt->Inst->setOperand(UseIt->Idx, Inst);
2363 : }
2364 1105 : }
2365 : };
2366 :
2367 : /// Remove an instruction from the IR.
2368 : class InstructionRemover : public TypePromotionAction {
2369 : /// Original position of the instruction.
2370 : InsertionHandler Inserter;
2371 :
2372 : /// Helper structure to hide all the link to the instruction. In other
2373 : /// words, this helps to do as if the instruction was removed.
2374 : OperandsHider Hider;
2375 :
2376 : /// Keep track of the uses replaced, if any.
2377 : UsesReplacer *Replacer = nullptr;
2378 :
2379 : /// Keep track of instructions removed.
2380 : SetOfInstrs &RemovedInsts;
2381 :
2382 : public:
2383 : /// Remove all reference of \p Inst and optionally replace all its
2384 : /// uses with New.
2385 : /// \p RemovedInsts Keep track of the instructions removed by this Action.
2386 : /// \pre If !Inst->use_empty(), then New != nullptr
2387 495 : InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2388 : Value *New = nullptr)
2389 495 : : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2390 990 : RemovedInsts(RemovedInsts) {
2391 495 : if (New)
2392 37 : Replacer = new UsesReplacer(Inst, New);
2393 : LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2394 495 : RemovedInsts.insert(Inst);
2395 : /// The instructions removed here will be freed after completing
2396 : /// optimizeBlock() for all blocks as we need to keep track of the
2397 : /// removed instructions during promotion.
2398 495 : Inst->removeFromParent();
2399 495 : }
2400 :
2401 990 : ~InstructionRemover() override { delete Replacer; }
2402 :
2403 : /// Resurrect the instruction and reassign it to the proper uses if
2404 : /// new value was provided when build this action.
2405 160 : void undo() override {
2406 : LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2407 160 : Inserter.insert(Inst);
2408 160 : if (Replacer)
2409 37 : Replacer->undo();
2410 160 : Hider.undo();
2411 160 : RemovedInsts.erase(Inst);
2412 160 : }
2413 : };
2414 :
2415 : public:
2416 : /// Restoration point.
2417 : /// The restoration point is a pointer to an action instead of an iterator
2418 : /// because the iterator may be invalidated but not the pointer.
2419 : using ConstRestorationPt = const TypePromotionAction *;
2420 :
2421 : TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2422 1758953 : : RemovedInsts(RemovedInsts) {}
2423 :
2424 : /// Commit every change made in this transaction.
2425 : void commit();
2426 :
2427 : /// Undo all the changes made after the given point.
2428 : void rollback(ConstRestorationPt Point);
2429 :
2430 : /// Get the current restoration point.
2431 : ConstRestorationPt getRestorationPoint() const;
2432 :
2433 : /// \name API for IR modification with state keeping to support rollback.
2434 : /// @{
2435 : /// Same as Instruction::setOperand.
2436 : void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2437 :
2438 : /// Same as Instruction::eraseFromParent.
2439 : void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2440 :
2441 : /// Same as Value::replaceAllUsesWith.
2442 : void replaceAllUsesWith(Instruction *Inst, Value *New);
2443 :
2444 : /// Same as Value::mutateType.
2445 : void mutateType(Instruction *Inst, Type *NewTy);
2446 :
2447 : /// Same as IRBuilder::createTrunc.
2448 : Value *createTrunc(Instruction *Opnd, Type *Ty);
2449 :
2450 : /// Same as IRBuilder::createSExt.
2451 : Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2452 :
2453 : /// Same as IRBuilder::createZExt.
2454 : Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2455 :
2456 : /// Same as Instruction::moveBefore.
2457 : void moveBefore(Instruction *Inst, Instruction *Before);
2458 : /// @}
2459 :
2460 : private:
2461 : /// The ordered list of actions made so far.
2462 : SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2463 :
2464 : using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2465 :
2466 : SetOfInstrs &RemovedInsts;
2467 : };
2468 :
2469 : } // end anonymous namespace
2470 :
2471 7083 : void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2472 : Value *NewVal) {
2473 14166 : Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>(
2474 : Inst, Idx, NewVal));
2475 7083 : }
2476 :
2477 495 : void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2478 : Value *NewVal) {
2479 495 : Actions.push_back(
2480 495 : llvm::make_unique<TypePromotionTransaction::InstructionRemover>(
2481 495 : Inst, RemovedInsts, NewVal));
2482 495 : }
2483 :
2484 2589 : void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2485 : Value *New) {
2486 5178 : Actions.push_back(
2487 : llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2488 2589 : }
2489 :
2490 2169 : void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2491 2169 : Actions.push_back(
2492 : llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2493 2169 : }
2494 :
2495 173 : Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2496 : Type *Ty) {
2497 173 : std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2498 173 : Value *Val = Ptr->getBuiltValue();
2499 346 : Actions.push_back(std::move(Ptr));
2500 173 : return Val;
2501 : }
2502 :
2503 103 : Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2504 : Value *Opnd, Type *Ty) {
2505 103 : std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2506 103 : Value *Val = Ptr->getBuiltValue();
2507 206 : Actions.push_back(std::move(Ptr));
2508 103 : return Val;
2509 : }
2510 :
2511 494 : Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2512 : Value *Opnd, Type *Ty) {
2513 494 : std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2514 494 : Value *Val = Ptr->getBuiltValue();
2515 988 : Actions.push_back(std::move(Ptr));
2516 494 : return Val;
2517 : }
2518 :
2519 2513 : void TypePromotionTransaction::moveBefore(Instruction *Inst,
2520 : Instruction *Before) {
2521 5026 : Actions.push_back(
2522 : llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2523 : Inst, Before));
2524 2513 : }
2525 :
2526 : TypePromotionTransaction::ConstRestorationPt
2527 : TypePromotionTransaction::getRestorationPoint() const {
2528 2201 : return !Actions.empty() ? Actions.back().get() : nullptr;
2529 : }
2530 :
2531 : void TypePromotionTransaction::commit() {
2532 : for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2533 : ++It)
2534 : (*It)->commit();
2535 1563877 : Actions.clear();
2536 : }
2537 :
2538 223889 : void TypePromotionTransaction::rollback(
2539 : TypePromotionTransaction::ConstRestorationPt Point) {
2540 237902 : while (!Actions.empty() && Point != Actions.back().get()) {
2541 6744 : std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2542 6744 : Curr->undo();
2543 : }
2544 223889 : }
2545 :
2546 : namespace {
2547 :
2548 : /// A helper class for matching addressing modes.
2549 : ///
2550 : /// This encapsulates the logic for matching the target-legal addressing modes.
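: /// As an illustrative example (target-dependent), an address of the form
: /// base + 4*idx + 16, e.g. a getelementptr into an i32 array plus a constant
: /// offset, may match an ExtAddrMode with BaseReg = base, ScaledReg = idx,
: /// Scale = 4 and BaseOffs = 16 on a target that supports
: /// [base + scale*index + disp] addressing, such as x86.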
2551 : class AddressingModeMatcher {
2552 : SmallVectorImpl<Instruction*> &AddrModeInsts;
2553 : const TargetLowering &TLI;
2554 : const TargetRegisterInfo &TRI;
2555 : const DataLayout &DL;
2556 :
2557 : /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2558 : /// the memory instruction that we're computing this address for.
2559 : Type *AccessTy;
2560 : unsigned AddrSpace;
2561 : Instruction *MemoryInst;
2562 :
2563 : /// This is the addressing mode that we're building up. This is
2564 : /// part of the return value of this addressing mode matching stuff.
2565 : ExtAddrMode &AddrMode;
2566 :
2567 : /// The instructions inserted by other CodeGenPrepare optimizations.
2568 : const SetOfInstrs &InsertedInsts;
2569 :
2570 : /// A map from the instructions to their type before promotion.
2571 : InstrToOrigTy &PromotedInsts;
2572 :
2573 : /// The ongoing transaction where every action should be registered.
2574 : TypePromotionTransaction &TPT;
2575 :
2576 : // A GEP which has too large offset to be folded into the addressing mode.
2577 : std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2578 :
2579 : /// This is set to true when we should not do profitability checks.
2580 : /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2581 : bool IgnoreProfitability;
2582 :
2583 : AddressingModeMatcher(
2584 : SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2585 : const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2586 : ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2587 : InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2588 : std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
2589 1917698 : : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2590 1917698 : DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2591 : MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2592 3835396 : PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
2593 1917698 : IgnoreProfitability = false;
2594 : }
2595 :
2596 : public:
2597 : /// Find the maximal addressing mode that a load/store of V can fold,
2598 : /// given an access type of AccessTy. This returns a list of involved
2599 : /// instructions in AddrModeInsts.
2600 : /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2601 : /// optimizations.
2602 : /// \p PromotedInsts maps the instructions to their type before promotion.
2603 : /// \p The ongoing transaction where every action should be registered.
2604 : static ExtAddrMode
2605 1917698 : Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2606 : SmallVectorImpl<Instruction *> &AddrModeInsts,
2607 : const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2608 : const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2609 : TypePromotionTransaction &TPT,
2610 : std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
2611 1917698 : ExtAddrMode Result;
2612 :
2613 1917698 : bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2614 : MemoryInst, Result, InsertedInsts,
2615 : PromotedInsts, TPT, LargeOffsetGEP)
2616 1917698 : .matchAddr(V, 0);
2617 : (void)Success; assert(Success && "Couldn't select *anything*?");
2618 1917698 : return Result;
2619 : }
2620 :
2621 : private:
2622 : bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2623 : bool matchAddr(Value *Addr, unsigned Depth);
2624 : bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2625 : bool *MovedAway = nullptr);
2626 : bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2627 : ExtAddrMode &AMBefore,
2628 : ExtAddrMode &AMAfter);
2629 : bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2630 : bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2631 : Value *PromotedOperand) const;
2632 : };
2633 :
2634 : /// Keep track of simplification of Phi nodes.
2635 : /// Accept the set of all phi nodes and erase a phi node from this set
2636 : /// if it is simplified.
2637 : class SimplificationTracker {
2638 : DenseMap<Value *, Value *> Storage;
2639 : const SimplifyQuery &SQ;
2640 : // Tracks newly created Phi nodes. We use a SetVector to get deterministic
2641 : // order when iterating over the set in MatchPhiSet.
2642 : SmallSetVector<PHINode *, 32> AllPhiNodes;
2643 : // Tracks newly created Select nodes.
2644 : SmallPtrSet<SelectInst *, 32> AllSelectNodes;
2645 :
2646 : public:
2647 148383 : SimplificationTracker(const SimplifyQuery &sq)
2648 148383 : : SQ(sq) {}
2649 :
2650 : Value *Get(Value *V) {
2651 : do {
2652 150019 : auto SV = Storage.find(V);
2653 150019 : if (SV == Storage.end())
2654 149351 : return V;
2655 594 : V = SV->second;
2656 : } while (true);
2657 : }
2658 :
2659 149315 : Value *Simplify(Value *Val) {
2660 : SmallVector<Value *, 32> WorkList;
2661 : SmallPtrSet<Value *, 32> Visited;
2662 149315 : WorkList.push_back(Val);
2663 298698 : while (!WorkList.empty()) {
2664 : auto P = WorkList.pop_back_val();
2665 149383 : if (!Visited.insert(P).second)
2666 : continue;
2667 : if (auto *PI = dyn_cast<Instruction>(P))
2668 149338 : if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
2669 636 : for (auto *U : PI->users())
2670 68 : WorkList.push_back(cast<Value>(U));
2671 : Put(PI, V);
2672 568 : PI->replaceAllUsesWith(V);
2673 568 : if (auto *PHI = dyn_cast<PHINode>(PI))
2674 568 : AllPhiNodes.remove(PHI);
2675 : if (auto *Select = dyn_cast<SelectInst>(PI))
2676 : AllSelectNodes.erase(Select);
2677 568 : PI->eraseFromParent();
2678 : }
2679 : }
2680 149315 : return Get(Val);
2681 : }
2682 :
2683 : void Put(Value *From, Value *To) {
2684 568 : Storage.insert({ From, To });
2685 : }
2686 :
2687 36 : void ReplacePhi(PHINode *From, PHINode *To) {
2688 36 : Value* OldReplacement = Get(From);
2689 37 : while (OldReplacement != From) {
2690 1 : From = To;
2691 : To = dyn_cast<PHINode>(OldReplacement);
2692 : OldReplacement = Get(From);
2693 : }
2694 : assert(Get(To) == To && "Replacement PHI node is already replaced.");
2695 : Put(From, To);
2696 36 : From->replaceAllUsesWith(To);
2697 36 : AllPhiNodes.remove(From);
2698 36 : From->eraseFromParent();
2699 36 : }
2700 :
2701 148383 : SmallSetVector<PHINode *, 32>& newPhiNodes() { return AllPhiNodes; }
2702 :
2703 149286 : void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
2704 :
2705 29 : void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
2706 :
2707 : unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
2708 :
2709 0 : unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
2710 :
2711 148310 : void destroyNewNodes(Type *CommonType) {
2712 : // For safe erasing, replace the uses with dummy value first.
2713 148310 : auto Dummy = UndefValue::get(CommonType);
2714 296967 : for (auto I : AllPhiNodes) {
2715 148657 : I->replaceAllUsesWith(Dummy);
2716 148657 : I->eraseFromParent();
2717 : }
2718 : AllPhiNodes.clear();
2719 148312 : for (auto I : AllSelectNodes) {
2720 2 : I->replaceAllUsesWith(Dummy);
2721 2 : I->eraseFromParent();
2722 : }
2723 148310 : AllSelectNodes.clear();
2724 148310 : }
2725 : };
2726 :
2727 : /// A helper class for combining addressing modes.
2728 1716614 : class AddressingModeCombiner {
2729 : typedef std::pair<Value *, BasicBlock *> ValueInBB;
2730 : typedef DenseMap<ValueInBB, Value *> FoldAddrToValueMapping;
2731 : typedef std::pair<PHINode *, PHINode *> PHIPair;
2732 :
2733 : private:
2734 : /// The addressing modes we've collected.
2735 : SmallVector<ExtAddrMode, 16> AddrModes;
2736 :
2737 : /// The field in which the AddrModes differ, when we have more than one.
2738 : ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
2739 :
2740 : /// Are the AddrModes that we have all just equal to their original values?
2741 : bool AllAddrModesTrivial = true;
2742 :
2743 : /// Common Type for all different fields in addressing modes.
2744 : Type *CommonType;
2745 :
2746 : /// SimplifyQuery for simplifyInstruction utility.
2747 : const SimplifyQuery &SQ;
2748 :
2749 : /// Original Address.
2750 : ValueInBB Original;
2751 :
2752 : public:
2753 : AddressingModeCombiner(const SimplifyQuery &_SQ, ValueInBB OriginalValue)
2754 1716614 : : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
2755 :
2756 : /// Get the combined AddrMode
2757 : const ExtAddrMode &getAddrMode() const {
2758 : return AddrModes[0];
2759 : }
2760 :
2761 : /// Add a new AddrMode if it's compatible with the AddrModes we already
2762 : /// have.
2763 : /// \return True iff we succeeded in doing so.
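: /// For example (illustrative): [%a + 8] and [%b + 8] differ only in BaseReg
: /// and are collected for a later merge through a Phi of %a and %b, whereas
: /// [%a + 8] and [%b + 16] differ in two fields and are rejected.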
2764 1917698 : bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
2765 : // Take note of if we have any non-trivial AddrModes, as we need to detect
2766 : // when all AddrModes are trivial as then we would introduce a phi or select
2767 : // which just duplicates what's already there.
2768 1917698 : AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
2769 :
2770 : // If this is the first addrmode then everything is fine.
2771 1917698 : if (AddrModes.empty()) {
2772 1716614 : AddrModes.emplace_back(NewAddrMode);
2773 1716614 : return true;
2774 : }
2775 :
2776 : // Figure out how different this is from the other address modes, which we
2777 : // can do just by comparing against the first one given that we only care
2778 : // about the cumulative difference.
2779 : ExtAddrMode::FieldName ThisDifferentField =
2780 201084 : AddrModes[0].compare(NewAddrMode);
2781 201084 : if (DifferentField == ExtAddrMode::NoField)
2782 154823 : DifferentField = ThisDifferentField;
2783 46261 : else if (DifferentField != ThisDifferentField)
2784 256 : DifferentField = ExtAddrMode::MultipleFields;
2785 :
2786 : // If NewAddrMode differs in more than one dimension we cannot handle it.
2787 201084 : bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
2788 :
2789 : // If Scale Field is different then we reject.
2790 201084 : CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
2791 :
2792 : // We also must reject the case when the base offset is different and the
2793 : // scale reg is not null; we cannot handle this case because the merge of
2794 : // the different offsets would be used as the ScaleReg.
2795 196412 : CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
2796 193741 : !NewAddrMode.ScaledReg);
2797 :
2798 : // We also must reject the case when the GV is different and a BaseReg is
2799 : // installed, because we want to use the base reg as a merge of the GV values.
2800 196411 : CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
2801 139 : !NewAddrMode.HasBaseReg);
2802 :
2803 : // Even if NewAddrMode is the same, we still need to collect it because the
2804 : // original value is different. Later we will need all the original values
2805 : // as anchors when finding the common Phi node.
2806 : if (CanHandle)
2807 196410 : AddrModes.emplace_back(NewAddrMode);
2808 : else
2809 : AddrModes.clear();
2810 :
2811 : return CanHandle;
2812 : }
2813 :
2814 : /// Combine the addressing modes we've collected into a single
2815 : /// addressing mode.
2816 : /// \return True iff we successfully combined them or we only had one so
2817 : /// didn't need to combine them anyway.
2818 1716614 : bool combineAddrModes() {
2819 : // If we have no AddrModes then they can't be combined.
2820 3433228 : if (AddrModes.size() == 0)
2821 : return false;
2822 :
2823 : // A single AddrMode can trivially be combined.
2824 1711940 : if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
2825 : return true;
2826 :
2827 : // If the AddrModes we collected are all just equal to the value they are
2828 : // derived from then combining them wouldn't do anything useful.
2829 149603 : if (AllAddrModesTrivial)
2830 : return false;
2831 :
2832 148387 : if (!addrModeCombiningAllowed())
2833 : return false;
2834 :
2835 : // Build a map from <original value, basic block where we saw it> to
2836 : // the value of the base register.
2837 : // Bail out if there is no common type.
2838 : FoldAddrToValueMapping Map;
2839 148387 : if (!initializeMap(Map))
2840 : return false;
2841 :
2842 148383 : Value *CommonValue = findCommon(Map);
2843 148383 : if (CommonValue)
2844 73 : AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
2845 148383 : return CommonValue != nullptr;
2846 : }
2847 :
2848 : private:
2849 : /// Initialize Map with anchor values. For each address seen in some BB
2850 : /// we record the value of the differing field of that address.
2851 : /// If the address is not an instruction then the basic block is set to null.
2852 : /// At the same time we find a common type for the differing field, which we
2853 : /// will use to create new Phi/Select nodes. It is kept in the CommonType field.
2854 : /// Return false if there is no common type found.
2855 148387 : bool initializeMap(FoldAddrToValueMapping &Map) {
2856 : // Keep track of keys where the value is null. We will need to replace it
2857 : // with constant null when we know the common type.
2858 : SmallVector<ValueInBB, 2> NullValue;
2859 296774 : Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
2860 490870 : for (auto &AM : AddrModes) {
2861 : BasicBlock *BB = nullptr;
2862 342487 : if (Instruction *I = dyn_cast<Instruction>(AM.OriginalValue))
2863 696 : BB = I->getParent();
2864 :
2865 342487 : Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
2866 342487 : if (DV) {
2867 342443 : auto *Type = DV->getType();
2868 342443 : if (CommonType && CommonType != Type)
2869 : return false;
2870 342439 : CommonType = Type;
2871 342439 : Map[{ AM.OriginalValue, BB }] = DV;
2872 : } else {
2873 44 : NullValue.push_back({ AM.OriginalValue, BB });
2874 : }
2875 : }
2876 : assert(CommonType && "At least one non-null value must be!");
2877 148423 : for (auto VIBB : NullValue)
2878 40 : Map[VIBB] = Constant::getNullValue(CommonType);
2879 148383 : return true;
2880 : }
2881 :
2882 : /// We have a mapping from each pair <value A, basic block where A was
2883 : /// seen> to another value B, where B was a field in the addressing mode
2884 : /// represented by A. We also have an original value C representing an
2885 : /// address in some basic block; traversing from C through phis and selects
2886 : /// we ended up at the A's in the map. This utility function tries to find a
2887 : /// value V which is a field in addressing mode C such that, traversing
2888 : /// through phi nodes and selects from V, we end up at the corresponding
2889 : /// values B in the map. The utility will create new Phis/Selects if needed.
2890 : // The simple example looks as follows:
2891 : // BB1:
2892 : // p1 = b1 + 40
2893 : // br cond BB2, BB3
2894 : // BB2:
2895 : // p2 = b2 + 40
2896 : // br BB3
2897 : // BB3:
2898 : // p = phi [p1, BB1], [p2, BB2]
2899 : // v = load p
2900 : // Map is
2901 : // <p1, BB1> -> b1
2902 : // <p2, BB2> -> b2
2903 : // Request is
2904 : // <p, BB3> -> ?
2905 : // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3
2906 148383 : Value *findCommon(FoldAddrToValueMapping &Map) {
2907 : // Tracks the simplification of newly created phi nodes. The reason we use
2908 : // this mapping is that we will add newly created Phi nodes to AddrToBase.
2909 : // Simplification of Phi nodes is recursive, so some Phi node may
2910 : // be simplified after we have added it to AddrToBase.
2911 : // Using this mapping we can find the current value in AddrToBase.
2912 296766 : SimplificationTracker ST(SQ);
2913 :
2914 : // First step, DFS to create PHI nodes for all intermediate blocks.
2915 : // Also fill traverse order for the second step.
2916 : SmallVector<ValueInBB, 32> TraverseOrder;
2917 148383 : InsertPlaceholders(Map, TraverseOrder, ST);
2918 :
2919 : // Second Step, fill new nodes by merged values and simplify if possible.
2920 148383 : FillPlaceholders(Map, TraverseOrder, ST);
2921 :
2922 148383 : if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
2923 0 : ST.destroyNewNodes(CommonType);
2924 0 : return nullptr;
2925 : }
2926 :
2927 : // Now we'd like to match the new Phi nodes to existing ones.
2928 148383 : unsigned PhiNotMatchedCount = 0;
2929 148383 : if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
2930 148310 : ST.destroyNewNodes(CommonType);
2931 148310 : return nullptr;
2932 : }
2933 :
2934 146 : auto *Result = ST.Get(Map.find(Original)->second);
2935 : if (Result) {
2936 : NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
2937 : NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
2938 : }
2939 : return Result;
2940 : }
2941 :
2942 : /// Try to match PHI node to Candidate.
2943 : /// Matcher tracks the matched Phi nodes.
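     : /// An illustrative sketch (names invented): a placeholder
     : ///   %sunk_phi = phi i64 [ %b1, %BB1 ], [ %b2, %BB2 ]
     : /// matches an existing
     : ///   %old = phi i64 [ %b1, %BB1 ], [ %b2, %BB2 ]
     : /// outright; when the incoming values are themselves placeholder Phis, the
     : /// pair is pushed on the work list and checked recursively.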
2944 0 : bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
2945 : SmallSetVector<PHIPair, 8> &Matcher,
2946 : SmallSetVector<PHINode *, 32> &PhiNodesToMatch) {
2947 : SmallVector<PHIPair, 8> WorkList;
2948 0 : Matcher.insert({ PHI, Candidate });
2949 0 : WorkList.push_back({ PHI, Candidate });
2950 0 : SmallSet<PHIPair, 8> Visited;
2951 0 : while (!WorkList.empty()) {
2952 0 : auto Item = WorkList.pop_back_val();
2953 0 : if (!Visited.insert(Item).second)
2954 0 : continue;
2955 : // We iterate over all incoming values of the Phi to compare them.
2956 : // If the values are different, both of them are Phis, the first one is a
2957 : // Phi we added (subject to match) and both of them are in the same basic
2958 : // block, then we can match our pair if the values match. So we state that
2959 : // these values match and add the pair to the work list to verify that.
2960 0 : for (auto B : Item.first->blocks()) {
2961 0 : Value *FirstValue = Item.first->getIncomingValueForBlock(B);
2962 0 : Value *SecondValue = Item.second->getIncomingValueForBlock(B);
2963 0 : if (FirstValue == SecondValue)
2964 0 : continue;
2965 :
2966 : PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
2967 : PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
2968 :
2969 : // If one of them is not a Phi, or
2970 : // the first one is not a Phi node from the set we'd like to match, or
2971 : // the Phi nodes are from different basic blocks, then
2972 : // we will not be able to match.
2973 0 : if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
2974 0 : FirstPhi->getParent() != SecondPhi->getParent())
2975 0 : return false;
2976 :
2977 : // If we already matched them then continue.
2978 0 : if (Matcher.count({ FirstPhi, SecondPhi }))
2979 0 : continue;
2980 : // So the values are different and do not yet match. So we need them to
2981 : // match.
2982 0 : Matcher.insert({ FirstPhi, SecondPhi });
2983 : // But we must check it.
2984 0 : WorkList.push_back({ FirstPhi, SecondPhi });
2985 : }
2986 : }
2987 : return true;
2988 : }
2989 :
2990 : /// For the given set of PHI nodes (in the SimplificationTracker) try
2991 : /// to find their equivalents.
2992 : /// Returns false if this matching fails and creation of new Phi nodes is disabled.
2993 148383 : bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
2994 : unsigned &PhiNotMatchedCount) {
2995 : // Use a SetVector for Matched to make sure we do replacements (ReplacePhi)
2996 : // in a deterministic order below.
2997 : SmallSetVector<PHIPair, 8> Matched;
2998 : SmallPtrSet<PHINode *, 8> WillNotMatch;
2999 : SmallSetVector<PHINode *, 32> &PhiNodesToMatch = ST.newPhiNodes();
3000 148438 : while (PhiNodesToMatch.size()) {
3001 148365 : PHINode *PHI = *PhiNodesToMatch.begin();
3002 :
3003 : // Add ourselves; if there are no other Phi nodes in the basic block we do not match.
3004 148365 : WillNotMatch.clear();
3005 148365 : WillNotMatch.insert(PHI);
3006 :
3007 : // Traverse all Phis until we find an equivalent or fail to do so.
3008 : bool IsMatched = false;
3009 716228 : for (auto &P : PHI->getParent()->phis()) {
3010 419530 : if (&P == PHI)
3011 : continue;
3012 271165 : if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
3013 : break;
3014 : // If it does not match, collect all Phi nodes from the matcher;
3015 : // if we end up with no match, then all these Phi nodes will not match
3016 : // later.
3017 542596 : for (auto M : Matched)
3018 271463 : WillNotMatch.insert(M.first);
3019 : Matched.clear();
3020 : }
3021 148365 : if (IsMatched) {
3022 : // Replace all matched values and erase them.
3023 68 : for (auto MV : Matched)
3024 36 : ST.ReplacePhi(MV.first, MV.second);
3025 : Matched.clear();
3026 32 : continue;
3027 : }
3028 : // If we are not allowed to create new nodes then bail out.
3029 148333 : if (!AllowNewPhiNodes)
3030 : return false;
3031 : // Just remove all seen values in matcher. They will not match anything.
3032 23 : PhiNotMatchedCount += WillNotMatch.size();
3033 49 : for (auto *P : WillNotMatch)
3034 26 : PhiNodesToMatch.remove(P);
3035 : }
3036 : return true;
3037 : }
3038 : /// Fill the placeholders with values from predecessors and simplify them.
3039 0 : void FillPlaceholders(FoldAddrToValueMapping &Map,
3040 : SmallVectorImpl<ValueInBB> &TraverseOrder,
3041 : SimplificationTracker &ST) {
3042 0 : while (!TraverseOrder.empty()) {
3043 0 : auto Current = TraverseOrder.pop_back_val();
3044 : assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3045 : Value *CurrentValue = Current.first;
3046 : BasicBlock *CurrentBlock = Current.second;
3047 0 : Value *V = Map[Current];
3048 :
3049 : if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3050 : // CurrentValue also must be Select.
3051 : auto *CurrentSelect = cast<SelectInst>(CurrentValue);
3052 : auto *TrueValue = CurrentSelect->getTrueValue();
3053 : ValueInBB TrueItem = { TrueValue, isa<Instruction>(TrueValue)
3054 0 : ? CurrentBlock
3055 : : nullptr };
3056 : assert(Map.find(TrueItem) != Map.end() && "No True Value!");
3057 0 : Select->setTrueValue(ST.Get(Map[TrueItem]));
3058 : auto *FalseValue = CurrentSelect->getFalseValue();
3059 : ValueInBB FalseItem = { FalseValue, isa<Instruction>(FalseValue)
3060 0 : ? CurrentBlock
3061 : : nullptr };
3062 : assert(Map.find(FalseItem) != Map.end() && "No False Value!");
3063 0 : Select->setFalseValue(ST.Get(Map[FalseItem]));
3064 : } else {
3065 : // Must be a Phi node then.
3066 : PHINode *PHI = cast<PHINode>(V);
3067 : // Fill the Phi node with values from predecessors.
3068 : bool IsDefinedInThisBB =
3069 0 : cast<Instruction>(CurrentValue)->getParent() == CurrentBlock;
3070 : auto *CurrentPhi = dyn_cast<PHINode>(CurrentValue);
3071 0 : for (auto B : predecessors(CurrentBlock)) {
3072 : Value *PV = IsDefinedInThisBB
3073 0 : ? CurrentPhi->getIncomingValueForBlock(B)
3074 : : CurrentValue;
3075 0 : ValueInBB item = { PV, isa<Instruction>(PV) ? B : nullptr };
3076 : assert(Map.find(item) != Map.end() && "No predecessor Value!");
3077 0 : PHI->addIncoming(ST.Get(Map[item]), B);
3078 : }
3079 : }
3080 : // Simplify if possible.
3081 0 : Map[Current] = ST.Simplify(V);
3082 : }
3083 0 : }
3084 :
3085 : /// Starting from the given value, recursively iterates over predecessors up
3086 : /// to the known ending values represented in the map. For each traversed
3087 : /// block inserts a placeholder Phi or Select.
3088 : /// Reports all newly created Phi/Select nodes by adding them to the set.
3089 : /// Also reports the order in which the basic blocks have been traversed.
3090 148383 : void InsertPlaceholders(FoldAddrToValueMapping &Map,
3091 : SmallVectorImpl<ValueInBB> &TraverseOrder,
3092 : SimplificationTracker &ST) {
3093 : SmallVector<ValueInBB, 32> Worklist;
3094 : assert((isa<PHINode>(Original.first) || isa<SelectInst>(Original.first)) &&
3095 : "Address must be a Phi or Select node");
3096 148383 : auto *Dummy = UndefValue::get(CommonType);
3097 148383 : Worklist.push_back(Original);
3098 642765 : while (!Worklist.empty()) {
3099 494382 : auto Current = Worklist.pop_back_val();
3100 : // If the value is not an instruction it is something like a global,
3101 : // a constant or a parameter, and we can say that this value is observable
3102 : // in any block. Set the block to null to denote that.
3103 : // Note that this is also how the anchors are built.
3104 494382 : if (!isa<Instruction>(Current.first))
3105 344207 : Current.second = nullptr;
3106 : // if it is already visited or it is an ending value then skip it.
3107 494382 : if (Map.find(Current) != Map.end())
3108 345635 : continue;
3109 149315 : TraverseOrder.push_back(Current);
3110 :
3111 149315 : Value *CurrentValue = Current.first;
3112 149315 : BasicBlock *CurrentBlock = Current.second;
3113 : // CurrentValue must be a Phi node or select. All others must be covered
3114 : // by anchors.
3115 : Instruction *CurrentI = cast<Instruction>(CurrentValue);
3116 149315 : bool IsDefinedInThisBB = CurrentI->getParent() == CurrentBlock;
3117 :
3118 149315 : unsigned PredCount = pred_size(CurrentBlock);
3119 : // If CurrentValue is not defined in this basic block we are interested
3120 : // in the values in the predecessors.
3121 149315 : if (!IsDefinedInThisBB) {
3122 : assert(PredCount && "Unreachable block?!");
3123 1136 : PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
3124 : &CurrentBlock->front());
3125 568 : Map[Current] = PHI;
3126 568 : ST.insertNewPhi(PHI);
3127 : // Add all predecessors in work list.
3128 1289 : for (auto B : predecessors(CurrentBlock))
3129 721 : Worklist.push_back({ CurrentValue, B });
3130 568 : continue;
3131 : }
3132 : // Value is defined in this basic block.
3133 : if (SelectInst *OrigSelect = dyn_cast<SelectInst>(CurrentI)) {
3134 : // Is it OK to get metadata from OrigSelect?!
3135 : // Create a Select placeholder with dummy value.
3136 : SelectInst *Select =
3137 29 : SelectInst::Create(OrigSelect->getCondition(), Dummy, Dummy,
3138 58 : OrigSelect->getName(), OrigSelect, OrigSelect);
3139 29 : Map[Current] = Select;
3140 : ST.insertNewSelect(Select);
3141 : // We are interested in True and False value in this basic block.
3142 29 : Worklist.push_back({ OrigSelect->getTrueValue(), CurrentBlock });
3143 29 : Worklist.push_back({ OrigSelect->getFalseValue(), CurrentBlock });
3144 : } else {
3145 : // It must be a Phi node then.
3146 : auto *CurrentPhi = cast<PHINode>(CurrentI);
3147 : // Create new Phi node for merge of bases.
3148 : assert(PredCount && "Unreachable block?!");
3149 297436 : PHINode *PHI = PHINode::Create(CommonType, PredCount, "sunk_phi",
3150 : &CurrentBlock->front());
3151 148718 : Map[Current] = PHI;
3152 148718 : ST.insertNewPhi(PHI);
3153 :
3154 : // Add all predecessors in work list.
3155 493938 : for (auto B : predecessors(CurrentBlock))
3156 690440 : Worklist.push_back({ CurrentPhi->getIncomingValueForBlock(B), B });
3157 : }
3158 : }
3159 148383 : }
3160 :
3161 0 : bool addrModeCombiningAllowed() {
3162 0 : if (DisableComplexAddrModes)
3163 0 : return false;
3164 0 : switch (DifferentField) {
3165 : default:
3166 : return false;
3167 : case ExtAddrMode::BaseRegField:
3168 0 : return AddrSinkCombineBaseReg;
3169 : case ExtAddrMode::BaseGVField:
3170 0 : return AddrSinkCombineBaseGV;
3171 : case ExtAddrMode::BaseOffsField:
3172 0 : return AddrSinkCombineBaseOffs;
3173 : case ExtAddrMode::ScaledRegField:
3174 0 : return AddrSinkCombineScaledReg;
3175 : }
3176 : }
3177 : };
3178 : } // end anonymous namespace
3179 :
3180 : /// Try adding ScaleReg*Scale to the current addressing mode.
3181 : /// Return true and update AddrMode if this addr mode is legal for the target,
3182 : /// false if not.
3183 68638 : bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3184 : unsigned Depth) {
3185 : // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3186 : // mode. Just process that directly.
3187 68638 : if (Scale == 1)
3188 23020 : return matchAddr(ScaleReg, Depth);
3189 :
3190 : // If the scale is 0, it takes nothing to add this.
3191 45618 : if (Scale == 0)
3192 : return true;
3193 :
3194 : // If we already have a scale of this value, we can add to it, otherwise, we
3195 : // need an available scale field.
3196 45614 : if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3197 : return false;
3198 :
3199 44931 : ExtAddrMode TestAddrMode = AddrMode;
3200 :
3201 : // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3202 : // [A+B + A*7] -> [B+A*8].
3203 44931 : TestAddrMode.Scale += Scale;
3204 44931 : TestAddrMode.ScaledReg = ScaleReg;
3205 :
3206 : // If the new address isn't legal, bail out.
3207 44931 : if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3208 : return false;
3209 :
3210 : // It was legal, so commit it.
3211 12900 : AddrMode = TestAddrMode;
3212 :
3213 : // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3214 : // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3215 : // X*Scale + C*Scale to addr mode.
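     : // A schematic example (values invented, legality assumed): if ScaleReg is
     : //   %idx = add i64 %x, 3
     : // and the combined Scale is 4, the mode can become ScaledReg = %x,
     : // Scale = 4, BaseOffs += 3 * 4, provided the target still accepts it.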
3216 12900 : ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3217 25021 : if (isa<Instruction>(ScaleReg) && // not a constant expr.
3218 22991 : match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3219 2030 : TestAddrMode.ScaledReg = AddLHS;
3220 2030 : TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3221 :
3222 : // If this addressing mode is legal, commit it and remember that we folded
3223 : // this instruction.
3224 2030 : if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3225 1937 : AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3226 1937 : AddrMode = TestAddrMode;
3227 1937 : return true;
3228 : }
3229 : }
3230 :
3231 : // Otherwise, not (x+c)*scale, just return what we have.
3232 : return true;
3233 : }
3234 :
3235 : /// This is a little filter, which returns true if an addressing computation
3236 : /// involving I might be folded into a load/store accessing it.
3237 : /// This doesn't need to be perfect, but needs to accept at least
3238 : /// the set of instructions that MatchOperationAddr can.
3239 45775 : static bool MightBeFoldableInst(Instruction *I) {
3240 : switch (I->getOpcode()) {
3241 17012 : case Instruction::BitCast:
3242 : case Instruction::AddrSpaceCast:
3243 : // Don't touch identity bitcasts.
3244 34024 : if (I->getType() == I->getOperand(0)->getType())
3245 : return false;
3246 : return I->getType()->isIntOrPtrTy();
3247 : case Instruction::PtrToInt:
3248 : // PtrToInt is always a noop, as we know that the int type is pointer sized.
3249 : return true;
3250 : case Instruction::IntToPtr:
3251 : // We know the input is intptr_t, so this is foldable.
3252 : return true;
3253 : case Instruction::Add:
3254 : return true;
3255 30 : case Instruction::Mul:
3256 : case Instruction::Shl:
3257 : // Can only handle X*C and X << C.
3258 30 : return isa<ConstantInt>(I->getOperand(1));
3259 : case Instruction::GetElementPtr:
3260 : return true;
3261 : default:
3262 : return false;
3263 : }
3264 : }
3265 :
3266 : /// Check whether or not \p Val is a legal instruction for \p TLI.
3267 : /// \note \p Val is assumed to be the product of some type promotion.
3268 : /// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3269 : /// to be legal, as the non-promoted value would have had the same state.
3270 2219 : static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3271 : const DataLayout &DL, Value *Val) {
3272 : Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3273 : if (!PromotedInst)
3274 : return false;
3275 4436 : int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3276 : // If the ISDOpcode is undefined, it was undefined before the promotion.
3277 2218 : if (!ISDOpcode)
3278 : return true;
3279 : // Otherwise, check if the promoted instruction is legal or not.
3280 2218 : return TLI.isOperationLegalOrCustom(
3281 2218 : ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3282 : }
3283 :
3284 : namespace {
3285 :
3286 : /// Helper class to perform type promotion.
3287 : class TypePromotionHelper {
3288 : /// Utility function to add a promoted instruction \p ExtOpnd to
3289 : /// \p PromotedInsts and record the type of extension we have seen.
3290 2169 : static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3291 : Instruction *ExtOpnd,
3292 : bool IsSExt) {
3293 2169 : ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3294 2169 : InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3295 2169 : if (It != PromotedInsts.end()) {
3296 : // If the new extension is same as original, the information in
3297 : // PromotedInsts[ExtOpnd] is still correct.
3298 1008 : if (It->second.getInt() == ExtTy)
3299 503 : return;
3300 :
3301 : // Now the new extension is different from the old extension, so we make
3302 : // the type information invalid by setting extension type to
3303 : // BothExtension.
3304 : ExtTy = BothExtension;
3305 : }
3306 1666 : PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3307 : }
3308 :
3309 : /// Utility function to query the original type of instruction \p Opnd
3310 : /// with a matched extension type. If the extension doesn't match, we
3311 : /// cannot use the information we had on the original type.
3312 : /// BothExtension doesn't match any extension type.
3313 965 : static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3314 : Instruction *Opnd,
3315 : bool IsSExt) {
3316 965 : ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3317 965 : InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3318 965 : if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3319 45 : return It->second.getPointer();
3320 : return nullptr;
3321 : }
3322 :
3323 : /// Utility function to check whether or not a sign or zero extension
3324 : /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3325 : /// either using the operands of \p Inst or promoting \p Inst.
3326 : /// The type of the extension is defined by \p IsSExt.
3327 : /// In other words, check if:
3328 : /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3329 : /// #1 Promotion applies:
3330 : /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3331 : /// #2 Operand reuses:
3332 : /// ext opnd1 to ConsideredExtType.
3333 : /// \p PromotedInsts maps the instructions to their type before promotion.
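     : /// An illustrative sketch (types assumed): given
     : ///   %a16 = add nsw i16 %x, %y
     : ///   %e   = sext i16 %a16 to i64
     : /// case #1 would allow rewriting to
     : ///   add nsw i64 (sext i16 %x to i64), (sext i16 %y to i64)
     : /// while for sext (trunc i64 %v to i16) to i64, case #2 can reuse a value
     : /// derived from %v when the trunc only drops sign-extended bits.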
3334 : static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3335 : const InstrToOrigTy &PromotedInsts, bool IsSExt);
3336 :
3337 : /// Utility function to determine if \p OpIdx should be promoted when
3338 : /// promoting \p Inst.
3339 : static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3340 4338 : return !(isa<SelectInst>(Inst) && OpIdx == 0);
3341 : }
3342 :
3343 : /// Utility function to promote the operand of \p Ext when this
3344 : /// operand is a promotable trunc or sext or zext.
3345 : /// \p PromotedInsts maps the instructions to their type before promotion.
3346 : /// \p CreatedInstsCost[out] contains the cost of all instructions
3347 : /// created to promote the operand of Ext.
3348 : /// Newly added extensions are inserted in \p Exts.
3349 : /// Newly added truncates are inserted in \p Truncs.
3350 : /// Should never be called directly.
3351 : /// \return The promoted value which is used instead of Ext.
3352 : static Value *promoteOperandForTruncAndAnyExt(
3353 : Instruction *Ext, TypePromotionTransaction &TPT,
3354 : InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3355 : SmallVectorImpl<Instruction *> *Exts,
3356 : SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3357 :
3358 : /// Utility function to promote the operand of \p Ext when this
3359 : /// operand is promotable and is not a supported trunc or sext.
3360 : /// \p PromotedInsts maps the instructions to their type before promotion.
3361 : /// \p CreatedInstsCost[out] contains the cost of all the instructions
3362 : /// created to promote the operand of Ext.
3363 : /// Newly added extensions are inserted in \p Exts.
3364 : /// Newly added truncates are inserted in \p Truncs.
3365 : /// Should never be called directly.
3366 : /// \return The promoted value which is used instead of Ext.
3367 : static Value *promoteOperandForOther(Instruction *Ext,
3368 : TypePromotionTransaction &TPT,
3369 : InstrToOrigTy &PromotedInsts,
3370 : unsigned &CreatedInstsCost,
3371 : SmallVectorImpl<Instruction *> *Exts,
3372 : SmallVectorImpl<Instruction *> *Truncs,
3373 : const TargetLowering &TLI, bool IsSExt);
3374 :
3375 : /// \see promoteOperandForOther.
3376 390 : static Value *signExtendOperandForOther(
3377 : Instruction *Ext, TypePromotionTransaction &TPT,
3378 : InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3379 : SmallVectorImpl<Instruction *> *Exts,
3380 : SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3381 390 : return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3382 390 : Exts, Truncs, TLI, true);
3383 : }
3384 :
3385 : /// \see promoteOperandForOther.
3386 1779 : static Value *zeroExtendOperandForOther(
3387 : Instruction *Ext, TypePromotionTransaction &TPT,
3388 : InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3389 : SmallVectorImpl<Instruction *> *Exts,
3390 : SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3391 1779 : return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3392 1779 : Exts, Truncs, TLI, false);
3393 : }
3394 :
3395 : public:
3396 : /// Type for the utility function that promotes the operand of Ext.
3397 : using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3398 : InstrToOrigTy &PromotedInsts,
3399 : unsigned &CreatedInstsCost,
3400 : SmallVectorImpl<Instruction *> *Exts,
3401 : SmallVectorImpl<Instruction *> *Truncs,
3402 : const TargetLowering &TLI);
3403 :
3404 : /// Given a sign/zero extend instruction \p Ext, return the appropriate
3405 : /// action to promote the operand of \p Ext instead of using Ext.
3406 : /// \return NULL if no promotable action is possible with the current
3407 : /// sign extension.
3408 : /// \p InsertedInsts keeps track of all the instructions inserted by the
3409 : /// other CodeGenPrepare optimizations. This information is important
3410 : /// because we do not want to promote these instructions as CodeGenPrepare
3411 : /// will reinsert them later, thus creating an infinite loop: create/remove.
3412 : /// \p PromotedInsts maps the instructions to their type before promotion.
3413 : static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3414 : const TargetLowering &TLI,
3415 : const InstrToOrigTy &PromotedInsts);
3416 : };
3417 :
3418 : } // end anonymous namespace
3419 :
3420 14462 : bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3421 : Type *ConsideredExtType,
3422 : const InstrToOrigTy &PromotedInsts,
3423 : bool IsSExt) {
3424 : // The promotion helper does not know how to deal with vector types yet.
3425 : // To be able to fix that, we would need to fix the places where we
3426 : // statically extend, e.g., constants and such.
3427 28924 : if (Inst->getType()->isVectorTy())
3428 : return false;
3429 :
3430 : // We can always get through zext.
3431 10133 : if (isa<ZExtInst>(Inst))
3432 : return true;
3433 :
3434 : // sext(sext) is ok too.
3435 9886 : if (IsSExt && isa<SExtInst>(Inst))
3436 : return true;
3437 :
3438 : // We can get through a binary operator if it is legal. In other words, the
3439 : // binary operator must have a nuw or nsw flag.
3440 : const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3441 1770 : if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3442 1770 : ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3443 493 : (IsSExt && BinOp->hasNoSignedWrap())))
3444 811 : return true;
3445 :
3446 : // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3447 9068 : if ((Inst->getOpcode() == Instruction::And ||
3448 : Inst->getOpcode() == Instruction::Or))
3449 : return true;
3450 :
3451 : // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3452 8480 : if (Inst->getOpcode() == Instruction::Xor) {
3453 935 : const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3454 : // Make sure it is not a NOT.
3455 873 : if (Cst && !Cst->getValue().isAllOnesValue())
3456 : return true;
3457 : }
3458 :
3459 : // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
3460 : // It may change a poisoned value into a regular value, like
3461 : // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
3462 : // poisoned value regular value
3463 : // It should be OK since undef covers valid value.
3464 8471 : if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3465 : return true;
3466 :
3467 : // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3468 : // It may change a poisoned value into a regular value, like
3469 : // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3470 : // poisoned value regular value
3471 : // It should be OK since undef covers valid value.
3472 7782 : if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3473 : const Instruction *ExtInst =
3474 : dyn_cast<const Instruction>(*Inst->user_begin());
3475 51 : if (ExtInst->hasOneUse()) {
3476 : const Instruction *AndInst =
3477 : dyn_cast<const Instruction>(*ExtInst->user_begin());
3478 50 : if (AndInst && AndInst->getOpcode() == Instruction::And) {
3479 5 : const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3480 5 : if (Cst &&
3481 5 : Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3482 : return true;
3483 : }
3484 : }
3485 : }
3486 :
3487 : // Check if we can do the following simplification.
3488 : // ext(trunc(opnd)) --> ext(opnd)
3489 7710 : if (!isa<TruncInst>(Inst))
3490 : return false;
3491 :
3492 1118 : Value *OpndVal = Inst->getOperand(0);
3493 : // Check if we can use this operand in the extension.
3494 : // If the type is larger than the result type of the extension, we cannot.
3495 2236 : if (!OpndVal->getType()->isIntegerTy() ||
3496 : OpndVal->getType()->getIntegerBitWidth() >
3497 : ConsideredExtType->getIntegerBitWidth())
3498 : return false;
3499 :
3500 : // If the operand of the truncate is not an instruction, we will not have
3501 : // any information on the dropped bits.
3502 : // (Actually we could for constants, but it is not worth the extra logic.)
3503 : Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3504 : if (!Opnd)
3505 : return false;
3506 :
3507 : // Check if the source type is narrow enough.
3508 : // I.e., check that the trunc just drops extended bits of the same kind as
3509 : // the extension.
3510 : // #1 get the type of the operand and check the kind of the extended bits.
3511 965 : const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3512 965 : if (OpndType)
3513 : ;
3514 920 : else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3515 636 : OpndType = Opnd->getOperand(0)->getType();
3516 : else
3517 : return false;
3518 :
3519 : // #2 check that the truncate just drops extended bits.
3520 363 : return Inst->getType()->getIntegerBitWidth() >=
3521 363 : OpndType->getIntegerBitWidth();
3522 : }
3523 :
3524 18611 : TypePromotionHelper::Action TypePromotionHelper::getAction(
3525 : Instruction *Ext, const SetOfInstrs &InsertedInsts,
3526 : const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3527 : assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3528 : "Unexpected instruction type");
3529 18611 : Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3530 18611 : Type *ExtTy = Ext->getType();
3531 : bool IsSExt = isa<SExtInst>(Ext);
3532 : // If the operand of the extension is not an instruction, we cannot
3533 : // get through.
3534 : // If it, check we can get through.
3535 18611 : if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3536 15827 : return nullptr;
3537 :
3538 : // Do not promote if the operand has been added by codegenprepare.
3539 : // Otherwise, it means we are undoing an optimization that is likely to be
3540 : // redone, thus causing potential infinite loop.
3541 2784 : if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3542 : return nullptr;
3543 :
3544 : // SExt or Trunc instructions.
3545 : // Return the related handler.
3546 2475 : if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3547 : isa<ZExtInst>(ExtOpnd))
3548 : return promoteOperandForTruncAndAnyExt;
3549 :
3550 : // Regular instruction.
3551 : // Abort early if we will have to insert non-free instructions.
3552 2169 : if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3553 : return nullptr;
3554 2169 : return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3555 : }
3556 :
3557 306 : Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3558 : Instruction *SExt, TypePromotionTransaction &TPT,
3559 : InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3560 : SmallVectorImpl<Instruction *> *Exts,
3561 : SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3562 : // By construction, the operand of SExt is an instruction. Otherwise we cannot
3563 : // get through it and this method should not be called.
3564 306 : Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3565 : Value *ExtVal = SExt;
3566 : bool HasMergedNonFreeExt = false;
3567 306 : if (isa<ZExtInst>(SExtOpnd)) {
3568 : // Replace s|zext(zext(opnd))
3569 : // => zext(opnd).
3570 247 : HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3571 : Value *ZExt =
3572 494 : TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3573 247 : TPT.replaceAllUsesWith(SExt, ZExt);
3574 247 : TPT.eraseInstruction(SExt);
3575 : ExtVal = ZExt;
3576 : } else {
3577 : // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3578 : // => z|sext(opnd).
3579 118 : TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3580 : }
3581 306 : CreatedInstsCost = 0;
3582 :
3583 : // Remove dead code.
3584 306 : if (SExtOpnd->use_empty())
3585 211 : TPT.eraseInstruction(SExtOpnd);
3586 :
3587 : // Check if the extension is still needed.
3588 306 : Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3589 610 : if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3590 269 : if (ExtInst) {
3591 267 : if (Exts)
3592 255 : Exts->push_back(ExtInst);
3593 522 : CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3594 : }
3595 269 : return ExtVal;
3596 : }
3597 :
3598 : // At this point we have: ext ty opnd to ty.
3599 : // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3600 : Value *NextVal = ExtInst->getOperand(0);
3601 37 : TPT.eraseInstruction(ExtInst, NextVal);
3602 37 : return NextVal;
3603 : }
3604 :
3605 2169 : Value *TypePromotionHelper::promoteOperandForOther(
3606 : Instruction *Ext, TypePromotionTransaction &TPT,
3607 : InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3608 : SmallVectorImpl<Instruction *> *Exts,
3609 : SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
3610 : bool IsSExt) {
3611 : // By construction, the operand of Ext is an instruction. Otherwise we cannot
3612 : // get through it and this method should not be called.
3613 2169 : Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
3614 2169 : CreatedInstsCost = 0;
3615 2169 : if (!ExtOpnd->hasOneUse()) {
3616 : // ExtOpnd will be promoted.
3617 : // All its uses, but Ext, will need to use a truncated value of the
3618 : // promoted version.
3619 : // Create the truncate now.
3620 173 : Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
3621 173 : if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
3622 : // Insert it just after the definition.
3623 173 : ITrunc->moveAfter(ExtOpnd);
3624 173 : if (Truncs)
3625 0 : Truncs->push_back(ITrunc);
3626 : }
3627 :
3628 173 : TPT.replaceAllUsesWith(ExtOpnd, Trunc);
3629 : // Restore the operand of Ext (which has been replaced by the previous call
3630 : // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
3631 173 : TPT.setOperand(Ext, 0, ExtOpnd);
3632 : }
3633 :
3634 : // Get through the Instruction:
3635 : // 1. Update its type.
3636 : // 2. Replace the uses of Ext by Inst.
3637 : // 3. Extend each operand that needs to be extended.
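     : //
     : // A schematic sketch of these steps (values invented): given
     : //   %op  = or i32 %a, 7
     : //   %ext = zext i32 %op to i64
     : // step #1 mutates %op's type to i64, step #2 replaces uses of %ext with
     : // %op, and step #3 statically widens the constant to i64 7 and extends the
     : // remaining operand %a to i64 (reusing or creating a zext).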
3638 :
3639 : // Remember the original type of the instruction before promotion.
3640 : // This is useful to know that the high bits are sign extended bits.
3641 2169 : addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
3642 : // Step #1.
3643 2169 : TPT.mutateType(ExtOpnd, Ext->getType());
3644 : // Step #2.
3645 2169 : TPT.replaceAllUsesWith(Ext, ExtOpnd);
3646 : // Step #3.
3647 2169 : Instruction *ExtForOpnd = Ext;
3648 :
3649 : LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
3650 6507 : for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
3651 : ++OpIdx) {
3652 : LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
3653 8676 : if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
3654 : !shouldExtOperand(ExtOpnd, OpIdx)) {
3655 : LLVM_DEBUG(dbgs() << "No need to propagate\n");
3656 : continue;
3657 : }
3658 : // Check if we can statically extend the operand.
3659 : Value *Opnd = ExtOpnd->getOperand(OpIdx);
3660 : if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
3661 : LLVM_DEBUG(dbgs() << "Statically extend\n");
3662 : unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
3663 : APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
3664 1819 : : Cst->getValue().zext(BitWidth);
3665 1819 : TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
3666 : continue;
3667 : }
3668 : // UndefValue are typed, so we have to statically sign extend them.
3669 2519 : if (isa<UndefValue>(Opnd)) {
3670 : LLVM_DEBUG(dbgs() << "Statically extend\n");
3671 0 : TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
3672 0 : continue;
3673 : }
3674 :
3675 : // Otherwise we have to explicitly sign extend the operand.
3676 : // Check if Ext was reused to extend an operand.
3677 2519 : if (!ExtForOpnd) {
3678 : // If yes, create a new one.
3679 : LLVM_DEBUG(dbgs() << "More operands to ext\n");
3680 350 : Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
3681 247 : : TPT.createZExt(Ext, Opnd, Ext->getType());
3682 350 : if (!isa<Instruction>(ValForExtOpnd)) {
3683 6 : TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
3684 : continue;
3685 : }
3686 344 : ExtForOpnd = cast<Instruction>(ValForExtOpnd);
3687 : }
3688 2513 : if (Exts)
3689 2235 : Exts->push_back(ExtForOpnd);
3690 2513 : TPT.setOperand(ExtForOpnd, 0, Opnd);
3691 :
3692 : // Move the sign extension before the insertion point.
3693 2513 : TPT.moveBefore(ExtForOpnd, ExtOpnd);
3694 2513 : TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
3695 2513 : CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
3696 : // If more sext are required, new instructions will have to be created.
3697 2513 : ExtForOpnd = nullptr;
3698 : }
3699 2169 : if (ExtForOpnd == Ext) {
3700 : LLVM_DEBUG(dbgs() << "Extension is useless now\n");
3701 0 : TPT.eraseInstruction(Ext);
3702 : }
3703 2169 : return ExtOpnd;
3704 : }
3705 :
3706 : /// Check whether or not promoting an instruction to a wider type is profitable.
3707 : /// \p NewCost gives the cost of extension instructions created by the
3708 : /// promotion.
3709 : /// \p OldCost gives the cost of extension instructions before the promotion
3710 : /// plus the number of instructions that have been
3711 : /// matched in the addressing mode by the promotion.
3712 : /// \p PromotedOperand is the value that has been promoted.
3713 : /// \return True if the promotion is profitable, false otherwise.
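     : /// For example (a sketch of the cost comparison below): one non-free
     : /// extension created by the promotion (NewCost == 1) against a non-free
     : /// original extension plus one folded instruction (OldCost == 2) is
     : /// profitable; equal costs fall back to checking that the promoted
     : /// instruction is still legal for the target.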
3714 0 : bool AddressingModeMatcher::isPromotionProfitable(
3715 : unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
3716 : LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
3717 : << '\n');
3718 : // The cost of the new extensions is greater than the cost of the
3719 : // old extension plus what we folded.
3720 : // This is not profitable.
3721 0 : if (NewCost > OldCost)
3722 0 : return false;
3723 195 : if (NewCost < OldCost)
3724 0 : return true;
3725 : // The promotion is neutral but it may help folding the sign extension in
3726 : // loads for instance.
3727 : // Check that we did not create an illegal instruction.
3728 105 : return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
3729 : }
3730 :
3731 : /// Given an instruction or constant expr, see if we can fold the operation
3732 : /// into the addressing mode. If so, update the addressing mode and return
3733 : /// true, otherwise return false without modifying AddrMode.
3734 : /// If \p MovedAway is not NULL, it indicates whether or not
3735 : /// AddrInst has to be folded into the addressing mode on success.
3736 : /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
3737 : /// because it has been moved away.
3738 : /// Thus AddrInst must not be added in the matched instructions.
3739 : /// This state can happen when AddrInst is a sext, since it may be moved away.
3740 : /// Therefore, AddrInst may not be valid when MovedAway is true and it must
3741 : /// not be referenced anymore.
3742 2227831 : bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
3743 : unsigned Depth,
3744 : bool *MovedAway) {
3745 : // Avoid exponential behavior on extremely deep expression trees.
3746 2227831 : if (Depth >= 5) return false;
3747 :
3748 : // By default, all matched instructions stay in place.
3749 2227731 : if (MovedAway)
3750 739020 : *MovedAway = false;
3751 :
3752 2227731 : switch (Opcode) {
3753 518 : case Instruction::PtrToInt:
3754 : // PtrToInt is always a noop, as we know that the int type is pointer sized.
3755 518 : return matchAddr(AddrInst->getOperand(0), Depth);
3756 4802 : case Instruction::IntToPtr: {
3757 4802 : auto AS = AddrInst->getType()->getPointerAddressSpace();
3758 4802 : auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
3759 : // This inttoptr is a no-op if the integer type is pointer sized.
3760 14406 : if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
3761 4739 : return matchAddr(AddrInst->getOperand(0), Depth);
3762 : return false;
3763 : }
3764 588626 : case Instruction::BitCast:
3765 : // BitCast is always a noop, and we can handle it as long as it is
3766 : // int->int or pointer->pointer (we don't want int<->fp or something).
3767 588626 : if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
3768 : // Don't touch identity bitcasts. These were probably put here by LSR,
3769 : // and we don't want to mess around with them. Assume it knows what it
3770 : // is doing.
3771 588622 : AddrInst->getOperand(0)->getType() != AddrInst->getType())
3772 583140 : return matchAddr(AddrInst->getOperand(0), Depth);
3773 : return false;
3774 312 : case Instruction::AddrSpaceCast: {
3775 : unsigned SrcAS
3776 312 : = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
3777 312 : unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
3778 312 : if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3779 104 : return matchAddr(AddrInst->getOperand(0), Depth);
3780 : return false;
3781 : }
3782 3346 : case Instruction::Add: {
3783 : // Check to see if we can merge in the RHS then the LHS. If so, we win.
3784 3346 : ExtAddrMode BackupAddrMode = AddrMode;
3785 3346 : unsigned OldSize = AddrModeInsts.size();
3786 : // Start a transaction at this point.
3787 : // The LHS may match but not the RHS.
3788 : // Therefore, we need a higher level restoration point to undo partially
3789 : // matched operation.
3790 : TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3791 3346 : TPT.getRestorationPoint();
3792 :
3793 9713 : if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
3794 3021 : matchAddr(AddrInst->getOperand(0), Depth+1))
3795 : return true;
3796 :
3797 : // Restore the old addr mode info.
3798 846 : AddrMode = BackupAddrMode;
3799 846 : AddrModeInsts.resize(OldSize);
3800 846 : TPT.rollback(LastKnownGood);
3801 :
3802 : // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
3803 1176 : if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
3804 330 : matchAddr(AddrInst->getOperand(1), Depth+1))
3805 : return true;
3806 :
3807 : // Otherwise we definitely can't merge the ADD in.
3808 845 : AddrMode = BackupAddrMode;
3809 845 : AddrModeInsts.resize(OldSize);
3810 845 : TPT.rollback(LastKnownGood);
3811 : break;
3812 : }
3813 : //case Instruction::Or:
3814 : // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
3815 : //break;
3816 3835 : case Instruction::Mul:
3817 : case Instruction::Shl: {
3818 : // Can only handle X*C and X << C.
3819 : ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
3820 3597 : if (!RHS || RHS->getBitWidth() > 64)
3821 : return false;
3822 : int64_t Scale = RHS->getSExtValue();
3823 3596 : if (Opcode == Instruction::Shl)
3824 1835 : Scale = 1LL << Scale;
3825 :
3826 3596 : return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
3827 : }
3828 1351625 : case Instruction::GetElementPtr: {
3829 : // Scan the GEP. We check whether it contains constant offsets and at most
3830 : // one variable offset.
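     : // An illustrative sketch (layout assumed): for
     : //   getelementptr {i32, i32}, {i32, i32}* %p, i64 1, i32 1
     : // the i64 index contributes 1 * 8 bytes and the struct index contributes
     : // the offset of field 1 (4 bytes), so ConstantOffset becomes 12 with no
     : // variable operand.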
3831 : int VariableOperand = -1;
3832 : unsigned VariableScale = 0;
3833 :
3834 : int64_t ConstantOffset = 0;
3835 1351625 : gep_type_iterator GTI = gep_type_begin(AddrInst);
3836 3983397 : for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
3837 139880 : if (StructType *STy = GTI.getStructTypeOrNull()) {
3838 139880 : const StructLayout *SL = DL.getStructLayout(STy);
3839 : unsigned Idx =
3840 139880 : cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
3841 139880 : ConstantOffset += SL->getElementOffset(Idx);
3842 : } else {
3843 2492831 : uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
3844 : if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
3845 : const APInt &CVal = CI->getValue();
3846 2443429 : if (CVal.getMinSignedBits() <= 64) {
3847 2443427 : ConstantOffset += CVal.getSExtValue() * TypeSize;
3848 2443427 : continue;
3849 : }
3850 : }
3851 49404 : if (TypeSize) { // Scales of zero don't do anything.
3852 : // We only allow one variable index at the moment.
3853 49395 : if (VariableOperand != -1)
3854 : return false;
3855 :
3856 : // Remember the variable index.
3857 48456 : VariableOperand = i;
3858 48456 : VariableScale = TypeSize;
3859 : }
3860 : }
3861 : }
3862 :
3863 : // A common case is for the GEP to only do a constant offset. In this case,
3864 : // just add it to the disp field and check validity.
3865 1350686 : if (VariableOperand == -1) {
3866 1303169 : AddrMode.BaseOffs += ConstantOffset;
3867 2467646 : if (ConstantOffset == 0 ||
3868 1164477 : TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
3869 : // Check to see if we can fold the base pointer in too.
3870 2599000 : if (matchAddr(AddrInst->getOperand(0), Depth+1))
3871 : return true;
3872 3542 : } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
3873 7485 : TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
3874 : ConstantOffset > 0) {
3875 : // Record GEPs with non-zero offsets as candidates for splitting in the
3876 : // event that the offset cannot fit into the r+i addressing mode.
3877 : // Simple and common case that only one GEP is used in calculating the
3878 : // address for the memory access.
3879 : Value *Base = AddrInst->getOperand(0);
3880 : auto *BaseI = dyn_cast<Instruction>(Base);
3881 : auto *GEP = cast<GetElementPtrInst>(AddrInst);
3882 311 : if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
3883 71 : (BaseI && !isa<CastInst>(BaseI) &&
3884 : !isa<GetElementPtrInst>(BaseI))) {
3885 : // If the base is an instruction, make sure the GEP is not in the same
3886 : // basic block as the base. If the base is an argument or global
3887 : // value, make sure the GEP is not in the entry block. Otherwise,
3888 : // instruction selection can undo the split. Also make sure the
3889 : // parent block allows inserting non-PHI instructions before the
3890 : // terminator.
3891 : BasicBlock *Parent =
3892 205 : BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
3893 205 : if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
3894 28 : LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
3895 : }
3896 : }
3897 3669 : AddrMode.BaseOffs -= ConstantOffset;
3898 3669 : return false;
3899 : }
3900 :
3901 : // Save the valid addressing mode in case we can't match.
3902 47517 : ExtAddrMode BackupAddrMode = AddrMode;
3903 47517 : unsigned OldSize = AddrModeInsts.size();
3904 :
3905 : // See if the scale and offset amount is valid for this target.
3906 47517 : AddrMode.BaseOffs += ConstantOffset;
3907 :
3908 : // Match the base operand of the GEP.
3909 95034 : if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
3910 : // If it couldn't be matched, just stuff the value in a register.
3911 5 : if (AddrMode.HasBaseReg) {
3912 0 : AddrMode = BackupAddrMode;
3913 0 : AddrModeInsts.resize(OldSize);
3914 0 : return false;
3915 : }
3916 5 : AddrMode.HasBaseReg = true;
3917 5 : AddrMode.BaseReg = AddrInst->getOperand(0);
3918 : }
3919 :
3920 : // Match the remaining variable portion of the GEP.
3921 95034 : if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
3922 : Depth)) {
3923 : // If it couldn't be matched, try stuffing the base into a register
3924 : // instead of matching it, and retrying the match of the scale.
3925 17534 : AddrMode = BackupAddrMode;
3926 17534 : AddrModeInsts.resize(OldSize);
3927 17534 : if (AddrMode.HasBaseReg)
3928 : return false;
3929 17525 : AddrMode.HasBaseReg = true;
3930 17525 : AddrMode.BaseReg = AddrInst->getOperand(0);
3931 17525 : AddrMode.BaseOffs += ConstantOffset;
3932 17525 : if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
3933 : VariableScale, Depth)) {
3934 : // If even that didn't work, bail.
3935 16264 : AddrMode = BackupAddrMode;
3936 16264 : AddrModeInsts.resize(OldSize);
3937 16264 : return false;
3938 : }
3939 : }
3940 :
3941 : return true;
3942 : }
3943 1303 : case Instruction::SExt:
3944 : case Instruction::ZExt: {
3945 : Instruction *Ext = dyn_cast<Instruction>(AddrInst);
3946 : if (!Ext)
3947 : return false;
3948 :
3949 : // Try to move this ext out of the way of the addressing mode.
3950 : // Ask for a method for doing so.
3951 : TypePromotionHelper::Action TPH =
3952 1302 : TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
3953 1302 : if (!TPH)
3954 : return false;
3955 :
3956 : TypePromotionTransaction::ConstRestorationPt LastKnownGood =
3957 274 : TPT.getRestorationPoint();
3958 274 : unsigned CreatedInstsCost = 0;
3959 274 : unsigned ExtCost = !TLI.isExtFree(Ext);
3960 : Value *PromotedOperand =
3961 274 : TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
3962 : // SExt has been moved away.
3963 : // Thus either it will be rematched later in the recursive calls or it is
3964 : // gone. Anyway, we must not fold it into the addressing mode at this point.
3965 : // E.g.,
3966 : // op = add opnd, 1
3967 : // idx = ext op
3968 : // addr = gep base, idx
3969 : // is now:
3970 : // promotedOpnd = ext opnd <- no match here
3971 : // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
3972 : // addr = gep base, op <- match
3973 274 : if (MovedAway)
3974 274 : *MovedAway = true;
3975 :
3976 : assert(PromotedOperand &&
3977 : "TypePromotionHelper should have filtered out those cases");
3978 :
3979 274 : ExtAddrMode BackupAddrMode = AddrMode;
3980 274 : unsigned OldSize = AddrModeInsts.size();
3981 :
3982 379 : if (!matchAddr(PromotedOperand, Depth) ||
3983 : // The total of the new cost is equal to the cost of the created
3984 : // instructions.
3985 : // The total of the old cost is equal to the cost of the extension plus
3986 : // what we have saved in the addressing mode.
3987 206 : !isPromotionProfitable(CreatedInstsCost,
3988 206 : ExtCost + (AddrModeInsts.size() - OldSize),
3989 : PromotedOperand)) {
3990 109 : AddrMode = BackupAddrMode;
3991 109 : AddrModeInsts.resize(OldSize);
3992 : LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
3993 109 : TPT.rollback(LastKnownGood);
3994 109 : return false;
3995 : }
3996 : return true;
3997 : }
3998 : }
3999 : return false;
4000 : }
4001 :
4002 : /// If we can, try to add the value of 'Addr' into the current addressing mode.
4003 : /// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4004 : /// unmodified. This assumes that Addr is either a pointer type or intptr_t
4005 : /// for the target.
4006 : ///
4007 3898032 : bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4008 : // Start a transaction at this point that we will rollback if the matching
4009 : // fails.
4010 : TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4011 3898032 : TPT.getRestorationPoint();
4012 : if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4013 : // Fold in immediates if legal for the target.
4014 2689 : AddrMode.BaseOffs += CI->getSExtValue();
4015 2689 : if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4016 : return true;
4017 732 : AddrMode.BaseOffs -= CI->getSExtValue();
4018 : } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4019 : // If this is a global variable, try to fold it into the addressing mode.
4020 1508582 : if (!AddrMode.BaseGV) {
4021 1508582 : AddrMode.BaseGV = GV;
4022 1508582 : if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4023 : return true;
4024 562757 : AddrMode.BaseGV = nullptr;
4025 : }
4026 2386761 : } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4027 739120 : ExtAddrMode BackupAddrMode = AddrMode;
4028 739120 : unsigned OldSize = AddrModeInsts.size();
4029 :
4030 : // Check to see if it is possible to fold this operation.
4031 739120 : bool MovedAway = false;
4032 739120 : if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4033 : // This instruction may have been moved away. If so, there is nothing
4034 : // to check here.
4035 435450 : if (MovedAway)
4036 427055 : return true;
4037 : // Okay, it's possible to fold this. Check to see if it is actually
4038 : // *profitable* to do so. We use a simple cost model to avoid increasing
4039 : // register pressure too much.
4040 505838 : if (I->hasOneUse() ||
4041 70553 : isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4042 426890 : AddrModeInsts.push_back(I);
4043 426890 : return true;
4044 : }
4045 :
4046 : // It isn't profitable to do this, roll back.
4047 : //cerr << "NOT FOLDING: " << *I;
4048 8395 : AddrMode = BackupAddrMode;
4049 8395 : AddrModeInsts.resize(OldSize);
4050 8395 : TPT.rollback(LastKnownGood);
4051 : }
4052 : } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4053 1488711 : if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4054 : return true;
4055 179 : TPT.rollback(LastKnownGood);
4056 158930 : } else if (isa<ConstantPointerNull>(Addr)) {
4057 : // Null pointer gets folded without affecting the addressing mode.
4058 : return true;
4059 : }
4060 :
4061 : // Worse case, the target should support [reg] addressing modes. :)
4062 1033178 : if (!AddrMode.HasBaseReg) {
4063 1009302 : AddrMode.HasBaseReg = true;
4064 1009302 : AddrMode.BaseReg = Addr;
4065 : // Still check for legality in case the target supports [imm] but not [i+r].
4066 1009302 : if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4067 : return true;
4068 10 : AddrMode.HasBaseReg = false;
4069 10 : AddrMode.BaseReg = nullptr;
4070 : }
4071 :
4072 : // If the base register is already taken, see if we can do [r+r].
4073 23886 : if (AddrMode.Scale == 0) {
4074 22734 : AddrMode.Scale = 1;
4075 22734 : AddrMode.ScaledReg = Addr;
4076 22734 : if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4077 : return true;
4078 3221 : AddrMode.Scale = 0;
4079 3221 : AddrMode.ScaledReg = nullptr;
4080 : }
4081 : // Couldn't match.
4082 4373 : TPT.rollback(LastKnownGood);
4083 4373 : return false;
4084 : }
4085 :
4086 : /// Check to see if all uses of OpVal by the specified inline asm call are due
4087 : /// to memory operands. If so, return true, otherwise return false.
4088 0 : static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4089 : const TargetLowering &TLI,
4090 : const TargetRegisterInfo &TRI) {
4091 0 : const Function *F = CI->getFunction();
4092 : TargetLowering::AsmOperandInfoVector TargetConstraints =
4093 : TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4094 0 : ImmutableCallSite(CI));
4095 :
4096 0 : for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4097 0 : TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4098 :
4099 : // Compute the constraint code and ConstraintType to use.
4100 0 : TLI.ComputeConstraintToUse(OpInfo, SDValue());
4101 :
4102 : // If this asm operand is our Value*, and if it isn't an indirect memory
4103 : // operand, we can't fold it!
4104 0 : if (OpInfo.CallOperandVal == OpVal &&
4105 0 : (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4106 0 : !OpInfo.isIndirect))
4107 0 : return false;
4108 : }
4109 :
4110 : return true;
4111 : }
4112 :
4113 : // Max number of memory uses to look at before aborting the search to conserve
4114 : // compile time.
4115 : static constexpr int MaxMemoryUsesToScan = 20;
4116 :
4117 : /// Recursively walk all the uses of I until we find a memory use.
4118 : /// If we find an obviously non-foldable instruction, return true.
4119 : /// Add the ultimately found memory instructions to MemoryUses.
4120 45775 : static bool FindAllMemoryUses(
4121 : Instruction *I,
4122 : SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4123 : SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4124 : const TargetRegisterInfo &TRI, int SeenInsts = 0) {
4125 : // If we already considered this instruction, we're done.
4126 45775 : if (!ConsideredInsts.insert(I).second)
4127 : return false;
4128 :
4129 : // If this is an obviously unfoldable instruction, bail out.
4130 45775 : if (!MightBeFoldableInst(I))
4131 : return true;
4132 :
4133 39934 : const bool OptSize = I->getFunction()->optForSize();
4134 :
4135 : // Loop over all the uses, recursively processing them.
4136 101627 : for (Use &U : I->uses()) {
4137 : // Conservatively return true if we're seeing a large number or a deep chain
4138 : // of users. This avoids excessive compilation times in pathological cases.
4139 71643 : if (SeenInsts++ >= MaxMemoryUsesToScan)
4140 : return true;
4141 :
4142 70877 : Instruction *UserI = cast<Instruction>(U.getUser());
4143 : if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4144 24847 : MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4145 : continue;
4146 : }
4147 :
4148 : if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4149 11298 : unsigned opNo = U.getOperandNo();
4150 11298 : if (opNo != StoreInst::getPointerOperandIndex())
4151 : return true; // Storing addr, not into addr.
4152 22390 : MemoryUses.push_back(std::make_pair(SI, opNo));
4153 : continue;
4154 : }
4155 :
4156 : if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4157 54 : unsigned opNo = U.getOperandNo();
4158 54 : if (opNo != AtomicRMWInst::getPointerOperandIndex())
4159 : return true; // Storing addr, not into addr.
4160 108 : MemoryUses.push_back(std::make_pair(RMW, opNo));
4161 : continue;
4162 : }
4163 :
4164 : if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4165 3 : unsigned opNo = U.getOperandNo();
4166 3 : if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4167 : return true; // Storing addr, not into addr.
4168 6 : MemoryUses.push_back(std::make_pair(CmpX, opNo));
4169 : continue;
4170 : }
4171 :
4172 : if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4173 : // If this is a cold call, we can sink the addressing calculation into
4174 : // the cold path. See optimizeCallInst
4175         :       // the cold path. See optimizeCallInst.
4176 : continue;
4177 :
4178 : InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4179 : if (!IA) return true;
4180 :
4181 : // If this is a memory operand, we're cool, otherwise bail out.
4182 14 : if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4183 : return true;
4184 : continue;
4185 : }
4186 :
4187 33910 : if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
4188 : SeenInsts))
4189 : return true;
4190 : }
4191 :
4192 : return false;
4193 : }
4194 :
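// A simplified, self-contained sketch of the bounded use-graph walk that
// FindAllMemoryUses() above performs: recurse through users, remember nodes
// already considered, collect memory uses, and conservatively give up once a
// fixed scan budget is exhausted. The Node type and its IsMemoryUse flag are
// hypothetical; the real code walks llvm::Use edges and distinguishes
// load/store/atomic/call users.
#include <unordered_set>
#include <vector>

namespace memuse_sketch {

struct Node {
  bool IsMemoryUse = false;
  std::vector<Node *> Users;
};

constexpr int MaxUsesToScan = 20;

// Returns true if the walk must be aborted (budget exhausted); memory uses
// found along the way are appended to MemoryUses.
static bool findAllMemoryUses(Node *N, std::vector<Node *> &MemoryUses,
                              std::unordered_set<Node *> &Visited,
                              int &SeenUses) {
  if (!Visited.insert(N).second)
    return false; // Already considered this node.
  for (Node *User : N->Users) {
    if (SeenUses++ >= MaxUsesToScan)
      return true; // Conservatively bail out on large or deep use chains.
    if (User->IsMemoryUse) {
      MemoryUses.push_back(User); // Found a potentially foldable memory use.
      continue;
    }
    if (findAllMemoryUses(User, MemoryUses, Visited, SeenUses))
      return true; // Propagate the abort upward.
  }
  return false;
}

} // namespace memuse_sketch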
4195 : /// Return true if Val is already known to be live at the use site that we're
4196 : /// folding it into. If so, there is no cost to include it in the addressing
4197 : /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4198 : /// instruction already.
4199 0 : bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4200 : Value *KnownLive2) {
4201 : // If Val is either of the known-live values, we know it is live!
4202 0 : if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4203 0 : return true;
4204 :
4205 : // All values other than instructions and arguments (e.g. constants) are live.
4206 0 : if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4207 :
4208         :   // If Val is a constant-sized alloca in the entry block, it is live; this is
4209 : // true because it is just a reference to the stack/frame pointer, which is
4210 : // live for the whole function.
4211 : if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4212 0 : if (AI->isStaticAlloca())
4213 0 : return true;
4214 :
4215 : // Check to see if this value is already used in the memory instruction's
4216 : // block. If so, it's already live into the block at the very least, so we
4217 : // can reasonably fold it.
4218 0 : return Val->isUsedInBasicBlock(MemoryInst->getParent());
4219 : }
4220 :
4221 : /// It is possible for the addressing mode of the machine to fold the specified
4222 : /// instruction into a load or store that ultimately uses it.
4223 : /// However, the specified instruction has multiple uses.
4224 : /// Given this, it may actually increase register pressure to fold it
4225 : /// into the load. For example, consider this code:
4226 : ///
4227 : /// X = ...
4228 : /// Y = X+1
4229 : /// use(Y) -> nonload/store
4230 : /// Z = Y+1
4231 : /// load Z
4232 : ///
4233 : /// In this case, Y has multiple uses, and can be folded into the load of Z
4234 : /// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4235 : /// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4236 : /// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4237 : /// number of computations either.
4238 : ///
4239 : /// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4240 : /// X was live across 'load Z' for other reasons, we actually *would* want to
4241 : /// fold the addressing mode in the Z case. This would make Y die earlier.
4242 0 : bool AddressingModeMatcher::
4243 : isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4244 : ExtAddrMode &AMAfter) {
4245 0 : if (IgnoreProfitability) return true;
4246 :
4247 : // AMBefore is the addressing mode before this instruction was folded into it,
4248 : // and AMAfter is the addressing mode after the instruction was folded. Get
4249 : // the set of registers referenced by AMAfter and subtract out those
4250 : // referenced by AMBefore: this is the set of values which folding in this
4251 : // address extends the lifetime of.
4252 : //
4253 : // Note that there are only two potential values being referenced here,
4254 : // BaseReg and ScaleReg (global addresses are always available, as are any
4255 : // folded immediates).
4256 0 : Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4257 :
4258 : // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4259 : // lifetime wasn't extended by adding this instruction.
4260 0 : if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4261 : BaseReg = nullptr;
4262 0 : if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4263 : ScaledReg = nullptr;
4264 :
4265         :   // If folding this instruction (and its subexprs) didn't extend any live
4266 : // ranges, we're ok with it.
4267 0 : if (!BaseReg && !ScaledReg)
4268 0 : return true;
4269 :
4270 : // If all uses of this instruction can have the address mode sunk into them,
4271 : // we can remove the addressing mode and effectively trade one live register
4272 : // for another (at worst.) In this context, folding an addressing mode into
4273 : // the use is just a particularly nice way of sinking it.
4274 : SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
4275 : SmallPtrSet<Instruction*, 16> ConsideredInsts;
4276 0 : if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
4277 0 : return false; // Has a non-memory, non-foldable use!
4278 :
4279 : // Now that we know that all uses of this instruction are part of a chain of
4280 : // computation involving only operations that could theoretically be folded
4281 : // into a memory use, loop over each of these memory operation uses and see
4282 : // if they could *actually* fold the instruction. The assumption is that
4283 : // addressing modes are cheap and that duplicating the computation involved
4284 : // many times is worthwhile, even on a fastpath. For sinking candidates
4285 : // (i.e. cold call sites), this serves as a way to prevent excessive code
4286 : // growth since most architectures have some reasonable small and fast way to
4287         :   // growth since most architectures have some reasonably small and fast way to
4288         :   // compute an effective address (e.g., LEA on x86).
4289 0 : for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4290 0 : Instruction *User = MemoryUses[i].first;
4291 0 : unsigned OpNo = MemoryUses[i].second;
4292 :
4293 : // Get the access type of this use. If the use isn't a pointer, we don't
4294 : // know what it accesses.
4295 0 : Value *Address = User->getOperand(OpNo);
4296 0 : PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4297 : if (!AddrTy)
4298 0 : return false;
4299 0 : Type *AddressAccessTy = AddrTy->getElementType();
4300 : unsigned AS = AddrTy->getAddressSpace();
4301 :
4302 : // Do a match against the root of this address, ignoring profitability. This
4303 : // will tell us if the addressing mode for the memory operation will
4304 : // *actually* cover the shared instruction.
4305 0 : ExtAddrMode Result;
4306 : std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4307 : 0);
4308 : TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4309 0 : TPT.getRestorationPoint();
4310 : AddressingModeMatcher Matcher(
4311 : MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4312 0 : InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4313 0 : Matcher.IgnoreProfitability = true;
4314 0 : bool Success = Matcher.matchAddr(Address, 0);
4315 : (void)Success; assert(Success && "Couldn't select *anything*?");
4316 :
4317 : // The match was to check the profitability, the changes made are not
4318 : // part of the original matcher. Therefore, they should be dropped
4319 : // otherwise the original matcher will not present the right state.
4320 0 : TPT.rollback(LastKnownGood);
4321 :
4322 : // If the match didn't cover I, then it won't be shared by it.
4323 0 : if (!is_contained(MatchedAddrModeInsts, I))
4324 0 : return false;
4325 :
4326 : MatchedAddrModeInsts.clear();
4327 : }
4328 :
4329 : return true;
4330 : }
4331 :
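// A compact sketch of the decision isProfitableToFoldIntoAddressingMode()
// above makes: folding is free when it does not extend any live range;
// otherwise it is accepted only if every memory use of the instruction could
// fold the same computation itself. The predicates passed in are hypothetical
// stand-ins for valueAlreadyLiveAtInst() and the per-use re-matching loop.
#include <vector>

namespace profit_sketch {

template <typename Reg, typename Use, typename LivePred, typename CoverPred>
static bool isProfitableToFold(Reg *BaseReg, Reg *ScaledReg,
                               LivePred AlreadyLive,
                               const std::vector<Use *> &MemoryUses,
                               CoverPred UseCoversInst) {
  // Registers that were live anyway do not count against the fold.
  if (BaseReg && AlreadyLive(BaseReg))
    BaseReg = nullptr;
  if (ScaledReg && AlreadyLive(ScaledReg))
    ScaledReg = nullptr;
  if (!BaseReg && !ScaledReg)
    return true; // No live range is extended: folding is free.
  // Otherwise require that every memory use could sink the same computation.
  for (Use *U : MemoryUses)
    if (!UseCoversInst(U))
      return false;
  return true;
}

} // namespace profit_sketch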
4332 : /// Return true if the specified values are defined in a
4333 : /// different basic block than BB.
4334 : static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4335 : if (Instruction *I = dyn_cast<Instruction>(V))
4336 0 : return I->getParent() != BB;
4337 : return false;
4338 : }
4339 :
4340         : /// Sink addressing mode computation immediately before MemoryInst if doing so
4341 : /// can be done without increasing register pressure. The need for the
4342 : /// register pressure constraint means this can end up being an all or nothing
4343 : /// decision for all uses of the same addressing computation.
4344 : ///
4345 : /// Load and Store Instructions often have addressing modes that can do
4346 : /// significant amounts of computation. As such, instruction selection will try
4347 : /// to get the load or store to do as much computation as possible for the
4348 : /// program. The problem is that isel can only see within a single block. As
4349 : /// such, we sink as much legal addressing mode work into the block as possible.
4350 : ///
4351 : /// This method is used to optimize both load/store and inline asms with memory
4352 : /// operands. It's also used to sink addressing computations feeding into cold
4353 : /// call sites into their (cold) basic block.
4354 : ///
4355 : /// The motivation for handling sinking into cold blocks is that doing so can
4356 : /// both enable other address mode sinking (by satisfying the register pressure
4357 : /// constraint above), and reduce register pressure globally (by removing the
4358         : /// addressing mode computation from the fast path entirely).
4359 1716614 : bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4360 : Type *AccessTy, unsigned AddrSpace) {
4361 1716614 : Value *Repl = Addr;
4362 :
4363 : // Try to collapse single-value PHI nodes. This is necessary to undo
4364 : // unprofitable PRE transformations.
4365 : SmallVector<Value*, 8> worklist;
4366 : SmallPtrSet<Value*, 16> Visited;
4367 1716614 : worklist.push_back(Addr);
4368 :
4369 : // Use a worklist to iteratively look through PHI and select nodes, and
4370 : // ensure that the addressing mode obtained from the non-PHI/select roots of
4371 : // the graph are compatible.
4372 : bool PhiOrSelectSeen = false;
4373 : SmallVector<Instruction*, 16> AddrModeInsts;
4374 1716614 : const SimplifyQuery SQ(*DL, TLInfo);
4375 1716614 : AddressingModeCombiner AddrModes(SQ, { Addr, MemoryInst->getParent() });
4376 1716614 : TypePromotionTransaction TPT(RemovedInsts);
4377 : TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4378 : TPT.getRestorationPoint();
4379 3790014 : while (!worklist.empty()) {
4380 2078074 : Value *V = worklist.back();
4381 : worklist.pop_back();
4382 :
4383 : // We allow traversing cyclic Phi nodes.
4384 : // In case of success after this loop we ensure that traversing through
4385         :     // In case of success after this loop, we ensure that traversing through
4386         :     // Phi nodes ends up with every case computing an address of the form
4387         :     // BaseGV + Base + Scale * Index + Offset
4388         :     // where Scale and Offset are constants and BaseGV, Base and Index
4389         :     // are exactly the same Values in all cases.
4390 : // and have the same value as they had in address computation represented
4391 : // as Phi. So we can safely sink address computation to memory instruction.
4392 2078074 : if (!Visited.insert(V).second)
4393 160376 : continue;
4394 :
4395 : // For a PHI node, push all of its incoming values.
4396 : if (PHINode *P = dyn_cast<PHINode>(V)) {
4397 518272 : for (Value *IncValue : P->incoming_values())
4398 362197 : worklist.push_back(IncValue);
4399 : PhiOrSelectSeen = true;
4400 156075 : continue;
4401 : }
4402 : // Similar for select.
4403 : if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
4404 737 : worklist.push_back(SI->getFalseValue());
4405 737 : worklist.push_back(SI->getTrueValue());
4406 : PhiOrSelectSeen = true;
4407 737 : continue;
4408 : }
4409 :
4410 : // For non-PHIs, determine the addressing mode being computed. Note that
4411 : // the result may differ depending on what other uses our candidate
4412 : // addressing instructions might have.
4413 : AddrModeInsts.clear();
4414 : std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4415 : 0);
4416 : ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4417 1917698 : V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4418 1917698 : InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4419 :
4420 : GetElementPtrInst *GEP = LargeOffsetGEP.first;
4421 1917722 : if (GEP && GEP->getParent() != MemoryInst->getParent() &&
4422 1917746 : !NewGEPBases.count(GEP)) {
4423 : // If splitting the underlying data structure can reduce the offset of a
4424 : // GEP, collect the GEP. Skip the GEPs that are the new bases of
4425 : // previously split data structures.
4426 24 : LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4427 48 : if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4428 24 : LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4429 : }
4430 :
4431 1917698 : NewAddrMode.OriginalValue = V;
4432 1917698 : if (!AddrModes.addNewAddrMode(NewAddrMode))
4433 : break;
4434 : }
4435 :
4436 : // Try to combine the AddrModes we've collected. If we couldn't collect any,
4437 : // or we have multiple but either couldn't combine them or combining them
4438 : // wouldn't do anything useful, bail out now.
4439 1716614 : if (!AddrModes.combineAddrModes()) {
4440 154204 : TPT.rollback(LastKnownGood);
4441 154204 : return false;
4442 : }
4443 : TPT.commit();
4444 :
4445 : // Get the combined AddrMode (or the only AddrMode, if we only had one).
4446 1562410 : ExtAddrMode AddrMode = AddrModes.getAddrMode();
4447 :
4448 : // If all the instructions matched are already in this BB, don't do anything.
4449 : // If we saw a Phi node then it is not local definitely, and if we saw a select
4450         :   // If we saw a Phi node then it is definitely not local, and if we saw a select
4451 : // in this BB.
4452 1562410 : if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4453 0 : return IsNonLocalValue(V, MemoryInst->getParent());
4454 : })) {
4455 : LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4456 : << "\n");
4457 : return false;
4458 : }
4459 :
4460 : // Insert this computation right after this user. Since our caller is
4461         :   // scanning from the top of the BB to the bottom, reuses of the expr are
4462 : // guaranteed to happen later.
4463 14487 : IRBuilder<> Builder(MemoryInst);
4464 :
4465         :   // Now that we have determined the addressing expression we want to use and
4466         :   // know that we have to sink it into this block, check to see if we have already
4467 : // done this for some other load/store instr in this block. If so, reuse
4468 : // the computation. Before attempting reuse, check if the address is valid
4469 : // as it may have been erased.
4470 :
4471 14487 : WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4472 :
4473 14487 : Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4474 14487 : if (SunkAddr) {
4475 : LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4476 : << " for " << *MemoryInst << "\n");
4477 1139 : if (SunkAddr->getType() != Addr->getType())
4478 0 : SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4479 13348 : } else if (AddrSinkUsingGEPs ||
4480 5 : (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) {
4481 : // By default, we use the GEP-based method when AA is used later. This
4482 : // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4483 : LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4484 : << " for " << *MemoryInst << "\n");
4485 13343 : Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4486 : Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4487 :
4488 : // First, find the pointer.
4489 13343 : if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4490 : ResultPtr = AddrMode.BaseReg;
4491 : AddrMode.BaseReg = nullptr;
4492 : }
4493 :
4494 13343 : if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4495 : // We can't add more than one pointer together, nor can we scale a
4496 : // pointer (both of which seem meaningless).
4497 0 : if (ResultPtr || AddrMode.Scale != 1)
4498 : return false;
4499 :
4500 : ResultPtr = AddrMode.ScaledReg;
4501 : AddrMode.Scale = 0;
4502 : }
4503 :
4504 : // It is only safe to sign extend the BaseReg if we know that the math
4505 : // required to create it did not overflow before we extend it. Since
4506 : // the original IR value was tossed in favor of a constant back when
4507 : // the AddrMode was created we need to bail out gracefully if widths
4508 : // do not match instead of extending it.
4509 : //
4510 : // (See below for code to add the scale.)
4511 13343 : if (AddrMode.Scale) {
4512 2190 : Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4513 2190 : if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4514 : cast<IntegerType>(ScaledRegTy)->getBitWidth())
4515 : return false;
4516 : }
4517 :
4518 13333 : if (AddrMode.BaseGV) {
4519 4 : if (ResultPtr)
4520 : return false;
4521 :
4522 : ResultPtr = AddrMode.BaseGV;
4523 : }
4524 :
4525 : // If the real base value actually came from an inttoptr, then the matcher
4526 : // will look through it and provide only the integer value. In that case,
4527 : // use it here.
4528 13333 : if (!DL->isNonIntegralPointerType(Addr->getType())) {
4529 13330 : if (!ResultPtr && AddrMode.BaseReg) {
4530 295 : ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
4531 : "sunkaddr");
4532 295 : AddrMode.BaseReg = nullptr;
4533 13035 : } else if (!ResultPtr && AddrMode.Scale == 1) {
4534 0 : ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
4535 : "sunkaddr");
4536 : AddrMode.Scale = 0;
4537 : }
4538 : }
4539 :
4540 13333 : if (!ResultPtr &&
4541 9 : !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4542 3 : SunkAddr = Constant::getNullValue(Addr->getType());
4543 13330 : } else if (!ResultPtr) {
4544 : return false;
4545 : } else {
4546 : Type *I8PtrTy =
4547 13322 : Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4548 13322 : Type *I8Ty = Builder.getInt8Ty();
4549 :
4550 : // Start with the base register. Do this first so that subsequent address
4551 : // matching finds it last, which will prevent it from trying to match it
4552 : // as the scaled value in case it happens to be a mul. That would be
4553 : // problematic if we've sunk a different mul for the scale, because then
4554 : // we'd end up sinking both muls.
4555 13322 : if (AddrMode.BaseReg) {
4556 : Value *V = AddrMode.BaseReg;
4557 2 : if (V->getType() != IntPtrTy)
4558 2 : V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4559 :
4560 : ResultIndex = V;
4561 : }
4562 :
4563 : // Add the scale value.
4564 13322 : if (AddrMode.Scale) {
4565 : Value *V = AddrMode.ScaledReg;
4566 2180 : if (V->getType() == IntPtrTy) {
4567 : // done.
4568 : } else {
4569 : assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
4570 : cast<IntegerType>(V->getType())->getBitWidth() &&
4571 : "We can't transform if ScaledReg is too narrow");
4572 1 : V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4573 : }
4574 :
4575 2180 : if (AddrMode.Scale != 1)
4576 1185 : V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4577 : "sunkaddr");
4578 2180 : if (ResultIndex)
4579 0 : ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4580 : else
4581 : ResultIndex = V;
4582 : }
4583 :
4584 : // Add in the Base Offset if present.
4585 13322 : if (AddrMode.BaseOffs) {
4586 12253 : Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4587 12253 : if (ResultIndex) {
4588 : // We need to add this separately from the scale above to help with
4589 : // SDAG consecutive load/store merging.
4590 1663 : if (ResultPtr->getType() != I8PtrTy)
4591 1080 : ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4592 1663 : ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4593 : }
4594 :
4595 : ResultIndex = V;
4596 : }
4597 :
4598 13322 : if (!ResultIndex) {
4599 : SunkAddr = ResultPtr;
4600 : } else {
4601 12772 : if (ResultPtr->getType() != I8PtrTy)
4602 10569 : ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4603 12772 : SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4604 : }
4605 :
4606 13322 : if (SunkAddr->getType() != Addr->getType())
4607 11934 : SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4608 : }
4609 : } else {
4610 : // We'd require a ptrtoint/inttoptr down the line, which we can't do for
4611 : // non-integral pointers, so in that case bail out now.
4612 5 : Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
4613 5 : Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
4614 : PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
4615 : PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
4616 6 : if (DL->isNonIntegralPointerType(Addr->getType()) ||
4617 1 : (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
4618 0 : (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
4619 0 : (AddrMode.BaseGV &&
4620 0 : DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
4621 5 : return false;
4622 :
4623 : LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4624 : << " for " << *MemoryInst << "\n");
4625 0 : Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4626 : Value *Result = nullptr;
4627 :
4628 : // Start with the base register. Do this first so that subsequent address
4629 : // matching finds it last, which will prevent it from trying to match it
4630 : // as the scaled value in case it happens to be a mul. That would be
4631 : // problematic if we've sunk a different mul for the scale, because then
4632 : // we'd end up sinking both muls.
4633 0 : if (AddrMode.BaseReg) {
4634 : Value *V = AddrMode.BaseReg;
4635 0 : if (V->getType()->isPointerTy())
4636 0 : V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4637 0 : if (V->getType() != IntPtrTy)
4638 0 : V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4639 : Result = V;
4640 : }
4641 :
4642 : // Add the scale value.
4643 0 : if (AddrMode.Scale) {
4644 : Value *V = AddrMode.ScaledReg;
4645 0 : if (V->getType() == IntPtrTy) {
4646 : // done.
4647 0 : } else if (V->getType()->isPointerTy()) {
4648 0 : V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4649 0 : } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4650 : cast<IntegerType>(V->getType())->getBitWidth()) {
4651 0 : V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4652 : } else {
4653 : // It is only safe to sign extend the BaseReg if we know that the math
4654 : // required to create it did not overflow before we extend it. Since
4655 : // the original IR value was tossed in favor of a constant back when
4656 : // the AddrMode was created we need to bail out gracefully if widths
4657 : // do not match instead of extending it.
4658 : Instruction *I = dyn_cast_or_null<Instruction>(Result);
4659 0 : if (I && (Result != AddrMode.BaseReg))
4660 0 : I->eraseFromParent();
4661 0 : return false;
4662 : }
4663 0 : if (AddrMode.Scale != 1)
4664 0 : V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4665 : "sunkaddr");
4666 0 : if (Result)
4667 0 : Result = Builder.CreateAdd(Result, V, "sunkaddr");
4668 : else
4669 : Result = V;
4670 : }
4671 :
4672 : // Add in the BaseGV if present.
4673 0 : if (AddrMode.BaseGV) {
4674 0 : Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
4675 0 : if (Result)
4676 0 : Result = Builder.CreateAdd(Result, V, "sunkaddr");
4677 : else
4678 : Result = V;
4679 : }
4680 :
4681 : // Add in the Base Offset if present.
4682 0 : if (AddrMode.BaseOffs) {
4683 0 : Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4684 0 : if (Result)
4685 0 : Result = Builder.CreateAdd(Result, V, "sunkaddr");
4686 : else
4687 : Result = V;
4688 : }
4689 :
4690 0 : if (!Result)
4691 0 : SunkAddr = Constant::getNullValue(Addr->getType());
4692 : else
4693 0 : SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
4694 : }
4695 :
4696 14464 : MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
4697 : // Store the newly computed address into the cache. In the case we reused a
4698 : // value, this should be idempotent.
4699 14464 : SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
4700 :
4701 : // If we have no uses, recursively delete the value and all dead instructions
4702 : // using it.
4703 14464 : if (Repl->use_empty()) {
4704 : // This can cause recursive deletion, which can invalidate our iterator.
4705 : // Use a WeakTrackingVH to hold onto it in case this happens.
4706 6817 : Value *CurValue = &*CurInstIterator;
4707 : WeakTrackingVH IterHandle(CurValue);
4708 6817 : BasicBlock *BB = CurInstIterator->getParent();
4709 :
4710 6817 : RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
4711 :
4712 6817 : if (IterHandle != CurValue) {
4713 : // If the iterator instruction was recursively deleted, start over at the
4714 : // start of the block.
4715 1 : CurInstIterator = BB->begin();
4716 : SunkAddrs.clear();
4717 : }
4718 : }
4719 : ++NumMemoryInsts;
4720 : return true;
4721 : }
4722 :
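// A self-contained sketch of the worklist traversal at the top of
// optimizeMemoryInst() above: look through PHI and select nodes (tolerating
// cycles via a visited set) and hand every non-PHI/select root to a matcher,
// stopping early if a root cannot be matched. The Node kinds and the
// MatchRoot callback are hypothetical simplifications of the IR types and of
// AddressingModeMatcher::Match()/addNewAddrMode().
#include <functional>
#include <unordered_set>
#include <vector>

namespace addrroot_sketch {

enum class Kind { Phi, Select, Other };

struct Node {
  Kind K = Kind::Other;
  std::vector<Node *> Operands; // Incoming values / true+false values.
};

// Returns false as soon as the matcher rejects a root, mirroring the early
// break when a newly matched addressing mode cannot be combined.
static bool collectAddrRoots(Node *Addr,
                             const std::function<bool(Node *)> &MatchRoot) {
  std::vector<Node *> Worklist{Addr};
  std::unordered_set<Node *> Visited;
  while (!Worklist.empty()) {
    Node *V = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(V).second)
      continue; // Allow cyclic PHIs.
    if (V->K == Kind::Phi || V->K == Kind::Select) {
      for (Node *Op : V->Operands)
        Worklist.push_back(Op); // Keep looking through the node.
      continue;
    }
    if (!MatchRoot(V)) // Non-PHI/select root: try to match an address.
      return false;
  }
  return true;
}

} // namespace addrroot_sketch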
4723 : /// If there are any memory operands, use OptimizeMemoryInst to sink their
4724 : /// address computing into the block when possible / profitable.
4725 14103 : bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
4726 : bool MadeChange = false;
4727 :
4728 : const TargetRegisterInfo *TRI =
4729 14103 : TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
4730 : TargetLowering::AsmOperandInfoVector TargetConstraints =
4731 42309 : TLI->ParseConstraints(*DL, TRI, CS);
4732 : unsigned ArgNo = 0;
4733 94571 : for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4734 66365 : TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4735 :
4736 : // Compute the constraint code and ConstraintType to use.
4737 66365 : TLI->ComputeConstraintToUse(OpInfo, SDValue());
4738 :
4739 66365 : if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
4740 4002 : OpInfo.isIndirect) {
4741 3099 : Value *OpVal = CS->getArgOperand(ArgNo++);
4742 3099 : MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
4743 63266 : } else if (OpInfo.Type == InlineAsm::isInput)
4744 8256 : ArgNo++;
4745 : }
4746 :
4747 14103 : return MadeChange;
4748 : }
4749 :
4750 : /// Check if all the uses of \p Val are equivalent (or free) zero or
4751 : /// sign extensions.
4752 38 : static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
4753 : assert(!Val->use_empty() && "Input must have at least one use");
4754 : const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
4755 : bool IsSExt = isa<SExtInst>(FirstUser);
4756 38 : Type *ExtTy = FirstUser->getType();
4757 99 : for (const User *U : Val->users()) {
4758 : const Instruction *UI = cast<Instruction>(U);
4759 85 : if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
4760 : return false;
4761 66 : Type *CurTy = UI->getType();
4762 : // Same input and output types: Same instruction after CSE.
4763 66 : if (CurTy == ExtTy)
4764 : continue;
4765 :
4766 : // If IsSExt is true, we are in this situation:
4767 : // a = Val
4768 : // b = sext ty1 a to ty2
4769 : // c = sext ty1 a to ty3
4770 : // Assuming ty2 is shorter than ty3, this could be turned into:
4771 : // a = Val
4772 : // b = sext ty1 a to ty2
4773 : // c = sext ty2 b to ty3
4774 : // However, the last sext is not free.
4775 14 : if (IsSExt)
4776 : return false;
4777 :
4778 : // This is a ZExt, maybe this is free to extend from one type to another.
4779         :     // This is a ZExt; maybe it is free to extend from one type to another.
4780 : Type *NarrowTy;
4781 : Type *LargeTy;
4782 13 : if (ExtTy->getScalarType()->getIntegerBitWidth() >
4783 : CurTy->getScalarType()->getIntegerBitWidth()) {
4784 : NarrowTy = CurTy;
4785 : LargeTy = ExtTy;
4786 : } else {
4787 : NarrowTy = ExtTy;
4788 : LargeTy = CurTy;
4789 : }
4790 :
4791 13 : if (!TLI.isZExtFree(NarrowTy, LargeTy))
4792 : return false;
4793 : }
4794 : // All uses are the same or can be derived from one another for free.
4795 : return true;
4796 : }
4797 :
4798 : /// Try to speculatively promote extensions in \p Exts and continue
4799 : /// promoting through newly promoted operands recursively as far as doing so is
4800 : /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
4801 : /// When some promotion happened, \p TPT contains the proper state to revert
4802 : /// them.
4803 : ///
4804 : /// \return true if some promotion happened, false otherwise.
4805 44509 : bool CodeGenPrepare::tryToPromoteExts(
4806 : TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
4807 : SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
4808 : unsigned CreatedInstsCost) {
4809 : bool Promoted = false;
4810 :
4811 : // Iterate over all the extensions to try to promote them.
4812 75678 : for (auto I : Exts) {
4813 : // Early check if we directly have ext(load).
4814 44769 : if (isa<LoadInst>(I->getOperand(0))) {
4815 13860 : ProfitablyMovedExts.push_back(I);
4816 29037 : continue;
4817 : }
4818 :
4819 : // Check whether or not we want to do any promotion. The reason we have
4820 : // this check inside the for loop is to catch the case where an extension
4821 : // is directly fed by a load because in such case the extension can be moved
4822 : // up without any promotion on its operands.
4823 30909 : if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
4824 13600 : return false;
4825 :
4826 : // Get the action to perform the promotion.
4827 : TypePromotionHelper::Action TPH =
4828 17309 : TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
4829 : // Check if we can promote.
4830 17309 : if (!TPH) {
4831 : // Save the current extension as we cannot move up through its operand.
4832 15108 : ProfitablyMovedExts.push_back(I);
4833 15108 : continue;
4834 : }
4835 :
4836 : // Save the current state.
4837 : TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4838 : TPT.getRestorationPoint();
4839 : SmallVector<Instruction *, 4> NewExts;
4840 2201 : unsigned NewCreatedInstsCost = 0;
4841 2201 : unsigned ExtCost = !TLI->isExtFree(I);
4842 : // Promote.
4843 2201 : Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
4844 2201 : &NewExts, nullptr, *TLI);
4845 : assert(PromotedVal &&
4846 : "TypePromotionHelper should have filtered out those cases");
4847 :
4848 : // We would be able to merge only one extension in a load.
4849 : // Therefore, if we have more than 1 new extension we heuristically
4850 : // cut this search path, because it means we degrade the code quality.
4851 : // With exactly 2, the transformation is neutral, because we will merge
4852 : // one extension but leave one. However, we optimistically keep going,
4853 : // because the new extension may be removed too.
4854 2201 : long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
4855 : // FIXME: It would be possible to propagate a negative value instead of
4856 : // conservatively ceiling it to 0.
4857 2201 : TotalCreatedInstsCost =
4858 2532 : std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
4859 2201 : if (!StressExtLdPromotion &&
4860 2114 : (TotalCreatedInstsCost > 1 ||
4861 2114 : !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
4862 : // This promotion is not profitable, rollback to the previous state, and
4863 : // save the current extension in ProfitablyMovedExts as the latest
4864 : // speculative promotion turned out to be unprofitable.
4865 31 : TPT.rollback(LastKnownGood);
4866 31 : ProfitablyMovedExts.push_back(I);
4867 31 : continue;
4868 : }
4869 : // Continue promoting NewExts as far as doing so is profitable.
4870 : SmallVector<Instruction *, 2> NewlyMovedExts;
4871 2170 : (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
4872 : bool NewPromoted = false;
4873 4663 : for (auto ExtInst : NewlyMovedExts) {
4874 2493 : Instruction *MovedExt = cast<Instruction>(ExtInst);
4875 2493 : Value *ExtOperand = MovedExt->getOperand(0);
4876 : // If we have reached to a load, we need this extra profitability check
4877 : // as it could potentially be merged into an ext(load).
4878 1323 : if (isa<LoadInst>(ExtOperand) &&
4879 1366 : !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
4880 38 : (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
4881 24 : continue;
4882 :
4883 2469 : ProfitablyMovedExts.push_back(MovedExt);
4884 : NewPromoted = true;
4885 : }
4886 :
4887         :     // If none of the speculative promotions for NewExts is profitable, roll back
4888 : // and save the current extension (I) as the last profitable extension.
4889 2170 : if (!NewPromoted) {
4890 38 : TPT.rollback(LastKnownGood);
4891 38 : ProfitablyMovedExts.push_back(I);
4892 : continue;
4893 : }
4894 : // The promotion is profitable.
4895 : Promoted = true;
4896 : }
4897 30909 : return Promoted;
4898 : }
4899 :
4900         : /// Merge redundant sexts when one dominates the other.
4901 36 : bool CodeGenPrepare::mergeSExts(Function &F) {
4902 36 : DominatorTree DT(F);
4903 : bool Changed = false;
4904 86 : for (auto &Entry : ValToSExtendedUses) {
4905 : SExts &Insts = Entry.second;
4906 : SExts CurPts;
4907 153 : for (Instruction *Inst : Insts) {
4908 103 : if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
4909 206 : Inst->getOperand(0) != Entry.first)
4910 : continue;
4911 : bool inserted = false;
4912 103 : for (auto &Pt : CurPts) {
4913 53 : if (DT.dominates(Inst, Pt)) {
4914 30 : Pt->replaceAllUsesWith(Inst);
4915 30 : RemovedInsts.insert(Pt);
4916 30 : Pt->removeFromParent();
4917 30 : Pt = Inst;
4918 : inserted = true;
4919 : Changed = true;
4920 30 : break;
4921 : }
4922 23 : if (!DT.dominates(Pt, Inst))
4923         :         // Give up if we would need to merge in a common dominator, as
4924         :         // experiments show it is not profitable.
4925 : continue;
4926 23 : Inst->replaceAllUsesWith(Pt);
4927 23 : RemovedInsts.insert(Inst);
4928 23 : Inst->removeFromParent();
4929 : inserted = true;
4930 : Changed = true;
4931 23 : break;
4932 : }
4933 103 : if (!inserted)
4934 50 : CurPts.push_back(Inst);
4935 : }
4936 : }
4937 36 : return Changed;
4938 : }
4939 :
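// A minimal sketch of the dominance-based deduplication used by mergeSExts()
// above: keep a list of representative instructions ("CurPts"); a new
// instruction either replaces a representative it dominates, is folded into a
// representative that dominates it, or becomes a new representative. The Inst
// type, dominates() and replaceAllUsesWith() below are hypothetical stand-ins
// for the DominatorTree/Instruction APIs.
#include <vector>

namespace dedup_sketch {

struct Inst {
  int DomNumber = 0;          // Pretend a smaller number means "executes earlier".
  Inst *ReplacedBy = nullptr; // Records who absorbed this instruction, if anyone.
};

static bool dominates(const Inst *A, const Inst *B) {
  return A->DomNumber < B->DomNumber; // Stand-in for DominatorTree::dominates.
}

static void replaceAllUsesWith(Inst *Old, Inst *New) { Old->ReplacedBy = New; }

static bool mergeEquivalent(std::vector<Inst *> &Insts) {
  bool Changed = false;
  std::vector<Inst *> CurPts;
  for (Inst *I : Insts) {
    bool Inserted = false;
    for (Inst *&Pt : CurPts) {
      if (dominates(I, Pt)) {
        replaceAllUsesWith(Pt, I); // I can stand in for Pt everywhere.
        Pt = I;
        Inserted = Changed = true;
        break;
      }
      if (!dominates(Pt, I))
        continue; // Would need a common dominator; treated as unprofitable.
      replaceAllUsesWith(I, Pt); // Pt already covers I.
      Inserted = Changed = true;
      break;
    }
    if (!Inserted)
      CurPts.push_back(I); // New representative for this value.
  }
  return Changed;
}

} // namespace dedup_sketch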
4940         : // Split large data structures so that the GEPs accessing them have smaller
4941         : // offsets and can therefore be sunk to the same blocks as their users.
4942         : // For example, a large struct starting from %base is split into two parts
4943 : // where the second part starts from %new_base.
4944 : //
4945 : // Before:
4946 : // BB0:
4947 : // %base =
4948 : //
4949 : // BB1:
4950 : // %gep0 = gep %base, off0
4951 : // %gep1 = gep %base, off1
4952 : // %gep2 = gep %base, off2
4953 : //
4954 : // BB2:
4955 : // %load1 = load %gep0
4956 : // %load2 = load %gep1
4957 : // %load3 = load %gep2
4958 : //
4959 : // After:
4960 : // BB0:
4961 : // %base =
4962 : // %new_base = gep %base, off0
4963 : //
4964 : // BB1:
4965 : // %new_gep0 = %new_base
4966 : // %new_gep1 = gep %new_base, off1 - off0
4967 : // %new_gep2 = gep %new_base, off2 - off0
4968 : //
4969 : // BB2:
4970 : // %load1 = load i32, i32* %new_gep0
4971 : // %load2 = load i32, i32* %new_gep1
4972 : // %load3 = load i32, i32* %new_gep2
4973 : //
4974 : // %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
4975         : // their offsets are small enough to fit into the addressing mode.
4976 11 : bool CodeGenPrepare::splitLargeGEPOffsets() {
4977 : bool Changed = false;
4978 23 : for (auto &Entry : LargeOffsetGEPMap) {
4979 : Value *OldBase = Entry.first;
4980 : SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
4981 : &LargeOffsetGEPs = Entry.second;
4982 : auto compareGEPOffset =
4983 : [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
4984 : const std::pair<GetElementPtrInst *, int64_t> &RHS) {
4985 : if (LHS.first == RHS.first)
4986 : return false;
4987 : if (LHS.second != RHS.second)
4988 : return LHS.second < RHS.second;
4989 : return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
4990 : };
4991 : // Sorting all the GEPs of the same data structures based on the offsets.
4992 : llvm::sort(LargeOffsetGEPs, compareGEPOffset);
4993 : LargeOffsetGEPs.erase(
4994 : std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
4995 : LargeOffsetGEPs.end());
4996 : // Skip if all the GEPs have the same offsets.
4997 24 : if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
4998 : continue;
4999 : GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5000 : int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5001 : Value *NewBaseGEP = nullptr;
5002 :
5003 : auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5004 36 : while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5005 : GetElementPtrInst *GEP = LargeOffsetGEP->first;
5006 24 : int64_t Offset = LargeOffsetGEP->second;
5007 24 : if (Offset != BaseOffset) {
5008 12 : TargetLowering::AddrMode AddrMode;
5009 12 : AddrMode.BaseOffs = Offset - BaseOffset;
5010 : // The result type of the GEP might not be the type of the memory
5011 : // access.
5012 12 : if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5013 : GEP->getResultElementType(),
5014 12 : GEP->getAddressSpace())) {
5015 : // We need to create a new base if the offset to the current base is
5016 : // too large to fit into the addressing mode. So, a very large struct
5017         :         // may be split into several parts.
5018 : BaseGEP = GEP;
5019 : BaseOffset = Offset;
5020 : NewBaseGEP = nullptr;
5021 : }
5022 : }
5023 :
5024 : // Generate a new GEP to replace the current one.
5025 24 : IRBuilder<> Builder(GEP);
5026 24 : Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5027 : Type *I8PtrTy =
5028 24 : Builder.getInt8PtrTy(GEP->getType()->getPointerAddressSpace());
5029 24 : Type *I8Ty = Builder.getInt8Ty();
5030 :
5031 24 : if (!NewBaseGEP) {
5032 : // Create a new base if we don't have one yet. Find the insertion
5033         :         // point for the new base first.
5034 : BasicBlock::iterator NewBaseInsertPt;
5035 : BasicBlock *NewBaseInsertBB;
5036 : if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5037 : // If the base of the struct is an instruction, the new base will be
5038 : // inserted close to it.
5039 9 : NewBaseInsertBB = BaseI->getParent();
5040 9 : if (isa<PHINode>(BaseI))
5041 1 : NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5042 : else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5043 : NewBaseInsertBB =
5044 1 : SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5045 1 : NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5046 : } else
5047 14 : NewBaseInsertPt = std::next(BaseI->getIterator());
5048 : } else {
5049 : // If the current base is an argument or global value, the new base
5050 : // will be inserted to the entry block.
5051 3 : NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5052 3 : NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5053 : }
5054 12 : IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5055 : // Create a new base.
5056 12 : Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5057 : NewBaseGEP = OldBase;
5058 12 : if (NewBaseGEP->getType() != I8PtrTy)
5059 12 : NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5060 : NewBaseGEP =
5061 12 : NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5062 24 : NewGEPBases.insert(NewBaseGEP);
5063 : }
5064 :
5065 : Value *NewGEP = NewBaseGEP;
5066 24 : if (Offset == BaseOffset) {
5067 12 : if (GEP->getType() != I8PtrTy)
5068 12 : NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5069 : } else {
5070 : // Calculate the new offset for the new GEP.
5071 12 : Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5072 12 : NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5073 :
5074 12 : if (GEP->getType() != I8PtrTy)
5075 12 : NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5076 : }
5077 24 : GEP->replaceAllUsesWith(NewGEP);
5078 48 : LargeOffsetGEPID.erase(GEP);
5079 : LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5080 24 : GEP->eraseFromParent();
5081 : Changed = true;
5082 : }
5083 : }
5084 11 : return Changed;
5085 : }
5086 :
5087 : /// Return true, if an ext(load) can be formed from an extension in
5088         : /// Return true if an ext(load) can be formed from an extension in
5089 42309 : bool CodeGenPrepare::canFormExtLd(
5090 : const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5091 : Instruction *&Inst, bool HasPromoted) {
5092 57411 : for (auto *MovedExtInst : MovedExts) {
5093 28860 : if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5094 13758 : LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5095 13758 : Inst = MovedExtInst;
5096 13758 : break;
5097 : }
5098 : }
5099 42309 : if (!LI)
5100 : return false;
5101 :
5102 : // If they're already in the same block, there's nothing to do.
5103 : // Make the cheap checks first if we did not promote.
5104 : // If we promoted, we need to check if it is indeed profitable.
5105 13758 : if (!HasPromoted && LI->getParent() == Inst->getParent())
5106 : return false;
5107 :
5108 1380 : return TLI->isExtLoad(LI, Inst, *DL);
5109 : }
5110 :
5111 : /// Move a zext or sext fed by a load into the same basic block as the load,
5112 : /// unless conditions are unfavorable. This allows SelectionDAG to fold the
5113 : /// extend into the load.
5114 : ///
5115 : /// E.g.,
5116 : /// \code
5117 : /// %ld = load i32* %addr
5118 : /// %add = add nuw i32 %ld, 4
5119 : /// %zext = zext i32 %add to i64
5120         : /// \endcode
5121 : /// =>
5122 : /// \code
5123 : /// %ld = load i32* %addr
5124 : /// %zext = zext i32 %ld to i64
5125 : /// %add = add nuw i64 %zext, 4
5126         : /// \endcode
5127         : /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5128         : /// allows us to match zext(load i32*) to i64.
5129 : ///
5130 : /// Also, try to promote the computations used to obtain a sign extended
5131 : /// value used into memory accesses.
5132 : /// E.g.,
5133 : /// \code
5134 : /// a = add nsw i32 b, 3
5135 : /// d = sext i32 a to i64
5136 : /// e = getelementptr ..., i64 d
5137 : /// \endcode
5138 : /// =>
5139 : /// \code
5140 : /// f = sext i32 b to i64
5141 : /// a = add nsw i64 f, 3
5142 : /// e = getelementptr ..., i64 a
5143 : /// \endcode
5144 : ///
5145 : /// \p Inst[in/out] the extension may be modified during the process if some
5146 : /// promotions apply.
5147 42311 : bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5148 : // ExtLoad formation and address type promotion infrastructure requires TLI to
5149 : // be effective.
5150 42311 : if (!TLI)
5151 : return false;
5152 :
5153 42309 : bool AllowPromotionWithoutCommonHeader = false;
5154 : /// See if it is an interesting sext operations for the address type
5155         :   /// See if this is an interesting sext operation for address type promotion
5156         :   /// before trying to promote it, e.g., one with the right type that is used
5157         :   /// in memory accesses.
5158 : *Inst, AllowPromotionWithoutCommonHeader);
5159 42309 : TypePromotionTransaction TPT(RemovedInsts);
5160 : TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5161 : TPT.getRestorationPoint();
5162 : SmallVector<Instruction *, 1> Exts;
5163 : SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5164 42309 : Exts.push_back(Inst);
5165 :
5166 42309 : bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5167 :
5168 : // Look for a load being extended.
5169 42309 : LoadInst *LI = nullptr;
5170 : Instruction *ExtFedByLoad;
5171 :
5172 : // Try to promote a chain of computation if it allows to form an extended
5173 : // load.
5174 42309 : if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5175 : assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5176 : TPT.commit();
5177 : // Move the extend into the same block as the load
5178 1367 : ExtFedByLoad->moveAfter(LI);
5179 : // CGP does not check if the zext would be speculatively executed when moved
5180 : // to the same basic block as the load. Preserving its original location
5181 : // would pessimize the debugging experience, as well as negatively impact
5182 : // the quality of sample pgo. We don't want to use "line 0" as that has a
5183 : // size cost in the line-table section and logically the zext can be seen as
5184 : // part of the load. Therefore we conservatively reuse the same debug
5185 : // location for the load and the zext.
5186 1367 : ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5187 : ++NumExtsMoved;
5188 1367 : Inst = ExtFedByLoad;
5189 1367 : return true;
5190 : }
5191 :
5192 : // Continue promoting SExts if known as considerable depending on targets.
5193         :   // Continue promoting SExts if the target reports them as worth considering
5193         :   // for address type promotion.
5194 146 : performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5195 : HasPromoted, TPT, SpeculativelyMovedExts))
5196 : return true;
5197 :
5198 40890 : TPT.rollback(LastKnownGood);
5199 40890 : return false;
5200 : }
5201 :
5202 : // Perform address type promotion if doing so is profitable.
5203 : // If AllowPromotionWithoutCommonHeader == false, we should find other sext
5204 : // instructions that sign extended the same initial value. However, if
5205 : // AllowPromotionWithoutCommonHeader == true, we expect promoting the
5206         : // AllowPromotionWithoutCommonHeader == true, we expect promoting the
5207         : // extension to be profitable on its own.
5208 : Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5209 : bool HasPromoted, TypePromotionTransaction &TPT,
5210 : SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5211 : bool Promoted = false;
5212 : SmallPtrSet<Instruction *, 1> UnhandledExts;
5213 : bool AllSeenFirst = true;
5214 296 : for (auto I : SpeculativelyMovedExts) {
5215 150 : Value *HeadOfChain = I->getOperand(0);
5216 : DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5217 150 : SeenChainsForSExt.find(HeadOfChain);
5218 : // If there is an unhandled SExt which has the same header, try to promote
5219 : // it as well.
5220 150 : if (AlreadySeen != SeenChainsForSExt.end()) {
5221 53 : if (AlreadySeen->second != nullptr)
5222 30 : UnhandledExts.insert(AlreadySeen->second);
5223 : AllSeenFirst = false;
5224 : }
5225 : }
5226 :
5227 146 : if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5228 17 : SpeculativelyMovedExts.size() == 1)) {
5229 : TPT.commit();
5230 70 : if (HasPromoted)
5231 : Promoted = true;
5232 141 : for (auto I : SpeculativelyMovedExts) {
5233 71 : Value *HeadOfChain = I->getOperand(0);
5234 71 : SeenChainsForSExt[HeadOfChain] = nullptr;
5235 71 : ValToSExtendedUses[HeadOfChain].push_back(I);
5236 : }
5237 : // Update Inst as promotion happen.
5238 70 : Inst = SpeculativelyMovedExts.pop_back_val();
5239 : } else {
5240         :     // This is the first chain visited from the header; keep the current chain
5241         :     // as unhandled. Defer promoting it until we encounter another SExt
5242 : // chain derived from the same header.
5243 155 : for (auto I : SpeculativelyMovedExts) {
5244 79 : Value *HeadOfChain = I->getOperand(0);
5245 79 : SeenChainsForSExt[HeadOfChain] = Inst;
5246 : }
5247 : return false;
5248 : }
5249 :
5250 70 : if (!AllSeenFirst && !UnhandledExts.empty())
5251 60 : for (auto VisitedSExt : UnhandledExts) {
5252 30 : if (RemovedInsts.count(VisitedSExt))
5253 0 : continue;
5254 30 : TypePromotionTransaction TPT(RemovedInsts);
5255 : SmallVector<Instruction *, 1> Exts;
5256 : SmallVector<Instruction *, 2> Chains;
5257 30 : Exts.push_back(VisitedSExt);
5258 30 : bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5259 : TPT.commit();
5260 30 : if (HasPromoted)
5261 : Promoted = true;
5262 62 : for (auto I : Chains) {
5263 32 : Value *HeadOfChain = I->getOperand(0);
5264 : // Mark this as handled.
5265 32 : SeenChainsForSExt[HeadOfChain] = nullptr;
5266 32 : ValToSExtendedUses[HeadOfChain].push_back(I);
5267 : }
5268 : }
5269 : return Promoted;
5270 : }
5271 :
5272 42311 : bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5273 42311 : BasicBlock *DefBB = I->getParent();
5274 :
5275 : // If the result of a {s|z}ext and its source are both live out, rewrite all
5276 : // other uses of the source with result of extension.
5277         :   // other uses of the source with the result of the extension.
5278 42311 : if (Src->hasOneUse())
5279 : return false;
5280 :
5281 : // Only do this xform if truncating is free.
5282 6849 : if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
5283 : return false;
5284 :
5285 : // Only safe to perform the optimization if the source is also defined in
5286 : // this block.
5287 6440 : if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5288 : return false;
5289 :
5290 : bool DefIsLiveOut = false;
5291 8649 : for (User *U : I->users()) {
5292 : Instruction *UI = cast<Instruction>(U);
5293 :
5294 : // Figure out which BB this ext is used in.
5295 5585 : BasicBlock *UserBB = UI->getParent();
5296 5585 : if (UserBB == DefBB) continue;
5297 : DefIsLiveOut = true;
5298 : break;
5299 : }
5300 5320 : if (!DefIsLiveOut)
5301 : return false;
5302 :
5303 : // Make sure none of the uses are PHI nodes.
5304 7901 : for (User *U : Src->users()) {
5305 : Instruction *UI = cast<Instruction>(U);
5306 5808 : BasicBlock *UserBB = UI->getParent();
5307 5808 : if (UserBB == DefBB) continue;
5308 : // Be conservative. We don't want this xform to end up introducing
5309 : // reloads just before load / store instructions.
5310 1445 : if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5311 : return false;
5312 : }
5313 :
5314         :   // InsertedTruncs - Only insert one trunc in each block.
5315 : DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5316 :
5317 : bool MadeChange = false;
5318 5788 : for (Use &U : Src->uses()) {
5319 3695 : Instruction *User = cast<Instruction>(U.getUser());
5320 :
5321 : // Figure out which BB this ext is used in.
5322 3695 : BasicBlock *UserBB = User->getParent();
5323 3695 : if (UserBB == DefBB) continue;
5324 :
5325 : // Both src and def are live in this block. Rewrite the use.
5326 : Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5327 :
5328 774 : if (!InsertedTrunc) {
5329 774 : BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5330 : assert(InsertPt != UserBB->end());
5331 1548 : InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5332 774 : InsertedInsts.insert(InsertedTrunc);
5333 : }
5334 :
5335 : // Replace a use of the {s|z}ext source with a use of the result.
5336 774 : U = InsertedTrunc;
5337 : ++NumExtUses;
5338 : MadeChange = true;
5339 : }
5340 :
5341 : return MadeChange;
5342 : }
5343 :
5344 : // Find loads whose uses only use some of the loaded value's bits. Add an "and"
5345 : // just after the load if the target can fold this into one extload instruction,
5346 : // with the hope of eliminating some of the other later "and" instructions using
5347 : // the loaded value. "and"s that are made trivially redundant by the insertion
5348 : // of the new "and" are removed by this function, while others (e.g. those whose
5349 : // path from the load goes through a phi) are left for isel to potentially
5350 : // remove.
5351 : //
5352 : // For example:
5353 : //
5354 : // b0:
5355 : // x = load i32
5356 : // ...
5357 : // b1:
5358 : // y = and x, 0xff
5359 : // z = use y
5360 : //
5361 : // becomes:
5362 : //
5363 : // b0:
5364 : // x = load i32
5365 : // x' = and x, 0xff
5366 : // ...
5367 : // b1:
5368 : // z = use x'
5369 : //
5370 : // whereas:
5371 : //
5372 : // b0:
5373 : // x1 = load i32
5374 : // ...
5375 : // b1:
5376 : // x2 = load i32
5377 : // ...
5378 : // b2:
5379 : // x = phi x1, x2
5380 : // y = and x, 0xff
5381 : //
5382 : // becomes (after a call to optimizeLoadExt for each load):
5383 : //
5384 : // b0:
5385 : // x1 = load i32
5386 : // x1' = and x1, 0xff
5387 : // ...
5388 : // b1:
5389 : // x2 = load i32
5390 : // x2' = and x2, 0xff
5391 : // ...
5392 : // b2:
5393 : // x = phi x1', x2'
5394 : // y = and x, 0xff
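// Before the implementation below, a simplified standalone sketch of its
// demanded-bits bookkeeping: walk the users of a load, OR together the bits
// each one actually needs (and-masks, the low bits that survive a shl, the
// truncated width), and only hoist a new mask if the union forms a contiguous
// low-bit mask wider than one bit that matches the widest 'and' seen. Plain
// 64-bit masks stand in for APInt, and the User description is hypothetical.
#include <cstdint>
#include <vector>

namespace demandbits_sketch {

enum class UseKind { And, Shl, Trunc, Other };

struct User {
  UseKind Kind = UseKind::Other;
  uint64_t Imm = 0; // And mask, shift amount, or truncated bit width.
};

// True if V has the form 0b00..011..1 (a non-empty mask of low bits).
static bool isLowBitMask(uint64_t V) { return V != 0 && ((V + 1) & V) == 0; }

// Returns true if it is worth inserting "and (load x), DemandBits" right after
// the load; DemandBits receives the union of demanded bits. BitWidth is the
// load's result width (1..64).
static bool shouldHoistMask(const std::vector<User> &Users, unsigned BitWidth,
                            uint64_t &DemandBits) {
  const uint64_t AllOnes = ~0ULL >> (64 - BitWidth);
  DemandBits = 0;
  uint64_t WidestAndMask = 0;
  for (const User &U : Users) {
    switch (U.Kind) {
    case UseKind::And:
      DemandBits |= U.Imm;
      if (U.Imm > WidestAndMask)
        WidestAndMask = U.Imm;
      break;
    case UseKind::Shl: // Only the low (BitWidth - shift) bits survive.
      DemandBits |= AllOnes >> U.Imm;
      break;
    case UseKind::Trunc: // Only the low truncated-width bits are used.
      DemandBits |= ~0ULL >> (64 - U.Imm);
      break;
    case UseKind::Other: // Unknown use: assume every bit is needed.
      return false;
    }
  }
  // Hoist only when the demanded bits form a low-bit mask wider than one bit
  // and some existing 'and' already uses exactly that mask (those 'and's can
  // then be removed once the new one is inserted).
  return isLowBitMask(DemandBits) && DemandBits > 1 &&
         WidestAndMask == DemandBits;
}

} // namespace demandbits_sketch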
5395 914971 : bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
5396 905525 : if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
5397 : return false;
5398 :
5399 : // Skip loads we've already transformed.
5400 1297172 : if (Load->hasOneUse() &&
5401 1250010 : InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
5402 : return false;
5403 :
5404 : // Look at all uses of Load, looking through phis, to determine how many bits
5405 : // of the loaded value are needed.
5406 : SmallVector<Instruction *, 8> WorkList;
5407 : SmallPtrSet<Instruction *, 16> Visited;
5408 : SmallVector<Instruction *, 8> AndsToMaybeRemove;
5409 1431052 : for (auto *U : Load->users())
5410 758251 : WorkList.push_back(cast<Instruction>(U));
5411 :
5412 672801 : EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
5413 672801 : unsigned BitWidth = LoadResultVT.getSizeInBits();
5414 : APInt DemandBits(BitWidth, 0);
5415 : APInt WidestAndBits(BitWidth, 0);
5416 :
5417 695135 : while (!WorkList.empty()) {
5418 687004 : Instruction *I = WorkList.back();
5419 : WorkList.pop_back();
5420 :
5421 : // Break use-def graph loops.
5422 687004 : if (!Visited.insert(I).second)
5423 12871 : continue;
5424 :
5425 : // For a PHI node, push all of its users.
5426 686783 : if (auto *Phi = dyn_cast<PHINode>(I)) {
5427 31577 : for (auto *U : Phi->users())
5428 18927 : WorkList.push_back(cast<Instruction>(U));
5429 : continue;
5430 : }
5431 :
5432 674133 : switch (I->getOpcode()) {
5433 8898 : case Instruction::And: {
5434 8898 : auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
5435 : if (!AndC)
5436 1725 : return false;
5437 : APInt AndBits = AndC->getValue();
5438 : DemandBits |= AndBits;
5439 : // Keep track of the widest and mask we see.
5440 7173 : if (AndBits.ugt(WidestAndBits))
5441 6908 : WidestAndBits = AndBits;
5442 14178 : if (AndBits == WidestAndBits && I->getOperand(0) == Load)
5443 6539 : AndsToMaybeRemove.push_back(I);
5444 : break;
5445 : }
5446 :
5447 1318 : case Instruction::Shl: {
5448 1318 : auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
5449 : if (!ShlC)
5450 : return false;
5451 956 : uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
5452 956 : DemandBits.setLowBits(BitWidth - ShiftAmt);
5453 : break;
5454 : }
5455 :
5456 1334 : case Instruction::Trunc: {
5457 1334 : EVT TruncVT = TLI->getValueType(*DL, I->getType());
5458 1334 : unsigned TruncBitWidth = TruncVT.getSizeInBits();
5459 : DemandBits.setLowBits(TruncBitWidth);
5460 : break;
5461 : }
5462 :
5463 : default:
5464 : return false;
5465 : }
5466 : }
5467 :
5468 : uint32_t ActiveBits = DemandBits.getActiveBits();
5469 : // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
5470 : // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
5471 : // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
5472 : // (and (load x) 1) is not matched as a single instruction, rather as a LDR
5473 : // followed by an AND.
5474 : // TODO: Look into removing this restriction by fixing backends to either
5475 : // return false for isLoadExtLegal for i1 or have them select this pattern to
5476 : // a single instruction.
5477 : //
5478 : // Also avoid hoisting if we didn't see any ands with the exact DemandBits
5479 : // mask, since these are the only ands that will be removed by isel.
5480 10483 : if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
5481 : WidestAndBits != DemandBits)
5482 : return false;
5483 :
5484 252 : LLVMContext &Ctx = Load->getType()->getContext();
5485 252 : Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
5486 252 : EVT TruncVT = TLI->getValueType(*DL, TruncTy);
5487 :
5488 : // Reject cases that won't be matched as extloads.
5489 323 : if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
5490 71 : !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
5491 : return false;
5492 :
5493 71 : IRBuilder<> Builder(Load->getNextNode());
5494 71 : auto *NewAnd = dyn_cast<Instruction>(
5495 71 : Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
5496 : // Mark this instruction as "inserted by CGP", so that other
5497 : // optimizations don't touch it.
5498 71 : InsertedInsts.insert(NewAnd);
5499 :
5500 : // Replace all uses of load with new and (except for the use of load in the
5501 : // new and itself).
5502 71 : Load->replaceAllUsesWith(NewAnd);
5503 71 : NewAnd->setOperand(0, Load);
5504 :
5505 : // Remove any and instructions that are now redundant.
5506 133 : for (auto *And : AndsToMaybeRemove)
5507 : // Check that the and mask is the same as the one we decided to put on the
5508 : // new and.
5509 124 : if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
5510 62 : And->replaceAllUsesWith(NewAnd);
5511 124 : if (&*CurInstIterator == And)
5512 76 : CurInstIterator = std::next(And->getIterator());
5513 62 : And->eraseFromParent();
5514 : ++NumAndUses;
5515 : }
5516 :
5517 : ++NumAndsAdded;
5518 : return true;
5519 : }
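// Illustrative sketch of the demanded-bits walk above (hypothetical IR, names
// made up; assumes the target reports an i32 zextload from i8 as legal):
//
//   %x = load i32, i32* %p
//   %t = trunc i32 %x to i8
//   %m = and i32 %x, 255
//
// Both users demand only the low 8 bits, so DemandBits becomes 0xff, a new
// "and i32 %x, 255" is inserted right after the load, %t and %m are rewritten
// to use it, and %m itself is erased as redundant.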
5520 :
5521 : /// Check if V (an operand of a select instruction) is an expensive instruction
5522 : /// that is only used once.
5523 10265 : static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
5524 : auto *I = dyn_cast<Instruction>(V);
5525 : // If it's safe to speculatively execute, then it should not have side
5526 : // effects; therefore, it's safe to sink and possibly *not* execute.
5527 8837 : return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
5528 2762 : TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
5529 : }
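// Illustrative example (hypothetical IR): in
//   %div = fdiv double %x, %y
//   %sel = select i1 %c, double %div, double 0.0
// the fdiv is a single-use instruction that is safe to speculate, so if the
// target's cost model rates it at TCC_Expensive or above, sinkSelectOperand
// reports it as a candidate to sink into the conditional block created for
// the select.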
5530 :
5531 : /// Returns true if a SelectInst should be turned into an explicit branch.
5532 15238 : static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
5533 : const TargetLowering *TLI,
5534 : SelectInst *SI) {
5535 : // If even a predictable select is cheap, then a branch can't be cheaper.
5536 15238 : if (!TLI->isPredictableSelectExpensive())
5537 : return false;
5538 :
5539 : // FIXME: This should use the same heuristics as IfConversion to determine
5540 : // whether a select is better represented as a branch.
5541 :
5542 : // If metadata tells us that the select condition is obviously predictable,
5543 : // then we want to replace the select with a branch.
5544 : uint64_t TrueWeight, FalseWeight;
5545 6709 : if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
5546 9 : uint64_t Max = std::max(TrueWeight, FalseWeight);
5547 9 : uint64_t Sum = TrueWeight + FalseWeight;
5548 9 : if (Sum != 0) {
5549 8 : auto Probability = BranchProbability::getBranchProbability(Max, Sum);
5550 8 : if (Probability > TLI->getPredictableBranchThreshold())
5551 7 : return true;
5552 : }
5553 : }
5554 :
5555 : CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
5556 :
5557 : // If a branch is predictable, an out-of-order CPU can avoid blocking on its
5558 : // comparison condition. If the compare has more than one use, there's
5559 : // probably another cmov or setcc around, so it's not worth emitting a branch.
5560 5687 : if (!Cmp || !Cmp->hasOneUse())
5561 : return false;
5562 :
5563 : // If either operand of the select is expensive and only needed on one side
5564 : // of the select, we should form a branch.
5565 10211 : if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
5566 5103 : sinkSelectOperand(TTI, SI->getFalseValue()))
5567 8 : return true;
5568 :
5569 : return false;
5570 : }
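// Illustrative example (hypothetical IR and metadata): on a target where
// predictable selects are expensive,
//   %sel = select i1 %cmp, i32 %a, i32 %b, !prof !0
//   !0 = !{!"branch_weights", i32 1000, i32 1}
// gives Max/Sum = 1000/1001, which normally exceeds the predictable-branch
// threshold, so the function returns true and the select is later expanded
// into an explicit branch.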
5571 :
5572 : /// If \p isTrue is true, return the true value of \p SI, otherwise return
5573 : /// the false value of \p SI. If the true/false value of \p SI is defined by any
5574 : /// select instructions in \p Selects, look through the defining select
5575 : /// instruction until the true/false value is not defined in \p Selects.
5576 54 : static Value *getTrueOrFalseValue(
5577 : SelectInst *SI, bool isTrue,
5578 : const SmallPtrSet<const Instruction *, 2> &Selects) {
5579 : Value *V;
5580 :
5581 111 : for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
5582 : DefSI = dyn_cast<SelectInst>(V)) {
5583 : assert(DefSI->getCondition() == SI->getCondition() &&
5584 : "The condition of DefSI does not match with SI");
5585 57 : V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
5586 : }
5587 54 : return V;
5588 : }
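// Illustrative example (hypothetical IR): with two selects sharing a condition,
//   %s1 = select i1 %c, i32 %a, i32 %b
//   %s2 = select i1 %c, i32 %s1, i32 %d
// and Selects = {%s1, %s2}, getTrueOrFalseValue(%s2, true, Selects) looks
// through %s1 and returns %a, while getTrueOrFalseValue(%s2, false, Selects)
// returns %d directly.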
5589 :
5590 : /// If we have a SelectInst that will likely profit from branch prediction,
5591 : /// turn it into a branch.
5592 37997 : bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
5593 : // If branch conversion isn't desirable, exit early.
5594 37997 : if (DisableSelectToBranch || OptSize || !TLI)
5595 : return false;
5596 :
5597 : // Find all consecutive select instructions that share the same condition.
5598 : SmallVector<SelectInst *, 2> ASI;
5599 37871 : ASI.push_back(SI);
5600 37871 : for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
5601 77284 : It != SI->getParent()->end(); ++It) {
5602 38642 : SelectInst *I = dyn_cast<SelectInst>(&*It);
5603 38642 : if (I && SI->getCondition() == I->getCondition()) {
5604 771 : ASI.push_back(I);
5605 : } else {
5606 : break;
5607 : }
5608 : }
5609 :
5610 37871 : SelectInst *LastSI = ASI.back();
5611 : // Advance the current iterator past the rest of the select instructions,
5612 : // because they will either all be lowered to a branch or none of them will be.
5613 75742 : CurInstIterator = std::next(LastSI->getIterator());
5614 :
5615 37871 : bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
5616 :
5617 : // Can we convert the 'select' to CF ?
5618 41946 : if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
5619 22623 : return false;
5620 :
5621 : TargetLowering::SelectSupportKind SelectKind;
5622 : if (VectorCond)
5623 : SelectKind = TargetLowering::VectorMaskSelect;
5624 30496 : else if (SI->getType()->isVectorTy())
5625 : SelectKind = TargetLowering::ScalarCondVectorVal;
5626 : else
5627 : SelectKind = TargetLowering::ScalarValSelect;
5628 :
5629 30486 : if (TLI->isSelectSupported(SelectKind) &&
5630 15238 : !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
5631 : return false;
5632 :
5633 25 : ModifiedDT = true;
5634 :
5635 : // Transform a sequence like this:
5636 : // start:
5637 : // %cmp = cmp uge i32 %a, %b
5638 : // %sel = select i1 %cmp, i32 %c, i32 %d
5639 : //
5640 : // Into:
5641 : // start:
5642 : // %cmp = cmp uge i32 %a, %b
5643 : // br i1 %cmp, label %select.true, label %select.false
5644 : // select.true:
5645 : // br label %select.end
5646 : // select.false:
5647 : // br label %select.end
5648 : // select.end:
5649 : // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
5650 : //
5651 : // In addition, we may sink instructions that produce %c or %d from
5652 : // the entry block into the destination(s) of the new branch.
5653 : // If the true or false blocks do not contain a sunken instruction, that
5654 : // block and its branch may be optimized away. In that case, one side of the
5655 : // first branch will point directly to select.end, and the corresponding PHI
5656 : // predecessor block will be the start block.
5657 :
5658 : // First, we split the block containing the select into 2 blocks.
5659 25 : BasicBlock *StartBlock = SI->getParent();
5660 25 : BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
5661 25 : BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
5662 :
5663 : // Delete the unconditional branch that was just created by the split.
5664 25 : StartBlock->getTerminator()->eraseFromParent();
5665 :
5666 : // These are the new basic blocks for the conditional branch.
5667 : // At least one will become an actual new basic block.
5668 : BasicBlock *TrueBlock = nullptr;
5669 : BasicBlock *FalseBlock = nullptr;
5670 : BranchInst *TrueBranch = nullptr;
5671 : BranchInst *FalseBranch = nullptr;
5672 :
5673 : // Sink expensive instructions into the conditional blocks to avoid executing
5674 : // them speculatively.
5675 52 : for (SelectInst *SI : ASI) {
5676 27 : if (sinkSelectOperand(TTI, SI->getTrueValue())) {
5677 5 : if (TrueBlock == nullptr) {
5678 15 : TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
5679 : EndBlock->getParent(), EndBlock);
5680 : TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
5681 5 : TrueBranch->setDebugLoc(SI->getDebugLoc());
5682 : }
5683 : auto *TrueInst = cast<Instruction>(SI->getTrueValue());
5684 5 : TrueInst->moveBefore(TrueBranch);
5685 : }
5686 27 : if (sinkSelectOperand(TTI, SI->getFalseValue())) {
5687 5 : if (FalseBlock == nullptr) {
5688 15 : FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
5689 : EndBlock->getParent(), EndBlock);
5690 : FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
5691 5 : FalseBranch->setDebugLoc(SI->getDebugLoc());
5692 : }
5693 : auto *FalseInst = cast<Instruction>(SI->getFalseValue());
5694 5 : FalseInst->moveBefore(FalseBranch);
5695 : }
5696 : }
5697 :
5698 : // If there was nothing to sink, then arbitrarily choose the 'false' side
5699 : // for a new input value to the PHI.
5700 25 : if (TrueBlock == FalseBlock) {
5701 : assert(TrueBlock == nullptr &&
5702 : "Unexpected basic block transform while optimizing select");
5703 :
5704 51 : FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
5705 : EndBlock->getParent(), EndBlock);
5706 : auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
5707 34 : FalseBranch->setDebugLoc(SI->getDebugLoc());
5708 : }
5709 :
5710 : // Insert the real conditional branch based on the original condition.
5711 : // If we did not create a new block for one of the 'true' or 'false' paths
5712 : // of the condition, it means that side of the branch goes to the end block
5713 : // directly and the path originates from the start block from the point of
5714 : // view of the new PHI.
5715 : BasicBlock *TT, *FT;
5716 25 : if (TrueBlock == nullptr) {
5717 : TT = EndBlock;
5718 : FT = FalseBlock;
5719 : TrueBlock = StartBlock;
5720 5 : } else if (FalseBlock == nullptr) {
5721 : TT = TrueBlock;
5722 : FT = EndBlock;
5723 : FalseBlock = StartBlock;
5724 : } else {
5725 : TT = TrueBlock;
5726 : FT = FalseBlock;
5727 : }
5728 50 : IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
5729 :
5730 : SmallPtrSet<const Instruction *, 2> INS;
5731 : INS.insert(ASI.begin(), ASI.end());
5732 : // Use a reverse iterator because a later select may use the value of an
5733 : // earlier select, and we need to propagate the value through the earlier select
5734 : // to get the PHI operand.
5735 52 : for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
5736 27 : SelectInst *SI = *It;
5737 : // The select itself is replaced with a PHI Node.
5738 27 : PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
5739 27 : PN->takeName(SI);
5740 27 : PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
5741 27 : PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
5742 27 : PN->setDebugLoc(SI->getDebugLoc());
5743 :
5744 27 : SI->replaceAllUsesWith(PN);
5745 27 : SI->eraseFromParent();
5746 : INS.erase(SI);
5747 : ++NumSelectsExpanded;
5748 : }
5749 :
5750 : // Instruct OptimizeBlock to skip to the next block.
5751 25 : CurInstIterator = StartBlock->end();
5752 : return true;
5753 : }
5754 :
5755 18015 : static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
5756 : SmallVector<int, 16> Mask(SVI->getShuffleMask());
5757 : int SplatElem = -1;
5758 57789 : for (unsigned i = 0; i < Mask.size(); ++i) {
5759 55561 : if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
5760 : return false;
5761 39774 : SplatElem = Mask[i];
5762 : }
5763 :
5764 : return true;
5765 : }
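// Illustrative masks (made up): <0, 0, 0, 0> and <1, undef, 1, 1> are accepted
// as broadcasts, while <0, 1, 0, 1> is rejected because two different source
// lanes appear in the mask.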
5766 :
5767 : /// Some targets have expensive vector shifts if the lanes aren't all the same
5768 : /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
5769 : /// it's often worth sinking a shufflevector splat down to its use so that
5770 : /// codegen can spot all lanes are identical.
5771 0 : bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
5772 0 : BasicBlock *DefBB = SVI->getParent();
5773 :
5774 : // Only do this xform if variable vector shifts are particularly expensive.
5775 0 : if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
5776 0 : return false;
5777 :
5778 : // We only expect better codegen by sinking a shuffle if we can recognise a
5779 : // constant splat.
5780 0 : if (!isBroadcastShuffle(SVI))
5781 0 : return false;
5782 :
5783 : // InsertedShuffles - Only insert a shuffle in each block once.
5784 : DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
5785 :
5786 : bool MadeChange = false;
5787 0 : for (User *U : SVI->users()) {
5788 : Instruction *UI = cast<Instruction>(U);
5789 :
5790 : // Figure out which BB this shuffle is used in.
5791 0 : BasicBlock *UserBB = UI->getParent();
5792 0 : if (UserBB == DefBB) continue;
5793 :
5794 : // For now only apply this when the splat is used by a shift instruction.
5795 0 : if (!UI->isShift()) continue;
5796 :
5797 : // Everything checks out, sink the shuffle if the user's block doesn't
5798 : // already have a copy.
5799 : Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
5800 :
5801 0 : if (!InsertedShuffle) {
5802 0 : BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5803 : assert(InsertPt != UserBB->end());
5804 0 : InsertedShuffle =
5805 : new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
5806 0 : SVI->getOperand(2), "", &*InsertPt);
5807 : }
5808 :
5809 0 : UI->replaceUsesOfWith(SVI, InsertedShuffle);
5810 : MadeChange = true;
5811 : }
5812 :
5813 : // If we removed all uses, nuke the shuffle.
5814 0 : if (SVI->use_empty()) {
5815 0 : SVI->eraseFromParent();
5816 : MadeChange = true;
5817 : }
5818 :
5819 : return MadeChange;
5820 : }
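// Illustrative sketch of the sink above (hypothetical IR, assuming the target
// reports vector shift-by-splat as cheap):
//   entry:
//     %amt = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
//     br i1 %c, label %use, label %exit
//   use:
//     %shl = shl <4 x i32> %x, %amt
// A copy of the shufflevector is inserted at the top of %use and the shift is
// rewritten to use it, so the backend can see the shift amount is a splat; the
// original shuffle is deleted if it has no uses left.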
5821 :
5822 0 : bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
5823 0 : if (!TLI || !DL)
5824 0 : return false;
5825 :
5826 : Value *Cond = SI->getCondition();
5827 0 : Type *OldType = Cond->getType();
5828 0 : LLVMContext &Context = Cond->getContext();
5829 0 : MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
5830 0 : unsigned RegWidth = RegType.getSizeInBits();
5831 :
5832 0 : if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
5833 0 : return false;
5834 :
5835 : // If the register width is greater than the type width, expand the condition
5836 : // of the switch instruction and each case constant to the width of the
5837 : // register. By widening the type of the switch condition, subsequent
5838 : // comparisons (for case comparisons) will not need to be extended to the
5839 : // preferred register width, so we will potentially eliminate N-1 extends,
5840 : // where N is the number of cases in the switch.
5841 0 : auto *NewType = Type::getIntNTy(Context, RegWidth);
5842 :
5843 : // Zero-extend the switch condition and case constants unless the switch
5844 : // condition is a function argument that is already being sign-extended.
5845 : // In that case, we can avoid an unnecessary mask/extension by sign-extending
5846 : // everything instead.
5847 : Instruction::CastOps ExtType = Instruction::ZExt;
5848 : if (auto *Arg = dyn_cast<Argument>(Cond))
5849 0 : if (Arg->hasSExtAttr())
5850 : ExtType = Instruction::SExt;
5851 :
5852 0 : auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
5853 0 : ExtInst->insertBefore(SI);
5854 0 : ExtInst->setDebugLoc(SI->getDebugLoc());
5855 : SI->setCondition(ExtInst);
5856 0 : for (auto Case : SI->cases()) {
5857 : APInt NarrowConst = Case.getCaseValue()->getValue();
5858 : APInt WideConst = (ExtType == Instruction::ZExt) ?
5859 0 : NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
5860 0 : Case.setValue(ConstantInt::get(Context, WideConst));
5861 : }
5862 :
5863 0 : return true;
5864 : }
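// Illustrative sketch (hypothetical IR, assuming 32-bit general registers):
//   switch i8 %c, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// becomes
//   %c.ext = zext i8 %c to i32
//   switch i32 %c.ext, label %def [ i32 1, label %a
//                                   i32 2, label %b ]
// so each case comparison no longer needs its own extension to register width.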
5865 :
5866 :
5867 : namespace {
5868 :
5869 : /// Helper class to promote a scalar operation to a vector one.
5870 : /// This class is used to move downward extractelement transition.
5871 : /// This class is used to move an extractelement transition downward.
5872 : /// a = vector_op <2 x i32>
5873 : /// b = extractelement <2 x i32> a, i32 0
5874 : /// c = scalar_op b
5875 : /// store c
5876 : ///
5877 : /// =>
5878 : /// a = vector_op <2 x i32>
5879 : /// c = vector_op a (equivalent to scalar_op on the related lane)
5880 : /// * d = extractelement <2 x i32> c, i32 0
5881 : /// * store d
5882 : /// Assuming both extractelement and store can be combine, we get rid of the
5883 : /// Assuming both extractelement and store can be combined, we get rid of the
5884 : class VectorPromoteHelper {
5885 : /// DataLayout associated with the current module.
5886 : const DataLayout &DL;
5887 :
5888 : /// Used to perform some checks on the legality of vector operations.
5889 : const TargetLowering &TLI;
5890 :
5891 : /// Used to estimated the cost of the promoted chain.
5892 : /// Used to estimate the cost of the promoted chain.
5893 :
5894 : /// The transition being moved downwards.
5895 : Instruction *Transition;
5896 :
5897 : /// The sequence of instructions to be promoted.
5898 : SmallVector<Instruction *, 4> InstsToBePromoted;
5899 :
5900 : /// Cost of combining a store and an extract.
5901 : unsigned StoreExtractCombineCost;
5902 :
5903 : /// Instruction that will be combined with the transition.
5904 : Instruction *CombineInst = nullptr;
5905 :
5906 : /// The instruction that represents the current end of the transition.
5907 : /// Since we are faking the promotion until we reach the end of the chain
5908 : /// of computation, we need a way to get the current end of the transition.
5909 : Instruction *getEndOfTransition() const {
5910 117 : if (InstsToBePromoted.empty())
5911 81 : return Transition;
5912 36 : return InstsToBePromoted.back();
5913 : }
5914 :
5915 : /// Return the index of the original value in the transition.
5916 : /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
5917 : /// c, is at index 0.
5918 0 : unsigned getTransitionOriginalValueIdx() const {
5919 : assert(isa<ExtractElementInst>(Transition) &&
5920 : "Other kind of transitions are not supported yet");
5921 0 : return 0;
5922 : }
5923 :
5924 : /// Return the index of the index in the transition.
5925 : /// E.g., for "extractelement <2 x i32> c, i32 0" the index
5926 : /// is at index 1.
5927 0 : unsigned getTransitionIdx() const {
5928 : assert(isa<ExtractElementInst>(Transition) &&
5929 : "Other kind of transitions are not supported yet");
5930 0 : return 1;
5931 : }
5932 :
5933 : /// Get the type of the transition.
5934 : /// This is the type of the original value.
5935 : /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
5936 : /// transition is <2 x i32>.
5937 0 : Type *getTransitionType() const {
5938 76 : return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
5939 : }
5940 :
5941 : /// Promote \p ToBePromoted by moving \p Def downward through it.
5942 : /// I.e., we have the following sequence:
5943 : /// Def = Transition <ty1> a to <ty2>
5944 : /// b = ToBePromoted <ty2> Def, ...
5945 : /// =>
5946 : /// b = ToBePromoted <ty1> a, ...
5947 : /// Def = Transition <ty1> ToBePromoted to <ty2>
5948 : void promoteImpl(Instruction *ToBePromoted);
5949 :
5950 : /// Check whether or not it is profitable to promote all the
5951 : /// instructions enqueued to be promoted.
5952 6 : bool isProfitableToPromote() {
5953 6 : Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
5954 : unsigned Index = isa<ConstantInt>(ValIdx)
5955 : ? cast<ConstantInt>(ValIdx)->getZExtValue()
5956 6 : : -1;
5957 : Type *PromotedType = getTransitionType();
5958 :
5959 6 : StoreInst *ST = cast<StoreInst>(CombineInst);
5960 : unsigned AS = ST->getPointerAddressSpace();
5961 : unsigned Align = ST->getAlignment();
5962 : // Check if this store is supported.
5963 6 : if (!TLI.allowsMisalignedMemoryAccesses(
5964 : TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
5965 6 : Align)) {
5966 : // If this is not supported, there is no way we can combine
5967 : // the extract with the store.
5968 : return false;
5969 : }
5970 :
5971 : // The scalar chain of computation has to pay for the transition
5972 : // scalar to vector.
5973 : // The vector chain has to account for the combining cost.
5974 : uint64_t ScalarCost =
5975 12 : TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
5976 6 : uint64_t VectorCost = StoreExtractCombineCost;
5977 24 : for (const auto &Inst : InstsToBePromoted) {
5978 : // Compute the cost.
5979 : // By construction, all instructions being promoted are arithmetic ones.
5980 : // Moreover, one argument is a constant that can be viewed as a splat
5981 : // constant.
5982 18 : Value *Arg0 = Inst->getOperand(0);
5983 18 : bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
5984 : isa<ConstantFP>(Arg0);
5985 : TargetTransformInfo::OperandValueKind Arg0OVK =
5986 : IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
5987 : : TargetTransformInfo::OK_AnyValue;
5988 : TargetTransformInfo::OperandValueKind Arg1OVK =
5989 18 : !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
5990 : : TargetTransformInfo::OK_AnyValue;
5991 18 : ScalarCost += TTI.getArithmeticInstrCost(
5992 18 : Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
5993 18 : VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
5994 18 : Arg0OVK, Arg1OVK);
5995 : }
5996 : LLVM_DEBUG(
5997 : dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
5998 : << ScalarCost << "\nVector: " << VectorCost << '\n');
5999 6 : return ScalarCost > VectorCost;
6000 : }
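// Illustrative arithmetic (made-up costs): if the extractelement costs 2, the
// store+extract combine costs 1, and the single enqueued add costs 1 both as a
// scalar and as a vector operation, then ScalarCost = 2 + 1 = 3 and
// VectorCost = 1 + 1 = 2, so the promotion is reported as profitable.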
6001 :
6002 : /// Generate a constant vector with \p Val with the same
6003 : /// number of elements as the transition.
6004 : /// \p UseSplat defines whether or not \p Val should be replicated
6005 : /// across the whole vector.
6006 : /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
6007 : /// otherwise we generate a vector with as many undef as possible:
6008 : /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
6009 : /// used at the index of the extract.
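/// For illustration (hypothetical values): for the transition
/// "extractelement <4 x i32> %v, i32 2" and Val == 7, UseSplat == false yields
/// <undef, undef, 7, undef>, while UseSplat == true yields <7, 7, 7, 7>.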
6010 38 : Value *getConstantVector(Constant *Val, bool UseSplat) const {
6011 : unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
6012 38 : if (!UseSplat) {
6013 : // If we cannot determine where the constant must be, we have to
6014 : // use a splat constant.
6015 32 : Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
6016 : if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
6017 31 : ExtractIdx = CstVal->getSExtValue();
6018 : else
6019 : UseSplat = true;
6020 : }
6021 :
6022 38 : unsigned End = getTransitionType()->getVectorNumElements();
6023 38 : if (UseSplat)
6024 7 : return ConstantVector::getSplat(End, Val);
6025 :
6026 : SmallVector<Constant *, 4> ConstVec;
6027 31 : UndefValue *UndefVal = UndefValue::get(Val->getType());
6028 105 : for (unsigned Idx = 0; Idx != End; ++Idx) {
6029 74 : if (Idx == ExtractIdx)
6030 31 : ConstVec.push_back(Val);
6031 : else
6032 43 : ConstVec.push_back(UndefVal);
6033 : }
6034 31 : return ConstantVector::get(ConstVec);
6035 : }
6036 :
6037 : /// Check if promoting to a vector type an operand at \p OperandIdx
6038 : /// Check if promoting the operand at \p OperandIdx of \p Use to a vector
6039 : /// type can trigger undefined behavior.
6040 : unsigned OperandIdx) {
6041 : // It is not safe to introduce undef when the operand is on
6042 : // the right hand side of a division-like instruction.
6043 96 : if (OperandIdx != 1)
6044 : return false;
6045 : switch (Use->getOpcode()) {
6046 : default:
6047 : return false;
6048 : case Instruction::SDiv:
6049 : case Instruction::UDiv:
6050 : case Instruction::SRem:
6051 : case Instruction::URem:
6052 : return true;
6053 4 : case Instruction::FDiv:
6054 : case Instruction::FRem:
6055 4 : return !Use->hasNoNaNs();
6056 : }
6057 : llvm_unreachable(nullptr);
6058 : }
6059 :
6060 : public:
6061 : VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
6062 : const TargetTransformInfo &TTI, Instruction *Transition,
6063 : unsigned CombineCost)
6064 138 : : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
6065 138 : StoreExtractCombineCost(CombineCost) {
6066 : assert(Transition && "Do not know how to promote null");
6067 : }
6068 :
6069 : /// Check if we can promote \p ToBePromoted to \p Type.
6070 0 : bool canPromote(const Instruction *ToBePromoted) const {
6071 : // We could support CastInst too.
6072 0 : return isa<BinaryOperator>(ToBePromoted);
6073 : }
6074 :
6075 : /// Check if it is profitable to promote \p ToBePromoted
6076 : /// by moving the transition downward through it.
6077 59 : bool shouldPromote(const Instruction *ToBePromoted) const {
6078 : // Promote only if all the operands can be statically expanded.
6079 : // Indeed, we do not want to introduce any new kind of transitions.
6080 227 : for (const Use &U : ToBePromoted->operands()) {
6081 117 : const Value *Val = U.get();
6082 117 : if (Val == getEndOfTransition()) {
6083 : // If the use is a division and the transition is on the rhs,
6084 : // we cannot promote the operation, otherwise we may create a
6085 : // division by zero.
6086 58 : if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
6087 : return false;
6088 : continue;
6089 : }
6090 59 : if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
6091 : !isa<ConstantFP>(Val))
6092 : return false;
6093 : }
6094 : // Check that the resulting operation is legal.
6095 102 : int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
6096 51 : if (!ISDOpcode)
6097 : return false;
6098 81 : return StressStoreExtract ||
6099 60 : TLI.isOperationLegalOrCustom(
6100 : ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
6101 : }
6102 :
6103 : /// Check whether or not \p Use can be combined
6104 : /// with the transition.
6105 : /// I.e., is it possible to do Use(Transition) => AnotherUse?
6106 0 : bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
6107 :
6108 : /// Record \p ToBePromoted as part of the chain to be promoted.
6109 : void enqueueForPromotion(Instruction *ToBePromoted) {
6110 41 : InstsToBePromoted.push_back(ToBePromoted);
6111 : }
6112 :
6113 : /// Set the instruction that will be combined with the transition.
6114 0 : void recordCombineInstruction(Instruction *ToBeCombined) {
6115 : assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
6116 73 : CombineInst = ToBeCombined;
6117 0 : }
6118 :
6119 : /// Promote all the instructions enqueued for promotion if it
6120 : /// is profitable.
6121 : /// \return True if the promotion happened, false otherwise.
6122 73 : bool promote() {
6123 : // Check if there is something to promote.
6124 : // Right now, if we do not have anything to combine with,
6125 : // we assume the promotion is not profitable.
6126 73 : if (InstsToBePromoted.empty() || !CombineInst)
6127 : return false;
6128 :
6129 : // Check cost.
6130 20 : if (!StressStoreExtract && !isProfitableToPromote())
6131 : return false;
6132 :
6133 : // Promote.
6134 58 : for (auto &ToBePromoted : InstsToBePromoted)
6135 38 : promoteImpl(ToBePromoted);
6136 : InstsToBePromoted.clear();
6137 20 : return true;
6138 : }
6139 : };
6140 :
6141 : } // end anonymous namespace
6142 :
6143 38 : void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
6144 : // At this point, we know that all the operands of ToBePromoted but Def
6145 : // can be statically promoted.
6146 : // For Def, we need to use its parameter in ToBePromoted:
6147 : // b = ToBePromoted ty1 a
6148 : // Def = Transition ty1 b to ty2
6149 : // Move the transition down.
6150 : // 1. Replace all uses of the promoted operation by the transition.
6151 : // = ... b => = ... Def.
6152 : assert(ToBePromoted->getType() == Transition->getType() &&
6153 : "The type of the result of the transition does not match "
6154 : "the final type");
6155 38 : ToBePromoted->replaceAllUsesWith(Transition);
6156 : // 2. Update the type of the uses.
6157 : // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
6158 38 : Type *TransitionTy = getTransitionType();
6159 : ToBePromoted->mutateType(TransitionTy);
6160 : // 3. Update all the operands of the promoted operation with promoted
6161 : // operands.
6162 : // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
6163 152 : for (Use &U : ToBePromoted->operands()) {
6164 76 : Value *Val = U.get();
6165 : Value *NewVal = nullptr;
6166 76 : if (Val == Transition)
6167 38 : NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
6168 38 : else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
6169 : isa<ConstantFP>(Val)) {
6170 : // Use a splat constant if it is not safe to use undef.
6171 38 : NewVal = getConstantVector(
6172 : cast<Constant>(Val),
6173 76 : isa<UndefValue>(Val) ||
6174 38 : canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
6175 : } else
6176 0 : llvm_unreachable("Did you modify shouldPromote and forget to update "
6177 : "this?");
6178 76 : ToBePromoted->setOperand(U.getOperandNo(), NewVal);
6179 : }
6180 38 : Transition->moveAfter(ToBePromoted);
6181 38 : Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
6182 38 : }
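// Illustrative sketch of one promotion step (hypothetical IR):
//   %ext = extractelement <2 x i32> %v, i32 1
//   %add = add i32 %ext, 3
//   store i32 %add, i32* %p
// becomes
//   %add = add <2 x i32> %v, <i32 undef, i32 3>
//   %ext = extractelement <2 x i32> %add, i32 1
//   store i32 %ext, i32* %p
// so a later combine can fold the extract into the store.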
6183 :
6184 : /// Some targets can do store(extractelement) with one instruction.
6185 : /// Try to push the extractelement towards the stores when the target
6186 : /// has this feature and this is profitable.
6187 30205 : bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
6188 30205 : unsigned CombineCost = std::numeric_limits<unsigned>::max();
6189 30205 : if (DisableStoreExtract || !TLI ||
6190 30158 : (!StressStoreExtract &&
6191 30158 : !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
6192 30158 : Inst->getOperand(1), CombineCost)))
6193 30067 : return false;
6194 :
6195 : // At this point we know that Inst is a vector to scalar transition.
6196 : // Try to move it down the def-use chain, until:
6197 : // - We can combine the transition with its single use
6198 : // => we got rid of the transition.
6199 : // - We escape the current basic block
6200 : // => we would need to check that we are moving it at a cheaper place and
6201 : // we do not do that for now.
6202 138 : BasicBlock *Parent = Inst->getParent();
6203 : LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
6204 138 : VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
6205 : // If the transition has more than one use, assume this is not going to be
6206 : // beneficial.
6207 179 : while (Inst->hasOneUse()) {
6208 : Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
6209 : LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
6210 :
6211 166 : if (ToBePromoted->getParent() != Parent) {
6212 : LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
6213 : << ToBePromoted->getParent()->getName()
6214 : << ") than the transition (" << Parent->getName()
6215 : << ").\n");
6216 : return false;
6217 : }
6218 :
6219 163 : if (VPH.canCombine(ToBePromoted)) {
6220 : LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
6221 : << "will be combined with: " << *ToBePromoted << '\n');
6222 : VPH.recordCombineInstruction(ToBePromoted);
6223 73 : bool Changed = VPH.promote();
6224 : NumStoreExtractExposed += Changed;
6225 73 : return Changed;
6226 : }
6227 :
6228 : LLVM_DEBUG(dbgs() << "Try promoting.\n");
6229 90 : if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
6230 49 : return false;
6231 :
6232 : LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
6233 :
6234 41 : VPH.enqueueForPromotion(ToBePromoted);
6235 : Inst = ToBePromoted;
6236 : }
6237 : return false;
6238 : }
6239 :
6240 : /// For the instruction sequence of store below, F and I values
6241 : /// are bundled together as an i64 value before being stored into memory.
6242 : /// Sometimes it is more efficient to generate separate stores for F and I,
6243 : /// which can remove the bitwise instructions or sink them to colder places.
6244 : ///
6245 : /// (store (or (zext (bitcast F to i32) to i64),
6246 : /// (shl (zext I to i64), 32)), addr) -->
6247 : /// (store F, addr) and (store I, addr+4)
6248 : ///
6249 : /// Similarly, splitting for other merged store can also be beneficial, like:
6250 : /// Similarly, splitting for other merged stores can also be beneficial, like:
6251 : /// For pair of {i32, i16}, i64 store --> two i32 stores.
6252 : /// For pair of {i16, i16}, i32 store --> two i16 stores.
6253 : /// For pair of {i16, i8}, i32 store --> two i16 stores.
6254 : /// For pair of {i8, i8}, i16 store --> two i8 stores.
6255 : ///
6256 : /// We allow each target to determine specifically which kind of splitting is
6257 : /// supported.
6258 : ///
6259 : /// The store patterns are commonly seen from the simple code snippet below
6260 : /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
6261 : /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
6262 : /// hoo() {
6263 : /// ...
6264 : /// goo(std::make_pair(tmp, ftmp));
6265 : /// ...
6266 : /// }
6267 : ///
6268 : /// Although we already have similar splitting in DAG Combine, we duplicate
6269 : /// it in CodeGenPrepare to catch the case in which pattern is across
6270 : /// it in CodeGenPrepare to catch the case in which the pattern spans
6271 : /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
6272 791326 : static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
6273 : const TargetLowering &TLI) {
6274 : // Handle simple but common cases only.
6275 791326 : Type *StoreType = SI.getValueOperand()->getType();
6276 1581070 : if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
6277 789744 : DL.getTypeSizeInBits(StoreType) == 0)
6278 1613 : return false;
6279 :
6280 789713 : unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
6281 789713 : Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
6282 789713 : if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
6283 789713 : DL.getTypeSizeInBits(SplitStoreType))
6284 : return false;
6285 :
6286 : // Match the following patterns:
6287 : // (store (or (zext LValue to i64),
6288 : // (shl (zext HValue to i64), 32)), HalfValBitSize)
6289 : // or
6290 : //   (store (or (shl (zext HValue to i64), 32),
6291 : //              (zext LValue to i64)), HalfValBitSize)
6292 : // Expect both operands of OR and the first operand of SHL to have only
6293 : // one use.
6294 : Value *LValue, *HValue;
6295 772125 : if (!match(SI.getValueOperand(),
6296 772125 : m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
6297 : m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
6298 : m_SpecificInt(HalfValBitSize))))))
6299 : return false;
6300 :
6301 : // Check LValue and HValue are int with size less or equal than 32.
6302 138 : if (!LValue->getType()->isIntegerTy() ||
6303 138 : DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
6304 207 : !HValue->getType()->isIntegerTy() ||
6305 69 : DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
6306 0 : return false;
6307 :
6308 : // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
6309 : // as the input of target query.
6310 69 : auto *LBC = dyn_cast<BitCastInst>(LValue);
6311 69 : auto *HBC = dyn_cast<BitCastInst>(HValue);
6312 1 : EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
6313 69 : : EVT::getEVT(LValue->getType());
6314 6 : EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
6315 69 : : EVT::getEVT(HValue->getType());
6316 69 : if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
6317 : return false;
6318 :
6319 : // Start to split store.
6320 15 : IRBuilder<> Builder(SI.getContext());
6321 15 : Builder.SetInsertPoint(&SI);
6322 :
6323 : // If LValue/HValue is a bitcast in another BB, create a new one in current
6324 : // BB so it may be merged with the splitted stores by dag combiner.
6325 15 : if (LBC && LBC->getParent() != SI.getParent())
6326 0 : LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
6327 15 : if (HBC && HBC->getParent() != SI.getParent())
6328 6 : HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
6329 :
6330 15 : bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
6331 : auto CreateSplitStore = [&](Value *V, bool Upper) {
6332 : V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
6333 : Value *Addr = Builder.CreateBitCast(
6334 : SI.getOperand(1),
6335 : SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
6336 : if ((IsLE && Upper) || (!IsLE && !Upper))
6337 : Addr = Builder.CreateGEP(
6338 : SplitStoreType, Addr,
6339 : ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
6340 : Builder.CreateAlignedStore(
6341 : V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
6342 15 : };
6343 :
6344 15 : CreateSplitStore(LValue, false);
6345 15 : CreateSplitStore(HValue, true);
6346 :
6347 : // Delete the old store.
6348 15 : SI.eraseFromParent();
6349 : return true;
6350 : }
6351 :
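// Illustrative sketch (hypothetical IR, little-endian target that prefers the
// split, or with ForceSplitStore set):
//   %l = zext i32 %lo to i64
//   %h = zext i32 %hi to i64
//   %s = shl i64 %h, 32
//   %o = or i64 %l, %s
//   store i64 %o, i64* %p
// is replaced by an i32 store of %lo to %p and an i32 store of %hi to %p plus
// one i32 element.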
6352 : // Return true if the GEP has two operands, the first operand is of a sequential
6353 : // type, and the second operand is a constant.
6354 86 : static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
6355 86 : gep_type_iterator I = gep_type_begin(*GEP);
6356 21 : return GEP->getNumOperands() == 2 &&
6357 107 : I.isSequential() &&
6358 86 : isa<ConstantInt>(GEP->getOperand(1));
6359 : }
6360 :
6361 : // Try unmerging GEPs to reduce liveness interference (register pressure) across
6362 : // IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
6363 : // reducing liveness interference across those edges benefits global register
6364 : // allocation. Currently handles only certain cases.
6365 : //
6366 : // For example, unmerge %GEPI and %UGEPI as below.
6367 : //
6368 : // ---------- BEFORE ----------
6369 : // SrcBlock:
6370 : // ...
6371 : // %GEPIOp = ...
6372 : // ...
6373 : // %GEPI = gep %GEPIOp, Idx
6374 : // ...
6375 : // indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
6376 : // (* %GEPI is alive on the indirectbr edges due to other uses ahead)
6377 : // (* %GEPIOp is alive on the indirectbr edges only because of it's used by
6378 : // (* %GEPIOp is alive on the indirectbr edges only because it's used by
6379 : //
6380 : // DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
6381 : // DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
6382 : // ...
6383 : //
6384 : // DstBi:
6385 : // ...
6386 : // %UGEPI = gep %GEPIOp, UIdx
6387 : // ...
6388 : // ---------------------------
6389 : //
6390 : // ---------- AFTER ----------
6391 : // SrcBlock:
6392 : // ... (same as above)
6393 : // (* %GEPI is still alive on the indirectbr edges)
6394 : // (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
6395 : // unmerging)
6396 : // ...
6397 : //
6398 : // DstBi:
6399 : // ...
6400 : // %UGEPI = gep %GEPI, (UIdx-Idx)
6401 : // ...
6402 : // ---------------------------
6403 : //
6404 : // The register pressure on the IndirectBr edges is reduced because %GEPIOp is
6405 : // no longer alive on them.
6406 : //
6407 : // We try to unmerge GEPs here in CodGenPrepare, as opposed to limiting merging
6408 : // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
6409 : // of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
6410 : // not to disable further simplifications and optimizations as a result of GEP
6411 : //
6412 : // Note this unmerging may increase the length of the data flow critical path
6413 : // (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
6414 : // between the register pressure and the length of data-flow critical
6415 : // path. Restricting this to the uncommon IndirectBr case would minimize the
6416 : // impact of potentially longer critical path, if any, and the impact on compile
6417 : // time.
6418 209279 : static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
6419 : const TargetTransformInfo *TTI) {
6420 209279 : BasicBlock *SrcBlock = GEPI->getParent();
6421 : // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
6422 : // (non-IndirectBr) cases exit early here.
6423 209279 : if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
6424 : return false;
6425 : // Check that GEPI is a simple gep with a single constant index.
6426 83 : if (!GEPSequentialConstIndexed(GEPI))
6427 : return false;
6428 : ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
6429 : // Check that GEPI is a cheap one.
6430 8 : if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
6431 : > TargetTransformInfo::TCC_Basic)
6432 : return false;
6433 : Value *GEPIOp = GEPI->getOperand(0);
6434 : // Check that GEPIOp is an instruction that's also defined in SrcBlock.
6435 8 : if (!isa<Instruction>(GEPIOp))
6436 : return false;
6437 : auto *GEPIOpI = cast<Instruction>(GEPIOp);
6438 8 : if (GEPIOpI->getParent() != SrcBlock)
6439 : return false;
6440 : // Check that GEP is used outside the block, meaning it's alive on the
6441 : // IndirectBr edge(s).
6442 8 : if (find_if(GEPI->users(), [&](User *Usr) {
6443 : if (auto *I = dyn_cast<Instruction>(Usr)) {
6444 0 : if (I->getParent() != SrcBlock) {
6445 : return true;
6446 : }
6447 : }
6448 : return false;
6449 : }) == GEPI->users().end())
6450 : return false;
6451 : // The second elements of the GEP chains to be unmerged.
6452 : std::vector<GetElementPtrInst *> UGEPIs;
6453 : // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
6454 : // Check each user of GEPIOp to see whether unmerging would make GEPIOp not alive
6455 21 : for (User *Usr : GEPIOp->users()) {
6456 21 : if (Usr == GEPI) continue;
6457 : // Check if Usr is an Instruction. If not, give up.
6458 9 : if (!isa<Instruction>(Usr))
6459 0 : return false;
6460 : auto *UI = cast<Instruction>(Usr);
6461 : // Check if Usr in the same block as GEPIOp, which is fine, skip.
6462 9 : if (UI->getParent() == SrcBlock)
6463 : continue;
6464 : // Check if Usr is a GEP. If not, give up.
6465 : if (!isa<GetElementPtrInst>(Usr))
6466 : return false;
6467 3 : auto *UGEPI = cast<GetElementPtrInst>(Usr);
6468 : // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
6469 : // the pointer operand to it. If so, record it in the vector. If not, give
6470 : // up.
6471 3 : if (!GEPSequentialConstIndexed(UGEPI))
6472 : return false;
6473 3 : if (UGEPI->getOperand(0) != GEPIOp)
6474 : return false;
6475 3 : if (GEPIIdx->getType() !=
6476 : cast<ConstantInt>(UGEPI->getOperand(1))->getType())
6477 : return false;
6478 : ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6479 3 : if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
6480 : > TargetTransformInfo::TCC_Basic)
6481 : return false;
6482 3 : UGEPIs.push_back(UGEPI);
6483 : }
6484 6 : if (UGEPIs.size() == 0)
6485 : return false;
6486 : // Check the materializing cost of (Uidx-Idx).
6487 4 : for (GetElementPtrInst *UGEPI : UGEPIs) {
6488 : ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6489 3 : APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
6490 3 : unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
6491 3 : if (ImmCost > TargetTransformInfo::TCC_Basic)
6492 : return false;
6493 : }
6494 : // Now unmerge between GEPI and UGEPIs.
6495 4 : for (GetElementPtrInst *UGEPI : UGEPIs) {
6496 : UGEPI->setOperand(0, GEPI);
6497 : ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6498 : Constant *NewUGEPIIdx =
6499 3 : ConstantInt::get(GEPIIdx->getType(),
6500 3 : UGEPIIdx->getValue() - GEPIIdx->getValue());
6501 : UGEPI->setOperand(1, NewUGEPIIdx);
6502 : // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
6503 : // inbounds to avoid UB.
6504 3 : if (!GEPI->isInBounds()) {
6505 3 : UGEPI->setIsInBounds(false);
6506 : }
6507 : }
6508 : // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
6509 : // alive on IndirectBr edges).
6510 : assert(find_if(GEPIOp->users(), [&](User *Usr) {
6511 : return cast<Instruction>(Usr)->getParent() != SrcBlock;
6512 : }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
6513 : return true;
6514 : }
6515 :
6516 5337332 : bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
6517 : // Bail out if we inserted the instruction to prevent optimizations from
6518 : // stepping on each other's toes.
6519 5337332 : if (InsertedInsts.count(I))
6520 : return false;
6521 :
6522 5335662 : if (PHINode *P = dyn_cast<PHINode>(I)) {
6523 : // It is possible for very late stage optimizations (such as SimplifyCFG)
6524 : // to introduce PHI nodes too late to be cleaned up. If we detect such a
6525 : // trivial PHI, go ahead and zap it here.
6526 360854 : if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
6527 455 : P->replaceAllUsesWith(V);
6528 455 : P->eraseFromParent();
6529 : ++NumPHIsElim;
6530 455 : return true;
6531 : }
6532 : return false;
6533 : }
6534 :
6535 : if (CastInst *CI = dyn_cast<CastInst>(I)) {
6536 : // If the source of the cast is a constant, then this should have
6537 : // already been constant folded. The only reason NOT to constant fold
6538 : // it is if something (e.g. LSR) was careful to place the constant
6539 : // evaluation in a block other than the one that uses it (e.g. to hoist
6540 : // the address of globals out of a loop). If this is the case, we don't
6541 : // want to forward-subst the cast.
6542 503611 : if (isa<Constant>(CI->getOperand(0)))
6543 : return false;
6544 :
6545 500007 : if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
6546 : return true;
6547 :
6548 931136 : if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
6549 : /// Sink a zext or sext into its user blocks if the target type doesn't
6550 : /// fit in one register
6551 45955 : if (TLI &&
6552 91906 : TLI->getTypeAction(CI->getContext(),
6553 45953 : TLI->getValueType(*DL, CI->getType())) ==
6554 : TargetLowering::TypeExpandInteger) {
6555 3644 : return SinkCast(CI);
6556 : } else {
6557 42311 : bool MadeChange = optimizeExt(I);
6558 42311 : return MadeChange | optimizeExtUses(I);
6559 : }
6560 : }
6561 : return false;
6562 : }
6563 :
6564 : if (CmpInst *CI = dyn_cast<CmpInst>(I))
6565 153307 : if (!TLI || !TLI->hasMultipleConditionRegisters())
6566 145212 : return OptimizeCmpExpression(CI, TLI);
6567 :
6568 : if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
6569 914998 : LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
6570 914998 : if (TLI) {
6571 914971 : bool Modified = optimizeLoadExt(LI);
6572 : unsigned AS = LI->getPointerAddressSpace();
6573 1829942 : Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
6574 914971 : return Modified;
6575 : }
6576 : return false;
6577 : }
6578 :
6579 : if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
6580 791345 : if (TLI && splitMergedValStore(*SI, *DL, *TLI))
6581 : return true;
6582 791330 : SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
6583 791330 : if (TLI) {
6584 : unsigned AS = SI->getPointerAddressSpace();
6585 791311 : return optimizeMemoryInst(I, SI->getOperand(1),
6586 791311 : SI->getOperand(0)->getType(), AS);
6587 : }
6588 : return false;
6589 : }
6590 :
6591 : if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
6592 : unsigned AS = RMW->getPointerAddressSpace();
6593 5272 : return optimizeMemoryInst(I, RMW->getPointerOperand(),
6594 5272 : RMW->getType(), AS);
6595 : }
6596 :
6597 : if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
6598 : unsigned AS = CmpX->getPointerAddressSpace();
6599 1579 : return optimizeMemoryInst(I, CmpX->getPointerOperand(),
6600 1579 : CmpX->getCompareOperand()->getType(), AS);
6601 : }
6602 :
6603 : BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
6604 :
6605 850192 : if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
6606 30694 : EnableAndCmpSinking && TLI)
6607 30694 : return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
6608 :
6609 2762524 : if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
6610 : BinOp->getOpcode() == Instruction::LShr)) {
6611 : ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
6612 19081 : if (TLI && CI && TLI->hasExtractBitsInsn())
6613 1951 : return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
6614 :
6615 : return false;
6616 : }
6617 :
6618 : if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
6619 234571 : if (GEPI->hasAllZeroIndices()) {
6620 : /// The GEP operand must be a pointer, so its result must be one too -> BitCast
6621 25292 : Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
6622 50584 : GEPI->getName(), GEPI);
6623 25292 : NC->setDebugLoc(GEPI->getDebugLoc());
6624 25292 : GEPI->replaceAllUsesWith(NC);
6625 25292 : GEPI->eraseFromParent();
6626 : ++NumGEPsElim;
6627 25292 : optimizeInst(NC, ModifiedDT);
6628 25292 : return true;
6629 : }
6630 209279 : if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
6631 : return true;
6632 : }
6633 209278 : return false;
6634 : }
6635 :
6636 : if (CallInst *CI = dyn_cast<CallInst>(I))
6637 666803 : return optimizeCallInst(CI, ModifiedDT);
6638 :
6639 : if (SelectInst *SI = dyn_cast<SelectInst>(I))
6640 37997 : return optimizeSelectInst(SI);
6641 :
6642 : if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
6643 48026 : return optimizeShuffleVectorInst(SVI);
6644 :
6645 : if (auto *Switch = dyn_cast<SwitchInst>(I))
6646 2274 : return optimizeSwitchInst(Switch);
6647 :
6648 1753772 : if (isa<ExtractElementInst>(I))
6649 30205 : return optimizeExtractElementInst(I);
6650 :
6651 : return false;
6652 : }
6653 :
6654 : /// Given an OR instruction, check to see if this is a bitreverse
6655 : /// idiom. If so, insert the new intrinsic and return true.
6656 5332172 : static bool makeBitReverse(Instruction &I, const DataLayout &DL,
6657 : const TargetLowering &TLI) {
6658 10664344 : if (!I.getType()->isIntegerTy() ||
6659 1518337 : !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
6660 : TLI.getValueType(DL, I.getType(), true)))
6661 5261439 : return false;
6662 :
6663 : SmallVector<Instruction*, 4> Insts;
6664 70733 : if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
6665 : return false;
6666 6 : Instruction *LastInst = Insts.back();
6667 6 : I.replaceAllUsesWith(LastInst);
6668 6 : RecursivelyDeleteTriviallyDeadInstructions(&I);
6669 6 : return true;
6670 : }
6671 :
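// Illustrative sketch: when a block computes the classic shift/mask/or
// sequence that reverses the bits of an i32 and ISD::BITREVERSE is legal or
// custom for i32 on the target, the whole chain is collapsed into
//   %rev = call i32 @llvm.bitreverse.i32(i32 %x)
// and the now-dead instructions are deleted.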
6672 : // In this pass we look for GEP and cast instructions that are used
6673 : // across basic blocks and rewrite them to improve basic-block-at-a-time
6674 : // selection.
6675 640297 : bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
6676 : SunkAddrs.clear();
6677 : bool MadeChange = false;
6678 :
6679 640297 : CurInstIterator = BB.begin();
6680 5952306 : while (CurInstIterator != BB.end()) {
6681 5312040 : MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
6682 5312040 : if (ModifiedDT)
6683 : return true;
6684 : }
6685 :
6686 : bool MadeBitReverse = true;
6687 1280374 : while (TLI && MadeBitReverse) {
6688 : MadeBitReverse = false;
6689 5972274 : for (auto &I : reverse(BB)) {
6690 5332172 : if (makeBitReverse(I, *DL, *TLI)) {
6691 : MadeBitReverse = MadeChange = true;
6692 6 : ModifiedDT = true;
6693 6 : break;
6694 : }
6695 : }
6696 : }
6697 640266 : MadeChange |= dupRetToEnableTailCallOpts(&BB);
6698 :
6699 640266 : return MadeChange;
6700 : }
6701 :
6702 : // If llvm.dbg.value is far away from the value, then ISel may not be able to
6703 : // handle it properly. ISel will drop llvm.dbg.value if it cannot
6704 : // find a node corresponding to the value.
6705 0 : bool CodeGenPrepare::placeDbgValues(Function &F) {
6706 : bool MadeChange = false;
6707 0 : for (BasicBlock &BB : F) {
6708 : Instruction *PrevNonDbgInst = nullptr;
6709 0 : for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
6710 : Instruction *Insn = &*BI++;
6711 : DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
6712 : // Leave dbg.values that refer to an alloca alone. These
6713 : // intrinsics describe the address of a variable (= the alloca)
6714 : // being taken. They should not be moved next to the alloca
6715 : // (and to the beginning of the scope), but rather stay close to
6716 : // where said address is used.
6717 0 : if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
6718 : PrevNonDbgInst = Insn;
6719 0 : continue;
6720 : }
6721 :
6722 : Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
6723 0 : if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
6724 : // If VI is a phi in a block with an EHPad terminator, we can't insert
6725 : // after it.
6726 0 : if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
6727 0 : continue;
6728 : LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
6729 : << *DVI << ' ' << *VI);
6730 0 : DVI->removeFromParent();
6731 0 : if (isa<PHINode>(VI))
6732 0 : DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
6733 : else
6734 0 : DVI->insertAfter(VI);
6735 : MadeChange = true;
6736 : ++NumDbgValueMoved;
6737 : }
6738 : }
6739 : }
6740 0 : return MadeChange;
6741 : }
6742 :
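// Illustrative sketch (hypothetical IR; !10 and the expression are placeholders):
//   %add = add i32 %a, %b
//   ...                                    ; many unrelated instructions
//   call void @llvm.dbg.value(metadata i32 %add, metadata !10, metadata !DIExpression())
// The dbg.value is moved to just after %add, so ISel still has a node for %add
// when it reaches the debug intrinsic and does not drop the value location.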
6743 : /// Scale down both weights to fit into uint32_t.
6744 : static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
6745 : uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
6746 : uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
6747 : NewTrue = NewTrue / Scale;
6748 : NewFalse = NewFalse / Scale;
6749 : }
6750 :
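// Illustrative arithmetic (made-up weights): NewTrue = 10000000000 and
// NewFalse = 1000000000 give Scale = 10000000000 / 4294967295 + 1 = 3, so the
// weights become 3333333333 and 333333333, both of which fit in uint32_t while
// preserving the approximate ratio.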
6751 : /// Some targets prefer to split a conditional branch like:
6752 : /// \code
6753 : /// %0 = icmp ne i32 %a, 0
6754 : /// %1 = icmp ne i32 %b, 0
6755 : /// %or.cond = or i1 %0, %1
6756 : /// br i1 %or.cond, label %TrueBB, label %FalseBB
6757 : /// \endcode
6758 : /// into multiple branch instructions like:
6759 : /// \code
6760 : /// bb1:
6761 : /// %0 = icmp ne i32 %a, 0
6762 : /// br i1 %0, label %TrueBB, label %bb2
6763 : /// bb2:
6764 : /// %1 = icmp ne i32 %b, 0
6765 : /// br i1 %1, label %TrueBB, label %FalseBB
6766 : /// \endcode
6767 : /// This usually allows instruction selection to do even further optimizations
6768 : /// and combine the compare with the branch instruction. Currently this is
6769 : /// applied for targets which have "cheap" jump instructions.
6770 : ///
6771 : /// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
6772 : ///
6773 198465 : bool CodeGenPrepare::splitBranchCondition(Function &F) {
6774 198465 : if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
6775 : return false;
6776 :
6777 : bool MadeChange = false;
6778 16336 : for (auto &BB : F) {
6779 : // Does this BB end with the following?
6780 : // %cond1 = icmp|fcmp|binary instruction ...
6781 : // %cond2 = icmp|fcmp|binary instruction ...
6782 : // %cond.or = or|and i1 %cond1, %cond2
6783 : // br i1 %cond.or, label %dest1, label %dest2
6784 : BinaryOperator *LogicOp;
6785 : BasicBlock *TBB, *FBB;
6786 8358 : if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
6787 8355 : continue;
6788 :
6789 : auto *Br1 = cast<BranchInst>(BB.getTerminator());
6790 10 : if (Br1->getMetadata(LLVMContext::MD_unpredictable))
6791 : continue;
6792 :
6793 : unsigned Opc;
6794 : Value *Cond1, *Cond2;
6795 4 : if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
6796 4 : m_OneUse(m_Value(Cond2)))))
6797 : Opc = Instruction::And;
6798 2 : else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
6799 2 : m_OneUse(m_Value(Cond2)))))
6800 : Opc = Instruction::Or;
6801 : else
6802 : continue;
6803 :
6804 3 : if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
6805 3 : !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
6806 0 : continue;
6807 :
6808 : LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
6809 :
6810 : // Create a new BB.
6811 : auto TmpBB =
6812 3 : BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
6813 : BB.getParent(), BB.getNextNode());
6814 :
6815 : // Update the original basic block: use the first condition directly in the
6816 : // branch instruction and remove the no-longer-needed and/or instruction.
6817 3 : Br1->setCondition(Cond1);
6818 3 : LogicOp->eraseFromParent();
6819 :
6820 : // Depending on the condition we have to either replace the true or the
6821 : // false successor of the original branch instruction.
6822 3 : if (Opc == Instruction::And)
6823 : Br1->setSuccessor(0, TmpBB);
6824 : else
6825 : Br1->setSuccessor(1, TmpBB);
6826 :
6827 : // Fill in the new basic block.
6828 3 : auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
6829 3 : if (auto *I = dyn_cast<Instruction>(Cond2)) {
6830 3 : I->removeFromParent();
6831 3 : I->insertBefore(Br2);
6832 : }
6833 :
6834 : // Update PHI nodes in both successors. The original BB needs to be
6835 : // replaced in one successor's PHI nodes, because the branch now comes from
6836 : // the newly generated BB (TmpBB). In the other successor we need to add one
6837 : // incoming edge to the PHI nodes, because both branch instructions now
6838 : // target the same successor. Depending on the original branch condition
6839 : // (and/or) we have to swap the successors (TBB, FBB) so that we perform
6840 : // the correct update for the PHI nodes.
6841 : // This doesn't change the successor order of the just created branch
6842 : // instruction (or any other instruction).
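// Illustrative sketch (hypothetical PHI nodes, 'and' case): a PHI in TBB such
// as
//   %p = phi i32 [ %v, %bb ], [ %w, %other ]
// has its %bb entry rewritten to the new block,
//   %p = phi i32 [ %v, %bb.cond.split ], [ %w, %other ]
// while a PHI in FBB keeps its %bb entry and additionally receives the same
// incoming value from %bb.cond.split, since FBB is now reachable from both.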
6843 3 : if (Opc == Instruction::Or)
6844 : std::swap(TBB, FBB);
6845 :
6846 : // Replace the old BB with the new BB.
6847 6 : for (PHINode &PN : TBB->phis()) {
6848 : int i;
6849 0 : while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
6850 0 : PN.setIncomingBlock(i, TmpBB);
6851 : }
6852 :
6853 : // Add another incoming edge from the new BB.
6854 6 : for (PHINode &PN : FBB->phis()) {
6855 0 : auto *Val = PN.getIncomingValueForBlock(&BB);
6856 0 : PN.addIncoming(Val, TmpBB);
6857 : }
6858 :
6859 : // Update the branch weights (from SelectionDAGBuilder::
6860 : // FindMergedConditions).
6861 3 : if (Opc == Instruction::Or) {
6862 : // Codegen X | Y as:
6863 : // BB1:
6864 : // jmp_if_X TBB
6865 : // jmp TmpBB
6866 : // TmpBB:
6867 : // jmp_if_Y TBB
6868 : // jmp FBB
6869 : //
6870 :
6871 : // We have flexibility in setting Prob for BB1 and Prob for NewBB.
6872 : // The requirement is that
6873 : // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
6874 : // = TrueProb for original BB.
6875 : // Assuming the original weights are A and B, one choice is to set BB1's
6876 : // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
6877 : // assumes that
6878 : // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
6879 : // Another choice is to assume TrueProb for BB1 equals TrueProb for
6880 : // TmpBB, but the math is more complicated.
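// Worked example (illustrative weights only): with original weights A = 3
// and B = 1, BB1 gets weights 3 and 5 (TrueProb 3/8) and TmpBB gets weights
// 3 and 2 (TrueProb 3/5), so 3/8 + (5/8)*(3/5) = 3/4 = A/(A+B), which matches
// the original TrueProb.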
6881 : uint64_t TrueWeight, FalseWeight;
6882 1 : if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
6883 : uint64_t NewTrueWeight = TrueWeight;
6884 : uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
6885 : scaleWeights(NewTrueWeight, NewFalseWeight);
6886 1 : Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
6887 : .createBranchWeights(NewTrueWeight, NewFalseWeight));
6888 :
6889 : NewTrueWeight = TrueWeight;
6890 : NewFalseWeight = 2 * FalseWeight;
6891 : scaleWeights(NewTrueWeight, NewFalseWeight);
6892 1 : Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
6893 : .createBranchWeights(NewTrueWeight, NewFalseWeight));
6894 : }
6895 : } else {
6896 : // Codegen X & Y as:
6897 : // BB1:
6898 : // jmp_if_X TmpBB
6899 : // jmp FBB
6900 : // TmpBB:
6901 : // jmp_if_Y TBB
6902 : // jmp FBB
6903 : //
6904 : // This requires creation of TmpBB after CurBB.
6905 :
6906 : // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
6907 : // The requirement is that
6908 : // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
6909 : // = FalseProb for original BB.
6910 : // Assuming the original weights are A and B, one choice is to set BB1's
6911 : // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
6912 : // assumes that
6913 : // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
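// Worked example (illustrative weights only): with original weights A = 1
// and B = 3, BB1 gets weights 5 and 3 (FalseProb 3/8) and TmpBB gets weights
// 2 and 3 (FalseProb 3/5), so 3/8 + (5/8)*(3/5) = 3/4 = B/(A+B), which matches
// the original FalseProb.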
6914 : uint64_t TrueWeight, FalseWeight;
6915 2 : if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
6916 : uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
6917 : uint64_t NewFalseWeight = FalseWeight;
6918 : scaleWeights(NewTrueWeight, NewFalseWeight);
6919 1 : Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
6920 : .createBranchWeights(NewTrueWeight, NewFalseWeight));
6921 :
6922 : NewTrueWeight = 2 * TrueWeight;
6923 : NewFalseWeight = FalseWeight;
6924 : scaleWeights(NewTrueWeight, NewFalseWeight);
6925 1 : Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
6926 : .createBranchWeights(NewTrueWeight, NewFalseWeight));
6927 : }
6928 : }
6929 :
6930 : // Note: No point in getting fancy here, since the DT info is never
6931 : // available to CodeGenPrepare.
6932 3 : ModifiedDT = true;
6933 :
6934 : MadeChange = true;
6935 :
6936 : LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
6937 : TmpBB->dump());
6938 : }
6939 : return MadeChange;
6940 : }
|