CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugInfo.h"
53#include "llvm/IR/Dominators.h"
54#include "llvm/IR/Function.h"
56#include "llvm/IR/GlobalValue.h"
58#include "llvm/IR/IRBuilder.h"
59#include "llvm/IR/InlineAsm.h"
60#include "llvm/IR/InstrTypes.h"
61#include "llvm/IR/Instruction.h"
64#include "llvm/IR/Intrinsics.h"
65#include "llvm/IR/IntrinsicsAArch64.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/MDBuilder.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
72#include "llvm/IR/Statepoint.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/User.h"
76#include "llvm/IR/Value.h"
77#include "llvm/IR/ValueHandle.h"
78#include "llvm/IR/ValueMap.h"
80#include "llvm/Pass.h"
86#include "llvm/Support/Debug.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <optional>
103#include <utility>
104#include <vector>
105
106using namespace llvm;
107using namespace llvm::PatternMatch;
108
109#define DEBUG_TYPE "codegenprepare"
110
111STATISTIC(NumBlocksElim, "Number of blocks eliminated");
112STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
113STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
114STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
115 "sunken Cmps");
116STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
117 "of sunken Casts");
118STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
119 "computations were sunk");
120STATISTIC(NumMemoryInstsPhiCreated,
121 "Number of phis created when address "
122 "computations were sunk to memory instructions");
123STATISTIC(NumMemoryInstsSelectCreated,
124 "Number of select created when address "
125 "computations were sunk to memory instructions");
126STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
127STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
128STATISTIC(NumAndsAdded,
129 "Number of and mask instructions added to form ext loads");
130STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
131STATISTIC(NumRetsDup, "Number of return instructions duplicated");
132STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
133STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
134STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
135
137 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
138 cl::desc("Disable branch optimizations in CodeGenPrepare"));
139
140static cl::opt<bool>
141 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
142 cl::desc("Disable GC optimizations in CodeGenPrepare"));
143
144static cl::opt<bool>
145 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
146 cl::init(false),
147 cl::desc("Disable select to branch conversion."));
148
149static cl::opt<bool>
150 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
151 cl::desc("Address sinking in CGP using GEPs."));
152
153static cl::opt<bool>
154 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
155 cl::desc("Enable sinkinig and/cmp into branches."));
156
158 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
159 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
160
162 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
163 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
164
166 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
167 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
168 "CodeGenPrepare"));
169
171 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
172 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
173 "optimization in CodeGenPrepare"));
174
176 "disable-preheader-prot", cl::Hidden, cl::init(false),
177 cl::desc("Disable protection against removing loop preheaders"));
178
180 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
181 cl::desc("Use profile info to add section prefix for hot/cold functions"));
182
184 "profile-unknown-in-special-section", cl::Hidden,
185 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
186 "profile, we cannot tell the function is cold for sure because "
187 "it may be a function newly added without ever being sampled. "
188 "With the flag enabled, compiler can put such profile unknown "
189 "functions into a special section, so runtime system can choose "
190 "to handle it in a different way than .text section, to save "
191 "RAM for example. "));
192
194 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
195 cl::desc("Use the basic-block-sections profile to determine the text "
196 "section prefix for hot functions. Functions with "
197 "basic-block-sections profile will be placed in `.text.hot` "
198 "regardless of their FDO profile info. Other functions won't be "
199 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
200 "profiles."));
201
203 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
204 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
205 "(frequency of destination block) is greater than this ratio"));
206
208 "force-split-store", cl::Hidden, cl::init(false),
209 cl::desc("Force store splitting no matter what the target query says."));
210
212 "cgp-type-promotion-merge", cl::Hidden,
213 cl::desc("Enable merging of redundant sexts when one is dominating"
214 " the other."),
215 cl::init(true));
216
218 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
219 cl::desc("Disables combining addressing modes with different parts "
220 "in optimizeMemoryInst."));
221
222static cl::opt<bool>
223 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
224 cl::desc("Allow creation of Phis in Address sinking."));
225
227 "addr-sink-new-select", cl::Hidden, cl::init(true),
228 cl::desc("Allow creation of selects in Address sinking."));
229
231 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
232 cl::desc("Allow combining of BaseReg field in Address sinking."));
233
235 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
236 cl::desc("Allow combining of BaseGV field in Address sinking."));
237
239 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
240 cl::desc("Allow combining of BaseOffs field in Address sinking."));
241
243 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
244 cl::desc("Allow combining of ScaledReg field in Address sinking."));
245
246static cl::opt<bool>
247 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
248 cl::init(true),
249 cl::desc("Enable splitting large offset of GEP."));
250
252 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
253 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
254
255static cl::opt<bool>
256 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
257 cl::desc("Enable BFI update verification for "
258 "CodeGenPrepare."));
259
260static cl::opt<bool>
261 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
262 cl::desc("Enable converting phi types in CodeGenPrepare"));
263
265 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
266 cl::desc("Least BB number of huge function."));
267
269 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::desc("Max number of address users to look at"));
272
273static cl::opt<bool>
274 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
275 cl::desc("Disable elimination of dead PHI nodes."));
276
277namespace {
278
279enum ExtType {
280 ZeroExtension, // Zero extension has been seen.
281 SignExtension, // Sign extension has been seen.
282 BothExtension // This extension type is used if we saw sext after
283 // ZeroExtension had been set, or if we saw zext after
284 // SignExtension had been set. It makes the type
285 // information of a promoted instruction invalid.
286};
287
288enum ModifyDT {
289 NotModifyDT, // Do not modify any dominator tree.
290 ModifyBBDT, // Modify the basic block dominator tree.
291 ModifyInstDT // Modify the instruction dominance within a basic block.
292 // This usually means we move/delete/insert an instruction
293 // in a basic block, so we should re-iterate the
294 // instructions in such a basic block.
295};
296
297using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
298using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
299using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using ValueToSExts = MapVector<Value *, SExts>;
302
303class TypePromotionTransaction;
304
305class CodeGenPrepare {
306 friend class CodeGenPrepareLegacyPass;
307 const TargetMachine *TM = nullptr;
308 const TargetSubtargetInfo *SubtargetInfo = nullptr;
309 const TargetLowering *TLI = nullptr;
310 const TargetRegisterInfo *TRI = nullptr;
311 const TargetTransformInfo *TTI = nullptr;
312 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
313 const TargetLibraryInfo *TLInfo = nullptr;
314 LoopInfo *LI = nullptr;
315 std::unique_ptr<BlockFrequencyInfo> BFI;
316 std::unique_ptr<BranchProbabilityInfo> BPI;
317 ProfileSummaryInfo *PSI = nullptr;
318
319 /// As we scan instructions optimizing them, this is the next instruction
320 /// to optimize. Transforms that can invalidate this should update it.
321 BasicBlock::iterator CurInstIterator;
322
323 /// Keeps track of non-local addresses that have been sunk into a block.
324 /// This allows us to avoid inserting duplicate code for blocks with
325 /// multiple load/stores of the same address. The usage of WeakTrackingVH
326 /// enables SunkAddrs to be treated as a cache whose entries can be
327 /// invalidated if a sunken address computation has been erased.
329
330 /// Keeps track of all instructions inserted for the current function.
331 SetOfInstrs InsertedInsts;
332
333 /// Keeps track of the types of the related instructions before their
334 /// promotion, for the current function.
335 InstrToOrigTy PromotedInsts;
336
337 /// Keep track of instructions removed during promotion.
338 SetOfInstrs RemovedInsts;
339
340 /// Keep track of sext chains based on their initial value.
341 DenseMap<Value *, Instruction *> SeenChainsForSExt;
342
343 /// Keep track of GEPs accessing the same data structures such as structs or
344 /// arrays that are candidates to be split later because of their large
345 /// size.
348 LargeOffsetGEPMap;
349
350 /// Keep track of the new GEP bases after splitting GEPs that have large offsets.
351 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
352
353 /// Map large-offset GEPs to their serial numbers.
354 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
355
356 /// Keep track of promoted SExt instructions.
357 ValueToSExts ValToSExtendedUses;
358
359 /// True if the function has the OptSize attribute.
360 bool OptSize;
361
362 /// DataLayout for the Function being processed.
363 const DataLayout *DL = nullptr;
364
365 /// Building the dominator tree can be expensive, so we only build it
366 /// lazily and update it when required.
367 std::unique_ptr<DominatorTree> DT;
368
369public:
370 CodeGenPrepare() {}
371 CodeGenPrepare(const TargetMachine *TM) : TM(TM) {}
372 /// If we encounter a huge function, we need to limit the build time.
373 bool IsHugeFunc = false;
374
375 /// FreshBBs is like a worklist; it collects the updated BBs which need
376 /// to be optimized again.
377 /// Note: To limit build time in this pass, when a BB is updated we need
378 /// to insert such a BB into FreshBBs for huge functions.
380
381 void releaseMemory() {
382 // Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 BPI.reset();
387 BFI.reset();
388 }
389
391
392private:
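  // Illustrative note (not part of the original source): the helper below is
  // typically used to wrap a simplification that may recursively delete the
  // instruction CurInstIterator points at, e.g. (cf. eliminateAssumptions):
  //
  //   resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
  //     RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
  //   });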
393 template <typename F>
394 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
395 // Substituting can cause recursive simplifications, which can invalidate
396 // our iterator. Use a WeakTrackingVH to hold onto it in case this
397 // happens.
398 Value *CurValue = &*CurInstIterator;
399 WeakTrackingVH IterHandle(CurValue);
400
401 f();
402
403 // If the iterator instruction was recursively deleted, start over at the
404 // start of the block.
405 if (IterHandle != CurValue) {
406 CurInstIterator = BB->begin();
407 SunkAddrs.clear();
408 }
409 }
410
411 // Get the DominatorTree, building if necessary.
412 DominatorTree &getDT(Function &F) {
413 if (!DT)
414 DT = std::make_unique<DominatorTree>(F);
415 return *DT;
416 }
417
418 void removeAllAssertingVHReferences(Value *V);
419 bool eliminateAssumptions(Function &F);
420 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
421 bool eliminateMostlyEmptyBlocks(Function &F);
422 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
423 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
424 void eliminateMostlyEmptyBlock(BasicBlock *BB);
425 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
426 bool isPreheader);
427 bool makeBitReverse(Instruction &I);
428 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
429 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
430 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
431 unsigned AddrSpace);
432 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
433 bool optimizeInlineAsmInst(CallInst *CS);
434 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
435 bool optimizeExt(Instruction *&I);
436 bool optimizeExtUses(Instruction *I);
437 bool optimizeLoadExt(LoadInst *Load);
438 bool optimizeShiftInst(BinaryOperator *BO);
439 bool optimizeFunnelShift(IntrinsicInst *Fsh);
440 bool optimizeSelectInst(SelectInst *SI);
441 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
442 bool optimizeSwitchType(SwitchInst *SI);
443 bool optimizeSwitchPhiConstants(SwitchInst *SI);
444 bool optimizeSwitchInst(SwitchInst *SI);
445 bool optimizeExtractElementInst(Instruction *Inst);
446 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
447 bool fixupDbgValue(Instruction *I);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool optimizeURem(Instruction *Rem);
475 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 void verifyBFIUpdates(Function &F);
478 bool _run(Function &F);
479};
480
481class CodeGenPrepareLegacyPass : public FunctionPass {
482public:
483 static char ID; // Pass identification, replacement for typeid
484
485 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
487 }
488
489 bool runOnFunction(Function &F) override;
490
491 StringRef getPassName() const override { return "CodeGen Prepare"; }
492
493 void getAnalysisUsage(AnalysisUsage &AU) const override {
494 // FIXME: When we can selectively preserve passes, preserve the domtree.
501 }
502};
503
504} // end anonymous namespace
505
506char CodeGenPrepareLegacyPass::ID = 0;
507
508bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
509 if (skipFunction(F))
510 return false;
511 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
512 CodeGenPrepare CGP(TM);
513 CGP.DL = &F.getDataLayout();
514 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
515 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
516 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
517 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
518 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
519 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
520 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
521 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
522 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
523 auto BBSPRWP =
524 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
525 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
526
527 return CGP._run(F);
528}
529
530INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
531 "Optimize for code generation", false, false)
538INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540
542 return new CodeGenPrepareLegacyPass();
543}
544
547 CodeGenPrepare CGP(TM);
548
549 bool Changed = CGP.run(F, AM);
550 if (!Changed)
551 return PreservedAnalyses::all();
552
557 return PA;
558}
559
560bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
561 DL = &F.getDataLayout();
562 SubtargetInfo = TM->getSubtargetImpl(F);
563 TLI = SubtargetInfo->getTargetLowering();
564 TRI = SubtargetInfo->getRegisterInfo();
565 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
567 LI = &AM.getResult<LoopAnalysis>(F);
568 BPI.reset(new BranchProbabilityInfo(F, *LI));
569 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
570 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
571 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
572 BBSectionsProfileReader =
574 return _run(F);
575}
576
577bool CodeGenPrepare::_run(Function &F) {
578 bool EverMadeChange = false;
579
580 OptSize = F.hasOptSize();
581 // Use the basic-block-sections profile to promote hot functions to .text.hot
582 // if requested.
583 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
584 BBSectionsProfileReader->isFunctionHot(F.getName())) {
585 F.setSectionPrefix("hot");
586 } else if (ProfileGuidedSectionPrefix) {
587 // The hot attribute overrides profile-count-based hotness, while
588 // profile-count-based hotness overrides the cold attribute.
589 // This is a conservative behavior.
590 if (F.hasFnAttribute(Attribute::Hot) ||
591 PSI->isFunctionHotInCallGraph(&F, *BFI))
592 F.setSectionPrefix("hot");
593 // If PSI shows this function is not hot, we place the function
594 // into the unlikely section if (1) PSI shows this is a cold function, or
595 // (2) the function has the cold attribute.
596 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
597 F.hasFnAttribute(Attribute::Cold))
598 F.setSectionPrefix("unlikely");
599 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
600 PSI->isFunctionHotnessUnknown(F))
601 F.setSectionPrefix("unknown");
602 }
603
604 /// This optimization identifies DIV instructions that can be
605 /// profitably bypassed and carried out with a shorter, faster divide.
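  /// For illustration only (a hedged sketch, not actual bypassSlowDivision
  /// output): on a target that maps a slow 64-bit divide to a 32-bit bypass
  /// width, a division such as
  ///   %res = udiv i64 %a, %b
  /// is rewritten into a runtime check on the high bits of %a and %b that
  /// selects between a cheap 32-bit udiv and the original 64-bit udiv, with a
  /// phi merging the two results.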
606 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
607 const DenseMap<unsigned int, unsigned int> &BypassWidths =
609 BasicBlock *BB = &*F.begin();
610 while (BB != nullptr) {
611 // bypassSlowDivision may create new BBs, but we don't want to reapply the
612 // optimization to those blocks.
613 BasicBlock *Next = BB->getNextNode();
614 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
615 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620 // Get rid of @llvm.assume builtins before attempting to eliminate empty
621 // blocks, since there might be blocks that only contain @llvm.assume calls
622 // (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625 // Eliminate blocks that contain only PHI nodes and an
626 // unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633 // Split some critical edges where one of the sources is an indirect branch,
634 // to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
637
638 // If we are optimizing a huge function, we need to consider the build time,
639 // because the basic algorithm's complexity is near O(N!).
640 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
641
642 // Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
647 bool MadeChange = true;
648 bool FuncIterated = false;
649 while (MadeChange) {
650 MadeChange = false;
651
653 if (FuncIterated && !FreshBBs.contains(&BB))
654 continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
658
659 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663 if (IsHugeFunc) {
664 // If the BB is updated, it may still have a chance to be optimized.
665 // This usually happens during sink optimization.
666 // For example:
667 //
668 // bb0:
669 // %and = and i32 %a, 4
670 // %cmp = icmp eq i32 %and, 0
671 //
672 // If %cmp is sunk to another BB, %and will have a chance to sink as well.
673 if (Changed)
674 FreshBBs.insert(&BB);
675 else if (FuncIterated)
676 FreshBBs.erase(&BB);
677 } else {
678 // For small/normal functions, we restart BB iteration if the dominator
679 // tree of the Function was changed.
680 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681 break;
682 }
683 }
684 // We have iterated over all the BBs in the function (this only matters for huge functions).
685 FuncIterated = IsHugeFunc;
686
687 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689 if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693 if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697 if (MadeChange && VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701 // Really free removed instructions during promotion.
702 for (Instruction *I : RemovedInsts)
703 I->deleteValue();
704
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716 if (!DisableBranchOpts) {
717 MadeChange = false;
718 // Use a set vector to get deterministic iteration order. The order the
719 // blocks are removed may affect whether or not PHI nodes in successors
720 // are removed.
722 for (BasicBlock &BB : F) {
724 MadeChange |= ConstantFoldTerminator(&BB, true);
725 if (!MadeChange)
726 continue;
727
728 for (BasicBlock *Succ : Successors)
729 if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733 // Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735 while (!WorkList.empty()) {
736 BasicBlock *BB = WorkList.pop_back_val();
738
739 DeleteDeadBlock(BB);
740
741 for (BasicBlock *Succ : Successors)
742 if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746 // Merge pairs of basic blocks with unconditional branches, connected by
747 // a single edge.
748 if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754 if (!DisableGCOpts) {
756 for (BasicBlock &BB : F)
757 for (Instruction &I : BB)
758 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760 for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764 // Do this last to clean up use-before-def scenarios introduced by other
765 // preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
771 verifyBFIUpdates(F);
772#endif
773
774 return EverMadeChange;
775}
776
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778 bool MadeChange = false;
779 for (BasicBlock &BB : F) {
780 CurInstIterator = BB.begin();
781 while (CurInstIterator != BB.end()) {
782 Instruction *I = &*(CurInstIterator++);
783 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange = true;
785 Value *Operand = Assume->getOperand(0);
786 Assume->eraseFromParent();
787
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
790 });
791 }
792 }
793 }
794 return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data structures.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
803 auto GEP = dyn_cast<GetElementPtrInst>(V);
804 if (!GEP)
805 return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810 if (VecI == LargeOffsetGEPMap.end())
811 return;
812
813 auto &GEPVector = VecI->second;
814 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
815
816 if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
821void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
822 DominatorTree NewDT(F);
823 LoopInfo NewLI(NewDT);
824 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
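/// A minimal illustration (hypothetical IR, not from a test case):
///   bb1:
///     ...
///     br label %bb2
///   bb2:                                   ; preds = %bb1
///     ...
/// Since %bb1 unconditionally branches to %bb2 and %bb2 has no other
/// predecessors, %bb2 is merged into %bb1.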
832bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
833 bool Changed = false;
834 // Scan all of the blocks in the function, except for the entry block.
835 // Use a temporary array to avoid iterator being invalidated when
836 // deleting blocks.
838 for (auto &Block : llvm::drop_begin(F))
839 Blocks.push_back(&Block);
840
842 for (auto &Block : Blocks) {
843 auto *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is a mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
896 if (BBI != BB->begin()) {
897 --BBI;
898 while (isa<DbgInfoIntrinsic>(BBI)) {
899 if (BBI == BB->begin())
900 break;
901 --BBI;
902 }
903 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
904 return nullptr;
905 }
906
907 // Do not break infinite loops.
908 BasicBlock *DestBB = BI->getSuccessor(0);
909 if (DestBB == BB)
910 return nullptr;
911
912 if (!canMergeBlocks(BB, DestBB))
913 DestBB = nullptr;
914
915 return DestBB;
916}
917
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
920/// edges in ways that are non-optimal for isel. Start by eliminating these
921/// blocks so we can split them the way we want them.
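/// For example (an illustrative sketch, not taken from a real test case):
///   bb:                                    ; mostly empty block
///     %p = phi i32 [ %a, %pred1 ], [ %b, %pred2 ]
///     br label %dest
///   dest:
///     %q = phi i32 [ %p, %bb ], [ %c, %other ]
/// After elimination, %dest's phi takes %a from %pred1 and %b from %pred2
/// directly, and %bb is erased.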
922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
924 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
925 while (!LoopList.empty()) {
926 Loop *L = LoopList.pop_back_val();
927 llvm::append_range(LoopList, *L);
928 if (BasicBlock *Preheader = L->getLoopPreheader())
929 Preheaders.insert(Preheader);
930 }
931
932 bool MadeChange = false;
933 // Copy blocks into a temporary array to avoid iterator invalidation issues
934 // as we remove them.
935 // Note that this intentionally skips the entry block.
937 for (auto &Block : llvm::drop_begin(F)) {
938 // Delete phi nodes that could block deleting other empty blocks.
940 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
941 Blocks.push_back(&Block);
942 }
943
944 for (auto &Block : Blocks) {
945 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
946 if (!BB)
947 continue;
948 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
949 if (!DestBB ||
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
951 continue;
952
953 eliminateMostlyEmptyBlock(BB);
954 MadeChange = true;
955 }
956 return MadeChange;
957}
958
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
960 BasicBlock *DestBB,
961 bool isPreheader) {
962 // Do not delete loop preheaders if doing so would create a critical edge.
963 // Loop preheaders can be good locations to spill registers. If the
964 // preheader is deleted and we create a critical edge, registers may be
965 // spilled in the loop body instead.
966 if (!DisablePreheaderProtect && isPreheader &&
967 !(BB->getSinglePredecessor() &&
969 return false;
970
971 // Skip merging if the block's successor is also a successor to any callbr
972 // that leads to this block.
973 // FIXME: Is this really needed? Is this a correctness issue?
974 for (BasicBlock *Pred : predecessors(BB)) {
975 if (isa<CallBrInst>(Pred->getTerminator()) &&
976 llvm::is_contained(successors(Pred), DestBB))
977 return false;
978 }
979
980 // Try to skip merging if the unique predecessor of BB is terminated by a
981 // switch or indirect branch instruction, and BB is used as an incoming block
982 // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
983 // to add COPY instructions in the predecessor of BB instead of in BB (if it
984 // is not merged). Note that the critical edge created by merging such blocks
985 // won't be split in MachineSink because the jump table is not analyzable. By
986 // keeping such an empty block (BB), ISel will place COPY instructions in BB,
987 // not in the predecessor of BB.
988 BasicBlock *Pred = BB->getUniquePredecessor();
989 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
990 isa<IndirectBrInst>(Pred->getTerminator())))
991 return true;
992
993 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
994 return true;
995
996 // We use a simple cost heuristic which determines that skipping merging is
997 // profitable if the cost of skipping merging is less than the cost of
998 // merging : Cost(skipping merging) < Cost(merging BB), where the
999 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1000 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1001 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1002 // Freq(Pred) / Freq(BB) > 2.
1003 // Note that if there are multiple empty blocks sharing the same incoming
1004 // value for the PHIs in the DestBB, we consider them together. In such a
1005 // case, Cost(merging BB) will be the sum of their frequencies.
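  //
  // A hypothetical numeric example: if Freq(Pred) = 300 and Freq(BB) = 100,
  // then Freq(Pred) / Freq(BB) = 3 > 2, so skipping the merge is considered
  // profitable and BB is kept as a separate, mostly empty block.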
1006
1007 if (!isa<PHINode>(DestBB->begin()))
1008 return true;
1009
1010 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1011
1012 // Find all other incoming blocks from which incoming values of all PHIs in
1013 // DestBB are the same as the ones from BB.
1014 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1015 if (DestBBPred == BB)
1016 continue;
1017
1018 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1021 }))
1022 SameIncomingValueBBs.insert(DestBBPred);
1023 }
1024
1025 // See if all of BB's incoming values are the same as the value from Pred. In
1026 // this case, there is no reason to skip merging because COPYs are expected
1027 // to be placed in Pred already.
1028 if (SameIncomingValueBBs.count(Pred))
1029 return true;
1030
1031 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1032 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1033
1034 for (auto *SameValueBB : SameIncomingValueBBs)
1035 if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1037 BBFreq += BFI->getBlockFreq(SameValueBB);
1038
1039 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1040 return !Limit || PredFreq <= *Limit;
1041}
1042
1043/// Return true if we can merge BB into DestBB if there is a single
1044/// unconditional branch between them, and BB contains no other non-phi
1045/// instructions.
1046bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1047 const BasicBlock *DestBB) const {
1048 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1049 // the successor. If there are more complex conditions (e.g. preheaders),
1050 // don't mess around with them.
1051 for (const PHINode &PN : BB->phis()) {
1052 for (const User *U : PN.users()) {
1053 const Instruction *UI = cast<Instruction>(U);
1054 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1055 return false;
1056 // If User is inside the DestBB block and it is a PHINode, then check the
1057 // incoming value. If the incoming value is not from BB then this is
1058 // a complex condition (e.g. preheaders) we want to avoid here.
1059 if (UI->getParent() == DestBB) {
1060 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1061 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1062 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1063 if (Insn && Insn->getParent() == BB &&
1064 Insn->getParent() != UPN->getIncomingBlock(I))
1065 return false;
1066 }
1067 }
1068 }
1069 }
1070
1071 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1072 // and DestBB may have conflicting incoming values for the block. If so, we
1073 // can't merge the block.
1074 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1075 if (!DestBBPN)
1076 return true; // no conflict.
1077
1078 // Collect the preds of BB.
1080 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1081 // It is faster to get preds from a PHI than with pred_iterator.
1082 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.insert(BBPN->getIncomingBlock(i));
1084 } else {
1085 BBPreds.insert(pred_begin(BB), pred_end(BB));
1086 }
1087
1088 // Walk the preds of DestBB.
1089 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1090 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1091 if (BBPreds.count(Pred)) { // Common predecessor?
1092 for (const PHINode &PN : DestBB->phis()) {
1093 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1094 const Value *V2 = PN.getIncomingValueForBlock(BB);
1095
1096 // If V2 is a phi node in BB, look up what the mapped value will be.
1097 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1098 if (V2PN->getParent() == BB)
1099 V2 = V2PN->getIncomingValueForBlock(Pred);
1100
1101 // If there is a conflict, bail out.
1102 if (V1 != V2)
1103 return false;
1104 }
1105 }
1106 }
1107
1108 return true;
1109}
1110
1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1112static void replaceAllUsesWith(Value *Old, Value *New,
1114 bool IsHuge) {
1115 auto *OldI = dyn_cast<Instruction>(Old);
1116 if (OldI) {
1117 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1118 UI != E; ++UI) {
1119 Instruction *User = cast<Instruction>(*UI);
1120 if (IsHuge)
1121 FreshBBs.insert(User->getParent());
1122 }
1123 }
1124 Old->replaceAllUsesWith(New);
1125}
1126
1127/// Eliminate a basic block that has only phi's and an unconditional branch in
1128/// it.
1129void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1130 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1131 BasicBlock *DestBB = BI->getSuccessor(0);
1132
1133 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1134 << *BB << *DestBB);
1135
1136 // If the destination block has a single pred, then this is a trivial edge,
1137 // just collapse it.
1138 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1139 if (SinglePred != DestBB) {
1140 assert(SinglePred == BB &&
1141 "Single predecessor not the same as predecessor");
1142 // Merge DestBB into SinglePred/BB and delete it.
1144 // Note: BB(=SinglePred) will not be deleted on this path.
1145 // DestBB(=its single successor) is the one that was deleted.
1146 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1147
1148 if (IsHugeFunc) {
1149 // Update FreshBBs to optimize the merged BB.
1150 FreshBBs.insert(SinglePred);
1151 FreshBBs.erase(DestBB);
1152 }
1153 return;
1154 }
1155 }
1156
1157 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1158 // to handle the new incoming edges it is about to have.
1159 for (PHINode &PN : DestBB->phis()) {
1160 // Remove the incoming value for BB, and remember it.
1161 Value *InVal = PN.removeIncomingValue(BB, false);
1162
1163 // Two options: either the InVal is a phi node defined in BB or it is some
1164 // value that dominates BB.
1165 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166 if (InValPhi && InValPhi->getParent() == BB) {
1167 // Add all of the input values of the input PHI as inputs of this phi.
1168 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1169 PN.addIncoming(InValPhi->getIncomingValue(i),
1170 InValPhi->getIncomingBlock(i));
1171 } else {
1172 // Otherwise, add one instance of the dominating value for each edge that
1173 // we will be adding.
1174 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1175 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1177 } else {
1178 for (BasicBlock *Pred : predecessors(BB))
1179 PN.addIncoming(InVal, Pred);
1180 }
1181 }
1182 }
1183
1184 // Preserve loop Metadata.
1185 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1186 for (auto *Pred : predecessors(BB))
1187 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1188 }
1189
1190 // The PHIs are now updated, change everything that refers to BB to use
1191 // DestBB and remove BB.
1192 BB->replaceAllUsesWith(DestBB);
1193 BB->eraseFromParent();
1194 ++NumBlocksElim;
1195
1196 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1197}
1198
1199// Computes a map of base pointer relocation instructions to corresponding
1200// derived pointer relocation instructions given a vector of all relocate calls
1202 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1204 &RelocateInstMap) {
1205 // Collect information in two maps: one primarily for locating the base object
1206 // while filling the second map; the second map is the final structure holding
1207 // a mapping between Base and corresponding Derived relocate calls
1209 for (auto *ThisRelocate : AllRelocateCalls) {
1210 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1211 ThisRelocate->getDerivedPtrIndex());
1212 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1213 }
1214 for (auto &Item : RelocateIdxMap) {
1215 std::pair<unsigned, unsigned> Key = Item.first;
1216 if (Key.first == Key.second)
1217 // Base relocation: nothing to insert
1218 continue;
1219
1220 GCRelocateInst *I = Item.second;
1221 auto BaseKey = std::make_pair(Key.first, Key.first);
1222
1223 // We're iterating over RelocateIdxMap so we cannot modify it.
1224 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1225 if (MaybeBase == RelocateIdxMap.end())
1226 // TODO: We might want to insert a new base object relocate and gep off
1227 // that, if there are enough derived object relocates.
1228 continue;
1229
1230 RelocateInstMap[MaybeBase->second].push_back(I);
1231 }
1232}
1233
1234// Accepts a GEP and extracts the operands into a vector provided they're all
1235// small integer constants
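// For instance (illustrative only), %d = getelementptr i32, ptr %base, i64 3
// has a single constant index 3 (<= 20), so OffsetV becomes {i64 3} and the
// function returns true; any non-constant index, or a constant larger than
// 20, makes it return false.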
1237 SmallVectorImpl<Value *> &OffsetV) {
1238 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1239 // Only accept small constant integer operands
1240 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1241 if (!Op || Op->getZExtValue() > 20)
1242 return false;
1243 }
1244
1245 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1246 OffsetV.push_back(GEP->getOperand(i));
1247 return true;
1248}
1249
1250// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1251// replace, computes a replacement, and applies it.
1252static bool
1254 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1255 bool MadeChange = false;
1256 // We must ensure that the relocation of the derived pointer is defined
1257 // after the relocation of the base pointer. If we find a relocation
1258 // corresponding to the base defined earlier than the relocation of the base,
1259 // then we move the relocation of the base right before the found relocation.
1260 // We only consider relocations in the same basic block as the relocation of
1261 // the base. Relocations from other basic blocks are simply skipped.
1262 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1263 &*R != RelocatedBase; ++R)
1264 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1265 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1266 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1267 RelocatedBase->moveBefore(RI);
1268 MadeChange = true;
1269 break;
1270 }
1271
1272 for (GCRelocateInst *ToReplace : Targets) {
1273 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1274 "Not relocating a derived object of the original base object");
1275 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1276 // A duplicate relocate call. TODO: coalesce duplicates.
1277 continue;
1278 }
1279
1280 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1281 // Base and derived relocates are in different basic blocks.
1282 // In this case transform is only valid when base dominates derived
1283 // relocate. However it would be too expensive to check dominance
1284 // for each such relocate, so we skip the whole transformation.
1285 continue;
1286 }
1287
1288 Value *Base = ToReplace->getBasePtr();
1289 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1290 if (!Derived || Derived->getPointerOperand() != Base)
1291 continue;
1292
1294 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1295 continue;
1296
1297 // Create a Builder and replace the target callsite with a gep
1298 assert(RelocatedBase->getNextNode() &&
1299 "Should always have one since it's not a terminator");
1300
1301 // Insert after RelocatedBase
1302 IRBuilder<> Builder(RelocatedBase->getNextNode());
1303 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1304
1305 // If gc_relocate does not match the actual type, cast it to the right type.
1306 // In theory, there must be a bitcast after gc_relocate if the type does not
1307 // match, and we should reuse it to get the derived pointer. But there could
1308 // be cases like this:
1309 // bb1:
1310 // ...
1311 // %g1 = call coldcc i8 addrspace(1)*
1312 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1313 //
1314 // bb2:
1315 // ...
1316 // %g2 = call coldcc i8 addrspace(1)*
1317 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1318 //
1319 // merge:
1320 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1321 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1322 //
1323 // In this case, we cannot find the bitcast any more. So we insert a new
1324 // bitcast whether or not one already exists. In this way, we can handle
1325 // all cases, and the extra bitcast should be optimized away in later
1326 // passes.
1327 Value *ActualRelocatedBase = RelocatedBase;
1328 if (RelocatedBase->getType() != Base->getType()) {
1329 ActualRelocatedBase =
1330 Builder.CreateBitCast(RelocatedBase, Base->getType());
1331 }
1332 Value *Replacement =
1333 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1334 ArrayRef(OffsetV));
1335 Replacement->takeName(ToReplace);
1336 // If the newly generated derived pointer's type does not match the original
1337 // derived pointer's type, cast the new derived pointer to match it. Same
1338 // reasoning as above.
1339 Value *ActualReplacement = Replacement;
1340 if (Replacement->getType() != ToReplace->getType()) {
1341 ActualReplacement =
1342 Builder.CreateBitCast(Replacement, ToReplace->getType());
1343 }
1344 ToReplace->replaceAllUsesWith(ActualReplacement);
1345 ToReplace->eraseFromParent();
1346
1347 MadeChange = true;
1348 }
1349 return MadeChange;
1350}
1351
1352// Turns this:
1353//
1354// %base = ...
1355// %ptr = gep %base + 15
1356// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1357// %base' = relocate(%tok, i32 4, i32 4)
1358// %ptr' = relocate(%tok, i32 4, i32 5)
1359// %val = load %ptr'
1360//
1361// into this:
1362//
1363// %base = ...
1364// %ptr = gep %base + 15
1365// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1366// %base' = gc.relocate(%tok, i32 4, i32 4)
1367// %ptr' = gep %base' + 15
1368// %val = load %ptr'
1369bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1370 bool MadeChange = false;
1371 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1372 for (auto *U : I.users())
1373 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1374 // Collect all the relocate calls associated with a statepoint
1375 AllRelocateCalls.push_back(Relocate);
1376
1377 // We need at least one base pointer relocation + one derived pointer
1378 // relocation to mangle
1379 if (AllRelocateCalls.size() < 2)
1380 return false;
1381
1382 // RelocateInstMap is a mapping from the base relocate instruction to the
1383 // corresponding derived relocate instructions
1385 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1386 if (RelocateInstMap.empty())
1387 return false;
1388
1389 for (auto &Item : RelocateInstMap)
1390 // Item.first is the RelocatedBase to offset against
1391 // Item.second is the vector of Targets to replace
1392 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1393 return MadeChange;
1394}
1395
1396/// Sink the specified cast instruction into its user blocks.
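/// A rough illustration (hypothetical IR): if a free cast such as
///   %c = addrspacecast ptr %p to ptr addrspace(1)
/// is defined in one block but only used in another, a clone of the cast is
/// inserted into each user block, the uses are rewritten to the clones, and
/// the original cast is erased once it has no remaining uses.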
1397static bool SinkCast(CastInst *CI) {
1398 BasicBlock *DefBB = CI->getParent();
1399
1400 /// InsertedCasts - Only insert a cast in each block once.
1402
1403 bool MadeChange = false;
1404 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1405 UI != E;) {
1406 Use &TheUse = UI.getUse();
1407 Instruction *User = cast<Instruction>(*UI);
1408
1409 // Figure out which BB this cast is used in. For PHI's this is the
1410 // appropriate predecessor block.
1411 BasicBlock *UserBB = User->getParent();
1412 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1413 UserBB = PN->getIncomingBlock(TheUse);
1414 }
1415
1416 // Preincrement use iterator so we don't invalidate it.
1417 ++UI;
1418
1419 // The first insertion point of a block containing an EH pad is after the
1420 // pad. If the pad is the user, we cannot sink the cast past the pad.
1421 if (User->isEHPad())
1422 continue;
1423
1424 // If the block selected to receive the cast is an EH pad that does not
1425 // allow non-PHI instructions before the terminator, we can't sink the
1426 // cast.
1427 if (UserBB->getTerminator()->isEHPad())
1428 continue;
1429
1430 // If this user is in the same block as the cast, don't change the cast.
1431 if (UserBB == DefBB)
1432 continue;
1433
1434 // If we have already inserted a cast into this block, use it.
1435 CastInst *&InsertedCast = InsertedCasts[UserBB];
1436
1437 if (!InsertedCast) {
1438 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1439 assert(InsertPt != UserBB->end());
1440 InsertedCast = cast<CastInst>(CI->clone());
1441 InsertedCast->insertBefore(*UserBB, InsertPt);
1442 }
1443
1444 // Replace a use of the cast with a use of the new cast.
1445 TheUse = InsertedCast;
1446 MadeChange = true;
1447 ++NumCastUses;
1448 }
1449
1450 // If we removed all uses, nuke the cast.
1451 if (CI->use_empty()) {
1452 salvageDebugInfo(*CI);
1453 CI->eraseFromParent();
1454 MadeChange = true;
1455 }
1456
1457 return MadeChange;
1458}
1459
1460/// If the specified cast instruction is a noop copy (e.g. it's casting from
1461/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1462/// reduce the number of virtual registers that must be created and coalesced.
1463///
1464/// Return true if any changes are made.
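/// As a hedged example of the "noop after promotion" case mentioned above: on
/// a target where i8 and i32 are legalized to the same register class, a
///   %t = trunc i32 %x to i8
/// becomes a pure register copy, so sinking it into its user blocks is
/// effectively free.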
1466 const DataLayout &DL) {
1467 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1468 // than sinking only nop casts, but is helpful on some platforms.
1469 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1470 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1471 ASC->getDestAddressSpace()))
1472 return false;
1473 }
1474
1475 // If this is a noop copy,
1476 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1477 EVT DstVT = TLI.getValueType(DL, CI->getType());
1478
1479 // This is an fp<->int conversion?
1480 if (SrcVT.isInteger() != DstVT.isInteger())
1481 return false;
1482
1483 // If this is an extension, it will be a zero or sign extension, which
1484 // isn't a noop.
1485 if (SrcVT.bitsLT(DstVT))
1486 return false;
1487
1488 // If these values will be promoted, find out what they will be promoted
1489 // to. This helps us consider truncates on PPC as noop copies when they
1490 // are.
1491 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1493 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1494 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1496 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1497
1498 // If, after promotion, these are the same types, this is a noop copy.
1499 if (SrcVT != DstVT)
1500 return false;
1501
1502 return SinkCast(CI);
1503}
1504
1505// Match a simple increment by constant operation. Note that if a sub is
1506// matched, the step is negated (as if the step had been canonicalized to
1507// an add, even though we leave the instruction alone.)
1508static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1509 Constant *&Step) {
1510 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1511 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1512 m_Instruction(LHS), m_Constant(Step)))))
1513 return true;
1514 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1515 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1516 m_Instruction(LHS), m_Constant(Step))))) {
1517 Step = ConstantExpr::getNeg(Step);
1518 return true;
1519 }
1520 return false;
1521}
1522
1523/// If given \p PN is an inductive variable with value IVInc coming from the
1524/// backedge, and on each iteration it gets increased by Step, return pair
1525/// <IVInc, Step>. Otherwise, return std::nullopt.
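/// An illustrative pattern (hypothetical IR) that this matches:
///   loop:
///     %iv = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
///     ...
///     %iv.next = add i64 %iv, 1
///     br i1 %cond, label %loop, label %exit
/// Here the returned pair would be <%iv.next, i64 1>.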
1526static std::optional<std::pair<Instruction *, Constant *>>
1527getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1528 const Loop *L = LI->getLoopFor(PN->getParent());
1529 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1530 return std::nullopt;
1531 auto *IVInc =
1532 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1533 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1534 return std::nullopt;
1535 Instruction *LHS = nullptr;
1536 Constant *Step = nullptr;
1537 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1538 return std::make_pair(IVInc, Step);
1539 return std::nullopt;
1540}
1541
1542static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1543 auto *I = dyn_cast<Instruction>(V);
1544 if (!I)
1545 return false;
1546 Instruction *LHS = nullptr;
1547 Constant *Step = nullptr;
1548 if (!matchIncrement(I, LHS, Step))
1549 return false;
1550 if (auto *PN = dyn_cast<PHINode>(LHS))
1551 if (auto IVInc = getIVIncrement(PN, LI))
1552 return IVInc->first == I;
1553 return false;
1554}
1555
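// Sketch of the rewrite this helper performs (illustrative IR only): a math
// op and a compare that together test for overflow, e.g.
//   %add = add i32 %a, %b
//   %ov  = icmp ult i32 %add, %a
// are replaced with a single overflow intrinsic plus extracts:
//   %m   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %add = extractvalue { i32, i1 } %m, 0
//   %ov  = extractvalue { i32, i1 } %m, 1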
1556bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1557 Value *Arg0, Value *Arg1,
1558 CmpInst *Cmp,
1559 Intrinsic::ID IID) {
1560 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1561 if (!isIVIncrement(BO, LI))
1562 return false;
1563 const Loop *L = LI->getLoopFor(BO->getParent());
1564 assert(L && "L should not be null after isIVIncrement()");
1565 // Do not risk moving the increment into a child loop.
1566 if (LI->getLoopFor(Cmp->getParent()) != L)
1567 return false;
1568
1569 // Finally, we need to ensure that the insert point will dominate all
1570 // existing uses of the increment.
1571
1572 auto &DT = getDT(*BO->getParent()->getParent());
1573 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1574 // If we're moving up the dom tree, all uses are trivially dominated.
1575 // (This is the common case for code produced by LSR.)
1576 return true;
1577
1578 // Otherwise, special case the single use in the phi recurrence.
1579 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1580 };
1581 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1582 // We used to use a dominator tree here to allow multi-block optimization.
1583 // But that was problematic because:
1584 // 1. It could cause a perf regression by hoisting the math op into the
1585 // critical path.
1586 // 2. It could cause a perf regression by creating a value that was live
1587 // across multiple blocks and increasing register pressure.
1588 // 3. Use of a dominator tree could cause large compile-time regression.
1589 // This is because we recompute the DT on every change in the main CGP
1590 // run-loop. The recomputing is probably unnecessary in many cases, so if
1591 // that was fixed, using a DT here would be ok.
1592 //
1593 // There is one important particular case we still want to handle: if BO is
1594 // the IV increment. Important properties that make it profitable:
1595 // - We can speculate IV increment anywhere in the loop (as long as the
1596 // indvar Phi is its only user);
1597 // - Upon computing Cmp, we effectively compute something equivalent to the
1598 // IV increment (even though it looks different in the IR). So moving it up
1599 // to the cmp point does not really increase register pressure.
1600 return false;
1601 }
1602
1603 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1604 if (BO->getOpcode() == Instruction::Add &&
1605 IID == Intrinsic::usub_with_overflow) {
1606 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1607 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1608 }
1609
1610 // Insert at the first instruction of the pair.
1611 Instruction *InsertPt = nullptr;
1612 for (Instruction &Iter : *Cmp->getParent()) {
1613 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1614 // the overflow intrinsic are defined.
1615 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1616 InsertPt = &Iter;
1617 break;
1618 }
1619 }
1620 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1621
1622 IRBuilder<> Builder(InsertPt);
1623 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1624 if (BO->getOpcode() != Instruction::Xor) {
1625 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1626 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1627 } else
1628 assert(BO->hasOneUse() &&
1629 "Patterns with XOr should use the BO only in the compare");
1630 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1631 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1632 Cmp->eraseFromParent();
1633 BO->eraseFromParent();
1634 return true;
1635}
1636
1637/// Match special-case patterns that check for unsigned add overflow.
1638static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1639 BinaryOperator *&Add) {
1640 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1641 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1642 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1643
1644 // We are not expecting non-canonical/degenerate code. Just bail out.
1645 if (isa<Constant>(A))
1646 return false;
1647
1648 ICmpInst::Predicate Pred = Cmp->getPredicate();
1649 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1650 B = ConstantInt::get(B->getType(), 1);
1651 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1652 B = Constant::getAllOnesValue(B->getType());
1653 else
1654 return false;
1655
1656 // Check the users of the variable operand of the compare looking for an add
1657 // with the adjusted constant.
1658 for (User *U : A->users()) {
1659 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1660 Add = cast<BinaryOperator>(U);
1661 return true;
1662 }
1663 }
1664 return false;
1665}
1666
1667/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1668/// intrinsic. Return true if any changes were made.
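// A minimal IR sketch of the rewrite (illustrative; value names invented):
//
//   %add = add i32 %a, %b
//   %ov  = icmp ult i32 %add, %a        ; canonical unsigned-overflow check
// ==>
//   %uao = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %add = extractvalue { i32, i1 } %uao, 0
//   %ov  = extractvalue { i32, i1 } %uao, 1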
1669bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1670 ModifyDT &ModifiedDT) {
1671 bool EdgeCase = false;
1672 Value *A, *B;
1673 BinaryOperator *Add;
1674 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1675 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1676 return false;
1677 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1678 A = Add->getOperand(0);
1679 B = Add->getOperand(1);
1680 EdgeCase = true;
1681 }
1682
1683 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1684 TLI->getValueType(*DL, Add->getType()),
1685 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1686 return false;
1687
1688 // We don't want to move around uses of condition values this late, so we
1689 // check if it is legal to create the call to the intrinsic in the basic
1690 // block containing the icmp.
1691 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1692 return false;
1693
1694 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1695 Intrinsic::uadd_with_overflow))
1696 return false;
1697
1698 // Reset callers - do not crash by iterating over a dead instruction.
1699 ModifiedDT = ModifyDT::ModifyInstDT;
1700 return true;
1701}
1702
1703bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1704 ModifyDT &ModifiedDT) {
1705 // We are not expecting non-canonical/degenerate code. Just bail out.
1706 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1707 if (isa<Constant>(A) && isa<Constant>(B))
1708 return false;
1709
1710 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1711 ICmpInst::Predicate Pred = Cmp->getPredicate();
1712 if (Pred == ICmpInst::ICMP_UGT) {
1713 std::swap(A, B);
1714 Pred = ICmpInst::ICMP_ULT;
1715 }
1716 // Convert special-case: (A == 0) is the same as (A u< 1).
1717 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1718 B = ConstantInt::get(B->getType(), 1);
1719 Pred = ICmpInst::ICMP_ULT;
1720 }
1721 // Convert special-case: (A != 0) is the same as (0 u< A).
1722 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1723 std::swap(A, B);
1724 Pred = ICmpInst::ICMP_ULT;
1725 }
1726 if (Pred != ICmpInst::ICMP_ULT)
1727 return false;
1728
1729 // Walk the users of a variable operand of a compare looking for a subtract or
1730 // add with that same operand. Also match the 2nd operand of the compare to
1731 // the add/sub, but that may be a negated constant operand of an add.
1732 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1733 BinaryOperator *Sub = nullptr;
1734 for (User *U : CmpVariableOperand->users()) {
1735 // A - B, A u< B --> usubo(A, B)
1736 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1737 Sub = cast<BinaryOperator>(U);
1738 break;
1739 }
1740
1741 // A + (-C), A u< C (canonicalized form of (sub A, C))
1742 const APInt *CmpC, *AddC;
1743 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1744 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1745 Sub = cast<BinaryOperator>(U);
1746 break;
1747 }
1748 }
1749 if (!Sub)
1750 return false;
1751
1752 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1753 TLI->getValueType(*DL, Sub->getType()),
1754 Sub->hasNUsesOrMore(1)))
1755 return false;
1756
1757 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1758 Cmp, Intrinsic::usub_with_overflow))
1759 return false;
1760
1761 // Reset callers - do not crash by iterating over a dead instruction.
1762 ModifiedDT = ModifyDT::ModifyInstDT;
1763 return true;
1764}
1765
1766/// Sink the given CmpInst into user blocks to reduce the number of virtual
1767/// registers that must be created and coalesced. This is a clear win except on
1768/// targets with multiple condition code registers (PowerPC), where it might
1769/// lose; some adjustment may be wanted there.
1770///
1771/// Return true if any changes are made.
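// Rough sketch of the effect (illustrative only): a cmp defined in one block
// but used in another is re-created next to the user, so the i1 result does
// not need to be kept live across the block boundary:
//
//   BB1:  %c = icmp eq i32 %x, %y        ; erased once all uses are rewritten
//   BB2:  ... use of %c ...
// ==>
//   BB2:  %c.sunk = icmp eq i32 %x, %y
//         ... use of %c.sunk ...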
1772static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1773 if (TLI.hasMultipleConditionRegisters())
1774 return false;
1775
1776 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1777 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1778 return false;
1779
1780 // Only insert a cmp in each block once.
1781 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1782
1783 bool MadeChange = false;
1784 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1785 UI != E;) {
1786 Use &TheUse = UI.getUse();
1787 Instruction *User = cast<Instruction>(*UI);
1788
1789 // Preincrement use iterator so we don't invalidate it.
1790 ++UI;
1791
1792 // Don't bother for PHI nodes.
1793 if (isa<PHINode>(User))
1794 continue;
1795
1796 // Figure out which BB this cmp is used in.
1797 BasicBlock *UserBB = User->getParent();
1798 BasicBlock *DefBB = Cmp->getParent();
1799
1800 // If this user is in the same block as the cmp, don't change the cmp.
1801 if (UserBB == DefBB)
1802 continue;
1803
1804 // If we have already inserted a cmp into this block, use it.
1805 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1806
1807 if (!InsertedCmp) {
1808 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1809 assert(InsertPt != UserBB->end());
1810 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1811 Cmp->getOperand(0), Cmp->getOperand(1), "");
1812 InsertedCmp->insertBefore(*UserBB, InsertPt);
1813 // Propagate the debug info.
1814 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1815 }
1816
1817 // Replace a use of the cmp with a use of the new cmp.
1818 TheUse = InsertedCmp;
1819 MadeChange = true;
1820 ++NumCmpUses;
1821 }
1822
1823 // If we removed all uses, nuke the cmp.
1824 if (Cmp->use_empty()) {
1825 Cmp->eraseFromParent();
1826 MadeChange = true;
1827 }
1828
1829 return MadeChange;
1830}
1831
1832/// For pattern like:
1833///
1834/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1835/// ...
1836/// DomBB:
1837/// ...
1838/// br DomCond, TrueBB, CmpBB
1839/// CmpBB: (with DomBB being the single predecessor)
1840/// ...
1841/// Cmp = icmp eq CmpOp0, CmpOp1
1842/// ...
1843///
1844/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1845/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1846/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1847/// After that, DomCond and Cmp can use the same comparison, saving one
1848/// comparison.
1849///
1850/// Return true if any changes are made.
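// Concrete instance (illustrative, not from the source): with
//   DomCond = icmp slt i32 %a, %b
// and CmpBB only reachable when DomCond is false, the equality test
//   Cmp = icmp eq i32 %a, %b
// is rewritten to
//   Cmp = icmp sgt i32 %a, %b
// while every branch/select user of Cmp has its arms swapped, so both blocks
// can reuse one signed comparison.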
1851static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1852 const TargetLowering &TLI) {
1854 return false;
1855
1856 ICmpInst::Predicate Pred = Cmp->getPredicate();
1857 if (Pred != ICmpInst::ICMP_EQ)
1858 return false;
1859
1860 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1861 // icmp slt/sgt would introduce more redundant LLVM IR.
1862 for (User *U : Cmp->users()) {
1863 if (isa<BranchInst>(U))
1864 continue;
1865 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1866 continue;
1867 return false;
1868 }
1869
1870 // This is a cheap/incomplete check for dominance - just match a single
1871 // predecessor with a conditional branch.
1872 BasicBlock *CmpBB = Cmp->getParent();
1873 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1874 if (!DomBB)
1875 return false;
1876
1877 // We want to ensure that the only way control gets to the comparison of
1878 // interest is that a less/greater than comparison on the same operands is
1879 // false.
1880 Value *DomCond;
1881 BasicBlock *TrueBB, *FalseBB;
1882 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1883 return false;
1884 if (CmpBB != FalseBB)
1885 return false;
1886
1887 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1888 CmpPredicate DomPred;
1889 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1890 return false;
1891 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1892 return false;
1893
1894 // Convert the equality comparison to the opposite of the dominating
1895 // comparison and swap the direction for all branch/select users.
1896 // We have conceptually converted:
1897 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1898 // to
1899 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1900 // And similarly for branches.
1901 for (User *U : Cmp->users()) {
1902 if (auto *BI = dyn_cast<BranchInst>(U)) {
1903 assert(BI->isConditional() && "Must be conditional");
1904 BI->swapSuccessors();
1905 continue;
1906 }
1907 if (auto *SI = dyn_cast<SelectInst>(U)) {
1908 // Swap operands
1909 SI->swapValues();
1910 SI->swapProfMetadata();
1911 continue;
1912 }
1913 llvm_unreachable("Must be a branch or a select");
1914 }
1915 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1916 return true;
1917}
1918
1919/// Many architectures use the same instruction for both subtract and cmp. Try
1920/// to swap cmp operands to match subtract operations to allow for CSE.
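// Illustrative example (names invented): on targets where subtract and compare
// map to one instruction, swapping the compare operands exposes the CSE:
//
//   %s = sub i32 %b, %a
//   %c = icmp ult i32 %a, %b             ; reversed operand order vs. the sub
// ==>
//   %s = sub i32 %b, %a
//   %c = icmp ugt i32 %b, %a             ; same operand order as the sub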
1921static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1922 Value *Op0 = Cmp->getOperand(0);
1923 Value *Op1 = Cmp->getOperand(1);
1924 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1925 isa<Constant>(Op1) || Op0 == Op1)
1926 return false;
1927
1928 // If a subtract already has the same operands as a compare, swapping would be
1929 // bad. If a subtract has the same operands as a compare but in reverse order,
1930 // then swapping is good.
1931 int GoodToSwap = 0;
1932 unsigned NumInspected = 0;
1933 for (const User *U : Op0->users()) {
1934 // Avoid walking many users.
1935 if (++NumInspected > 128)
1936 return false;
1937 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
1938 GoodToSwap++;
1939 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
1940 GoodToSwap--;
1941 }
1942
1943 if (GoodToSwap > 0) {
1944 Cmp->swapOperands();
1945 return true;
1946 }
1947 return false;
1948}
1949
1950static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1951 const DataLayout &DL) {
1952 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1953 if (!FCmp)
1954 return false;
1955
1956 // Don't fold if the target offers free fabs and the predicate is legal.
1957 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
1958 if (TLI.isFAbsFree(VT) &&
1959 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
1960 VT.getSimpleVT()))
1961 return false;
1962
1963 // Reverse the canonicalization if it is a FP class test
1964 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
1965 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
1966 };
1967 auto [ClassVal, ClassTest] =
1968 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
1969 FCmp->getOperand(0), FCmp->getOperand(1));
1970 if (!ClassVal)
1971 return false;
1972
1973 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1974 return false;
1975
1976 IRBuilder<> Builder(Cmp);
1977 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
1978 Cmp->replaceAllUsesWith(IsFPClass);
1979 Cmp->eraseFromParent();
1980 return true;
1981}
1982
1983static bool isRemOfLoopIncrementWithLoopInvariant(
1984 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
1985 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
1986 Value *Incr, *RemAmt;
1987 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1988 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
1989 return false;
1990
1991 Value *AddInst, *AddOffset;
1992 // Find out loop increment PHI.
1993 auto *PN = dyn_cast<PHINode>(Incr);
1994 if (PN != nullptr) {
1995 AddInst = nullptr;
1996 AddOffset = nullptr;
1997 } else {
1998 // Search through a NUW add on top of the loop increment.
1999 Value *V0, *V1;
2000 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2001 return false;
2002
2003 AddInst = Incr;
2004 PN = dyn_cast<PHINode>(V0);
2005 if (PN != nullptr) {
2006 AddOffset = V1;
2007 } else {
2008 PN = dyn_cast<PHINode>(V1);
2009 AddOffset = V0;
2010 }
2011 }
2012
2013 if (!PN)
2014 return false;
2015
2016 // This isn't strictly necessary; what we really need is one increment and any
2017 // number of initial values, all being the same.
2018 if (PN->getNumIncomingValues() != 2)
2019 return false;
2020
2021 // Only trivially analyzable loops.
2022 Loop *L = LI->getLoopFor(PN->getParent());
2023 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2024 return false;
2025
2026 // Require that the remainder is in the loop.
2027 if (!L->contains(Rem))
2028 return false;
2029
2030 // Only works if the remainder amount is loop-invariant.
2031 if (!L->isLoopInvariant(RemAmt))
2032 return false;
2033
2034 // Is the PHI a loop increment?
2035 auto LoopIncrInfo = getIVIncrement(PN, LI);
2036 if (!LoopIncrInfo)
2037 return false;
2038
2039 // We need remainder_amount % increment_amount to be zero. Increment of one
2040 // satisfies that without any special logic and is overwhelmingly the common
2041 // case.
2042 if (!match(LoopIncrInfo->second, m_One()))
2043 return false;
2044
2045 // Need the increment to not overflow.
2046 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2047 return false;
2048
2049 // Set output variables.
2050 RemAmtOut = RemAmt;
2051 LoopIncrPNOut = PN;
2052 AddInstOut = AddInst;
2053 AddOffsetOut = AddOffset;
2054
2055 return true;
2056}
2057
2058// Try to transform:
2059//
2060// for(i = Start; i < End; ++i)
2061// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2062//
2063// ->
2064//
2065// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2066// for(i = Start; i < End; ++i, ++rem)
2067// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2068static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2069 const LoopInfo *LI,
2070 SmallSet<BasicBlock *, 32> &FreshBBs,
2071 bool IsHuge) {
2072 Value *AddOffset, *RemAmt, *AddInst;
2073 PHINode *LoopIncrPN;
2074 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2075 AddOffset, LoopIncrPN))
2076 return false;
2077
2078 // Only handle a non-constant remainder amount, as the extra IV is probably
2079 // not profitable in that case.
2080 //
2081 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2082 // we can rule out register pressure and ensure this `urem` is executed each
2083 // iteration, it's probably profitable to handle the const case as well.
2084 //
2085 // Potential TODO(2): Should we have a check for how "nested" this remainder
2086 // operation is? The new code runs every iteration so if the remainder is
2087 // guarded behind unlikely conditions this might not be worth it.
2088 if (match(RemAmt, m_ImmConstant()))
2089 return false;
2090
2091 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2092 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2093 // If we have an add, create the initial value for the remainder.
2094 // The logic here is:
2095 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2096 //
2097 // Only proceed if the expression simplifies (otherwise we can't fully
2098 // optimize out the urem).
2099 if (AddInst) {
2100 assert(AddOffset && "We found an add but missing values");
2101 // Without dom-condition/assumption cache we aren't likely to get much out
2102 // of a context instruction.
2103 Start = simplifyAddInst(Start, AddOffset,
2104 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2105 /*IsNUW=*/true, *DL);
2106 if (!Start)
2107 return false;
2108 }
2109
2110 // If we can't fully optimize out the `rem`, skip this transform.
2111 Start = simplifyURemInst(Start, RemAmt, *DL);
2112 if (!Start)
2113 return false;
2114
2115 // Create new remainder with induction variable.
2116 Type *Ty = Rem->getType();
2117 IRBuilder<> Builder(Rem->getContext());
2118
2119 Builder.SetInsertPoint(LoopIncrPN);
2120 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2121
2122 Builder.SetInsertPoint(cast<Instruction>(
2123 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2124 // `(add (urem x, y), 1)` is always nuw.
2125 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2126 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2127 Value *RemSel =
2128 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2129
2130 NewRem->addIncoming(Start, L->getLoopPreheader());
2131 NewRem->addIncoming(RemSel, L->getLoopLatch());
2132
2133 // Insert all touched BBs.
2134 FreshBBs.insert(LoopIncrPN->getParent());
2135 FreshBBs.insert(L->getLoopLatch());
2136 FreshBBs.insert(Rem->getParent());
2137 if (AddInst)
2138 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2139 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2140 Rem->eraseFromParent();
2141 if (AddInst && AddInst->use_empty())
2142 cast<Instruction>(AddInst)->eraseFromParent();
2143 return true;
2144}
2145
2146bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2147 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2148 return true;
2149 return false;
2150}
2151
2152/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`.
2153/// This function converts `ctpop(X) == 1` into `ctpop(X) u< 2` and
2154/// `ctpop(X) != 1` into `ctpop(X) u> 1` when the ctpop result is known non-zero.
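// Sketch of the rewrite (illustrative): assuming %x (and hence the ctpop
// result) is known non-zero,
//   %p = call i32 @llvm.ctpop.i32(i32 %x)
//   %c = icmp eq i32 %p, 1
// becomes
//   %c = icmp ult i32 %p, 2
// and the '!= 1' form becomes 'icmp ugt i32 %p, 1'.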
2155static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI,
2156 const TargetTransformInfo &TTI,
2157 const DataLayout &DL) {
2158 CmpPredicate Pred;
2159 if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One())))
2160 return false;
2161 if (!ICmpInst::isEquality(Pred))
2162 return false;
2163 auto *II = cast<IntrinsicInst>(Cmp->getOperand(0));
2164
2165 if (isKnownNonZero(II, DL)) {
2166 if (Pred == ICmpInst::ICMP_EQ) {
2167 Cmp->setOperand(1, ConstantInt::get(II->getType(), 2));
2168 Cmp->setPredicate(ICmpInst::ICMP_ULT);
2169 } else {
2170 Cmp->setPredicate(ICmpInst::ICMP_UGT);
2171 }
2172 return true;
2173 }
2174 return false;
2175}
2176
2177bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2178 if (sinkCmpExpression(Cmp, *TLI))
2179 return true;
2180
2181 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2182 return true;
2183
2184 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2185 return true;
2186
2187 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2188 return true;
2189
2190 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2191 return true;
2192
2193 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2194 return true;
2195
2196 if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL))
2197 return true;
2198
2199 return false;
2200}
2201
2202/// Duplicate and sink the given 'and' instruction into user blocks where it is
2203/// used in a compare to allow isel to generate better code for targets where
2204/// this operation can be combined.
2205///
2206/// Return true if any changes are made.
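// Rough sketch (illustrative only): an 'and' with an immediate mask whose only
// users are 'icmp eq ..., 0' in other blocks gets duplicated next to each
// compare, where many targets fold the pair into a single test-under-mask:
//
//   BB1:  %a = and i64 %x, 255
//   BB2:  %c = icmp eq i64 %a, 0
// ==>
//   BB2:  %a.sunk = and i64 %x, 255
//         %c      = icmp eq i64 %a.sunk, 0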
2207static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2208 SetOfInstrs &InsertedInsts) {
2209 // Double-check that we're not trying to optimize an instruction that was
2210 // already optimized by some other part of this pass.
2211 assert(!InsertedInsts.count(AndI) &&
2212 "Attempting to optimize already optimized and instruction");
2213 (void)InsertedInsts;
2214
2215 // Nothing to do for single use in same basic block.
2216 if (AndI->hasOneUse() &&
2217 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2218 return false;
2219
2220 // Try to avoid cases where sinking/duplicating is likely to increase register
2221 // pressure.
2222 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2223 !isa<ConstantInt>(AndI->getOperand(1)) &&
2224 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2225 return false;
2226
2227 for (auto *U : AndI->users()) {
2228 Instruction *User = cast<Instruction>(U);
2229
2230 // Only sink 'and' feeding icmp with 0.
2231 if (!isa<ICmpInst>(User))
2232 return false;
2233
2234 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2235 if (!CmpC || !CmpC->isZero())
2236 return false;
2237 }
2238
2239 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2240 return false;
2241
2242 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2243 LLVM_DEBUG(AndI->getParent()->dump());
2244
2245 // Push the 'and' into the same block as the icmp 0. There should only be
2246 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2247 // others, so we don't need to keep track of which BBs we insert into.
2248 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2249 UI != E;) {
2250 Use &TheUse = UI.getUse();
2251 Instruction *User = cast<Instruction>(*UI);
2252
2253 // Preincrement use iterator so we don't invalidate it.
2254 ++UI;
2255
2256 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2257
2258 // Keep the 'and' in the same place if the use is already in the same block.
2259 Instruction *InsertPt =
2260 User->getParent() == AndI->getParent() ? AndI : User;
2261 Instruction *InsertedAnd = BinaryOperator::Create(
2262 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2263 InsertPt->getIterator());
2264 // Propagate the debug info.
2265 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2266
2267 // Replace a use of the 'and' with a use of the new 'and'.
2268 TheUse = InsertedAnd;
2269 ++NumAndUses;
2270 LLVM_DEBUG(User->getParent()->dump());
2271 }
2272
2273 // We removed all uses, nuke the and.
2274 AndI->eraseFromParent();
2275 return true;
2276}
2277
2278/// Check if the candidates could be combined with a shift instruction, which
2279/// includes:
2280/// 1. Truncate instruction
2281/// 2. And instruction and the imm is a mask of the low bits:
2282/// imm & (imm+1) == 0
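// For instance (illustrative), imm = 0x00ff qualifies because
// 0x00ff & 0x0100 == 0, while imm = 0x00f0 is rejected because
// 0x00f0 & 0x00f1 == 0x00f0 != 0.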
2283static bool isExtractBitsCandidateUse(Instruction *User) {
2284 if (!isa<TruncInst>(User)) {
2285 if (User->getOpcode() != Instruction::And ||
2286 !isa<ConstantInt>(User->getOperand(1)))
2287 return false;
2288
2289 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2290
2291 if ((Cimm & (Cimm + 1)).getBoolValue())
2292 return false;
2293 }
2294 return true;
2295}
2296
2297/// Sink both shift and truncate instruction to the use of truncate's BB.
2298static bool
2299SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2300 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2301 const TargetLowering &TLI, const DataLayout &DL) {
2302 BasicBlock *UserBB = User->getParent();
2303 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2304 auto *TruncI = cast<TruncInst>(User);
2305 bool MadeChange = false;
2306
2307 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2308 TruncE = TruncI->user_end();
2309 TruncUI != TruncE;) {
2310
2311 Use &TruncTheUse = TruncUI.getUse();
2312 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2313 // Preincrement use iterator so we don't invalidate it.
2314
2315 ++TruncUI;
2316
2317 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2318 if (!ISDOpcode)
2319 continue;
2320
2321 // If the use is actually a legal node, there will not be an
2322 // implicit truncate.
2323 // FIXME: always querying the result type is just an
2324 // approximation; some nodes' legality is determined by the
2325 // operand or other means. There's no good way to find out though.
2326 if (TLI.isOperationLegalOrCustom(
2327 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2328 continue;
2329
2330 // Don't bother for PHI nodes.
2331 if (isa<PHINode>(TruncUser))
2332 continue;
2333
2334 BasicBlock *TruncUserBB = TruncUser->getParent();
2335
2336 if (UserBB == TruncUserBB)
2337 continue;
2338
2339 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2340 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2341
2342 if (!InsertedShift && !InsertedTrunc) {
2343 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2344 assert(InsertPt != TruncUserBB->end());
2345 // Sink the shift
2346 if (ShiftI->getOpcode() == Instruction::AShr)
2347 InsertedShift =
2348 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2349 else
2350 InsertedShift =
2351 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2352 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2353 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2354
2355 // Sink the trunc
2356 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2357 TruncInsertPt++;
2358 // It will go ahead of any debug-info.
2359 TruncInsertPt.setHeadBit(true);
2360 assert(TruncInsertPt != TruncUserBB->end());
2361
2362 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2363 TruncI->getType(), "");
2364 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2365 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2366
2367 MadeChange = true;
2368
2369 TruncTheUse = InsertedTrunc;
2370 }
2371 }
2372 return MadeChange;
2373}
2374
2375/// Sink the shift *right* instruction into user blocks if the uses could
2376/// potentially be combined with this shift instruction to generate a BitExtract
2377/// instruction. It will only be applied if the architecture supports the
2378/// BitExtract instruction. Here is an example:
2379/// BB1:
2380/// %x.extract.shift = lshr i64 %arg1, 32
2381/// BB2:
2382/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2383/// ==>
2384///
2385/// BB2:
2386/// %x.extract.shift.1 = lshr i64 %arg1, 32
2387/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2388///
2389/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2390/// instruction.
2391/// Return true if any changes are made.
2392static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2393 const TargetLowering &TLI,
2394 const DataLayout &DL) {
2395 BasicBlock *DefBB = ShiftI->getParent();
2396
2397 /// Only insert instructions in each block once.
2398 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2399
2400 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2401
2402 bool MadeChange = false;
2403 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2404 UI != E;) {
2405 Use &TheUse = UI.getUse();
2406 Instruction *User = cast<Instruction>(*UI);
2407 // Preincrement use iterator so we don't invalidate it.
2408 ++UI;
2409
2410 // Don't bother for PHI nodes.
2411 if (isa<PHINode>(User))
2412 continue;
2413
2414 if (!isExtractBitsCandidateUse(User))
2415 continue;
2416
2417 BasicBlock *UserBB = User->getParent();
2418
2419 if (UserBB == DefBB) {
2420 // If the shift and truncate instructions are in the same BB, the use of
2421 // the truncate (TruncUse) may still introduce another truncate if it is not
2422 // legal. In this case, we would like to sink both the shift and the truncate
2423 // instruction to the BB of TruncUse.
2424 // for example:
2425 // BB1:
2426 // i64 shift.result = lshr i64 opnd, imm
2427 // trunc.result = trunc shift.result to i16
2428 //
2429 // BB2:
2430 // ----> We will have an implicit truncate here if the architecture does
2431 // not have i16 compare.
2432 // cmp i16 trunc.result, opnd2
2433 //
2434 if (isa<TruncInst>(User) &&
2435 shiftIsLegal
2436 // If the type of the truncate is legal, no truncate will be
2437 // introduced in other basic blocks.
2438 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2439 MadeChange =
2440 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2441
2442 continue;
2443 }
2444 // If we have already inserted a shift into this block, use it.
2445 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2446
2447 if (!InsertedShift) {
2448 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2449 assert(InsertPt != UserBB->end());
2450
2451 if (ShiftI->getOpcode() == Instruction::AShr)
2452 InsertedShift =
2453 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2454 else
2455 InsertedShift =
2456 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2457 InsertedShift->insertBefore(*UserBB, InsertPt);
2458 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2459
2460 MadeChange = true;
2461 }
2462
2463 // Replace a use of the shift with a use of the new shift.
2464 TheUse = InsertedShift;
2465 }
2466
2467 // If we removed all uses, or there are none, nuke the shift.
2468 if (ShiftI->use_empty()) {
2469 salvageDebugInfo(*ShiftI);
2470 ShiftI->eraseFromParent();
2471 MadeChange = true;
2472 }
2473
2474 return MadeChange;
2475}
2476
2477/// If counting leading or trailing zeros is an expensive operation and a zero
2478/// input is defined, add a check for zero to avoid calling the intrinsic.
2479///
2480/// We want to transform:
2481/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2482///
2483/// into:
2484/// entry:
2485/// %cmpz = icmp eq i64 %A, 0
2486/// br i1 %cmpz, label %cond.end, label %cond.false
2487/// cond.false:
2488/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2489/// br label %cond.end
2490/// cond.end:
2491/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2492///
2493/// If the transform is performed, return true and set ModifiedDT to true.
2494static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2495 LoopInfo &LI,
2496 const TargetLowering *TLI,
2497 const DataLayout *DL, ModifyDT &ModifiedDT,
2498 SmallSet<BasicBlock *, 32> &FreshBBs,
2499 bool IsHugeFunc) {
2500 // If a zero input is undefined, it doesn't make sense to despeculate that.
2501 if (match(CountZeros->getOperand(1), m_One()))
2502 return false;
2503
2504 // If it's cheap to speculate, there's nothing to do.
2505 Type *Ty = CountZeros->getType();
2506 auto IntrinsicID = CountZeros->getIntrinsicID();
2507 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2508 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2509 return false;
2510
2511 // Only handle legal scalar cases. Anything else requires too much work.
2512 unsigned SizeInBits = Ty->getScalarSizeInBits();
2513 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2514 return false;
2515
2516 // Bail if the value is never zero.
2517 Use &Op = CountZeros->getOperandUse(0);
2518 if (isKnownNonZero(Op, *DL))
2519 return false;
2520
2521 // The intrinsic will be sunk behind a compare against zero and branch.
2522 BasicBlock *StartBlock = CountZeros->getParent();
2523 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2524 if (IsHugeFunc)
2525 FreshBBs.insert(CallBlock);
2526
2527 // Create another block after the count zero intrinsic. A PHI will be added
2528 // in this block to select the result of the intrinsic or the bit-width
2529 // constant if the input to the intrinsic is zero.
2530 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2531 // Any debug-info after CountZeros should not be included.
2532 SplitPt.setHeadBit(true);
2533 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2534 if (IsHugeFunc)
2535 FreshBBs.insert(EndBlock);
2536
2537 // Update the LoopInfo. The new blocks are in the same loop as the start
2538 // block.
2539 if (Loop *L = LI.getLoopFor(StartBlock)) {
2540 L->addBasicBlockToLoop(CallBlock, LI);
2541 L->addBasicBlockToLoop(EndBlock, LI);
2542 }
2543
2544 // Set up a builder to create a compare, conditional branch, and PHI.
2545 IRBuilder<> Builder(CountZeros->getContext());
2546 Builder.SetInsertPoint(StartBlock->getTerminator());
2547 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2548
2549 // Replace the unconditional branch that was created by the first split with
2550 // a compare against zero and a conditional branch.
2551 Value *Zero = Constant::getNullValue(Ty);
2552 // Avoid introducing branch on poison. This also replaces the ctz operand.
2553 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2554 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2555 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2556 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2557 StartBlock->getTerminator()->eraseFromParent();
2558
2559 // Create a PHI in the end block to select either the output of the intrinsic
2560 // or the bit width of the operand.
2561 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2562 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2563 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2564 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2565 PN->addIncoming(BitWidth, StartBlock);
2566 PN->addIncoming(CountZeros, CallBlock);
2567
2568 // We are explicitly handling the zero case, so we can set the intrinsic's
2569 // undefined zero argument to 'true'. This will also prevent reprocessing the
2570 // intrinsic; we only despeculate when a zero input is defined.
2571 CountZeros->setArgOperand(1, Builder.getTrue());
2572 ModifiedDT = ModifyDT::ModifyBBDT;
2573 return true;
2574}
2575
2576bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2577 BasicBlock *BB = CI->getParent();
2578
2579 // Lower inline assembly if we can.
2580 // If we found an inline asm expression, and if the target knows how to
2581 // lower it to normal LLVM code, do so now.
2582 if (CI->isInlineAsm()) {
2583 if (TLI->ExpandInlineAsm(CI)) {
2584 // Avoid invalidating the iterator.
2585 CurInstIterator = BB->begin();
2586 // Avoid processing instructions out of order, which could cause
2587 // reuse before a value is defined.
2588 SunkAddrs.clear();
2589 return true;
2590 }
2591 // Sink address computing for memory operands into the block.
2592 if (optimizeInlineAsmInst(CI))
2593 return true;
2594 }
2595
2596 // Align the pointer arguments to this call if the target thinks it's a good
2597 // idea
2598 unsigned MinSize;
2599 Align PrefAlign;
2600 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2601 for (auto &Arg : CI->args()) {
2602 // We want to align both objects whose address is used directly and
2603 // objects whose address is used in casts and GEPs, though it only makes
2604 // sense for GEPs if the offset is a multiple of the desired alignment and
2605 // if size - offset meets the size threshold.
2606 if (!Arg->getType()->isPointerTy())
2607 continue;
2608 APInt Offset(DL->getIndexSizeInBits(
2609 cast<PointerType>(Arg->getType())->getAddressSpace()),
2610 0);
2611 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2612 uint64_t Offset2 = Offset.getLimitedValue();
2613 if (!isAligned(PrefAlign, Offset2))
2614 continue;
2615 AllocaInst *AI;
2616 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2617 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2618 AI->setAlignment(PrefAlign);
2619 // Global variables can only be aligned if they are defined in this
2620 // object (i.e. they are uniquely initialized in this object), and
2621 // over-aligning global variables that have an explicit section is
2622 // forbidden.
2623 GlobalVariable *GV;
2624 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2625 GV->getPointerAlignment(*DL) < PrefAlign &&
2626 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2627 GV->setAlignment(PrefAlign);
2628 }
2629 }
2630 // If this is a memcpy (or similar) then we may be able to improve the
2631 // alignment.
2632 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2633 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2634 MaybeAlign MIDestAlign = MI->getDestAlign();
2635 if (!MIDestAlign || DestAlign > *MIDestAlign)
2636 MI->setDestAlignment(DestAlign);
2637 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2638 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2639 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2640 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2641 MTI->setSourceAlignment(SrcAlign);
2642 }
2643 }
2644
2645 // If we have a cold call site, try to sink addressing computation into the
2646 // cold block. This interacts with our handling for loads and stores to
2647 // ensure that we can fold all uses of a potential addressing computation
2648 // into their uses. TODO: generalize this to work over profiling data
2649 if (CI->hasFnAttr(Attribute::Cold) &&
2650 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2651 for (auto &Arg : CI->args()) {
2652 if (!Arg->getType()->isPointerTy())
2653 continue;
2654 unsigned AS = Arg->getType()->getPointerAddressSpace();
2655 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2656 return true;
2657 }
2658
2659 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2660 if (II) {
2661 switch (II->getIntrinsicID()) {
2662 default:
2663 break;
2664 case Intrinsic::assume:
2665 llvm_unreachable("llvm.assume should have been removed already");
2666 case Intrinsic::allow_runtime_check:
2667 case Intrinsic::allow_ubsan_check:
2668 case Intrinsic::experimental_widenable_condition: {
2669 // Give up on future widening opportunities so that we can fold away dead
2670 // paths and merge blocks before going into block-local instruction
2671 // selection.
2672 if (II->use_empty()) {
2673 II->eraseFromParent();
2674 return true;
2675 }
2676 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2677 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2678 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2679 });
2680 return true;
2681 }
2682 case Intrinsic::objectsize:
2683 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2684 case Intrinsic::is_constant:
2685 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2686 case Intrinsic::aarch64_stlxr:
2687 case Intrinsic::aarch64_stxr: {
2688 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2689 if (!ExtVal || !ExtVal->hasOneUse() ||
2690 ExtVal->getParent() == CI->getParent())
2691 return false;
2692 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2693 ExtVal->moveBefore(CI);
2694 // Mark this instruction as "inserted by CGP", so that other
2695 // optimizations don't touch it.
2696 InsertedInsts.insert(ExtVal);
2697 return true;
2698 }
2699
2700 case Intrinsic::launder_invariant_group:
2701 case Intrinsic::strip_invariant_group: {
2702 Value *ArgVal = II->getArgOperand(0);
2703 auto it = LargeOffsetGEPMap.find(II);
2704 if (it != LargeOffsetGEPMap.end()) {
2705 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2706 // Make sure not to have to deal with iterator invalidation
2707 // after possibly adding ArgVal to LargeOffsetGEPMap.
2708 auto GEPs = std::move(it->second);
2709 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2710 LargeOffsetGEPMap.erase(II);
2711 }
2712
2713 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2714 II->eraseFromParent();
2715 return true;
2716 }
2717 case Intrinsic::cttz:
2718 case Intrinsic::ctlz:
2719 // If counting zeros is expensive, try to avoid it.
2720 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2721 IsHugeFunc);
2722 case Intrinsic::fshl:
2723 case Intrinsic::fshr:
2724 return optimizeFunnelShift(II);
2725 case Intrinsic::dbg_assign:
2726 case Intrinsic::dbg_value:
2727 return fixupDbgValue(II);
2728 case Intrinsic::masked_gather:
2729 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2730 case Intrinsic::masked_scatter:
2731 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2732 }
2733
2734 SmallVector<Value *, 2> PtrOps;
2735 Type *AccessTy;
2736 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2737 while (!PtrOps.empty()) {
2738 Value *PtrVal = PtrOps.pop_back_val();
2739 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2740 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2741 return true;
2742 }
2743 }
2744
2745 // From here on out we're working with named functions.
2746 auto *Callee = CI->getCalledFunction();
2747 if (!Callee)
2748 return false;
2749
2750 // Lower all default uses of _chk calls. This is very similar
2751 // to what InstCombineCalls does, but here we are only lowering calls
2752 // to fortified library functions (e.g. __memcpy_chk) that have the default
2753 // "don't know" as the objectsize. Anything else should be left alone.
2754 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2755 IRBuilder<> Builder(CI);
2756 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2757 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2758 CI->eraseFromParent();
2759 return true;
2760 }
2761
2762 // SCCP may have propagated, among other things, C++ static variables across
2763 // calls. If this happens to be the case, we may want to undo it in order to
2764 // avoid redundant pointer computation of the constant, as the function
2765 // returning the constant needs to be executed anyway.
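// Hypothetical example (not from the source): if every 'ret' of 'ptr @get_g()'
// returns @g, SCCP may have rewritten later users of the call result to use @g
// directly; the code below points such same-block uses back at the call result
// so the address of @g is not materialized a second time.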
2766 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2767 if (!F->getReturnType()->isPointerTy())
2768 return nullptr;
2769
2770 GlobalVariable *UniformValue = nullptr;
2771 for (auto &BB : *F) {
2772 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2773 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2774 if (!UniformValue)
2775 UniformValue = V;
2776 else if (V != UniformValue)
2777 return nullptr;
2778 } else {
2779 return nullptr;
2780 }
2781 }
2782 }
2783
2784 return UniformValue;
2785 };
2786
2787 if (Callee->hasExactDefinition()) {
2788 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2789 bool MadeChange = false;
2790 for (Use &U : make_early_inc_range(RV->uses())) {
2791 auto *I = dyn_cast<Instruction>(U.getUser());
2792 if (!I || I->getParent() != CI->getParent()) {
2793 // Limit to the same basic block to avoid extending the call-site live
2794 // range, which otherwise could increase register pressure.
2795 continue;
2796 }
2797 if (CI->comesBefore(I)) {
2798 U.set(CI);
2799 MadeChange = true;
2800 }
2801 }
2802
2803 return MadeChange;
2804 }
2805 }
2806
2807 return false;
2808}
2809
2810static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2811 const CallInst *CI) {
2812 assert(CI && CI->use_empty());
2813
2814 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2815 switch (II->getIntrinsicID()) {
2816 case Intrinsic::memset:
2817 case Intrinsic::memcpy:
2818 case Intrinsic::memmove:
2819 return true;
2820 default:
2821 return false;
2822 }
2823
2824 LibFunc LF;
2825 Function *Callee = CI->getCalledFunction();
2826 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2827 switch (LF) {
2828 case LibFunc_strcpy:
2829 case LibFunc_strncpy:
2830 case LibFunc_strcat:
2831 case LibFunc_strncat:
2832 return true;
2833 default:
2834 return false;
2835 }
2836
2837 return false;
2838}
2839
2840/// Look for opportunities to duplicate return instructions to the predecessor
2841/// to enable tail call optimizations. The case it is currently looking for is
2842/// the following one. Known intrinsics or library function that may be tail
2843/// called are taken into account as well.
2844/// @code
2845/// bb0:
2846/// %tmp0 = tail call i32 @f0()
2847/// br label %return
2848/// bb1:
2849/// %tmp1 = tail call i32 @f1()
2850/// br label %return
2851/// bb2:
2852/// %tmp2 = tail call i32 @f2()
2853/// br label %return
2854/// return:
2855/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2856/// ret i32 %retval
2857/// @endcode
2858///
2859/// =>
2860///
2861/// @code
2862/// bb0:
2863/// %tmp0 = tail call i32 @f0()
2864/// ret i32 %tmp0
2865/// bb1:
2866/// %tmp1 = tail call i32 @f1()
2867/// ret i32 %tmp1
2868/// bb2:
2869/// %tmp2 = tail call i32 @f2()
2870/// ret i32 %tmp2
2871/// @endcode
2872bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2873 ModifyDT &ModifiedDT) {
2874 if (!BB->getTerminator())
2875 return false;
2876
2877 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2878 if (!RetI)
2879 return false;
2880
2881 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2882
2883 PHINode *PN = nullptr;
2884 ExtractValueInst *EVI = nullptr;
2885 BitCastInst *BCI = nullptr;
2886 Value *V = RetI->getReturnValue();
2887 if (V) {
2888 BCI = dyn_cast<BitCastInst>(V);
2889 if (BCI)
2890 V = BCI->getOperand(0);
2891
2892 EVI = dyn_cast<ExtractValueInst>(V);
2893 if (EVI) {
2894 V = EVI->getOperand(0);
2895 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2896 return false;
2897 }
2898
2899 PN = dyn_cast<PHINode>(V);
2900 }
2901
2902 if (PN && PN->getParent() != BB)
2903 return false;
2904
2905 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2906 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2907 if (BC && BC->hasOneUse())
2908 Inst = BC->user_back();
2909
2910 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2911 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2912 return false;
2913 };
2914
2915 SmallVector<const IntrinsicInst *, 1> FakeUses;
2916
2917 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2918 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2919 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2920 // Record the instruction so it can be preserved when the exit block is
2921 // removed. Do not preserve a fake use if it uses the result of a
2922 // PHI instruction, i.e. do not copy fake uses that use the result
2923 // of a PHI node.
2924 // FIXME: If we do want to copy the fake use into the return blocks, we
2925 // have to figure out which of the PHI node operands to use for each
2926 // copy.
2927 if (!isa<PHINode>(II->getOperand(0))) {
2928 FakeUses.push_back(II);
2929 }
2930 return true;
2931 }
2932
2933 return false;
2934 };
2935
2936 // Make sure there are no instructions between the first instruction
2937 // and return.
2938 const Instruction *BI = BB->getFirstNonPHI();
2939 // Skip over debug and the bitcast.
2940 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2941 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI) ||
2942 isFakeUse(BI))
2943 BI = BI->getNextNode();
2944 if (BI != RetI)
2945 return false;
2946
2947 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2948 /// call.
2949 const Function *F = BB->getParent();
2950 SmallVector<BasicBlock *, 4> TailCallBBs;
2951 // Record the call instructions so we can insert any fake uses
2952 // that need to be preserved before them.
2953 SmallVector<CallInst *, 1> CallInsts;
2954 if (PN) {
2955 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2956 // Look through bitcasts.
2957 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2958 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2959 BasicBlock *PredBB = PN->getIncomingBlock(I);
2960 // Make sure the phi value is indeed produced by the tail call.
2961 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2962 TLI->mayBeEmittedAsTailCall(CI) &&
2963 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2964 TailCallBBs.push_back(PredBB);
2965 CallInsts.push_back(CI);
2966 } else {
2967 // Consider the cases in which the phi value is indirectly produced by
2968 // the tail call, for example when encountering memset(), memmove(),
2969 // strcpy(), whose return value may have been optimized out. In such
2970 // cases, the value needs to be the first function argument.
2971 //
2972 // bb0:
2973 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2974 // br label %return
2975 // return:
2976 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2977 if (PredBB && PredBB->getSingleSuccessor() == BB)
2978 CI = dyn_cast_or_null<CallInst>(
2979 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2980
2981 if (CI && CI->use_empty() &&
2982 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2983 IncomingVal == CI->getArgOperand(0) &&
2984 TLI->mayBeEmittedAsTailCall(CI) &&
2985 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2986 TailCallBBs.push_back(PredBB);
2987 CallInsts.push_back(CI);
2988 }
2989 }
2990 }
2991 } else {
2992 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
2993 for (BasicBlock *Pred : predecessors(BB)) {
2994 if (!VisitedBBs.insert(Pred).second)
2995 continue;
2996 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2997 CallInst *CI = dyn_cast<CallInst>(I);
2998 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2999 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3000 // Either we return void or the return value must be the first
3001 // argument of a known intrinsic or library function.
3002 if (!V || isa<UndefValue>(V) ||
3003 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3004 V == CI->getArgOperand(0))) {
3005 TailCallBBs.push_back(Pred);
3006 CallInsts.push_back(CI);
3007 }
3008 }
3009 }
3010 }
3011 }
3012
3013 bool Changed = false;
3014 for (auto const &TailCallBB : TailCallBBs) {
3015 // Make sure the call instruction is followed by an unconditional branch to
3016 // the return block.
3017 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3018 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3019 continue;
3020
3021 // Duplicate the return into TailCallBB.
3022 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3023 assert(!VerifyBFIUpdates ||
3024 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3025 BFI->setBlockFreq(BB,
3026 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3027 ModifiedDT = ModifyDT::ModifyBBDT;
3028 Changed = true;
3029 ++NumRetsDup;
3030 }
3031
3032 // If we eliminated all predecessors of the block, delete the block now.
3033 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3034 // Copy the fake uses found in the original return block to all blocks
3035 // that contain tail calls.
3036 for (auto *CI : CallInsts) {
3037 for (auto const *FakeUse : FakeUses) {
3038 auto *ClonedInst = FakeUse->clone();
3039 ClonedInst->insertBefore(CI);
3040 }
3041 }
3042 BB->eraseFromParent();
3043 }
3044
3045 return Changed;
3046}
3047
3048//===----------------------------------------------------------------------===//
3049// Memory Optimization
3050//===----------------------------------------------------------------------===//
3051
3052namespace {
3053
3054/// This is an extended version of TargetLowering::AddrMode
3055/// which holds actual Value*'s for register values.
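// For example (illustrative decomposition; names invented), an address
// computed as '@buf + %i + 4 * %j', e.g. via
//   %p = getelementptr inbounds i8, ptr @buf, i64 %i
//   %q = getelementptr inbounds i32, ptr %p, i64 %j
// would be summarized as BaseGV = @buf, BaseReg = %i, ScaledReg = %j,
// Scale = 4, BaseOffs = 0.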
3056struct ExtAddrMode : public TargetLowering::AddrMode {
3057 Value *BaseReg = nullptr;
3058 Value *ScaledReg = nullptr;
3059 Value *OriginalValue = nullptr;
3060 bool InBounds = true;
3061
3062 enum FieldName {
3063 NoField = 0x00,
3064 BaseRegField = 0x01,
3065 BaseGVField = 0x02,
3066 BaseOffsField = 0x04,
3067 ScaledRegField = 0x08,
3068 ScaleField = 0x10,
3069 MultipleFields = 0xff
3070 };
3071
3072 ExtAddrMode() = default;
3073
3074 void print(raw_ostream &OS) const;
3075 void dump() const;
3076
3077 FieldName compare(const ExtAddrMode &other) {
3078 // First check that the types are the same on each field, as differing types
3079 // is something we can't cope with later on.
3080 if (BaseReg && other.BaseReg &&
3081 BaseReg->getType() != other.BaseReg->getType())
3082 return MultipleFields;
3083 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3084 return MultipleFields;
3085 if (ScaledReg && other.ScaledReg &&
3086 ScaledReg->getType() != other.ScaledReg->getType())
3087 return MultipleFields;
3088
3089 // Conservatively reject 'inbounds' mismatches.
3090 if (InBounds != other.InBounds)
3091 return MultipleFields;
3092
3093 // Check each field to see if it differs.
3094 unsigned Result = NoField;
3095 if (BaseReg != other.BaseReg)
3096 Result |= BaseRegField;
3097 if (BaseGV != other.BaseGV)
3098 Result |= BaseGVField;
3099 if (BaseOffs != other.BaseOffs)
3100 Result |= BaseOffsField;
3101 if (ScaledReg != other.ScaledReg)
3102 Result |= ScaledRegField;
3103 // Don't count 0 as being a different scale, because that actually means
3104 // unscaled (which will already be counted by having no ScaledReg).
3105 if (Scale && other.Scale && Scale != other.Scale)
3106 Result |= ScaleField;
3107
3108 if (llvm::popcount(Result) > 1)
3109 return MultipleFields;
3110 else
3111 return static_cast<FieldName>(Result);
3112 }
3113
3114 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3115 // with no offset.
3116 bool isTrivial() {
3117 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3118 // trivial if at most one of these terms is nonzero, except that BaseGV and
3119 // BaseReg both being zero actually means a null pointer value, which we
3120 // consider to be 'non-zero' here.
3121 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3122 }
3123
3124 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3125 switch (Field) {
3126 default:
3127 return nullptr;
3128 case BaseRegField:
3129 return BaseReg;
3130 case BaseGVField:
3131 return BaseGV;
3132 case ScaledRegField:
3133 return ScaledReg;
3134 case BaseOffsField:
3135 return ConstantInt::get(IntPtrTy, BaseOffs);
3136 }
3137 }
3138
3139 void SetCombinedField(FieldName Field, Value *V,
3140 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3141 switch (Field) {
3142 default:
3143 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3144 break;
3145 case ExtAddrMode::BaseRegField:
3146 BaseReg = V;
3147 break;
3148 case ExtAddrMode::BaseGVField:
3149 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3150 // in the BaseReg field.
3151 assert(BaseReg == nullptr);
3152 BaseReg = V;
3153 BaseGV = nullptr;
3154 break;
3155 case ExtAddrMode::ScaledRegField:
3156 ScaledReg = V;
3157 // If we have a mix of scaled and unscaled addrmodes then we want scale
3158 // to be the scale and not zero.
3159 if (!Scale)
3160 for (const ExtAddrMode &AM : AddrModes)
3161 if (AM.Scale) {
3162 Scale = AM.Scale;
3163 break;
3164 }
3165 break;
3166 case ExtAddrMode::BaseOffsField:
3167 // The offset is no longer a constant, so it goes in ScaledReg with a
3168 // scale of 1.
3169 assert(ScaledReg == nullptr);
3170 ScaledReg = V;
3171 Scale = 1;
3172 BaseOffs = 0;
3173 break;
3174 }
3175 }
3176};
3177
3178#ifndef NDEBUG
3179static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3180 AM.print(OS);
3181 return OS;
3182}
3183#endif
3184
3185#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3186void ExtAddrMode::print(raw_ostream &OS) const {
3187 bool NeedPlus = false;
3188 OS << "[";
3189 if (InBounds)
3190 OS << "inbounds ";
3191 if (BaseGV) {
3192 OS << "GV:";
3193 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3194 NeedPlus = true;
3195 }
3196
3197 if (BaseOffs) {
3198 OS << (NeedPlus ? " + " : "") << BaseOffs;
3199 NeedPlus = true;
3200 }
3201
3202 if (BaseReg) {
3203 OS << (NeedPlus ? " + " : "") << "Base:";
3204 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3205 NeedPlus = true;
3206 }
3207 if (Scale) {
3208 OS << (NeedPlus ? " + " : "") << Scale << "*";
3209 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3210 }
3211
3212 OS << ']';
3213}
3214
3215LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3216 print(dbgs());
3217 dbgs() << '\n';
3218}
3219#endif
3220
3221} // end anonymous namespace
3222
3223namespace {
3224
3225/// This class provides transaction based operation on the IR.
3226/// Every change made through this class is recorded in the internal state and
3227/// can be undone (rollback) until commit is called.
3228/// CGP does not check if instructions could be speculatively executed when
3229/// moved. Preserving the original location would pessimize the debugging
3230/// experience, as well as negatively impact the quality of sample PGO.
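// Simplified usage sketch (not a verbatim quote from this file; details may
// differ slightly):
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt Pt = TPT.getRestorationPoint();
//   // ... perform speculative rewrites that are recorded through TPT ...
//   if (!Profitable)
//     TPT.rollback(Pt);   // undo every recorded action
//   else
//     TPT.commit();       // keep the changes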
3231class TypePromotionTransaction {
3232 /// This represents the common interface of the individual transaction.
3233 /// Each class implements the logic for doing one specific modification on
3234 /// the IR via the TypePromotionTransaction.
3235 class TypePromotionAction {
3236 protected:
3237 /// The Instruction modified.
3238 Instruction *Inst;
3239
3240 public:
3241 /// Constructor of the action.
3242 /// The constructor performs the related action on the IR.
3243 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3244
3245 virtual ~TypePromotionAction() = default;
3246
3247 /// Undo the modification done by this action.
3248 /// When this method is called, the IR must be in the same state as it was
3249 /// right after this action was applied.
3250 /// \pre Undoing the action works if and only if the IR is in the exact same
3251 /// state as it was directly after this action was applied.
3252 virtual void undo() = 0;
3253
3254 /// Commit every change made by this action.
3255 /// When the action's results on the IR are to be kept, it is important
3256 /// to call this function; otherwise hidden information may be kept forever.
3257 virtual void commit() {
3258 // Nothing to be done, this action is not doing anything.
3259 }
3260 };
3261
3262 /// Utility to remember the position of an instruction.
3263 class InsertionHandler {
3264 /// Position of an instruction.
3265 /// Either an instruction:
3266 /// - Is the first in a basic block: BB is used.
3267 /// - Has a previous instruction: PrevInst is used.
3268 union {
3269 Instruction *PrevInst;
3270 BasicBlock *BB;
3271 } Point;
3272 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3273
3274 /// Remember whether or not the instruction had a previous instruction.
3275 bool HasPrevInstruction;
3276
3277 public:
3278 /// Record the position of \p Inst.
3279 InsertionHandler(Instruction *Inst) {
3280 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3281 BasicBlock *BB = Inst->getParent();
3282
3283 // Record where we would have to re-insert the instruction in the sequence
3284 // of DbgRecords, if we ended up reinserting.
3285 if (BB->IsNewDbgInfoFormat)
3286 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3287
3288 if (HasPrevInstruction) {
3289 Point.PrevInst = &*std::prev(Inst->getIterator());
3290 } else {
3291 Point.BB = BB;
3292 }
3293 }
3294
3295 /// Insert \p Inst at the recorded position.
3296 void insert(Instruction *Inst) {
3297 if (HasPrevInstruction) {
3298 if (Inst->getParent())
3299 Inst->removeFromParent();
3300 Inst->insertAfter(&*Point.PrevInst);
3301 } else {
3302 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3303 if (Inst->getParent())
3304 Inst->moveBefore(*Point.BB, Position);
3305 else
3306 Inst->insertBefore(*Point.BB, Position);
3307 }
3308
3309 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3310 }
3311 };
3312
3313 /// Move an instruction before another.
3314 class InstructionMoveBefore : public TypePromotionAction {
3315 /// Original position of the instruction.
3316 InsertionHandler Position;
3317
3318 public:
3319 /// Move \p Inst before \p Before.
3320 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
3321 : TypePromotionAction(Inst), Position(Inst) {
3322 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3323 << "\n");
3324 Inst->moveBefore(Before);
3325 }
3326
3327 /// Move the instruction back to its original position.
3328 void undo() override {
3329 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3330 Position.insert(Inst);
3331 }
3332 };
3333
3334 /// Set the operand of an instruction with a new value.
3335 class OperandSetter : public TypePromotionAction {
3336 /// Original operand of the instruction.
3337 Value *Origin;
3338
3339 /// Index of the modified instruction.
3340 unsigned Idx;
3341
3342 public:
3343 /// Set \p Idx operand of \p Inst with \p NewVal.
3344 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3345 : TypePromotionAction(Inst), Idx(Idx) {
3346 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3347 << "for:" << *Inst << "\n"
3348 << "with:" << *NewVal << "\n");
3349 Origin = Inst->getOperand(Idx);
3350 Inst->setOperand(Idx, NewVal);
3351 }
3352
3353 /// Restore the original value of the instruction.
3354 void undo() override {
3355 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3356 << "for: " << *Inst << "\n"
3357 << "with: " << *Origin << "\n");
3358 Inst->setOperand(Idx, Origin);
3359 }
3360 };
3361
3362 /// Hide the operands of an instruction.
3363 /// Pretend this instruction is not using any of its operands.
3364 class OperandsHider : public TypePromotionAction {
3365 /// The list of original operands.
3366 SmallVector<Value *, 4> OriginalValues;
3367
3368 public:
3369 /// Remove \p Inst from the uses of the operands of \p Inst.
3370 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3371 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3372 unsigned NumOpnds = Inst->getNumOperands();
3373 OriginalValues.reserve(NumOpnds);
3374 for (unsigned It = 0; It < NumOpnds; ++It) {
3375 // Save the current operand.
3376 Value *Val = Inst->getOperand(It);
3377 OriginalValues.push_back(Val);
3378 // Set a dummy one.
3379 // We could use OperandSetter here, but that would imply an overhead
3380 // that we are not willing to pay.
3381 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3382 }
3383 }
3384
3385 /// Restore the original list of uses.
3386 void undo() override {
3387 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3388 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3389 Inst->setOperand(It, OriginalValues[It]);
3390 }
3391 };
3392
3393 /// Build a truncate instruction.
3394 class TruncBuilder : public TypePromotionAction {
3395 Value *Val;
3396
3397 public:
3398 /// Build a truncate instruction of \p Opnd producing a \p Ty
3399 /// result.
3400 /// trunc Opnd to Ty.
3401 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3402 IRBuilder<> Builder(Opnd);
3403 Builder.SetCurrentDebugLocation(DebugLoc());
3404 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3405 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3406 }
3407
3408 /// Get the built value.
3409 Value *getBuiltValue() { return Val; }
3410
3411 /// Remove the built instruction.
3412 void undo() override {
3413 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3414 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3415 IVal->eraseFromParent();
3416 }
3417 };
3418
3419 /// Build a sign extension instruction.
3420 class SExtBuilder : public TypePromotionAction {
3421 Value *Val;
3422
3423 public:
3424 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3425 /// result.
3426 /// sext Opnd to Ty.
3427 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3428 : TypePromotionAction(InsertPt) {
3429 IRBuilder<> Builder(InsertPt);
3430 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3431 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3432 }
3433
3434 /// Get the built value.
3435 Value *getBuiltValue() { return Val; }
3436
3437 /// Remove the built instruction.
3438 void undo() override {
3439 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3440 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3441 IVal->eraseFromParent();
3442 }
3443 };
3444
3445 /// Build a zero extension instruction.
3446 class ZExtBuilder : public TypePromotionAction {
3447 Value *Val;
3448
3449 public:
3450 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3451 /// result.
3452 /// zext Opnd to Ty.
3453 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3454 : TypePromotionAction(InsertPt) {
3455 IRBuilder<> Builder(InsertPt);
3456 Builder.SetCurrentDebugLocation(DebugLoc());
3457 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3458 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3459 }
3460
3461 /// Get the built value.
3462 Value *getBuiltValue() { return Val; }
3463
3464 /// Remove the built instruction.
3465 void undo() override {
3466 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3467 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3468 IVal->eraseFromParent();
3469 }
3470 };
3471
3472 /// Mutate an instruction to another type.
3473 class TypeMutator : public TypePromotionAction {
3474 /// Record the original type.
3475 Type *OrigTy;
3476
3477 public:
3478 /// Mutate the type of \p Inst into \p NewTy.
3479 TypeMutator(Instruction *Inst, Type *NewTy)
3480 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3481 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3482 << "\n");
3483 Inst->mutateType(NewTy);
3484 }
3485
3486 /// Mutate the instruction back to its original type.
3487 void undo() override {
3488 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3489 << "\n");
3490 Inst->mutateType(OrigTy);
3491 }
3492 };
3493
3494 /// Replace the uses of an instruction by another instruction.
3495 class UsesReplacer : public TypePromotionAction {
3496 /// Helper structure to keep track of the replaced uses.
3497 struct InstructionAndIdx {
3498 /// The instruction using the instruction.
3499 Instruction *Inst;
3500
3501 /// The index where this instruction is used for Inst.
3502 unsigned Idx;
3503
3504 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3505 : Inst(Inst), Idx(Idx) {}
3506 };
3507
3508 /// Keep track of the original uses (pair Instruction, Index).
3509 SmallVector<InstructionAndIdx, 4> OriginalUses;
3510 /// Keep track of the debug users.
3511 SmallVector<DbgValueInst *, 1> DbgValues;
3512 /// And non-instruction debug-users too.
3513 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3514
3515 /// Keep track of the new value so that we can undo it by replacing
3516 /// instances of the new value with the original value.
3517 Value *New;
3518
3520
3521 public:
3522 /// Replace all the use of \p Inst by \p New.
3523 UsesReplacer(Instruction *Inst, Value *New)
3524 : TypePromotionAction(Inst), New(New) {
3525 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3526 << "\n");
3527 // Record the original uses.
3528 for (Use &U : Inst->uses()) {
3529 Instruction *UserI = cast<Instruction>(U.getUser());
3530 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3531 }
3532 // Record the debug uses separately. They are not in the instruction's
3533 // use list, but they are replaced by RAUW.
3534 findDbgValues(DbgValues, Inst, &DbgVariableRecords);
3535
3536 // Now, we can replace the uses.
3537 Inst->replaceAllUsesWith(New);
3538 }
3539
3540 /// Reassign the original uses of Inst to Inst.
3541 void undo() override {
3542 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3543 for (InstructionAndIdx &Use : OriginalUses)
3544 Use.Inst->setOperand(Use.Idx, Inst);
3545 // RAUW has replaced all original uses with references to the new value,
3546 // including the debug uses. Since we are undoing the replacements,
3547 // the original debug uses must also be reinstated to maintain the
3548 // correctness and utility of debug value instructions.
3549 for (auto *DVI : DbgValues)
3550 DVI->replaceVariableLocationOp(New, Inst);
3551 // Similar story with DbgVariableRecords, the non-instruction
3552 // representation of dbg.values.
3553 for (DbgVariableRecord *DVR : DbgVariableRecords)
3554 DVR->replaceVariableLocationOp(New, Inst);
3555 }
3556 };
3557
3558 /// Remove an instruction from the IR.
3559 class InstructionRemover : public TypePromotionAction {
3560 /// Original position of the instruction.
3561 InsertionHandler Inserter;
3562
3563 /// Helper structure to hide all the links to the instruction. In other
3564 /// words, this helps to pretend the instruction was removed.
3565 OperandsHider Hider;
3566
3567 /// Keep track of the uses replaced, if any.
3568 UsesReplacer *Replacer = nullptr;
3569
3570 /// Keep track of instructions removed.
3571 SetOfInstrs &RemovedInsts;
3572
3573 public:
3574 /// Remove all references to \p Inst and optionally replace all its
3575 /// uses with New.
3576 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3577 /// \pre If !Inst->use_empty(), then New != nullptr
3578 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3579 Value *New = nullptr)
3580 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3581 RemovedInsts(RemovedInsts) {
3582 if (New)
3583 Replacer = new UsesReplacer(Inst, New);
3584 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3585 RemovedInsts.insert(Inst);
3586 /// The instructions removed here will be freed after completing
3587 /// optimizeBlock() for all blocks as we need to keep track of the
3588 /// removed instructions during promotion.
3589 Inst->removeFromParent();
3590 }
3591
3592 ~InstructionRemover() override { delete Replacer; }
3593
3594 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3595 InstructionRemover(const InstructionRemover &other) = delete;
3596
3597 /// Resurrect the instruction and reassign it to the proper uses if
3598 /// a new value was provided when building this action.
3599 void undo() override {
3600 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3601 Inserter.insert(Inst);
3602 if (Replacer)
3603 Replacer->undo();
3604 Hider.undo();
3605 RemovedInsts.erase(Inst);
3606 }
3607 };
3608
3609public:
3610 /// Restoration point.
3611 /// The restoration point is a pointer to an action instead of an iterator
3612 /// because the iterator may be invalidated but not the pointer.
3613 using ConstRestorationPt = const TypePromotionAction *;
3614
3615 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3616 : RemovedInsts(RemovedInsts) {}
3617
3618 /// Commit every change made in this transaction. Return true if any change
3619 /// happened.
3620 bool commit();
3621
3622 /// Undo all the changes made after the given point.
3623 void rollback(ConstRestorationPt Point);
3624
3625 /// Get the current restoration point.
3626 ConstRestorationPt getRestorationPoint() const;
3627
3628 /// \name API for IR modification with state keeping to support rollback.
3629 /// @{
3630 /// Same as Instruction::setOperand.
3631 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3632
3633 /// Same as Instruction::eraseFromParent.
3634 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3635
3636 /// Same as Value::replaceAllUsesWith.
3637 void replaceAllUsesWith(Instruction *Inst, Value *New);
3638
3639 /// Same as Value::mutateType.
3640 void mutateType(Instruction *Inst, Type *NewTy);
3641
3642 /// Same as IRBuilder::createTrunc.
3643 Value *createTrunc(Instruction *Opnd, Type *Ty);
3644
3645 /// Same as IRBuilder::createSExt.
3646 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3647
3648 /// Same as IRBuilder::createZExt.
3649 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3650
3651private:
3652 /// The ordered list of actions made so far.
3652 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3654
3655 using CommitPt =
3656 SmallVector<std::unique_ptr<TypePromotionAction>, 16>::iterator;
3657
3658 SetOfInstrs &RemovedInsts;
3659};
3660
3661} // end anonymous namespace
3662
3663void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3664 Value *NewVal) {
3665 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3666 Inst, Idx, NewVal));
3667}
3668
3669void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3670 Value *NewVal) {
3671 Actions.push_back(
3672 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3673 Inst, RemovedInsts, NewVal));
3674}
3675
3676void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3677 Value *New) {
3678 Actions.push_back(
3679 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3680}
3681
3682void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3683 Actions.push_back(
3684 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3685}
3686
3687Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3688 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3689 Value *Val = Ptr->getBuiltValue();
3690 Actions.push_back(std::move(Ptr));
3691 return Val;
3692}
3693
3694Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3695 Type *Ty) {
3696 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3697 Value *Val = Ptr->getBuiltValue();
3698 Actions.push_back(std::move(Ptr));
3699 return Val;
3700}
3701
3702Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3703 Type *Ty) {
3704 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3705 Value *Val = Ptr->getBuiltValue();
3706 Actions.push_back(std::move(Ptr));
3707 return Val;
3708}
3709
3710TypePromotionTransaction::ConstRestorationPt
3711TypePromotionTransaction::getRestorationPoint() const {
3712 return !Actions.empty() ? Actions.back().get() : nullptr;
3713}
3714
3715bool TypePromotionTransaction::commit() {
3716 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3717 Action->commit();
3718 bool Modified = !Actions.empty();
3719 Actions.clear();
3720 return Modified;
3721}
3722
3723void TypePromotionTransaction::rollback(
3724 TypePromotionTransaction::ConstRestorationPt Point) {
3725 while (!Actions.empty() && Point != Actions.back().get()) {
3726 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3727 Curr->undo();
3728 }
3729}
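// Illustrative sketch, not part of the pass: a typical client of
// TypePromotionTransaction takes a restoration point before a speculative
// rewrite and either keeps or rolls back the changes, e.g.
//
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   TPT.mutateType(ExtOpnd, PromotedTy);      // recorded as a TypeMutator
//   TPT.replaceAllUsesWith(Ext, PromotedVal); // recorded as a UsesReplacer
//   if (!Profitable)
//     TPT.rollback(LastKnownGood);            // undo the actions in reverse
//   ...
//   Changed |= TPT.commit();                  // otherwise make them permanent
//
// ExtOpnd, PromotedTy, PromotedVal and Profitable are placeholders; the real
// callers are the promotion and addressing-mode matching code below.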
3730
3731namespace {
3732
3733/// A helper class for matching addressing modes.
3734///
3735/// This encapsulates the logic for matching the target-legal addressing modes.
3736class AddressingModeMatcher {
3737 SmallVectorImpl<Instruction *> &AddrModeInsts;
3738 const TargetLowering &TLI;
3739 const TargetRegisterInfo &TRI;
3740 const DataLayout &DL;
3741 const LoopInfo &LI;
3742 const std::function<const DominatorTree &()> getDTFn;
3743
3744 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3745 /// the memory instruction that we're computing this address for.
3746 Type *AccessTy;
3747 unsigned AddrSpace;
3748 Instruction *MemoryInst;
3749
3750 /// This is the addressing mode that we're building up. This is
3751 /// part of the return value of this addressing mode matching stuff.
3752 ExtAddrMode &AddrMode;
3753
3754 /// The instructions inserted by other CodeGenPrepare optimizations.
3755 const SetOfInstrs &InsertedInsts;
3756
3757 /// A map from the instructions to their type before promotion.
3758 InstrToOrigTy &PromotedInsts;
3759
3760 /// The ongoing transaction where every action should be registered.
3761 TypePromotionTransaction &TPT;
3762
3763 // A GEP whose offset is too large to be folded into the addressing mode.
3764 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3765
3766 /// This is set to true when we should not do profitability checks.
3767 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3768 bool IgnoreProfitability;
3769
3770 /// True if we are optimizing for size.
3771 bool OptSize = false;
3772
3773 ProfileSummaryInfo *PSI;
3774 BlockFrequencyInfo *BFI;
3775
3776 AddressingModeMatcher(
3777 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3778 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3779 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3780 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3781 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3782 TypePromotionTransaction &TPT,
3783 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3784 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3785 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3786 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3787 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3788 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3789 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3790 IgnoreProfitability = false;
3791 }
3792
3793public:
3794 /// Find the maximal addressing mode that a load/store of V can fold,
3795 /// given an access type of AccessTy. This returns a list of involved
3796 /// instructions in AddrModeInsts.
3797 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3798 /// optimizations.
3799 /// \p PromotedInsts maps the instructions to their type before promotion.
3800 /// \p TPT The ongoing transaction where every action should be registered.
3801 static ExtAddrMode
3802 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3803 SmallVectorImpl<Instruction *> &AddrModeInsts,
3804 const TargetLowering &TLI, const LoopInfo &LI,
3805 const std::function<const DominatorTree &()> getDTFn,
3806 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3807 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3808 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3809 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3810 ExtAddrMode Result;
3811
3812 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3813 AccessTy, AS, MemoryInst, Result,
3814 InsertedInsts, PromotedInsts, TPT,
3815 LargeOffsetGEP, OptSize, PSI, BFI)
3816 .matchAddr(V, 0);
3817 (void)Success;
3818 assert(Success && "Couldn't select *anything*?");
3819 return Result;
3820 }
3821
3822private:
3823 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3824 bool matchAddr(Value *Addr, unsigned Depth);
3825 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3826 bool *MovedAway = nullptr);
3827 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3828 ExtAddrMode &AMBefore,
3829 ExtAddrMode &AMAfter);
3830 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3831 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3832 Value *PromotedOperand) const;
3833};
3834
3835class PhiNodeSet;
3836
3837/// An iterator for PhiNodeSet.
3838class PhiNodeSetIterator {
3839 PhiNodeSet *const Set;
3840 size_t CurrentIndex = 0;
3841
3842public:
3843 /// The constructor. Start should point to either a valid element, or be equal
3844 /// to the size of the underlying SmallVector of the PhiNodeSet.
3845 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3846 PHINode *operator*() const;
3847 PhiNodeSetIterator &operator++();
3848 bool operator==(const PhiNodeSetIterator &RHS) const;
3849 bool operator!=(const PhiNodeSetIterator &RHS) const;
3850};
3851
3852/// Keeps a set of PHINodes.
3853///
3854/// This is a minimal set implementation for a specific use case:
3855/// It is very fast when there are very few elements, but also provides good
3856/// performance when there are many. It is similar to SmallPtrSet, but also
3857/// provides iteration by insertion order, which is deterministic and stable
3858 /// across runs. It is also similar to SmallSetVector, but supports removing
3859 /// elements in O(1) time. This is achieved by not actually removing the element
3860 /// from the underlying vector, which comes at the cost of using more memory, but
3861 /// that is fine, since PhiNodeSets are used as short-lived objects.
3862class PhiNodeSet {
3863 friend class PhiNodeSetIterator;
3864
3865 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3866 using iterator = PhiNodeSetIterator;
3867
3868 /// Keeps the elements in the order of their insertion in the underlying
3869 /// vector. To achieve constant time removal, it never deletes any element.
3870 SmallVector<PHINode *, 32> NodeList;
3871
3872 /// Keeps the elements in the underlying set implementation. This (and not the
3873 /// NodeList defined above) is the source of truth on whether an element
3874 /// is actually in the collection.
3875 MapType NodeMap;
3876
3877 /// Points to the first valid (not deleted) element when the set is not empty
3878 /// and the value is not zero. Equals the size of the underlying vector
3879 /// when the set is empty. When the value is 0, as in the beginning, the
3880 /// first element may or may not be valid.
3881 size_t FirstValidElement = 0;
3882
3883public:
3884 /// Inserts a new element to the collection.
3885 /// \returns true if the element is actually added, i.e. was not in the
3886 /// collection before the operation.
3887 bool insert(PHINode *Ptr) {
3888 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3889 NodeList.push_back(Ptr);
3890 return true;
3891 }
3892 return false;
3893 }
3894
3895 /// Removes the element from the collection.
3896 /// \returns whether the element is actually removed, i.e. was in the
3897 /// collection before the operation.
3898 bool erase(PHINode *Ptr) {
3899 if (NodeMap.erase(Ptr)) {
3900 SkipRemovedElements(FirstValidElement);
3901 return true;
3902 }
3903 return false;
3904 }
3905
3906 /// Removes all elements and clears the collection.
3907 void clear() {
3908 NodeMap.clear();
3909 NodeList.clear();
3910 FirstValidElement = 0;
3911 }
3912
3913 /// \returns an iterator that will iterate the elements in the order of
3914 /// insertion.
3915 iterator begin() {
3916 if (FirstValidElement == 0)
3917 SkipRemovedElements(FirstValidElement);
3918 return PhiNodeSetIterator(this, FirstValidElement);
3919 }
3920
3921 /// \returns an iterator that points to the end of the collection.
3922 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3923
3924 /// Returns the number of elements in the collection.
3925 size_t size() const { return NodeMap.size(); }
3926
3927 /// \returns 1 if the given element is in the collection, and 0 otherwise.
3928 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3929
3930private:
3931 /// Updates the CurrentIndex so that it will point to a valid element.
3932 ///
3933 /// If the element of NodeList at CurrentIndex is valid, it does not
3934 /// change it. If there are no more valid elements, it updates CurrentIndex
3935 /// to point to the end of the NodeList.
3936 void SkipRemovedElements(size_t &CurrentIndex) {
3937 while (CurrentIndex < NodeList.size()) {
3938 auto it = NodeMap.find(NodeList[CurrentIndex]);
3939 // If the element has been deleted and added again later, NodeMap will
3940 // point to a different index, so CurrentIndex will still be invalid.
3941 if (it != NodeMap.end() && it->second == CurrentIndex)
3942 break;
3943 ++CurrentIndex;
3944 }
3945 }
3946};
3947
3948PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3949 : Set(Set), CurrentIndex(Start) {}
3950
3951PHINode *PhiNodeSetIterator::operator*() const {
3952 assert(CurrentIndex < Set->NodeList.size() &&
3953 "PhiNodeSet access out of range");
3954 return Set->NodeList[CurrentIndex];
3955}
3956
3957PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3958 assert(CurrentIndex < Set->NodeList.size() &&
3959 "PhiNodeSet access out of range");
3960 ++CurrentIndex;
3961 Set->SkipRemovedElements(CurrentIndex);
3962 return *this;
3963}
3964
3965bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3966 return CurrentIndex == RHS.CurrentIndex;
3967}
3968
3969bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3970 return !((*this) == RHS);
3971}
3972
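// Illustrative sketch, not part of the pass: PhiNodeSet's tombstone-style
// erase only touches NodeMap, while NodeList keeps stale pointers that the
// iterator transparently skips, e.g.
//
//   PhiNodeSet Set;
//   Set.insert(P1);        // NodeList: [P1],     NodeMap: {P1 -> 0}
//   Set.insert(P2);        // NodeList: [P1, P2], NodeMap: {P1 -> 0, P2 -> 1}
//   Set.erase(P1);         // NodeList unchanged, NodeMap: {P2 -> 1}
//   for (PHINode *P : Set) // visits only P2, in insertion order
//     ...;
//
// P1 and P2 stand for arbitrary PHINode pointers; iteration cost stays
// proportional to the number of insertions ever made, which is acceptable for
// these short-lived sets.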
3973/// Keep track of simplification of Phi nodes.
3974 /// Accepts the set of all phi nodes and erases a phi node from this set
3975 /// if it is simplified.
3976class SimplificationTracker {
3977 DenseMap<Value *, Value *> Storage;
3978 const SimplifyQuery &SQ;
3979 // Tracks newly created Phi nodes. The elements are iterated by insertion
3980 // order.
3981 PhiNodeSet AllPhiNodes;
3982 // Tracks newly created Select nodes.
3983 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3984
3985public:
3986 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
3987
3988 Value *Get(Value *V) {
3989 do {
3990 auto SV = Storage.find(V);
3991 if (SV == Storage.end())
3992 return V;
3993 V = SV->second;
3994 } while (true);
3995 }
3996
3997 Value *Simplify(Value *Val) {
3998 SmallVector<Value *, 32> WorkList;
3999 SmallPtrSet<Value *, 32> Visited;
4000 WorkList.push_back(Val);
4001 while (!WorkList.empty()) {
4002 auto *P = WorkList.pop_back_val();
4003 if (!Visited.insert(P).second)
4004 continue;
4005 if (auto *PI = dyn_cast<Instruction>(P))
4006 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
4007 for (auto *U : PI->users())
4008 WorkList.push_back(cast<Value>(U));
4009 Put(PI, V);
4010 PI->replaceAllUsesWith(V);
4011 if (auto *PHI = dyn_cast<PHINode>(PI))
4012 AllPhiNodes.erase(PHI);
4013 if (auto *Select = dyn_cast<SelectInst>(PI))
4014 AllSelectNodes.erase(Select);
4015 PI->eraseFromParent();
4016 }
4017 }
4018 return Get(Val);
4019 }
4020
4021 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4022
4023 void ReplacePhi(PHINode *From, PHINode *To) {
4024 Value *OldReplacement = Get(From);
4025 while (OldReplacement != From) {
4026 From = To;
4027 To = dyn_cast<PHINode>(OldReplacement);
4028 OldReplacement = Get(From);
4029 }
4030 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4031 Put(From, To);
4032 From->replaceAllUsesWith(To);
4033 AllPhiNodes.erase(From);
4034 From->eraseFromParent();
4035 }
4036
4037 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4038
4039 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4040
4041 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4042
4043 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4044
4045 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4046
4047 void destroyNewNodes(Type *CommonType) {
4048 // For safe erasing, replace the uses with dummy value first.
4049 auto *Dummy = PoisonValue::get(CommonType);
4050 for (auto *I : AllPhiNodes) {
4051 I->replaceAllUsesWith(Dummy);
4052 I->eraseFromParent();
4053 }
4054 AllPhiNodes.clear();
4055 for (auto *I : AllSelectNodes) {
4056 I->replaceAllUsesWith(Dummy);
4057 I->eraseFromParent();
4058 }
4059 AllSelectNodes.clear();
4060 }
4061};
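// Illustrative sketch, not part of the pass: SimplificationTracker::Get()
// follows the replacement chain recorded by Put()/ReplacePhi() until it
// reaches a value that was never replaced, e.g. after
//
//   ST.Put(A, B); // A was simplified to B
//   ST.Put(B, C); // B was later simplified to C
//
// ST.Get(A), ST.Get(B) and ST.Get(C) all return C. A, B and C are placeholder
// Value pointers; in the combiner below they are the sunken phi/select
// placeholders and their simplified forms.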
4062
4063/// A helper class for combining addressing modes.
4064class AddressingModeCombiner {
4065 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4066 typedef std::pair<PHINode *, PHINode *> PHIPair;
4067
4068private:
4069 /// The addressing modes we've collected.
4070 SmallVector<ExtAddrMode, 16> AddrModes;
4071
4072 /// The field in which the AddrModes differ, when we have more than one.
4073 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4074
4075 /// Are the AddrModes that we have all just equal to their original values?
4076 bool AllAddrModesTrivial = true;
4077
4078 /// Common Type for all different fields in addressing modes.
4079 Type *CommonType = nullptr;
4080
4081 /// SimplifyQuery for simplifyInstruction utility.
4082 const SimplifyQuery &SQ;
4083
4084 /// Original Address.
4085 Value *Original;
4086
4087 /// Common value among addresses
4088 Value *CommonValue = nullptr;
4089
4090public:
4091 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
4092 : SQ(_SQ), Original(OriginalValue) {}
4093
4094 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4095
4096 /// Get the combined AddrMode
4097 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4098
4099 /// Add a new AddrMode if it's compatible with the AddrModes we already
4100 /// have.
4101 /// \return True iff we succeeded in doing so.
4102 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4103 // Take note of whether we have any non-trivial AddrModes: we need to detect
4104 // when all AddrModes are trivial, as then we would introduce a phi or select
4105 // which just duplicates what's already there.
4106 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4107
4108 // If this is the first addrmode then everything is fine.
4109 if (AddrModes.empty()) {
4110 AddrModes.emplace_back(NewAddrMode);
4111 return true;
4112 }
4113
4114 // Figure out how different this is from the other address modes, which we
4115 // can do just by comparing against the first one given that we only care
4116 // about the cumulative difference.
4117 ExtAddrMode::FieldName ThisDifferentField =
4118 AddrModes[0].compare(NewAddrMode);
4119 if (DifferentField == ExtAddrMode::NoField)
4120 DifferentField = ThisDifferentField;
4121 else if (DifferentField != ThisDifferentField)
4122 DifferentField = ExtAddrMode::MultipleFields;
4123
4124 // If NewAddrMode differs in more than one dimension we cannot handle it.
4125 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4126
4127 // If Scale Field is different then we reject.
4128 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4129
4130 // We must also reject the case when the base offset is different and the
4131 // scale reg is not null: we cannot handle this case because the merge of
4132 // the different offsets would be used as the ScaleReg.
4133 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4134 !NewAddrMode.ScaledReg);
4135
4136 // We must also reject the case when the GV is different and a BaseReg is
4137 // installed, because we want to use the base reg as a merge of the GV values.
4138 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4139 !NewAddrMode.HasBaseReg);
4140
4141 // Even if NewAddrMode is the same we still need to collect it, because the
4142 // original value is different. And later we will need all original values
4143 // as anchors when finding the common Phi node.
4144 if (CanHandle)
4145 AddrModes.emplace_back(NewAddrMode);
4146 else
4147 AddrModes.clear();
4148
4149 return CanHandle;
4150 }
4151
4152 /// Combine the addressing modes we've collected into a single
4153 /// addressing mode.
4154 /// \return True iff we successfully combined them or we only had one so
4155 /// didn't need to combine them anyway.
4156 bool combineAddrModes() {
4157 // If we have no AddrModes then they can't be combined.
4158 if (AddrModes.size() == 0)
4159 return false;
4160
4161 // A single AddrMode can trivially be combined.
4162 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4163 return true;
4164
4165 // If the AddrModes we collected are all just equal to the value they are
4166 // derived from then combining them wouldn't do anything useful.
4167 if (AllAddrModesTrivial)
4168 return false;
4169
4170 if (!addrModeCombiningAllowed())
4171 return false;
4172
4173 // Build a map between <original value, basic block where we saw it> to
4174 // value of base register.
4175 // Bail out if there is no common type.
4176 FoldAddrToValueMapping Map;
4177 if (!initializeMap(Map))
4178 return false;
4179
4180 CommonValue = findCommon(Map);
4181 if (CommonValue)
4182 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4183 return CommonValue != nullptr;
4184 }
4185
4186private:
4187 /// `CommonValue` may be a placeholder inserted by us.
4188 /// If the placeholder is not used, we should remove this dead instruction.
4189 void eraseCommonValueIfDead() {
4190 if (CommonValue && CommonValue->getNumUses() == 0)
4191 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4192 CommonInst->eraseFromParent();
4193 }
4194
4195 /// Initialize Map with anchor values. For each address seen,
4196 /// we set the value of the differing field in that address.
4197 /// At the same time we find a common type for the differing field, which we
4198 /// will use to create new Phi/Select nodes. It is kept in the CommonType field.
4199 /// Return false if there is no common type found.
4200 bool initializeMap(FoldAddrToValueMapping &Map) {
4201 // Keep track of keys where the value is null. We will need to replace it
4202 // with constant null when we know the common type.
4203 SmallVector<Value *, 2> NullValue;
4204 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4205 for (auto &AM : AddrModes) {
4206 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4207 if (DV) {
4208 auto *Type = DV->getType();
4209 if (CommonType && CommonType != Type)
4210 return false;
4211 CommonType = Type;
4212 Map[AM.OriginalValue] = DV;
4213 } else {
4214 NullValue.push_back(AM.OriginalValue);
4215 }
4216 }
4217 assert(CommonType && "At least one non-null value must be!");
4218 for (auto *V : NullValue)
4219 Map[V] = Constant::getNullValue(CommonType);
4220 return true;
4221 }
4222
4223 /// We have a mapping from value A to value B, where B was a field in the
4224 /// addressing mode represented by A. We also have an original value C
4225 /// representing the address we start with. Traversing from C through phis and
4226 /// selects, we ended up with the A's in the map. This utility function tries to
4227 /// find a value V which is a field in addressing mode C such that, traversing
4228 /// through phi nodes and selects, we end up at the corresponding B values in
4229 /// the map. The utility will create new Phis/Selects if needed.
4230 // The simple example looks as follows:
4231 // BB1:
4232 // p1 = b1 + 40
4233 // br cond BB2, BB3
4234 // BB2:
4235 // p2 = b2 + 40
4236 // br BB3
4237 // BB3:
4238 // p = phi [p1, BB1], [p2, BB2]
4239 // v = load p
4240 // Map is
4241 // p1 -> b1
4242 // p2 -> b2
4243 // Request is
4244 // p -> ?
4245 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4246 Value *findCommon(FoldAddrToValueMapping &Map) {
4247 // Tracks the simplification of newly created phi nodes. The reason we use
4248 // this mapping is that we will add newly created Phi nodes to AddrToBase.
4249 // Simplification of Phi nodes is recursive, so some Phi node may
4250 // be simplified after we add it to AddrToBase. In reality this
4251 // simplification is possible only if the original phis/selects were not
4252 // simplified yet.
4253 // Using this mapping we can find the current value in AddrToBase.
4254 SimplificationTracker ST(SQ);
4255
4256 // First step, DFS to create PHI nodes for all intermediate blocks.
4257 // Also fill traverse order for the second step.
4258 SmallVector<Value *, 32> TraverseOrder;
4259 InsertPlaceholders(Map, TraverseOrder, ST);
4260
4261 // Second Step, fill new nodes by merged values and simplify if possible.
4262 FillPlaceholders(Map, TraverseOrder, ST);
4263
4264 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4265 ST.destroyNewNodes(CommonType);
4266 return nullptr;
4267 }
4268
4269 // Now we'd like to match the new Phi nodes to existing ones.
4270 unsigned PhiNotMatchedCount = 0;
4271 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4272 ST.destroyNewNodes(CommonType);
4273 return nullptr;
4274 }
4275
4276 auto *Result = ST.Get(Map.find(Original)->second);
4277 if (Result) {
4278 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4279 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4280 }
4281 return Result;
4282 }
4283
4284 /// Try to match PHI node to Candidate.
4285 /// Matcher tracks the matched Phi nodes.
4286 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4287 SmallSetVector<PHIPair, 8> &Matcher,
4288 PhiNodeSet &PhiNodesToMatch) {
4289 SmallVector<PHIPair, 8> WorkList;
4290 Matcher.insert({PHI, Candidate});
4291 SmallSet<PHINode *, 8> MatchedPHIs;
4292 MatchedPHIs.insert(PHI);
4293 WorkList.push_back({PHI, Candidate});
4294 SmallSet<PHIPair, 8> Visited;
4295 while (!WorkList.empty()) {
4296 auto Item = WorkList.pop_back_val();
4297 if (!Visited.insert(Item).second)
4298 continue;
4299 // We iterate over all incoming values of the Phi to compare them.
4300 // If the values are different, both of them are Phis, the first one is a
4301 // Phi we added (subject to match), and both of them are in the same basic
4302 // block, then we can match our pair if the values match. So we state that
4303 // these values match and add them to the work list to verify that.
4304 for (auto *B : Item.first->blocks()) {
4305 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4306 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4307 if (FirstValue == SecondValue)
4308 continue;
4309
4310 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4311 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4312
4313 // If one of them is not a Phi, or
4314 // the first one is not a Phi node from the set we'd like to match, or
4315 // the Phi nodes are from different basic blocks, then
4316 // we will not be able to match.
4317 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4318 FirstPhi->getParent() != SecondPhi->getParent())
4319 return false;
4320
4321 // If we already matched them then continue.
4322 if (Matcher.count({FirstPhi, SecondPhi}))
4323 continue;
4324 // So the values are different and do not match. So we need them to
4325 // match. (But we register no more than one match per PHI node, so that
4326 // we won't later try to replace them twice.)
4327 if (MatchedPHIs.insert(FirstPhi).second)
4328 Matcher.insert({FirstPhi, SecondPhi});
4329 // But we must check it.
4330 WorkList.push_back({FirstPhi, SecondPhi});
4331 }
4332 }
4333 return true;
4334 }
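// Illustrative sketch, not part of the pass: two phi graphs match when they
// agree on every incoming edge, possibly through further phi pairs, e.g.
//
//   BB3:
//     %sunk = phi ptr [ %b1, %BB1 ], [ %b2, %BB2 ] ; newly created
//     %old  = phi ptr [ %b1, %BB1 ], [ %b2, %BB2 ] ; pre-existing
//
// MatchPhiNode(%sunk, %old, ...) succeeds immediately because the incoming
// values are identical; if an incoming value were itself a newly created phi,
// the corresponding pair would be pushed on the work list and checked the same
// way. The block and value names are hypothetical.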
4335
4336 /// For the given set of PHI nodes (in the SimplificationTracker) try
4337 /// to find their equivalents.
4338 /// Returns false if this matching fails and creation of new Phi nodes is disabled.
4339 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4340 unsigned &PhiNotMatchedCount) {
4341 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4342 // order, so the replacements (ReplacePhi) are also done in a deterministic
4343 // order.
4344 SmallSetVector<PHIPair, 8> Matched;
4345 SmallPtrSet<PHINode *, 8> WillNotMatch;
4346 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4347 while (PhiNodesToMatch.size()) {
4348 PHINode *PHI = *PhiNodesToMatch.begin();
4349
4350 // Add ourselves: if we match no Phi node in the basic block, we do not match.
4351 WillNotMatch.clear();
4352 WillNotMatch.insert(PHI);
4353
4354 // Traverse all Phis until we find an equivalent one or fail to do so.
4355 bool IsMatched = false;
4356 for (auto &P : PHI->getParent()->phis()) {
4357 // Skip new Phi nodes.
4358 if (PhiNodesToMatch.count(&P))
4359 continue;
4360 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4361 break;
4362 // If it does not match, collect all Phi nodes from the matcher;
4363 // if we end up with no match, then all these Phi nodes will not match
4364 // later.
4365 for (auto M : Matched)
4366 WillNotMatch.insert(M.first);
4367 Matched.clear();
4368 }
4369 if (IsMatched) {
4370 // Replace all matched values and erase them.
4371 for (auto MV : Matched)
4372 ST.ReplacePhi(MV.first, MV.second);
4373 Matched.clear();
4374 continue;
4375 }
4376 // If we are not allowed to create new nodes then bail out.
4377 if (!AllowNewPhiNodes)
4378 return false;
4379 // Just remove all seen values in matcher. They will not match anything.
4380 PhiNotMatchedCount += WillNotMatch.size();
4381 for (auto *P : WillNotMatch)
4382 PhiNodesToMatch.erase(P);
4383 }
4384 return true;
4385 }
4386 /// Fill the placeholders with values from predecessors and simplify them.
4387 void FillPlaceholders(FoldAddrToValueMapping &Map,
4388 SmallVectorImpl<Value *> &TraverseOrder,
4389 SimplificationTracker &ST) {
4390 while (!TraverseOrder.empty()) {
4391 Value *Current = TraverseOrder.pop_back_val();
4392 assert(Map.contains(Current) && "No node to fill!!!");
4393 Value *V = Map[Current];
4394
4395 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4396 // CurrentValue also must be Select.
4397 auto *CurrentSelect = cast<SelectInst>(Current);
4398 auto *TrueValue = CurrentSelect->getTrueValue();
4399 assert(Map.contains(TrueValue) && "No True Value!");
4400 Select->setTrueValue(ST.Get(Map[TrueValue]));
4401 auto *FalseValue = CurrentSelect->getFalseValue();
4402 assert(Map.contains(FalseValue) && "No False Value!");
4403 Select->setFalseValue(ST.Get(Map[FalseValue]));
4404 } else {
4405 // Must be a Phi node then.
4406 auto *PHI = cast<PHINode>(V);
4407 // Fill the Phi node with values from predecessors.
4408 for (auto *B : predecessors(PHI->getParent())) {
4409 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4410 assert(Map.contains(PV) && "No predecessor Value!");
4411 PHI->addIncoming(ST.Get(Map[PV]), B);
4412 }
4413 }
4414 Map[Current] = ST.Simplify(V);
4415 }
4416 }
4417
4418 /// Starting from the original value, recursively iterates over the def-use
4419 /// chain up to known ending values represented in a map. For each traversed
4420 /// phi/select it inserts a placeholder Phi or Select.
4421 /// Reports all newly created Phi/Select nodes by adding them to a set.
4422 /// Also reports the order in which the values have been traversed.
4423 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4424 SmallVectorImpl<Value *> &TraverseOrder,
4425 SimplificationTracker &ST) {
4426 SmallVector<Value *, 32> Worklist;
4427 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4428 "Address must be a Phi or Select node");
4429 auto *Dummy = PoisonValue::get(CommonType);
4430 Worklist.push_back(Original);
4431 while (!Worklist.empty()) {
4432 Value *Current = Worklist.pop_back_val();
4433 // If it is already visited or is an ending value, then skip it.
4434 if (Map.contains(Current))
4435 continue;
4436 TraverseOrder.push_back(Current);
4437
4438 // CurrentValue must be a Phi node or select. All others must be covered
4439 // by anchors.
4440 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4441 // Is it OK to get metadata from OrigSelect?!
4442 // Create a Select placeholder with dummy value.
4443 SelectInst *Select =
4444 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4445 CurrentSelect->getName(),
4446 CurrentSelect->getIterator(), CurrentSelect);
4447 Map[Current] = Select;
4448 ST.insertNewSelect(Select);
4449 // We are interested in True and False values.
4450 Worklist.push_back(CurrentSelect->getTrueValue());
4451 Worklist.push_back(CurrentSelect->getFalseValue());
4452 } else {
4453 // It must be a Phi node then.
4454 PHINode *CurrentPhi = cast<PHINode>(Current);
4455 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4456 PHINode *PHI =
4457 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4458 Map[Current] = PHI;
4459 ST.insertNewPhi(PHI);
4460 append_range(Worklist, CurrentPhi->incoming_values());
4461 }
4462 }
4463 }
4464
4465 bool addrModeCombiningAllowed() {
4466 if (DisableComplexAddrModes)
4467 return false;
4468 switch (DifferentField) {
4469 default:
4470 return false;
4471 case ExtAddrMode::BaseRegField:
4472 return AddrSinkCombineBaseReg;
4473 case ExtAddrMode::BaseGVField:
4474 return AddrSinkCombineBaseGV;
4475 case ExtAddrMode::BaseOffsField:
4476 return AddrSinkCombineBaseOffs;
4477 case ExtAddrMode::ScaledRegField:
4478 return AddrSinkCombineScaledReg;
4479 }
4480 }
4481};
4482} // end anonymous namespace
4483
4484/// Try adding ScaleReg*Scale to the current addressing mode.
4485/// Return true and update AddrMode if this addr mode is legal for the target,
4486/// false if not.
4487bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4488 unsigned Depth) {
4489 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4490 // mode. Just process that directly.
4491 if (Scale == 1)
4492 return matchAddr(ScaleReg, Depth);
4493
4494 // If the scale is 0, it takes nothing to add this.
4495 if (Scale == 0)
4496 return true;
4497
4498 // If we already have a scale of this value, we can add to it, otherwise, we
4499 // need an available scale field.
4500 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4501 return false;
4502
4503 ExtAddrMode TestAddrMode = AddrMode;
4504
4505 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4506 // [A+B + A*7] -> [B+A*8].
4507 TestAddrMode.Scale += Scale;
4508 TestAddrMode.ScaledReg = ScaleReg;
4509
4510 // If the new address isn't legal, bail out.
4511 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4512 return false;
4513
4514 // It was legal, so commit it.
4515 AddrMode = TestAddrMode;
4516
4517 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4518 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4519 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4520 // go any further: we can reuse it and cannot eliminate it.
4521 ConstantInt *CI = nullptr;
4522 Value *AddLHS = nullptr;
4523 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4524 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4525 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4526 TestAddrMode.InBounds = false;
4527 TestAddrMode.ScaledReg = AddLHS;
4528 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4529
4530 // If this addressing mode is legal, commit it and remember that we folded
4531 // this instruction.
4532 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4533 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4534 AddrMode = TestAddrMode;
4535 return true;
4536 }
4537 // Restore status quo.
4538 TestAddrMode = AddrMode;
4539 }
4540
4541 // If this is an add recurrence with a constant step, return the increment
4542 // instruction and the canonicalized step.
4543 auto GetConstantStep =
4544 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4545 auto *PN = dyn_cast<PHINode>(V);
4546 if (!PN)
4547 return std::nullopt;
4548 auto IVInc = getIVIncrement(PN, &LI);
4549 if (!IVInc)
4550 return std::nullopt;
4551 // TODO: The result of the intrinsics above is two's complement. However, when
4552 // the IV increment is expressed as add or sub, iv.next is potentially a poison
4553 // value. If it has nuw or nsw flags, we need to make sure that these flags are
4554 // inferrable at the point of the memory instruction. Otherwise we are replacing
4555 // a well-defined two's-complement computation with poison. Currently, to avoid
4556 // the potentially complex analysis needed to prove this, we reject such cases.
4557 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4558 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4559 return std::nullopt;
4560 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4561 return std::make_pair(IVInc->first, ConstantStep->getValue());
4562 return std::nullopt;
4563 };
4564
4565 // Try to account for the following special case:
4566 // 1. ScaleReg is an inductive variable;
4567 // 2. We use it with non-zero offset;
4568 // 3. IV's increment is available at the point of memory instruction.
4569 //
4570 // In this case, we may reuse the IV increment instead of the IV Phi to
4571 // achieve the following advantages:
4572 // 1. If IV step matches the offset, we will have no need in the offset;
4573 // 2. Even if they don't match, we will reduce the overlap of living IV
4574 // and IV increment, that will potentially lead to better register
4575 // assignment.
4576 if (AddrMode.BaseOffs) {
4577 if (auto IVStep = GetConstantStep(ScaleReg)) {
4578 Instruction *IVInc = IVStep->first;
4579 // The following assert is important to ensure a lack of infinite loops.
4580 // This transform is (intentionally) the inverse of the one just above.
4581 // If they don't agree on the definition of an increment, we'd alternate
4582 // back and forth indefinitely.
4583 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4584 APInt Step = IVStep->second;
4585 APInt Offset = Step * AddrMode.Scale;
4586 if (Offset.isSignedIntN(64)) {
4587 TestAddrMode.InBounds = false;
4588 TestAddrMode.ScaledReg = IVInc;
4589 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4590 // If this addressing mode is legal, commit it.
4591 // (Note that we defer the (expensive) domtree-based legality check
4592 // to the very last possible point.)
4593 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4594 getDTFn().dominates(IVInc, MemoryInst)) {
4595 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4596 AddrMode = TestAddrMode;
4597 return true;
4598 }
4599 // Restore status quo.
4600 TestAddrMode = AddrMode;
4601 }
4602 }
4603 }
4604
4605 // Otherwise, just return what we have.
4606 return true;
4607}
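// Illustrative sketch, not part of the pass: the "ScaleReg is actually X+C"
// fold above rewrites, for a target where reg + reg*scale + imm is legal, an
// address that uses
//
//   %sum = add i64 %x, 12
//
// as ScaledReg with Scale = 4 into an equivalent mode with ScaledReg = %x and
// BaseOffs increased by 12 * 4 = 48, so the add no longer has to be
// materialized just to feed the address. The IV-increment special case plays
// the same game with the increment instruction instead of the IV phi, provided
// the increment dominates the memory instruction. The value names are
// hypothetical.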
4608
4609/// This is a little filter, which returns true if an addressing computation
4610/// involving I might be folded into a load/store accessing it.
4611/// This doesn't need to be perfect, but needs to accept at least
4612/// the set of instructions that MatchOperationAddr can.
4613 static bool MightBeFoldableInst(Instruction *I) {
4614 switch (I->getOpcode()) {
4615 case Instruction::BitCast:
4616 case Instruction::AddrSpaceCast:
4617 // Don't touch identity bitcasts.
4618 if (I->getType() == I->getOperand(0)->getType())
4619 return false;
4620 return I->getType()->isIntOrPtrTy();
4621 case Instruction::PtrToInt:
4622 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4623 return true;
4624 case Instruction::IntToPtr:
4625 // We know the input is intptr_t, so this is foldable.
4626 return true;
4627 case Instruction::Add:
4628 return true;
4629 case Instruction::Mul:
4630 case Instruction::Shl:
4631 // Can only handle X*C and X << C.
4632 return isa<ConstantInt>(I->getOperand(1));
4633 case Instruction::GetElementPtr:
4634 return true;
4635 default:
4636 return false;
4637 }
4638}
4639
4640/// Check whether or not \p Val is a legal instruction for \p TLI.
4641/// \note \p Val is assumed to be the product of some type promotion.
4642/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4643/// to be legal, as the non-promoted value would have had the same state.
4644 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4645 const DataLayout &DL, Value *Val) {
4646 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4647 if (!PromotedInst)
4648 return false;
4649 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4650 // If the ISDOpcode is undefined, it was undefined before the promotion.
4651 if (!ISDOpcode)
4652 return true;
4653 // Otherwise, check if the promoted instruction is legal or not.
4654 return TLI.isOperationLegalOrCustom(
4655 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4656}
4657
4658namespace {
4659
4660 /// Helper class to perform type promotion.
4661class TypePromotionHelper {
4662 /// Utility function to add a promoted instruction \p ExtOpnd to
4663 /// \p PromotedInsts and record the type of extension we have seen.
4664 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4665 Instruction *ExtOpnd, bool IsSExt) {
4666 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4667 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4668 if (It != PromotedInsts.end()) {
4669 // If the new extension is the same as the original, the information in
4670 // PromotedInsts[ExtOpnd] is still correct.
4671 if (It->second.getInt() == ExtTy)
4672 return;
4673
4674 // Now that the new extension is different from the old extension, we make
4675 // the type information invalid by setting extension type to
4676 // BothExtension.
4677 ExtTy = BothExtension;
4678 }
4679 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4680 }
4681
4682 /// Utility function to query the original type of instruction \p Opnd
4683 /// with a matched extension type. If the extension doesn't match, we
4684 /// cannot use the information we had on the original type.
4685 /// BothExtension doesn't match any extension type.
4686 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4687 Instruction *Opnd, bool IsSExt) {
4688 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4689 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4690 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4691 return It->second.getPointer();
4692 return nullptr;
4693 }
4694
4695 /// Utility function to check whether or not a sign or zero extension
4696 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4697 /// either using the operands of \p Inst or promoting \p Inst.
4698 /// The type of the extension is defined by \p IsSExt.
4699 /// In other words, check if:
4700 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4701 /// #1 Promotion applies:
4702 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4703 /// #2 Operand reuses:
4704 /// ext opnd1 to ConsideredExtType.
4705 /// \p PromotedInsts maps the instructions to their type before promotion.
4706 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4707 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4708
4709 /// Utility function to determine if \p OpIdx should be promoted when
4710 /// promoting \p Inst.
4711 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4712 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4713 }
4714
4715 /// Utility function to promote the operand of \p Ext when this
4716 /// operand is a promotable trunc or sext or zext.
4717 /// \p PromotedInsts maps the instructions to their type before promotion.
4718 /// \p CreatedInstsCost[out] contains the cost of all instructions
4719 /// created to promote the operand of Ext.
4720 /// Newly added extensions are inserted in \p Exts.
4721 /// Newly added truncates are inserted in \p Truncs.
4722 /// Should never be called directly.
4723 /// \return The promoted value which is used instead of Ext.
4724 static Value *promoteOperandForTruncAndAnyExt(
4725 Instruction *Ext, TypePromotionTransaction &TPT,
4726 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4727 SmallVectorImpl<Instruction *> *Exts,
4728 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4729
4730 /// Utility function to promote the operand of \p Ext when this
4731 /// operand is promotable and is not a supported trunc or sext.
4732 /// \p PromotedInsts maps the instructions to their type before promotion.
4733 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4734 /// created to promote the operand of Ext.
4735 /// Newly added extensions are inserted in \p Exts.
4736 /// Newly added truncates are inserted in \p Truncs.
4737 /// Should never be called directly.
4738 /// \return The promoted value which is used instead of Ext.
4739 static Value *promoteOperandForOther(Instruction *Ext,
4740 TypePromotionTransaction &TPT,
4741 InstrToOrigTy &PromotedInsts,
4742 unsigned &CreatedInstsCost,
4743 SmallVectorImpl<Instruction *> *Exts,
4744 SmallVectorImpl<Instruction *> *Truncs,
4745 const TargetLowering &TLI, bool IsSExt);
4746
4747 /// \see promoteOperandForOther.
4748 static Value *signExtendOperandForOther(
4749 Instruction *Ext, TypePromotionTransaction &TPT,
4750 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4751 SmallVectorImpl<Instruction *> *Exts,
4752 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4753 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4754 Exts, Truncs, TLI, true);
4755 }
4756
4757 /// \see promoteOperandForOther.
4758 static Value *zeroExtendOperandForOther(
4759 Instruction *Ext, TypePromotionTransaction &TPT,
4760 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4761 SmallVectorImpl<Instruction *> *Exts,
4762 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4763 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4764 Exts, Truncs, TLI, false);
4765 }
4766
4767public:
4768 /// Type for the utility function that promotes the operand of Ext.
4769 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4770 InstrToOrigTy &PromotedInsts,
4771 unsigned &CreatedInstsCost,
4772 SmallVectorImpl<Instruction *> *Exts,
4773 SmallVectorImpl<Instruction *> *Truncs,
4774 const TargetLowering &TLI);
4775
4776 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4777 /// action to promote the operand of \p Ext instead of using Ext.
4778 /// \return NULL if no promotable action is possible with the current
4779 /// sign extension.
4780 /// \p InsertedInsts keeps track of all the instructions inserted by the
4781 /// other CodeGenPrepare optimizations. This information is important
4782 /// because we do not want to promote these instructions, as CodeGenPrepare
4783 /// would reinsert them later, thus creating an infinite loop: create/remove.
4784 /// \p PromotedInsts maps the instructions to their type before promotion.
4785 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4786 const TargetLowering &TLI,
4787 const InstrToOrigTy &PromotedInsts);
4788};
4789
4790} // end anonymous namespace
4791
4792bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4793 Type *ConsideredExtType,
4794 const InstrToOrigTy &PromotedInsts,
4795 bool IsSExt) {
4796 // The promotion helper does not know how to deal with vector types yet.
4797 // To be able to fix that, we would need to fix the places where we
4798 // statically extend, e.g., constants and such.
4799 if (Inst->getType()->isVectorTy())
4800 return false;
4801
4802 // We can always get through zext.
4803 if (isa<ZExtInst>(Inst))
4804 return true;
4805
4806 // sext(sext) is ok too.
4807 if (IsSExt && isa<SExtInst>(Inst))
4808 return true;
4809
4810 // We can get through binary operator, if it is legal. In other words, the
4811 // binary operator must have a nuw or nsw flag.
4812 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4813 if (isa<OverflowingBinaryOperator>(BinOp) &&
4814 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4815 (IsSExt && BinOp->hasNoSignedWrap())))
4816 return true;
4817
4818 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4819 if ((Inst->getOpcode() == Instruction::And ||
4820 Inst->getOpcode() == Instruction::Or))
4821 return true;
4822
4823 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4824 if (Inst->getOpcode() == Instruction::Xor) {
4825 // Make sure it is not a NOT.
4826 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4827 if (!Cst->getValue().isAllOnes())
4828 return true;
4829 }
4830
4831 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4832 // It may change a poisoned value into a regular value, like
4833 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4834 // (poisoned value) (regular value)
4835 // It should be OK since undef covers any valid value.
4836 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4837 return true;
4838
4839 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4840 // It may change a poisoned value into a regular value, like
4841 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4842 // (poisoned value) (regular value)
4843 // It should be OK since undef covers any valid value.
4844 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4845 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4846 if (ExtInst->hasOneUse()) {
4847 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4848 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4849 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4850 if (Cst &&
4851 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4852 return true;
4853 }
4854 }
4855 }
4856
4857 // Check if we can do the following simplification.
4858 // ext(trunc(opnd)) --> ext(opnd)
4859 if (!isa<TruncInst>(Inst))
4860 return false;
4861
4862 Value *OpndVal = Inst->getOperand(0);
4863 // Check if we can use this operand in the extension.
4864 // If the type is larger than the result type of the extension, we cannot.
4865 if (!OpndVal->getType()->isIntegerTy() ||
4866 OpndVal->getType()->getIntegerBitWidth() >
4867 ConsideredExtType->getIntegerBitWidth())
4868 return false;
4869
4870 // If the operand of the truncate is not an instruction, we will not have
4871 // any information on the dropped bits.
4872 // (Actually we could for constant but it is not worth the extra logic).
4873 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4874 if (!Opnd)
4875 return false;
4876
4877 // Check if the source of the truncated value is narrow enough.
4878 // I.e., check that the trunc just drops extended bits of the same kind as
4879 // the extension.
4880 // #1 get the type of the operand and check the kind of the extended bits.
4881 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4882 if (OpndType)
4883 ;
4884 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4885 OpndType = Opnd->getOperand(0)->getType();
4886 else
4887 return false;
4888
4889 // #2 check that the truncate just drops extended bits.
4890 return Inst->getType()->getIntegerBitWidth() >=
4891 OpndType->getIntegerBitWidth();
4892}
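// Illustrative IR for the trunc case above (a sketch; assumes the operand of
// the trunc was itself sign extended):
//   %w = sext i16 %v to i64
//   %t = trunc i64 %w to i32
//   %e = sext i32 %t to i64
// Here OpndType is i16 and the trunc only drops bits that are copies of the
// sign bit, so the outer sext can be moved through the trunc.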
4893
4894TypePromotionHelper::Action TypePromotionHelper::getAction(
4895 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4896 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4897 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4898 "Unexpected instruction type");
4899 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4900 Type *ExtTy = Ext->getType();
4901 bool IsSExt = isa<SExtInst>(Ext);
4902 // If the operand of the extension is not an instruction, we cannot
4903 // get through.
4904 // If it is, check whether we can get through it.
4905 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4906 return nullptr;
4907
4908 // Do not promote if the operand has been added by codegenprepare.
4909 // Otherwise, it means we are undoing an optimization that is likely to be
4910 // redone, thus causing potential infinite loop.
4911 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4912 return nullptr;
4913
4914 // SExt or Trunc instructions.
4915 // Return the related handler.
4916 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4917 isa<ZExtInst>(ExtOpnd))
4918 return promoteOperandForTruncAndAnyExt;
4919
4920 // Regular instruction.
4921 // Abort early if we will have to insert non-free instructions.
4922 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4923 return nullptr;
4924 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4925}
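// Usage sketch, mirroring the caller in matchOperationAddr below:
//   TypePromotionHelper::Action TPH =
//       TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
//   if (TPH)
//     Value *Promoted =
//         TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);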
4926
4927Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4928 Instruction *SExt, TypePromotionTransaction &TPT,
4929 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4930 SmallVectorImpl<Instruction *> *Exts,
4931 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4932 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4933 // get through it and this method should not be called.
4934 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4935 Value *ExtVal = SExt;
4936 bool HasMergedNonFreeExt = false;
4937 if (isa<ZExtInst>(SExtOpnd)) {
4938 // Replace s|zext(zext(opnd))
4939 // => zext(opnd).
4940 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4941 Value *ZExt =
4942 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4943 TPT.replaceAllUsesWith(SExt, ZExt);
4944 TPT.eraseInstruction(SExt);
4945 ExtVal = ZExt;
4946 } else {
4947 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4948 // => z|sext(opnd).
4949 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4950 }
4951 CreatedInstsCost = 0;
4952
4953 // Remove dead code.
4954 if (SExtOpnd->use_empty())
4955 TPT.eraseInstruction(SExtOpnd);
4956
4957 // Check if the extension is still needed.
4958 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4959 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4960 if (ExtInst) {
4961 if (Exts)
4962 Exts->push_back(ExtInst);
4963 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4964 }
4965 return ExtVal;
4966 }
4967
4968 // At this point we have: ext ty opnd to ty.
4969 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4970 Value *NextVal = ExtInst->getOperand(0);
4971 TPT.eraseInstruction(ExtInst, NextVal);
4972 return NextVal;
4973}
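// Illustrative rewrites performed above (a sketch):
//   %z = zext i8 %v to i16
//   %s = sext i16 %z to i32      -->  %z2 = zext i8 %v to i32
// and
//   %t = trunc i32 %x to i16
//   %s = sext i16 %t to i32      -->  sext i32 %x to i32, which is then
//                                     folded away in favor of %x itself.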
4974
4975Value *TypePromotionHelper::promoteOperandForOther(
4976 Instruction *Ext, TypePromotionTransaction &TPT,
4977 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4978 SmallVectorImpl<Instruction *> *Exts,
4979 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4980 bool IsSExt) {
4981 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4982 // get through it and this method should not be called.
4983 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4984 CreatedInstsCost = 0;
4985 if (!ExtOpnd->hasOneUse()) {
4986 // ExtOpnd will be promoted.
4987 // All its uses, but Ext, will need to use a truncated value of the
4988 // promoted version.
4989 // Create the truncate now.
4990 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4991 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4992 // Insert it just after the definition.
4993 ITrunc->moveAfter(ExtOpnd);
4994 if (Truncs)
4995 Truncs->push_back(ITrunc);
4996 }
4997
4998 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4999 // Restore the operand of Ext (which has been replaced by the previous call
5000 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5001 TPT.setOperand(Ext, 0, ExtOpnd);
5002 }
5003
5004 // Get through the Instruction:
5005 // 1. Update its type.
5006 // 2. Replace the uses of Ext by Inst.
5007 // 3. Extend each operand that needs to be extended.
5008
5009 // Remember the original type of the instruction before promotion.
5010 // This is useful to know that the high bits are sign extended bits.
5011 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5012 // Step #1.
5013 TPT.mutateType(ExtOpnd, Ext->getType());
5014 // Step #2.
5015 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5016 // Step #3.
5017 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5018 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5019 ++OpIdx) {
5020 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5021 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5022 !shouldExtOperand(ExtOpnd, OpIdx)) {
5023 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5024 continue;
5025 }
5026 // Check if we can statically extend the operand.
5027 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5028 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5029 LLVM_DEBUG(dbgs() << "Statically extend\n");
5030 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5031 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5032 : Cst->getValue().zext(BitWidth);
5033 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5034 continue;
5035 }
5036 // UndefValues are typed, so we have to statically extend them.
5037 if (isa<UndefValue>(Opnd)) {
5038 LLVM_DEBUG(dbgs() << "Statically extend\n");
5039 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5040 continue;
5041 }
5042
5043 // Otherwise we have to explicitly sign or zero extend the operand.
5044 Value *ValForExtOpnd = IsSExt
5045 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5046 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5047 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5048 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5049 if (!InstForExtOpnd)
5050 continue;
5051
5052 if (Exts)
5053 Exts->push_back(InstForExtOpnd);
5054
5055 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5056 }
5057 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5058 TPT.eraseInstruction(Ext);
5059 return ExtOpnd;
5060}
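// Illustrative before/after for the promotion above (a sketch; the nsw flag
// is what lets canGetThrough accept the add for a sign extension):
//   before:  %a = add nsw i32 %x, 1
//            %e = sext i32 %a to i64
//   after:   %px = sext i32 %x to i64
//            %a  = add i64 %px, 1      ; %a mutated to i64, uses of %e -> %a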
5061
5062/// Check whether or not promoting an instruction to a wider type is profitable.
5063/// \p NewCost gives the cost of extension instructions created by the
5064/// promotion.
5065/// \p OldCost gives the cost of extension instructions before the promotion
5066/// plus the number of instructions that have been
5067 /// matched in the addressing mode thanks to the promotion.
5068/// \p PromotedOperand is the value that has been promoted.
5069/// \return True if the promotion is profitable, false otherwise.
5070bool AddressingModeMatcher::isPromotionProfitable(
5071 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5072 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5073 << '\n');
5074 // The cost of the new extensions is greater than the cost of the
5075 // old extension plus what we folded.
5076 // This is not profitable.
5077 if (NewCost > OldCost)
5078 return false;
5079 if (NewCost < OldCost)
5080 return true;
5081 // The promotion is neutral but it may help folding the sign extension in
5082 // loads for instance.
5083 // Check that we did not create an illegal instruction.
5084 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5085}
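// Numeric sketch (hypothetical costs): if the original sext was not free
// (ExtCost = 1) and one extra instruction was folded into the addressing mode
// (OldCost = 2), a promotion that created two non-free extensions
// (NewCost = 2) is cost-neutral and is accepted only if the promoted
// instruction is still legal for the target.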
5086
5087/// Given an instruction or constant expr, see if we can fold the operation
5088/// into the addressing mode. If so, update the addressing mode and return
5089/// true, otherwise return false without modifying AddrMode.
5090 /// If \p MovedAway is not NULL, it contains information about whether or
5091 /// not AddrInst has to be folded into the addressing mode on success.
5092 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
5093 /// because it has been moved away.
5094 /// Thus AddrInst must not be added to the matched instructions.
5095/// This state can happen when AddrInst is a sext, since it may be moved away.
5096/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5097/// not be referenced anymore.
5098bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5099 unsigned Depth,
5100 bool *MovedAway) {
5101 // Avoid exponential behavior on extremely deep expression trees.
5102 if (Depth >= 5)
5103 return false;
5104
5105 // By default, all matched instructions stay in place.
5106 if (MovedAway)
5107 *MovedAway = false;
5108
5109 switch (Opcode) {
5110 case Instruction::PtrToInt:
5111 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5112 return matchAddr(AddrInst->getOperand(0), Depth);
5113 case Instruction::IntToPtr: {
5114 auto AS = AddrInst->getType()->getPointerAddressSpace();
5115 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5116 // This inttoptr is a no-op if the integer type is pointer sized.
5117 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5118 return matchAddr(AddrInst->getOperand(0), Depth);
5119 return false;
5120 }
5121 case Instruction::BitCast:
5122 // BitCast is always a noop, and we can handle it as long as it is
5123 // int->int or pointer->pointer (we don't want int<->fp or something).
5124 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5125 // Don't touch identity bitcasts. These were probably put here by LSR,
5126 // and we don't want to mess around with them. Assume it knows what it
5127 // is doing.
5128 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5129 return matchAddr(AddrInst->getOperand(0), Depth);
5130 return false;
5131 case Instruction::AddrSpaceCast: {
5132 unsigned SrcAS =
5133 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5134 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5135 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5136 return matchAddr(AddrInst->getOperand(0), Depth);
5137 return false;
5138 }
5139 case Instruction::Add: {
5140 // Check to see if we can merge in one operand, then the other. If so, we
5141 // win.
5142 ExtAddrMode BackupAddrMode = AddrMode;
5143 unsigned OldSize = AddrModeInsts.size();
5144 // Start a transaction at this point.
5145 // The LHS may match but not the RHS.
5146 // Therefore, we need a higher level restoration point to undo a partially
5147 // matched operation.
5148 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5149 TPT.getRestorationPoint();
5150
5151 // Try to match an integer constant second to increase its chance of ending
5152 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5153 int First = 0, Second = 1;
5154 if (isa<ConstantInt>(AddrInst->getOperand(First))
5155 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5156 std::swap(First, Second);
5157 AddrMode.InBounds = false;
5158 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5159 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5160 return true;
5161
5162 // Restore the old addr mode info.
5163 AddrMode = BackupAddrMode;
5164 AddrModeInsts.resize(OldSize);
5165 TPT.rollback(LastKnownGood);
5166
5167 // Otherwise this was over-aggressive. Try merging operands in the opposite
5168 // order.
5169 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5170 matchAddr(AddrInst->getOperand(First), Depth + 1))
5171 return true;
5172
5173 // Otherwise we definitely can't merge the ADD in.
5174 AddrMode = BackupAddrMode;
5175 AddrModeInsts.resize(OldSize);
5176 TPT.rollback(LastKnownGood);
5177 break;
5178 }
5179 // case Instruction::Or:
5180 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5181 // break;
5182 case Instruction::Mul:
5183 case Instruction::Shl: {
5184 // Can only handle X*C and X << C.
5185 AddrMode.InBounds = false;
5186 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5187 if (!RHS || RHS->getBitWidth() > 64)
5188 return false;
5189 int64_t Scale = Opcode == Instruction::Shl
5190 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5191 : RHS->getSExtValue();
5192
5193 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5194 }
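// Sketch of the scale computation above: for `%p = shl i64 %i, 3` the scale
// becomes 1 << 3 = 8, and for `%p = mul i64 %i, 24` it is simply 24; in both
// cases %i is then matched as the scaled register.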
5195 case Instruction::GetElementPtr: {
5196 // Scan the GEP. We check whether it contains constant offsets and at most
5197 // one variable offset.
5198 int VariableOperand = -1;
5199 unsigned VariableScale = 0;
5200
5201 int64_t ConstantOffset = 0;
5202 gep_type_iterator GTI = gep_type_begin(AddrInst);
5203 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5204 if (StructType *STy = GTI.getStructTypeOrNull()) {
5205 const StructLayout *SL = DL.getStructLayout(STy);
5206 unsigned Idx =
5207 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5208 ConstantOffset += SL->getElementOffset(Idx);
5209 } else {
5210 TypeSize TS = GTI.getSequentialElementStride(DL);
5211 if (TS.isNonZero()) {
5212 // The optimisations below currently only work for fixed offsets.
5213 if (TS.isScalable())
5214 return false;
5215 int64_t TypeSize = TS.getFixedValue();
5216 if (ConstantInt *CI =
5217 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5218 const APInt &CVal = CI->getValue();
5219 if (CVal.getSignificantBits() <= 64) {
5220 ConstantOffset += CVal.getSExtValue() * TypeSize;
5221 continue;
5222 }
5223 }
5224 // We only allow one variable index at the moment.
5225 if (VariableOperand != -1)
5226 return false;
5227
5228 // Remember the variable index.
5229 VariableOperand = i;
5230 VariableScale = TypeSize;
5231 }
5232 }
5233 }
5234
5235 // A common case is for the GEP to only do a constant offset. In this case,
5236 // just add it to the disp field and check validity.
5237 if (VariableOperand == -1) {
5238 AddrMode.BaseOffs += ConstantOffset;
5239 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5240 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5241 AddrMode.InBounds = false;
5242 return true;
5243 }
5244 AddrMode.BaseOffs -= ConstantOffset;
5245
5246 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5247 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5248 ConstantOffset > 0) {
5249 // Record GEPs with non-zero offsets as candidates for splitting in
5250 // the event that the offset cannot fit into the r+i addressing mode.
5251 // Simple and common case that only one GEP is used in calculating the
5252 // address for the memory access.
5253 Value *Base = AddrInst->getOperand(0);
5254 auto *BaseI = dyn_cast<Instruction>(Base);
5255 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5256 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5257 (BaseI && !isa<CastInst>(BaseI) &&
5258 !isa<GetElementPtrInst>(BaseI))) {
5259 // Make sure the parent block allows inserting non-PHI instructions
5260 // before the terminator.
5261 BasicBlock *Parent = BaseI ? BaseI->getParent()
5262 : &GEP->getFunction()->getEntryBlock();
5263 if (!Parent->getTerminator()->isEHPad())
5264 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5265 }
5266 }
5267
5268 return false;
5269 }
5270
5271 // Save the valid addressing mode in case we can't match.
5272 ExtAddrMode BackupAddrMode = AddrMode;
5273 unsigned OldSize = AddrModeInsts.size();
5274
5275 // See if the scale and offset amount is valid for this target.
5276 AddrMode.BaseOffs += ConstantOffset;
5277 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5278 AddrMode.InBounds = false;
5279
5280 // Match the base operand of the GEP.
5281 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5282 // If it couldn't be matched, just stuff the value in a register.
5283 if (AddrMode.HasBaseReg) {
5284 AddrMode = BackupAddrMode;
5285 AddrModeInsts.resize(OldSize);
5286 return false;
5287 }
5288 AddrMode.HasBaseReg = true;
5289 AddrMode.BaseReg = AddrInst->getOperand(0);
5290 }
5291
5292 // Match the remaining variable portion of the GEP.
5293 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5294 Depth)) {
5295 // If it couldn't be matched, try stuffing the base into a register
5296 // instead of matching it, and retrying the match of the scale.
5297 AddrMode = BackupAddrMode;
5298 AddrModeInsts.resize(OldSize);
5299 if (AddrMode.HasBaseReg)
5300 return false;
5301 AddrMode.HasBaseReg = true;
5302 AddrMode.BaseReg = AddrInst->getOperand(0);
5303 AddrMode.BaseOffs += ConstantOffset;
5304 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5305 VariableScale, Depth)) {
5306 // If even that didn't work, bail.
5307 AddrMode = BackupAddrMode;
5308 AddrModeInsts.resize(OldSize);
5309 return false;
5310 }
5311 }
5312
5313 return true;
5314 }
5315 case Instruction::SExt:
5316 case Instruction::ZExt: {
5317 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5318 if (!Ext)
5319 return false;
5320
5321 // Try to move this ext out of the way of the addressing mode.
5322 // Ask for a method for doing so.
5323 TypePromotionHelper::Action TPH =
5324 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5325 if (!TPH)
5326 return false;
5327
5328 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5329 TPT.getRestorationPoint();
5330 unsigned CreatedInstsCost = 0;
5331 unsigned ExtCost = !TLI.isExtFree(Ext);
5332 Value *PromotedOperand =
5333 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5334 // SExt has been moved away.
5335 // Thus either it will be rematched later in the recursive calls or it is
5336 // gone. Anyway, we must not fold it into the addressing mode at this point.
5337 // E.g.,
5338 // op = add opnd, 1
5339 // idx = ext op
5340 // addr = gep base, idx
5341 // is now:
5342 // promotedOpnd = ext opnd <- no match here
5343 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5344 // addr = gep base, op <- match
5345 if (MovedAway)
5346 *MovedAway = true;
5347
5348 assert(PromotedOperand &&
5349 "TypePromotionHelper should have filtered out those cases");
5350
5351 ExtAddrMode BackupAddrMode = AddrMode;
5352 unsigned OldSize = AddrModeInsts.size();
5353
5354 if (!matchAddr(PromotedOperand, Depth) ||
5355 // The total of the new cost is equal to the cost of the created
5356 // instructions.
5357 // The total of the old cost is equal to the cost of the extension plus
5358 // what we have saved in the addressing mode.
5359 !isPromotionProfitable(CreatedInstsCost,
5360 ExtCost + (AddrModeInsts.size() - OldSize),
5361 PromotedOperand)) {
5362 AddrMode = BackupAddrMode;
5363 AddrModeInsts.resize(OldSize);
5364 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5365 TPT.rollback(LastKnownGood);
5366 return false;
5367 }
5368 return true;
5369 }
5370 case Instruction::Call:
5371 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5372 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5373 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5374 if (TLI.addressingModeSupportsTLS(GV))
5375 return matchAddr(AddrInst->getOperand(0), Depth);
5376 }
5377 }
5378 break;
5379 }
5380 return false;
5381}
5382
5383/// If we can, try to add the value of 'Addr' into the current addressing mode.
5384/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5385/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5386/// for the target.
5387///
5388bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5389 // Start a transaction at this point that we will rollback if the matching
5390 // fails.
5391 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5392 TPT.getRestorationPoint();
5393 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5394 if (CI->getValue().isSignedIntN(64)) {
5395 // Fold in immediates if legal for the target.
5396 AddrMode.BaseOffs += CI->getSExtValue();
5397 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5398 return true;
5399 AddrMode.BaseOffs -= CI->getSExtValue();
5400 }
5401 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5402 // If this is a global variable, try to fold it into the addressing mode.
5403 if (!AddrMode.BaseGV) {
5404 AddrMode.BaseGV = GV;
5405 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5406 return true;
5407 AddrMode.BaseGV = nullptr;
5408 }
5409 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5410 ExtAddrMode BackupAddrMode = AddrMode;
5411 unsigned OldSize = AddrModeInsts.size();
5412
5413 // Check to see if it is possible to fold this operation.
5414 bool MovedAway = false;
5415 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5416 // This instruction may have been moved away. If so, there is nothing
5417 // to check here.
5418 if (MovedAway)
5419 return true;
5420 // Okay, it's possible to fold this. Check to see if it is actually
5421 // *profitable* to do so. We use a simple cost model to avoid increasing
5422 // register pressure too much.
5423 if (I->hasOneUse() ||
5424 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5425 AddrModeInsts.push_back(I);
5426 return true;
5427 }
5428
5429 // It isn't profitable to do this, roll back.
5430 AddrMode = BackupAddrMode;
5431 AddrModeInsts.resize(OldSize);
5432 TPT.rollback(LastKnownGood);
5433 }
5434 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5435 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5436 return true;
5437 TPT.rollback(LastKnownGood);
5438 } else if (isa<ConstantPointerNull>(Addr)) {
5439 // Null pointer gets folded without affecting the addressing mode.
5440 return true;
5441 }
5442
5443 // Worst case, the target should support [reg] addressing modes. :)
5444 if (!AddrMode.HasBaseReg) {
5445 AddrMode.HasBaseReg = true;
5446 AddrMode.BaseReg = Addr;
5447 // Still check for legality in case the target supports [imm] but not [i+r].
5448 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5449 return true;
5450 AddrMode.HasBaseReg = false;
5451 AddrMode.BaseReg = nullptr;
5452 }
5453
5454 // If the base register is already taken, see if we can do [r+r].
5455 if (AddrMode.Scale == 0) {
5456 AddrMode.Scale = 1;
5457 AddrMode.ScaledReg = Addr;
5458 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5459 return true;
5460 AddrMode.Scale = 0;
5461 AddrMode.ScaledReg = nullptr;
5462 }
5463 // Couldn't match.
5464 TPT.rollback(LastKnownGood);
5465 return false;
5466}
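// Fallback sketch (hypothetical values): if %idx cannot be folded and the
// AddrMode already holds a base register %base, the [r + r] branch above
// typically yields { BaseReg = %base, Scale = 1, ScaledReg = %idx },
// provided TLI.isLegalAddressingMode accepts that form for the access type.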
5467
5468/// Check to see if all uses of OpVal by the specified inline asm call are due
5469/// to memory operands. If so, return true, otherwise return false.
5470 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5471 const TargetLowering &TLI,
5472 const TargetRegisterInfo &TRI) {
5473 const Function *F = CI->getFunction();
5474 TargetLowering::AsmOperandInfoVector TargetConstraints =
5475 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5476
5477 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5478 // Compute the constraint code and ConstraintType to use.
5479 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5480
5481 // If this asm operand is our Value*, and if it isn't an indirect memory
5482 // operand, we can't fold it! TODO: Also handle C_Address?
5483 if (OpInfo.CallOperandVal == OpVal &&
5484 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5485 !OpInfo.isIndirect))
5486 return false;
5487 }
5488
5489 return true;
5490}
5491
5492/// Recursively walk all the uses of I until we find a memory use.
5493/// If we find an obviously non-foldable instruction, return true.
5494/// Add accessed addresses and types to MemoryUses.
5495 static bool FindAllMemoryUses(
5496 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5497 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5498 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5499 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5500 // If we already considered this instruction, we're done.
5501 if (!ConsideredInsts.insert(I).second)
5502 return false;
5503
5504 // If this is an obviously unfoldable instruction, bail out.
5505 if (!MightBeFoldableInst(I))
5506 return true;
5507
5508 // Loop over all the uses, recursively processing them.
5509 for (Use &U : I->uses()) {
5510 // Conservatively return true if we're seeing a large number or a deep chain
5511 // of users. This avoids excessive compilation times in pathological cases.
5512 if (SeenInsts++ >= MaxAddressUsersToScan)
5513 return true;
5514
5515 Instruction *UserI = cast<Instruction>(U.getUser());
5516 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5517 MemoryUses.push_back({&U, LI->getType()});
5518 continue;
5519 }
5520
5521 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5522 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5523 return true; // Storing addr, not into addr.
5524 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5525 continue;
5526 }
5527
5528 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5529 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5530 return true; // Storing addr, not into addr.
5531 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5532 continue;
5533 }
5534
5535 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5536 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5537 return true; // Storing addr, not into addr.
5538 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5539 continue;
5540 }
5541
5542 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5543 if (CI->hasFnAttr(Attribute::Cold)) {
5544 // If this is a cold call, we can sink the addressing calculation into
5545 // the cold path. See optimizeCallInst
5546 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5547 continue;
5548 }
5549
5550 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5551 if (!IA)
5552 return true;
5553
5554 // If this is a memory operand, we're cool, otherwise bail out.
5555 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5556 return true;
5557 continue;
5558 }
5559
5560 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5561 PSI, BFI, SeenInsts))
5562 return true;
5563 }
5564
5565 return false;
5566}
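// Sketch of the classification above, for a pointer %p with three users:
//   %v = load i32, ptr %p        ; memory use, recorded in MemoryUses
//   store ptr %p, ptr %q         ; %p is the stored value, not the address ->
//                                ; returns true (not foldable)
//   %c = icmp eq ptr %p, null    ; recursion hits a non-foldable user ->
//                                ; returns true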
5567
5568 static bool FindAllMemoryUses(
5569 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5570 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5571 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5572 unsigned SeenInsts = 0;
5573 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5574 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5575 PSI, BFI, SeenInsts);
5576}
5577
5578
5579/// Return true if Val is already known to be live at the use site that we're
5580/// folding it into. If so, there is no cost to include it in the addressing
5581/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5582/// instruction already.
5583bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5584 Value *KnownLive1,
5585 Value *KnownLive2) {
5586 // If Val is either of the known-live values, we know it is live!
5587 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5588 return true;
5589
5590 // All values other than instructions and arguments (e.g. constants) are live.
5591 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5592 return true;
5593
5594 // If Val is a constant-sized alloca in the entry block, it is live. This is
5595 // true because it is just a reference to the stack/frame pointer, which is
5596 // live for the whole function.
5597 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5598 if (AI->isStaticAlloca())
5599 return true;
5600
5601 // Check to see if this value is already used in the memory instruction's
5602 // block. If so, it's already live into the block at the very least, so we
5603 // can reasonably fold it.
5604 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5605}
5606
5607/// It is possible for the addressing mode of the machine to fold the specified
5608/// instruction into a load or store that ultimately uses it.
5609/// However, the specified instruction has multiple uses.
5610/// Given this, it may actually increase register pressure to fold it
5611/// into the load. For example, consider this code:
5612///
5613/// X = ...
5614/// Y = X+1
5615/// use(Y) -> nonload/store
5616/// Z = Y+1
5617/// load Z
5618///
5619/// In this case, Y has multiple uses, and can be folded into the load of Z
5620/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5621/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5622/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5623/// number of computations either.
5624///
5625/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5626/// X was live across 'load Z' for other reasons, we actually *would* want to
5627/// fold the addressing mode in the Z case. This would make Y die earlier.
5628bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5629 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5630 if (IgnoreProfitability)
5631 return true;
5632
5633 // AMBefore is the addressing mode before this instruction was folded into it,
5634 // and AMAfter is the addressing mode after the instruction was folded. Get
5635 // the set of registers referenced by AMAfter and subtract out those
5636 // referenced by AMBefore: this is the set of values which folding in this
5637 // address extends the lifetime of.
5638 //
5639 // Note that there are only two potential values being referenced here,
5640 // BaseReg and ScaleReg (global addresses are always available, as are any
5641 // folded immediates).
5642 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5643
5644 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5645 // lifetime wasn't extended by adding this instruction.
5646 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5647 BaseReg = nullptr;
5648 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5649 ScaledReg = nullptr;
5650
5651 // If folding this instruction (and its subexprs) didn't extend any live
5652 // ranges, we're ok with it.
5653 if (!BaseReg && !ScaledReg)
5654 return true;
5655
5656 // If all uses of this instruction can have the address mode sunk into them,
5657 // we can remove the addressing mode and effectively trade one live register
5658 // for another (at worst.) In this context, folding an addressing mode into
5659 // the use is just a particularly nice way of sinking it.
5660 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5661 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5662 return false; // Has a non-memory, non-foldable use!
5663
5664 // Now that we know that all uses of this instruction are part of a chain of
5665 // computation involving only operations that could theoretically be folded
5666 // into a memory use, loop over each of these memory operation uses and see
5667 // if they could *actually* fold the instruction. The assumption is that
5668 // addressing modes are cheap and that duplicating the computation involved
5669 // many times is worthwhile, even on a fastpath. For sinking candidates
5670 // (i.e. cold call sites), this serves as a way to prevent excessive code
5671 // growth since most architectures have some reasonable small and fast way to
5672 // compute an effective address. (i.e LEA on x86)
5673 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5674 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5675 Value *Address = Pair.first->get();
5676 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5677 Type *AddressAccessTy = Pair.second;
5678 unsigned AS = Address->getType()->getPointerAddressSpace();
5679
5680 // Do a match against the root of this address, ignoring profitability. This
5681 // will tell us if the addressing mode for the memory operation will
5682 // *actually* cover the shared instruction.
5683 ExtAddrMode Result;
5684 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5685 0);
5686 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5687 TPT.getRestorationPoint();
5688 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5689 AddressAccessTy, AS, UserI, Result,
5690 InsertedInsts, PromotedInsts, TPT,
5691 LargeOffsetGEP, OptSize, PSI, BFI);
5692 Matcher.IgnoreProfitability = true;
5693 bool Success = Matcher.matchAddr(Address, 0);
5694 (void)Success;
5695 assert(Success && "Couldn't select *anything*?");
5696
5697 // The match was to check the profitability, the changes made are not
5698 // part of the original matcher. Therefore, they should be dropped
5699 // otherwise the original matcher will not present the right state.
5700 TPT.rollback(LastKnownGood);
5701
5702 // If the match didn't cover I, then it won't be shared by it.
5703 if (!is_contained(MatchedAddrModeInsts, I))
5704 return false;
5705
5706 MatchedAddrModeInsts.clear();
5707 }
5708
5709 return true;
5710}
5711
5712/// Return true if the specified values are defined in a
5713/// different basic block than BB.
5714static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5715 if (Instruction *I = dyn_cast<Instruction>(V))
5716 return I->getParent() != BB;
5717 return false;
5718}
5719
5720 /// Sink addressing mode computation immediately before MemoryInst if doing so
5721/// can be done without increasing register pressure. The need for the
5722/// register pressure constraint means this can end up being an all or nothing
5723/// decision for all uses of the same addressing computation.
5724///
5725/// Load and Store Instructions often have addressing modes that can do
5726/// significant amounts of computation. As such, instruction selection will try
5727/// to get the load or store to do as much computation as possible for the
5728/// program. The problem is that isel can only see within a single block. As
5729/// such, we sink as much legal addressing mode work into the block as possible.
5730///
5731/// This method is used to optimize both load/store and inline asms with memory
5732/// operands. It's also used to sink addressing computations feeding into cold
5733/// call sites into their (cold) basic block.
5734///
5735/// The motivation for handling sinking into cold blocks is that doing so can
5736/// both enable other address mode sinking (by satisfying the register pressure
5737/// constraint above), and reduce register pressure globally (by removing the
5738 /// addressing mode computation from the fast path entirely).
5739bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5740 Type *AccessTy, unsigned AddrSpace) {
5741 Value *Repl = Addr;
5742
5743 // Try to collapse single-value PHI nodes. This is necessary to undo
5744 // unprofitable PRE transformations.
5745 SmallVector<Value *, 8> worklist;
5746 SmallPtrSet<Value *, 16> Visited;
5747 worklist.push_back(Addr);
5748
5749 // Use a worklist to iteratively look through PHI and select nodes, and
5750 // ensure that the addressing mode obtained from the non-PHI/select roots of
5751 // the graph are compatible.
5752 bool PhiOrSelectSeen = false;
5753 SmallVector<Instruction *, 16> AddrModeInsts;
5754 const SimplifyQuery SQ(*DL, TLInfo);
5755 AddressingModeCombiner AddrModes(SQ, Addr);
5756 TypePromotionTransaction TPT(RemovedInsts);
5757 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5758 TPT.getRestorationPoint();
5759 while (!worklist.empty()) {
5760 Value *V = worklist.pop_back_val();
5761
5762 // We allow traversing cyclic Phi nodes.
5763 // In case of success after this loop we ensure that traversing through
5764 // Phi nodes ends up with all cases to compute address of the form
5765 // BaseGV + Base + Scale * Index + Offset
5766 // where Scale and Offset are constants and BaseGV, Base and Index
5767 // are exactly the same Values in all cases.
5768 // It means that BaseGV, Scale and Offset dominate our memory instruction
5769 // and have the same value as they had in address computation represented
5770 // as Phi. So we can safely sink address computation to memory instruction.
5771 if (!Visited.insert(V).second)
5772 continue;
5773
5774 // For a PHI node, push all of its incoming values.
5775 if (PHINode *P = dyn_cast<PHINode>(V)) {
5776 append_range(worklist, P->incoming_values());
5777 PhiOrSelectSeen = true;
5778 continue;
5779 }
5780 // Similar for select.
5781 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5782 worklist.push_back(SI->getFalseValue());
5783 worklist.push_back(SI->getTrueValue());
5784 PhiOrSelectSeen = true;
5785 continue;
5786 }
5787
5788 // For non-PHIs, determine the addressing mode being computed. Note that
5789 // the result may differ depending on what other uses our candidate
5790 // addressing instructions might have.
5791 AddrModeInsts.clear();
5792 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5793 0);
5794 // Defer the query (and possible computation of) the dom tree to point of
5795 // actual use. It's expected that most address matches don't actually need
5796 // the domtree.
5797 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5798 Function *F = MemoryInst->getParent()->getParent();
5799 return this->getDT(*F);
5800 };
5801 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5802 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5803 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5804 BFI.get());
5805
5806 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5807 if (GEP && !NewGEPBases.count(GEP)) {
5808 // If splitting the underlying data structure can reduce the offset of a
5809 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5810 // previously split data structures.
5811 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5812 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5813 }
5814
5815 NewAddrMode.OriginalValue = V;
5816 if (!AddrModes.addNewAddrMode(NewAddrMode))
5817 break;
5818 }
5819
5820 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5821 // or we have multiple but either couldn't combine them or combining them
5822 // wouldn't do anything useful, bail out now.
5823 if (!AddrModes.combineAddrModes()) {
5824 TPT.rollback(LastKnownGood);
5825 return false;
5826 }
5827 bool Modified = TPT.commit();
5828
5829 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5830 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5831
5832 // If all the instructions matched are already in this BB, don't do anything.
5833 // If we saw a Phi node then it is definitely not local, and if we saw a
5834 // select then we want to push the address calculation past it even if it's
5835 // already in this BB.
5836 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5837 return IsNonLocalValue(V, MemoryInst->getParent());
5838 })) {
5839 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5840 << "\n");
5841 return Modified;
5842 }
5843
5844 // Insert this computation right after this user. Since our caller is
5845 // scanning from the top of the BB to the bottom, reuses of the expr are
5846 // guaranteed to happen later.
5847 IRBuilder<> Builder(MemoryInst);
5848
5849 // Now that we've determined the addressing expression we want to use and know
5850 // that we have to sink it into this block, check to see if we have already
5851 // done this for some other load/store instr in this block. If so, reuse
5852 // the computation. Before attempting reuse, check if the address is valid
5853 // as it may have been erased.
5854
5855 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5856
5857 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5858 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5859 if (SunkAddr) {
5860 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5861 << " for " << *MemoryInst << "\n");
5862 if (SunkAddr->getType() != Addr->getType()) {
5863 if (SunkAddr->getType()->getPointerAddressSpace() !=
5864 Addr->getType()->getPointerAddressSpace() &&
5865 !DL->isNonIntegralPointerType(Addr->getType())) {
5866 // There are two reasons the address spaces might not match: a no-op
5867 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5868 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5869 // TODO: allow bitcast between different address space pointers with the
5870 // same size.
5871 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5872 SunkAddr =
5873 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5874 } else
5875 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5876 }
5877 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5878 SubtargetInfo->addrSinkUsingGEPs())) {
5879 // By default, we use the GEP-based method when AA is used later. This
5880 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5881 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5882 << " for " << *MemoryInst << "\n");
5883 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5884
5885 // First, find the pointer.
5886 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5887 ResultPtr = AddrMode.BaseReg;
5888 AddrMode.BaseReg = nullptr;
5889 }
5890
5891 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5892 // We can't add more than one pointer together, nor can we scale a
5893 // pointer (both of which seem meaningless).
5894 if (ResultPtr || AddrMode.Scale != 1)
5895 return Modified;
5896
5897 ResultPtr = AddrMode.ScaledReg;
5898 AddrMode.Scale = 0;
5899 }
5900
5901 // It is only safe to sign extend the BaseReg if we know that the math
5902 // required to create it did not overflow before we extend it. Since
5903 // the original IR value was tossed in favor of a constant back when
5904 // the AddrMode was created we need to bail out gracefully if widths
5905 // do not match instead of extending it.
5906 //
5907 // (See below for code to add the scale.)
5908 if (AddrMode.Scale) {
5909 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
5910 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5911 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5912 return Modified;
5913 }
5914
5915 GlobalValue *BaseGV = AddrMode.BaseGV;
5916 if (BaseGV != nullptr) {
5917 if (ResultPtr)
5918 return Modified;
5919
5920 if (BaseGV->isThreadLocal()) {
5921 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5922 } else {
5923 ResultPtr = BaseGV;
5924 }
5925 }
5926
5927 // If the real base value actually came from an inttoptr, then the matcher
5928 // will look through it and provide only the integer value. In that case,
5929 // use it here.
5930 if (!DL->isNonIntegralPointerType(Addr->getType())) {
5931 if (!ResultPtr && AddrMode.BaseReg) {
5932 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
5933 "sunkaddr");
5934 AddrMode.BaseReg = nullptr;
5935 } else if (!ResultPtr && AddrMode.Scale == 1) {
5936 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
5937 "sunkaddr");
5938 AddrMode.Scale = 0;
5939 }
5940 }
5941
5942 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
5943 !AddrMode.BaseOffs) {
5944 SunkAddr = Constant::getNullValue(Addr->getType());
5945 } else if (!ResultPtr) {
5946 return Modified;
5947 } else {
5948 Type *I8PtrTy =
5949 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
5950
5951 // Start with the base register. Do this first so that subsequent address
5952 // matching finds it last, which will prevent it from trying to match it
5953 // as the scaled value in case it happens to be a mul. That would be
5954 // problematic if we've sunk a different mul for the scale, because then
5955 // we'd end up sinking both muls.
5956 if (AddrMode.BaseReg) {
5957 Value *V = AddrMode.BaseReg;
5958 if (V->getType() != IntPtrTy)
5959 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5960
5961 ResultIndex = V;
5962 }
5963
5964 // Add the scale value.
5965 if (AddrMode.Scale) {
5966 Value *V = AddrMode.ScaledReg;
5967 if (V->getType() == IntPtrTy) {
5968 // done.
5969 } else {
5970 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5971 cast<IntegerType>(V->getType())->getBitWidth() &&
5972 "We can't transform if ScaledReg is too narrow");
5973 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5974 }
5975
5976 if (AddrMode.Scale != 1)
5977 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5978 "sunkaddr");
5979 if (ResultIndex)
5980 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5981 else
5982 ResultIndex = V;
5983 }
5984
5985 // Add in the Base Offset if present.
5986 if (AddrMode.BaseOffs) {
5987 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5988 if (ResultIndex) {
5989 // We need to add this separately from the scale above to help with
5990 // SDAG consecutive load/store merging.
5991 if (ResultPtr->getType() != I8PtrTy)
5992 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5993 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5994 AddrMode.InBounds);
5995 }
5996
5997 ResultIndex = V;
5998 }
5999
6000 if (!ResultIndex) {
6001 SunkAddr = ResultPtr;
6002 } else {
6003 if (ResultPtr->getType() != I8PtrTy)
6004 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6005 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6006 AddrMode.InBounds);
6007 }
6008
6009 if (SunkAddr->getType() != Addr->getType()) {
6010 if (SunkAddr->getType()->getPointerAddressSpace() !=
6011 Addr->getType()->getPointerAddressSpace() &&
6012 !DL->isNonIntegralPointerType(Addr->getType())) {
6013 // There are two reasons the address spaces might not match: a no-op
6014 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6015 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6016 // TODO: allow bitcast between different address space pointers with
6017 // the same size.
6018 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6019 SunkAddr =
6020 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6021 } else
6022 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6023 }
6024 }
6025 } else {
6026 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6027 // non-integral pointers, so in that case bail out now.
6028 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6029 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6030 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6031 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6032 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6033 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6034 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6035 (AddrMode.BaseGV &&
6036 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6037 return Modified;
6038
6039 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6040 << " for " << *MemoryInst << "\n");
6041 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6042 Value *Result = nullptr;
6043
6044 // Start with the base register. Do this first so that subsequent address
6045 // matching finds it last, which will prevent it from trying to match it
6046 // as the scaled value in case it happens to be a mul. That would be
6047 // problematic if we've sunk a different mul for the scale, because then
6048 // we'd end up sinking both muls.
6049 if (AddrMode.BaseReg) {
6050 Value *V = AddrMode.BaseReg;
6051 if (V->getType()->isPointerTy())
6052 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6053 if (V->getType() != IntPtrTy)
6054 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6055 Result = V;
6056 }
6057
6058 // Add the scale value.
6059 if (AddrMode.Scale) {
6060 Value *V = AddrMode.ScaledReg;
6061 if (V->getType() == IntPtrTy) {
6062 // done.
6063 } else if (V->getType()->isPointerTy()) {
6064 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6065 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6066 cast<IntegerType>(V->getType())->getBitWidth()) {
6067 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6068 } else {
6069 // It is only safe to sign extend the BaseReg if we know that the math
6070 // required to create it did not overflow before we extend it. Since
6071 // the original IR value was tossed in favor of a constant back when
6072 // the AddrMode was created we need to bail out gracefully if widths
6073 // do not match instead of extending it.
6074 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6075 if (I && (Result != AddrMode.BaseReg))
6076 I->eraseFromParent();
6077 return Modified;
6078 }
6079 if (AddrMode.Scale != 1)
6080 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6081 "sunkaddr");
6082 if (Result)
6083 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6084 else
6085 Result = V;
6086 }
6087
6088 // Add in the BaseGV if present.
6089 GlobalValue *BaseGV = AddrMode.BaseGV;
6090 if (BaseGV != nullptr) {
6091 Value *BaseGVPtr;
6092 if (BaseGV->isThreadLocal()) {
6093 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6094 } else {
6095 BaseGVPtr = BaseGV;
6096 }
6097 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6098 if (Result)
6099 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6100 else
6101 Result = V;
6102 }
6103
6104 // Add in the Base Offset if present.
6105 if (AddrMode.BaseOffs) {
6106 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6107 if (Result)
6108 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6109 else
6110 Result = V;
6111 }
6112
6113 if (!Result)
6114 SunkAddr = Constant::getNullValue(Addr->getType());
6115 else
6116 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6117 }
6118
6119 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6120 // Store the newly computed address into the cache. In the case we reused a
6121 // value, this should be idempotent.
6122 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6123
6124 // If we have no uses, recursively delete the value and all dead instructions
6125 // using it.
6126 if (Repl->use_empty()) {
6127 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6128 RecursivelyDeleteTriviallyDeadInstructions(
6129 Repl, TLInfo, nullptr,
6130 [&](Value *V) { removeAllAssertingVHReferences(V); });
6131 });
6132 }
6133 ++NumMemoryInsts;
6134 return true;
6135}
6136
6137/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6138/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6139/// only handle a 2 operand GEP in the same basic block or a splat constant
6140/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6141/// index.
6142///
6143/// If the existing GEP has a vector base pointer that is splat, we can look
6144/// through the splat to find the scalar pointer. If we can't find a scalar
6145/// pointer there's nothing we can do.
6146///
6147/// If we have a GEP with more than 2 indices where the middle indices are all
6148/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6149///
6150/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6151/// followed by a GEP with an all zeroes vector index. This will enable
6152/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6153/// zero index.
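/// For example (an illustrative sketch, not taken from the sources):
/// \code
///   %gep = getelementptr [4 x i32], ptr %base, i64 0, <8 x i64> %vecidx
/// \endcode
/// becomes a scalar GEP of %base with the trailing index replaced by 0,
/// followed by a 2-operand GEP of that scalar result with <8 x i64> %vecidx.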
6154bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6155 Value *Ptr) {
6156 Value *NewAddr;
6157
6158 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6159 // Don't optimize GEPs that don't have indices.
6160 if (!GEP->hasIndices())
6161 return false;
6162
6163 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6164 // FIXME: We should support this by sinking the GEP.
6165 if (MemoryInst->getParent() != GEP->getParent())
6166 return false;
6167
6168 SmallVector<Value *, 2> Ops(GEP->operands());
6169
6170 bool RewriteGEP = false;
6171
6172 if (Ops[0]->getType()->isVectorTy()) {
6173 Ops[0] = getSplatValue(Ops[0]);
6174 if (!Ops[0])
6175 return false;
6176 RewriteGEP = true;
6177 }
6178
6179 unsigned FinalIndex = Ops.size() - 1;
6180
6181 // Ensure all but the last index is 0.
6182 // FIXME: This isn't strictly required. All that's required is that they are
6183 // all scalars or splats.
6184 for (unsigned i = 1; i < FinalIndex; ++i) {
6185 auto *C = dyn_cast<Constant>(Ops[i]);
6186 if (!C)
6187 return false;
6188 if (isa<VectorType>(C->getType()))
6189 C = C->getSplatValue();
6190 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6191 if (!CI || !CI->isZero())
6192 return false;
6193 // Scalarize the index if needed.
6194 Ops[i] = CI;
6195 }
6196
6197 // Try to scalarize the final index.
6198 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6199 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6200 auto *C = dyn_cast<ConstantInt>(V);
6201 // Don't scalarize all zeros vector.
6202 if (!C || !C->isZero()) {
6203 Ops[FinalIndex] = V;
6204 RewriteGEP = true;
6205 }
6206 }
6207 }
6208
6209 // If we made any changes or we have extra operands, we need to generate
6210 // new instructions.
6211 if (!RewriteGEP && Ops.size() == 2)
6212 return false;
6213
6214 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6215
6216 IRBuilder<> Builder(MemoryInst);
6217
6218 Type *SourceTy = GEP->getSourceElementType();
6219 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6220
6221 // If the final index isn't a vector, emit a scalar GEP containing all ops
6222 // and a vector GEP with all zeroes final index.
6223 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6224 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6225 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6226 auto *SecondTy = GetElementPtrInst::getIndexedType(
6227 SourceTy, ArrayRef(Ops).drop_front());
6228 NewAddr =
6229 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6230 } else {
6231 Value *Base = Ops[0];
6232 Value *Index = Ops[FinalIndex];
6233
6234 // Create a scalar GEP if there are more than 2 operands.
6235 if (Ops.size() != 2) {
6236 // Replace the last index with 0.
6237 Ops[FinalIndex] =
6238 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6239 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6240 SourceTy = GetElementPtrInst::getIndexedType(
6241 SourceTy, ArrayRef(Ops).drop_front());
6242 }
6243
6244 // Now create the GEP with scalar pointer and vector index.
6245 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6246 }
6247 } else if (!isa<Constant>(Ptr)) {
6248 // Not a GEP, maybe it's a splat and we can create a GEP to enable
6249 // SelectionDAGBuilder to use it as a uniform base.
6250 Value *V = getSplatValue(Ptr);
6251 if (!V)
6252 return false;
6253
6254 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6255
6256 IRBuilder<> Builder(MemoryInst);
6257
6258 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6259 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6260 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6261 Type *ScalarTy;
6262 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6263 Intrinsic::masked_gather) {
6264 ScalarTy = MemoryInst->getType()->getScalarType();
6265 } else {
6266 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6267 Intrinsic::masked_scatter);
6268 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6269 }
6270 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6271 } else {
6272 // Constant, SelectionDAGBuilder knows to check if it's a splat.
6273 return false;
6274 }
6275
6276 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6277
6278 // If we have no uses, recursively delete the value and all dead instructions
6279 // using it.
6280 if (Ptr->use_empty())
6281 RecursivelyDeleteTriviallyDeadInstructions(
6282 Ptr, TLInfo, nullptr,
6283 [&](Value *V) { removeAllAssertingVHReferences(V); });
6284
6285 return true;
6286}
6287
6288/// If there are any memory operands, use OptimizeMemoryInst to sink their
6289 /// address computation into the block when possible / profitable.
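/// For example (sketch), given inline asm with an indirect memory constraint:
/// \code
///   call void asm "movl $$1, $0", "=*m"(ptr elementtype(i32) %addr)
/// \endcode
/// the computation of %addr is a candidate for sinking next to the call.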
6290bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6291 bool MadeChange = false;
6292
6293 const TargetRegisterInfo *TRI =
6294 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6295 TargetLowering::AsmOperandInfoVector TargetConstraints =
6296 TLI->ParseConstraints(*DL, TRI, *CS);
6297 unsigned ArgNo = 0;
6298 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6299 // Compute the constraint code and ConstraintType to use.
6300 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6301
6302 // TODO: Also handle C_Address?
6303 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6304 OpInfo.isIndirect) {
6305 Value *OpVal = CS->getArgOperand(ArgNo++);
6306 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6307 } else if (OpInfo.Type == InlineAsm::isInput)
6308 ArgNo++;
6309 }
6310
6311 return MadeChange;
6312}
6313
6314/// Check if all the uses of \p Val are equivalent (or free) zero or
6315/// sign extensions.
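/// For instance (illustrative): if %v feeds both "zext i8 %v to i16" and
/// "zext i8 %v to i32", and the target reports zero extension from i16 to
/// i32 as free, all uses are treated as equivalent.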
6316static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6317 assert(!Val->use_empty() && "Input must have at least one use");
6318 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6319 bool IsSExt = isa<SExtInst>(FirstUser);
6320 Type *ExtTy = FirstUser->getType();
6321 for (const User *U : Val->users()) {
6322 const Instruction *UI = cast<Instruction>(U);
6323 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6324 return false;
6325 Type *CurTy = UI->getType();
6326 // Same input and output types: Same instruction after CSE.
6327 if (CurTy == ExtTy)
6328 continue;
6329
6330 // If IsSExt is true, we are in this situation:
6331 // a = Val
6332 // b = sext ty1 a to ty2
6333 // c = sext ty1 a to ty3
6334 // Assuming ty2 is shorter than ty3, this could be turned into:
6335 // a = Val
6336 // b = sext ty1 a to ty2
6337 // c = sext ty2 b to ty3
6338 // However, the last sext is not free.
6339 if (IsSExt)
6340 return false;
6341
6342 // This is a ZExt, maybe this is free to extend from one type to another.
6343 // In that case, we would not account for a different use.
6344 Type *NarrowTy;
6345 Type *LargeTy;
6346 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6347 CurTy->getScalarType()->getIntegerBitWidth()) {
6348 NarrowTy = CurTy;
6349 LargeTy = ExtTy;
6350 } else {
6351 NarrowTy = ExtTy;
6352 LargeTy = CurTy;
6353 }
6354
6355 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6356 return false;
6357 }
6358 // All uses are the same or can be derived from one another for free.
6359 return true;
6360}
6361
6362/// Try to speculatively promote extensions in \p Exts and continue
6363/// promoting through newly promoted operands recursively as far as doing so is
6364/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6365/// When some promotion happened, \p TPT contains the proper state to revert
6366/// them.
6367///
6368/// \return true if some promotion happened, false otherwise.
6369bool CodeGenPrepare::tryToPromoteExts(
6370 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6371 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6372 unsigned CreatedInstsCost) {
6373 bool Promoted = false;
6374
6375 // Iterate over all the extensions to try to promote them.
6376 for (auto *I : Exts) {
6377 // Early check if we directly have ext(load).
6378 if (isa<LoadInst>(I->getOperand(0))) {
6379 ProfitablyMovedExts.push_back(I);
6380 continue;
6381 }
6382
6383 // Check whether or not we want to do any promotion. The reason we have
6384 // this check inside the for loop is to catch the case where an extension
6385 // is directly fed by a load because in such a case the extension can be moved
6386 // up without any promotion on its operands.
6387 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6388 return false;
6389
6390 // Get the action to perform the promotion.
6391 TypePromotionHelper::Action TPH =
6392 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6393 // Check if we can promote.
6394 if (!TPH) {
6395 // Save the current extension as we cannot move up through its operand.
6396 ProfitablyMovedExts.push_back(I);
6397 continue;
6398 }
6399
6400 // Save the current state.
6401 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6402 TPT.getRestorationPoint();
6403 SmallVector<Instruction *, 4> NewExts;
6404 unsigned NewCreatedInstsCost = 0;
6405 unsigned ExtCost = !TLI->isExtFree(I);
6406 // Promote.
6407 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6408 &NewExts, nullptr, *TLI);
6409 assert(PromotedVal &&
6410 "TypePromotionHelper should have filtered out those cases");
6411
6412 // We would be able to merge only one extension in a load.
6413 // Therefore, if we have more than 1 new extension we heuristically
6414 // cut this search path, because it means we degrade the code quality.
6415 // With exactly 2, the transformation is neutral, because we will merge
6416 // one extension but leave one. However, we optimistically keep going,
6417 // because the new extension may be removed too. Also avoid replacing a
6418 // single free extension with multiple extensions, as this increases the
6419 // number of IR instructions while not providing any savings.
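      // Illustration with hypothetical costs: if the promotion creates one new
      // extension (cost 1) and the original extension was not free (ExtCost 1),
      // the running total stays at 0 and the search continues; a total above 1,
      // or an illegal promoted instruction, triggers the rollback below.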
6420 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6421 // FIXME: It would be possible to propagate a negative value instead of
6422 // conservatively ceiling it to 0.
6423 TotalCreatedInstsCost =
6424 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6425 if (!StressExtLdPromotion &&
6426 (TotalCreatedInstsCost > 1 ||
6427 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6428 (ExtCost == 0 && NewExts.size() > 1))) {
6429 // This promotion is not profitable, rollback to the previous state, and
6430 // save the current extension in ProfitablyMovedExts as the latest
6431 // speculative promotion turned out to be unprofitable.
6432 TPT.rollback(LastKnownGood);
6433 ProfitablyMovedExts.push_back(I);
6434 continue;
6435 }
6436 // Continue promoting NewExts as far as doing so is profitable.
6437 SmallVector<Instruction *, 2> NewlyMovedExts;
6438 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6439 bool NewPromoted = false;
6440 for (auto *ExtInst : NewlyMovedExts) {
6441 Instruction *MovedExt = cast<Instruction>(ExtInst);
6442 Value *ExtOperand = MovedExt->getOperand(0);
6443 // If we have reached a load, we need this extra profitability check
6444 // as it could potentially be merged into an ext(load).
6445 if (isa<LoadInst>(ExtOperand) &&
6446 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6447 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6448 continue;
6449
6450 ProfitablyMovedExts.push_back(MovedExt);
6451 NewPromoted = true;
6452 }
6453
6454 // If none of the speculative promotions for NewExts is profitable, rollback
6455 // and save the current extension (I) as the last profitable extension.
6456 if (!NewPromoted) {
6457 TPT.rollback(LastKnownGood);
6458 ProfitablyMovedExts.push_back(I);
6459 continue;
6460 }
6461 // The promotion is profitable.
6462 Promoted = true;
6463 }
6464 return Promoted;
6465}
6466
6467 /// Merge redundant sexts when one dominates the other.
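/// For example (sketch): when "%s1 = sext i32 %x to i64" dominates
/// "%s2 = sext i32 %x to i64", %s2 is replaced by %s1 and removed; merging
/// into a common dominator is deliberately not attempted.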
6468bool CodeGenPrepare::mergeSExts(Function &F) {
6469 bool Changed = false;
6470 for (auto &Entry : ValToSExtendedUses) {
6471 SExts &Insts = Entry.second;
6472 SExts CurPts;
6473 for (Instruction *Inst : Insts) {
6474 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6475 Inst->getOperand(0) != Entry.first)
6476 continue;
6477 bool inserted = false;
6478 for (auto &Pt : CurPts) {
6479 if (getDT(F).dominates(Inst, Pt)) {
6480 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6481 RemovedInsts.insert(Pt);
6482 Pt->removeFromParent();
6483 Pt = Inst;
6484 inserted = true;
6485 Changed = true;
6486 break;
6487 }
6488 if (!getDT(F).dominates(Pt, Inst))
6489 // Give up if we need to merge in a common dominator as the
6490 // experiments show it is not profitable.
6491 continue;
6492 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6493 RemovedInsts.insert(Inst);
6494 Inst->removeFromParent();
6495 inserted = true;
6496 Changed = true;
6497 break;
6498 }
6499 if (!inserted)
6500 CurPts.push_back(Inst);
6501 }
6502 }
6503 return Changed;
6504}
6505
6506 // Split large data structures so that the GEPs accessing them can have
6507 // smaller offsets, allowing them to be sunk to the same blocks as their users.
6508// For example, a large struct starting from %base is split into two parts
6509// where the second part starts from %new_base.
6510//
6511// Before:
6512// BB0:
6513// %base =
6514//
6515// BB1:
6516// %gep0 = gep %base, off0
6517// %gep1 = gep %base, off1
6518// %gep2 = gep %base, off2
6519//
6520// BB2:
6521// %load1 = load %gep0
6522// %load2 = load %gep1
6523// %load3 = load %gep2
6524//
6525// After:
6526// BB0:
6527// %base =
6528// %new_base = gep %base, off0
6529//
6530// BB1:
6531// %new_gep0 = %new_base
6532// %new_gep1 = gep %new_base, off1 - off0
6533// %new_gep2 = gep %new_base, off2 - off0
6534//
6535// BB2:
6536// %load1 = load i32, i32* %new_gep0
6537// %load2 = load i32, i32* %new_gep1
6538// %load3 = load i32, i32* %new_gep2
6539//
6540// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6541 // their offsets are small enough to fit into the addressing mode.
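// A concrete (hypothetical) illustration: with off0 = 4096, off1 = 4100 and
// off2 = 4104 on a target whose load/store immediate only covers [0, 4095],
// rebasing at off0 leaves offsets 0, 4 and 8, which all fit.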
6542bool CodeGenPrepare::splitLargeGEPOffsets() {
6543 bool Changed = false;
6544 for (auto &Entry : LargeOffsetGEPMap) {
6545 Value *OldBase = Entry.first;
6546 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6547 &LargeOffsetGEPs = Entry.second;
6548 auto compareGEPOffset =
6549 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6550 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6551 if (LHS.first == RHS.first)
6552 return false;
6553 if (LHS.second != RHS.second)
6554 return LHS.second < RHS.second;
6555 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6556 };
6557 // Sorting all the GEPs of the same data structures based on the offsets.
6558 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6559 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6560 // Skip if all the GEPs have the same offsets.
6561 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6562 continue;
6563 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6564 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6565 Value *NewBaseGEP = nullptr;
6566
6567 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6568 GetElementPtrInst *GEP) {
6569 LLVMContext &Ctx = GEP->getContext();
6570 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6571 Type *I8PtrTy =
6572 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6573
6574 BasicBlock::iterator NewBaseInsertPt;
6575 BasicBlock *NewBaseInsertBB;
6576 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6577 // If the base of the struct is an instruction, the new base will be
6578 // inserted close to it.
6579 NewBaseInsertBB = BaseI->getParent();
6580 if (isa<PHINode>(BaseI))
6581 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6582 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6583 NewBaseInsertBB =
6584 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6585 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6586 } else
6587 NewBaseInsertPt = std::next(BaseI->getIterator());
6588 } else {
6589 // If the current base is an argument or global value, the new base
6590 // will be inserted to the entry block.
6591 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6592 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6593 }
6594 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6595 // Create a new base.
6596 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6597 NewBaseGEP = OldBase;
6598 if (NewBaseGEP->getType() != I8PtrTy)
6599 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6600 NewBaseGEP =
6601 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6602 NewGEPBases.insert(NewBaseGEP);
6603 return;
6604 };
6605
6606 // Check whether all the offsets can be encoded with the preferred common base.
6607 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6608 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6609 BaseOffset = PreferBase;
6610 // Create a new base if the offset of the BaseGEP can be decoded with one
6611 // instruction.
6612 createNewBase(BaseOffset, OldBase, BaseGEP);
6613 }
6614
6615 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6616 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6617 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6618 int64_t Offset = LargeOffsetGEP->second;
6619 if (Offset != BaseOffset) {
6620 TargetLowering::AddrMode AddrMode;
6621 AddrMode.HasBaseReg = true;
6622 AddrMode.BaseOffs = Offset - BaseOffset;
6623 // The result type of the GEP might not be the type of the memory
6624 // access.
6625 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6626 GEP->getResultElementType(),
6627 GEP->getAddressSpace())) {
6628 // We need to create a new base if the offset to the current base is
6629 // too large to fit into the addressing mode. So, a very large struct
6630 // may be split into several parts.
6631 BaseGEP = GEP;
6632 BaseOffset = Offset;
6633 NewBaseGEP = nullptr;
6634 }
6635 }
6636
6637 // Generate a new GEP to replace the current one.
6638 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6639
6640 if (!NewBaseGEP) {
6641 // Create a new base if we don't have one yet. Find the insertion
6642 // pointer for the new base first.
6643 createNewBase(BaseOffset, OldBase, GEP);
6644 }
6645
6646 IRBuilder<> Builder(GEP);
6647 Value *NewGEP = NewBaseGEP;
6648 if (Offset != BaseOffset) {
6649 // Calculate the new offset for the new GEP.
6650 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6651 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6652 }
6653 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6654 LargeOffsetGEPID.erase(GEP);
6655 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6656 GEP->eraseFromParent();
6657 Changed = true;
6658 }
6659 }
6660 return Changed;
6661}
6662
6663bool CodeGenPrepare::optimizePhiType(
6664 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6665 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6666 // We are looking for a collection of interconnected phi nodes that together
6667 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6668 // are of the same type. Convert the whole set of nodes to the type of the
6669 // bitcast.
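  // For example (an illustrative sketch): a float phi whose inputs are only
  // "bitcast i32 %l to float" values (with %l loads) and whose only uses are
  // "bitcast float %phi to i32" feeding stores can be rewritten as an i32 phi,
  // making both sets of bitcasts redundant.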
6670 Type *PhiTy = I->getType();
6671 Type *ConvertTy = nullptr;
6672 if (Visited.count(I) ||
6673 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6674 return false;
6675
6676 SmallVector<Instruction *, 4> Worklist;
6677 Worklist.push_back(cast<Instruction>(I));
6678 SmallPtrSet<PHINode *, 4> PhiNodes;
6679 SmallPtrSet<ConstantData *, 4> Constants;
6680 PhiNodes.insert(I);
6681 Visited.insert(I);
6682 SmallPtrSet<Instruction *, 4> Defs;
6683 SmallPtrSet<Instruction *, 4> Uses;
6684 // This works by adding extra bitcasts between load/stores and removing
6685 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6686 // we can get in the situation where we remove a bitcast in one iteration
6687 // just to add it again in the next. We need to ensure that at least one
6688 // bitcast we remove is anchored to something that will not change back.
6689 bool AnyAnchored = false;
6690
6691 while (!Worklist.empty()) {
6692 Instruction *II = Worklist.pop_back_val();
6693
6694 if (auto *Phi = dyn_cast<PHINode>(II)) {
6695 // Handle Defs, which might also be PHI's
6696 for (Value *V : Phi->incoming_values()) {
6697 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6698 if (!PhiNodes.count(OpPhi)) {
6699 if (!Visited.insert(OpPhi).second)
6700 return false;
6701 PhiNodes.insert(OpPhi);
6702 Worklist.push_back(OpPhi);
6703 }
6704 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6705 if (!OpLoad->isSimple())
6706 return false;
6707 if (Defs.insert(OpLoad).second)
6708 Worklist.push_back(OpLoad);
6709 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6710 if (Defs.insert(OpEx).second)
6711 Worklist.push_back(OpEx);
6712 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6713 if (!ConvertTy)
6714 ConvertTy = OpBC->getOperand(0)->getType();
6715 if (OpBC->getOperand(0)->getType() != ConvertTy)
6716 return false;
6717 if (Defs.insert(OpBC).second) {
6718 Worklist.push_back(OpBC);
6719 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6720 !isa<ExtractElementInst>(OpBC->getOperand(0));
6721 }
6722 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6723 Constants.insert(OpC);
6724 else
6725 return false;
6726 }
6727 }
6728
6729 // Handle uses which might also be phi's
6730 for (User *V : II->users()) {
6731 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6732 if (!PhiNodes.count(OpPhi)) {
6733 if (Visited.count(OpPhi))
6734 return false;
6735 PhiNodes.insert(OpPhi);
6736 Visited.insert(OpPhi);
6737 Worklist.push_back(OpPhi);
6738 }
6739 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6740 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6741 return false;
6742 Uses.insert(OpStore);
6743 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6744 if (!ConvertTy)
6745 ConvertTy = OpBC->getType();
6746 if (OpBC->getType() != ConvertTy)
6747 return false;
6748 Uses.insert(OpBC);
6749 AnyAnchored |=
6750 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6751 } else {
6752 return false;
6753 }
6754 }
6755 }
6756
6757 if (!ConvertTy || !AnyAnchored ||
6758 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6759 return false;
6760
6761 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6762 << *ConvertTy << "\n");
6763
6764 // Create all the new phi nodes of the new type, and bitcast any loads to the
6765 // correct type.
6766 ValueToValueMap ValMap;
6767 for (ConstantData *C : Constants)
6768 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6769 for (Instruction *D : Defs) {
6770 if (isa<BitCastInst>(D)) {
6771 ValMap[D] = D->getOperand(0);
6772 DeletedInstrs.insert(D);
6773 } else {
6774 BasicBlock::iterator insertPt = std::next(D->getIterator());
6775 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6776 }
6777 }
6778 for (PHINode *Phi : PhiNodes)
6779 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6780 Phi->getName() + ".tc", Phi->getIterator());
6781 // Pipe together all the PhiNodes.
6782 for (PHINode *Phi : PhiNodes) {
6783 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6784 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6785 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6786 Phi->getIncomingBlock(i));
6787 Visited.insert(NewPhi);
6788 }
6789 // And finally pipe up the stores and bitcasts
6790 for (Instruction *U : Uses) {
6791 if (isa<BitCastInst>(U)) {
6792 DeletedInstrs.insert(U);
6793 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6794 } else {
6795 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6796 U->getIterator()));
6797 }
6798 }
6799
6800 // Save the removed phis to be deleted later.
6801 for (PHINode *Phi : PhiNodes)
6802 DeletedInstrs.insert(Phi);
6803 return true;
6804}
6805
6806bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6807 if (!OptimizePhiTypes)
6808 return false;
6809
6810 bool Changed = false;
6811 SmallPtrSet<PHINode *, 4> Visited;
6812 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6813
6814 // Attempt to optimize all the phis in the function to the correct type.
6815 for (auto &BB : F)
6816 for (auto &Phi : BB.phis())
6817 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6818
6819 // Remove any old phi's that have been converted.
6820 for (auto *I : DeletedInstrs) {
6821 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6822 I->eraseFromParent();
6823 }
6824
6825 return Changed;
6826}
6827
6828 /// Return true if an ext(load) can be formed from an extension in
6829/// \p MovedExts.
6830bool CodeGenPrepare::canFormExtLd(
6831 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6832 Instruction *&Inst, bool HasPromoted) {
6833 for (auto *MovedExtInst : MovedExts) {
6834 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6835 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6836 Inst = MovedExtInst;
6837 break;
6838 }
6839 }
6840 if (!LI)
6841 return false;
6842
6843 // If they're already in the same block, there's nothing to do.
6844 // Make the cheap checks first if we did not promote.
6845 // If we promoted, we need to check if it is indeed profitable.
6846 if (!HasPromoted && LI->getParent() == Inst->getParent())
6847 return false;
6848
6849 return TLI->isExtLoad(LI, Inst, *DL);
6850}
6851
6852/// Move a zext or sext fed by a load into the same basic block as the load,
6853/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6854/// extend into the load.
6855///
6856/// E.g.,
6857/// \code
6858/// %ld = load i32* %addr
6859/// %add = add nuw i32 %ld, 4
6860/// %zext = zext i32 %add to i64
6861 /// \endcode
6862/// =>
6863/// \code
6864/// %ld = load i32* %addr
6865/// %zext = zext i32 %ld to i64
6866/// %add = add nuw i64 %zext, 4
6867 /// \endcode
6868/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
6869 /// allows us to match zext(load i32*) to i64.
6870///
6871/// Also, try to promote the computations used to obtain a sign extended
6872/// value used into memory accesses.
6873/// E.g.,
6874/// \code
6875/// a = add nsw i32 b, 3
6876/// d = sext i32 a to i64
6877/// e = getelementptr ..., i64 d
6878/// \endcode
6879/// =>
6880/// \code
6881/// f = sext i32 b to i64
6882/// a = add nsw i64 f, 3
6883/// e = getelementptr ..., i64 a
6884/// \endcode
6885///
6886/// \p Inst[in/out] the extension may be modified during the process if some
6887/// promotions apply.
6888bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6889 bool AllowPromotionWithoutCommonHeader = false;
6890 /// See if it is an interesting sext operation for the address type
6891 /// promotion before trying to promote it, e.g., the ones with the right
6892 /// type and used in memory accesses.
6893 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6894 *Inst, AllowPromotionWithoutCommonHeader);
6895 TypePromotionTransaction TPT(RemovedInsts);
6896 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6897 TPT.getRestorationPoint();
6898 SmallVector<Instruction *, 1> Exts;
6899 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6900 Exts.push_back(Inst);
6901
6902 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6903
6904 // Look for a load being extended.
6905 LoadInst *LI = nullptr;
6906 Instruction *ExtFedByLoad;
6907
6908 // Try to promote a chain of computation if it allows to form an extended
6909 // load.
6910 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6911 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
6912 TPT.commit();
6913 // Move the extend into the same block as the load.
6914 ExtFedByLoad->moveAfter(LI);
6915 ++NumExtsMoved;
6916 Inst = ExtFedByLoad;
6917 return true;
6918 }
6919
6920 // Continue promoting SExts if known as considerable depending on targets.
6921 if (ATPConsiderable &&
6922 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6923 HasPromoted, TPT, SpeculativelyMovedExts))
6924 return true;
6925
6926 TPT.rollback(LastKnownGood);
6927 return false;
6928}
6929
6930// Perform address type promotion if doing so is profitable.
6931// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6932// instructions that sign extended the same initial value. However, if
6933// AllowPromotionWithoutCommonHeader == true, we expect promoting the
6934// extension is just profitable.
6935bool CodeGenPrepare::performAddressTypePromotion(
6936 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
6937 bool HasPromoted, TypePromotionTransaction &TPT,
6938 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6939 bool Promoted = false;
6940 SmallPtrSet<Instruction *, 1> UnhandledExts;
6941 bool AllSeenFirst = true;
6942 for (auto *I : SpeculativelyMovedExts) {
6943 Value *HeadOfChain = I->getOperand(0);
6944 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
6945 SeenChainsForSExt.find(HeadOfChain);
6946 // If there is an unhandled SExt which has the same header, try to promote
6947 // it as well.
6948 if (AlreadySeen != SeenChainsForSExt.end()) {
6949 if (AlreadySeen->second != nullptr)
6950 UnhandledExts.insert(AlreadySeen->second);
6951 AllSeenFirst = false;
6952 }
6953 }
6954
6955 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6956 SpeculativelyMovedExts.size() == 1)) {
6957 TPT.commit();
6958 if (HasPromoted)
6959 Promoted = true;
6960 for (auto *I : SpeculativelyMovedExts) {
6961 Value *HeadOfChain = I->getOperand(0);
6962 SeenChainsForSExt[HeadOfChain] = nullptr;
6963 ValToSExtendedUses[HeadOfChain].push_back(I);
6964 }
6965 // Update Inst as promotion happened.
6966 Inst = SpeculativelyMovedExts.pop_back_val();
6967 } else {
6968 // This is the first chain visited from the header, keep the current chain
6969 // as unhandled. Defer promoting it until we encounter another SExt
6970 // chain derived from the same header.
6971 for (auto *I : SpeculativelyMovedExts) {
6972 Value *HeadOfChain = I->getOperand(0);
6973 SeenChainsForSExt[HeadOfChain] = Inst;
6974 }
6975 return false;
6976 }
6977
6978 if (!AllSeenFirst && !UnhandledExts.empty())
6979 for (auto *VisitedSExt : UnhandledExts) {
6980 if (RemovedInsts.count(VisitedSExt))
6981 continue;
6982 TypePromotionTransaction TPT(RemovedInsts);
6983 SmallVector<Instruction *, 1> Exts;
6984 SmallVector<Instruction *, 2> Chains;
6985 Exts.push_back(VisitedSExt);
6986 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6987 TPT.commit();
6988 if (HasPromoted)
6989 Promoted = true;
6990 for (auto *I : Chains) {
6991 Value *HeadOfChain = I->getOperand(0);
6992 // Mark this as handled.
6993 SeenChainsForSExt[HeadOfChain] = nullptr;
6994 ValToSExtendedUses[HeadOfChain].push_back(I);
6995 }
6996 }
6997 return Promoted;
6998}
6999
7000bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7001 BasicBlock *DefBB = I->getParent();
7002
7003 // If the result of a {s|z}ext and its source are both live out, rewrite all
7004 // other uses of the source with the result of the extension.
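  // For example (sketch): given "%z = zext i16 %s to i32" where %s also has a
  // use in another block, that out-of-block use of %s is rewritten to
  // "trunc i32 %z to i16", so only %z needs to stay live across blocks.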
7005 Value *Src = I->getOperand(0);
7006 if (Src->hasOneUse())
7007 return false;
7008
7009 // Only do this xform if truncating is free.
7010 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7011 return false;
7012
7013 // Only safe to perform the optimization if the source is also defined in
7014 // this block.
7015 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7016 return false;
7017
7018 bool DefIsLiveOut = false;
7019 for (User *U : I->users()) {
7020 Instruction *UI = cast<Instruction>(U);
7021
7022 // Figure out which BB this ext is used in.
7023 BasicBlock *UserBB = UI->getParent();
7024 if (UserBB == DefBB)
7025 continue;
7026 DefIsLiveOut = true;
7027 break;
7028 }
7029 if (!DefIsLiveOut)
7030 return false;
7031
7032 // Make sure none of the uses are PHI nodes.
7033 for (User *U : Src->users()) {
7034 Instruction *UI = cast<Instruction>(U);
7035 BasicBlock *UserBB = UI->getParent();
7036 if (UserBB == DefBB)
7037 continue;
7038 // Be conservative. We don't want this xform to end up introducing
7039 // reloads just before load / store instructions.
7040 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7041 return false;
7042 }
7043
7044 // InsertedTruncs - Only insert one trunc in each block once.
7045 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7046
7047 bool MadeChange = false;
7048 for (Use &U : Src->uses()) {
7049 Instruction *User = cast<Instruction>(U.getUser());
7050
7051 // Figure out which BB this ext is used in.
7052 BasicBlock *UserBB = User->getParent();
7053 if (UserBB == DefBB)
7054 continue;
7055
7056 // Both src and def are live in this block. Rewrite the use.
7057 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7058
7059 if (!InsertedTrunc) {
7060 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7061 assert(InsertPt != UserBB->end());
7062 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7063 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7064 InsertedInsts.insert(InsertedTrunc);
7065 }
7066
7067 // Replace a use of the {s|z}ext source with a use of the result.
7068 U = InsertedTrunc;
7069 ++NumExtUses;
7070 MadeChange = true;
7071 }
7072
7073 return MadeChange;
7074}
7075
7076// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7077// just after the load if the target can fold this into one extload instruction,
7078// with the hope of eliminating some of the other later "and" instructions using
7079// the loaded value. "and"s that are made trivially redundant by the insertion
7080// of the new "and" are removed by this function, while others (e.g. those whose
7081// path from the load goes through a phi) are left for isel to potentially
7082// remove.
7083//
7084// For example:
7085//
7086// b0:
7087// x = load i32
7088// ...
7089// b1:
7090// y = and x, 0xff
7091// z = use y
7092//
7093// becomes:
7094//
7095// b0:
7096// x = load i32
7097// x' = and x, 0xff
7098// ...
7099// b1:
7100// z = use x'
7101//
7102// whereas:
7103//
7104// b0:
7105// x1 = load i32
7106// ...
7107// b1:
7108// x2 = load i32
7109// ...
7110// b2:
7111// x = phi x1, x2
7112// y = and x, 0xff
7113//
7114// becomes (after a call to optimizeLoadExt for each load):
7115//
7116// b0:
7117// x1 = load i32
7118// x1' = and x1, 0xff
7119// ...
7120// b1:
7121// x2 = load i32
7122// x2' = and x2, 0xff
7123// ...
7124// b2:
7125// x = phi x1', x2'
7126// y = and x, 0xff
7127bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7128 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7129 return false;
7130
7131 // Skip loads we've already transformed.
7132 if (Load->hasOneUse() &&
7133 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7134 return false;
7135
7136 // Look at all uses of Load, looking through phis, to determine how many bits
7137 // of the loaded value are needed.
7138 SmallVector<Instruction *, 8> WorkList;
7139 SmallPtrSet<Instruction *, 16> Visited;
7140 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7141 SmallVector<Instruction *, 8> DropFlags;
7142 for (auto *U : Load->users())
7143 WorkList.push_back(cast<Instruction>(U));
7144
7145 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7146 unsigned BitWidth = LoadResultVT.getSizeInBits();
7147 // If the BitWidth is 0, do not try to optimize the type
7148 if (BitWidth == 0)
7149 return false;
7150
7151 APInt DemandBits(BitWidth, 0);
7152 APInt WidestAndBits(BitWidth, 0);
7153
7154 while (!WorkList.empty()) {
7155 Instruction *I = WorkList.pop_back_val();
7156
7157 // Break use-def graph loops.
7158 if (!Visited.insert(I).second)
7159 continue;
7160
7161 // For a PHI node, push all of its users.
7162 if (auto *Phi = dyn_cast<PHINode>(I)) {
7163 for (auto *U : Phi->users())
7164 WorkList.push_back(cast<Instruction>(U));
7165 continue;
7166 }
7167
7168 switch (I->getOpcode()) {
7169 case Instruction::And: {
7170 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7171 if (!AndC)
7172 return false;
7173 APInt AndBits = AndC->getValue();
7174 DemandBits |= AndBits;
7175 // Keep track of the widest and mask we see.
7176 if (AndBits.ugt(WidestAndBits))
7177 WidestAndBits = AndBits;
7178 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7179 AndsToMaybeRemove.push_back(I);
7180 break;
7181 }
7182
7183 case Instruction::Shl: {
7184 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7185 if (!ShlC)
7186 return false;
7187 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7188 DemandBits.setLowBits(BitWidth - ShiftAmt);
7189 DropFlags.push_back(I);
7190 break;
7191 }
7192
7193 case Instruction::Trunc: {
7194 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7195 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7196 DemandBits.setLowBits(TruncBitWidth);
7197 DropFlags.push_back(I);
7198 break;
7199 }
7200
7201 default:
7202 return false;
7203 }
7204 }
7205
7206 uint32_t ActiveBits = DemandBits.getActiveBits();
7207 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7208 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7209 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7210 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7211 // followed by an AND.
7212 // TODO: Look into removing this restriction by fixing backends to either
7213 // return false for isLoadExtLegal for i1 or have them select this pattern to
7214 // a single instruction.
7215 //
7216 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7217 // mask, since these are the only ands that will be removed by isel.
7218 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7219 WidestAndBits != DemandBits)
7220 return false;
7221
7222 LLVMContext &Ctx = Load->getType()->getContext();
7223 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7224 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7225
7226 // Reject cases that won't be matched as extloads.
7227 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7228 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7229 return false;
7230
7231 IRBuilder<> Builder(Load->getNextNonDebugInstruction());
7232 auto *NewAnd = cast<Instruction>(
7233 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7234 // Mark this instruction as "inserted by CGP", so that other
7235 // optimizations don't touch it.
7236 InsertedInsts.insert(NewAnd);
7237
7238 // Replace all uses of load with new and (except for the use of load in the
7239 // new and itself).
7240 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7241 NewAnd->setOperand(0, Load);
7242
7243 // Remove any and instructions that are now redundant.
7244 for (auto *And : AndsToMaybeRemove)
7245 // Check that the and mask is the same as the one we decided to put on the
7246 // new and.
7247 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7248 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7249 if (&*CurInstIterator == And)
7250 CurInstIterator = std::next(And->getIterator());
7251 And->eraseFromParent();
7252 ++NumAndUses;
7253 }
7254
7255 // NSW flags may no longer hold.
7256 for (auto *Inst : DropFlags)
7257 Inst->setHasNoSignedWrap(false);
7258
7259 ++NumAndsAdded;
7260 return true;
7261}
7262
7263/// Check if V (an operand of a select instruction) is an expensive instruction
7264/// that is only used once.
7265 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7266 auto *I = dyn_cast<Instruction>(V);
7267 // If it's safe to speculatively execute, then it should not have side
7268 // effects; therefore, it's safe to sink and possibly *not* execute.
7269 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7270 TTI->isExpensiveToSpeculativelyExecute(I);
7271 }
7272
7273/// Returns true if a SelectInst should be turned into an explicit branch.
7274 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7275 const TargetLowering *TLI,
7276 SelectInst *SI) {
7277 // If even a predictable select is cheap, then a branch can't be cheaper.
7278 if (!TLI->isPredictableSelectExpensive())
7279 return false;
7280
7281 // FIXME: This should use the same heuristics as IfConversion to determine
7282 // whether a select is better represented as a branch.
7283
7284 // If metadata tells us that the select condition is obviously predictable,
7285 // then we want to replace the select with a branch.
7286 uint64_t TrueWeight, FalseWeight;
7287 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7288 uint64_t Max = std::max(TrueWeight, FalseWeight);
7289 uint64_t Sum = TrueWeight + FalseWeight;
7290 if (Sum != 0) {
7291 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7292 if (Probability > TTI->getPredictableBranchThreshold())
7293 return true;
7294 }
7295 }
7296
7297 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7298
7299 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7300 // comparison condition. If the compare has more than one use, there's
7301 // probably another cmov or setcc around, so it's not worth emitting a branch.
7302 if (!Cmp || !Cmp->hasOneUse())
7303 return false;
7304
7305 // If either operand of the select is expensive and only needed on one side
7306 // of the select, we should form a branch.
7307 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7308 sinkSelectOperand(TTI, SI->getFalseValue()))
7309 return true;
7310
7311 return false;
7312}
7313
7314/// If \p isTrue is true, return the true value of \p SI, otherwise return
7315/// false value of \p SI. If the true/false value of \p SI is defined by any
7316/// select instructions in \p Selects, look through the defining select
7317/// instruction until the true/false value is not defined in \p Selects.
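/// For example (sketch), given
/// \code
///   %sel1 = select i1 %c, i32 %a, i32 %b
///   %sel2 = select i1 %c, i32 %sel1, i32 %d
/// \endcode
/// with Selects = {%sel1, %sel2}, the true value of %sel2 resolves to %a and
/// its false value to %d.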
7318static Value *
7319 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7320 const SmallPtrSet<const Instruction *, 2> &Selects) {
7321 Value *V = nullptr;
7322
7323 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7324 DefSI = dyn_cast<SelectInst>(V)) {
7325 assert(DefSI->getCondition() == SI->getCondition() &&
7326 "The condition of DefSI does not match with SI");
7327 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7328 }
7329
7330 assert(V && "Failed to get select true/false value");
7331 return V;
7332}
7333
7334bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7335 assert(Shift->isShift() && "Expected a shift");
7336
7337 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7338 // general vector shifts, and (3) the shift amount is a select-of-splatted
7339 // values, hoist the shifts before the select:
7340 // shift Op0, (select Cond, TVal, FVal) -->
7341 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7342 //
7343 // This is inverting a generic IR transform when we know that the cost of a
7344 // general vector shift is more than the cost of 2 shift-by-scalars.
7345 // We can't do this effectively in SDAG because we may not be able to
7346 // determine if the select operands are splats from within a basic block.
7347 Type *Ty = Shift->getType();
7348 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7349 return false;
7350 Value *Cond, *TVal, *FVal;
7351 if (!match(Shift->getOperand(1),
7352 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7353 return false;
7354 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7355 return false;
7356
7357 IRBuilder<> Builder(Shift);
7358 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7359 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7360 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7361 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7362 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7363 Shift->eraseFromParent();
7364 return true;
7365}
7366
7367bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7368 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7369 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7370 "Expected a funnel shift");
7371
7372 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7373 // than general vector shifts, and (3) the shift amount is select-of-splatted
7374 // values, hoist the funnel shifts before the select:
7375 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7376 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7377 //
7378 // This is inverting a generic IR transform when we know that the cost of a
7379 // general vector shift is more than the cost of 2 shift-by-scalars.
7380 // We can't do this effectively in SDAG because we may not be able to
7381 // determine if the select operands are splats from within a basic block.
7382 Type *Ty = Fsh->getType();
7383 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7384 return false;
7385 Value *Cond, *TVal, *FVal;
7386 if (!match(Fsh->getOperand(2),
7387 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7388 return false;
7389 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7390 return false;
7391
7392 IRBuilder<> Builder(Fsh);
7393 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7394 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7395 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7396 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7397 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7398 Fsh->eraseFromParent();
7399 return true;
7400}
7401
7402/// If we have a SelectInst that will likely profit from branch prediction,
7403/// turn it into a branch.
7404bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7405 if (DisableSelectToBranch)
7406 return false;
7407
7408 // If the SelectOptimize pass is enabled, selects have already been optimized.
7409 if (!getCGPassBuilderOption().DisableSelectOptimize)
7410 return false;
7411
7412 // Find all consecutive select instructions that share the same condition.
7413 SmallVector<SelectInst *, 2> ASI;
7414 ASI.push_back(SI);
7415 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7416 It != SI->getParent()->end(); ++It) {
7417 SelectInst *I = dyn_cast<SelectInst>(&*It);
7418 if (I && SI->getCondition() == I->getCondition()) {
7419 ASI.push_back(I);
7420 } else {
7421 break;
7422 }
7423 }
7424
7425 SelectInst *LastSI = ASI.back();
7426 // Increment the current iterator to skip all the rest of select instructions
7427 // because they will be either "not lowered" or "all lowered" to branch.
7428 CurInstIterator = std::next(LastSI->getIterator());
7429 // Examine debug-info attached to the consecutive select instructions. They
7430 // won't be individually optimised by optimizeInst, so we need to perform
7431 // DbgVariableRecord maintenance here instead.
7432 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7433 fixupDbgVariableRecordsOnInst(*SI);
7434
7435 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7436
7437 // Can we convert the 'select' to CF ?
7438 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7439 return false;
7440
7441 TargetLowering::SelectSupportKind SelectKind;
7442 if (SI->getType()->isVectorTy())
7443 SelectKind = TargetLowering::ScalarCondVectorVal;
7444 else
7445 SelectKind = TargetLowering::ScalarValSelect;
7446
7447 if (TLI->isSelectSupported(SelectKind) &&
7448 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) ||
7449 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7450 return false;
7451
7452 // The DominatorTree needs to be rebuilt by any consumers after this
7453 // transformation. We simply reset here rather than setting the ModifiedDT
7454 // flag to avoid restarting the function walk in runOnFunction for each
7455 // select optimized.
7456 DT.reset();
7457
7458 // Transform a sequence like this:
7459 // start:
7460 // %cmp = cmp uge i32 %a, %b
7461 // %sel = select i1 %cmp, i32 %c, i32 %d
7462 //
7463 // Into:
7464 // start:
7465 // %cmp = cmp uge i32 %a, %b
7466 // %cmp.frozen = freeze %cmp
7467 // br i1 %cmp.frozen, label %select.true, label %select.false
7468 // select.true:
7469 // br label %select.end
7470 // select.false:
7471 // br label %select.end
7472 // select.end:
7473 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7474 //
7475 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7476 // In addition, we may sink instructions that produce %c or %d from
7477 // the entry block into the destination(s) of the new branch.
7478 // If the true or false blocks do not contain a sunken instruction, that
7479 // block and its branch may be optimized away. In that case, one side of the
7480 // first branch will point directly to select.end, and the corresponding PHI
7481 // predecessor block will be the start block.
7482
7483 // Collect values that go on the true side and the values that go on the false
7484 // side.
7485 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7486 for (SelectInst *SI : ASI) {
7487 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7488 TrueInstrs.push_back(cast<Instruction>(V));
7489 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7490 FalseInstrs.push_back(cast<Instruction>(V));
7491 }
7492
7493 // Split the select block, according to how many (if any) values go on each
7494 // side.
7495 BasicBlock *StartBlock = SI->getParent();
7496 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7497 // We should split before any debug-info.
7498 SplitPt.setHeadBit(true);
7499
7500 IRBuilder<> IB(SI);
7501 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7502
7503 BasicBlock *TrueBlock = nullptr;
7504 BasicBlock *FalseBlock = nullptr;
7505 BasicBlock *EndBlock = nullptr;
7506 BranchInst *TrueBranch = nullptr;
7507 BranchInst *FalseBranch = nullptr;
7508 if (TrueInstrs.size() == 0) {
7509 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7510 CondFr, SplitPt, false, nullptr, nullptr, LI));
7511 FalseBlock = FalseBranch->getParent();
7512 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7513 } else if (FalseInstrs.size() == 0) {
7514 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7515 CondFr, SplitPt, false, nullptr, nullptr, LI));
7516 TrueBlock = TrueBranch->getParent();
7517 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7518 } else {
7519 Instruction *ThenTerm = nullptr;
7520 Instruction *ElseTerm = nullptr;
7521 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7522 nullptr, nullptr, LI);
7523 TrueBranch = cast<BranchInst>(ThenTerm);
7524 FalseBranch = cast<BranchInst>(ElseTerm);
7525 TrueBlock = TrueBranch->getParent();
7526 FalseBlock = FalseBranch->getParent();
7527 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7528 }
7529
7530 EndBlock->setName("select.end");
7531 if (TrueBlock)
7532 TrueBlock->setName("select.true.sink");
7533 if (FalseBlock)
7534 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7535 : "select.false.sink");
7536
7537 if (IsHugeFunc) {
7538 if (TrueBlock)
7539 FreshBBs.insert(TrueBlock);
7540 if (FalseBlock)
7541 FreshBBs.insert(FalseBlock);
7542 FreshBBs.insert(EndBlock);
7543 }
7544
7545 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7546
7547 static const unsigned MD[] = {
7548 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7549 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7550 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7551
7552 // Sink expensive instructions into the conditional blocks to avoid executing
7553 // them speculatively.
7554 for (Instruction *I : TrueInstrs)
7555 I->moveBefore(TrueBranch);
7556 for (Instruction *I : FalseInstrs)
7557 I->moveBefore(FalseBranch);
7558
7559 // If we did not create a new block for one of the 'true' or 'false' paths
7560 // of the condition, it means that side of the branch goes to the end block
7561 // directly and the path originates from the start block from the point of
7562 // view of the new PHI.
7563 if (TrueBlock == nullptr)
7564 TrueBlock = StartBlock;
7565 else if (FalseBlock == nullptr)
7566 FalseBlock = StartBlock;
7567
7568 SmallPtrSet<const Instruction *, 2> INS;
7569 INS.insert(ASI.begin(), ASI.end());
7570 // Use reverse iterator because later select may use the value of the
7571 // earlier select, and we need to propagate value through earlier select
7572 // to get the PHI operand.
7573 for (SelectInst *SI : llvm::reverse(ASI)) {
7574 // The select itself is replaced with a PHI Node.
7575 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7576 PN->insertBefore(EndBlock->begin());
7577 PN->takeName(SI);
7578 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7579 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7580 PN->setDebugLoc(SI->getDebugLoc());
7581
7582 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7583 SI->eraseFromParent();
7584 INS.erase(SI);
7585 ++NumSelectsExpanded;
7586 }
7587
7588 // Instruct OptimizeBlock to skip to the next block.
7589 CurInstIterator = StartBlock->end();
7590 return true;
7591}
7592
7593/// Some targets only accept certain types for splat inputs. For example a VDUP
7594 /// in MVE takes a GPR (integer) register, and the instructions that incorporate
7595 /// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
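/// For example (illustrative, assuming the target asks for an i32 splat):
/// \code
///   %ins = insertelement <4 x float> poison, float %v, i64 0
///   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
///                          <4 x i32> zeroinitializer
/// \endcode
/// is rebuilt as a bitcast of %v to i32, an i32 vector splat of that value,
/// and a bitcast of the splat back to <4 x float>.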
7596bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7597 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7598 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7599 m_Undef(), m_ZeroMask())))
7600 return false;
7601 Type *NewType = TLI->shouldConvertSplatType(SVI);
7602 if (!NewType)
7603 return false;
7604
7605 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7606 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7607 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7608 "Expected a type of the same size!");
7609 auto *NewVecType =
7610 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7611
7612 // Create a bitcast (shuffle (insert (bitcast(..))))
7613 IRBuilder<> Builder(SVI->getContext());
7614 Builder.SetInsertPoint(SVI);
7615 Value *BC1 = Builder.CreateBitCast(
7616 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7617 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7618 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7619
7620 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7621 RecursivelyDeleteTriviallyDeadInstructions(
7622 SVI, TLInfo, nullptr,
7623 [&](Value *V) { removeAllAssertingVHReferences(V); });
7624
7625 // Also hoist the bitcast up to its operand if they are not in the same
7626 // block.
7627 if (auto *BCI = dyn_cast<Instruction>(BC1))
7628 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7629 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7630 !Op->isTerminator() && !Op->isEHPad())
7631 BCI->moveAfter(Op);
7632
7633 return true;
7634}
7635
7636bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7637 // If the operands of I can be folded into a target instruction together with
7638 // I, duplicate and sink them.
7639 SmallVector<Use *, 4> OpsToSink;
7640 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7641 return false;
7642
7643 // OpsToSink can contain multiple uses in a use chain (e.g.
7644 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7645 // uses must come first, so we process the ops in reverse order so as to not
7646 // create invalid IR.
7647 BasicBlock *TargetBB = I->getParent();
7648 bool Changed = false;
7649 SmallVector<Use *, 4> ToReplace;
7650 Instruction *InsertPoint = I;
7651 DenseMap<const Instruction *, unsigned long> InstOrdering;
7652 unsigned long InstNumber = 0;
7653 for (const auto &I : *TargetBB)
7654 InstOrdering[&I] = InstNumber++;
7655
7656 for (Use *U : reverse(OpsToSink)) {
7657 auto *UI = cast<Instruction>(U->get());
7658 if (isa<PHINode>(UI))
7659 continue;
7660 if (UI->getParent() == TargetBB) {
7661 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7662 InsertPoint = UI;
7663 continue;
7664 }
7665 ToReplace.push_back(U);
7666 }
7667
7668 SetVector<Instruction *> MaybeDead;
7669 DenseMap<Instruction *, Instruction *> NewInstructions;
7670 for (Use *U : ToReplace) {
7671 auto *UI = cast<Instruction>(U->get());
7672 Instruction *NI = UI->clone();
7673
7674 if (IsHugeFunc) {
7675 // Now we clone an instruction, its operands' defs may sink to this BB
7676 // now. So we put the operands defs' BBs into FreshBBs to do optimization.
7677 for (Value *Op : NI->operands())
7678 if (auto *OpDef = dyn_cast<Instruction>(Op))
7679 FreshBBs.insert(OpDef->getParent());
7680 }
7681
7682 NewInstructions[UI] = NI;
7683 MaybeDead.insert(UI);
7684 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7685 NI->insertBefore(InsertPoint);
7686 InsertPoint = NI;
7687 InsertedInsts.insert(NI);
7688
7689 // Update the use for the new instruction, making sure that we update the
7690 // sunk instruction uses, if it is part of a chain that has already been
7691 // sunk.
7692 Instruction *OldI = cast<Instruction>(U->getUser());
7693 if (NewInstructions.count(OldI))
7694 NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
7695 else
7696 U->set(NI);
7697 Changed = true;
7698 }
7699
7700 // Remove instructions that are dead after sinking.
7701 for (auto *I : MaybeDead) {
7702 if (!I->hasNUsesOrMore(1)) {
7703 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7704 I->eraseFromParent();
7705 }
7706 }
7707
7708 return Changed;
7709}
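// A minimal sketch (not part of the pass) of the insertion-point choice above:
// every instruction in the target block gets an ordinal, and cloned operands
// are inserted before the earliest use that is already in the block, so each
// cloned def dominates all of its users. Names here are hypothetical.
#include <algorithm>
#include <vector>
static unsigned earliestInsertOrdinal(const std::vector<unsigned> &UseOrdinals,
                                      unsigned SinkTargetOrdinal) {
  unsigned Best = SinkTargetOrdinal; // default: right before the sink target
  for (unsigned Ordinal : UseOrdinals)
    Best = std::min(Best, Ordinal);
  return Best;
}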
7710
7711bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7712 Value *Cond = SI->getCondition();
7713 Type *OldType = Cond->getType();
7714 LLVMContext &Context = Cond->getContext();
7715 EVT OldVT = TLI->getValueType(*DL, OldType);
7716 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7717 unsigned RegWidth = RegType.getSizeInBits();
7718
7719 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7720 return false;
7721
7722 // If the register width is greater than the type width, expand the condition
7723 // of the switch instruction and each case constant to the width of the
7724 // register. By widening the type of the switch condition, subsequent
7725 // comparisons (for case comparisons) will not need to be extended to the
7726 // preferred register width, so we will potentially eliminate N-1 extends,
7727 // where N is the number of cases in the switch.
7728 auto *NewType = Type::getIntNTy(Context, RegWidth);
7729
7730 // Extend the switch condition and case constants using the target preferred
7731 // extend unless the switch condition is a function argument with an extend
7732 // attribute. In that case, we can avoid an unnecessary mask/extension by
7733 // matching the argument extension instead.
7734 Instruction::CastOps ExtType = Instruction::ZExt;
7735 // Some targets prefer SExt over ZExt.
7736 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7737 ExtType = Instruction::SExt;
7738
7739 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7740 if (Arg->hasSExtAttr())
7741 ExtType = Instruction::SExt;
7742 if (Arg->hasZExtAttr())
7743 ExtType = Instruction::ZExt;
7744 }
7745
7746 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7747 ExtInst->insertBefore(SI);
7748 ExtInst->setDebugLoc(SI->getDebugLoc());
7749 SI->setCondition(ExtInst);
7750 for (auto Case : SI->cases()) {
7751 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7752 APInt WideConst = (ExtType == Instruction::ZExt)
7753 ? NarrowConst.zext(RegWidth)
7754 : NarrowConst.sext(RegWidth);
7755 Case.setValue(ConstantInt::get(Context, WideConst));
7756 }
7757
7758 return true;
7759}
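// A minimal sketch (not part of the pass) of how a narrow case constant is
// widened to the preferred register width with APInt, mirroring the zext/sext
// choice made above. widenCaseConstant is a hypothetical helper.
#include "llvm/ADT/APInt.h"
#include <cassert>
static llvm::APInt widenCaseConstant(const llvm::APInt &NarrowConst,
                                     unsigned RegWidth, bool UseSExt) {
  assert(RegWidth >= NarrowConst.getBitWidth() && "only widening is expected");
  return UseSExt ? NarrowConst.sext(RegWidth) : NarrowConst.zext(RegWidth);
}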
7760
7761bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7762 // The SCCP optimization tends to produce code like this:
7763 // switch(x) { case 42: phi(42, ...) }
7764 // Materializing the constant for the phi argument needs instructions, so we
7765 // change the code to:
7766 // switch(x) { case 42: phi(x, ...) }
7767
7768 Value *Condition = SI->getCondition();
7769 // Avoid endless loop in degenerate case.
7770 if (isa<ConstantInt>(*Condition))
7771 return false;
7772
7773 bool Changed = false;
7774 BasicBlock *SwitchBB = SI->getParent();
7775 Type *ConditionType = Condition->getType();
7776
7777 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7778 ConstantInt *CaseValue = Case.getCaseValue();
7779 BasicBlock *CaseBB = Case.getCaseSuccessor();
7780 // Set to true if we previously checked that `CaseBB` is only reached by
7781 // a single case from this switch.
7782 bool CheckedForSinglePred = false;
7783 for (PHINode &PHI : CaseBB->phis()) {
7784 Type *PHIType = PHI.getType();
7785 // If ZExt is free then we can also catch patterns like this:
7786 // switch((i32)x) { case 42: phi((i64)42, ...); }
7787 // and replace `(i64)42` with `zext i32 %x to i64`.
7788 bool TryZExt =
7789 PHIType->isIntegerTy() &&
7790 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7791 TLI->isZExtFree(ConditionType, PHIType);
7792 if (PHIType == ConditionType || TryZExt) {
7793 // Set to true to skip this case because of multiple preds.
7794 bool SkipCase = false;
7795 Value *Replacement = nullptr;
7796 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7797 Value *PHIValue = PHI.getIncomingValue(I);
7798 if (PHIValue != CaseValue) {
7799 if (!TryZExt)
7800 continue;
7801 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7802 if (!PHIValueInt ||
7803 PHIValueInt->getValue() !=
7804 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7805 continue;
7806 }
7807 if (PHI.getIncomingBlock(I) != SwitchBB)
7808 continue;
7809 // We cannot optimize if there are multiple case labels jumping to
7810 // this block. This check may get expensive when there are many
7811 // case labels so we test for it last.
7812 if (!CheckedForSinglePred) {
7813 CheckedForSinglePred = true;
7814 if (SI->findCaseDest(CaseBB) == nullptr) {
7815 SkipCase = true;
7816 break;
7817 }
7818 }
7819
7820 if (Replacement == nullptr) {
7821 if (PHIValue == CaseValue) {
7822 Replacement = Condition;
7823 } else {
7824 IRBuilder<> Builder(SI);
7825 Replacement = Builder.CreateZExt(Condition, PHIType);
7826 }
7827 }
7828 PHI.setIncomingValue(I, Replacement);
7829 Changed = true;
7830 }
7831 if (SkipCase)
7832 break;
7833 }
7834 }
7835 }
7836 return Changed;
7837}
7838
7839bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7840 bool Changed = optimizeSwitchType(SI);
7841 Changed |= optimizeSwitchPhiConstants(SI);
7842 return Changed;
7843}
7844
7845namespace {
7846
7847/// Helper class to promote a scalar operation to a vector one.
7848/// This class is used to move an extractelement transition downward.
7849/// E.g.,
7850/// a = vector_op <2 x i32>
7851/// b = extractelement <2 x i32> a, i32 0
7852/// c = scalar_op b
7853/// store c
7854///
7855/// =>
7856/// a = vector_op <2 x i32>
7857/// c = vector_op a (equivalent to scalar_op on the related lane)
7858/// * d = extractelement <2 x i32> c, i32 0
7859/// * store d
7860/// Assuming both extractelement and store can be combined, we get rid of the
7861/// transition.
7862class VectorPromoteHelper {
7863 /// DataLayout associated with the current module.
7864 const DataLayout &DL;
7865
7866 /// Used to perform some checks on the legality of vector operations.
7867 const TargetLowering &TLI;
7868
7869 /// Used to estimate the cost of the promoted chain.
7870 const TargetTransformInfo &TTI;
7871
7872 /// The transition being moved downwards.
7873 Instruction *Transition;
7874
7875 /// The sequence of instructions to be promoted.
7876 SmallVector<Instruction *, 4> InstsToBePromoted;
7877
7878 /// Cost of combining a store and an extract.
7879 unsigned StoreExtractCombineCost;
7880
7881 /// Instruction that will be combined with the transition.
7882 Instruction *CombineInst = nullptr;
7883
7884 /// The instruction that represents the current end of the transition.
7885 /// Since we are faking the promotion until we reach the end of the chain
7886 /// of computation, we need a way to get the current end of the transition.
7887 Instruction *getEndOfTransition() const {
7888 if (InstsToBePromoted.empty())
7889 return Transition;
7890 return InstsToBePromoted.back();
7891 }
7892
7893 /// Return the index of the original value in the transition.
7894 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7895 /// c, is at index 0.
7896 unsigned getTransitionOriginalValueIdx() const {
7897 assert(isa<ExtractElementInst>(Transition) &&
7898 "Other kind of transitions are not supported yet");
7899 return 0;
7900 }
7901
7902 /// Return the index of the index in the transition.
7903 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7904 /// is at index 1.
7905 unsigned getTransitionIdx() const {
7906 assert(isa<ExtractElementInst>(Transition) &&
7907 "Other kind of transitions are not supported yet");
7908 return 1;
7909 }
7910
7911 /// Get the type of the transition.
7912 /// This is the type of the original value.
7913 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7914 /// transition is <2 x i32>.
7915 Type *getTransitionType() const {
7916 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7917 }
7918
7919 /// Promote \p ToBePromoted by moving \p Def downward through it.
7920 /// I.e., we have the following sequence:
7921 /// Def = Transition <ty1> a to <ty2>
7922 /// b = ToBePromoted <ty2> Def, ...
7923 /// =>
7924 /// b = ToBePromoted <ty1> a, ...
7925 /// Def = Transition <ty1> ToBePromoted to <ty2>
7926 void promoteImpl(Instruction *ToBePromoted);
7927
7928 /// Check whether or not it is profitable to promote all the
7929 /// instructions enqueued to be promoted.
7930 bool isProfitableToPromote() {
7931 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7932 unsigned Index = isa<ConstantInt>(ValIdx)
7933 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7934 : -1;
7935 Type *PromotedType = getTransitionType();
7936
7937 StoreInst *ST = cast<StoreInst>(CombineInst);
7938 unsigned AS = ST->getPointerAddressSpace();
7939 // Check if this store is supported.
7940 if (!TLI.allowsMisalignedMemoryAccesses(
7941 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
7942 ST->getAlign())) {
7943 // If this is not supported, there is no way we can combine
7944 // the extract with the store.
7945 return false;
7946 }
7947
7948 // The scalar chain of computation has to pay for the transition
7949 // scalar to vector.
7950 // The vector chain has to account for the combining cost.
7951 enum TargetTransformInfo::TargetCostKind CostKind =
7952 TargetTransformInfo::TCK_RecipThroughput;
7953 InstructionCost ScalarCost =
7954 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
7955 InstructionCost VectorCost = StoreExtractCombineCost;
7956 for (const auto &Inst : InstsToBePromoted) {
7957 // Compute the cost.
7958 // By construction, all instructions being promoted are arithmetic ones.
7959 // Moreover, one argument is a constant that can be viewed as a splat
7960 // constant.
7961 Value *Arg0 = Inst->getOperand(0);
7962 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7963 isa<ConstantFP>(Arg0);
7964 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
7965 if (IsArg0Constant)
7966 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7967 else
7968 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7969
7970 ScalarCost += TTI.getArithmeticInstrCost(
7971 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
7972 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7973 CostKind, Arg0Info, Arg1Info);
7974 }
7975 LLVM_DEBUG(
7976 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
7977 << ScalarCost << "\nVector: " << VectorCost << '\n');
7978 return ScalarCost > VectorCost;
7979 }
7980
7981 /// Generate a constant vector with \p Val with the same
7982 /// number of elements as the transition.
7983 /// \p UseSplat defines whether or not \p Val should be replicated
7984 /// across the whole vector.
7985 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7986 /// otherwise we generate a vector with as many undef as possible:
7987 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
7988 /// used at the index of the extract.
7989 Value *getConstantVector(Constant *Val, bool UseSplat) const {
7990 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7991 if (!UseSplat) {
7992 // If we cannot determine where the constant must be, we have to
7993 // use a splat constant.
7994 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
7995 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
7996 ExtractIdx = CstVal->getSExtValue();
7997 else
7998 UseSplat = true;
7999 }
8000
8001 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8002 if (UseSplat)
8003 return ConstantVector::getSplat(EC, Val);
8004
8005 if (!EC.isScalable()) {
8006 SmallVector<Constant *, 4> ConstVec;
8007 UndefValue *UndefVal = UndefValue::get(Val->getType());
8008 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8009 if (Idx == ExtractIdx)
8010 ConstVec.push_back(Val);
8011 else
8012 ConstVec.push_back(UndefVal);
8013 }
8014 return ConstantVector::get(ConstVec);
8015 } else
8016 llvm_unreachable(
8017 "Generate scalable vector for non-splat is unimplemented");
8018 }
8019
8020 /// Check if promoting to a vector type an operand at \p OperandIdx
8021 /// in \p Use can trigger undefined behavior.
8022 static bool canCauseUndefinedBehavior(const Instruction *Use,
8023 unsigned OperandIdx) {
8024 // It is not safe to introduce undef when the operand is on
8025 // the right hand side of a division-like instruction.
8026 if (OperandIdx != 1)
8027 return false;
8028 switch (Use->getOpcode()) {
8029 default:
8030 return false;
8031 case Instruction::SDiv:
8032 case Instruction::UDiv:
8033 case Instruction::SRem:
8034 case Instruction::URem:
8035 return true;
8036 case Instruction::FDiv:
8037 case Instruction::FRem:
8038 return !Use->hasNoNaNs();
8039 }
8040 llvm_unreachable(nullptr);
8041 }
8042
8043public:
8044 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8045 const TargetTransformInfo &TTI, Instruction *Transition,
8046 unsigned CombineCost)
8047 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8048 StoreExtractCombineCost(CombineCost) {
8049 assert(Transition && "Do not know how to promote null");
8050 }
8051
8052 /// Check if we can promote \p ToBePromoted to \p Type.
8053 bool canPromote(const Instruction *ToBePromoted) const {
8054 // We could support CastInst too.
8055 return isa<BinaryOperator>(ToBePromoted);
8056 }
8057
8058 /// Check if it is profitable to promote \p ToBePromoted
8059 /// by moving downward the transition through.
8060 bool shouldPromote(const Instruction *ToBePromoted) const {
8061 // Promote only if all the operands can be statically expanded.
8062 // Indeed, we do not want to introduce any new kind of transitions.
8063 for (const Use &U : ToBePromoted->operands()) {
8064 const Value *Val = U.get();
8065 if (Val == getEndOfTransition()) {
8066 // If the use is a division and the transition is on the rhs,
8067 // we cannot promote the operation, otherwise we may create a
8068 // division by zero.
8069 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8070 return false;
8071 continue;
8072 }
8073 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8074 !isa<ConstantFP>(Val))
8075 return false;
8076 }
8077 // Check that the resulting operation is legal.
8078 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8079 if (!ISDOpcode)
8080 return false;
8081 return StressStoreExtract ||
8082 TLI.isOperationLegalOrCustom(
8083 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8084 }
8085
8086 /// Check whether or not \p Use can be combined
8087 /// with the transition.
8088 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8089 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8090
8091 /// Record \p ToBePromoted as part of the chain to be promoted.
8092 void enqueueForPromotion(Instruction *ToBePromoted) {
8093 InstsToBePromoted.push_back(ToBePromoted);
8094 }
8095
8096 /// Set the instruction that will be combined with the transition.
8097 void recordCombineInstruction(Instruction *ToBeCombined) {
8098 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8099 CombineInst = ToBeCombined;
8100 }
8101
8102 /// Promote all the instructions enqueued for promotion if it
8103 /// is profitable.
8104 /// \return True if the promotion happened, false otherwise.
8105 bool promote() {
8106 // Check if there is something to promote.
8107 // Right now, if we do not have anything to combine with,
8108 // we assume the promotion is not profitable.
8109 if (InstsToBePromoted.empty() || !CombineInst)
8110 return false;
8111
8112 // Check cost.
8113 if (!StressStoreExtract && !isProfitableToPromote())
8114 return false;
8115
8116 // Promote.
8117 for (auto &ToBePromoted : InstsToBePromoted)
8118 promoteImpl(ToBePromoted);
8119 InstsToBePromoted.clear();
8120 return true;
8121 }
8122};
8123
8124} // end anonymous namespace
8125
8126void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8127 // At this point, we know that all the operands of ToBePromoted but Def
8128 // can be statically promoted.
8129 // For Def, we need to use its parameter in ToBePromoted:
8130 // b = ToBePromoted ty1 a
8131 // Def = Transition ty1 b to ty2
8132 // Move the transition down.
8133 // 1. Replace all uses of the promoted operation by the transition.
8134 // = ... b => = ... Def.
8135 assert(ToBePromoted->getType() == Transition->getType() &&
8136 "The type of the result of the transition does not match "
8137 "the final type");
8138 ToBePromoted->replaceAllUsesWith(Transition);
8139 // 2. Update the type of the uses.
8140 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8141 Type *TransitionTy = getTransitionType();
8142 ToBePromoted->mutateType(TransitionTy);
8143 // 3. Update all the operands of the promoted operation with promoted
8144 // operands.
8145 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8146 for (Use &U : ToBePromoted->operands()) {
8147 Value *Val = U.get();
8148 Value *NewVal = nullptr;
8149 if (Val == Transition)
8150 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8151 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8152 isa<ConstantFP>(Val)) {
8153 // Use a splat constant if it is not safe to use undef.
8154 NewVal = getConstantVector(
8155 cast<Constant>(Val),
8156 isa<UndefValue>(Val) ||
8157 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8158 } else
8159 llvm_unreachable("Did you modify shouldPromote and forget to update "
8160 "this?");
8161 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8162 }
8163 Transition->moveAfter(ToBePromoted);
8164 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8165}
8166
8167/// Some targets can do store(extractelement) with one instruction.
8168/// Try to push the extractelement towards the stores when the target
8169/// has this feature and this is profitable.
8170bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8171 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8172 if (DisableStoreExtract ||
8173 (!StressStoreExtract &&
8174 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8175 Inst->getOperand(1), CombineCost)))
8176 return false;
8177
8178 // At this point we know that Inst is a vector to scalar transition.
8179 // Try to move it down the def-use chain, until:
8180 // - We can combine the transition with its single use
8181 // => we got rid of the transition.
8182 // - We escape the current basic block
8183 // => we would need to check that we are moving it at a cheaper place and
8184 // we do not do that for now.
8185 BasicBlock *Parent = Inst->getParent();
8186 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8187 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8188 // If the transition has more than one use, assume this is not going to be
8189 // beneficial.
8190 while (Inst->hasOneUse()) {
8191 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8192 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8193
8194 if (ToBePromoted->getParent() != Parent) {
8195 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8196 << ToBePromoted->getParent()->getName()
8197 << ") than the transition (" << Parent->getName()
8198 << ").\n");
8199 return false;
8200 }
8201
8202 if (VPH.canCombine(ToBePromoted)) {
8203 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8204 << "will be combined with: " << *ToBePromoted << '\n');
8205 VPH.recordCombineInstruction(ToBePromoted);
8206 bool Changed = VPH.promote();
8207 NumStoreExtractExposed += Changed;
8208 return Changed;
8209 }
8210
8211 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8212 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8213 return false;
8214
8215 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8216
8217 VPH.enqueueForPromotion(ToBePromoted);
8218 Inst = ToBePromoted;
8219 }
8220 return false;
8221}
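// A minimal sketch (not part of the pass) of the cost comparison performed by
// VectorPromoteHelper::isProfitableToPromote above, with plain integers
// standing in for InstructionCost: the vector chain starts out charged with
// the store+extract combine cost, and promotion only happens if the scalar
// chain ends up strictly more expensive. The helper name is hypothetical.
static bool promotionPaysOff(unsigned ScalarChainCost, unsigned VectorChainCost,
                             unsigned StoreExtractCombineCost) {
  return ScalarChainCost > VectorChainCost + StoreExtractCombineCost;
}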
8222
8223/// For the instruction sequence of store below, F and I values
8224/// are bundled together as an i64 value before being stored into memory.
8225/// Sometimes it is more efficient to generate separate stores for F and I,
8226/// which can remove the bitwise instructions or sink them to colder places.
8227///
8228/// (store (or (zext (bitcast F to i32) to i64),
8229/// (shl (zext I to i64), 32)), addr) -->
8230/// (store F, addr) and (store I, addr+4)
8231///
8232/// Similarly, splitting for other merged store can also be beneficial, like:
8233/// For pair of {i32, i32}, i64 store --> two i32 stores.
8234/// For pair of {i32, i16}, i64 store --> two i32 stores.
8235/// For pair of {i16, i16}, i32 store --> two i16 stores.
8236/// For pair of {i16, i8}, i32 store --> two i16 stores.
8237/// For pair of {i8, i8}, i16 store --> two i8 stores.
8238///
8239/// We allow each target to determine specifically which kind of splitting is
8240/// supported.
8241///
8242/// The store patterns are commonly seen from the simple code snippet below
8243/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8244/// void goo(const std::pair<int, float> &);
8245/// hoo() {
8246/// ...
8247/// goo(std::make_pair(tmp, ftmp));
8248/// ...
8249/// }
8250///
8251/// Although we already have similar splitting in DAG Combine, we duplicate
8252/// it in CodeGenPrepare to catch the case in which the pattern spans
8253/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8254/// during code expansion.
8255static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8256 const TargetLowering &TLI) {
8257 // Handle simple but common cases only.
8258 Type *StoreType = SI.getValueOperand()->getType();
8259
8260 // The code below assumes shifting a value by <number of bits>,
8261 // whereas scalable vectors would have to be shifted by
8262 // <2log(vscale) + number of bits> in order to store the
8263 // low/high parts. Bailing out for now.
8264 if (StoreType->isScalableTy())
8265 return false;
8266
8267 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8268 DL.getTypeSizeInBits(StoreType) == 0)
8269 return false;
8270
8271 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8272 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8273 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8274 return false;
8275
8276 // Don't split the store if it is volatile.
8277 if (SI.isVolatile())
8278 return false;
8279
8280 // Match the following patterns:
8281 // (store (or (zext LValue to i64),
8282 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8283 // or
8284 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8285 // (zext LValue to i64),
8286 // Expect both operands of OR and the first operand of SHL to have only
8287 // one use.
8288 Value *LValue, *HValue;
8289 if (!match(SI.getValueOperand(),
8290 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8291 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8292 m_SpecificInt(HalfValBitSize))))))
8293 return false;
8294
8295 // Check that LValue and HValue are integers with size at most HalfValBitSize.
8296 if (!LValue->getType()->isIntegerTy() ||
8297 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8298 !HValue->getType()->isIntegerTy() ||
8299 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8300 return false;
8301
8302 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8303 // as the input of target query.
8304 auto *LBC = dyn_cast<BitCastInst>(LValue);
8305 auto *HBC = dyn_cast<BitCastInst>(HValue);
8306 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8307 : EVT::getEVT(LValue->getType());
8308 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8309 : EVT::getEVT(HValue->getType());
8310 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8311 return false;
8312
8313 // Start to split store.
8314 IRBuilder<> Builder(SI.getContext());
8315 Builder.SetInsertPoint(&SI);
8316
8317 // If LValue/HValue is a bitcast in another BB, create a new one in the
8318 // current BB so it may be merged with the split stores by the DAG combiner.
8319 if (LBC && LBC->getParent() != SI.getParent())
8320 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8321 if (HBC && HBC->getParent() != SI.getParent())
8322 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8323
8324 bool IsLE = SI.getDataLayout().isLittleEndian();
8325 auto CreateSplitStore = [&](Value *V, bool Upper) {
8326 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8327 Value *Addr = SI.getPointerOperand();
8328 Align Alignment = SI.getAlign();
8329 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8330 if (IsOffsetStore) {
8331 Addr = Builder.CreateGEP(
8332 SplitStoreType, Addr,
8333 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8334
8335 // When splitting the store in half, naturally one half will retain the
8336 // alignment of the original wider store, regardless of whether it was
8337 // over-aligned or not, while the other will require adjustment.
8338 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8339 }
8340 Builder.CreateAlignedStore(V, Addr, Alignment);
8341 };
8342
8343 CreateSplitStore(LValue, false);
8344 CreateSplitStore(HValue, true);
8345
8346 // Delete the old store.
8347 SI.eraseFromParent();
8348 return true;
8349}
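// A minimal sketch (not part of the pass) of how the upper half of the split
// store gets its alignment above: the upper half lives HalfValBitSize/8 bytes
// past the original pointer, so its alignment is the common alignment of the
// original store's alignment and that byte offset. upperHalfAlignment is a
// hypothetical helper.
#include "llvm/Support/Alignment.h"
static llvm::Align upperHalfAlignment(llvm::Align OriginalAlign,
                                      unsigned HalfValBitSize) {
  return llvm::commonAlignment(OriginalAlign, HalfValBitSize / 8);
}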
8350
8351// Return true if the GEP has two operands, the first operand is of a sequential
8352// type, and the second operand is a constant.
8353static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8354 gep_type_iterator I = gep_type_begin(*GEP);
8355 return GEP->getNumOperands() == 2 && I.isSequential() &&
8356 isa<ConstantInt>(GEP->getOperand(1));
8357}
8358
8359// Try unmerging GEPs to reduce liveness interference (register pressure) across
8360// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8361// reducing liveness interference across those edges benefits global register
8362// allocation. Currently handles only certain cases.
8363//
8364// For example, unmerge %GEPI and %UGEPI as below.
8365//
8366// ---------- BEFORE ----------
8367// SrcBlock:
8368// ...
8369// %GEPIOp = ...
8370// ...
8371// %GEPI = gep %GEPIOp, Idx
8372// ...
8373// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8374// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8375// (* %GEPIOp is alive on the indirectbr edges only because it's used by
8376// %UGEPI)
8377//
8378// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8379// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8380// ...
8381//
8382// DstBi:
8383// ...
8384// %UGEPI = gep %GEPIOp, UIdx
8385// ...
8386// ---------------------------
8387//
8388// ---------- AFTER ----------
8389// SrcBlock:
8390// ... (same as above)
8391// (* %GEPI is still alive on the indirectbr edges)
8392// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8393// unmerging)
8394// ...
8395//
8396// DstBi:
8397// ...
8398// %UGEPI = gep %GEPI, (UIdx-Idx)
8399// ...
8400// ---------------------------
8401//
8402// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8403// no longer alive on them.
8404//
8405// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8406// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8407// not to disable further simplifications and optimizations as a result of GEP
8408// merging.
8409//
8410// Note this unmerging may increase the length of the data flow critical path
8411// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8412// between the register pressure and the length of data-flow critical
8413// path. Restricting this to the uncommon IndirectBr case would minimize the
8414// impact of potentially longer critical path, if any, and the impact on compile
8415// time.
8416static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8417 const TargetTransformInfo *TTI) {
8418 BasicBlock *SrcBlock = GEPI->getParent();
8419 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8420 // (non-IndirectBr) cases exit early here.
8421 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8422 return false;
8423 // Check that GEPI is a simple gep with a single constant index.
8424 if (!GEPSequentialConstIndexed(GEPI))
8425 return false;
8426 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8427 // Check that GEPI is a cheap one.
8428 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8429 TargetTransformInfo::TCK_SizeAndLatency) >
8430 TargetTransformInfo::TCC_Basic)
8431 return false;
8432 Value *GEPIOp = GEPI->getOperand(0);
8433 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8434 if (!isa<Instruction>(GEPIOp))
8435 return false;
8436 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8437 if (GEPIOpI->getParent() != SrcBlock)
8438 return false;
8439 // Check that GEP is used outside the block, meaning it's alive on the
8440 // IndirectBr edge(s).
8441 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8442 if (auto *I = dyn_cast<Instruction>(Usr)) {
8443 if (I->getParent() != SrcBlock) {
8444 return true;
8445 }
8446 }
8447 return false;
8448 }))
8449 return false;
8450 // The second elements of the GEP chains to be unmerged.
8451 std::vector<GetElementPtrInst *> UGEPIs;
8452 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8453 // on IndirectBr edges.
8454 for (User *Usr : GEPIOp->users()) {
8455 if (Usr == GEPI)
8456 continue;
8457 // Check if Usr is an Instruction. If not, give up.
8458 if (!isa<Instruction>(Usr))
8459 return false;
8460 auto *UI = cast<Instruction>(Usr);
8461 // Check if Usr is in the same block as GEPIOp; if so, that is fine, skip.
8462 if (UI->getParent() == SrcBlock)
8463 continue;
8464 // Check if Usr is a GEP. If not, give up.
8465 if (!isa<GetElementPtrInst>(Usr))
8466 return false;
8467 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8468 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8469 // the pointer operand to it. If so, record it in the vector. If not, give
8470 // up.
8471 if (!GEPSequentialConstIndexed(UGEPI))
8472 return false;
8473 if (UGEPI->getOperand(0) != GEPIOp)
8474 return false;
8475 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8476 return false;
8477 if (GEPIIdx->getType() !=
8478 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8479 return false;
8480 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8481 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8482 TargetTransformInfo::TCK_SizeAndLatency) >
8483 TargetTransformInfo::TCC_Basic)
8484 return false;
8485 UGEPIs.push_back(UGEPI);
8486 }
8487 if (UGEPIs.size() == 0)
8488 return false;
8489 // Check the materializing cost of (Uidx-Idx).
8490 for (GetElementPtrInst *UGEPI : UGEPIs) {
8491 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8492 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8493 InstructionCost ImmCost = TTI->getIntImmCost(
8494 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8495 if (ImmCost > TargetTransformInfo::TCC_Basic)
8496 return false;
8497 }
8498 // Now unmerge between GEPI and UGEPIs.
8499 for (GetElementPtrInst *UGEPI : UGEPIs) {
8500 UGEPI->setOperand(0, GEPI);
8501 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8502 Constant *NewUGEPIIdx = ConstantInt::get(
8503 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8504 UGEPI->setOperand(1, NewUGEPIIdx);
8505 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8506 // inbounds to avoid UB.
8507 if (!GEPI->isInBounds()) {
8508 UGEPI->setIsInBounds(false);
8509 }
8510 }
8511 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8512 // alive on IndirectBr edges).
8513 assert(llvm::none_of(GEPIOp->users(),
8514 [&](User *Usr) {
8515 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8516 }) &&
8517 "GEPIOp is used outside SrcBlock");
8518 return true;
8519}
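// A minimal sketch (not part of the pass) of the index rewrite performed
// above: once %UGEPI is re-based on %GEPI instead of %GEPIOp, its constant
// index becomes the difference of the two original constant indices (both
// share the same integer type here). unmergedIndex is a hypothetical helper.
#include "llvm/ADT/APInt.h"
static llvm::APInt unmergedIndex(const llvm::APInt &UIdx,
                                 const llvm::APInt &Idx) {
  return UIdx - Idx;
}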
8520
8521static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8522 SmallSet<BasicBlock *, 32> &FreshBBs,
8523 bool IsHugeFunc) {
8524 // Try and convert
8525 // %c = icmp ult %x, 8
8526 // br %c, bla, blb
8527 // %tc = lshr %x, 3
8528 // to
8529 // %tc = lshr %x, 3
8530 // %c = icmp eq %tc, 0
8531 // br %c, bla, blb
8532 // Creating the cmp to zero can be better for the backend, especially if the
8533 // lshr produces flags that can be used automatically.
8534 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8535 return false;
8536
8537 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8538 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8539 return false;
8540
8541 Value *X = Cmp->getOperand(0);
8542 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8543
8544 for (auto *U : X->users()) {
8545 Instruction *UI = dyn_cast<Instruction>(U);
8546 // A quick dominance check
8547 if (!UI ||
8548 (UI->getParent() != Branch->getParent() &&
8549 UI->getParent() != Branch->getSuccessor(0) &&
8550 UI->getParent() != Branch->getSuccessor(1)) ||
8551 (UI->getParent() != Branch->getParent() &&
8552 !UI->getParent()->getSinglePredecessor()))
8553 continue;
8554
8555 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8556 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8557 IRBuilder<> Builder(Branch);
8558 if (UI->getParent() != Branch->getParent())
8559 UI->moveBefore(Branch);
8560 UI->dropPoisonGeneratingFlags();
8561 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8562 ConstantInt::get(UI->getType(), 0));
8563 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8564 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8565 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8566 return true;
8567 }
8568 if (Cmp->isEquality() &&
8569 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8570 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
8571 IRBuilder<> Builder(Branch);
8572 if (UI->getParent() != Branch->getParent())
8573 UI->moveBefore(Branch);
8574 UI->dropPoisonGeneratingFlags();
8575 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8576 ConstantInt::get(UI->getType(), 0));
8577 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8578 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8579 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8580 return true;
8581 }
8582 }
8583 return false;
8584}
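// A minimal sketch (not part of the pass) of the arithmetic behind the first
// rewrite above: for a power-of-two constant C, "x u< C" holds exactly when
// "x >> log2(C)" is zero, so the branch can reuse the shift result and compare
// it against zero. ultViaShift is a hypothetical helper; Log2C is assumed to
// be smaller than the bit width of unsigned.
static bool ultViaShift(unsigned X, unsigned Log2C) {
  return (X >> Log2C) == 0; // same truth value as X < (1u << Log2C)
}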
8585
8586bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8587 bool AnyChange = false;
8588 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8589
8590 // Bail out if we inserted the instruction to prevent optimizations from
8591 // stepping on each other's toes.
8592 if (InsertedInsts.count(I))
8593 return AnyChange;
8594
8595 // TODO: Move into the switch on opcode below here.
8596 if (PHINode *P = dyn_cast<PHINode>(I)) {
8597 // It is possible for very late stage optimizations (such as SimplifyCFG)
8598 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8599 // trivial PHI, go ahead and zap it here.
8600 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8601 LargeOffsetGEPMap.erase(P);
8602 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8603 P->eraseFromParent();
8604 ++NumPHIsElim;
8605 return true;
8606 }
8607 return AnyChange;
8608 }
8609
8610 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8611 // If the source of the cast is a constant, then this should have
8612 // already been constant folded. The only reason NOT to constant fold
8613 // it is if something (e.g. LSR) was careful to place the constant
8614 // evaluation in a block other than the one that uses it (e.g. to hoist
8615 // the address of globals out of a loop). If this is the case, we don't
8616 // want to forward-subst the cast.
8617 if (isa<Constant>(CI->getOperand(0)))
8618 return AnyChange;
8619
8620 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8621 return true;
8622
8623 if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8624 isa<TruncInst>(I)) &&
8625 TLI->optimizeExtendOrTruncateConversion(
8626 I, LI->getLoopFor(I->getParent()), *TTI))
8627 return true;
8628
8629 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8630 /// Sink a zext or sext into its user blocks if the target type doesn't
8631 /// fit in one register
8632 if (TLI->getTypeAction(CI->getContext(),
8633 TLI->getValueType(*DL, CI->getType())) ==
8634 TargetLowering::TypeExpandInteger) {
8635 return SinkCast(CI);
8636 } else {
8637 if (TLI->optimizeExtendOrTruncateConversion(
8638 I, LI->getLoopFor(I->getParent()), *TTI))
8639 return true;
8640
8641 bool MadeChange = optimizeExt(I);
8642 return MadeChange | optimizeExtUses(I);
8643 }
8644 }
8645 return AnyChange;
8646 }
8647
8648 if (auto *Cmp = dyn_cast<CmpInst>(I))
8649 if (optimizeCmp(Cmp, ModifiedDT))
8650 return true;
8651
8652 if (match(I, m_URem(m_Value(), m_Value())))
8653 if (optimizeURem(I))
8654 return true;
8655
8656 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8657 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8658 bool Modified = optimizeLoadExt(LI);
8659 unsigned AS = LI->getPointerAddressSpace();
8660 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8661 return Modified;
8662 }
8663
8664 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8665 if (splitMergedValStore(*SI, *DL, *TLI))
8666 return true;
8667 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8668 unsigned AS = SI->getPointerAddressSpace();
8669 return optimizeMemoryInst(I, SI->getOperand(1),
8670 SI->getOperand(0)->getType(), AS);
8671 }
8672
8673 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8674 unsigned AS = RMW->getPointerAddressSpace();
8675 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8676 }
8677
8678 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8679 unsigned AS = CmpX->getPointerAddressSpace();
8680 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8681 CmpX->getCompareOperand()->getType(), AS);
8682 }
8683
8684 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8685
8686 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8687 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8688 return true;
8689
8690 // TODO: Move this into the switch on opcode - it handles shifts already.
8691 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8692 BinOp->getOpcode() == Instruction::LShr)) {
8693 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8694 if (CI && TLI->hasExtractBitsInsn())
8695 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8696 return true;
8697 }
8698
8699 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8700 if (GEPI->hasAllZeroIndices()) {
8701 /// The GEP operand must be a pointer, so must its result -> BitCast
8702 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8703 GEPI->getName(), GEPI->getIterator());
8704 NC->setDebugLoc(GEPI->getDebugLoc());
8705 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8706 RecursivelyDeleteTriviallyDeadInstructions(
8707 GEPI, TLInfo, nullptr,
8708 [&](Value *V) { removeAllAssertingVHReferences(V); });
8709 ++NumGEPsElim;
8710 optimizeInst(NC, ModifiedDT);
8711 return true;
8712 }
8713 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8714 return true;
8715 }
8716 }
8717
8718 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8719 // freeze(icmp a, const)) -> icmp (freeze a), const
8720 // This helps generate efficient conditional jumps.
8721 Instruction *CmpI = nullptr;
8722 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8723 CmpI = II;
8724 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8725 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8726
8727 if (CmpI && CmpI->hasOneUse()) {
8728 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8729 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8730 isa<ConstantPointerNull>(Op0);
8731 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8732 isa<ConstantPointerNull>(Op1);
8733 if (Const0 || Const1) {
8734 if (!Const0 || !Const1) {
8735 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8736 F->takeName(FI);
8737 CmpI->setOperand(Const0 ? 1 : 0, F);
8738 }
8739 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8740 FI->eraseFromParent();
8741 return true;
8742 }
8743 }
8744 return AnyChange;
8745 }
8746
8747 if (tryToSinkFreeOperands(I))
8748 return true;
8749
8750 switch (I->getOpcode()) {
8751 case Instruction::Shl:
8752 case Instruction::LShr:
8753 case Instruction::AShr:
8754 return optimizeShiftInst(cast<BinaryOperator>(I));
8755 case Instruction::Call:
8756 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8757 case Instruction::Select:
8758 return optimizeSelectInst(cast<SelectInst>(I));
8759 case Instruction::ShuffleVector:
8760 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8761 case Instruction::Switch:
8762 return optimizeSwitchInst(cast<SwitchInst>(I));
8763 case Instruction::ExtractElement:
8764 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8765 case Instruction::Br:
8766 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8767 }
8768
8769 return AnyChange;
8770}
8771
8772/// Given an OR instruction, check to see if this is a bitreverse
8773/// idiom. If so, insert the new intrinsic and return true.
8774bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8775 if (!I.getType()->isIntegerTy() ||
8776 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8777 TLI->getValueType(*DL, I.getType(), true)))
8778 return false;
8779
8780 SmallVector<Instruction *, 4> Insts;
8781 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8782 return false;
8783 Instruction *LastInst = Insts.back();
8784 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8785 RecursivelyDeleteTriviallyDeadInstructions(
8786 &I, TLInfo, nullptr,
8787 [&](Value *V) { removeAllAssertingVHReferences(V); });
8788 return true;
8789}
8790
8791// In this pass we look for GEP and cast instructions that are used
8792// across basic blocks and rewrite them to improve basic-block-at-a-time
8793// selection.
8794bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8795 SunkAddrs.clear();
8796 bool MadeChange = false;
8797
8798 do {
8799 CurInstIterator = BB.begin();
8800 ModifiedDT = ModifyDT::NotModifyDT;
8801 while (CurInstIterator != BB.end()) {
8802 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8803 if (ModifiedDT != ModifyDT::NotModifyDT) {
8804 // For huge functions we tend to quickly go through the inner optimization
8805 // opportunities in the BB. So we go back to the BB head to re-optimize
8806 // each instruction instead of going back to the function head.
8807 if (IsHugeFunc) {
8808 DT.reset();
8809 getDT(*BB.getParent());
8810 break;
8811 } else {
8812 return true;
8813 }
8814 }
8815 }
8816 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8817
8818 bool MadeBitReverse = true;
8819 while (MadeBitReverse) {
8820 MadeBitReverse = false;
8821 for (auto &I : reverse(BB)) {
8822 if (makeBitReverse(I)) {
8823 MadeBitReverse = MadeChange = true;
8824 break;
8825 }
8826 }
8827 }
8828 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8829
8830 return MadeChange;
8831}
8832
8833// Some CGP optimizations may move or alter what's computed in a block. Check
8834// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8835bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8836 assert(isa<DbgValueInst>(I));
8837 DbgValueInst &DVI = *cast<DbgValueInst>(I);
8838
8839 // Does this dbg.value refer to a sunk address calculation?
8840 bool AnyChange = false;
8841 SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8842 DVI.location_ops().end());
8843 for (Value *Location : LocationOps) {
8844 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8845 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8846 if (SunkAddr) {
8847 // Point dbg.value at locally computed address, which should give the best
8848 // opportunity to be accurately lowered. This update may change the type
8849 // of pointer being referred to; however this makes no difference to
8850 // debugging information, and we can't generate bitcasts that may affect
8851 // codegen.
8852 DVI.replaceVariableLocationOp(Location, SunkAddr);
8853 AnyChange = true;
8854 }
8855 }
8856 return AnyChange;
8857}
8858
8859bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8860 bool AnyChange = false;
8861 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8862 AnyChange |= fixupDbgVariableRecord(DVR);
8863 return AnyChange;
8864}
8865
8866// FIXME: should updating debug-info really cause the "changed" flag to fire,
8867// which can cause a function to be reprocessed?
8868bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8869 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8870 DVR.Type != DbgVariableRecord::LocationType::Assign)
8871 return false;
8872
8873 // Does this DbgVariableRecord refer to a sunk address calculation?
8874 bool AnyChange = false;
8875 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8876 DVR.location_ops().end());
8877 for (Value *Location : LocationOps) {
8878 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8879 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8880 if (SunkAddr) {
8881 // Point dbg.value at locally computed address, which should give the best
8882 // opportunity to be accurately lowered. This update may change the type
8883 // of pointer being referred to; however this makes no difference to
8884 // debugging information, and we can't generate bitcasts that may affect
8885 // codegen.
8886 DVR.replaceVariableLocationOp(Location, SunkAddr);
8887 AnyChange = true;
8888 }
8889 }
8890 return AnyChange;
8891}
8892
8893static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
8894 DVI->removeFromParent();
8895 if (isa<PHINode>(VI))
8896 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
8897 else
8898 DVI->insertAfter(VI);
8899}
8900
8901static void DbgInserterHelper(DbgVariableRecord *DVR, Instruction *VI) {
8902 DVR->removeFromParent();
8903 BasicBlock *VIBB = VI->getParent();
8904 if (isa<PHINode>(VI))
8905 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8906 else
8907 VIBB->insertDbgRecordAfter(DVR, VI);
8908}
8909
8910// A llvm.dbg.value may be using a value before its definition, due to
8911// optimizations in this pass and others. Scan for such dbg.values, and rescue
8912// them by moving the dbg.value to immediately after the value definition.
8913// FIXME: Ideally this should never be necessary, and this has the potential
8914// to re-order dbg.value intrinsics.
8915bool CodeGenPrepare::placeDbgValues(Function &F) {
8916 bool MadeChange = false;
8917 DominatorTree DT(F);
8918
8919 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
8920 SmallVector<Instruction *, 4> VIs;
8921 for (Value *V : DbgItem->location_ops())
8922 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8923 VIs.push_back(VI);
8924
8925 // This item may depend on multiple instructions, complicating any
8926 // potential sink. This block takes the defensive approach, opting to
8927 // "undef" the item if it depends on more than one instruction and any of
8928 // them does not dominate it.
8929 for (Instruction *VI : VIs) {
8930 if (VI->isTerminator())
8931 continue;
8932
8933 // If VI is a phi in a block with an EHPad terminator, we can't insert
8934 // after it.
8935 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
8936 continue;
8937
8938 // If the defining instruction dominates the dbg.value, we do not need
8939 // to move the dbg.value.
8940 if (DT.dominates(VI, Position))
8941 continue;
8942
8943 // If we depend on multiple instructions and any of them doesn't
8944 // dominate this DVI, we probably can't salvage it: moving it to
8945 // after any of the instructions could cause us to lose the others.
8946 if (VIs.size() > 1) {
8947 LLVM_DEBUG(
8948 dbgs()
8949 << "Unable to find valid location for Debug Value, undefing:\n"
8950 << *DbgItem);
8951 DbgItem->setKillLocation();
8952 break;
8953 }
8954
8955 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
8956 << *DbgItem << ' ' << *VI);
8957 DbgInserterHelper(DbgItem, VI);
8958 MadeChange = true;
8959 ++NumDbgValueMoved;
8960 }
8961 };
8962
8963 for (BasicBlock &BB : F) {
8964 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
8965 // Process dbg.value intrinsics.
8966 DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
8967 if (DVI) {
8968 DbgProcessor(DVI, DVI);
8969 continue;
8970 }
8971
8972 // If this isn't a dbg.value, process any attached DbgVariableRecord
8973 // records attached to this instruction.
8974 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
8975 filterDbgVars(Insn.getDbgRecordRange()))) {
8976 if (DVR.Type != DbgVariableRecord::LocationType::Value)
8977 continue;
8978 DbgProcessor(&DVR, &Insn);
8979 }
8980 }
8981 }
8982
8983 return MadeChange;
8984}
8985
8986// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
8987// probes can be chained dependencies of other regular DAG nodes and block DAG
8988// combine optimizations.
8989bool CodeGenPrepare::placePseudoProbes(Function &F) {
8990 bool MadeChange = false;
8991 for (auto &Block : F) {
8992 // Move the remaining probes to the beginning of the block.
8993 auto FirstInst = Block.getFirstInsertionPt();
8994 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
8995 ++FirstInst;
8996 BasicBlock::iterator I(FirstInst);
8997 I++;
8998 while (I != Block.end()) {
8999 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9000 II->moveBefore(&*FirstInst);
9001 MadeChange = true;
9002 }
9003 }
9004 }
9005 return MadeChange;
9006}
9007
9008/// Scale down both weights to fit into uint32_t.
9009static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9010 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9011 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9012 NewTrue = NewTrue / Scale;
9013 NewFalse = NewFalse / Scale;
9014}
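// A hedged usage sketch (illustrative only, never called by the pass):
// weights that overflow uint32_t are scaled down together, which keeps their
// ratio approximately intact while making both representable as uint32_t.
[[maybe_unused]] static void scaleWeightsExample() {
  uint64_t NewTrue = uint64_t(1) << 40;  // does not fit in uint32_t
  uint64_t NewFalse = uint64_t(1) << 33;
  scaleWeights(NewTrue, NewFalse);
  // Both now fit in uint32_t and NewTrue is still roughly 128x NewFalse.
}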
9015
9016/// Some targets prefer to split a conditional branch like:
9017/// \code
9018/// %0 = icmp ne i32 %a, 0
9019/// %1 = icmp ne i32 %b, 0
9020/// %or.cond = or i1 %0, %1
9021/// br i1 %or.cond, label %TrueBB, label %FalseBB
9022/// \endcode
9023/// into multiple branch instructions like:
9024/// \code
9025/// bb1:
9026/// %0 = icmp ne i32 %a, 0
9027/// br i1 %0, label %TrueBB, label %bb2
9028/// bb2:
9029/// %1 = icmp ne i32 %b, 0
9030/// br i1 %1, label %TrueBB, label %FalseBB
9031/// \endcode
9032/// This usually allows instruction selection to do even further optimizations
9033/// and combine the compare with the branch instruction. Currently this is
9034/// applied for targets which have "cheap" jump instructions.
9035///
9036/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9037///
9038bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9039 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9040 return false;
9041
9042 bool MadeChange = false;
9043 for (auto &BB : F) {
9044 // Does this BB end with the following?
9045 // %cond1 = icmp|fcmp|binary instruction ...
9046 // %cond2 = icmp|fcmp|binary instruction ...
9047 // %cond.or = or|and i1 %cond1, cond2
9048 // br i1 %cond.or label %dest1, label %dest2"
9049 Instruction *LogicOp;
9050 BasicBlock *TBB, *FBB;
9051 if (!match(BB.getTerminator(),
9052 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9053 continue;
9054
9055 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9056 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9057 continue;
9058
9059 // The merging of mostly empty BB can cause a degenerate branch.
9060 if (TBB == FBB)
9061 continue;
9062
9063 unsigned Opc;
9064 Value *Cond1, *Cond2;
9065 if (match(LogicOp,
9066 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9067 Opc = Instruction::And;
9068 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9069 m_OneUse(m_Value(Cond2)))))
9070 Opc = Instruction::Or;
9071 else
9072 continue;
9073
9074 auto IsGoodCond = [](Value *Cond) {
9075 return match(
9076 Cond,
9077 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9078 m_LogicalOr(m_Value(), m_Value()))));
9079 };
9080 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9081 continue;
9082
9083 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9084
9085 // Create a new BB.
9086 auto *TmpBB =
9087 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9088 BB.getParent(), BB.getNextNode());
9089 if (IsHugeFunc)
9090 FreshBBs.insert(TmpBB);
9091
9092 // Update original basic block by using the first condition directly by the
9093 // branch instruction and removing the no longer needed and/or instruction.
9094 Br1->setCondition(Cond1);
9095 LogicOp->eraseFromParent();
9096
9097 // Depending on the condition we have to either replace the true or the
9098 // false successor of the original branch instruction.
9099 if (Opc == Instruction::And)
9100 Br1->setSuccessor(0, TmpBB);
9101 else
9102 Br1->setSuccessor(1, TmpBB);
9103
9104 // Fill in the new basic block.
9105 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9106 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9107 I->removeFromParent();
9108 I->insertBefore(Br2);
9109 }
9110
9111 // Update PHI nodes in both successors. The original BB needs to be
9112 // replaced in one successor's PHI nodes, because the branch comes now from
9113 // the newly generated BB (TmpBB). In the other successor we need to add one
9114 // incoming edge to the PHI nodes, because both branch instructions target
9115 // now the same successor. Depending on the original branch condition
9116 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9117 // we perform the correct update for the PHI nodes.
9118 // This doesn't change the successor order of the just created branch
9119 // instruction (or any other instruction).
9120 if (Opc == Instruction::Or)
9121 std::swap(TBB, FBB);
9122
9123 // Replace the old BB with the new BB.
9124 TBB->replacePhiUsesWith(&BB, TmpBB);
9125
9126 // Add another incoming edge from the new BB.
9127 for (PHINode &PN : FBB->phis()) {
9128 auto *Val = PN.getIncomingValueForBlock(&BB);
9129 PN.addIncoming(Val, TmpBB);
9130 }
9131
9132 // Update the branch weights (from SelectionDAGBuilder::
9133 // FindMergedConditions).
9134 if (Opc == Instruction::Or) {
9135 // Codegen X | Y as:
9136 // BB1:
9137 // jmp_if_X TBB
9138 // jmp TmpBB
9139 // TmpBB:
9140 // jmp_if_Y TBB
9141 // jmp FBB
9142 //
9143
9144 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
9145 // The requirement is that
9146 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9147 // = TrueProb for original BB.
9148 // Assuming the original weights are A and B, one choice is to set BB1's
9149 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9150 // assumes that
9151 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9152 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9153 // TmpBB, but the math is more complicated.
9154 uint64_t TrueWeight, FalseWeight;
9155 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9156 uint64_t NewTrueWeight = TrueWeight;
9157 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9158 scaleWeights(NewTrueWeight, NewFalseWeight);
9159 Br1->setMetadata(LLVMContext::MD_prof,
9160 MDBuilder(Br1->getContext())
9161 .createBranchWeights(TrueWeight, FalseWeight,
9162 hasBranchWeightOrigin(*Br1)));
9163
9164 NewTrueWeight = TrueWeight;
9165 NewFalseWeight = 2 * FalseWeight;
9166 scaleWeights(NewTrueWeight, NewFalseWeight);
9167 Br2->setMetadata(LLVMContext::MD_prof,
9168 MDBuilder(Br2->getContext())
9169 .createBranchWeights(TrueWeight, FalseWeight));
9170 }
9171 } else {
9172 // Codegen X & Y as:
9173 // BB1:
9174 // jmp_if_X TmpBB
9175 // jmp FBB
9176 // TmpBB:
9177 // jmp_if_Y TBB
9178 // jmp FBB
9179 //
9180 // This requires creation of TmpBB after CurBB.
9181
9182 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9183 // The requirement is that
9184 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9185 // = FalseProb for original BB.
9186 // Assuming the original weights are A and B, one choice is to set BB1's
9187 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9188 // assumes that
9189 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9190 uint64_t TrueWeight, FalseWeight;
9191 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9192 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9193 uint64_t NewFalseWeight = FalseWeight;
9194 scaleWeights(NewTrueWeight, NewFalseWeight);
9195 Br1->setMetadata(LLVMContext::MD_prof,
9196 MDBuilder(Br1->getContext())
9197 .createBranchWeights(TrueWeight, FalseWeight));
9198
9199 NewTrueWeight = 2 * TrueWeight;
9200 NewFalseWeight = FalseWeight;
9201 scaleWeights(NewTrueWeight, NewFalseWeight);
9202 Br2->setMetadata(LLVMContext::MD_prof,
9203 MDBuilder(Br2->getContext())
9204 .createBranchWeights(TrueWeight, FalseWeight));
9205 }
9206 }
9207
9208 ModifiedDT = ModifyDT::ModifyBBDT;
9209 MadeChange = true;
9210
9211 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9212 TmpBB->dump());
9213 }
9214 return MadeChange;
9215}
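A worked check of the two weight identities stated in the comments above, writing the original branch weights as A (true edge) and B (false edge). For the X | Y split, BB1 gets weights A and A+2B and TmpBB gets A and 2B, so

$$\frac{A}{2A+2B} + \frac{A+2B}{2A+2B}\cdot\frac{A}{A+2B} = \frac{A}{2A+2B} + \frac{A}{2A+2B} = \frac{A}{A+B},$$

which is TrueProb for the original BB. For the X & Y split, BB1 gets 2A+B and B and TmpBB gets 2A and B, so

$$\frac{B}{2A+2B} + \frac{2A+B}{2A+2B}\cdot\frac{B}{2A+B} = \frac{B}{2A+2B} + \frac{B}{2A+2B} = \frac{B}{A+B},$$

which is FalseProb for the original BB.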
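The same identities can be checked mechanically. Below is a minimal standalone sketch, assuming only the documented behavior of scaleWeights (scale both weights down so they fit into uint32_t); the helper names scaleDown and chainedProbEquals are illustrative and are not part of CodeGenPrepare.cpp.

// Standalone sketch; not part of CodeGenPrepare.cpp. It checks, with exact
// integer arithmetic, that the split weights chosen above preserve the
// original edge probabilities. scaleDown() is an illustrative stand-in for
// scaleWeights(), whose documented effect is to scale both weights down so
// that they fit into uint32_t.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

static void scaleDown(uint64_t &NewTrue, uint64_t &NewFalse) {
  uint64_t Scale = (std::max(NewTrue, NewFalse) >> 32) + 1;
  NewTrue /= Scale;
  NewFalse /= Scale;
}

// True iff T1/(T1+F1) + F1/(T1+F1) * T2/(T2+F2) equals Num/Den, compared as
// exact fractions: the chained probability of reaching the "true" target
// either directly from BB1 or through TmpBB.
static bool chainedProbEquals(uint64_t T1, uint64_t F1, uint64_t T2,
                              uint64_t F2, uint64_t Num, uint64_t Den) {
  uint64_t CombNum = T1 * (T2 + F2) + F1 * T2;
  uint64_t CombDen = (T1 + F1) * (T2 + F2);
  return CombNum * Den == Num * CombDen;
}

int main() {
  const uint64_t A = 3, B = 1; // original true/false weights of the branch

  // X | Y: BB1 gets {A, A+2B}, TmpBB gets {A, 2B}; TrueProb must stay A/(A+B).
  uint64_t T1 = A, F1 = A + 2 * B, T2 = A, F2 = 2 * B;
  scaleDown(T1, F1);
  scaleDown(T2, F2);
  assert(chainedProbEquals(T1, F1, T2, F2, A, A + B));

  // X & Y: BB1 gets {2A+B, B}, TmpBB gets {2A, B}; FalseProb must stay
  // B/(A+B). The false path chains the same way, so swap the weight roles.
  uint64_t T3 = 2 * A + B, F3 = B, T4 = 2 * A, F4 = B;
  scaleDown(T3, F3);
  scaleDown(T4, F4);
  assert(chainedProbEquals(F3, T3, F4, T4, B, A + B));

  std::puts("branch-weight identities hold");
  return 0;
}

With A = 3 and B = 1, the two checks reduce to 30/40 = 3/4 and 14/56 = 1/4, matching the original 3:1 true/false weights.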
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI)
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, const TargetTransformInfo &TTI, const DataLayout &DL)
Some targets have better codegen for ctpop(X) u< 2 than ctpop(X) == 1.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinkinig and/cmp into branches."))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
#define DEBUG_TYPE
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:622
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:282
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(...)
Definition: Debug.h:106
This file defines the DenseMap class.
uint64_t Addr
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1504
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:39
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:986
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:380
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:435
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1511
unsigned logBase2() const
Definition: APInt.h:1739
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:959
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:440
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1542
an instruction to allocate memory on the stack
Definition: Instructions.h:63
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:124
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:117
void setAlignment(Align Align)
Definition: Instructions.h:128
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:429
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:410
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:501
static unsigned getPointerOperandIndex()
Definition: Instructions.h:631
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:704
static unsigned getPointerOperandIndex()
Definition: Instructions.h:872
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:489
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:279
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
BinaryOps getOpcode() const
Definition: InstrTypes.h:370
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1416
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1349
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1459
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1294
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1299
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1285
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:444
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:661
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:673
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:702
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:696
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:700
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:698
@ ICMP_EQ
equal
Definition: InstrTypes.h:694
@ ICMP_NE
not equal
Definition: InstrTypes.h:695
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:825
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:763
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
Definition: CmpPredicate.h:22
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
Definition: Constants.h:53
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1108
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2321
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2625
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:866
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:208
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:163
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:148
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1472
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1421
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:420
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:373
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:851
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:156
bool erase(const KeyT &Val)
Definition: DenseMap.h:321
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:152
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:791
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
Definition: Function.h:809
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:933
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:143
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition: Globals.cpp:310
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
Type * getValueType() const
Definition: GlobalValue.h:296
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2165
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:463
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1048
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2566
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1363
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1889
Value * createIsFPClass(Value *FPNum, unsigned Test)
Definition: IRBuilder.cpp:1237
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2398
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2429
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2273
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2155
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1144
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1849
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2383
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:499
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2697
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:78
void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:471
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
Definition: Instruction.h:368
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:829
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1679
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isShift() const
Definition: Instruction.h:282
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:468
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:176
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:261
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
void clear()
Definition: MapVector.h:88
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:692
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
PointerIntPair - This class implements a pair of a pointer and small integer.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1878
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:298
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:363
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:401
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:384
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:519
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:132
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:175
bool erase(const T &V)
Definition: SmallSet.h:193
void clear()
Definition: SmallSet.h:204
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:222
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:181
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:937
void reserve(size_type N)
Definition: SmallVector.h:663
iterator erase(const_iterator CI)
Definition: SmallVector.h:737
typename SuperClass::iterator iterator
Definition: SmallVector.h:577
void resize(size_type N)
Definition: SmallVector.h:638
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
An instruction for storing to memory.
Definition: Instructions.h:292
static unsigned getPointerOperandIndex()
Definition: Instructions.h:383
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:567
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:596
Class to represent struct types.
Definition: DerivedTypes.h:218
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
virtual bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Basic
The cost of a typical 'add' instruction.
bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:270
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:252
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:355
'undef' values are things that do not have specified contents.
Definition: Constants.h:1412
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1859
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:288
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:241
void setOperand(unsigned i, Value *Val)
Definition: User.h:233
Value * getOperand(unsigned i) const
Definition: User.h:228
unsigned getNumOperands() const
Definition: User.h:250
See the file comment.
Definition: ValueMap.h:84
void clear()
Definition: ValueMap.h:145
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:740
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:234
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:153
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
iterator_range< use_iterator > uses()
Definition: Value.h:376
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:819
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5304
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
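A minimal sketch of the tracking value handle above: hold on to an instruction across a transform that may delete it, then check pointsToAliveValue() before touching it again (the cleanup callback and scenario are assumed, not taken from this file).
#include "llvm/IR/Instruction.h"
#include "llvm/IR/ValueHandle.h"

static bool stillAliveAfterCleanup(llvm::Instruction *I, void (*RunCleanup)()) {
  llvm::WeakTrackingVH Handle(I);             // follows RAUW, goes null if I is deleted
  RunCleanup();                               // hypothetical callback that may erase I
  return Handle.pointsToAliveValue();
}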
This class represents zero extension of integer types.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isNonZero() const
Definition: TypeSize.h:158
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
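The TypeSize queries above guard the common "fixed size only" pattern: bail out on scalable sizes before calling getFixedValue(). A minimal sketch, assuming DL and Ty are valid:
#include <cstdint>
#include <optional>
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

static std::optional<uint64_t> getFixedStoreSize(const llvm::DataLayout &DL, llvm::Type *Ty) {
  llvm::TypeSize Size = DL.getTypeStoreSize(Ty);
  if (Size.isScalable() || !Size.isNonZero())
    return std::nullopt;                      // size depends on vscale, or is zero
  return Size.getFixedValue();                // safe: a compile-time-constant byte count
}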
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:844
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:524
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:982
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:826
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:885
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:864
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
Definition: PatternMatch.h:105
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
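The PatternMatch helpers listed above compose into declarative checks. A minimal sketch (hypothetical predicate) that recognizes an add whose left operand is a shift by a constant:
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/PatternMatch.h"

static bool isShlPlusValue(llvm::Instruction *I) {
  using namespace llvm::PatternMatch;
  llvm::Value *X = nullptr, *Y = nullptr;
  const llvm::APInt *ShAmt = nullptr;
  // Matches "add (shl X, C), Y" where C is a constant integer (or splat).
  return match(I, m_Add(m_Shl(m_Value(X), m_APInt(ShAmt)), m_Value(Y)));
}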
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:480
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
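all_of, any_of, and none_of take ranges directly, which is how they are used throughout this pass. A minimal sketch with a hypothetical predicate:
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"

static bool blockIsOnlyPHIsAndTerminator(const llvm::BasicBlock &BB) {
  return llvm::all_of(BB, [](const llvm::Instruction &I) {
    return llvm::isa<llvm::PHINode>(I) || I.isTerminator();
  });
}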
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1697
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:546
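A minimal sketch of the usual pairing with replaceAllUsesWith: once the old instruction has no uses, the utility above deletes it (and any operands that became dead) unless it has side effects. The helper name is illustrative.
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

static void replaceAndClean(llvm::Instruction *Old, llvm::Value *New) {
  Old->replaceAllUsesWith(New);
  // No-op if Old still has side effects; otherwise deletes Old and newly-dead operands.
  llvm::RecursivelyDeleteTriviallyDeadInstructions(Old);
}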
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:136
auto pred_end(const MachineBasicBlock *BB)
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2204
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1656
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2082
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:657
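make_early_inc_range is the standard way to erase instructions while iterating a block: the iterator advances before the body runs, so eraseFromParent() does not invalidate it. A minimal sketch (hypothetical cleanup helper):
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

static void dropDeadSideEffectFreeInsts(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (I.use_empty() && !I.mayHaveSideEffects() && !I.isTerminator())
      I.eraseFromParent();                    // safe: the range already advanced past I
}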
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2055
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:155
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
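A minimal sketch of calling simplifyInstruction with a SimplifyQuery built from the module's DataLayout (richer queries can also carry TLI, DT, and AC; this form is the bare minimum, and the helper name is illustrative):
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Module.h"

static bool trySimplify(llvm::Instruction *I) {
  const llvm::DataLayout &DL = I->getModule()->getDataLayout();
  if (llvm::Value *V = llvm::simplifyInstruction(I, llvm::SimplifyQuery(DL))) {
    I->replaceAllUsesWith(V);                 // I is now dead; the caller may delete it
    return true;
  }
  return false;
}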
Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:242
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container.
Definition: STLExtras.h:2107
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:4094
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1664
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1753
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition: Analysis.cpp:199
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:51
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition: Analysis.cpp:584
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1938
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:303
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:217
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
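A minimal sketch of reading branch weights off a conditional branch through the MDNode overload declared above (the !prof lookup via LLVMContext::MD_prof is standard; the helper itself is illustrative):
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ProfDataUtils.h"

static bool getBranchWeights(const llvm::BranchInst *BI,
                             llvm::SmallVectorImpl<uint32_t> &Weights) {
  if (llvm::MDNode *Prof = BI->getMetadata(llvm::LLVMContext::MD_prof))
    return llvm::extractBranchWeights(Prof, Weights);
  return false;                               // no !prof metadata attached
}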
auto pred_begin(const MachineBasicBlock *BB)
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
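A minimal sketch of driving bypassSlowDivision: the map keys are the slow division bit widths and the values are the narrower widths to try at run time. The 64-to-32 choice below is illustrative, and the header path is an assumption on my part:
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

static bool bypass64BitDivs(llvm::BasicBlock *BB) {
  llvm::DenseMap<unsigned, unsigned> BypassWidths;
  BypassWidths[64] = 32;                      // rewrite 64-bit div/rem to a guarded 32-bit fast path
  return llvm::bypassSlowDivision(BB, BypassWidths);
}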
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
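Two one-line sketches of the alignment helpers above: commonAlignment conservatively combines a known base alignment with a byte offset, and isAligned checks divisibility. Both helper names are hypothetical.
#include <cstdint>
#include "llvm/Support/Alignment.h"

// Known alignment of (Base + Offset) when Base is known to be BaseAlign-aligned.
static llvm::Align alignmentAtOffset(llvm::Align BaseAlign, uint64_t Offset) {
  return llvm::commonAlignment(BaseAlign, Offset);
}

// True when SizeInBytes is a multiple of the alignment A.
static bool isMultipleOfAlign(llvm::Align A, uint64_t SizeInBytes) {
  return llvm::isAligned(A, SizeInBytes);
}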
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
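A minimal sketch of guarding a slow path with SplitBlockAndInsertIfThen: with Unreachable=false the new "then" block ends in a branch back to the split point, so the builder can fill it with conditionally executed code. SlowPathFn is an assumed zero-argument callee, not something this pass defines.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

static void emitGuardedCall(llvm::Instruction *InsertPt, llvm::Value *Cond,
                            llvm::FunctionCallee SlowPathFn) {
  llvm::Instruction *ThenTerm = llvm::SplitBlockAndInsertIfThen(
      Cond, InsertPt->getIterator(), /*Unreachable=*/false);
  llvm::IRBuilder<> Builder(ThenTerm);        // inserts just before the then-block terminator
  Builder.CreateCall(SlowPathFn);             // runs only when Cond is true
}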
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:279
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:289
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:243
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
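A minimal sketch of the EVT queries above, as used when classifying casts by bit width (helper name hypothetical; both types are assumed to be sized, first-class types):
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

static bool isWideningIntCast(llvm::Type *SrcTy, llvm::Type *DstTy) {
  llvm::EVT SrcVT = llvm::EVT::getEVT(SrcTy);
  llvm::EVT DstVT = llvm::EVT::getEVT(DstTy);
  return SrcVT.isInteger() && DstVT.isInteger() && DstVT.bitsGT(SrcVT);
}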
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
ExtAddrMode()=default
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
const DataLayout & DL
Definition: SimplifyQuery.h:71
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.