1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/Argument.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InlineAsm.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/MDBuilder.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Operator.h"
73#include "llvm/IR/Statepoint.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/Use.h"
76#include "llvm/IR/User.h"
77#include "llvm/IR/Value.h"
78#include "llvm/IR/ValueHandle.h"
79#include "llvm/IR/ValueMap.h"
81#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of select created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
137static cl::opt<bool> DisableBranchOpts(
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
176static cl::opt<bool> DisablePreheaderProtect(
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
180static cl::opt<bool> ProfileGuidedSectionPrefix(
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
184static cl::opt<bool> ProfileUnknownInSpecialSection(
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
187 "profile, we cannot tell the function is cold for sure because "
188 "it may be a function newly added without ever being sampled. "
189 "With the flag enabled, compiler can put such profile unknown "
190 "functions into a special section, so runtime system can choose "
191 "to handle it in a different way than .text section, to save "
192 "RAM for example. "));
193
194static cl::opt<bool> BBSectionsGuidedSectionPrefix(
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
203static cl::opt<uint64_t> FreqRatioToSkipMerge(
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
212static cl::opt<bool> EnableTypePromotionMerge(
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
265static cl::opt<unsigned>
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
269static cl::opt<unsigned>
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::Hidden,
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290 NotModifyDT, // Does not modify any dominator tree.
291 ModifyBBDT, // Modifies the basic-block-level dominator tree.
292 ModifyInstDT // Modifies instruction dominance within a basic block.
293 // This usually means we move/delete/insert an instruction
294 // in a basic block, so we should re-iterate the instructions
295 // in such a basic block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using SExts = SmallVector<Instruction *, 16>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
329 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the original types of instructions before their
335 /// promotion, for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
347 MapVector<AssertingVH<Value>,
348 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP base after splitting the GEPs having large offset.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map large offset GEPs to their serial numbers.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of promoted SExt instructions.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare() {}
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM) {}
373 /// If we encounter a huge function, we need to limit the build time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs is like a worklist: it collects the updated BBs which need
377 /// to be optimized again.
378 /// Note: to keep the build time of this pass in check, whenever a BB is
379 /// updated in a huge function we need to insert it into FreshBBs.
380 SmallPtrSet<BasicBlock *, 32> FreshBBs;
381
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
391 bool run(Function &F, FunctionAnalysisManager &AM);
392
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeInlineAsmInst(CallInst *CS);
435 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436 bool optimizeExt(Instruction *&I);
437 bool optimizeExtUses(Instruction *I);
438 bool optimizeLoadExt(LoadInst *Load);
439 bool optimizeShiftInst(BinaryOperator *BO);
440 bool optimizeFunnelShift(IntrinsicInst *Fsh);
441 bool optimizeSelectInst(SelectInst *SI);
442 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443 bool optimizeSwitchType(SwitchInst *SI);
444 bool optimizeSwitchPhiConstants(SwitchInst *SI);
445 bool optimizeSwitchInst(SwitchInst *SI);
446 bool optimizeExtractElementInst(Instruction *Inst);
447 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
455 const SmallVectorImpl<Instruction *> &Exts,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool optimizeURem(Instruction *Rem);
475 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 bool unfoldPowerOf2Test(CmpInst *Cmp);
478 void verifyBFIUpdates(Function &F);
479 bool _run(Function &F);
480};
481
482class CodeGenPrepareLegacyPass : public FunctionPass {
483public:
484 static char ID; // Pass identification, replacement for typeid
485
486 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
487 initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry());
488 }
489
490 bool runOnFunction(Function &F) override;
491
492 StringRef getPassName() const override { return "CodeGen Prepare"; }
493
494 void getAnalysisUsage(AnalysisUsage &AU) const override {
495 // FIXME: When we can selectively preserve passes, preserve the domtree.
496 AU.addRequired<ProfileSummaryInfoWrapperPass>();
497 AU.addRequired<TargetLibraryInfoWrapperPass>();
498 AU.addRequired<TargetPassConfig>();
499 AU.addRequired<TargetTransformInfoWrapperPass>();
500 AU.addRequired<LoopInfoWrapperPass>();
501 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
522 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540 "Optimize for code generation", false, false)
541
542FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
543 return new CodeGenPrepareLegacyPass();
544}
545
546PreservedAnalyses CodeGenPreparePass::run(Function &F,
547 FunctionAnalysisManager &AM) {
548 CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
554 PreservedAnalyses PA;
558 return PA;
559}
560
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
567 TTI = &AM.getResult<TargetIRAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
569 BPI.reset(new BranchProbabilityInfo(F, *LI));
570 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 BBSectionsProfileReader =
574 AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F);
575 return _run(F);
576}
577
578bool CodeGenPrepare::_run(Function &F) {
579 bool EverMadeChange = false;
580
581 OptSize = F.hasOptSize();
582 // Use the basic-block-sections profile to promote hot functions to .text.hot
583 // if requested.
584 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
585 BBSectionsProfileReader->isFunctionHot(F.getName())) {
586 F.setSectionPrefix("hot");
587 } else if (ProfileGuidedSectionPrefix) {
588 // The hot attribute overrides profile-count-based hotness, while
589 // profile-count-based hotness overrides the cold attribute.
590 // This is a conservative behavior.
591 if (F.hasFnAttribute(Attribute::Hot) ||
592 PSI->isFunctionHotInCallGraph(&F, *BFI))
593 F.setSectionPrefix("hot");
594 // If PSI shows this function is not hot, we place the function into the
595 // unlikely section if (1) PSI shows this is a cold function, or
596 // (2) the function has the cold attribute.
597 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
598 F.hasFnAttribute(Attribute::Cold))
599 F.setSectionPrefix("unlikely");
600 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
601 PSI->isFunctionHotnessUnknown(F))
602 F.setSectionPrefix("unknown");
603 }
604
605 /// This optimization identifies DIV instructions that can be
606 /// profitably bypassed and carried out with a shorter, faster divide.
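 /// For example (illustrative, not from the original source): on a target
 /// where 64-bit division is slow but 32-bit division is fast, a division
 /// such as
 ///
 ///   %res = udiv i64 %a, %b
 ///
 /// can be rewritten to first check at run time whether both operands fit in
 /// 32 bits and, on that path, perform a 32-bit udiv whose result is
 /// zero-extended back to i64.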
607 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
608 const DenseMap<unsigned int, unsigned int> &BypassWidths =
609 TLI->getBypassSlowDivWidths();
610 BasicBlock *BB = &*F.begin();
611 while (BB != nullptr) {
612 // bypassSlowDivision may create new BBs, but we don't want to reapply the
613 // optimization to those blocks.
614 BasicBlock *Next = BB->getNextNode();
615 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
616 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
617 BB = Next;
618 }
619 }
620
621 // Get rid of @llvm.assume builtins before attempting to eliminate empty
622 // blocks, since there might be blocks that only contain @llvm.assume calls
623 // (plus arguments that we can get rid of).
624 EverMadeChange |= eliminateAssumptions(F);
625
626 // Eliminate blocks that contain only PHI nodes and an
627 // unconditional branch.
628 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
629
630 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 if (!DisableBranchOpts)
632 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
633
634 // Split some critical edges where one of the sources is an indirect branch,
635 // to help generate sane code for PHIs involving such edges.
636 EverMadeChange |=
637 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
638
639 // If we are optimizing a huge function, we need to consider the build
640 // time, because the basic algorithm's complexity is near O(N!).
641 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
642
643 // Transformations above may invalidate dominator tree and/or loop info.
644 DT.reset();
645 LI->releaseMemory();
646 LI->analyze(getDT(F));
647
648 bool MadeChange = true;
649 bool FuncIterated = false;
650 while (MadeChange) {
651 MadeChange = false;
652
653 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
654 if (FuncIterated && !FreshBBs.contains(&BB))
655 continue;
656
657 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
658 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
659
660 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
661 DT.reset();
662
663 MadeChange |= Changed;
664 if (IsHugeFunc) {
665 // If the BB is updated, it may still have a chance to be optimized.
666 // This usually happens during sink optimization.
667 // For example:
668 //
669 // bb0:
670 // %and = and i32 %a, 4
671 // %cmp = icmp eq i32 %and, 0
672 //
673 // If %cmp is sunk to another BB, %and will have a chance to sink as well.
674 if (Changed)
675 FreshBBs.insert(&BB);
676 else if (FuncIterated)
677 FreshBBs.erase(&BB);
678 } else {
679 // For small/normal functions, we restart BB iteration if the dominator
680 // tree of the Function was changed.
681 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
682 break;
683 }
684 }
685 // We have iterated over all the BBs in the function (this only matters for huge functions).
686 FuncIterated = IsHugeFunc;
687
688 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
689 MadeChange |= mergeSExts(F);
690 if (!LargeOffsetGEPMap.empty())
691 MadeChange |= splitLargeGEPOffsets();
692 MadeChange |= optimizePhiTypes(F);
693
694 if (MadeChange)
695 eliminateFallThrough(F, DT.get());
696
697#ifndef NDEBUG
698 if (MadeChange && VerifyLoopInfo)
699 LI->verify(getDT(F));
700#endif
701
702 // Really free removed instructions during promotion.
703 for (Instruction *I : RemovedInsts)
704 I->deleteValue();
705
706 EverMadeChange |= MadeChange;
707 SeenChainsForSExt.clear();
708 ValToSExtendedUses.clear();
709 RemovedInsts.clear();
710 LargeOffsetGEPMap.clear();
711 LargeOffsetGEPID.clear();
712 }
713
714 NewGEPBases.clear();
715 SunkAddrs.clear();
716
717 if (!DisableBranchOpts) {
718 MadeChange = false;
719 // Use a set vector to get deterministic iteration order. The order the
720 // blocks are removed may affect whether or not PHI nodes in successors
721 // are removed.
722 SmallSetVector<BasicBlock *, 8> WorkList;
723 for (BasicBlock &BB : F) {
724 SmallVector<BasicBlock *, 2> Successors(successors(&BB));
725 MadeChange |= ConstantFoldTerminator(&BB, true);
726 if (!MadeChange)
727 continue;
728
729 for (BasicBlock *Succ : Successors)
730 if (pred_empty(Succ))
731 WorkList.insert(Succ);
732 }
733
734 // Delete the dead blocks and any of their dead successors.
735 MadeChange |= !WorkList.empty();
736 while (!WorkList.empty()) {
737 BasicBlock *BB = WorkList.pop_back_val();
738 SmallVector<BasicBlock *, 2> Successors(successors(BB));
739
740 DeleteDeadBlock(BB);
741
742 for (BasicBlock *Succ : Successors)
743 if (pred_empty(Succ))
744 WorkList.insert(Succ);
745 }
746
747 // Merge pairs of basic blocks with unconditional branches, connected by
748 // a single edge.
749 if (EverMadeChange || MadeChange)
750 MadeChange |= eliminateFallThrough(F);
751
752 EverMadeChange |= MadeChange;
753 }
754
755 if (!DisableGCOpts) {
756 SmallVector<GCStatepointInst *, 2> Statepoints;
757 for (BasicBlock &BB : F)
758 for (Instruction &I : BB)
759 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
760 Statepoints.push_back(SP);
761 for (auto &I : Statepoints)
762 EverMadeChange |= simplifyOffsetableRelocate(*I);
763 }
764
765 // Do this last to clean up use-before-def scenarios introduced by other
766 // preparatory transforms.
767 EverMadeChange |= placeDbgValues(F);
768 EverMadeChange |= placePseudoProbes(F);
769
770#ifndef NDEBUG
771 if (VerifyBFIUpdates)
772 verifyBFIUpdates(F);
773#endif
774
775 return EverMadeChange;
776}
777
778bool CodeGenPrepare::eliminateAssumptions(Function &F) {
779 bool MadeChange = false;
780 for (BasicBlock &BB : F) {
781 CurInstIterator = BB.begin();
782 while (CurInstIterator != BB.end()) {
783 Instruction *I = &*(CurInstIterator++);
784 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
785 MadeChange = true;
786 Value *Operand = Assume->getOperand(0);
787 Assume->eraseFromParent();
788
789 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
790 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
791 });
792 }
793 }
794 }
795 return MadeChange;
796}
797
798/// An instruction is about to be deleted, so remove all references to it in our
799/// GEP-tracking data structures.
800void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
801 LargeOffsetGEPMap.erase(V);
802 NewGEPBases.erase(V);
803
804 auto *GEP = dyn_cast<GetElementPtrInst>(V);
805 if (!GEP)
806 return;
807
808 LargeOffsetGEPID.erase(GEP);
809
810 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
811 if (VecI == LargeOffsetGEPMap.end())
812 return;
813
814 auto &GEPVector = VecI->second;
815 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
816
817 if (GEPVector.empty())
818 LargeOffsetGEPMap.erase(VecI);
819}
820
821// Verify BFI has been updated correctly by recomputing BFI and comparing them.
822void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
823 DominatorTree NewDT(F);
824 LoopInfo NewLI(NewDT);
825 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
826 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
827 NewBFI.verifyMatch(*BFI);
828}
829
830/// Merge basic blocks which are connected by a single edge, where one of the
831/// basic blocks has a single successor pointing to the other basic block,
832/// which has a single predecessor.
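/// Schematically (illustrative IR, not from the original source): a pair
///
///   bb0:
///     ...
///     br label %bb1
///   bb1:                                            ; preds = %bb0
///     ...
///
/// is merged by appending the instructions of bb1 to bb0 and erasing bb1.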
833bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
834 bool Changed = false;
835 // Scan all of the blocks in the function, except for the entry block.
836 // Use a temporary array to avoid iterator being invalidated when
837 // deleting blocks.
840
841 SmallSet<WeakTrackingVH, 16> Preds;
842 for (auto &Block : Blocks) {
843 auto *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
881 RemoveRedundantDbgInstrs(BB);
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
895 BasicBlock::iterator BBI = BI->getIterator();
896 if (BBI != BB->begin()) {
897 --BBI;
898 if (!isa<PHINode>(BBI))
899 return nullptr;
900 }
901
902 // Do not break infinite loops.
903 BasicBlock *DestBB = BI->getSuccessor(0);
904 if (DestBB == BB)
905 return nullptr;
906
907 if (!canMergeBlocks(BB, DestBB))
908 DestBB = nullptr;
909
910 return DestBB;
911}
912
913/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
914/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
915/// edges in ways that are non-optimal for isel. Start by eliminating these
916/// blocks so we can split them the way we want them.
917bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
918 SmallPtrSet<BasicBlock *, 16> Preheaders;
919 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
920 while (!LoopList.empty()) {
921 Loop *L = LoopList.pop_back_val();
922 llvm::append_range(LoopList, *L);
923 if (BasicBlock *Preheader = L->getLoopPreheader())
924 Preheaders.insert(Preheader);
925 }
926
927 bool MadeChange = false;
928 // Copy blocks into a temporary array to avoid iterator invalidation issues
929 // as we remove them.
930 // Note that this intentionally skips the entry block.
931 SmallVector<WeakTrackingVH, 16> Blocks;
932 for (auto &Block : llvm::drop_begin(F)) {
933 // Delete phi nodes that could block deleting other empty blocks.
934 if (!DisableDeletePHIs)
935 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
936 Blocks.push_back(&Block);
937 }
938
939 for (auto &Block : Blocks) {
940 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
941 if (!BB)
942 continue;
943 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
944 if (!DestBB ||
945 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
946 continue;
947
948 eliminateMostlyEmptyBlock(BB);
949 MadeChange = true;
950 }
951 return MadeChange;
952}
953
954bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
955 BasicBlock *DestBB,
956 bool isPreheader) {
957 // Do not delete loop preheaders if doing so would create a critical edge.
958 // Loop preheaders can be good locations to spill registers. If the
959 // preheader is deleted and we create a critical edge, registers may be
960 // spilled in the loop body instead.
961 if (!DisablePreheaderProtect && isPreheader &&
962 !(BB->getSinglePredecessor() &&
963 BB->getSinglePredecessor()->getSinglePredecessor()))
964 return false;
965
966 // Skip merging if the block's successor is also a successor to any callbr
967 // that leads to this block.
968 // FIXME: Is this really needed? Is this a correctness issue?
969 for (BasicBlock *Pred : predecessors(BB)) {
970 if (isa<CallBrInst>(Pred->getTerminator()) &&
971 llvm::is_contained(successors(Pred), DestBB))
972 return false;
973 }
974
975 // Try to skip merging if the unique predecessor of BB is terminated by a
976 // switch or indirect branch instruction, and BB is used as an incoming block
977 // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
978 // to add COPY instructions in the predecessor of BB instead of BB (if it is
979 // not merged). Note that the critical edge created by merging such blocks
980 // won't be split in MachineSink because the jump table is not analyzable. By
981 // keeping such an empty block (BB), ISel will place COPY instructions in BB,
982 // not in the predecessor of BB.
983 BasicBlock *Pred = BB->getUniquePredecessor();
984 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
985 isa<IndirectBrInst>(Pred->getTerminator())))
986 return true;
987
988 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
989 return true;
990
991 // We use a simple cost heuristic which determines that skipping merging is
992 // profitable if the cost of skipping merging is less than the cost of
993 // merging : Cost(skipping merging) < Cost(merging BB), where the
994 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
995 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
996 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
997 // Freq(Pred) / Freq(BB) > 2.
998 // Note that if there are multiple empty blocks sharing the same incoming
999 // value for the PHIs in the DestBB, we consider them together. In such
1000 // case, Cost(merging BB) will be the sum of their frequencies.
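  // Worked example (illustrative numbers): with the default ratio of 2, if
  // Freq(Pred) = 300 and the combined frequency of BB and the other
  // same-incoming-value empty blocks is 100, then 300 > 2 * 100, so we skip
  // the merge (return false); if Freq(Pred) = 150, then 150 <= 200 and the
  // merge is considered profitable (return true).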
1001
1002 if (!isa<PHINode>(DestBB->begin()))
1003 return true;
1004
1005 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1006
1007 // Find all other incoming blocks from which incoming values of all PHIs in
1008 // DestBB are the same as the ones from BB.
1009 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1010 if (DestBBPred == BB)
1011 continue;
1012
1013 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1014 return DestPN.getIncomingValueForBlock(BB) ==
1015 DestPN.getIncomingValueForBlock(DestBBPred);
1016 }))
1017 SameIncomingValueBBs.insert(DestBBPred);
1018 }
1019
1020 // See if all of BB's incoming values are the same as the value from Pred. In
1021 // this case, there is no reason to skip merging because COPYs are expected to be placed in
1022 // Pred already.
1023 if (SameIncomingValueBBs.count(Pred))
1024 return true;
1025
1026 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1027 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1028
1029 for (auto *SameValueBB : SameIncomingValueBBs)
1030 if (SameValueBB->getUniquePredecessor() == Pred &&
1031 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1032 BBFreq += BFI->getBlockFreq(SameValueBB);
1033
1034 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1035 return !Limit || PredFreq <= *Limit;
1036}
1037
1038/// Return true if we can merge BB into DestBB if there is a single
1039/// unconditional branch between them, and BB contains no other non-phi
1040/// instructions.
1041bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1042 const BasicBlock *DestBB) const {
1043 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1044 // the successor. If there are more complex condition (e.g. preheaders),
1045 // don't mess around with them.
1046 for (const PHINode &PN : BB->phis()) {
1047 for (const User *U : PN.users()) {
1048 const Instruction *UI = cast<Instruction>(U);
1049 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1050 return false;
1051 // If User is inside DestBB block and it is a PHINode then check
1052 // incoming value. If incoming value is not from BB then this is
1053 // a complex condition (e.g. preheaders) we want to avoid here.
1054 if (UI->getParent() == DestBB) {
1055 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1056 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1057 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1058 if (Insn && Insn->getParent() == BB &&
1059 Insn->getParent() != UPN->getIncomingBlock(I))
1060 return false;
1061 }
1062 }
1063 }
1064 }
1065
1066 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1067 // and DestBB may have conflicting incoming values for the block. If so, we
1068 // can't merge the block.
1069 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1070 if (!DestBBPN)
1071 return true; // no conflict.
1072
1073 // Collect the preds of BB.
1074 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1075 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1076 // It is faster to get preds from a PHI than with pred_iterator.
1077 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1078 BBPreds.insert(BBPN->getIncomingBlock(i));
1079 } else {
1080 BBPreds.insert_range(predecessors(BB));
1081 }
1082
1083 // Walk the preds of DestBB.
1084 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1085 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1086 if (BBPreds.count(Pred)) { // Common predecessor?
1087 for (const PHINode &PN : DestBB->phis()) {
1088 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1089 const Value *V2 = PN.getIncomingValueForBlock(BB);
1090
1091 // If V2 is a phi node in BB, look up what the mapped value will be.
1092 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1093 if (V2PN->getParent() == BB)
1094 V2 = V2PN->getIncomingValueForBlock(Pred);
1095
1096 // If there is a conflict, bail out.
1097 if (V1 != V2)
1098 return false;
1099 }
1100 }
1101 }
1102
1103 return true;
1104}
1105
1106/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1107static void replaceAllUsesWith(Value *Old, Value *New,
1108 SmallPtrSet<BasicBlock *, 32> &FreshBBs,
1109 bool IsHuge) {
1110 auto *OldI = dyn_cast<Instruction>(Old);
1111 if (OldI) {
1112 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1113 UI != E; ++UI) {
1114 Instruction *User = cast<Instruction>(*UI);
1115 if (IsHuge)
1116 FreshBBs.insert(User->getParent());
1117 }
1118 }
1119 Old->replaceAllUsesWith(New);
1120}
1121
1122/// Eliminate a basic block that has only phi's and an unconditional branch in
1123/// it.
1124void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1125 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1126 BasicBlock *DestBB = BI->getSuccessor(0);
1127
1128 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1129 << *BB << *DestBB);
1130
1131 // If the destination block has a single pred, then this is a trivial edge,
1132 // just collapse it.
1133 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1134 if (SinglePred != DestBB) {
1135 assert(SinglePred == BB &&
1136 "Single predecessor not the same as predecessor");
1137 // Merge DestBB into SinglePred/BB and delete it.
1138 MergeBlockIntoPredecessor(DestBB);
1139 // Note: BB(=SinglePred) will not be deleted on this path.
1140 // DestBB(=its single successor) is the one that was deleted.
1141 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1142
1143 if (IsHugeFunc) {
1144 // Update FreshBBs to optimize the merged BB.
1145 FreshBBs.insert(SinglePred);
1146 FreshBBs.erase(DestBB);
1147 }
1148 return;
1149 }
1150 }
1151
1152 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1153 // to handle the new incoming edges it is about to have.
1154 for (PHINode &PN : DestBB->phis()) {
1155 // Remove the incoming value for BB, and remember it.
1156 Value *InVal = PN.removeIncomingValue(BB, false);
1157
1158 // Two options: either the InVal is a phi node defined in BB or it is some
1159 // value that dominates BB.
1160 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1161 if (InValPhi && InValPhi->getParent() == BB) {
1162 // Add all of the input values of the input PHI as inputs of this phi.
1163 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1164 PN.addIncoming(InValPhi->getIncomingValue(i),
1165 InValPhi->getIncomingBlock(i));
1166 } else {
1167 // Otherwise, add one instance of the dominating value for each edge that
1168 // we will be adding.
1169 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1170 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1171 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1172 } else {
1173 for (BasicBlock *Pred : predecessors(BB))
1174 PN.addIncoming(InVal, Pred);
1175 }
1176 }
1177 }
1178
1179 // Preserve loop Metadata.
1180 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1181 for (auto *Pred : predecessors(BB))
1182 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1183 }
1184
1185 // The PHIs are now updated, change everything that refers to BB to use
1186 // DestBB and remove BB.
1187 BB->replaceAllUsesWith(DestBB);
1188 BB->eraseFromParent();
1189 ++NumBlocksElim;
1190
1191 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1192}
1193
1194// Computes a map of base pointer relocation instructions to corresponding
1195// derived pointer relocation instructions given a vector of all relocate calls
1196static void computeBaseDerivedRelocateMap(
1197 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1198 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
1199 &RelocateInstMap) {
1200 // Collect information in two maps: one primarily for locating the base object
1201 // while filling the second map; the second map is the final structure holding
1202 // a mapping between Base and corresponding Derived relocate calls
1203 MapVector<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
1204 for (auto *ThisRelocate : AllRelocateCalls) {
1205 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1206 ThisRelocate->getDerivedPtrIndex());
1207 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1208 }
1209 for (auto &Item : RelocateIdxMap) {
1210 std::pair<unsigned, unsigned> Key = Item.first;
1211 if (Key.first == Key.second)
1212 // Base relocation: nothing to insert
1213 continue;
1214
1215 GCRelocateInst *I = Item.second;
1216 auto BaseKey = std::make_pair(Key.first, Key.first);
1217
1218 // We're iterating over RelocateIdxMap so we cannot modify it.
1219 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1220 if (MaybeBase == RelocateIdxMap.end())
1221 // TODO: We might want to insert a new base object relocate and gep off
1222 // that, if there are enough derived object relocates.
1223 continue;
1224
1225 RelocateInstMap[MaybeBase->second].push_back(I);
1226 }
1227}
1228
1229// Accepts a GEP and extracts the operands into a vector provided they're all
1230// small integer constants
1231static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1232 SmallVectorImpl<Value *> &OffsetV) {
1233 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1234 // Only accept small constant integer operands
1235 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1236 if (!Op || Op->getZExtValue() > 20)
1237 return false;
1238 }
1239
1240 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1241 OffsetV.push_back(GEP->getOperand(i));
1242 return true;
1243}
1244
1245// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1246 // replace, computes a replacement, and applies it.
1247static bool
1248simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1249 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1250 bool MadeChange = false;
1251 // We must ensure that the relocation of the derived pointer is defined after
1252 // the relocation of the base pointer. If we find a relocation that corresponds
1253 // to the base and is defined earlier than the relocation of the base, we move
1254 // the relocation of the base right before the found relocation. We consider
1255 // only relocations in the same basic block as the relocation of the base.
1256 // Relocations from other basic blocks are skipped by this optimization; we do not care about them.
1257 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1258 &*R != RelocatedBase; ++R)
1259 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1260 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1261 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1262 RelocatedBase->moveBefore(RI->getIterator());
1263 MadeChange = true;
1264 break;
1265 }
1266
1267 for (GCRelocateInst *ToReplace : Targets) {
1268 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1269 "Not relocating a derived object of the original base object");
1270 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1271 // A duplicate relocate call. TODO: coalesce duplicates.
1272 continue;
1273 }
1274
1275 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1276 // Base and derived relocates are in different basic blocks.
1277 // In this case transform is only valid when base dominates derived
1278 // relocate. However it would be too expensive to check dominance
1279 // for each such relocate, so we skip the whole transformation.
1280 continue;
1281 }
1282
1283 Value *Base = ToReplace->getBasePtr();
1284 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1285 if (!Derived || Derived->getPointerOperand() != Base)
1286 continue;
1287
1288 SmallVector<Value *, 2> OffsetV;
1289 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1290 continue;
1291
1292 // Create a Builder and replace the target callsite with a gep
1293 assert(RelocatedBase->getNextNode() &&
1294 "Should always have one since it's not a terminator");
1295
1296 // Insert after RelocatedBase
1297 IRBuilder<> Builder(RelocatedBase->getNextNode());
1298 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1299
1300 // If gc_relocate does not match the actual type, cast it to the right type.
1301 // In theory, there must be a bitcast after gc_relocate if the type does not
1302 // match, and we should reuse it to get the derived pointer. But there could be
1303 // cases like this:
1304 // bb1:
1305 // ...
1306 // %g1 = call coldcc i8 addrspace(1)*
1307 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1308 //
1309 // bb2:
1310 // ...
1311 // %g2 = call coldcc i8 addrspace(1)*
1312 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1313 //
1314 // merge:
1315 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1316 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1317 //
1318 // In this case, we cannot find the bitcast anymore. So we insert a new
1319 // bitcast whether or not there is already one. In this way, we can handle
1320 // all cases, and the extra bitcast should be optimized away in later
1321 // passes.
1322 Value *ActualRelocatedBase = RelocatedBase;
1323 if (RelocatedBase->getType() != Base->getType()) {
1324 ActualRelocatedBase =
1325 Builder.CreateBitCast(RelocatedBase, Base->getType());
1326 }
1327 Value *Replacement =
1328 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1329 ArrayRef(OffsetV));
1330 Replacement->takeName(ToReplace);
1331 // If the newly generated derived pointer's type does not match the original
1332 // derived pointer's type, cast the new derived pointer to match it. Same
1333 // reasoning as above.
1334 Value *ActualReplacement = Replacement;
1335 if (Replacement->getType() != ToReplace->getType()) {
1336 ActualReplacement =
1337 Builder.CreateBitCast(Replacement, ToReplace->getType());
1338 }
1339 ToReplace->replaceAllUsesWith(ActualReplacement);
1340 ToReplace->eraseFromParent();
1341
1342 MadeChange = true;
1343 }
1344 return MadeChange;
1345}
1346
1347// Turns this:
1348//
1349// %base = ...
1350// %ptr = gep %base + 15
1351// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1352// %base' = relocate(%tok, i32 4, i32 4)
1353// %ptr' = relocate(%tok, i32 4, i32 5)
1354// %val = load %ptr'
1355//
1356// into this:
1357//
1358// %base = ...
1359// %ptr = gep %base + 15
1360// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1361// %base' = gc.relocate(%tok, i32 4, i32 4)
1362// %ptr' = gep %base' + 15
1363// %val = load %ptr'
1364bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1365 bool MadeChange = false;
1366 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1367 for (auto *U : I.users())
1368 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1369 // Collect all the relocate calls associated with a statepoint
1370 AllRelocateCalls.push_back(Relocate);
1371
1372 // We need at least one base pointer relocation + one derived pointer
1373 // relocation to mangle
1374 if (AllRelocateCalls.size() < 2)
1375 return false;
1376
1377 // RelocateInstMap is a mapping from the base relocate instruction to the
1378 // corresponding derived relocate instructions
1379 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1380 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1381 if (RelocateInstMap.empty())
1382 return false;
1383
1384 for (auto &Item : RelocateInstMap)
1385 // Item.first is the RelocatedBase to offset against
1386 // Item.second is the vector of Targets to replace
1387 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1388 return MadeChange;
1389}
1390
1391/// Sink the specified cast instruction into its user blocks.
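/// For example (illustrative IR, not from the original source):
///
///   defbb:
///     %c = trunc i64 %x to i32
///     br label %usebb
///   usebb:
///     %u = add i32 %c, 1
///
/// becomes
///
///   defbb:
///     br label %usebb
///   usebb:
///     %c1 = trunc i64 %x to i32
///     %u = add i32 %c1, 1
///
/// so the cast is materialized in the block that actually uses it (one clone
/// per user block) and no cross-block virtual register copy is needed.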
1392static bool SinkCast(CastInst *CI) {
1393 BasicBlock *DefBB = CI->getParent();
1394
1395 /// InsertedCasts - Only insert a cast in each block once.
1396 DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1397
1398 bool MadeChange = false;
1399 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1400 UI != E;) {
1401 Use &TheUse = UI.getUse();
1402 Instruction *User = cast<Instruction>(*UI);
1403
1404 // Figure out which BB this cast is used in. For PHI's this is the
1405 // appropriate predecessor block.
1406 BasicBlock *UserBB = User->getParent();
1407 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1408 UserBB = PN->getIncomingBlock(TheUse);
1409 }
1410
1411 // Preincrement use iterator so we don't invalidate it.
1412 ++UI;
1413
1414 // The first insertion point of a block containing an EH pad is after the
1415 // pad. If the pad is the user, we cannot sink the cast past the pad.
1416 if (User->isEHPad())
1417 continue;
1418
1419 // If the block selected to receive the cast is an EH pad that does not
1420 // allow non-PHI instructions before the terminator, we can't sink the
1421 // cast.
1422 if (UserBB->getTerminator()->isEHPad())
1423 continue;
1424
1425 // If this user is in the same block as the cast, don't change the cast.
1426 if (UserBB == DefBB)
1427 continue;
1428
1429 // If we have already inserted a cast into this block, use it.
1430 CastInst *&InsertedCast = InsertedCasts[UserBB];
1431
1432 if (!InsertedCast) {
1433 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1434 assert(InsertPt != UserBB->end());
1435 InsertedCast = cast<CastInst>(CI->clone());
1436 InsertedCast->insertBefore(*UserBB, InsertPt);
1437 }
1438
1439 // Replace a use of the cast with a use of the new cast.
1440 TheUse = InsertedCast;
1441 MadeChange = true;
1442 ++NumCastUses;
1443 }
1444
1445 // If we removed all uses, nuke the cast.
1446 if (CI->use_empty()) {
1447 salvageDebugInfo(*CI);
1448 CI->eraseFromParent();
1449 MadeChange = true;
1450 }
1451
1452 return MadeChange;
1453}
1454
1455/// If the specified cast instruction is a noop copy (e.g. it's casting from
1456/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1457/// reduce the number of virtual registers that must be created and coalesced.
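/// For example (following the i32->i8 PPC case above; illustrative IR):
///
///   %t = trunc i32 %x to i8
///
/// is a noop copy once both i8 and i32 are promoted to the same legal
/// integer type, so it is sunk into its user blocks via SinkCast, whereas a
/// widening zext/sext or an fp<->int conversion is not a noop and is left
/// alone.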
1458///
1459/// Return true if any changes are made.
1460static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1461 const DataLayout &DL) {
1462 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1463 // than sinking only nop casts, but is helpful on some platforms.
1464 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1465 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1466 ASC->getDestAddressSpace()))
1467 return false;
1468 }
1469
1470 // If this is a noop copy,
1471 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1472 EVT DstVT = TLI.getValueType(DL, CI->getType());
1473
1474 // This is an fp<->int conversion?
1475 if (SrcVT.isInteger() != DstVT.isInteger())
1476 return false;
1477
1478 // If this is an extension, it will be a zero or sign extension, which
1479 // isn't a noop.
1480 if (SrcVT.bitsLT(DstVT))
1481 return false;
1482
1483 // If these values will be promoted, find out what they will be promoted
1484 // to. This helps us consider truncates on PPC as noop copies when they
1485 // are.
1486 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1487 TargetLowering::TypePromoteInteger)
1488 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1489 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1490 TargetLowering::TypePromoteInteger)
1491 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1492
1493 // If, after promotion, these are the same types, this is a noop copy.
1494 if (SrcVT != DstVT)
1495 return false;
1496
1497 return SinkCast(CI);
1498}
1499
1500// Match a simple increment by constant operation. Note that if a sub is
1501// matched, the step is negated (as if the step had been canonicalized to
1502// an add, even though we leave the instruction alone.)
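// For example (illustrative): for %iv.next = add i32 %iv, 4 this sets
// LHS = %iv and Step = i32 4; for %iv.next = sub i32 %iv, 4 it sets
// LHS = %iv and Step = i32 -4, as if the sub had been written as an add.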
1503static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1504 Constant *&Step) {
1505 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1506 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1507 m_Instruction(LHS), m_Constant(Step)))))
1508 return true;
1509 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1510 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1511 m_Instruction(LHS), m_Constant(Step))))) {
1512 Step = ConstantExpr::getNeg(Step);
1513 return true;
1514 }
1515 return false;
1516}
1517
1518/// If given \p PN is an inductive variable with value IVInc coming from the
1519/// backedge, and on each iteration it gets increased by Step, return pair
1520/// <IVInc, Step>. Otherwise, return std::nullopt.
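/// For example (illustrative IR, not from the original source): in
///
///   header:
///     %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///     ...
///   latch:
///     %iv.next = add i32 %iv, 1
///     br i1 %done, label %exit, label %header
///
/// getIVIncrement(%iv, LI) returns the pair <%iv.next, i32 1>.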
1521static std::optional<std::pair<Instruction *, Constant *>>
1522getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1523 const Loop *L = LI->getLoopFor(PN->getParent());
1524 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1525 return std::nullopt;
1526 auto *IVInc =
1527 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1528 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1529 return std::nullopt;
1530 Instruction *LHS = nullptr;
1531 Constant *Step = nullptr;
1532 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1533 return std::make_pair(IVInc, Step);
1534 return std::nullopt;
1535}
1536
1537static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1538 auto *I = dyn_cast<Instruction>(V);
1539 if (!I)
1540 return false;
1541 Instruction *LHS = nullptr;
1542 Constant *Step = nullptr;
1543 if (!matchIncrement(I, LHS, Step))
1544 return false;
1545 if (auto *PN = dyn_cast<PHINode>(LHS))
1546 if (auto IVInc = getIVIncrement(PN, LI))
1547 return IVInc->first == I;
1548 return false;
1549}
1550
1551bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1552 Value *Arg0, Value *Arg1,
1553 CmpInst *Cmp,
1554 Intrinsic::ID IID) {
1555 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1556 if (!isIVIncrement(BO, LI))
1557 return false;
1558 const Loop *L = LI->getLoopFor(BO->getParent());
1559 assert(L && "L should not be null after isIVIncrement()");
1560 // Do not risk moving the increment into a child loop.
1561 if (LI->getLoopFor(Cmp->getParent()) != L)
1562 return false;
1563
1564 // Finally, we need to ensure that the insert point will dominate all
1565 // existing uses of the increment.
1566
1567 auto &DT = getDT(*BO->getParent()->getParent());
1568 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1569 // If we're moving up the dom tree, all uses are trivially dominated.
1570 // (This is the common case for code produced by LSR.)
1571 return true;
1572
1573 // Otherwise, special case the single use in the phi recurrence.
1574 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1575 };
1576 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1577 // We used to use a dominator tree here to allow multi-block optimization.
1578 // But that was problematic because:
1579 // 1. It could cause a perf regression by hoisting the math op into the
1580 // critical path.
1581 // 2. It could cause a perf regression by creating a value that was live
1582 // across multiple blocks and increasing register pressure.
1583 // 3. Use of a dominator tree could cause large compile-time regression.
1584 // This is because we recompute the DT on every change in the main CGP
1585 // run-loop. The recomputing is probably unnecessary in many cases, so if
1586 // that was fixed, using a DT here would be ok.
1587 //
1588 // There is one important particular case we still want to handle: if BO is
1589 // the IV increment. Important properties that make it profitable:
1590 // - We can speculate IV increment anywhere in the loop (as long as the
1591 // indvar Phi is its only user);
1592 // - Upon computing Cmp, we effectively compute something equivalent to the
1593    //   IV increment (even though it is spelled differently in the IR). So
1594    //   moving it up to the cmp point does not really increase register pressure.
1595 return false;
1596 }
1597
1598 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1599 if (BO->getOpcode() == Instruction::Add &&
1600 IID == Intrinsic::usub_with_overflow) {
1601 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1602    Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1603  }
1604
1605 // Insert at the first instruction of the pair.
1606 Instruction *InsertPt = nullptr;
1607 for (Instruction &Iter : *Cmp->getParent()) {
1608 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1609 // the overflow intrinsic are defined.
1610 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1611 InsertPt = &Iter;
1612 break;
1613 }
1614 }
1615 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1616
1617 IRBuilder<> Builder(InsertPt);
1618 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1619 if (BO->getOpcode() != Instruction::Xor) {
1620 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1621 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1622 } else
1623 assert(BO->hasOneUse() &&
1624 "Patterns with XOr should use the BO only in the compare");
1625 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1626 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1627 Cmp->eraseFromParent();
1628 BO->eraseFromParent();
1629 return true;
1630}
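// Shape of the rewrite performed above (a sketch with hypothetical values
// %a/%b and IID = uadd_with_overflow; the "math"/"ov" names come from the
// builder calls above):
//   %mathov = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %mathov, 0   ; replaces BO (unless XOR)
//   %ov = extractvalue { i32, i1 } %mathov, 1     ; replaces Cmp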
1631
1632/// Match special-case patterns that check for unsigned add overflow.
1633static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1634                                                   BinaryOperator *&Add) {
1635 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1636 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1637 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1638
1639 // We are not expecting non-canonical/degenerate code. Just bail out.
1640 if (isa<Constant>(A))
1641 return false;
1642
1643 ICmpInst::Predicate Pred = Cmp->getPredicate();
1644 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1645 B = ConstantInt::get(B->getType(), 1);
1646 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1647 B = Constant::getAllOnesValue(B->getType());
1648 else
1649 return false;
1650
1651 // Check the users of the variable operand of the compare looking for an add
1652 // with the adjusted constant.
1653 for (User *U : A->users()) {
1654 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1655      Add = cast<BinaryOperator>(U);
1656      return true;
1657 }
1658 }
1659 return false;
1660}
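// Example of the first special case (illustrative IR only):
//   %add = add i32 %a, 1
//   %cmp = icmp eq i32 %a, -1
// Here B is rewritten to 1 and %add is found among the users of %a, so the
// pair can later be turned into @llvm.uadd.with.overflow.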
1661
1662/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1663/// intrinsic. Return true if any changes were made.
1664bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1665 ModifyDT &ModifiedDT) {
1666 bool EdgeCase = false;
1667 Value *A, *B;
1668 BinaryOperator *Add;
1669 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1670    if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1671      return false;
1672 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1673 A = Add->getOperand(0);
1674 B = Add->getOperand(1);
1675 EdgeCase = true;
1676 }
1677
1678  if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1679                                 TLI->getValueType(*DL, Add->getType()),
1680 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1681 return false;
1682
1683 // We don't want to move around uses of condition values this late, so we
1684 // check if it is legal to create the call to the intrinsic in the basic
1685 // block containing the icmp.
1686 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1687 return false;
1688
1689 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1690 Intrinsic::uadd_with_overflow))
1691 return false;
1692
1693 // Reset callers - do not crash by iterating over a dead instruction.
1694 ModifiedDT = ModifyDT::ModifyInstDT;
1695 return true;
1696}
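// Typical input for the non-edge case (sketch): the canonical overflow check
//   %add = add i32 %a, %b
//   %cmp = icmp ult i32 %add, %a
// is matched by m_UAddWithOverflow and rewritten via replaceMathCmpWithIntrinsic.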
1697
1698bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1699 ModifyDT &ModifiedDT) {
1700 // We are not expecting non-canonical/degenerate code. Just bail out.
1701 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1702 if (isa<Constant>(A) && isa<Constant>(B))
1703 return false;
1704
1705 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1706 ICmpInst::Predicate Pred = Cmp->getPredicate();
1707 if (Pred == ICmpInst::ICMP_UGT) {
1708 std::swap(A, B);
1709 Pred = ICmpInst::ICMP_ULT;
1710 }
1711 // Convert special-case: (A == 0) is the same as (A u< 1).
1712 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1713 B = ConstantInt::get(B->getType(), 1);
1714 Pred = ICmpInst::ICMP_ULT;
1715 }
1716 // Convert special-case: (A != 0) is the same as (0 u< A).
1717 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1718 std::swap(A, B);
1719 Pred = ICmpInst::ICMP_ULT;
1720 }
1721 if (Pred != ICmpInst::ICMP_ULT)
1722 return false;
1723
1724 // Walk the users of a variable operand of a compare looking for a subtract or
1725 // add with that same operand. Also match the 2nd operand of the compare to
1726 // the add/sub, but that may be a negated constant operand of an add.
1727 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1728 BinaryOperator *Sub = nullptr;
1729 for (User *U : CmpVariableOperand->users()) {
1730 // A - B, A u< B --> usubo(A, B)
1731 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1732      Sub = cast<BinaryOperator>(U);
1733      break;
1734 }
1735
1736 // A + (-C), A u< C (canonicalized form of (sub A, C))
1737 const APInt *CmpC, *AddC;
1738 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1739 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1740      Sub = cast<BinaryOperator>(U);
1741      break;
1742 }
1743 }
1744 if (!Sub)
1745 return false;
1746
1747  if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1748                                 TLI->getValueType(*DL, Sub->getType()),
1749 Sub->hasNUsesOrMore(1)))
1750 return false;
1751
1752 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1753 Cmp, Intrinsic::usub_with_overflow))
1754 return false;
1755
1756 // Reset callers - do not crash by iterating over a dead instruction.
1757 ModifiedDT = ModifyDT::ModifyInstDT;
1758 return true;
1759}
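// Example of the matched pattern (illustrative IR): with
//   %cmp = icmp ult i32 %a, %b
//   %sub = sub i32 %a, %b
// both instructions are folded into a single @llvm.usub.with.overflow.i32 call,
// the compare becoming the overflow bit and the sub becoming the math result.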
1760
1761// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1762// The same transformation exists in DAG combiner, but we repeat it here because
1763// DAG builder can break the pattern by moving icmp into a successor block.
1764bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1765 CmpPredicate Pred;
1766 Value *X;
1767 const APInt *C;
1768
1769 // (icmp (ctpop x), c)
1770  if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1771                         m_APInt(C))))
1772    return false;
1773
1774 // We're only interested in "is power of 2 [or zero]" patterns.
1775 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1776 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1777 (Pred == CmpInst::ICMP_UGT && *C == 1);
1778 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1779 return false;
1780
1781  // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1782 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1783 // and otherwise expand ctpop into a few simple instructions.
1784 Type *OpTy = X->getType();
1785 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1786 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1787 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1788 return false;
1789
1790 // ctpop(x) == 1 -> ctpop(x) u< 2
1791 // ctpop(x) != 1 -> ctpop(x) u> 1
1792 if (Pred == ICmpInst::ICMP_EQ) {
1793 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1794 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1795 } else {
1796 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1797 }
1798 return true;
1799 }
1800
1801 Value *NewCmp;
1802 if (IsPowerOf2OrZeroTest ||
1803 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1804 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1805 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1806 IRBuilder<> Builder(Cmp);
1807 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1808 Value *And = Builder.CreateAnd(X, Sub);
1809 CmpInst::Predicate NewPred =
1810 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1811            ? CmpInst::ICMP_EQ
1812            : CmpInst::ICMP_NE;
1813 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1814 } else {
1815 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1816 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1817 IRBuilder<> Builder(Cmp);
1818 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1819 Value *Xor = Builder.CreateXor(X, Sub);
1820 CmpInst::Predicate NewPred =
1821        Pred == ICmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1822    NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1823 }
1824
1825 Cmp->replaceAllUsesWith(NewCmp);
1826  Cmp->eraseFromParent();
1827  return true;
1828}
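// Example of the slow-ctpop expansion above (illustrative IR, x of type i32):
//   %pop = call i32 @llvm.ctpop.i32(i32 %x)
//   %cmp = icmp ult i32 %pop, 2          ; "is power of 2 or zero"
// becomes
//   %sub = add i32 %x, -1
//   %and = and i32 %x, %sub
//   %cmp = icmp eq i32 %and, 0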
1829
1830/// Sink the given CmpInst into user blocks to reduce the number of virtual
1831/// registers that must be created and coalesced. This is a clear win except on
1832/// targets with multiple condition code registers (PowerPC), where it might
1833/// lose; some adjustment may be wanted there.
1834///
1835/// Return true if any changes are made.
1836static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1837 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1838 return false;
1839
1840 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1841 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1842 return false;
1843
1844 // Only insert a cmp in each block once.
1845  DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1846
1847 bool MadeChange = false;
1848 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1849 UI != E;) {
1850 Use &TheUse = UI.getUse();
1851    Instruction *User = cast<Instruction>(*UI);
1852
1853 // Preincrement use iterator so we don't invalidate it.
1854 ++UI;
1855
1856 // Don't bother for PHI nodes.
1857 if (isa<PHINode>(User))
1858 continue;
1859
1860 // Figure out which BB this cmp is used in.
1861 BasicBlock *UserBB = User->getParent();
1862 BasicBlock *DefBB = Cmp->getParent();
1863
1864 // If this user is in the same block as the cmp, don't change the cmp.
1865 if (UserBB == DefBB)
1866 continue;
1867
1868 // If we have already inserted a cmp into this block, use it.
1869 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1870
1871 if (!InsertedCmp) {
1872 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1873 assert(InsertPt != UserBB->end());
1874 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1875 Cmp->getOperand(0), Cmp->getOperand(1), "");
1876 InsertedCmp->insertBefore(*UserBB, InsertPt);
1877 // Propagate the debug info.
1878 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1879 }
1880
1881 // Replace a use of the cmp with a use of the new cmp.
1882 TheUse = InsertedCmp;
1883 MadeChange = true;
1884 ++NumCmpUses;
1885 }
1886
1887 // If we removed all uses, nuke the cmp.
1888 if (Cmp->use_empty()) {
1889 Cmp->eraseFromParent();
1890 MadeChange = true;
1891 }
1892
1893 return MadeChange;
1894}
1895
1896/// For pattern like:
1897///
1898/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1899/// ...
1900/// DomBB:
1901/// ...
1902/// br DomCond, TrueBB, CmpBB
1903/// CmpBB: (with DomBB being the single predecessor)
1904/// ...
1905/// Cmp = icmp eq CmpOp0, CmpOp1
1906/// ...
1907///
1908/// This would use two comparisons on targets where the lowering of icmp
1909/// sgt/slt differs from the lowering of icmp eq (PowerPC). This function tries
1910/// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' into 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1911/// After that, DomCond and Cmp can share the same comparison, eliminating one
1912/// comparison.
1913///
1914/// Return true if any changes are made.
1915static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1916                                       const TargetLowering &TLI) {
1917  if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1918    return false;
1919
1920 ICmpInst::Predicate Pred = Cmp->getPredicate();
1921 if (Pred != ICmpInst::ICMP_EQ)
1922 return false;
1923
1924 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1925 // icmp slt/sgt would introduce more redundant LLVM IR.
1926 for (User *U : Cmp->users()) {
1927 if (isa<BranchInst>(U))
1928 continue;
1929 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1930 continue;
1931 return false;
1932 }
1933
1934 // This is a cheap/incomplete check for dominance - just match a single
1935 // predecessor with a conditional branch.
1936 BasicBlock *CmpBB = Cmp->getParent();
1937 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1938 if (!DomBB)
1939 return false;
1940
1941 // We want to ensure that the only way control gets to the comparison of
1942 // interest is that a less/greater than comparison on the same operands is
1943 // false.
1944 Value *DomCond;
1945 BasicBlock *TrueBB, *FalseBB;
1946 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1947 return false;
1948 if (CmpBB != FalseBB)
1949 return false;
1950
1951 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1952 CmpPredicate DomPred;
1953 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1954 return false;
1955 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1956 return false;
1957
1958 // Convert the equality comparison to the opposite of the dominating
1959 // comparison and swap the direction for all branch/select users.
1960 // We have conceptually converted:
1961 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1962 // to
1963 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1964 // And similarly for branches.
1965 for (User *U : Cmp->users()) {
1966 if (auto *BI = dyn_cast<BranchInst>(U)) {
1967 assert(BI->isConditional() && "Must be conditional");
1968 BI->swapSuccessors();
1969 continue;
1970 }
1971 if (auto *SI = dyn_cast<SelectInst>(U)) {
1972 // Swap operands
1973 SI->swapValues();
1974 SI->swapProfMetadata();
1975 continue;
1976 }
1977 llvm_unreachable("Must be a branch or a select");
1978 }
1979 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1980 return true;
1981}
1982
1983/// Many architectures use the same instruction for both subtract and cmp. Try
1984/// to swap cmp operands to match subtract operations to allow for CSE.
1985static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1986  Value *Op0 = Cmp->getOperand(0);
1987 Value *Op1 = Cmp->getOperand(1);
1988 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1989 isa<Constant>(Op1) || Op0 == Op1)
1990 return false;
1991
1992 // If a subtract already has the same operands as a compare, swapping would be
1993 // bad. If a subtract has the same operands as a compare but in reverse order,
1994 // then swapping is good.
1995 int GoodToSwap = 0;
1996 unsigned NumInspected = 0;
1997 for (const User *U : Op0->users()) {
1998 // Avoid walking many users.
1999 if (++NumInspected > 128)
2000 return false;
2001 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2002 GoodToSwap++;
2003 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2004 GoodToSwap--;
2005 }
2006
2007 if (GoodToSwap > 0) {
2008 Cmp->swapOperands();
2009 return true;
2010 }
2011 return false;
2012}
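// Example (sketch): for %c = icmp ugt i32 %a, %b next to %d = sub i32 %b, %a,
// swapping the compare to icmp ult i32 %b, %a gives the compare and the
// subtract the same operand order, so targets that implement both with a
// single subtract-and-set-flags instruction can CSE them.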
2013
2014static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2015 const DataLayout &DL) {
2016 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2017 if (!FCmp)
2018 return false;
2019
2020 // Don't fold if the target offers free fabs and the predicate is legal.
2021 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2022 if (TLI.isFAbsFree(VT) &&
2023      TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
2024                          VT.getSimpleVT()))
2025 return false;
2026
2027 // Reverse the canonicalization if it is a FP class test
2028 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2029 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2030 };
2031 auto [ClassVal, ClassTest] =
2032 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2033 FCmp->getOperand(0), FCmp->getOperand(1));
2034 if (!ClassVal)
2035 return false;
2036
2037 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2038 return false;
2039
2040 IRBuilder<> Builder(Cmp);
2041 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2042 Cmp->replaceAllUsesWith(IsFPClass);
2043  Cmp->eraseFromParent();
2044  return true;
2045}
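// Example of the reversed canonicalization (sketch): a pattern such as
//   %fabs = call double @llvm.fabs.f64(double %x)
//   %cmp = fcmp oeq double %fabs, 0x7FF0000000000000   ; +inf
// may be rewritten into a call to @llvm.is.fpclass on %x with the fcInf mask
// when fabs is not free or the fcmp condition code is not legal.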
2046
2047static bool isRemOfLoopIncrementWithLoopInvariant(
2048    Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2049 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2050 Value *Incr, *RemAmt;
2051 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2052 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2053 return false;
2054
2055 Value *AddInst, *AddOffset;
2056 // Find out loop increment PHI.
2057 auto *PN = dyn_cast<PHINode>(Incr);
2058 if (PN != nullptr) {
2059 AddInst = nullptr;
2060 AddOffset = nullptr;
2061 } else {
2062 // Search through a NUW add on top of the loop increment.
2063 Value *V0, *V1;
2064 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2065 return false;
2066
2067 AddInst = Incr;
2068 PN = dyn_cast<PHINode>(V0);
2069 if (PN != nullptr) {
2070 AddOffset = V1;
2071 } else {
2072 PN = dyn_cast<PHINode>(V1);
2073 AddOffset = V0;
2074 }
2075 }
2076
2077 if (!PN)
2078 return false;
2079
2080  // This isn't strictly necessary; what we really need is one increment and
2081  // any number of initial values, all being the same.
2082 if (PN->getNumIncomingValues() != 2)
2083 return false;
2084
2085 // Only trivially analyzable loops.
2086 Loop *L = LI->getLoopFor(PN->getParent());
2087 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2088 return false;
2089
2090  // Require that the remainder is in the loop.
2091 if (!L->contains(Rem))
2092 return false;
2093
2094  // Only works if the remainder amount is loop invariant.
2095 if (!L->isLoopInvariant(RemAmt))
2096 return false;
2097
2098  // Only works if the AddOffset is loop invariant.
2099 if (AddOffset && !L->isLoopInvariant(AddOffset))
2100 return false;
2101
2102 // Is the PHI a loop increment?
2103 auto LoopIncrInfo = getIVIncrement(PN, LI);
2104 if (!LoopIncrInfo)
2105 return false;
2106
2107 // We need remainder_amount % increment_amount to be zero. Increment of one
2108 // satisfies that without any special logic and is overwhelmingly the common
2109 // case.
2110 if (!match(LoopIncrInfo->second, m_One()))
2111 return false;
2112
2113 // Need the increment to not overflow.
2114 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2115 return false;
2116
2117 // Set output variables.
2118 RemAmtOut = RemAmt;
2119 LoopIncrPNOut = PN;
2120 AddInstOut = AddInst;
2121 AddOffsetOut = AddOffset;
2122
2123 return true;
2124}
2125
2126// Try to transform:
2127//
2128// for(i = Start; i < End; ++i)
2129// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2130//
2131// ->
2132//
2133// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2134// for(i = Start; i < End; ++i, ++rem)
2135// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2136static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2137                                    const LoopInfo *LI,
2138                                    SmallSet<BasicBlock *, 32> &FreshBBs,
2139                                    bool IsHuge) {
2140 Value *AddOffset, *RemAmt, *AddInst;
2141 PHINode *LoopIncrPN;
2142 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2143 AddOffset, LoopIncrPN))
2144 return false;
2145
2146  // Only handle a non-constant remainder amount, as the extra IV is probably
2147  // not profitable in that case.
2148 //
2149 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2150 // we can rule out register pressure and ensure this `urem` is executed each
2151  // iteration, it's probably profitable to handle the const case as well.
2152 //
2153 // Potential TODO(2): Should we have a check for how "nested" this remainder
2154 // operation is? The new code runs every iteration so if the remainder is
2155 // guarded behind unlikely conditions this might not be worth it.
2156 if (match(RemAmt, m_ImmConstant()))
2157 return false;
2158
2159 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2160 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2161  // If we have an add, create the initial value for the remainder.
2162  // The logic here is:
2163  //   (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2164 //
2165 // Only proceed if the expression simplifies (otherwise we can't fully
2166 // optimize out the urem).
2167 if (AddInst) {
2168 assert(AddOffset && "We found an add but missing values");
2169 // Without dom-condition/assumption cache we aren't likely to get much out
2170 // of a context instruction.
2171 Start = simplifyAddInst(Start, AddOffset,
2172 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2173 /*IsNUW=*/true, *DL);
2174 if (!Start)
2175 return false;
2176 }
2177
2178 // If we can't fully optimize out the `rem`, skip this transform.
2179 Start = simplifyURemInst(Start, RemAmt, *DL);
2180 if (!Start)
2181 return false;
2182
2183 // Create new remainder with induction variable.
2184 Type *Ty = Rem->getType();
2185 IRBuilder<> Builder(Rem->getContext());
2186
2187 Builder.SetInsertPoint(LoopIncrPN);
2188 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2189
2190 Builder.SetInsertPoint(cast<Instruction>(
2191 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2192 // `(add (urem x, y), 1)` is always nuw.
2193 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2194 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2195 Value *RemSel =
2196 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2197
2198 NewRem->addIncoming(Start, L->getLoopPreheader());
2199 NewRem->addIncoming(RemSel, L->getLoopLatch());
2200
2201 // Insert all touched BBs.
2202 FreshBBs.insert(LoopIncrPN->getParent());
2203 FreshBBs.insert(L->getLoopLatch());
2204 FreshBBs.insert(Rem->getParent());
2205 if (AddInst)
2206 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2207 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2208 Rem->eraseFromParent();
2209 if (AddInst && AddInst->use_empty())
2210 cast<Instruction>(AddInst)->eraseFromParent();
2211 return true;
2212}
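// Resulting IR for the simple case of Start == 0 with no extra add (a sketch,
// illustrative names):
//   header:
//     %rem.iv = phi i64 [ 0, %preheader ], [ %rem.next.sel, %latch ]
//   latch:
//     %rem.next = add nuw i64 %rem.iv, 1
//     %rem.eq = icmp eq i64 %rem.next, %remamt
//     %rem.next.sel = select i1 %rem.eq, i64 0, i64 %rem.next
// %rem.iv then replaces the original urem instruction.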
2213
2214bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2215 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2216 return true;
2217 return false;
2218}
2219
2220bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2221 if (sinkCmpExpression(Cmp, *TLI))
2222 return true;
2223
2224 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2225 return true;
2226
2227 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2228 return true;
2229
2230 if (unfoldPowerOf2Test(Cmp))
2231 return true;
2232
2233 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2234 return true;
2235
2236  if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2237    return true;
2238
2239 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2240 return true;
2241
2242 return false;
2243}
2244
2245/// Duplicate and sink the given 'and' instruction into user blocks where it is
2246/// used in a compare to allow isel to generate better code for targets where
2247/// this operation can be combined.
2248///
2249/// Return true if any changes are made.
2250static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2251                                  SetOfInstrs &InsertedInsts) {
2252 // Double-check that we're not trying to optimize an instruction that was
2253 // already optimized by some other part of this pass.
2254 assert(!InsertedInsts.count(AndI) &&
2255 "Attempting to optimize already optimized and instruction");
2256 (void)InsertedInsts;
2257
2258 // Nothing to do for single use in same basic block.
2259 if (AndI->hasOneUse() &&
2260 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2261 return false;
2262
2263 // Try to avoid cases where sinking/duplicating is likely to increase register
2264 // pressure.
2265 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2266 !isa<ConstantInt>(AndI->getOperand(1)) &&
2267 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2268 return false;
2269
2270 for (auto *U : AndI->users()) {
2271    Instruction *User = cast<Instruction>(U);
2272
2273 // Only sink 'and' feeding icmp with 0.
2274 if (!isa<ICmpInst>(User))
2275 return false;
2276
2277 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2278 if (!CmpC || !CmpC->isZero())
2279 return false;
2280 }
2281
2282 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2283 return false;
2284
2285 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2286 LLVM_DEBUG(AndI->getParent()->dump());
2287
2288 // Push the 'and' into the same block as the icmp 0. There should only be
2289 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2290 // others, so we don't need to keep track of which BBs we insert into.
2291 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2292 UI != E;) {
2293 Use &TheUse = UI.getUse();
2294    Instruction *User = cast<Instruction>(*UI);
2295
2296 // Preincrement use iterator so we don't invalidate it.
2297 ++UI;
2298
2299 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2300
2301 // Keep the 'and' in the same place if the use is already in the same block.
2302 Instruction *InsertPt =
2303 User->getParent() == AndI->getParent() ? AndI : User;
2304 Instruction *InsertedAnd = BinaryOperator::Create(
2305 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2306 InsertPt->getIterator());
2307 // Propagate the debug info.
2308 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2309
2310 // Replace a use of the 'and' with a use of the new 'and'.
2311 TheUse = InsertedAnd;
2312 ++NumAndUses;
2313 LLVM_DEBUG(User->getParent()->dump());
2314 }
2315
2316 // We removed all uses, nuke the and.
2317 AndI->eraseFromParent();
2318 return true;
2319}
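// Example (sketch): an 'and' whose uses are all 'icmp eq ..., 0' in other blocks,
//   bb0:  %m = and i64 %v, 255
//   bb1:  %c1 = icmp eq i64 %m, 0
//   bb2:  %c2 = icmp eq i64 %m, 0
// gets a duplicate of the 'and' placed next to each compare, so each block can
// fold its mask-and-compare pair (see isMaskAndCmp0FoldingBeneficial above).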
2320
2321/// Check if the candidates could be combined with a shift instruction, which
2322/// includes:
2323/// 1. Truncate instruction
2324/// 2. And instruction where the immediate is a mask of the low bits:
2325/// imm & (imm+1) == 0
2326static bool isExtractBitsCandidateUse(Instruction *User) {
2327  if (!isa<TruncInst>(User)) {
2328    if (User->getOpcode() != Instruction::And ||
2329        !isa<ConstantInt>(User->getOperand(1)))
2330      return false;
2331
2332 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2333
2334 if ((Cimm & (Cimm + 1)).getBoolValue())
2335 return false;
2336 }
2337 return true;
2338}
2339
2340/// Sink both shift and truncate instruction to the use of truncate's BB.
2341static bool
2342SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2343                     DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2344                     const TargetLowering &TLI, const DataLayout &DL) {
2345  BasicBlock *UserBB = User->getParent();
2346  DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2347  auto *TruncI = cast<TruncInst>(User);
2348 bool MadeChange = false;
2349
2350 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2351 TruncE = TruncI->user_end();
2352 TruncUI != TruncE;) {
2353
2354 Use &TruncTheUse = TruncUI.getUse();
2355 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2356 // Preincrement use iterator so we don't invalidate it.
2357
2358 ++TruncUI;
2359
2360 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2361 if (!ISDOpcode)
2362 continue;
2363
2364 // If the use is actually a legal node, there will not be an
2365 // implicit truncate.
2366 // FIXME: always querying the result type is just an
2367 // approximation; some nodes' legality is determined by the
2368 // operand or other means. There's no good way to find out though.
2369    if (TLI.isOperationLegalOrCustom(
2370            ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2371 continue;
2372
2373 // Don't bother for PHI nodes.
2374 if (isa<PHINode>(TruncUser))
2375 continue;
2376
2377 BasicBlock *TruncUserBB = TruncUser->getParent();
2378
2379 if (UserBB == TruncUserBB)
2380 continue;
2381
2382 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2383 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2384
2385 if (!InsertedShift && !InsertedTrunc) {
2386 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2387 assert(InsertPt != TruncUserBB->end());
2388 // Sink the shift
2389 if (ShiftI->getOpcode() == Instruction::AShr)
2390 InsertedShift =
2391 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2392 else
2393 InsertedShift =
2394 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2395 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2396 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2397
2398 // Sink the trunc
2399 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2400 TruncInsertPt++;
2401 // It will go ahead of any debug-info.
2402 TruncInsertPt.setHeadBit(true);
2403 assert(TruncInsertPt != TruncUserBB->end());
2404
2405 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2406 TruncI->getType(), "");
2407 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2408 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2409
2410 MadeChange = true;
2411
2412 TruncTheUse = InsertedTrunc;
2413 }
2414 }
2415 return MadeChange;
2416}
2417
2418/// Sink the shift *right* instruction into user blocks if the uses could
2419/// potentially be combined with this shift instruction to generate a
2420/// BitExtract instruction. It is only applied if the architecture supports a
2421/// BitExtract instruction. Here is an example:
2422/// BB1:
2423/// %x.extract.shift = lshr i64 %arg1, 32
2424/// BB2:
2425/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2426/// ==>
2427///
2428/// BB2:
2429/// %x.extract.shift.1 = lshr i64 %arg1, 32
2430/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2431///
2432/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2433/// instruction.
2434/// Return true if any changes are made.
2435static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2436                                const TargetLowering &TLI,
2437 const DataLayout &DL) {
2438 BasicBlock *DefBB = ShiftI->getParent();
2439
2440 /// Only insert instructions in each block once.
2441  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2442
2443 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2444
2445 bool MadeChange = false;
2446 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2447 UI != E;) {
2448 Use &TheUse = UI.getUse();
2449    Instruction *User = cast<Instruction>(*UI);
2450    // Preincrement use iterator so we don't invalidate it.
2451 ++UI;
2452
2453 // Don't bother for PHI nodes.
2454 if (isa<PHINode>(User))
2455 continue;
2456
2456
2457    if (!isExtractBitsCandidateUse(User))
2458      continue;
2459
2460 BasicBlock *UserBB = User->getParent();
2461
2462 if (UserBB == DefBB) {
2463      // If the shift and truncate instructions are in the same BB, the use of
2464      // the truncate (TruncUse) may still introduce another truncate if it is
2465      // not legal. In this case, we would like to sink both the shift and the
2466      // truncate instruction to the BB of TruncUse.
2467 // for example:
2468 // BB1:
2469 // i64 shift.result = lshr i64 opnd, imm
2470 // trunc.result = trunc shift.result to i16
2471 //
2472 // BB2:
2473 // ----> We will have an implicit truncate here if the architecture does
2474 // not have i16 compare.
2475 // cmp i16 trunc.result, opnd2
2476 //
2477 if (isa<TruncInst>(User) &&
2478 shiftIsLegal
2479 // If the type of the truncate is legal, no truncate will be
2480 // introduced in other basic blocks.
2481 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2482 MadeChange =
2483 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2484
2485 continue;
2486 }
2487 // If we have already inserted a shift into this block, use it.
2488 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2489
2490 if (!InsertedShift) {
2491 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2492 assert(InsertPt != UserBB->end());
2493
2494 if (ShiftI->getOpcode() == Instruction::AShr)
2495 InsertedShift =
2496 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2497 else
2498 InsertedShift =
2499 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2500 InsertedShift->insertBefore(*UserBB, InsertPt);
2501 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2502
2503 MadeChange = true;
2504 }
2505
2506 // Replace a use of the shift with a use of the new shift.
2507 TheUse = InsertedShift;
2508 }
2509
2510 // If we removed all uses, or there are none, nuke the shift.
2511 if (ShiftI->use_empty()) {
2512 salvageDebugInfo(*ShiftI);
2513 ShiftI->eraseFromParent();
2514 MadeChange = true;
2515 }
2516
2517 return MadeChange;
2518}
2519
2520/// If counting leading or trailing zeros is an expensive operation and a zero
2521/// input is defined, add a check for zero to avoid calling the intrinsic.
2522///
2523/// We want to transform:
2524/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2525///
2526/// into:
2527/// entry:
2528/// %cmpz = icmp eq i64 %A, 0
2529/// br i1 %cmpz, label %cond.end, label %cond.false
2530/// cond.false:
2531/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2532/// br label %cond.end
2533/// cond.end:
2534/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2535///
2536/// If the transform is performed, return true and set ModifiedDT to true.
2537static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
2538 const TargetLowering *TLI,
2539 const DataLayout *DL, ModifyDT &ModifiedDT,
2540                                  SmallSet<BasicBlock *, 32> &FreshBBs,
2541                                  bool IsHugeFunc) {
2542 // If a zero input is undefined, it doesn't make sense to despeculate that.
2543 if (match(CountZeros->getOperand(1), m_One()))
2544 return false;
2545
2546 // If it's cheap to speculate, there's nothing to do.
2547 Type *Ty = CountZeros->getType();
2548 auto IntrinsicID = CountZeros->getIntrinsicID();
2549 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2550 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2551 return false;
2552
2553 // Only handle scalar cases. Anything else requires too much work.
2554 unsigned SizeInBits = Ty->getScalarSizeInBits();
2555 if (Ty->isVectorTy())
2556 return false;
2557
2558 // Bail if the value is never zero.
2559 Use &Op = CountZeros->getOperandUse(0);
2560 if (isKnownNonZero(Op, *DL))
2561 return false;
2562
2563 // The intrinsic will be sunk behind a compare against zero and branch.
2564 BasicBlock *StartBlock = CountZeros->getParent();
2565 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2566 if (IsHugeFunc)
2567 FreshBBs.insert(CallBlock);
2568
2569 // Create another block after the count zero intrinsic. A PHI will be added
2570 // in this block to select the result of the intrinsic or the bit-width
2571 // constant if the input to the intrinsic is zero.
2572 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2573 // Any debug-info after CountZeros should not be included.
2574 SplitPt.setHeadBit(true);
2575 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2576 if (IsHugeFunc)
2577 FreshBBs.insert(EndBlock);
2578
2579 // Update the LoopInfo. The new blocks are in the same loop as the start
2580 // block.
2581 if (Loop *L = LI.getLoopFor(StartBlock)) {
2582 L->addBasicBlockToLoop(CallBlock, LI);
2583 L->addBasicBlockToLoop(EndBlock, LI);
2584 }
2585
2586 // Set up a builder to create a compare, conditional branch, and PHI.
2587 IRBuilder<> Builder(CountZeros->getContext());
2588 Builder.SetInsertPoint(StartBlock->getTerminator());
2589 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2590
2591 // Replace the unconditional branch that was created by the first split with
2592 // a compare against zero and a conditional branch.
2593 Value *Zero = Constant::getNullValue(Ty);
2594 // Avoid introducing branch on poison. This also replaces the ctz operand.
2595  if (!isGuaranteedNotToBeUndefOrPoison(Op))
2596    Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2597 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2598 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2599 StartBlock->getTerminator()->eraseFromParent();
2600
2601 // Create a PHI in the end block to select either the output of the intrinsic
2602 // or the bit width of the operand.
2603 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2604 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2605 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2606 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2607 PN->addIncoming(BitWidth, StartBlock);
2608 PN->addIncoming(CountZeros, CallBlock);
2609
2610 // We are explicitly handling the zero case, so we can set the intrinsic's
2611 // undefined zero argument to 'true'. This will also prevent reprocessing the
2612 // intrinsic; we only despeculate when a zero input is defined.
2613 CountZeros->setArgOperand(1, Builder.getTrue());
2614 ModifiedDT = ModifyDT::ModifyBBDT;
2615 return true;
2616}
2617
2618bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2619 BasicBlock *BB = CI->getParent();
2620
2621 // Sink address computing for memory operands into the block.
2622 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2623 return true;
2624
2625 // Align the pointer arguments to this call if the target thinks it's a good
2626 // idea
2627 unsigned MinSize;
2628 Align PrefAlign;
2629 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2630 for (auto &Arg : CI->args()) {
2631 // We want to align both objects whose address is used directly and
2632 // objects whose address is used in casts and GEPs, though it only makes
2633 // sense for GEPs if the offset is a multiple of the desired alignment and
2634 // if size - offset meets the size threshold.
2635 if (!Arg->getType()->isPointerTy())
2636 continue;
2637 APInt Offset(DL->getIndexSizeInBits(
2638 cast<PointerType>(Arg->getType())->getAddressSpace()),
2639 0);
2640 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2641 uint64_t Offset2 = Offset.getLimitedValue();
2642 if (!isAligned(PrefAlign, Offset2))
2643 continue;
2644 AllocaInst *AI;
2645 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2646 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2647 AI->setAlignment(PrefAlign);
2648 // Global variables can only be aligned if they are defined in this
2649 // object (i.e. they are uniquely initialized in this object), and
2650 // over-aligning global variables that have an explicit section is
2651 // forbidden.
2652 GlobalVariable *GV;
2653 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2654 GV->getPointerAlignment(*DL) < PrefAlign &&
2655 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2656 GV->setAlignment(PrefAlign);
2657 }
2658 }
2659 // If this is a memcpy (or similar) then we may be able to improve the
2660 // alignment.
2661 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2662 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2663 MaybeAlign MIDestAlign = MI->getDestAlign();
2664 if (!MIDestAlign || DestAlign > *MIDestAlign)
2665 MI->setDestAlignment(DestAlign);
2666 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2667 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2668 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2669 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2670 MTI->setSourceAlignment(SrcAlign);
2671 }
2672 }
2673
2674 // If we have a cold call site, try to sink addressing computation into the
2675 // cold block. This interacts with our handling for loads and stores to
2676 // ensure that we can fold all uses of a potential addressing computation
2677 // into their uses. TODO: generalize this to work over profiling data
2678 if (CI->hasFnAttr(Attribute::Cold) &&
2679 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2680 for (auto &Arg : CI->args()) {
2681 if (!Arg->getType()->isPointerTy())
2682 continue;
2683 unsigned AS = Arg->getType()->getPointerAddressSpace();
2684 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2685 return true;
2686 }
2687
2688 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2689 if (II) {
2690 switch (II->getIntrinsicID()) {
2691 default:
2692 break;
2693 case Intrinsic::assume:
2694 llvm_unreachable("llvm.assume should have been removed already");
2695 case Intrinsic::allow_runtime_check:
2696 case Intrinsic::allow_ubsan_check:
2697 case Intrinsic::experimental_widenable_condition: {
2698 // Give up on future widening opportunities so that we can fold away dead
2699 // paths and merge blocks before going into block-local instruction
2700 // selection.
2701 if (II->use_empty()) {
2702 II->eraseFromParent();
2703 return true;
2704 }
2705 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2706 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2707 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2708 });
2709 return true;
2710 }
2711 case Intrinsic::objectsize:
2712 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2713 case Intrinsic::is_constant:
2714 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2715 case Intrinsic::aarch64_stlxr:
2716 case Intrinsic::aarch64_stxr: {
2717 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2718 if (!ExtVal || !ExtVal->hasOneUse() ||
2719 ExtVal->getParent() == CI->getParent())
2720 return false;
2721 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2722 ExtVal->moveBefore(CI->getIterator());
2723 // Mark this instruction as "inserted by CGP", so that other
2724 // optimizations don't touch it.
2725 InsertedInsts.insert(ExtVal);
2726 return true;
2727 }
2728
2729 case Intrinsic::launder_invariant_group:
2730 case Intrinsic::strip_invariant_group: {
2731 Value *ArgVal = II->getArgOperand(0);
2732 auto it = LargeOffsetGEPMap.find(II);
2733 if (it != LargeOffsetGEPMap.end()) {
2734 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2735 // Make sure not to have to deal with iterator invalidation
2736 // after possibly adding ArgVal to LargeOffsetGEPMap.
2737 auto GEPs = std::move(it->second);
2738 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2739 LargeOffsetGEPMap.erase(II);
2740 }
2741
2742 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2743 II->eraseFromParent();
2744 return true;
2745 }
2746 case Intrinsic::cttz:
2747 case Intrinsic::ctlz:
2748 // If counting zeros is expensive, try to avoid it.
2749 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2750 IsHugeFunc);
2751 case Intrinsic::fshl:
2752 case Intrinsic::fshr:
2753 return optimizeFunnelShift(II);
2754 case Intrinsic::masked_gather:
2755 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2756 case Intrinsic::masked_scatter:
2757 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2758 case Intrinsic::masked_load:
2759 // Treat v1X masked load as load X type.
2760 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2761 if (VT->getNumElements() == 1) {
2762 Value *PtrVal = II->getArgOperand(0);
2763 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2764 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2765 return true;
2766 }
2767 }
2768 return false;
2769 case Intrinsic::masked_store:
2770 // Treat v1X masked store as store X type.
2771 if (auto *VT =
2772 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2773 if (VT->getNumElements() == 1) {
2774 Value *PtrVal = II->getArgOperand(1);
2775 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2776 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2777 return true;
2778 }
2779 }
2780 return false;
2781 }
2782
2783 SmallVector<Value *, 2> PtrOps;
2784 Type *AccessTy;
2785 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2786 while (!PtrOps.empty()) {
2787 Value *PtrVal = PtrOps.pop_back_val();
2788 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2789 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2790 return true;
2791 }
2792 }
2793
2794 // From here on out we're working with named functions.
2795 auto *Callee = CI->getCalledFunction();
2796 if (!Callee)
2797 return false;
2798
2799 // Lower all default uses of _chk calls. This is very similar
2800 // to what InstCombineCalls does, but here we are only lowering calls
2801 // to fortified library functions (e.g. __memcpy_chk) that have the default
2802 // "don't know" as the objectsize. Anything else should be left alone.
2803 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2804 IRBuilder<> Builder(CI);
2805 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2806 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2807 CI->eraseFromParent();
2808 return true;
2809 }
2810
2811 // SCCP may have propagated, among other things, C++ static variables across
2812 // calls. If this happens to be the case, we may want to undo it in order to
2813// avoid redundant pointer computation of the constant, as the function
2814// returning the constant needs to be executed anyway.
2815 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2816 if (!F->getReturnType()->isPointerTy())
2817 return nullptr;
2818
2819 GlobalVariable *UniformValue = nullptr;
2820 for (auto &BB : *F) {
2821 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2822 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2823 if (!UniformValue)
2824 UniformValue = V;
2825 else if (V != UniformValue)
2826 return nullptr;
2827 } else {
2828 return nullptr;
2829 }
2830 }
2831 }
2832
2833 return UniformValue;
2834 };
2835
2836 if (Callee->hasExactDefinition()) {
2837 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2838 bool MadeChange = false;
2839 for (Use &U : make_early_inc_range(RV->uses())) {
2840 auto *I = dyn_cast<Instruction>(U.getUser());
2841 if (!I || I->getParent() != CI->getParent()) {
2842 // Limit to the same basic block to avoid extending the call-site live
2843 // range, which otherwise could increase register pressure.
2844 continue;
2845 }
2846 if (CI->comesBefore(I)) {
2847 U.set(CI);
2848 MadeChange = true;
2849 }
2850 }
2851
2852 return MadeChange;
2853 }
2854 }
2855
2856 return false;
2857}
2858
2859static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2860                                          const CallInst *CI) {
2861 assert(CI && CI->use_empty());
2862
2863 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2864 switch (II->getIntrinsicID()) {
2865 case Intrinsic::memset:
2866 case Intrinsic::memcpy:
2867 case Intrinsic::memmove:
2868 return true;
2869 default:
2870 return false;
2871 }
2872
2873 LibFunc LF;
2874 Function *Callee = CI->getCalledFunction();
2875 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2876 switch (LF) {
2877 case LibFunc_strcpy:
2878 case LibFunc_strncpy:
2879 case LibFunc_strcat:
2880 case LibFunc_strncat:
2881 return true;
2882 default:
2883 return false;
2884 }
2885
2886 return false;
2887}
2888
2889/// Look for opportunities to duplicate return instructions to the predecessor
2890/// to enable tail call optimizations. The case it is currently looking for is
2891/// the following one. Known intrinsics or library function that may be tail
2892/// called are taken into account as well.
2893/// @code
2894/// bb0:
2895/// %tmp0 = tail call i32 @f0()
2896/// br label %return
2897/// bb1:
2898/// %tmp1 = tail call i32 @f1()
2899/// br label %return
2900/// bb2:
2901/// %tmp2 = tail call i32 @f2()
2902/// br label %return
2903/// return:
2904/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2905/// ret i32 %retval
2906/// @endcode
2907///
2908/// =>
2909///
2910/// @code
2911/// bb0:
2912/// %tmp0 = tail call i32 @f0()
2913/// ret i32 %tmp0
2914/// bb1:
2915/// %tmp1 = tail call i32 @f1()
2916/// ret i32 %tmp1
2917/// bb2:
2918/// %tmp2 = tail call i32 @f2()
2919/// ret i32 %tmp2
2920/// @endcode
2921bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2922 ModifyDT &ModifiedDT) {
2923 if (!BB->getTerminator())
2924 return false;
2925
2926 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2927 if (!RetI)
2928 return false;
2929
2930 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2931
2932 PHINode *PN = nullptr;
2933 ExtractValueInst *EVI = nullptr;
2934 BitCastInst *BCI = nullptr;
2935 Value *V = RetI->getReturnValue();
2936 if (V) {
2937 BCI = dyn_cast<BitCastInst>(V);
2938 if (BCI)
2939 V = BCI->getOperand(0);
2940
2941    EVI = dyn_cast<ExtractValueInst>(V);
2942    if (EVI) {
2943 V = EVI->getOperand(0);
2944 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2945 return false;
2946 }
2947
2948 PN = dyn_cast<PHINode>(V);
2949 }
2950
2951 if (PN && PN->getParent() != BB)
2952 return false;
2953
2954 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2955 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2956 if (BC && BC->hasOneUse())
2957 Inst = BC->user_back();
2958
2959 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2960 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2961 return false;
2962 };
2963
2964  SmallVector<const IntrinsicInst *, 1> FakeUses;
2965
2966 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2967 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2968 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2969 // Record the instruction so it can be preserved when the exit block is
2970 // removed. Do not preserve the fake use that uses the result of the
2971 // PHI instruction.
2972 // Do not copy fake uses that use the result of a PHI node.
2973 // FIXME: If we do want to copy the fake use into the return blocks, we
2974 // have to figure out which of the PHI node operands to use for each
2975 // copy.
2976 if (!isa<PHINode>(II->getOperand(0))) {
2977 FakeUses.push_back(II);
2978 }
2979 return true;
2980 }
2981
2982 return false;
2983 };
2984
2985 // Make sure there are no instructions between the first instruction
2986 // and return.
2987  BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
2988  // Skip over pseudo-probes and the bitcast.
2989 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
2990 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
2991 BI = std::next(BI);
2992 if (&*BI != RetI)
2993 return false;
2994
2995 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2996 /// call.
2997 const Function *F = BB->getParent();
2998 SmallVector<BasicBlock *, 4> TailCallBBs;
2999 // Record the call instructions so we can insert any fake uses
3000 // that need to be preserved before them.
3001  SmallVector<CallInst *, 4> CallInsts;
3002  if (PN) {
3003 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3004 // Look through bitcasts.
3005 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3006 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3007 BasicBlock *PredBB = PN->getIncomingBlock(I);
3008 // Make sure the phi value is indeed produced by the tail call.
3009 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3010 TLI->mayBeEmittedAsTailCall(CI) &&
3011 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3012 TailCallBBs.push_back(PredBB);
3013 CallInsts.push_back(CI);
3014 } else {
3015 // Consider the cases in which the phi value is indirectly produced by
3016 // the tail call, for example when encountering memset(), memmove(),
3017 // strcpy(), whose return value may have been optimized out. In such
3018 // cases, the value needs to be the first function argument.
3019 //
3020 // bb0:
3021 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3022 // br label %return
3023 // return:
3024 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3025 if (PredBB && PredBB->getSingleSuccessor() == BB)
3026          CI = dyn_cast_or_null<CallInst>(
3027              PredBB->getTerminator()->getPrevNode());
3028
3029 if (CI && CI->use_empty() &&
3030 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3031 IncomingVal == CI->getArgOperand(0) &&
3032 TLI->mayBeEmittedAsTailCall(CI) &&
3033 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3034 TailCallBBs.push_back(PredBB);
3035 CallInsts.push_back(CI);
3036 }
3037 }
3038 }
3039 } else {
3040 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3041 for (BasicBlock *Pred : predecessors(BB)) {
3042 if (!VisitedBBs.insert(Pred).second)
3043 continue;
3044 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3045 CallInst *CI = dyn_cast<CallInst>(I);
3046 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
3047 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3048 // Either we return void or the return value must be the first
3049 // argument of a known intrinsic or library function.
3050 if (!V || isa<UndefValue>(V) ||
3051 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3052 V == CI->getArgOperand(0))) {
3053 TailCallBBs.push_back(Pred);
3054 CallInsts.push_back(CI);
3055 }
3056 }
3057 }
3058 }
3059 }
3060
3061 bool Changed = false;
3062 for (auto const &TailCallBB : TailCallBBs) {
3063 // Make sure the call instruction is followed by an unconditional branch to
3064 // the return block.
3065 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3066 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3067 continue;
3068
3069 // Duplicate the return into TailCallBB.
3070 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3071    assert(!VerifyBFIUpdates ||
3072           BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3073 BFI->setBlockFreq(BB,
3074 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3075 ModifiedDT = ModifyDT::ModifyBBDT;
3076 Changed = true;
3077 ++NumRetsDup;
3078 }
3079
3080 // If we eliminated all predecessors of the block, delete the block now.
3081 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3082 // Copy the fake uses found in the original return block to all blocks
3083 // that contain tail calls.
3084 for (auto *CI : CallInsts) {
3085 for (auto const *FakeUse : FakeUses) {
3086 auto *ClonedInst = FakeUse->clone();
3087 ClonedInst->insertBefore(CI->getIterator());
3088 }
3089 }
3090 BB->eraseFromParent();
3091 }
3092
3093 return Changed;
3094}
3095
3096//===----------------------------------------------------------------------===//
3097// Memory Optimization
3098//===----------------------------------------------------------------------===//
3099
3100namespace {
3101
3102/// This is an extended version of TargetLowering::AddrMode
3103/// which holds actual Value*'s for register values.
3104struct ExtAddrMode : public TargetLowering::AddrMode {
3105 Value *BaseReg = nullptr;
3106 Value *ScaledReg = nullptr;
3107 Value *OriginalValue = nullptr;
3108 bool InBounds = true;
3109
3110 enum FieldName {
3111 NoField = 0x00,
3112 BaseRegField = 0x01,
3113 BaseGVField = 0x02,
3114 BaseOffsField = 0x04,
3115 ScaledRegField = 0x08,
3116 ScaleField = 0x10,
3117 MultipleFields = 0xff
3118 };
3119
3120 ExtAddrMode() = default;
3121
3122 void print(raw_ostream &OS) const;
3123 void dump() const;
3124
3125 // Replace From in ExtAddrMode with To.
3126 // E.g., SExt insts may be promoted and deleted. We should replace them with
3127 // the promoted values.
3128 void replaceWith(Value *From, Value *To) {
3129 if (ScaledReg == From)
3130 ScaledReg = To;
3131 }
3132
3133 FieldName compare(const ExtAddrMode &other) {
3134 // First check that the types are the same on each field, as differing types
3135 // is something we can't cope with later on.
3136 if (BaseReg && other.BaseReg &&
3137 BaseReg->getType() != other.BaseReg->getType())
3138 return MultipleFields;
3139 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3140 return MultipleFields;
3141 if (ScaledReg && other.ScaledReg &&
3142 ScaledReg->getType() != other.ScaledReg->getType())
3143 return MultipleFields;
3144
3145 // Conservatively reject 'inbounds' mismatches.
3146 if (InBounds != other.InBounds)
3147 return MultipleFields;
3148
3149 // Check each field to see if it differs.
3150 unsigned Result = NoField;
3151 if (BaseReg != other.BaseReg)
3152 Result |= BaseRegField;
3153 if (BaseGV != other.BaseGV)
3154 Result |= BaseGVField;
3155 if (BaseOffs != other.BaseOffs)
3156 Result |= BaseOffsField;
3157 if (ScaledReg != other.ScaledReg)
3158 Result |= ScaledRegField;
3159 // Don't count 0 as being a different scale, because that actually means
3160 // unscaled (which will already be counted by having no ScaledReg).
3161 if (Scale && other.Scale && Scale != other.Scale)
3162 Result |= ScaleField;
3163
3164 if (llvm::popcount(Result) > 1)
3165 return MultipleFields;
3166 else
3167 return static_cast<FieldName>(Result);
3168 }
3169
3170 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3171 // with no offset.
3172 bool isTrivial() {
3173 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3174 // trivial if at most one of these terms is nonzero, except that BaseGV and
3175 // BaseReg both being zero actually means a null pointer value, which we
3176 // consider to be 'non-zero' here.
3177 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3178 }
3179
3180 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3181 switch (Field) {
3182 default:
3183 return nullptr;
3184 case BaseRegField:
3185 return BaseReg;
3186 case BaseGVField:
3187 return BaseGV;
3188 case ScaledRegField:
3189 return ScaledReg;
3190 case BaseOffsField:
3191 return ConstantInt::get(IntPtrTy, BaseOffs);
3192 }
3193 }
3194
3195 void SetCombinedField(FieldName Field, Value *V,
3196 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3197 switch (Field) {
3198 default:
3199 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3200 break;
3201 case ExtAddrMode::BaseRegField:
3202 BaseReg = V;
3203 break;
3204 case ExtAddrMode::BaseGVField:
3205 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3206 // in the BaseReg field.
3207 assert(BaseReg == nullptr);
3208 BaseReg = V;
3209 BaseGV = nullptr;
3210 break;
3211 case ExtAddrMode::ScaledRegField:
3212 ScaledReg = V;
3213 // If we have a mix of scaled and unscaled addrmodes then we want scale
3214 // to be the scale and not zero.
3215 if (!Scale)
3216 for (const ExtAddrMode &AM : AddrModes)
3217 if (AM.Scale) {
3218 Scale = AM.Scale;
3219 break;
3220 }
3221 break;
3222 case ExtAddrMode::BaseOffsField:
3223 // The offset is no longer a constant, so it goes in ScaledReg with a
3224 // scale of 1.
3225 assert(ScaledReg == nullptr);
3226 ScaledReg = V;
3227 Scale = 1;
3228 BaseOffs = 0;
3229 break;
3230 }
3231 }
3232};
3233
3234#ifndef NDEBUG
3235static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3236 AM.print(OS);
3237 return OS;
3238}
3239#endif
3240
3241#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3242void ExtAddrMode::print(raw_ostream &OS) const {
3243 bool NeedPlus = false;
3244 OS << "[";
3245 if (InBounds)
3246 OS << "inbounds ";
3247 if (BaseGV) {
3248 OS << "GV:";
3249 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3250 NeedPlus = true;
3251 }
3252
3253 if (BaseOffs) {
3254 OS << (NeedPlus ? " + " : "") << BaseOffs;
3255 NeedPlus = true;
3256 }
3257
3258 if (BaseReg) {
3259 OS << (NeedPlus ? " + " : "") << "Base:";
3260 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3261 NeedPlus = true;
3262 }
3263 if (Scale) {
3264 OS << (NeedPlus ? " + " : "") << Scale << "*";
3265 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3266 }
3267
3268 OS << ']';
3269}
3270
3271LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3272 print(dbgs());
3273 dbgs() << '\n';
3274}
3275#endif
3276
3277} // end anonymous namespace
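// [Editor's note] The following standalone sketch is not part of
// CodeGenPrepare.cpp. It illustrates the bit-flag bookkeeping used by
// ExtAddrMode::compare above: set one bit per differing field, then collapse
// to "multiple fields" as soon as more than one bit is set. The real
// compare() additionally rejects type and 'inbounds' mismatches up front.
// All names below (DiffField, MiniAddrMode, ...) are illustrative, not LLVM
// API; the sketch only needs C++20 for std::popcount.
#include <bit>
#include <cassert>

enum DiffField : unsigned {
  NoDiff = 0x0,
  RegDiff = 0x1,
  OffsDiff = 0x2,
  ScaleDiff = 0x4,
  ManyDiff = 0xff
};

struct MiniAddrMode {
  int BaseReg;
  int BaseOffs;
  int Scale;
};

static DiffField classifyDifference(const MiniAddrMode &A,
                                    const MiniAddrMode &B) {
  unsigned Bits = NoDiff;
  if (A.BaseReg != B.BaseReg)
    Bits |= RegDiff;
  if (A.BaseOffs != B.BaseOffs)
    Bits |= OffsDiff;
  if (A.Scale != B.Scale)
    Bits |= ScaleDiff;
  // More than one differing field is treated as an unmergeable difference.
  return std::popcount(Bits) > 1 ? ManyDiff : static_cast<DiffField>(Bits);
}

int main() {
  assert(classifyDifference({1, 8, 0}, {2, 8, 0}) == RegDiff);   // One field.
  assert(classifyDifference({1, 8, 0}, {2, 16, 0}) == ManyDiff); // Two fields.
}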
3278
3279namespace {
3280
3281/// This class provides transaction based operation on the IR.
3282/// Every change made through this class is recorded in the internal state and
3283/// can be undone (rollback) until commit is called.
3284/// CGP does not check if instructions could be speculatively executed when
3285/// moved. Preserving the original location would pessimize the debugging
3286/// experience, as well as negatively impact the quality of sample PGO.
3287class TypePromotionTransaction {
3288 /// This represents the common interface of the individual transaction.
3289 /// Each class implements the logic for doing one specific modification on
3290 /// the IR via the TypePromotionTransaction.
3291 class TypePromotionAction {
3292 protected:
3293 /// The Instruction modified.
3294 Instruction *Inst;
3295
3296 public:
3297 /// Constructor of the action.
3298 /// The constructor performs the related action on the IR.
3299 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3300
3301 virtual ~TypePromotionAction() = default;
3302
3303 /// Undo the modification done by this action.
3304 /// After this method returns, the IR is in the same state as it was
3305 /// before this action was applied.
3306 /// \pre Undoing the action works if and only if the IR is in the exact same
3307 /// state as it was directly after this action was applied.
3308 virtual void undo() = 0;
3309
3310 /// Commit every change made by this action.
3311 /// When the action's results on the IR are to be kept, it is important to
3312 /// call this function; otherwise hidden information may be kept forever.
3313 virtual void commit() {
3314 // Nothing to be done, this action is not doing anything.
3315 }
3316 };
3317
3318 /// Utility to remember the position of an instruction.
3319 class InsertionHandler {
3320 /// Position of an instruction.
3321 /// Either an instruction:
3322 /// - Is the first in a basic block: BB is used.
3323 /// - Has a previous instruction: PrevInst is used.
3324 struct {
3325 BasicBlock::iterator PrevInst;
3326 BasicBlock *BB;
3327 } Point;
3328 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3329
3330 /// Remember whether or not the instruction had a previous instruction.
3331 bool HasPrevInstruction;
3332
3333 public:
3334 /// Record the position of \p Inst.
3335 InsertionHandler(Instruction *Inst) {
3336 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3337 BasicBlock *BB = Inst->getParent();
3338
3339 // Record where we would have to re-insert the instruction in the sequence
3340 // of DbgRecords, if we ended up reinserting.
3341 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3342
3343 if (HasPrevInstruction) {
3344 Point.PrevInst = std::prev(Inst->getIterator());
3345 } else {
3346 Point.BB = BB;
3347 }
3348 }
3349
3350 /// Insert \p Inst at the recorded position.
3351 void insert(Instruction *Inst) {
3352 if (HasPrevInstruction) {
3353 if (Inst->getParent())
3354 Inst->removeFromParent();
3355 Inst->insertAfter(Point.PrevInst);
3356 } else {
3357 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3358 if (Inst->getParent())
3359 Inst->moveBefore(*Point.BB, Position);
3360 else
3361 Inst->insertBefore(*Point.BB, Position);
3362 }
3363
3364 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3365 }
3366 };
3367
3368 /// Move an instruction before another.
3369 class InstructionMoveBefore : public TypePromotionAction {
3370 /// Original position of the instruction.
3371 InsertionHandler Position;
3372
3373 public:
3374 /// Move \p Inst before \p Before.
3375 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3376 : TypePromotionAction(Inst), Position(Inst) {
3377 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3378 << "\n");
3379 Inst->moveBefore(Before);
3380 }
3381
3382 /// Move the instruction back to its original position.
3383 void undo() override {
3384 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3385 Position.insert(Inst);
3386 }
3387 };
3388
3389 /// Set the operand of an instruction with a new value.
3390 class OperandSetter : public TypePromotionAction {
3391 /// Original operand of the instruction.
3392 Value *Origin;
3393
3394 /// Index of the modified operand.
3395 unsigned Idx;
3396
3397 public:
3398 /// Set \p Idx operand of \p Inst with \p NewVal.
3399 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3400 : TypePromotionAction(Inst), Idx(Idx) {
3401 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3402 << "for:" << *Inst << "\n"
3403 << "with:" << *NewVal << "\n");
3404 Origin = Inst->getOperand(Idx);
3405 Inst->setOperand(Idx, NewVal);
3406 }
3407
3408 /// Restore the original value of the instruction.
3409 void undo() override {
3410 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3411 << "for: " << *Inst << "\n"
3412 << "with: " << *Origin << "\n");
3413 Inst->setOperand(Idx, Origin);
3414 }
3415 };
3416
3417 /// Hide the operands of an instruction.
3418 /// Behave as if this instruction were not using any of its operands.
3419 class OperandsHider : public TypePromotionAction {
3420 /// The list of original operands.
3421 SmallVector<Value *, 4> OriginalValues;
3422
3423 public:
3424 /// Remove \p Inst from the uses of the operands of \p Inst.
3425 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3426 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3427 unsigned NumOpnds = Inst->getNumOperands();
3428 OriginalValues.reserve(NumOpnds);
3429 for (unsigned It = 0; It < NumOpnds; ++It) {
3430 // Save the current operand.
3431 Value *Val = Inst->getOperand(It);
3432 OriginalValues.push_back(Val);
3433 // Set a dummy one.
3434 // We could use OperandSetter here, but that would imply an overhead
3435 // that we are not willing to pay.
3436 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3437 }
3438 }
3439
3440 /// Restore the original list of uses.
3441 void undo() override {
3442 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3443 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3444 Inst->setOperand(It, OriginalValues[It]);
3445 }
3446 };
3447
3448 /// Build a truncate instruction.
3449 class TruncBuilder : public TypePromotionAction {
3450 Value *Val;
3451
3452 public:
3453 /// Build a truncate instruction of \p Opnd producing a \p Ty
3454 /// result.
3455 /// trunc Opnd to Ty.
3456 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3457 IRBuilder<> Builder(Opnd);
3458 Builder.SetCurrentDebugLocation(DebugLoc());
3459 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3460 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3461 }
3462
3463 /// Get the built value.
3464 Value *getBuiltValue() { return Val; }
3465
3466 /// Remove the built instruction.
3467 void undo() override {
3468 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3469 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3470 IVal->eraseFromParent();
3471 }
3472 };
3473
3474 /// Build a sign extension instruction.
3475 class SExtBuilder : public TypePromotionAction {
3476 Value *Val;
3477
3478 public:
3479 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3480 /// result.
3481 /// sext Opnd to Ty.
3482 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3483 : TypePromotionAction(InsertPt) {
3484 IRBuilder<> Builder(InsertPt);
3485 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3486 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3487 }
3488
3489 /// Get the built value.
3490 Value *getBuiltValue() { return Val; }
3491
3492 /// Remove the built instruction.
3493 void undo() override {
3494 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3495 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3496 IVal->eraseFromParent();
3497 }
3498 };
3499
3500 /// Build a zero extension instruction.
3501 class ZExtBuilder : public TypePromotionAction {
3502 Value *Val;
3503
3504 public:
3505 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3506 /// result.
3507 /// zext Opnd to Ty.
3508 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3509 : TypePromotionAction(InsertPt) {
3510 IRBuilder<> Builder(InsertPt);
3511 Builder.SetCurrentDebugLocation(DebugLoc());
3512 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3513 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3514 }
3515
3516 /// Get the built value.
3517 Value *getBuiltValue() { return Val; }
3518
3519 /// Remove the built instruction.
3520 void undo() override {
3521 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3522 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3523 IVal->eraseFromParent();
3524 }
3525 };
3526
3527 /// Mutate an instruction to another type.
3528 class TypeMutator : public TypePromotionAction {
3529 /// Record the original type.
3530 Type *OrigTy;
3531
3532 public:
3533 /// Mutate the type of \p Inst into \p NewTy.
3534 TypeMutator(Instruction *Inst, Type *NewTy)
3535 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3536 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3537 << "\n");
3538 Inst->mutateType(NewTy);
3539 }
3540
3541 /// Mutate the instruction back to its original type.
3542 void undo() override {
3543 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3544 << "\n");
3545 Inst->mutateType(OrigTy);
3546 }
3547 };
3548
3549 /// Replace the uses of an instruction by another instruction.
3550 class UsesReplacer : public TypePromotionAction {
3551 /// Helper structure to keep track of the replaced uses.
3552 struct InstructionAndIdx {
3553 /// The instruction using the replaced instruction.
3554 Instruction *Inst;
3555
3556 /// The operand index at which the replaced instruction is used by Inst.
3557 unsigned Idx;
3558
3559 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3560 : Inst(Inst), Idx(Idx) {}
3561 };
3562
3563 /// Keep track of the original uses (pair Instruction, Index).
3564 SmallVector<InstructionAndIdx, 4> OriginalUses;
3565 /// Keep track of the debug users.
3566 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3567
3568 /// Keep track of the new value so that we can undo it by replacing
3569 /// instances of the new value with the original value.
3570 Value *New;
3571
3573
3574 public:
3575 /// Replace all the use of \p Inst by \p New.
3576 UsesReplacer(Instruction *Inst, Value *New)
3577 : TypePromotionAction(Inst), New(New) {
3578 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3579 << "\n");
3580 // Record the original uses.
3581 for (Use &U : Inst->uses()) {
3582 Instruction *UserI = cast<Instruction>(U.getUser());
3583 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3584 }
3585 // Record the debug uses separately. They are not in the instruction's
3586 // use list, but they are replaced by RAUW.
3587 findDbgValues(Inst, DbgVariableRecords);
3588
3589 // Now, we can replace the uses.
3590 Inst->replaceAllUsesWith(New);
3591 }
3592
3593 /// Reassign the original uses of Inst to Inst.
3594 void undo() override {
3595 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3596 for (InstructionAndIdx &Use : OriginalUses)
3597 Use.Inst->setOperand(Use.Idx, Inst);
3598 // RAUW has replaced all original uses with references to the new value,
3599 // including the debug uses. Since we are undoing the replacements,
3600 // the original debug uses must also be reinstated to maintain the
3601 // correctness and utility of debug value records.
3602 for (DbgVariableRecord *DVR : DbgVariableRecords)
3603 DVR->replaceVariableLocationOp(New, Inst);
3604 }
3605 };
3606
3607 /// Remove an instruction from the IR.
3608 class InstructionRemover : public TypePromotionAction {
3609 /// Original position of the instruction.
3610 InsertionHandler Inserter;
3611
3612 /// Helper structure to hide all the links to the instruction. In other
3613 /// words, this helps pretend that the instruction was removed.
3614 OperandsHider Hider;
3615
3616 /// Keep track of the uses replaced, if any.
3617 UsesReplacer *Replacer = nullptr;
3618
3619 /// Keep track of instructions removed.
3620 SetOfInstrs &RemovedInsts;
3621
3622 public:
3623 /// Remove all references to \p Inst and optionally replace all its
3624 /// uses with New.
3625 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3626 /// \pre If !Inst->use_empty(), then New != nullptr
3627 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3628 Value *New = nullptr)
3629 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3630 RemovedInsts(RemovedInsts) {
3631 if (New)
3632 Replacer = new UsesReplacer(Inst, New);
3633 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3634 RemovedInsts.insert(Inst);
3635 /// The instructions removed here will be freed after completing
3636 /// optimizeBlock() for all blocks as we need to keep track of the
3637 /// removed instructions during promotion.
3638 Inst->removeFromParent();
3639 }
3640
3641 ~InstructionRemover() override { delete Replacer; }
3642
3643 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3644 InstructionRemover(const InstructionRemover &other) = delete;
3645
3646 /// Resurrect the instruction and reassign it to the proper uses if a
3647 /// new value was provided when building this action.
3648 void undo() override {
3649 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3650 Inserter.insert(Inst);
3651 if (Replacer)
3652 Replacer->undo();
3653 Hider.undo();
3654 RemovedInsts.erase(Inst);
3655 }
3656 };
3657
3658public:
3659 /// Restoration point.
3660 /// The restoration point is a pointer to an action instead of an iterator
3661 /// because the iterator may be invalidated but not the pointer.
3662 using ConstRestorationPt = const TypePromotionAction *;
3663
3664 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3665 : RemovedInsts(RemovedInsts) {}
3666
3667 /// Commit every change made in this transaction. Return true if any change
3668 /// happened.
3669 bool commit();
3670
3671 /// Undo all the changes made after the given point.
3672 void rollback(ConstRestorationPt Point);
3673
3674 /// Get the current restoration point.
3675 ConstRestorationPt getRestorationPoint() const;
3676
3677 /// \name API for IR modification with state keeping to support rollback.
3678 /// @{
3679 /// Same as Instruction::setOperand.
3680 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3681
3682 /// Same as Instruction::eraseFromParent.
3683 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3684
3685 /// Same as Value::replaceAllUsesWith.
3686 void replaceAllUsesWith(Instruction *Inst, Value *New);
3687
3688 /// Same as Value::mutateType.
3689 void mutateType(Instruction *Inst, Type *NewTy);
3690
3691 /// Same as IRBuilder::CreateTrunc.
3692 Value *createTrunc(Instruction *Opnd, Type *Ty);
3693
3694 /// Same as IRBuilder::CreateSExt.
3695 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3696
3697 /// Same as IRBuilder::CreateZExt.
3698 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3699
3700private:
3701 /// The ordered list of actions made so far.
3702 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3703
3704 using CommitPt =
3705 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3706
3707 SetOfInstrs &RemovedInsts;
3708};
3709
3710} // end anonymous namespace
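// [Editor's note] The following standalone sketch is not part of
// CodeGenPrepare.cpp. It shows the position-recording trick used by
// InsertionHandler above on a plain std::list<int>: remember the
// predecessor's iterator when there is one (it stays valid while other
// elements are erased), otherwise remember only "front of this container".
// All names are illustrative.
#include <cassert>
#include <iterator>
#include <list>

struct ListPosition {
  std::list<int> *L;
  bool HasPrev;
  std::list<int>::iterator Prev; // Only meaningful when HasPrev is true.

  ListPosition(std::list<int> &List, std::list<int>::iterator It)
      : L(&List), HasPrev(It != List.begin()) {
    if (HasPrev)
      Prev = std::prev(It);
  }

  // Re-insert Value at the recorded position.
  void reinsert(int Value) {
    if (HasPrev)
      L->insert(std::next(Prev), Value); // Right after the old predecessor.
    else
      L->push_front(Value);              // It used to be the first element.
  }
};

int main() {
  std::list<int> L = {1, 2, 3};
  auto It = std::next(L.begin()); // Points at 2.
  ListPosition Pos(L, It);
  L.erase(It);                    // L == {1, 3}.
  Pos.reinsert(2);                // L == {1, 2, 3} again.
  assert((L == std::list<int>{1, 2, 3}));
}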
3711
3712void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3713 Value *NewVal) {
3714 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3715 Inst, Idx, NewVal));
3716}
3717
3718void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3719 Value *NewVal) {
3720 Actions.push_back(
3721 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3722 Inst, RemovedInsts, NewVal));
3723}
3724
3725void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3726 Value *New) {
3727 Actions.push_back(
3728 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3729}
3730
3731void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3732 Actions.push_back(
3733 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3734}
3735
3736Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3737 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3738 Value *Val = Ptr->getBuiltValue();
3739 Actions.push_back(std::move(Ptr));
3740 return Val;
3741}
3742
3743Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3744 Type *Ty) {
3745 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3746 Value *Val = Ptr->getBuiltValue();
3747 Actions.push_back(std::move(Ptr));
3748 return Val;
3749}
3750
3751Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3752 Type *Ty) {
3753 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3754 Value *Val = Ptr->getBuiltValue();
3755 Actions.push_back(std::move(Ptr));
3756 return Val;
3757}
3758
3759TypePromotionTransaction::ConstRestorationPt
3760TypePromotionTransaction::getRestorationPoint() const {
3761 return !Actions.empty() ? Actions.back().get() : nullptr;
3762}
3763
3764bool TypePromotionTransaction::commit() {
3765 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3766 Action->commit();
3767 bool Modified = !Actions.empty();
3768 Actions.clear();
3769 return Modified;
3770}
3771
3772void TypePromotionTransaction::rollback(
3773 TypePromotionTransaction::ConstRestorationPt Point) {
3774 while (!Actions.empty() && Point != Actions.back().get()) {
3775 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3776 Curr->undo();
3777 }
3778}
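// [Editor's note] The following standalone sketch is not part of
// CodeGenPrepare.cpp. It is a minimal analogue of the commit/rollback
// protocol implemented above: every mutation records an undo closure, a
// restoration point is just a pointer to the newest recorded action,
// rollback(Point) undoes everything newer than that point, and commit()
// simply drops the undo information. All names are illustrative.
#include <cassert>
#include <functional>
#include <memory>
#include <vector>

class IntTransaction {
  // unique_ptr keeps each action's address stable while the vector grows,
  // so a restoration point (a raw pointer) remains valid.
  std::vector<std::unique_ptr<std::function<void()>>> Actions;

public:
  using RestorationPt = const std::function<void()> *;

  RestorationPt getRestorationPoint() const {
    return Actions.empty() ? nullptr : Actions.back().get();
  }

  // Mutate Target, remembering how to undo the change.
  void set(int &Target, int NewVal) {
    int OldVal = Target;
    Target = NewVal;
    Actions.push_back(std::make_unique<std::function<void()>>(
        [&Target, OldVal] { Target = OldVal; }));
  }

  void rollback(RestorationPt Point) {
    while (!Actions.empty() && Actions.back().get() != Point) {
      (*Actions.back())(); // Undo the most recent action.
      Actions.pop_back();
    }
  }

  bool commit() {
    bool Modified = !Actions.empty();
    Actions.clear();
    return Modified;
  }
};

int main() {
  int X = 1, Y = 2;
  IntTransaction TPT;
  TPT.set(X, 10);
  auto Point = TPT.getRestorationPoint();
  TPT.set(Y, 20);
  TPT.rollback(Point); // Undoes only the change to Y.
  assert(X == 10 && Y == 2);
  assert(TPT.commit()); // One action (the change to X) was kept.
}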
3779
3780namespace {
3781
3782/// A helper class for matching addressing modes.
3783///
3784/// This encapsulates the logic for matching the target-legal addressing modes.
3785class AddressingModeMatcher {
3786 SmallVectorImpl<Instruction *> &AddrModeInsts;
3787 const TargetLowering &TLI;
3788 const TargetRegisterInfo &TRI;
3789 const DataLayout &DL;
3790 const LoopInfo &LI;
3791 const std::function<const DominatorTree &()> getDTFn;
3792
3793 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3794 /// the memory instruction that we're computing this address for.
3795 Type *AccessTy;
3796 unsigned AddrSpace;
3797 Instruction *MemoryInst;
3798
3799 /// This is the addressing mode that we're building up. This is
3800 /// part of the return value of this addressing mode matching stuff.
3801 ExtAddrMode &AddrMode;
3802
3803 /// The instructions inserted by other CodeGenPrepare optimizations.
3804 const SetOfInstrs &InsertedInsts;
3805
3806 /// A map from the instructions to their type before promotion.
3807 InstrToOrigTy &PromotedInsts;
3808
3809 /// The ongoing transaction where every action should be registered.
3810 TypePromotionTransaction &TPT;
3811
3812 // A GEP whose offset is too large to be folded into the addressing mode.
3813 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3814
3815 /// This is set to true when we should not do profitability checks.
3816 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3817 bool IgnoreProfitability;
3818
3819 /// True if we are optimizing for size.
3820 bool OptSize = false;
3821
3822 ProfileSummaryInfo *PSI;
3823 BlockFrequencyInfo *BFI;
3824
3825 AddressingModeMatcher(
3826 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3827 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3828 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3829 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3830 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3831 TypePromotionTransaction &TPT,
3832 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3833 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3834 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3835 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3836 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3837 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3838 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3839 IgnoreProfitability = false;
3840 }
3841
3842public:
3843 /// Find the maximal addressing mode that a load/store of V can fold,
3844 /// given an access type of AccessTy. This returns a list of involved
3845 /// instructions in AddrModeInsts.
3846 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3847 /// optimizations.
3848 /// \p PromotedInsts maps the instructions to their type before promotion.
3849 /// \p TPT The ongoing transaction where every action should be registered.
3850 static ExtAddrMode
3851 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3852 SmallVectorImpl<Instruction *> &AddrModeInsts,
3853 const TargetLowering &TLI, const LoopInfo &LI,
3854 const std::function<const DominatorTree &()> getDTFn,
3855 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3856 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3857 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3858 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3859 ExtAddrMode Result;
3860
3861 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3862 AccessTy, AS, MemoryInst, Result,
3863 InsertedInsts, PromotedInsts, TPT,
3864 LargeOffsetGEP, OptSize, PSI, BFI)
3865 .matchAddr(V, 0);
3866 (void)Success;
3867 assert(Success && "Couldn't select *anything*?");
3868 return Result;
3869 }
3870
3871private:
3872 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3873 bool matchAddr(Value *Addr, unsigned Depth);
3874 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3875 bool *MovedAway = nullptr);
3876 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3877 ExtAddrMode &AMBefore,
3878 ExtAddrMode &AMAfter);
3879 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3880 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3881 Value *PromotedOperand) const;
3882};
3883
3884class PhiNodeSet;
3885
3886/// An iterator for PhiNodeSet.
3887class PhiNodeSetIterator {
3888 PhiNodeSet *const Set;
3889 size_t CurrentIndex = 0;
3890
3891public:
3892 /// The constructor. Start should point to either a valid element, or be equal
3893 /// to the size of the underlying SmallVector of the PhiNodeSet.
3894 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3895 PHINode *operator*() const;
3896 PhiNodeSetIterator &operator++();
3897 bool operator==(const PhiNodeSetIterator &RHS) const;
3898 bool operator!=(const PhiNodeSetIterator &RHS) const;
3899};
3900
3901/// Keeps a set of PHINodes.
3902///
3903/// This is a minimal set implementation for a specific use case:
3904/// It is very fast when there are very few elements, but also provides good
3905/// performance when there are many. It is similar to SmallPtrSet, but also
3906/// provides iteration by insertion order, which is deterministic and stable
3907 /// across runs. It is also similar to SmallSetVector, but provides removal of
3908 /// elements in O(1) time. This is achieved by not actually removing the element
3909 /// from the underlying vector, so it comes at the cost of using more memory, but
3910 /// that is fine, since PhiNodeSets are used as short-lived objects.
3911class PhiNodeSet {
3912 friend class PhiNodeSetIterator;
3913
3914 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3915 using iterator = PhiNodeSetIterator;
3916
3917 /// Keeps the elements in the order of their insertion in the underlying
3918 /// vector. To achieve constant time removal, it never deletes any element.
3919 SmallVector<PHINode *, 32> NodeList;
3920
3921 /// Keeps the elements in the underlying set implementation. This (and not the
3922 /// NodeList defined above) is the source of truth on whether an element
3923 /// is actually in the collection.
3924 MapType NodeMap;
3925
3926 /// Points to the first valid (not deleted) element when the set is not empty
3927 /// and the value is not zero. It equals the size of the underlying vector
3928 /// when the set is empty. When the value is 0, as in the beginning, the
3929 /// first element may or may not be valid.
3930 size_t FirstValidElement = 0;
3931
3932public:
3933 /// Inserts a new element to the collection.
3934 /// \returns true if the element is actually added, i.e. was not in the
3935 /// collection before the operation.
3936 bool insert(PHINode *Ptr) {
3937 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3938 NodeList.push_back(Ptr);
3939 return true;
3940 }
3941 return false;
3942 }
3943
3944 /// Removes the element from the collection.
3945 /// \returns whether the element is actually removed, i.e. was in the
3946 /// collection before the operation.
3947 bool erase(PHINode *Ptr) {
3948 if (NodeMap.erase(Ptr)) {
3949 SkipRemovedElements(FirstValidElement);
3950 return true;
3951 }
3952 return false;
3953 }
3954
3955 /// Removes all elements and clears the collection.
3956 void clear() {
3957 NodeMap.clear();
3958 NodeList.clear();
3959 FirstValidElement = 0;
3960 }
3961
3962 /// \returns an iterator that will iterate the elements in the order of
3963 /// insertion.
3964 iterator begin() {
3965 if (FirstValidElement == 0)
3966 SkipRemovedElements(FirstValidElement);
3967 return PhiNodeSetIterator(this, FirstValidElement);
3968 }
3969
3970 /// \returns an iterator that points to the end of the collection.
3971 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3972
3973 /// Returns the number of elements in the collection.
3974 size_t size() const { return NodeMap.size(); }
3975
3976 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
3977 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3978
3979private:
3980 /// Updates the CurrentIndex so that it will point to a valid element.
3981 ///
3982 /// If the element of NodeList at CurrentIndex is valid, it does not
3983 /// change it. If there are no more valid elements, it updates CurrentIndex
3984 /// to point to the end of the NodeList.
3985 void SkipRemovedElements(size_t &CurrentIndex) {
3986 while (CurrentIndex < NodeList.size()) {
3987 auto it = NodeMap.find(NodeList[CurrentIndex]);
3988 // If the element has been deleted and added again later, NodeMap will
3989 // point to a different index, so CurrentIndex will still be invalid.
3990 if (it != NodeMap.end() && it->second == CurrentIndex)
3991 break;
3992 ++CurrentIndex;
3993 }
3994 }
3995};
3996
3997PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3998 : Set(Set), CurrentIndex(Start) {}
3999
4000PHINode *PhiNodeSetIterator::operator*() const {
4001 assert(CurrentIndex < Set->NodeList.size() &&
4002 "PhiNodeSet access out of range");
4003 return Set->NodeList[CurrentIndex];
4004}
4005
4006PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4007 assert(CurrentIndex < Set->NodeList.size() &&
4008 "PhiNodeSet access out of range");
4009 ++CurrentIndex;
4010 Set->SkipRemovedElements(CurrentIndex);
4011 return *this;
4012}
4013
4014bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4015 return CurrentIndex == RHS.CurrentIndex;
4016}
4017
4018bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4019 return !((*this) == RHS);
4020}
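// [Editor's note] The following standalone sketch is not part of
// CodeGenPrepare.cpp. It shows the PhiNodeSet design in miniature, with int
// keys and standard containers: erase never shrinks the vector; instead,
// iteration skips entries whose map index no longer matches, which is what
// gives O(1) removal together with deterministic insertion-order iteration.
// The FirstValidElement caching of the real class is omitted. All names are
// illustrative.
#include <cassert>
#include <cstddef>
#include <unordered_map>
#include <vector>

class OrderedIntSet {
  std::vector<int> List;               // Insertion order; never shrunk.
  std::unordered_map<int, size_t> Map; // Source of truth for membership.

public:
  bool insert(int V) {
    if (!Map.emplace(V, List.size()).second)
      return false; // Already present.
    List.push_back(V);
    return true;
  }

  bool erase(int V) { return Map.erase(V) != 0; }

  size_t size() const { return Map.size(); }

  // Visit live elements in insertion order, skipping stale vector slots.
  template <typename Fn> void forEach(Fn F) const {
    for (size_t I = 0, E = List.size(); I != E; ++I) {
      auto It = Map.find(List[I]);
      if (It != Map.end() && It->second == I) // Still the live occurrence.
        F(List[I]);
    }
  }
};

int main() {
  OrderedIntSet S;
  S.insert(3);
  S.insert(1);
  S.insert(2);
  S.erase(1);
  std::vector<int> Seen;
  S.forEach([&](int V) { Seen.push_back(V); });
  assert((Seen == std::vector<int>{3, 2})); // Insertion order, minus erased.
  assert(S.size() == 2);
}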
4021
4022/// Keep track of simplification of Phi nodes.
4023 /// Accepts the set of all phi nodes and erases a phi node from this set
4024 /// if it is simplified.
4025class SimplificationTracker {
4026 DenseMap<Value *, Value *> Storage;
4027 const SimplifyQuery &SQ;
4028 // Tracks newly created Phi nodes. The elements are iterated by insertion
4029 // order.
4030 PhiNodeSet AllPhiNodes;
4031 // Tracks newly created Select nodes.
4032 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4033
4034public:
4035 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
4036
4037 Value *Get(Value *V) {
4038 do {
4039 auto SV = Storage.find(V);
4040 if (SV == Storage.end())
4041 return V;
4042 V = SV->second;
4043 } while (true);
4044 }
4045
4046 Value *Simplify(Value *Val) {
4047 SmallVector<Value *, 32> WorkList;
4048 SmallPtrSet<Value *, 32> Visited;
4049 WorkList.push_back(Val);
4050 while (!WorkList.empty()) {
4051 auto *P = WorkList.pop_back_val();
4052 if (!Visited.insert(P).second)
4053 continue;
4054 if (auto *PI = dyn_cast<Instruction>(P))
4055 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
4056 for (auto *U : PI->users())
4057 WorkList.push_back(cast<Value>(U));
4058 Put(PI, V);
4059 PI->replaceAllUsesWith(V);
4060 if (auto *PHI = dyn_cast<PHINode>(PI))
4061 AllPhiNodes.erase(PHI);
4062 if (auto *Select = dyn_cast<SelectInst>(PI))
4063 AllSelectNodes.erase(Select);
4064 PI->eraseFromParent();
4065 }
4066 }
4067 return Get(Val);
4068 }
4069
4070 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4071
4072 void ReplacePhi(PHINode *From, PHINode *To) {
4073 Value *OldReplacement = Get(From);
4074 while (OldReplacement != From) {
4075 From = To;
4076 To = dyn_cast<PHINode>(OldReplacement);
4077 OldReplacement = Get(From);
4078 }
4079 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4080 Put(From, To);
4081 From->replaceAllUsesWith(To);
4082 AllPhiNodes.erase(From);
4083 From->eraseFromParent();
4084 }
4085
4086 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4087
4088 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4089
4090 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4091
4092 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4093
4094 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4095
4096 void destroyNewNodes(Type *CommonType) {
4097 // For safe erasing, replace the uses with dummy value first.
4098 auto *Dummy = PoisonValue::get(CommonType);
4099 for (auto *I : AllPhiNodes) {
4100 I->replaceAllUsesWith(Dummy);
4101 I->eraseFromParent();
4102 }
4103 AllPhiNodes.clear();
4104 for (auto *I : AllSelectNodes) {
4105 I->replaceAllUsesWith(Dummy);
4106 I->eraseFromParent();
4107 }
4108 AllSelectNodes.clear();
4109 }
4110};
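// [Editor's note] The following standalone sketch is not part of
// CodeGenPrepare.cpp. It isolates the replacement-chasing idea behind
// SimplificationTracker::Get on plain strings: put() records "From is now
// To", and get() follows the chain of recorded replacements until it reaches
// a value that was never replaced. All names are illustrative.
#include <cassert>
#include <string>
#include <unordered_map>

class ReplacementTracker {
  std::unordered_map<std::string, std::string> Storage;

public:
  void put(const std::string &From, const std::string &To) {
    Storage.emplace(From, To); // First recorded replacement wins, as in Put().
  }

  std::string get(std::string V) const {
    // Follow the replacement chain to its end.
    for (auto It = Storage.find(V); It != Storage.end(); It = Storage.find(V))
      V = It->second;
    return V;
  }
};

int main() {
  ReplacementTracker T;
  T.put("a", "b"); // a was simplified to b ...
  T.put("b", "c"); // ... and b was later simplified to c.
  assert(T.get("a") == "c");
  assert(T.get("x") == "x"); // Never replaced: returned unchanged.
}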
4111
4112/// A helper class for combining addressing modes.
4113class AddressingModeCombiner {
4114 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4115 typedef std::pair<PHINode *, PHINode *> PHIPair;
4116
4117private:
4118 /// The addressing modes we've collected.
4119 SmallVector<ExtAddrMode, 16> AddrModes;
4120
4121 /// The field in which the AddrModes differ, when we have more than one.
4122 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4123
4124 /// Are the AddrModes that we have all just equal to their original values?
4125 bool AllAddrModesTrivial = true;
4126
4127 /// Common Type for all different fields in addressing modes.
4128 Type *CommonType = nullptr;
4129
4130 /// SimplifyQuery for simplifyInstruction utility.
4131 const SimplifyQuery &SQ;
4132
4133 /// Original Address.
4134 Value *Original;
4135
4136 /// Common value among addresses
4137 Value *CommonValue = nullptr;
4138
4139public:
4140 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
4141 : SQ(_SQ), Original(OriginalValue) {}
4142
4143 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4144
4145 /// Get the combined AddrMode
4146 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4147
4148 /// Add a new AddrMode if it's compatible with the AddrModes we already
4149 /// have.
4150 /// \return True iff we succeeded in doing so.
4151 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4152 // Take note of whether we have any non-trivial AddrModes, as we need to detect
4153 // when all AddrModes are trivial, as then we would introduce a phi or select
4154 // which just duplicates what's already there.
4155 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4156
4157 // If this is the first addrmode then everything is fine.
4158 if (AddrModes.empty()) {
4159 AddrModes.emplace_back(NewAddrMode);
4160 return true;
4161 }
4162
4163 // Figure out how different this is from the other address modes, which we
4164 // can do just by comparing against the first one given that we only care
4165 // about the cumulative difference.
4166 ExtAddrMode::FieldName ThisDifferentField =
4167 AddrModes[0].compare(NewAddrMode);
4168 if (DifferentField == ExtAddrMode::NoField)
4169 DifferentField = ThisDifferentField;
4170 else if (DifferentField != ThisDifferentField)
4171 DifferentField = ExtAddrMode::MultipleFields;
4172
4173 // If NewAddrMode differs in more than one dimension we cannot handle it.
4174 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4175
4176 // If the Scale field is different then we reject.
4177 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4178
4179 // We also must reject the case when the base offset is different and the
4180 // scaled register is not null: we cannot handle this case because the merge
4181 // of the different offsets would have to be used as the ScaledReg.
4182 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4183 !NewAddrMode.ScaledReg);
4184
4185 // We also must reject the case when the GV is different and a BaseReg is
4186 // installed, because we want to use the base register as a merge of the GV values.
4187 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4188 !NewAddrMode.HasBaseReg);
4189
4190 // Even if NewAddrMode is the same, we still need to collect it because the
4191 // original value is different. And later we will need all original values
4192 // as anchors while finding the common Phi node.
4193 if (CanHandle)
4194 AddrModes.emplace_back(NewAddrMode);
4195 else
4196 AddrModes.clear();
4197
4198 return CanHandle;
4199 }
4200
4201 /// Combine the addressing modes we've collected into a single
4202 /// addressing mode.
4203 /// \return True iff we successfully combined them or we only had one so
4204 /// didn't need to combine them anyway.
4205 bool combineAddrModes() {
4206 // If we have no AddrModes then they can't be combined.
4207 if (AddrModes.size() == 0)
4208 return false;
4209
4210 // A single AddrMode can trivially be combined.
4211 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4212 return true;
4213
4214 // If the AddrModes we collected are all just equal to the value they are
4215 // derived from then combining them wouldn't do anything useful.
4216 if (AllAddrModesTrivial)
4217 return false;
4218
4219 if (!addrModeCombiningAllowed())
4220 return false;
4221
4222 // Build a map from <original value, basic block where we saw it> to the
4223 // value of the base register.
4224 // Bail out if there is no common type.
4225 FoldAddrToValueMapping Map;
4226 if (!initializeMap(Map))
4227 return false;
4228
4229 CommonValue = findCommon(Map);
4230 if (CommonValue)
4231 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4232 return CommonValue != nullptr;
4233 }
4234
4235private:
4236 /// `CommonValue` may be a placeholder inserted by us.
4237 /// If the placeholder is not used, we should remove this dead instruction.
4238 void eraseCommonValueIfDead() {
4239 if (CommonValue && CommonValue->use_empty())
4240 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4241 CommonInst->eraseFromParent();
4242 }
4243
4244 /// Initialize Map with anchor values. For each address seen,
4245 /// we set the value of the differing field seen in that address.
4246 /// At the same time we find a common type for the differing field, which we
4247 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
4248 /// Return false if no common type is found.
4249 bool initializeMap(FoldAddrToValueMapping &Map) {
4250 // Keep track of keys where the value is null. We will need to replace it
4251 // with constant null when we know the common type.
4252 SmallVector<Value *, 2> NullValue;
4253 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4254 for (auto &AM : AddrModes) {
4255 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4256 if (DV) {
4257 auto *Type = DV->getType();
4258 if (CommonType && CommonType != Type)
4259 return false;
4260 CommonType = Type;
4261 Map[AM.OriginalValue] = DV;
4262 } else {
4263 NullValue.push_back(AM.OriginalValue);
4264 }
4265 }
4266 assert(CommonType && "At least one non-null value must be!");
4267 for (auto *V : NullValue)
4268 Map[V] = Constant::getNullValue(CommonType);
4269 return true;
4270 }
4271
4272 /// We have a mapping between a value A and another value B, where B was a
4273 /// field in the addressing mode represented by A. We also have an original
4274 /// value C representing the address we start with. Traversing from C through
4275 /// phis and selects, we ended up with the A's in the map. This utility function
4276 /// tries to find a value V which is a field in addressing mode C such that, by
4277 /// traversing through phi nodes and selects, we end up at the corresponding
4278 /// values B in the map. The utility will create new Phis/Selects if needed.
4279 // The simple example looks as follows:
4280 // BB1:
4281 // p1 = b1 + 40
4282 // br cond BB2, BB3
4283 // BB2:
4284 // p2 = b2 + 40
4285 // br BB3
4286 // BB3:
4287 // p = phi [p1, BB1], [p2, BB2]
4288 // v = load p
4289 // Map is
4290 // p1 -> b1
4291 // p2 -> b2
4292 // Request is
4293 // p -> ?
4294 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4295 Value *findCommon(FoldAddrToValueMapping &Map) {
4296 // Tracks the simplification of newly created phi nodes. The reason we use
4297 // this mapping is that we will add newly created Phi nodes to AddrToBase.
4298 // Simplification of Phi nodes is recursive, so some Phi node may
4299 // be simplified after we added it to AddrToBase. In reality this
4300 // simplification is possible only if the original phis/selects were not
4301 // simplified yet.
4302 // Using this mapping we can find the current value in AddrToBase.
4303 SimplificationTracker ST(SQ);
4304
4305 // First step, DFS to create PHI nodes for all intermediate blocks.
4306 // Also fill traverse order for the second step.
4307 SmallVector<Value *, 32> TraverseOrder;
4308 InsertPlaceholders(Map, TraverseOrder, ST);
4309
4310 // Second Step, fill new nodes by merged values and simplify if possible.
4311 FillPlaceholders(Map, TraverseOrder, ST);
4312
4313 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4314 ST.destroyNewNodes(CommonType);
4315 return nullptr;
4316 }
4317
4318 // Now we'd like to match the new Phi nodes to existing ones.
4319 unsigned PhiNotMatchedCount = 0;
4320 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4321 ST.destroyNewNodes(CommonType);
4322 return nullptr;
4323 }
4324
4325 auto *Result = ST.Get(Map.find(Original)->second);
4326 if (Result) {
4327 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4328 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4329 }
4330 return Result;
4331 }
4332
4333 /// Try to match PHI node to Candidate.
4334 /// Matcher tracks the matched Phi nodes.
4335 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4336 SmallSetVector<PHIPair, 8> &Matcher,
4337 PhiNodeSet &PhiNodesToMatch) {
4338 SmallVector<PHIPair, 8> WorkList;
4339 Matcher.insert({PHI, Candidate});
4340 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4341 MatchedPHIs.insert(PHI);
4342 WorkList.push_back({PHI, Candidate});
4343 SmallSet<PHIPair, 8> Visited;
4344 while (!WorkList.empty()) {
4345 auto Item = WorkList.pop_back_val();
4346 if (!Visited.insert(Item).second)
4347 continue;
4348 // We iterate over all incoming values to Phi to compare them.
4349 // If the values are different, both of them are Phis, the first one is a
4350 // Phi we added (subject to match), and both of them are in the same basic
4351 // block, then we can match our pair if the values match. So we state that
4352 // these values match and add them to the work list to verify that.
4353 for (auto *B : Item.first->blocks()) {
4354 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4355 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4356 if (FirstValue == SecondValue)
4357 continue;
4358
4359 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4360 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4361
4362 // If one of them is not a Phi, or
4363 // the first one is not a Phi node from the set we'd like to match, or
4364 // the Phi nodes are from different basic blocks, then
4365 // we will not be able to match.
4366 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4367 FirstPhi->getParent() != SecondPhi->getParent())
4368 return false;
4369
4370 // If we already matched them then continue.
4371 if (Matcher.count({FirstPhi, SecondPhi}))
4372 continue;
4373 // So the values are different and do not match. So we need them to
4374 // match. (But we register no more than one match per PHI node, so that
4375 // we won't later try to replace them twice.)
4376 if (MatchedPHIs.insert(FirstPhi).second)
4377 Matcher.insert({FirstPhi, SecondPhi});
4378 // But we must check it.
4379 WorkList.push_back({FirstPhi, SecondPhi});
4380 }
4381 }
4382 return true;
4383 }
4384
4385 /// For the given set of PHI nodes (in the SimplificationTracker) try
4386 /// to find their equivalents.
4387 /// Returns false if this matching fails and creation of new Phi is disabled.
4388 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4389 unsigned &PhiNotMatchedCount) {
4390 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4391 // order, so the replacements (ReplacePhi) are also done in a deterministic
4392 // order.
4393 SmallSetVector<PHIPair, 8> Matched;
4394 SmallPtrSet<PHINode *, 8> WillNotMatch;
4395 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4396 while (PhiNodesToMatch.size()) {
4397 PHINode *PHI = *PhiNodesToMatch.begin();
4398
4399 // Add ourselves; if no Phi node in the basic block matches, we do not match.
4400 WillNotMatch.clear();
4401 WillNotMatch.insert(PHI);
4402
4403 // Traverse all Phis until we find an equivalent one or fail to do so.
4404 bool IsMatched = false;
4405 for (auto &P : PHI->getParent()->phis()) {
4406 // Skip new Phi nodes.
4407 if (PhiNodesToMatch.count(&P))
4408 continue;
4409 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4410 break;
4411 // If it does not match, collect all Phi nodes from the matcher.
4412 // If we end up with no match, then all of these Phi nodes will not match
4413 // later.
4414 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4415 Matched.clear();
4416 }
4417 if (IsMatched) {
4418 // Replace all matched values and erase them.
4419 for (auto MV : Matched)
4420 ST.ReplacePhi(MV.first, MV.second);
4421 Matched.clear();
4422 continue;
4423 }
4424 // If we are not allowed to create new nodes then bail out.
4425 if (!AllowNewPhiNodes)
4426 return false;
4427 // Just remove all seen values in matcher. They will not match anything.
4428 PhiNotMatchedCount += WillNotMatch.size();
4429 for (auto *P : WillNotMatch)
4430 PhiNodesToMatch.erase(P);
4431 }
4432 return true;
4433 }
4434 /// Fill the placeholders with values from predecessors and simplify them.
4435 void FillPlaceholders(FoldAddrToValueMapping &Map,
4436 SmallVectorImpl<Value *> &TraverseOrder,
4437 SimplificationTracker &ST) {
4438 while (!TraverseOrder.empty()) {
4439 Value *Current = TraverseOrder.pop_back_val();
4440 assert(Map.contains(Current) && "No node to fill!!!");
4441 Value *V = Map[Current];
4442
4443 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4444 // CurrentValue also must be Select.
4445 auto *CurrentSelect = cast<SelectInst>(Current);
4446 auto *TrueValue = CurrentSelect->getTrueValue();
4447 assert(Map.contains(TrueValue) && "No True Value!");
4448 Select->setTrueValue(ST.Get(Map[TrueValue]));
4449 auto *FalseValue = CurrentSelect->getFalseValue();
4450 assert(Map.contains(FalseValue) && "No False Value!");
4451 Select->setFalseValue(ST.Get(Map[FalseValue]));
4452 } else {
4453 // Must be a Phi node then.
4454 auto *PHI = cast<PHINode>(V);
4455 // Fill the Phi node with values from predecessors.
4456 for (auto *B : predecessors(PHI->getParent())) {
4457 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4458 assert(Map.contains(PV) && "No predecessor Value!");
4459 PHI->addIncoming(ST.Get(Map[PV]), B);
4460 }
4461 }
4462 Map[Current] = ST.Simplify(V);
4463 }
4464 }
4465
4466 /// Starting from the original value, recursively iterates over the def-use
4467 /// chain up to known ending values represented in a map. For each traversed
4468 /// phi/select it inserts a placeholder Phi or Select.
4469 /// Reports all newly created Phi/Select nodes by adding them to the set.
4470 /// Also reports the order in which the values have been traversed.
4471 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4472 SmallVectorImpl<Value *> &TraverseOrder,
4473 SimplificationTracker &ST) {
4474 SmallVector<Value *, 32> Worklist;
4475 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4476 "Address must be a Phi or Select node");
4477 auto *Dummy = PoisonValue::get(CommonType);
4478 Worklist.push_back(Original);
4479 while (!Worklist.empty()) {
4480 Value *Current = Worklist.pop_back_val();
4481 // If it is already visited or it is an ending value then skip it.
4482 if (Map.contains(Current))
4483 continue;
4484 TraverseOrder.push_back(Current);
4485
4486 // CurrentValue must be a Phi node or select. All others must be covered
4487 // by anchors.
4488 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4489 // Is it OK to get metadata from OrigSelect?!
4490 // Create a Select placeholder with dummy value.
4491 SelectInst *Select =
4492 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4493 CurrentSelect->getName(),
4494 CurrentSelect->getIterator(), CurrentSelect);
4495 Map[Current] = Select;
4496 ST.insertNewSelect(Select);
4497 // We are interested in True and False values.
4498 Worklist.push_back(CurrentSelect->getTrueValue());
4499 Worklist.push_back(CurrentSelect->getFalseValue());
4500 } else {
4501 // It must be a Phi node then.
4502 PHINode *CurrentPhi = cast<PHINode>(Current);
4503 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4504 PHINode *PHI =
4505 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4506 Map[Current] = PHI;
4507 ST.insertNewPhi(PHI);
4508 append_range(Worklist, CurrentPhi->incoming_values());
4509 }
4510 }
4511 }
4512
4513 bool addrModeCombiningAllowed() {
4514 if (DisableComplexAddrModes)
4515 return false;
4516 switch (DifferentField) {
4517 default:
4518 return false;
4519 case ExtAddrMode::BaseRegField:
4520 return AddrSinkCombineBaseReg;
4521 case ExtAddrMode::BaseGVField:
4522 return AddrSinkCombineBaseGV;
4523 case ExtAddrMode::BaseOffsField:
4524 return AddrSinkCombineBaseOffs;
4525 case ExtAddrMode::ScaledRegField:
4526 return AddrSinkCombineScaledReg;
4527 }
4528 }
4529};
4530} // end anonymous namespace
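// [Editor's note] The following standalone sketch is not part of
// CodeGenPrepare.cpp. It reduces the cumulative-difference rule of
// AddressingModeCombiner::addNewAddrMode to two integer fields: candidates
// are accepted only while every collected mode differs from the first one in
// the same single field; any other disagreement discards the whole set. The
// trivial-mode bookkeeping and the extra Scale/BaseOffs/BaseGV restrictions
// of the real code are intentionally omitted. All names are illustrative.
#include <cassert>
#include <vector>

enum MiniDiff { MiniNoDiff, MiniOffsDiff, MiniRegDiff, MiniManyDiff };
struct MiniMode {
  int Reg;
  int Offs;
};

static MiniDiff compareModes(const MiniMode &A, const MiniMode &B) {
  if (A.Reg != B.Reg && A.Offs != B.Offs)
    return MiniManyDiff;
  if (A.Reg != B.Reg)
    return MiniRegDiff;
  if (A.Offs != B.Offs)
    return MiniOffsDiff;
  return MiniNoDiff;
}

struct MiniCombiner {
  std::vector<MiniMode> Modes;
  MiniDiff Different = MiniNoDiff;

  bool add(const MiniMode &M) {
    if (Modes.empty()) {
      Modes.push_back(M);
      return true;
    }
    MiniDiff This = compareModes(Modes.front(), M);
    if (Different == MiniNoDiff)
      Different = This;
    else if (Different != This)
      Different = MiniManyDiff;
    if (Different == MiniManyDiff) {
      Modes.clear(); // Unmergeable: drop everything collected so far.
      return false;
    }
    Modes.push_back(M);
    return true;
  }
};

int main() {
  MiniCombiner C;
  assert(C.add({1, 8}));   // First mode is always accepted.
  assert(C.add({2, 8}));   // Differs only in Reg: still mergeable.
  assert(!C.add({1, 16})); // Offs now differs as well: give up.
}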
4531
4532/// Try adding ScaleReg*Scale to the current addressing mode.
4533/// Return true and update AddrMode if this addr mode is legal for the target,
4534/// false if not.
4535bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4536 unsigned Depth) {
4537 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4538 // mode. Just process that directly.
4539 if (Scale == 1)
4540 return matchAddr(ScaleReg, Depth);
4541
4542 // If the scale is 0, it takes nothing to add this.
4543 if (Scale == 0)
4544 return true;
4545
4546 // If we already have a scale of this value, we can add to it, otherwise, we
4547 // need an available scale field.
4548 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4549 return false;
4550
4551 ExtAddrMode TestAddrMode = AddrMode;
4552
4553 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4554 // [A+B + A*7] -> [B+A*8].
4555 TestAddrMode.Scale += Scale;
4556 TestAddrMode.ScaledReg = ScaleReg;
4557
4558 // If the new address isn't legal, bail out.
4559 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4560 return false;
4561
4562 // It was legal, so commit it.
4563 AddrMode = TestAddrMode;
4564
4565 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4566 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4567 // X*Scale + C*Scale to addr mode. If we found an available IV increment, do not
4568 // go any further: we can reuse it and cannot eliminate it.
4569 ConstantInt *CI = nullptr;
4570 Value *AddLHS = nullptr;
4571 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4572 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4573 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4574 TestAddrMode.InBounds = false;
4575 TestAddrMode.ScaledReg = AddLHS;
4576 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4577
4578 // If this addressing mode is legal, commit it and remember that we folded
4579 // this instruction.
4580 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4581 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4582 AddrMode = TestAddrMode;
4583 return true;
4584 }
4585 // Restore status quo.
4586 TestAddrMode = AddrMode;
4587 }
4588
4589 // If this is an add recurrence with a constant step, return the increment
4590 // instruction and the canonicalized step.
4591 auto GetConstantStep =
4592 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4593 auto *PN = dyn_cast<PHINode>(V);
4594 if (!PN)
4595 return std::nullopt;
4596 auto IVInc = getIVIncrement(PN, &LI);
4597 if (!IVInc)
4598 return std::nullopt;
4599 // TODO: The result of the intrinsics above is two's complement. However, when
4600 // IV inc is expressed as add or sub, iv.next is potentially a poison value.
4601 // If it has nuw or nsw flags, we need to make sure that these flags are
4602 // inferable at the point of the memory instruction. Otherwise we are replacing
4603 // a well-defined two's complement computation with poison. Currently, to avoid
4604 // potentially complex analysis needed to prove this, we reject such cases.
4605 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4606 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4607 return std::nullopt;
4608 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4609 return std::make_pair(IVInc->first, ConstantStep->getValue());
4610 return std::nullopt;
4611 };
4612
4613 // Try to account for the following special case:
4614 // 1. ScaleReg is an inductive variable;
4615 // 2. We use it with non-zero offset;
4616 // 3. IV's increment is available at the point of memory instruction.
4617 //
4618 // In this case, we may reuse the IV increment instead of the IV Phi to
4619 // achieve the following advantages:
4620 // 1. If IV step matches the offset, we will have no need in the offset;
4621 // 2. Even if they don't match, we will reduce the overlap of living IV
4622 // and IV increment, that will potentially lead to better register
4623 // assignment.
4624 if (AddrMode.BaseOffs) {
4625 if (auto IVStep = GetConstantStep(ScaleReg)) {
4626 Instruction *IVInc = IVStep->first;
4627 // The following assert is important to ensure a lack of infinite loops.
4628 // This transform is (intentionally) the inverse of the one just above.
4629 // If they don't agree on the definition of an increment, we'd alternate
4630 // back and forth indefinitely.
4631 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4632 APInt Step = IVStep->second;
4633 APInt Offset = Step * AddrMode.Scale;
4634 if (Offset.isSignedIntN(64)) {
4635 TestAddrMode.InBounds = false;
4636 TestAddrMode.ScaledReg = IVInc;
4637 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4638 // If this addressing mode is legal, commit it.
4639 // (Note that we defer the (expensive) domtree base legality check
4640 // to the very last possible point.)
4641 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4642 getDTFn().dominates(IVInc, MemoryInst)) {
4643 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4644 AddrMode = TestAddrMode;
4645 return true;
4646 }
4647 // Restore status quo.
4648 TestAddrMode = AddrMode;
4649 }
4650 }
4651 }
4652
4653 // Otherwise, just return what we have.
4654 return true;
4655}
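// [Editor's note] The following standalone sketch is not part of
// CodeGenPrepare.cpp. It spells out the arithmetic behind the ScaleReg
// rewrite in matchScaledValue above: when the scaled register is itself
// "X + C", Base + (X + C)*Scale is re-expressed as (Base + C*Scale) + X*Scale,
// so the constant moves into BaseOffs and X becomes the scaled register.
// All names are illustrative.
#include <cassert>
#include <cstdint>

struct MiniAddr {
  int64_t BaseOffs;
  int64_t ScaledValue; // Stands in for the SSA value being scaled.
  int64_t Scale;

  int64_t eval() const { return BaseOffs + ScaledValue * Scale; }
};

// Fold ScaledValue == X + C into the addressing mode.
static MiniAddr foldAddIntoScale(MiniAddr AM, int64_t X, int64_t C) {
  AM.BaseOffs += C * AM.Scale;
  AM.ScaledValue = X;
  return AM;
}

int main() {
  // (X + 3) scaled by 4 with base offset 8, where X == 5.
  MiniAddr Before{8, 5 + 3, 4};
  MiniAddr After = foldAddIntoScale(Before, /*X=*/5, /*C=*/3);
  assert(Before.eval() == After.eval()); // 8 + 8*4 == 20 + 5*4 == 40.
  assert(After.BaseOffs == 20 && After.Scale == 4);
}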
4656
4657/// This is a little filter, which returns true if an addressing computation
4658/// involving I might be folded into a load/store accessing it.
4659/// This doesn't need to be perfect, but needs to accept at least
4660 /// the set of instructions that MatchOperationAddr can.
4661static bool MightBeFoldableInst(Instruction *I) {
4662 switch (I->getOpcode()) {
4663 case Instruction::BitCast:
4664 case Instruction::AddrSpaceCast:
4665 // Don't touch identity bitcasts.
4666 if (I->getType() == I->getOperand(0)->getType())
4667 return false;
4668 return I->getType()->isIntOrPtrTy();
4669 case Instruction::PtrToInt:
4670 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4671 return true;
4672 case Instruction::IntToPtr:
4673 // We know the input is intptr_t, so this is foldable.
4674 return true;
4675 case Instruction::Add:
4676 return true;
4677 case Instruction::Mul:
4678 case Instruction::Shl:
4679 // Can only handle X*C and X << C.
4680 return isa<ConstantInt>(I->getOperand(1));
4681 case Instruction::GetElementPtr:
4682 return true;
4683 default:
4684 return false;
4685 }
4686}
4687
4688/// Check whether or not \p Val is a legal instruction for \p TLI.
4689/// \note \p Val is assumed to be the product of some type promotion.
4690/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4691 /// to be legal, as the non-promoted value would have had the same state.
4692static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4693 const DataLayout &DL, Value *Val) {
4694 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4695 if (!PromotedInst)
4696 return false;
4697 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4698 // If the ISDOpcode is undefined, it was undefined before the promotion.
4699 if (!ISDOpcode)
4700 return true;
4701 // Otherwise, check if the promoted instruction is legal or not.
4702 return TLI.isOperationLegalOrCustom(
4703 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4704}
4705
4706namespace {
4707
4708/// Helper class to perform type promotion.
4709class TypePromotionHelper {
4710 /// Utility function to add a promoted instruction \p ExtOpnd to
4711 /// \p PromotedInsts and record the type of extension we have seen.
4712 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4713 Instruction *ExtOpnd, bool IsSExt) {
4714 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4715 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4716 if (!Inserted) {
4717 // If the new extension is same as original, the information in
4718 // PromotedInsts[ExtOpnd] is still correct.
4719 if (It->second.getInt() == ExtTy)
4720 return;
4721
4722 // Now the new extension is different from old extension, we make
4723 // the type information invalid by setting extension type to
4724 // BothExtension.
4725 ExtTy = BothExtension;
4726 }
4727 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4728 }
4729
4730 /// Utility function to query the original type of instruction \p Opnd
4731 /// with a matched extension type. If the extension doesn't match, we
4732 /// cannot use the information we had on the original type.
4733 /// BothExtension doesn't match any extension type.
4734 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4735 Instruction *Opnd, bool IsSExt) {
4736 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4737 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4738 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4739 return It->second.getPointer();
4740 return nullptr;
4741 }
4742
4743 /// Utility function to check whether or not a sign or zero extension
4744 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4745 /// either using the operands of \p Inst or promoting \p Inst.
4746 /// The type of the extension is defined by \p IsSExt.
4747 /// In other words, check if:
4748 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4749 /// #1 Promotion applies:
4750 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4751 /// #2 Operand reuses:
4752 /// ext opnd1 to ConsideredExtType.
4753 /// \p PromotedInsts maps the instructions to their type before promotion.
4754 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4755 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4756
4757 /// Utility function to determine if \p OpIdx should be promoted when
4758 /// promoting \p Inst.
4759 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4760 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4761 }
4762
4763 /// Utility function to promote the operand of \p Ext when this
4764 /// operand is a promotable trunc or sext or zext.
4765 /// \p PromotedInsts maps the instructions to their type before promotion.
4766 /// \p CreatedInstsCost[out] contains the cost of all instructions
4767 /// created to promote the operand of Ext.
4768 /// Newly added extensions are inserted in \p Exts.
4769 /// Newly added truncates are inserted in \p Truncs.
4770 /// Should never be called directly.
4771 /// \return The promoted value which is used instead of Ext.
4772 static Value *promoteOperandForTruncAndAnyExt(
4773 Instruction *Ext, TypePromotionTransaction &TPT,
4774 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4775 SmallVectorImpl<Instruction *> *Exts,
4776 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4777
4778 /// Utility function to promote the operand of \p Ext when this
4779 /// operand is promotable and is not a supported trunc or sext.
4780 /// \p PromotedInsts maps the instructions to their type before promotion.
4781 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4782 /// created to promote the operand of Ext.
4783 /// Newly added extensions are inserted in \p Exts.
4784 /// Newly added truncates are inserted in \p Truncs.
4785 /// Should never be called directly.
4786 /// \return The promoted value which is used instead of Ext.
4787 static Value *promoteOperandForOther(Instruction *Ext,
4788 TypePromotionTransaction &TPT,
4789 InstrToOrigTy &PromotedInsts,
4790 unsigned &CreatedInstsCost,
4791 SmallVectorImpl<Instruction *> *Exts,
4792 SmallVectorImpl<Instruction *> *Truncs,
4793 const TargetLowering &TLI, bool IsSExt);
4794
4795 /// \see promoteOperandForOther.
4796 static Value *signExtendOperandForOther(
4797 Instruction *Ext, TypePromotionTransaction &TPT,
4798 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4799 SmallVectorImpl<Instruction *> *Exts,
4800 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4801 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4802 Exts, Truncs, TLI, true);
4803 }
4804
4805 /// \see promoteOperandForOther.
4806 static Value *zeroExtendOperandForOther(
4807 Instruction *Ext, TypePromotionTransaction &TPT,
4808 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4809 SmallVectorImpl<Instruction *> *Exts,
4810 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4811 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4812 Exts, Truncs, TLI, false);
4813 }
4814
4815public:
4816 /// Type for the utility function that promotes the operand of Ext.
4817 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4818 InstrToOrigTy &PromotedInsts,
4819 unsigned &CreatedInstsCost,
4820 SmallVectorImpl<Instruction *> *Exts,
4821 SmallVectorImpl<Instruction *> *Truncs,
4822 const TargetLowering &TLI);
4823
4824 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4825 /// action to promote the operand of \p Ext instead of using Ext.
4826 /// \return NULL if no promotable action is possible with the current
4827 /// sign extension.
4828 /// \p InsertedInsts keeps track of all the instructions inserted by the
4829 /// other CodeGenPrepare optimizations. This information is important
4830 /// because we do not want to promote these instructions as CodeGenPrepare
4831 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4832 /// \p PromotedInsts maps the instructions to their type before promotion.
4833 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4834 const TargetLowering &TLI,
4835 const InstrToOrigTy &PromotedInsts);
4836};
4837
4838} // end anonymous namespace
4839
4840bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4841 Type *ConsideredExtType,
4842 const InstrToOrigTy &PromotedInsts,
4843 bool IsSExt) {
4844 // The promotion helper does not know how to deal with vector types yet.
4845 // To be able to fix that, we would need to fix the places where we
4846 // statically extend, e.g., constants and such.
4847 if (Inst->getType()->isVectorTy())
4848 return false;
4849
4850 // We can always get through zext.
4851 if (isa<ZExtInst>(Inst))
4852 return true;
4853
4854 // sext(sext) is ok too.
4855 if (IsSExt && isa<SExtInst>(Inst))
4856 return true;
4857
4858 // We can get through a binary operator if doing so is legal. In other
4859 // words, the binary operator must have a nuw or nsw flag.
4860 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4861 if (isa<OverflowingBinaryOperator>(BinOp) &&
4862 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4863 (IsSExt && BinOp->hasNoSignedWrap())))
4864 return true;
4865
4866 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4867 if ((Inst->getOpcode() == Instruction::And ||
4868 Inst->getOpcode() == Instruction::Or))
4869 return true;
4870
4871 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4872 if (Inst->getOpcode() == Instruction::Xor) {
4873 // Make sure it is not a NOT.
4874 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4875 if (!Cst->getValue().isAllOnes())
4876 return true;
4877 }
4878
4879 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4880 // It may change a poisoned value into a regular value, like
4881 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4882 // where the left-hand side is a poisoned value and the right-hand side
4883 // is a regular value. It should be OK since undef covers any valid value.
4884 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4885 return true;
4886
4887 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4888 // It may change a poisoned value into a regular value, like
4889 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4890 // where the left-hand side is a poisoned value and the right-hand side
4891 // is a regular value. It should be OK since undef covers any valid value.
4892 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4893 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4894 if (ExtInst->hasOneUse()) {
4895 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4896 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4897 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4898 if (Cst &&
4899 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4900 return true;
4901 }
4902 }
4903 }
4904
4905 // Check if we can do the following simplification.
4906 // ext(trunc(opnd)) --> ext(opnd)
4907 if (!isa<TruncInst>(Inst))
4908 return false;
4909
4910 Value *OpndVal = Inst->getOperand(0);
4911 // Check if we can use this operand in the extension.
4912 // If the type is larger than the result type of the extension, we cannot.
4913 if (!OpndVal->getType()->isIntegerTy() ||
4914 OpndVal->getType()->getIntegerBitWidth() >
4915 ConsideredExtType->getIntegerBitWidth())
4916 return false;
4917
4918 // If the operand of the truncate is not an instruction, we will not have
4919 // any information on the dropped bits.
4920 // (Actually we could for constants but it is not worth the extra logic).
4921 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4922 if (!Opnd)
4923 return false;
4924
4925 // Check if the source type is narrow enough.
4926 // I.e., check that the trunc just drops extended bits of the same kind
4927 // as the extension.
4928 // #1 get the type of the operand and check the kind of the extended bits.
4929 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4930 if (OpndType)
4931 ;
4932 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4933 OpndType = Opnd->getOperand(0)->getType();
4934 else
4935 return false;
4936
4937 // #2 check that the truncate just drops extended bits.
4938 return Inst->getType()->getIntegerBitWidth() >=
4939 OpndType->getIntegerBitWidth();
4940}
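// Illustrative example of the ext(trunc(opnd)) case above (not from the
// original source):
//   %w = sext i16 %v to i32
//   %t = trunc i32 %w to i16
//   %e = sext i16 %t to i64
// Here the trunc only drops bits that are copies of the sign bit, so the sext
// can be moved through it.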
4941
4942TypePromotionHelper::Action TypePromotionHelper::getAction(
4943 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4944 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4945 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4946 "Unexpected instruction type");
4947 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4948 Type *ExtTy = Ext->getType();
4949 bool IsSExt = isa<SExtInst>(Ext);
4950 // If the operand of the extension is not an instruction, we cannot
4951 // get through.
4952 // If it is, check that we can get through.
4953 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4954 return nullptr;
4955
4956 // Do not promote if the operand has been added by codegenprepare.
4957 // Otherwise, it means we are undoing an optimization that is likely to be
4958 // redone, thus causing a potential infinite loop.
4959 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4960 return nullptr;
4961
4962 // SExt or Trunc instructions.
4963 // Return the related handler.
4964 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4965 isa<ZExtInst>(ExtOpnd))
4966 return promoteOperandForTruncAndAnyExt;
4967
4968 // Regular instruction.
4969 // Abort early if we will have to insert non-free instructions.
4970 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4971 return nullptr;
4972 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4973}
4974
4975Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4976 Instruction *SExt, TypePromotionTransaction &TPT,
4977 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4978 SmallVectorImpl<Instruction *> *Exts,
4979 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4980 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4981 // get through it and this method should not be called.
4982 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4983 Value *ExtVal = SExt;
4984 bool HasMergedNonFreeExt = false;
4985 if (isa<ZExtInst>(SExtOpnd)) {
4986 // Replace s|zext(zext(opnd))
4987 // => zext(opnd).
4988 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4989 Value *ZExt =
4990 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4991 TPT.replaceAllUsesWith(SExt, ZExt);
4992 TPT.eraseInstruction(SExt);
4993 ExtVal = ZExt;
4994 } else {
4995 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4996 // => z|sext(opnd).
4997 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4998 }
4999 CreatedInstsCost = 0;
5000
5001 // Remove dead code.
5002 if (SExtOpnd->use_empty())
5003 TPT.eraseInstruction(SExtOpnd);
5004
5005 // Check if the extension is still needed.
5006 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5007 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5008 if (ExtInst) {
5009 if (Exts)
5010 Exts->push_back(ExtInst);
5011 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5012 }
5013 return ExtVal;
5014 }
5015
5016 // At this point we have: ext ty opnd to ty.
5017 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5018 Value *NextVal = ExtInst->getOperand(0);
5019 TPT.eraseInstruction(ExtInst, NextVal);
5020 return NextVal;
5021}
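// Illustrative example (not from the original source): for
//   %z = zext i8 %v to i16
//   %e = sext i16 %z to i32
// the sext is replaced by a single
//   %z2 = zext i8 %v to i32
// since sign-extending an already zero-extended value is itself a zero
// extension.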
5022
5023Value *TypePromotionHelper::promoteOperandForOther(
5024 Instruction *Ext, TypePromotionTransaction &TPT,
5025 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5026 SmallVectorImpl<Instruction *> *Exts,
5027 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5028 bool IsSExt) {
5029 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5030 // get through it and this method should not be called.
5031 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5032 CreatedInstsCost = 0;
5033 if (!ExtOpnd->hasOneUse()) {
5034 // ExtOpnd will be promoted.
5035 // All its uses, but Ext, will need to use a truncated value of the
5036 // promoted version.
5037 // Create the truncate now.
5038 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5039 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5040 // Insert it just after the definition.
5041 ITrunc->moveAfter(ExtOpnd);
5042 if (Truncs)
5043 Truncs->push_back(ITrunc);
5044 }
5045
5046 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5047 // Restore the operand of Ext (which has been replaced by the previous call
5048 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5049 TPT.setOperand(Ext, 0, ExtOpnd);
5050 }
5051
5052 // Get through the Instruction:
5053 // 1. Update its type.
5054 // 2. Replace the uses of Ext by Inst.
5055 // 3. Extend each operand that needs to be extended.
5056
5057 // Remember the original type of the instruction before promotion.
5058 // This is useful to know that the high bits are sign extended bits.
5059 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5060 // Step #1.
5061 TPT.mutateType(ExtOpnd, Ext->getType());
5062 // Step #2.
5063 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5064 // Step #3.
5065 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5066 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5067 ++OpIdx) {
5068 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5069 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5070 !shouldExtOperand(ExtOpnd, OpIdx)) {
5071 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5072 continue;
5073 }
5074 // Check if we can statically extend the operand.
5075 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5076 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5077 LLVM_DEBUG(dbgs() << "Statically extend\n");
5078 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5079 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5080 : Cst->getValue().zext(BitWidth);
5081 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5082 continue;
5083 }
5084 // UndefValues are typed, so we have to statically extend them.
5085 if (isa<UndefValue>(Opnd)) {
5086 LLVM_DEBUG(dbgs() << "Statically extend\n");
5087 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5088 continue;
5089 }
5090
5091 // Otherwise we have to explicitly extend the operand.
5092 Value *ValForExtOpnd = IsSExt
5093 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5094 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5095 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5096 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5097 if (!InstForExtOpnd)
5098 continue;
5099
5100 if (Exts)
5101 Exts->push_back(InstForExtOpnd);
5102
5103 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5104 }
5105 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5106 TPT.eraseInstruction(Ext);
5107 return ExtOpnd;
5108}
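// Illustrative example (not from the original source): promoting through an
// nuw add,
//   %a = add nuw i32 %x, 42
//   %e = zext i32 %a to i64
// becomes
//   %x.z = zext i32 %x to i64
//   %a   = add nuw i64 %x.z, 42
// and all uses of %e now use the widened %a directly.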
5109
5110/// Check whether or not promoting an instruction to a wider type is profitable.
5111/// \p NewCost gives the cost of extension instructions created by the
5112/// promotion.
5113/// \p OldCost gives the cost of extension instructions before the promotion
5114/// plus the number of instructions that have been
5115/// matched in the addressing mode thanks to the promotion.
5116/// \p PromotedOperand is the value that has been promoted.
5117/// \return True if the promotion is profitable, false otherwise.
5118bool AddressingModeMatcher::isPromotionProfitable(
5119 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5120 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5121 << '\n');
5122 // The cost of the new extensions is greater than the cost of the
5123 // old extension plus what we folded.
5124 // This is not profitable.
5125 if (NewCost > OldCost)
5126 return false;
5127 if (NewCost < OldCost)
5128 return true;
5129 // The promotion is neutral but it may help folding the sign extension in
5130 // loads for instance.
5131 // Check that we did not create an illegal instruction.
5132 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5133}
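// Worked example (added for exposition): if the promotion created one non-free
// zext (NewCost = 1) while the original ext also cost 1 and one extra
// instruction was folded into the addressing mode (OldCost = 1 + 1 = 2), then
// NewCost < OldCost and the promotion is considered profitable.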
5134
5135/// Given an instruction or constant expr, see if we can fold the operation
5136/// into the addressing mode. If so, update the addressing mode and return
5137/// true, otherwise return false without modifying AddrMode.
5138/// If \p MovedAway is not NULL, it indicates whether or not AddrInst has
5139/// to be folded into the addressing mode on success.
5140/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5141/// mode because it has been moved away.
5142/// Thus AddrInst must not be added in the matched instructions.
5143/// This state can happen when AddrInst is a sext, since it may be moved away.
5144/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5145/// not be referenced anymore.
5146bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5147 unsigned Depth,
5148 bool *MovedAway) {
5149 // Avoid exponential behavior on extremely deep expression trees.
5150 if (Depth >= 5)
5151 return false;
5152
5153 // By default, all matched instructions stay in place.
5154 if (MovedAway)
5155 *MovedAway = false;
5156
5157 switch (Opcode) {
5158 case Instruction::PtrToInt:
5159 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5160 return matchAddr(AddrInst->getOperand(0), Depth);
5161 case Instruction::IntToPtr: {
5162 auto AS = AddrInst->getType()->getPointerAddressSpace();
5163 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5164 // This inttoptr is a no-op if the integer type is pointer sized.
5165 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5166 return matchAddr(AddrInst->getOperand(0), Depth);
5167 return false;
5168 }
5169 case Instruction::BitCast:
5170 // BitCast is always a noop, and we can handle it as long as it is
5171 // int->int or pointer->pointer (we don't want int<->fp or something).
5172 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5173 // Don't touch identity bitcasts. These were probably put here by LSR,
5174 // and we don't want to mess around with them. Assume it knows what it
5175 // is doing.
5176 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5177 return matchAddr(AddrInst->getOperand(0), Depth);
5178 return false;
5179 case Instruction::AddrSpaceCast: {
5180 unsigned SrcAS =
5181 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5182 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5183 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5184 return matchAddr(AddrInst->getOperand(0), Depth);
5185 return false;
5186 }
5187 case Instruction::Add: {
5188 // Check to see if we can merge in one operand, then the other. If so, we
5189 // win.
5190 ExtAddrMode BackupAddrMode = AddrMode;
5191 unsigned OldSize = AddrModeInsts.size();
5192 // Start a transaction at this point.
5193 // The LHS may match but not the RHS.
5194 // Therefore, we need a higher level restoration point to undo a partially
5195 // matched operation.
5196 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5197 TPT.getRestorationPoint();
5198
5199 // Try to match an integer constant second to increase its chance of ending
5200 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5201 int First = 0, Second = 1;
5202 if (isa<ConstantInt>(AddrInst->getOperand(First))
5203 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5204 std::swap(First, Second);
5205 AddrMode.InBounds = false;
5206 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5207 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5208 return true;
5209
5210 // Restore the old addr mode info.
5211 AddrMode = BackupAddrMode;
5212 AddrModeInsts.resize(OldSize);
5213 TPT.rollback(LastKnownGood);
5214
5215 // Otherwise this was over-aggressive. Try merging operands in the opposite
5216 // order.
5217 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5218 matchAddr(AddrInst->getOperand(First), Depth + 1))
5219 return true;
5220
5221 // Otherwise we definitely can't merge the ADD in.
5222 AddrMode = BackupAddrMode;
5223 AddrModeInsts.resize(OldSize);
5224 TPT.rollback(LastKnownGood);
5225 break;
5226 }
5227 // case Instruction::Or:
5228 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5229 // break;
5230 case Instruction::Mul:
5231 case Instruction::Shl: {
5232 // Can only handle X*C and X << C.
5233 AddrMode.InBounds = false;
5234 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5235 if (!RHS || RHS->getBitWidth() > 64)
5236 return false;
5237 int64_t Scale = Opcode == Instruction::Shl
5238 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5239 : RHS->getSExtValue();
5240
5241 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5242 }
5243 case Instruction::GetElementPtr: {
5244 // Scan the GEP. We check whether it contains constant offsets and at most
5245 // one variable offset.
5246 int VariableOperand = -1;
5247 unsigned VariableScale = 0;
5248
5249 int64_t ConstantOffset = 0;
5250 gep_type_iterator GTI = gep_type_begin(AddrInst);
5251 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5252 if (StructType *STy = GTI.getStructTypeOrNull()) {
5253 const StructLayout *SL = DL.getStructLayout(STy);
5254 unsigned Idx =
5255 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5256 ConstantOffset += SL->getElementOffset(Idx);
5257 } else {
5258 TypeSize TS = GTI.getSequentialElementStride(DL);
5259 if (TS.isNonZero()) {
5260 // The optimisations below currently only work for fixed offsets.
5261 if (TS.isScalable())
5262 return false;
5263 int64_t TypeSize = TS.getFixedValue();
5264 if (ConstantInt *CI =
5265 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5266 const APInt &CVal = CI->getValue();
5267 if (CVal.getSignificantBits() <= 64) {
5268 ConstantOffset += CVal.getSExtValue() * TypeSize;
5269 continue;
5270 }
5271 }
5272 // We only allow one variable index at the moment.
5273 if (VariableOperand != -1)
5274 return false;
5275
5276 // Remember the variable index.
5277 VariableOperand = i;
5278 VariableScale = TypeSize;
5279 }
5280 }
5281 }
5282
5283 // A common case is for the GEP to only do a constant offset. In this case,
5284 // just add it to the disp field and check validity.
5285 if (VariableOperand == -1) {
5286 AddrMode.BaseOffs += ConstantOffset;
5287 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5288 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5289 AddrMode.InBounds = false;
5290 return true;
5291 }
5292 AddrMode.BaseOffs -= ConstantOffset;
5293
5294 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5295 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5296 ConstantOffset > 0) {
5297 // Record GEPs with non-zero offsets as candidates for splitting in
5298 // the event that the offset cannot fit into the r+i addressing mode.
5299 // Simple and common case that only one GEP is used in calculating the
5300 // address for the memory access.
5301 Value *Base = AddrInst->getOperand(0);
5302 auto *BaseI = dyn_cast<Instruction>(Base);
5303 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5304 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5305 (BaseI && !isa<CastInst>(BaseI) &&
5306 !isa<GetElementPtrInst>(BaseI))) {
5307 // Make sure the parent block allows inserting non-PHI instructions
5308 // before the terminator.
5309 BasicBlock *Parent = BaseI ? BaseI->getParent()
5310 : &GEP->getFunction()->getEntryBlock();
5311 if (!Parent->getTerminator()->isEHPad())
5312 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5313 }
5314 }
5315
5316 return false;
5317 }
5318
5319 // Save the valid addressing mode in case we can't match.
5320 ExtAddrMode BackupAddrMode = AddrMode;
5321 unsigned OldSize = AddrModeInsts.size();
5322
5323 // See if the scale and offset amount is valid for this target.
5324 AddrMode.BaseOffs += ConstantOffset;
5325 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5326 AddrMode.InBounds = false;
5327
5328 // Match the base operand of the GEP.
5329 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5330 // If it couldn't be matched, just stuff the value in a register.
5331 if (AddrMode.HasBaseReg) {
5332 AddrMode = BackupAddrMode;
5333 AddrModeInsts.resize(OldSize);
5334 return false;
5335 }
5336 AddrMode.HasBaseReg = true;
5337 AddrMode.BaseReg = AddrInst->getOperand(0);
5338 }
5339
5340 // Match the remaining variable portion of the GEP.
5341 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5342 Depth)) {
5343 // If it couldn't be matched, try stuffing the base into a register
5344 // instead of matching it, and retrying the match of the scale.
5345 AddrMode = BackupAddrMode;
5346 AddrModeInsts.resize(OldSize);
5347 if (AddrMode.HasBaseReg)
5348 return false;
5349 AddrMode.HasBaseReg = true;
5350 AddrMode.BaseReg = AddrInst->getOperand(0);
5351 AddrMode.BaseOffs += ConstantOffset;
5352 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5353 VariableScale, Depth)) {
5354 // If even that didn't work, bail.
5355 AddrMode = BackupAddrMode;
5356 AddrModeInsts.resize(OldSize);
5357 return false;
5358 }
5359 }
5360
5361 return true;
5362 }
5363 case Instruction::SExt:
5364 case Instruction::ZExt: {
5365 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5366 if (!Ext)
5367 return false;
5368
5369 // Try to move this ext out of the way of the addressing mode.
5370 // Ask for a method for doing so.
5371 TypePromotionHelper::Action TPH =
5372 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5373 if (!TPH)
5374 return false;
5375
5376 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5377 TPT.getRestorationPoint();
5378 unsigned CreatedInstsCost = 0;
5379 unsigned ExtCost = !TLI.isExtFree(Ext);
5380 Value *PromotedOperand =
5381 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5382 // SExt has been moved away.
5383 // Thus either it will be rematched later in the recursive calls or it is
5384 // gone. Anyway, we must not fold it into the addressing mode at this point.
5385 // E.g.,
5386 // op = add opnd, 1
5387 // idx = ext op
5388 // addr = gep base, idx
5389 // is now:
5390 // promotedOpnd = ext opnd <- no match here
5391 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5392 // addr = gep base, op <- match
5393 if (MovedAway)
5394 *MovedAway = true;
5395
5396 assert(PromotedOperand &&
5397 "TypePromotionHelper should have filtered out those cases");
5398
5399 ExtAddrMode BackupAddrMode = AddrMode;
5400 unsigned OldSize = AddrModeInsts.size();
5401
5402 if (!matchAddr(PromotedOperand, Depth) ||
5403 // The total of the new cost is equal to the cost of the created
5404 // instructions.
5405 // The total of the old cost is equal to the cost of the extension plus
5406 // what we have saved in the addressing mode.
5407 !isPromotionProfitable(CreatedInstsCost,
5408 ExtCost + (AddrModeInsts.size() - OldSize),
5409 PromotedOperand)) {
5410 AddrMode = BackupAddrMode;
5411 AddrModeInsts.resize(OldSize);
5412 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5413 TPT.rollback(LastKnownGood);
5414 return false;
5415 }
5416
5417 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5418 AddrMode.replaceWith(Ext, PromotedOperand);
5419 return true;
5420 }
5421 case Instruction::Call:
5422 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5423 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5424 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5425 if (TLI.addressingModeSupportsTLS(GV))
5426 return matchAddr(AddrInst->getOperand(0), Depth);
5427 }
5428 }
5429 break;
5430 }
5431 return false;
5432}
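// Illustrative example (not from the original source): for
//   %p = getelementptr inbounds i32, ptr %base, i64 %i
//   %v = load i32, ptr %p
// the GetElementPtr case above typically yields AddrMode = { BaseReg = %base,
// ScaledReg = %i, Scale = 4 }, assuming the target reports that [r + 4*r]
// form as a legal addressing mode for an i32 access.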
5433
5434/// If we can, try to add the value of 'Addr' into the current addressing mode.
5435/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5436/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5437/// for the target.
5438///
5439bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5440 // Start a transaction at this point that we will rollback if the matching
5441 // fails.
5442 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5443 TPT.getRestorationPoint();
5444 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5445 if (CI->getValue().isSignedIntN(64)) {
5446 // Check if the addition would result in a signed overflow.
5447 int64_t Result;
5448 bool Overflow =
5449 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5450 if (!Overflow) {
5451 // Fold in immediates if legal for the target.
5452 AddrMode.BaseOffs = Result;
5453 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5454 return true;
5455 AddrMode.BaseOffs -= CI->getSExtValue();
5456 }
5457 }
5458 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5459 // If this is a global variable, try to fold it into the addressing mode.
5460 if (!AddrMode.BaseGV) {
5461 AddrMode.BaseGV = GV;
5462 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5463 return true;
5464 AddrMode.BaseGV = nullptr;
5465 }
5466 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5467 ExtAddrMode BackupAddrMode = AddrMode;
5468 unsigned OldSize = AddrModeInsts.size();
5469
5470 // Check to see if it is possible to fold this operation.
5471 bool MovedAway = false;
5472 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5473 // This instruction may have been moved away. If so, there is nothing
5474 // to check here.
5475 if (MovedAway)
5476 return true;
5477 // Okay, it's possible to fold this. Check to see if it is actually
5478 // *profitable* to do so. We use a simple cost model to avoid increasing
5479 // register pressure too much.
5480 if (I->hasOneUse() ||
5481 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5482 AddrModeInsts.push_back(I);
5483 return true;
5484 }
5485
5486 // It isn't profitable to do this, roll back.
5487 AddrMode = BackupAddrMode;
5488 AddrModeInsts.resize(OldSize);
5489 TPT.rollback(LastKnownGood);
5490 }
5491 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5492 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5493 return true;
5494 TPT.rollback(LastKnownGood);
5495 } else if (isa<ConstantPointerNull>(Addr)) {
5496 // Null pointer gets folded without affecting the addressing mode.
5497 return true;
5498 }
5499
5500 // Worst case, the target should support [reg] addressing modes. :)
5501 if (!AddrMode.HasBaseReg) {
5502 AddrMode.HasBaseReg = true;
5503 AddrMode.BaseReg = Addr;
5504 // Still check for legality in case the target supports [imm] but not [i+r].
5505 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5506 return true;
5507 AddrMode.HasBaseReg = false;
5508 AddrMode.BaseReg = nullptr;
5509 }
5510
5511 // If the base register is already taken, see if we can do [r+r].
5512 if (AddrMode.Scale == 0) {
5513 AddrMode.Scale = 1;
5514 AddrMode.ScaledReg = Addr;
5515 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5516 return true;
5517 AddrMode.Scale = 0;
5518 AddrMode.ScaledReg = nullptr;
5519 }
5520 // Couldn't match.
5521 TPT.rollback(LastKnownGood);
5522 return false;
5523}
5524
5525/// Check to see if all uses of OpVal by the specified inline asm call are due
5526/// to memory operands. If so, return true, otherwise return false.
5527static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5528 const TargetLowering &TLI,
5529 const TargetRegisterInfo &TRI) {
5530 const Function *F = CI->getFunction();
5531 TargetLowering::AsmOperandInfoVector TargetConstraints =
5532 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5533
5534 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5535 // Compute the constraint code and ConstraintType to use.
5536 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5537
5538 // If this asm operand is our Value*, and if it isn't an indirect memory
5539 // operand, we can't fold it! TODO: Also handle C_Address?
5540 if (OpInfo.CallOperandVal == OpVal &&
5541 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5542 !OpInfo.isIndirect))
5543 return false;
5544 }
5545
5546 return true;
5547}
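// Illustrative example (not from the original source): in
//   call void asm sideeffect "prefetcht0 $0", "*m"(ptr elementtype(i8) %p)
// %p is used only as an indirect memory ("m") operand, so this helper returns
// true and the address computation feeding %p remains a sinking candidate.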
5548
5549/// Recursively walk all the uses of I until we find a memory use.
5550/// If we find an obviously non-foldable instruction, return true.
5551/// Add accessed addresses and types to MemoryUses.
5552static bool FindAllMemoryUses(
5553 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5554 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5555 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5556 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5557 // If we already considered this instruction, we're done.
5558 if (!ConsideredInsts.insert(I).second)
5559 return false;
5560
5561 // If this is an obviously unfoldable instruction, bail out.
5562 if (!MightBeFoldableInst(I))
5563 return true;
5564
5565 // Loop over all the uses, recursively processing them.
5566 for (Use &U : I->uses()) {
5567 // Conservatively return true if we're seeing a large number or a deep chain
5568 // of users. This avoids excessive compilation times in pathological cases.
5569 if (SeenInsts++ >= MaxAddressUsersToScan)
5570 return true;
5571
5572 Instruction *UserI = cast<Instruction>(U.getUser());
5573 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5574 MemoryUses.push_back({&U, LI->getType()});
5575 continue;
5576 }
5577
5578 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5579 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5580 return true; // Storing addr, not into addr.
5581 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5582 continue;
5583 }
5584
5585 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5586 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5587 return true; // Storing addr, not into addr.
5588 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5589 continue;
5590 }
5591
5592 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5593 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5594 return true; // Storing addr, not into addr.
5595 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5596 continue;
5597 }
5598
5599 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5600 if (CI->hasFnAttr(Attribute::Cold)) {
5601 // If this is a cold call, we can sink the addressing calculation into
5602 // the cold path. See optimizeCallInst
5603 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5604 continue;
5605 }
5606
5607 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5608 if (!IA)
5609 return true;
5610
5611 // If this is a memory operand, we're cool, otherwise bail out.
5612 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5613 return true;
5614 continue;
5615 }
5616
5617 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5618 PSI, BFI, SeenInsts))
5619 return true;
5620 }
5621
5622 return false;
5623}
5624
5625static bool FindAllMemoryUses(
5626 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5627 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5628 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5629 unsigned SeenInsts = 0;
5630 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5631 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5632 PSI, BFI, SeenInsts);
5633}
5634
5635
5636/// Return true if Val is already known to be live at the use site that we're
5637/// folding it into. If so, there is no cost to include it in the addressing
5638/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5639/// instruction already.
5640bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5641 Value *KnownLive1,
5642 Value *KnownLive2) {
5643 // If Val is either of the known-live values, we know it is live!
5644 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5645 return true;
5646
5647 // All values other than instructions and arguments (e.g. constants) are live.
5648 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5649 return true;
5650
5651 // If Val is a constant sized alloca in the entry block, it is live; this is
5652 // true because it is just a reference to the stack/frame pointer, which is
5653 // live for the whole function.
5654 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5655 if (AI->isStaticAlloca())
5656 return true;
5657
5658 // Check to see if this value is already used in the memory instruction's
5659 // block. If so, it's already live into the block at the very least, so we
5660 // can reasonably fold it.
5661 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5662}
5663
5664/// It is possible for the addressing mode of the machine to fold the specified
5665/// instruction into a load or store that ultimately uses it.
5666/// However, the specified instruction has multiple uses.
5667/// Given this, it may actually increase register pressure to fold it
5668/// into the load. For example, consider this code:
5669///
5670/// X = ...
5671/// Y = X+1
5672/// use(Y) -> nonload/store
5673/// Z = Y+1
5674/// load Z
5675///
5676/// In this case, Y has multiple uses, and can be folded into the load of Z
5677/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5678/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5679/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5680/// number of computations either.
5681///
5682/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5683/// X was live across 'load Z' for other reasons, we actually *would* want to
5684/// fold the addressing mode in the Z case. This would make Y die earlier.
5685bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5686 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5687 if (IgnoreProfitability)
5688 return true;
5689
5690 // AMBefore is the addressing mode before this instruction was folded into it,
5691 // and AMAfter is the addressing mode after the instruction was folded. Get
5692 // the set of registers referenced by AMAfter and subtract out those
5693 // referenced by AMBefore: this is the set of values which folding in this
5694 // address extends the lifetime of.
5695 //
5696 // Note that there are only two potential values being referenced here,
5697 // BaseReg and ScaleReg (global addresses are always available, as are any
5698 // folded immediates).
5699 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5700
5701 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5702 // lifetime wasn't extended by adding this instruction.
5703 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5704 BaseReg = nullptr;
5705 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5706 ScaledReg = nullptr;
5707
5708 // If folding this instruction (and its subexprs) didn't extend any live
5709 // ranges, we're ok with it.
5710 if (!BaseReg && !ScaledReg)
5711 return true;
5712
5713 // If all uses of this instruction can have the address mode sunk into them,
5714 // we can remove the addressing mode and effectively trade one live register
5715 // for another (at worst.) In this context, folding an addressing mode into
5716 // the use is just a particularly nice way of sinking it.
5717 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5718 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5719 return false; // Has a non-memory, non-foldable use!
5720
5721 // Now that we know that all uses of this instruction are part of a chain of
5722 // computation involving only operations that could theoretically be folded
5723 // into a memory use, loop over each of these memory operation uses and see
5724 // if they could *actually* fold the instruction. The assumption is that
5725 // addressing modes are cheap and that duplicating the computation involved
5726 // many times is worthwhile, even on a fastpath. For sinking candidates
5727 // (i.e. cold call sites), this serves as a way to prevent excessive code
5728 // growth since most architectures have some reasonably small and fast way to
5729 // compute an effective address. (e.g., LEA on x86)
5730 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5731 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5732 Value *Address = Pair.first->get();
5733 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5734 Type *AddressAccessTy = Pair.second;
5735 unsigned AS = Address->getType()->getPointerAddressSpace();
5736
5737 // Do a match against the root of this address, ignoring profitability. This
5738 // will tell us if the addressing mode for the memory operation will
5739 // *actually* cover the shared instruction.
5740 ExtAddrMode Result;
5741 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5742 0);
5743 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5744 TPT.getRestorationPoint();
5745 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5746 AddressAccessTy, AS, UserI, Result,
5747 InsertedInsts, PromotedInsts, TPT,
5748 LargeOffsetGEP, OptSize, PSI, BFI);
5749 Matcher.IgnoreProfitability = true;
5750 bool Success = Matcher.matchAddr(Address, 0);
5751 (void)Success;
5752 assert(Success && "Couldn't select *anything*?");
5753
5754 // The match was only done to check profitability; the changes made are not
5755 // part of the original matcher. Therefore, they should be dropped,
5756 // otherwise the original matcher will not present the right state.
5757 TPT.rollback(LastKnownGood);
5758
5759 // If the match didn't cover I, then it won't be shared by it.
5760 if (!is_contained(MatchedAddrModeInsts, I))
5761 return false;
5762
5763 MatchedAddrModeInsts.clear();
5764 }
5765
5766 return true;
5767}
5768
5769/// Return true if the specified values are defined in a
5770/// different basic block than BB.
5771static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5772 if (Instruction *I = dyn_cast<Instruction>(V))
5773 return I->getParent() != BB;
5774 return false;
5775}
5776
5777// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5778// is the first instruction that will use Addr. So we need to find the first
5779// user of Addr in the current BB.
5780static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5781 Value *SunkAddr) {
5782 if (Addr->hasOneUse())
5783 return MemoryInst->getIterator();
5784
5785 // We already have a SunkAddr in the current BB, but we may need to insert a
5786 // cast instruction after it.
5787 if (SunkAddr) {
5788 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5789 return std::next(AddrInst->getIterator());
5790 }
5791
5792 // Find the first user of Addr in current BB.
5793 Instruction *Earliest = MemoryInst;
5794 for (User *U : Addr->users()) {
5795 Instruction *UserInst = dyn_cast<Instruction>(U);
5796 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5797 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5798 continue;
5799 if (UserInst->comesBefore(Earliest))
5800 Earliest = UserInst;
5801 }
5802 }
5803 return Earliest->getIterator();
5804}
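// Illustrative example (not from the original source): given
//   %v1 = load i32, ptr %addr      ; earliest user of %addr in this block
//   ...
//   %v2 = load i32, ptr %addr      ; MemoryInst currently being optimized
// the sunk address computation is inserted before %v1, so it also dominates
// the later reuse for %v2.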
5805
5806/// Sink addressing mode computation immediately before MemoryInst if doing so
5807/// can be done without increasing register pressure. The need for the
5808/// register pressure constraint means this can end up being an all or nothing
5809/// decision for all uses of the same addressing computation.
5810///
5811/// Load and Store Instructions often have addressing modes that can do
5812/// significant amounts of computation. As such, instruction selection will try
5813/// to get the load or store to do as much computation as possible for the
5814/// program. The problem is that isel can only see within a single block. As
5815/// such, we sink as much legal addressing mode work into the block as possible.
5816///
5817/// This method is used to optimize both load/store and inline asms with memory
5818/// operands. It's also used to sink addressing computations feeding into cold
5819/// call sites into their (cold) basic block.
5820///
5821/// The motivation for handling sinking into cold blocks is that doing so can
5822/// both enable other address mode sinking (by satisfying the register pressure
5823/// constraint above), and reduce register pressure globally (by removing the
5824/// addressing mode computation from the fast path entirely.).
5825bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5826 Type *AccessTy, unsigned AddrSpace) {
5827 Value *Repl = Addr;
5828
5829 // Try to collapse single-value PHI nodes. This is necessary to undo
5830 // unprofitable PRE transformations.
5831 SmallVector<Value *, 8> worklist;
5832 SmallPtrSet<Value *, 16> Visited;
5833 worklist.push_back(Addr);
5834
5835 // Use a worklist to iteratively look through PHI and select nodes, and
5836 // ensure that the addressing mode obtained from the non-PHI/select roots of
5837 // the graph are compatible.
5838 bool PhiOrSelectSeen = false;
5839 SmallVector<Instruction *, 16> AddrModeInsts;
5840 const SimplifyQuery SQ(*DL, TLInfo);
5841 AddressingModeCombiner AddrModes(SQ, Addr);
5842 TypePromotionTransaction TPT(RemovedInsts);
5843 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5844 TPT.getRestorationPoint();
5845 while (!worklist.empty()) {
5846 Value *V = worklist.pop_back_val();
5847
5848 // We allow traversing cyclic Phi nodes.
5849 // In case of success after this loop we ensure that traversing through
5850 // Phi nodes ends up with all cases to compute address of the form
5851 // BaseGV + Base + Scale * Index + Offset
5852 // where Scale and Offset are constants and BaseGV, Base and Index
5853 // are exactly the same Values in all cases.
5854 // It means that BaseGV, Scale and Offset dominate our memory instruction
5855 // and have the same value as they had in address computation represented
5856 // as Phi. So we can safely sink address computation to memory instruction.
5857 if (!Visited.insert(V).second)
5858 continue;
5859
5860 // For a PHI node, push all of its incoming values.
5861 if (PHINode *P = dyn_cast<PHINode>(V)) {
5862 append_range(worklist, P->incoming_values());
5863 PhiOrSelectSeen = true;
5864 continue;
5865 }
5866 // Similar for select.
5867 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5868 worklist.push_back(SI->getFalseValue());
5869 worklist.push_back(SI->getTrueValue());
5870 PhiOrSelectSeen = true;
5871 continue;
5872 }
5873
5874 // For non-PHIs, determine the addressing mode being computed. Note that
5875 // the result may differ depending on what other uses our candidate
5876 // addressing instructions might have.
5877 AddrModeInsts.clear();
5878 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5879 0);
5880 // Defer the query (and possible computation) of the dom tree to the point
5881 // of actual use. It's expected that most address matches don't actually need
5882 // the domtree.
5883 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5884 Function *F = MemoryInst->getParent()->getParent();
5885 return this->getDT(*F);
5886 };
5887 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5888 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5889 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5890 BFI.get());
5891
5892 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5893 if (GEP && !NewGEPBases.count(GEP)) {
5894 // If splitting the underlying data structure can reduce the offset of a
5895 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5896 // previously split data structures.
5897 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5898 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5899 }
5900
5901 NewAddrMode.OriginalValue = V;
5902 if (!AddrModes.addNewAddrMode(NewAddrMode))
5903 break;
5904 }
5905
5906 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5907 // or we have multiple but either couldn't combine them or combining them
5908 // wouldn't do anything useful, bail out now.
5909 if (!AddrModes.combineAddrModes()) {
5910 TPT.rollback(LastKnownGood);
5911 return false;
5912 }
5913 bool Modified = TPT.commit();
5914
5915 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5916 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5917
5918 // If all the instructions matched are already in this BB, don't do anything.
5919 // If we saw a Phi node then it is not local definitely, and if we saw a
5920 // select then we want to push the address calculation past it even if it's
5921 // already in this BB.
5922 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5923 return IsNonLocalValue(V, MemoryInst->getParent());
5924 })) {
5925 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5926 << "\n");
5927 return Modified;
5928 }
5929
5930 // Now that we have determined the addressing expression we want to use and
5931 // know that we have to sink it into this block, check to see if we have already
5932 // done this for some other load/store instr in this block. If so, reuse
5933 // the computation. Before attempting reuse, check if the address is valid
5934 // as it may have been erased.
5935
5936 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5937
5938 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5939 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5940
5941 // The current BB may be optimized multiple times, and we can't guarantee that
5942 // the reuse of Addr happens later, so call findInsertPos to find an appropriate
5943 // insert position.
5944 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5945
5946 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5947 if (!SunkAddr) {
5948 auto &DT = getDT(*MemoryInst->getFunction());
5949 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5950 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5951 return Modified;
5952 }
5953
5954 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5955
5956 if (SunkAddr) {
5957 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5958 << " for " << *MemoryInst << "\n");
5959 if (SunkAddr->getType() != Addr->getType()) {
5960 if (SunkAddr->getType()->getPointerAddressSpace() !=
5961 Addr->getType()->getPointerAddressSpace() &&
5962 !DL->isNonIntegralPointerType(Addr->getType())) {
5963 // There are two reasons the address spaces might not match: a no-op
5964 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5965 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5966 // TODO: allow bitcast between different address space pointers with the
5967 // same size.
5968 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5969 SunkAddr =
5970 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5971 } else
5972 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5973 }
5974 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5975 SubtargetInfo->addrSinkUsingGEPs())) {
5976 // By default, we use the GEP-based method when AA is used later. This
5977 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5978 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5979 << " for " << *MemoryInst << "\n");
5980 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5981
5982 // First, find the pointer.
5983 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5984 ResultPtr = AddrMode.BaseReg;
5985 AddrMode.BaseReg = nullptr;
5986 }
5987
5988 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5989 // We can't add more than one pointer together, nor can we scale a
5990 // pointer (both of which seem meaningless).
5991 if (ResultPtr || AddrMode.Scale != 1)
5992 return Modified;
5993
5994 ResultPtr = AddrMode.ScaledReg;
5995 AddrMode.Scale = 0;
5996 }
5997
5998 // It is only safe to sign extend the BaseReg if we know that the math
5999 // required to create it did not overflow before we extend it. Since
6000 // the original IR value was tossed in favor of a constant back when
6001 // the AddrMode was created we need to bail out gracefully if widths
6002 // do not match instead of extending it.
6003 //
6004 // (See below for code to add the scale.)
6005 if (AddrMode.Scale) {
6006 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6007 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6008 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6009 return Modified;
6010 }
6011
6012 GlobalValue *BaseGV = AddrMode.BaseGV;
6013 if (BaseGV != nullptr) {
6014 if (ResultPtr)
6015 return Modified;
6016
6017 if (BaseGV->isThreadLocal()) {
6018 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6019 } else {
6020 ResultPtr = BaseGV;
6021 }
6022 }
6023
6024 // If the real base value actually came from an inttoptr, then the matcher
6025 // will look through it and provide only the integer value. In that case,
6026 // use it here.
6027 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6028 if (!ResultPtr && AddrMode.BaseReg) {
6029 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6030 "sunkaddr");
6031 AddrMode.BaseReg = nullptr;
6032 } else if (!ResultPtr && AddrMode.Scale == 1) {
6033 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6034 "sunkaddr");
6035 AddrMode.Scale = 0;
6036 }
6037 }
6038
6039 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6040 !AddrMode.BaseOffs) {
6041 SunkAddr = Constant::getNullValue(Addr->getType());
6042 } else if (!ResultPtr) {
6043 return Modified;
6044 } else {
6045 Type *I8PtrTy =
6046 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6047
6048 // Start with the base register. Do this first so that subsequent address
6049 // matching finds it last, which will prevent it from trying to match it
6050 // as the scaled value in case it happens to be a mul. That would be
6051 // problematic if we've sunk a different mul for the scale, because then
6052 // we'd end up sinking both muls.
6053 if (AddrMode.BaseReg) {
6054 Value *V = AddrMode.BaseReg;
6055 if (V->getType() != IntPtrTy)
6056 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6057
6058 ResultIndex = V;
6059 }
6060
6061 // Add the scale value.
6062 if (AddrMode.Scale) {
6063 Value *V = AddrMode.ScaledReg;
6064 if (V->getType() == IntPtrTy) {
6065 // done.
6066 } else {
6067 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6068 cast<IntegerType>(V->getType())->getBitWidth() &&
6069 "We can't transform if ScaledReg is too narrow");
6070 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6071 }
6072
6073 if (AddrMode.Scale != 1)
6074 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6075 "sunkaddr");
6076 if (ResultIndex)
6077 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6078 else
6079 ResultIndex = V;
6080 }
6081
6082 // Add in the Base Offset if present.
6083 if (AddrMode.BaseOffs) {
6084 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6085 if (ResultIndex) {
6086 // We need to add this separately from the scale above to help with
6087 // SDAG consecutive load/store merging.
6088 if (ResultPtr->getType() != I8PtrTy)
6089 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6090 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6091 AddrMode.InBounds);
6092 }
6093
6094 ResultIndex = V;
6095 }
6096
6097 if (!ResultIndex) {
6098 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6099 // We know that we have a pointer without any offsets. If this pointer
6100 // originates from a different basic block than the current one, we
6101 // must be able to recreate it in the current basic block.
6102 // We do not support the recreation of any instructions yet.
6103 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6104 return Modified;
6105 SunkAddr = ResultPtr;
6106 } else {
6107 if (ResultPtr->getType() != I8PtrTy)
6108 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6109 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6110 AddrMode.InBounds);
6111 }
6112
6113 if (SunkAddr->getType() != Addr->getType()) {
6114 if (SunkAddr->getType()->getPointerAddressSpace() !=
6115 Addr->getType()->getPointerAddressSpace() &&
6116 !DL->isNonIntegralPointerType(Addr->getType())) {
6117 // There are two reasons the address spaces might not match: a no-op
6118 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6119 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6120 // TODO: allow bitcast between different address space pointers with
6121 // the same size.
6122 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6123 SunkAddr =
6124 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6125 } else
6126 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6127 }
6128 }
6129 } else {
6130 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6131 // non-integral pointers, so in that case bail out now.
6132 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6133 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6134 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6135 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6136 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6137 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6138 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6139 (AddrMode.BaseGV &&
6140 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6141 return Modified;
6142
6143 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6144 << " for " << *MemoryInst << "\n");
6145 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6146 Value *Result = nullptr;
6147
6148 // Start with the base register. Do this first so that subsequent address
6149 // matching finds it last, which will prevent it from trying to match it
6150 // as the scaled value in case it happens to be a mul. That would be
6151 // problematic if we've sunk a different mul for the scale, because then
6152 // we'd end up sinking both muls.
6153 if (AddrMode.BaseReg) {
6154 Value *V = AddrMode.BaseReg;
6155 if (V->getType()->isPointerTy())
6156 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6157 if (V->getType() != IntPtrTy)
6158 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6159 Result = V;
6160 }
6161
6162 // Add the scale value.
6163 if (AddrMode.Scale) {
6164 Value *V = AddrMode.ScaledReg;
6165 if (V->getType() == IntPtrTy) {
6166 // done.
6167 } else if (V->getType()->isPointerTy()) {
6168 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6169 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6170 cast<IntegerType>(V->getType())->getBitWidth()) {
6171 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6172 } else {
6173 // It is only safe to sign extend the BaseReg if we know that the math
6174 // required to create it did not overflow before we extend it. Since
6175 // the original IR value was tossed in favor of a constant back when
6176 // the AddrMode was created we need to bail out gracefully if widths
6177 // do not match instead of extending it.
6178 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6179 if (I && (Result != AddrMode.BaseReg))
6180 I->eraseFromParent();
6181 return Modified;
6182 }
6183 if (AddrMode.Scale != 1)
6184 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6185 "sunkaddr");
6186 if (Result)
6187 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6188 else
6189 Result = V;
6190 }
6191
6192 // Add in the BaseGV if present.
6193 GlobalValue *BaseGV = AddrMode.BaseGV;
6194 if (BaseGV != nullptr) {
6195 Value *BaseGVPtr;
6196 if (BaseGV->isThreadLocal()) {
6197 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6198 } else {
6199 BaseGVPtr = BaseGV;
6200 }
6201 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6202 if (Result)
6203 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6204 else
6205 Result = V;
6206 }
6207
6208 // Add in the Base Offset if present.
6209 if (AddrMode.BaseOffs) {
6210 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
6211 if (Result)
6212 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6213 else
6214 Result = V;
6215 }
6216
6217 if (!Result)
6218 SunkAddr = Constant::getNullValue(Addr->getType());
6219 else
6220 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6221 }
6222
6223 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6224 // Store the newly computed address into the cache. In the case we reused a
6225 // value, this should be idempotent.
6226 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6227
6228 // If we have no uses, recursively delete the value and all dead instructions
6229 // using it.
6230 if (Repl->use_empty()) {
6231 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6232 RecursivelyDeleteTriviallyDeadInstructions(
6233 Repl, TLInfo, nullptr,
6234 [&](Value *V) { removeAllAssertingVHReferences(V); });
6235 });
6236 }
6237 ++NumMemoryInsts;
6238 return true;
6239}
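// A rough sketch of what the sinking above produces (hypothetical value names;
// assuming the matcher settled on an addressing mode of base %p plus %idx.ext
// scaled by 4). An address that used to be computed in a predecessor block:
//
//   bb1:
//     %idx.ext = sext i32 %i to i64
//     %addr = getelementptr inbounds i32, ptr %p, i64 %idx.ext
//   bb2:
//     %v = load i32, ptr %addr
//
// is recomputed as a flat "sunkaddr" chain right next to the memory access:
//
//   bb2:
//     %sunkaddr = mul i64 %idx.ext, 4
//     %sunkaddr1 = getelementptr i8, ptr %p, i64 %sunkaddr
//     %v = load i32, ptr %sunkaddr1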
6240
6241/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6242/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6243/// only handle a 2 operand GEP in the same basic block or a splat constant
6244/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6245/// index.
6246///
6247/// If the existing GEP has a vector base pointer that is splat, we can look
6248/// through the splat to find the scalar pointer. If we can't find a scalar
6249/// pointer there's nothing we can do.
6250///
6251/// If we have a GEP with more than 2 indices where the middle indices are all
6252/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6253///
6254/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6255/// followed by a GEP with an all zeroes vector index. This will enable
6256/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6257/// zero index.
6258bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6259 Value *Ptr) {
6260 Value *NewAddr;
6261
6262 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6263 // Don't optimize GEPs that don't have indices.
6264 if (!GEP->hasIndices())
6265 return false;
6266
6267 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6268 // FIXME: We should support this by sinking the GEP.
6269 if (MemoryInst->getParent() != GEP->getParent())
6270 return false;
6271
6272 SmallVector<Value *, 2> Ops(GEP->operands());
6273
6274 bool RewriteGEP = false;
6275
6276 if (Ops[0]->getType()->isVectorTy()) {
6277 Ops[0] = getSplatValue(Ops[0]);
6278 if (!Ops[0])
6279 return false;
6280 RewriteGEP = true;
6281 }
6282
6283 unsigned FinalIndex = Ops.size() - 1;
6284
6285 // Ensure all but the last index are 0.
6286 // FIXME: This isn't strictly required. All that's required is that they are
6287 // all scalars or splats.
6288 for (unsigned i = 1; i < FinalIndex; ++i) {
6289 auto *C = dyn_cast<Constant>(Ops[i]);
6290 if (!C)
6291 return false;
6292 if (isa<VectorType>(C->getType()))
6293 C = C->getSplatValue();
6294 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6295 if (!CI || !CI->isZero())
6296 return false;
6297 // Scalarize the index if needed.
6298 Ops[i] = CI;
6299 }
6300
6301 // Try to scalarize the final index.
6302 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6303 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6304 auto *C = dyn_cast<ConstantInt>(V);
6305 // Don't scalarize all zeros vector.
6306 if (!C || !C->isZero()) {
6307 Ops[FinalIndex] = V;
6308 RewriteGEP = true;
6309 }
6310 }
6311 }
6312
6313 // If we made any changes or we have extra operands, we need to generate
6314 // new instructions.
6315 if (!RewriteGEP && Ops.size() == 2)
6316 return false;
6317
6318 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6319
6320 IRBuilder<> Builder(MemoryInst);
6321
6322 Type *SourceTy = GEP->getSourceElementType();
6323 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6324
6325 // If the final index isn't a vector, emit a scalar GEP containing all ops
6326 // and a vector GEP with all zeroes final index.
6327 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6328 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6329 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6330 auto *SecondTy = GetElementPtrInst::getIndexedType(
6331 SourceTy, ArrayRef(Ops).drop_front());
6332 NewAddr =
6333 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6334 } else {
6335 Value *Base = Ops[0];
6336 Value *Index = Ops[FinalIndex];
6337
6338 // Create a scalar GEP if there are more than 2 operands.
6339 if (Ops.size() != 2) {
6340 // Replace the last index with 0.
6341 Ops[FinalIndex] =
6342 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6343 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6344 SourceTy = GetElementPtrInst::getIndexedType(
6345 SourceTy, ArrayRef(Ops).drop_front());
6346 }
6347
6348 // Now create the GEP with scalar pointer and vector index.
6349 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6350 }
6351 } else if (!isa<Constant>(Ptr)) {
6352 // Not a GEP; maybe it's a splat and we can create a GEP to enable
6353 // SelectionDAGBuilder to use it as a uniform base.
6354 Value *V = getSplatValue(Ptr);
6355 if (!V)
6356 return false;
6357
6358 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6359
6360 IRBuilder<> Builder(MemoryInst);
6361
6362 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6363 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6364 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6365 Type *ScalarTy;
6366 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6367 Intrinsic::masked_gather) {
6368 ScalarTy = MemoryInst->getType()->getScalarType();
6369 } else {
6370 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6371 Intrinsic::masked_scatter);
6372 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6373 }
6374 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6375 } else {
6376 // Constant, SelectionDAGBuilder knows to check if it's a splat.
6377 return false;
6378 }
6379
6380 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6381
6382 // If we have no uses, recursively delete the value and all dead instructions
6383 // using it.
6384 if (Ptr->use_empty())
6385 RecursivelyDeleteTriviallyDeadInstructions(
6386 Ptr, TLInfo, nullptr,
6387 [&](Value *V) { removeAllAssertingVHReferences(V); });
6388
6389 return true;
6390}
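// A minimal sketch of the gather rewrite described above (hypothetical names;
// assumes the vector base is a splat of a scalar pointer %base):
//
//   ; before: vector-of-pointers base, which SelectionDAGBuilder cannot use
//   ; as a uniform base
//   %splat = shufflevector <4 x ptr> %ins, <4 x ptr> poison, <4 x i32> zeroinitializer
//   %ptrs = getelementptr i32, <4 x ptr> %splat, <4 x i64> %idx
//   %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %m, <4 x i32> poison)
//
//   ; after: scalar pointer plus a single vector index, a form ISD::MGATHER
//   ; can consume as "uniform base + index"
//   %ptrs2 = getelementptr i32, ptr %base, <4 x i64> %idx
//   %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs2, i32 4, <4 x i1> %m, <4 x i32> poison)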
6391
6392/// If there are any memory operands, use OptimizeMemoryInst to sink their
6393/// address computing into the block when possible / profitable.
6394bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6395 bool MadeChange = false;
6396
6397 const TargetRegisterInfo *TRI =
6398 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6399 TargetLowering::AsmOperandInfoVector TargetConstraints =
6400 TLI->ParseConstraints(*DL, TRI, *CS);
6401 unsigned ArgNo = 0;
6402 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6403 // Compute the constraint code and ConstraintType to use.
6404 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6405
6406 // TODO: Also handle C_Address?
6407 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6408 OpInfo.isIndirect) {
6409 Value *OpVal = CS->getArgOperand(ArgNo++);
6410 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6411 } else if (OpInfo.Type == InlineAsm::isInput)
6412 ArgNo++;
6413 }
6414
6415 return MadeChange;
6416}
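// For reference, the kind of call this handles is an inline asm with an
// indirect memory operand, e.g. (illustrative sketch only):
//
//   call void asm sideeffect "str $1, $0", "=*m,r"(ptr elementtype(i32) %addr, i32 %v)
//
// For each such indirect "m" operand, the address %addr is fed through
// optimizeMemoryInst() exactly like a load/store address, so its computation
// can be sunk next to the asm call.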
6417
6418/// Check if all the uses of \p Val are equivalent (or free) zero or
6419/// sign extensions.
6420static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6421 assert(!Val->use_empty() && "Input must have at least one use");
6422 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6423 bool IsSExt = isa<SExtInst>(FirstUser);
6424 Type *ExtTy = FirstUser->getType();
6425 for (const User *U : Val->users()) {
6426 const Instruction *UI = cast<Instruction>(U);
6427 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6428 return false;
6429 Type *CurTy = UI->getType();
6430 // Same input and output types: Same instruction after CSE.
6431 if (CurTy == ExtTy)
6432 continue;
6433
6434 // If IsSExt is true, we are in this situation:
6435 // a = Val
6436 // b = sext ty1 a to ty2
6437 // c = sext ty1 a to ty3
6438 // Assuming ty2 is shorter than ty3, this could be turned into:
6439 // a = Val
6440 // b = sext ty1 a to ty2
6441 // c = sext ty2 b to ty3
6442 // However, the last sext is not free.
6443 if (IsSExt)
6444 return false;
6445
6446 // This is a ZExt, maybe this is free to extend from one type to another.
6447 // In that case, we would not account for a different use.
6448 Type *NarrowTy;
6449 Type *LargeTy;
6450 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6451 CurTy->getScalarType()->getIntegerBitWidth()) {
6452 NarrowTy = CurTy;
6453 LargeTy = ExtTy;
6454 } else {
6455 NarrowTy = ExtTy;
6456 LargeTy = CurTy;
6457 }
6458
6459 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6460 return false;
6461 }
6462 // All uses are the same or can be derived from one another for free.
6463 return true;
6464}
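// For example (hypothetical values), these uses of %val count as "the same
// extension", assuming TLI.isZExtFree(i32, i64):
//
//   %z1 = zext i16 %val to i32
//   %z2 = zext i16 %val to i64   ; free to derive from a widened %z1
//
// whereas mixing in a sign extension such as
//
//   %s = sext i16 %val to i64
//
// makes hasSameExtUse() return false.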
6465
6466/// Try to speculatively promote extensions in \p Exts and continue
6467/// promoting through newly promoted operands recursively as far as doing so is
6468/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6469/// When some promotion happened, \p TPT contains the proper state to revert
6470/// them.
6471///
6472/// \return true if some promotion happened, false otherwise.
6473bool CodeGenPrepare::tryToPromoteExts(
6474 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6475 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6476 unsigned CreatedInstsCost) {
6477 bool Promoted = false;
6478
6479 // Iterate over all the extensions to try to promote them.
6480 for (auto *I : Exts) {
6481 // Early check if we directly have ext(load).
6482 if (isa<LoadInst>(I->getOperand(0))) {
6483 ProfitablyMovedExts.push_back(I);
6484 continue;
6485 }
6486
6487 // Check whether or not we want to do any promotion. The reason we have
6488 // this check inside the for loop is to catch the case where an extension
6489 // is directly fed by a load, because in such a case the extension can be moved
6490 // up without any promotion on its operands.
6491 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6492 return false;
6493
6494 // Get the action to perform the promotion.
6495 TypePromotionHelper::Action TPH =
6496 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6497 // Check if we can promote.
6498 if (!TPH) {
6499 // Save the current extension as we cannot move up through its operand.
6500 ProfitablyMovedExts.push_back(I);
6501 continue;
6502 }
6503
6504 // Save the current state.
6505 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6506 TPT.getRestorationPoint();
6507 SmallVector<Instruction *, 4> NewExts;
6508 unsigned NewCreatedInstsCost = 0;
6509 unsigned ExtCost = !TLI->isExtFree(I);
6510 // Promote.
6511 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6512 &NewExts, nullptr, *TLI);
6513 assert(PromotedVal &&
6514 "TypePromotionHelper should have filtered out those cases");
6515
6516 // We would be able to merge only one extension in a load.
6517 // Therefore, if we have more than 1 new extension we heuristically
6518 // cut this search path, because it means we degrade the code quality.
6519 // With exactly 2, the transformation is neutral, because we will merge
6520 // one extension but leave one. However, we optimistically keep going,
6521 // because the new extension may be removed too. Also avoid replacing a
6522 // single free extension with multiple extensions, as this increases the
6523 // number of IR instructions while not providing any savings.
6524 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6525 // FIXME: It would be possible to propagate a negative value instead of
6526 // conservatively ceiling it to 0.
6527 TotalCreatedInstsCost =
6528 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6529 if (!StressExtLdPromotion &&
6530 (TotalCreatedInstsCost > 1 ||
6531 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6532 (ExtCost == 0 && NewExts.size() > 1))) {
6533 // This promotion is not profitable, roll back to the previous state, and
6534 // save the current extension in ProfitablyMovedExts as the latest
6535 // speculative promotion turned out to be unprofitable.
6536 TPT.rollback(LastKnownGood);
6537 ProfitablyMovedExts.push_back(I);
6538 continue;
6539 }
6540 // Continue promoting NewExts as far as doing so is profitable.
6541 SmallVector<Instruction *, 2> NewlyMovedExts;
6542 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6543 bool NewPromoted = false;
6544 for (auto *ExtInst : NewlyMovedExts) {
6545 Instruction *MovedExt = cast<Instruction>(ExtInst);
6546 Value *ExtOperand = MovedExt->getOperand(0);
6547 // If we have reached a load, we need this extra profitability check
6548 // as it could potentially be merged into an ext(load).
6549 if (isa<LoadInst>(ExtOperand) &&
6550 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6551 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6552 continue;
6553
6554 ProfitablyMovedExts.push_back(MovedExt);
6555 NewPromoted = true;
6556 }
6557
6558 // If none of the speculative promotions for NewExts is profitable, roll back
6559 // and save the current extension (I) as the last profitable extension.
6560 if (!NewPromoted) {
6561 TPT.rollback(LastKnownGood);
6562 ProfitablyMovedExts.push_back(I);
6563 continue;
6564 }
6565 // The promotion is profitable.
6566 Promoted = true;
6567 }
6568 return Promoted;
6569}
6570
6571/// Merge redundant sexts when one dominates the other.
6572bool CodeGenPrepare::mergeSExts(Function &F) {
6573 bool Changed = false;
6574 for (auto &Entry : ValToSExtendedUses) {
6575 SExts &Insts = Entry.second;
6576 SExts CurPts;
6577 for (Instruction *Inst : Insts) {
6578 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6579 Inst->getOperand(0) != Entry.first)
6580 continue;
6581 bool inserted = false;
6582 for (auto &Pt : CurPts) {
6583 if (getDT(F).dominates(Inst, Pt)) {
6584 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6585 RemovedInsts.insert(Pt);
6586 Pt->removeFromParent();
6587 Pt = Inst;
6588 inserted = true;
6589 Changed = true;
6590 break;
6591 }
6592 if (!getDT(F).dominates(Pt, Inst))
6593 // Give up if we need to merge in a common dominator as the
6594 // experiments show it is not profitable.
6595 continue;
6596 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6597 RemovedInsts.insert(Inst);
6598 Inst->removeFromParent();
6599 inserted = true;
6600 Changed = true;
6601 break;
6602 }
6603 if (!inserted)
6604 CurPts.push_back(Inst);
6605 }
6606 }
6607 return Changed;
6608}
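// A small sketch of the merge (hypothetical blocks, BB1 dominating BB2):
//
//   BB1:
//     %s1 = sext i32 %a to i64
//   BB2:
//     %s2 = sext i32 %a to i64   ; redundant
//
// Uses of %s2 are rewritten to %s1 and %s2 is removed; if neither sext
// dominates the other, the pair is left alone.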
6609
6610// Split large data structures so that the GEPs accessing them can have
6611// smaller offsets, allowing them to be sunk to the same blocks as their users.
6612// For example, a large struct starting from %base is split into two parts
6613// where the second part starts from %new_base.
6614//
6615// Before:
6616// BB0:
6617// %base =
6618//
6619// BB1:
6620// %gep0 = gep %base, off0
6621// %gep1 = gep %base, off1
6622// %gep2 = gep %base, off2
6623//
6624// BB2:
6625// %load1 = load %gep0
6626// %load2 = load %gep1
6627// %load3 = load %gep2
6628//
6629// After:
6630// BB0:
6631// %base =
6632// %new_base = gep %base, off0
6633//
6634// BB1:
6635// %new_gep0 = %new_base
6636// %new_gep1 = gep %new_base, off1 - off0
6637// %new_gep2 = gep %new_base, off2 - off0
6638//
6639// BB2:
6640// %load1 = load i32, i32* %new_gep0
6641// %load2 = load i32, i32* %new_gep1
6642// %load3 = load i32, i32* %new_gep2
6643//
6644// %new_gep1 and %new_gep2 can now be sunk to BB2 after the splitting because
6645// their offsets are small enough to fit into the addressing mode.
6646bool CodeGenPrepare::splitLargeGEPOffsets() {
6647 bool Changed = false;
6648 for (auto &Entry : LargeOffsetGEPMap) {
6649 Value *OldBase = Entry.first;
6650 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6651 &LargeOffsetGEPs = Entry.second;
6652 auto compareGEPOffset =
6653 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6654 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6655 if (LHS.first == RHS.first)
6656 return false;
6657 if (LHS.second != RHS.second)
6658 return LHS.second < RHS.second;
6659 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6660 };
6661 // Sorting all the GEPs of the same data structures based on the offsets.
6662 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6663 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6664 // Skip if all the GEPs have the same offsets.
6665 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6666 continue;
6667 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6668 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6669 Value *NewBaseGEP = nullptr;
6670
6671 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6672 GetElementPtrInst *GEP) {
6673 LLVMContext &Ctx = GEP->getContext();
6674 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6675 Type *I8PtrTy =
6676 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6677
6678 BasicBlock::iterator NewBaseInsertPt;
6679 BasicBlock *NewBaseInsertBB;
6680 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6681 // If the base of the struct is an instruction, the new base will be
6682 // inserted close to it.
6683 NewBaseInsertBB = BaseI->getParent();
6684 if (isa<PHINode>(BaseI))
6685 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6686 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6687 NewBaseInsertBB =
6688 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6689 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6690 } else
6691 NewBaseInsertPt = std::next(BaseI->getIterator());
6692 } else {
6693 // If the current base is an argument or global value, the new base
6694 // will be inserted into the entry block.
6695 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6696 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6697 }
6698 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6699 // Create a new base.
6700 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6701 NewBaseGEP = OldBase;
6702 if (NewBaseGEP->getType() != I8PtrTy)
6703 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6704 NewBaseGEP =
6705 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6706 NewGEPBases.insert(NewBaseGEP);
6707 return;
6708 };
6709
6710 // Check whether all the offsets can be encoded with the preferred common base.
6711 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6712 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6713 BaseOffset = PreferBase;
6714 // Create a new base if the offset of the BaseGEP can be decoded with one
6715 // instruction.
6716 createNewBase(BaseOffset, OldBase, BaseGEP);
6717 }
6718
6719 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6720 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6721 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6722 int64_t Offset = LargeOffsetGEP->second;
6723 if (Offset != BaseOffset) {
6724 TargetLowering::AddrMode AddrMode;
6725 AddrMode.HasBaseReg = true;
6726 AddrMode.BaseOffs = Offset - BaseOffset;
6727 // The result type of the GEP might not be the type of the memory
6728 // access.
6729 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6730 GEP->getResultElementType(),
6731 GEP->getAddressSpace())) {
6732 // We need to create a new base if the offset to the current base is
6733 // too large to fit into the addressing mode. So, a very large struct
6734 // may be split into several parts.
6735 BaseGEP = GEP;
6736 BaseOffset = Offset;
6737 NewBaseGEP = nullptr;
6738 }
6739 }
6740
6741 // Generate a new GEP to replace the current one.
6742 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6743
6744 if (!NewBaseGEP) {
6745 // Create a new base if we don't have one yet. Find the insertion
6746 // pointer for the new base first.
6747 createNewBase(BaseOffset, OldBase, GEP);
6748 }
6749
6750 IRBuilder<> Builder(GEP);
6751 Value *NewGEP = NewBaseGEP;
6752 if (Offset != BaseOffset) {
6753 // Calculate the new offset for the new GEP.
6754 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6755 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6756 }
6757 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6758 LargeOffsetGEPID.erase(GEP);
6759 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6760 GEP->eraseFromParent();
6761 Changed = true;
6762 }
6763 }
6764 return Changed;
6765}
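// A worked example of the splitting (hypothetical numbers; assume the target
// only folds immediate offsets in [0, 255] into its addressing mode): three
// GEPs off %base at offsets 4096, 4104 and 4160 cannot be sunk as-is, so a
// new base is created once:
//
//   %splitgep = getelementptr i8, ptr %base, i64 4096
//
// and the original GEPs are rewritten relative to it with offsets 0, 8 and
// 64, all of which now fit the addressing mode and can be sunk to their
// users' blocks.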
6766
6767bool CodeGenPrepare::optimizePhiType(
6768 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6769 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6770 // We are looking for a collection of interconnected phi nodes that together
6771 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6772 // are of the same type. Convert the whole set of nodes to the type of the
6773 // bitcast.
6774 Type *PhiTy = I->getType();
6775 Type *ConvertTy = nullptr;
6776 if (Visited.count(I) ||
6777 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6778 return false;
6779
6780 SmallVector<Instruction *, 4> Worklist;
6781 Worklist.push_back(cast<Instruction>(I));
6782 SmallPtrSet<PHINode *, 4> PhiNodes;
6783 SmallPtrSet<ConstantData *, 4> Constants;
6784 PhiNodes.insert(I);
6785 Visited.insert(I);
6786 SmallPtrSet<Instruction *, 4> Defs;
6787 SmallPtrSet<Instruction *, 4> Uses;
6788 // This works by adding extra bitcasts between loads/stores and removing
6789 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6790 // we can get into the situation where we remove a bitcast in one iteration
6791 // just to add it again in the next. We need to ensure that at least one
6792 // bitcast we remove is anchored to something that will not change back.
6793 bool AnyAnchored = false;
6794
6795 while (!Worklist.empty()) {
6796 Instruction *II = Worklist.pop_back_val();
6797
6798 if (auto *Phi = dyn_cast<PHINode>(II)) {
6799 // Handle Defs, which might also be PHI's
6800 for (Value *V : Phi->incoming_values()) {
6801 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6802 if (!PhiNodes.count(OpPhi)) {
6803 if (!Visited.insert(OpPhi).second)
6804 return false;
6805 PhiNodes.insert(OpPhi);
6806 Worklist.push_back(OpPhi);
6807 }
6808 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6809 if (!OpLoad->isSimple())
6810 return false;
6811 if (Defs.insert(OpLoad).second)
6812 Worklist.push_back(OpLoad);
6813 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6814 if (Defs.insert(OpEx).second)
6815 Worklist.push_back(OpEx);
6816 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6817 if (!ConvertTy)
6818 ConvertTy = OpBC->getOperand(0)->getType();
6819 if (OpBC->getOperand(0)->getType() != ConvertTy)
6820 return false;
6821 if (Defs.insert(OpBC).second) {
6822 Worklist.push_back(OpBC);
6823 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6824 !isa<ExtractElementInst>(OpBC->getOperand(0));
6825 }
6826 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6827 Constants.insert(OpC);
6828 else
6829 return false;
6830 }
6831 }
6832
6833 // Handle uses which might also be phi's
6834 for (User *V : II->users()) {
6835 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6836 if (!PhiNodes.count(OpPhi)) {
6837 if (Visited.count(OpPhi))
6838 return false;
6839 PhiNodes.insert(OpPhi);
6840 Visited.insert(OpPhi);
6841 Worklist.push_back(OpPhi);
6842 }
6843 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6844 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6845 return false;
6846 Uses.insert(OpStore);
6847 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6848 if (!ConvertTy)
6849 ConvertTy = OpBC->getType();
6850 if (OpBC->getType() != ConvertTy)
6851 return false;
6852 Uses.insert(OpBC);
6853 AnyAnchored |=
6854 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6855 } else {
6856 return false;
6857 }
6858 }
6859 }
6860
6861 if (!ConvertTy || !AnyAnchored ||
6862 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6863 return false;
6864
6865 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6866 << *ConvertTy << "\n");
6867
6868 // Create all the new phi nodes of the new type, and bitcast any loads to the
6869 // correct type.
6870 ValueToValueMap ValMap;
6871 for (ConstantData *C : Constants)
6872 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6873 for (Instruction *D : Defs) {
6874 if (isa<BitCastInst>(D)) {
6875 ValMap[D] = D->getOperand(0);
6876 DeletedInstrs.insert(D);
6877 } else {
6878 BasicBlock::iterator insertPt = std::next(D->getIterator());
6879 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6880 }
6881 }
6882 for (PHINode *Phi : PhiNodes)
6883 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6884 Phi->getName() + ".tc", Phi->getIterator());
6885 // Pipe together all the PhiNodes.
6886 for (PHINode *Phi : PhiNodes) {
6887 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6888 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6889 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6890 Phi->getIncomingBlock(i));
6891 Visited.insert(NewPhi);
6892 }
6893 // And finally pipe up the stores and bitcasts
6894 for (Instruction *U : Uses) {
6895 if (isa<BitCastInst>(U)) {
6896 DeletedInstrs.insert(U);
6897 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6898 } else {
6899 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6900 U->getIterator()));
6901 }
6902 }
6903
6904 // Save the removed phis to be deleted later.
6905 DeletedInstrs.insert_range(PhiNodes);
6906 return true;
6907}
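// A minimal sketch of the conversion (hypothetical IR; assumes
// TLI->shouldConvertPhiType(i32, float) returns true):
//
//   ; before: the values flowing through the phi are really floats
//   bb1:  %b = bitcast float %x to i32
//   bb2:  %l = load i32, ptr %p
//   join: %phi = phi i32 [ %b, %bb1 ], [ %l, %bb2 ]
//         %f = bitcast i32 %phi to float
//         %r = fadd float %f, %f
//
//   ; after: the phi is rebuilt as float, the load gets a ".bc" bitcast, and
//   ; both original bitcasts disappear
//   bb2:  %l = load i32, ptr %p
//         %l.bc = bitcast i32 %l to float
//   join: %phi.tc = phi float [ %x, %bb1 ], [ %l.bc, %bb2 ]
//         %r = fadd float %phi.tc, %phi.tc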
6908
6909bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6910 if (!OptimizePhiTypes)
6911 return false;
6912
6913 bool Changed = false;
6914 SmallPtrSet<PHINode *, 4> Visited;
6915 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6916
6917 // Attempt to optimize all the phis in the function to the correct type.
6918 for (auto &BB : F)
6919 for (auto &Phi : BB.phis())
6920 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6921
6922 // Remove any old phi's that have been converted.
6923 for (auto *I : DeletedInstrs) {
6924 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6925 I->eraseFromParent();
6926 }
6927
6928 return Changed;
6929}
6930
6931/// Return true, if an ext(load) can be formed from an extension in
6932/// \p MovedExts.
6933bool CodeGenPrepare::canFormExtLd(
6934 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6935 Instruction *&Inst, bool HasPromoted) {
6936 for (auto *MovedExtInst : MovedExts) {
6937 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6938 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6939 Inst = MovedExtInst;
6940 break;
6941 }
6942 }
6943 if (!LI)
6944 return false;
6945
6946 // If they're already in the same block, there's nothing to do.
6947 // Make the cheap checks first if we did not promote.
6948 // If we promoted, we need to check if it is indeed profitable.
6949 if (!HasPromoted && LI->getParent() == Inst->getParent())
6950 return false;
6951
6952 return TLI->isExtLoad(LI, Inst, *DL);
6953}
6954
6955/// Move a zext or sext fed by a load into the same basic block as the load,
6956/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6957/// extend into the load.
6958///
6959/// E.g.,
6960/// \code
6961/// %ld = load i32* %addr
6962/// %add = add nuw i32 %ld, 4
6963/// %zext = zext i32 %add to i64
6964/// \endcode
6965/// =>
6966/// \code
6967/// %ld = load i32* %addr
6968/// %zext = zext i32 %ld to i64
6969/// %add = add nuw i64 %zext, 4
6970/// \endcode
6971/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
6972/// allows us to match zext(load i32*) to i64.
6973///
6974/// Also, try to promote the computations used to obtain a sign extended
6975/// value used into memory accesses.
6976/// E.g.,
6977/// \code
6978/// a = add nsw i32 b, 3
6979/// d = sext i32 a to i64
6980/// e = getelementptr ..., i64 d
6981/// \endcode
6982/// =>
6983/// \code
6984/// f = sext i32 b to i64
6985/// a = add nsw i64 f, 3
6986/// e = getelementptr ..., i64 a
6987/// \endcode
6988///
6989/// \p Inst[in/out] the extension may be modified during the process if some
6990/// promotions apply.
6991bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6992 bool AllowPromotionWithoutCommonHeader = false;
6993 /// See if it is an interesting sext operation for the address type
6994 /// promotion before trying to promote it, e.g., the ones with the right
6995 /// type and used in memory accesses.
6996 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6997 *Inst, AllowPromotionWithoutCommonHeader);
6998 TypePromotionTransaction TPT(RemovedInsts);
6999 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7000 TPT.getRestorationPoint();
7001 SmallVector<Instruction *, 1> Exts;
7002 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7003 Exts.push_back(Inst);
7004
7005 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7006
7007 // Look for a load being extended.
7008 LoadInst *LI = nullptr;
7009 Instruction *ExtFedByLoad;
7010
7011 // Try to promote a chain of computation if it allows to form an extended
7012 // load.
7013 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7014 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7015 TPT.commit();
7016 // Move the extend into the same block as the load.
7017 ExtFedByLoad->moveAfter(LI);
7018 ++NumExtsMoved;
7019 Inst = ExtFedByLoad;
7020 return true;
7021 }
7022
7023 // Continue promoting SExts if the target considers address type promotion worthwhile.
7024 if (ATPConsiderable &&
7025 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7026 HasPromoted, TPT, SpeculativelyMovedExts))
7027 return true;
7028
7029 TPT.rollback(LastKnownGood);
7030 return false;
7031}
7032
7033// Perform address type promotion if doing so is profitable.
7034// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7035// instructions that sign extended the same initial value. However, if
7036// AllowPromotionWithoutCommonHeader == true, we expect that promoting the
7037// extension is profitable on its own.
7038bool CodeGenPrepare::performAddressTypePromotion(
7039 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7040 bool HasPromoted, TypePromotionTransaction &TPT,
7041 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7042 bool Promoted = false;
7043 SmallPtrSet<Instruction *, 1> UnhandledExts;
7044 bool AllSeenFirst = true;
7045 for (auto *I : SpeculativelyMovedExts) {
7046 Value *HeadOfChain = I->getOperand(0);
7047 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
7048 SeenChainsForSExt.find(HeadOfChain);
7049 // If there is an unhandled SExt which has the same header, try to promote
7050 // it as well.
7051 if (AlreadySeen != SeenChainsForSExt.end()) {
7052 if (AlreadySeen->second != nullptr)
7053 UnhandledExts.insert(AlreadySeen->second);
7054 AllSeenFirst = false;
7055 }
7056 }
7057
7058 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7059 SpeculativelyMovedExts.size() == 1)) {
7060 TPT.commit();
7061 if (HasPromoted)
7062 Promoted = true;
7063 for (auto *I : SpeculativelyMovedExts) {
7064 Value *HeadOfChain = I->getOperand(0);
7065 SeenChainsForSExt[HeadOfChain] = nullptr;
7066 ValToSExtendedUses[HeadOfChain].push_back(I);
7067 }
7068 // Update Inst as the promotion happened.
7069 Inst = SpeculativelyMovedExts.pop_back_val();
7070 } else {
7071 // This is the first chain visited from the header; keep the current chain
7072 // as unhandled. Defer promoting it until we encounter another SExt
7073 // chain derived from the same header.
7074 for (auto *I : SpeculativelyMovedExts) {
7075 Value *HeadOfChain = I->getOperand(0);
7076 SeenChainsForSExt[HeadOfChain] = Inst;
7077 }
7078 return false;
7079 }
7080
7081 if (!AllSeenFirst && !UnhandledExts.empty())
7082 for (auto *VisitedSExt : UnhandledExts) {
7083 if (RemovedInsts.count(VisitedSExt))
7084 continue;
7085 TypePromotionTransaction TPT(RemovedInsts);
7086 SmallVector<Instruction *, 1> Exts;
7087 SmallVector<Instruction *, 2> Chains;
7088 Exts.push_back(VisitedSExt);
7089 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7090 TPT.commit();
7091 if (HasPromoted)
7092 Promoted = true;
7093 for (auto *I : Chains) {
7094 Value *HeadOfChain = I->getOperand(0);
7095 // Mark this as handled.
7096 SeenChainsForSExt[HeadOfChain] = nullptr;
7097 ValToSExtendedUses[HeadOfChain].push_back(I);
7098 }
7099 }
7100 return Promoted;
7101}
7102
7103bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7104 BasicBlock *DefBB = I->getParent();
7105
7106 // If the result of a {s|z}ext and its source are both live out, rewrite all
7107 // other uses of the source with the result of the extension.
7108 Value *Src = I->getOperand(0);
7109 if (Src->hasOneUse())
7110 return false;
7111
7112 // Only do this xform if truncating is free.
7113 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7114 return false;
7115
7116 // Only safe to perform the optimization if the source is also defined in
7117 // this block.
7118 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7119 return false;
7120
7121 bool DefIsLiveOut = false;
7122 for (User *U : I->users()) {
7123 Instruction *UI = cast<Instruction>(U);
7124
7125 // Figure out which BB this ext is used in.
7126 BasicBlock *UserBB = UI->getParent();
7127 if (UserBB == DefBB)
7128 continue;
7129 DefIsLiveOut = true;
7130 break;
7131 }
7132 if (!DefIsLiveOut)
7133 return false;
7134
7135 // Make sure none of the uses are PHI nodes.
7136 for (User *U : Src->users()) {
7137 Instruction *UI = cast<Instruction>(U);
7138 BasicBlock *UserBB = UI->getParent();
7139 if (UserBB == DefBB)
7140 continue;
7141 // Be conservative. We don't want this xform to end up introducing
7142 // reloads just before load / store instructions.
7143 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7144 return false;
7145 }
7146
7147 // InsertedTruncs - Only insert one trunc in each block once.
7148 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7149
7150 bool MadeChange = false;
7151 for (Use &U : Src->uses()) {
7152 Instruction *User = cast<Instruction>(U.getUser());
7153
7154 // Figure out which BB this ext is used in.
7155 BasicBlock *UserBB = User->getParent();
7156 if (UserBB == DefBB)
7157 continue;
7158
7159 // Both src and def are live in this block. Rewrite the use.
7160 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7161
7162 if (!InsertedTrunc) {
7163 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7164 assert(InsertPt != UserBB->end());
7165 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7166 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7167 InsertedInsts.insert(InsertedTrunc);
7168 }
7169
7170 // Replace a use of the {s|z}ext source with a use of the result.
7171 U = InsertedTrunc;
7172 ++NumExtUses;
7173 MadeChange = true;
7174 }
7175
7176 return MadeChange;
7177}
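// Sketch of the rewrite (hypothetical IR; assumes the target reports
// isTruncateFree(i64, i32)):
//
//   ; before: both %a and its zext are live out of DefBB
//   DefBB:   %a = add i32 %x, %y
//            %e = zext i32 %a to i64
//   UserBB:  %u = xor i32 %a, 1
//
//   ; after: the out-of-block use of %a goes through a free trunc of %e, so
//   ; only %e needs to be live across the edge
//   UserBB:  %t = trunc i64 %e to i32
//            %u = xor i32 %t, 1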
7178
7179// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7180// just after the load if the target can fold this into one extload instruction,
7181// with the hope of eliminating some of the other later "and" instructions using
7182// the loaded value. "and"s that are made trivially redundant by the insertion
7183// of the new "and" are removed by this function, while others (e.g. those whose
7184// path from the load goes through a phi) are left for isel to potentially
7185// remove.
7186//
7187// For example:
7188//
7189// b0:
7190// x = load i32
7191// ...
7192// b1:
7193// y = and x, 0xff
7194// z = use y
7195//
7196// becomes:
7197//
7198// b0:
7199// x = load i32
7200// x' = and x, 0xff
7201// ...
7202// b1:
7203// z = use x'
7204//
7205// whereas:
7206//
7207// b0:
7208// x1 = load i32
7209// ...
7210// b1:
7211// x2 = load i32
7212// ...
7213// b2:
7214// x = phi x1, x2
7215// y = and x, 0xff
7216//
7217// becomes (after a call to optimizeLoadExt for each load):
7218//
7219// b0:
7220// x1 = load i32
7221// x1' = and x1, 0xff
7222// ...
7223// b1:
7224// x2 = load i32
7225// x2' = and x2, 0xff
7226// ...
7227// b2:
7228// x = phi x1', x2'
7229// y = and x, 0xff
7230bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7231 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7232 return false;
7233
7234 // Skip loads we've already transformed.
7235 if (Load->hasOneUse() &&
7236 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7237 return false;
7238
7239 // Look at all uses of Load, looking through phis, to determine how many bits
7240 // of the loaded value are needed.
7241 SmallVector<Instruction *, 8> WorkList;
7242 SmallPtrSet<Instruction *, 16> Visited;
7243 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7244 SmallVector<Instruction *, 8> DropFlags;
7245 for (auto *U : Load->users())
7246 WorkList.push_back(cast<Instruction>(U));
7247
7248 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7249 unsigned BitWidth = LoadResultVT.getSizeInBits();
7250 // If the BitWidth is 0, do not try to optimize the type
7251 if (BitWidth == 0)
7252 return false;
7253
7254 APInt DemandBits(BitWidth, 0);
7255 APInt WidestAndBits(BitWidth, 0);
7256
7257 while (!WorkList.empty()) {
7258 Instruction *I = WorkList.pop_back_val();
7259
7260 // Break use-def graph loops.
7261 if (!Visited.insert(I).second)
7262 continue;
7263
7264 // For a PHI node, push all of its users.
7265 if (auto *Phi = dyn_cast<PHINode>(I)) {
7266 for (auto *U : Phi->users())
7267 WorkList.push_back(cast<Instruction>(U));
7268 continue;
7269 }
7270
7271 switch (I->getOpcode()) {
7272 case Instruction::And: {
7273 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7274 if (!AndC)
7275 return false;
7276 APInt AndBits = AndC->getValue();
7277 DemandBits |= AndBits;
7278 // Keep track of the widest and mask we see.
7279 if (AndBits.ugt(WidestAndBits))
7280 WidestAndBits = AndBits;
7281 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7282 AndsToMaybeRemove.push_back(I);
7283 break;
7284 }
7285
7286 case Instruction::Shl: {
7287 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7288 if (!ShlC)
7289 return false;
7290 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7291 DemandBits.setLowBits(BitWidth - ShiftAmt);
7292 DropFlags.push_back(I);
7293 break;
7294 }
7295
7296 case Instruction::Trunc: {
7297 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7298 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7299 DemandBits.setLowBits(TruncBitWidth);
7300 DropFlags.push_back(I);
7301 break;
7302 }
7303
7304 default:
7305 return false;
7306 }
7307 }
7308
7309 uint32_t ActiveBits = DemandBits.getActiveBits();
7310 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7311 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7312 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7313 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7314 // followed by an AND.
7315 // TODO: Look into removing this restriction by fixing backends to either
7316 // return false for isLoadExtLegal for i1 or have them select this pattern to
7317 // a single instruction.
7318 //
7319 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7320 // mask, since these are the only ands that will be removed by isel.
7321 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7322 WidestAndBits != DemandBits)
7323 return false;
7324
7325 LLVMContext &Ctx = Load->getType()->getContext();
7326 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7327 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7328
7329 // Reject cases that won't be matched as extloads.
7330 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7331 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7332 return false;
7333
7334 IRBuilder<> Builder(Load->getNextNode());
7335 auto *NewAnd = cast<Instruction>(
7336 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7337 // Mark this instruction as "inserted by CGP", so that other
7338 // optimizations don't touch it.
7339 InsertedInsts.insert(NewAnd);
7340
7341 // Replace all uses of load with new and (except for the use of load in the
7342 // new and itself).
7343 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7344 NewAnd->setOperand(0, Load);
7345
7346 // Remove any and instructions that are now redundant.
7347 for (auto *And : AndsToMaybeRemove)
7348 // Check that the and mask is the same as the one we decided to put on the
7349 // new and.
7350 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7351 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7352 if (&*CurInstIterator == And)
7353 CurInstIterator = std::next(And->getIterator());
7354 And->eraseFromParent();
7355 ++NumAndUses;
7356 }
7357
7358 // NSW flags may no longer hold.
7359 for (auto *Inst : DropFlags)
7360 Inst->setHasNoSignedWrap(false);
7361
7362 ++NumAndsAdded;
7363 return true;
7364}
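// A worked example of the demanded-bits bookkeeping above (hypothetical IR):
// for a load %x of i32 whose only uses are
//
//   %m = and i32 %x, 255
//   %s = shl i32 %x, 24
//
// the and contributes 0xFF to DemandBits, the shl contributes
// setLowBits(32 - 24) = 0xFF, and WidestAndBits is 0xFF. ActiveBits is 8, the
// mask check passes, and TruncTy becomes i8; if the target reports that an
// i8-to-i32 zero-extending load is legal, CGP inserts
//
//   %x.masked = and i32 %x, 255
//
// right after the load, replaces %m with it (the now-redundant and is
// erased), and rewrites %s to shift the masked value, dropping its NSW flag.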
7365
7366/// Check if V (an operand of a select instruction) is an expensive instruction
7367/// that is only used once.
7368static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7369 auto *I = dyn_cast<Instruction>(V);
7370 // If it's safe to speculatively execute, then it should not have side
7371 // effects; therefore, it's safe to sink and possibly *not* execute.
7372 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7373 TTI->isExpensiveToSpeculativelyExecute(I);
7374}
7375
7376/// Returns true if a SelectInst should be turned into an explicit branch.
7377static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7378 const TargetLowering *TLI,
7379 SelectInst *SI) {
7380 // If even a predictable select is cheap, then a branch can't be cheaper.
7381 if (!TLI->isPredictableSelectExpensive())
7382 return false;
7383
7384 // FIXME: This should use the same heuristics as IfConversion to determine
7385 // whether a select is better represented as a branch.
7386
7387 // If metadata tells us that the select condition is obviously predictable,
7388 // then we want to replace the select with a branch.
7389 uint64_t TrueWeight, FalseWeight;
7390 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7391 uint64_t Max = std::max(TrueWeight, FalseWeight);
7392 uint64_t Sum = TrueWeight + FalseWeight;
7393 if (Sum != 0) {
7394 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7395 if (Probability > TTI->getPredictableBranchThreshold())
7396 return true;
7397 }
7398 }
7399
7400 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7401
7402 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7403 // comparison condition. If the compare has more than one use, there's
7404 // probably another cmov or setcc around, so it's not worth emitting a branch.
7405 if (!Cmp || !Cmp->hasOneUse())
7406 return false;
7407
7408 // If either operand of the select is expensive and only needed on one side
7409 // of the select, we should form a branch.
7410 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7411 sinkSelectOperand(TTI, SI->getFalseValue()))
7412 return true;
7413
7414 return false;
7415}
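// Worked example of the weight check above (hypothetical numbers): with
// !prof branch_weights of 2000 and 1, Max/Sum = 2000/2001, roughly 99.95%.
// If that exceeds TTI->getPredictableBranchThreshold() (commonly around 99%,
// but target-dependent), the select is considered predictable enough to be
// worth turning into a branch, before the one-use-compare and
// expensive-operand checks are even consulted.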
7416
7417/// If \p isTrue is true, return the true value of \p SI, otherwise return
7418/// false value of \p SI. If the true/false value of \p SI is defined by any
7419/// select instructions in \p Selects, look through the defining select
7420/// instruction until the true/false value is not defined in \p Selects.
7421static Value *
7422getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7423 const SmallPtrSet<const Instruction *, 2> &Selects) {
7424 Value *V = nullptr;
7425
7426 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7427 DefSI = dyn_cast<SelectInst>(V)) {
7428 assert(DefSI->getCondition() == SI->getCondition() &&
7429 "The condition of DefSI does not match with SI");
7430 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7431 }
7432
7433 assert(V && "Failed to get select true/false value");
7434 return V;
7435}
7436
7437bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7438 assert(Shift->isShift() && "Expected a shift");
7439
7440 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7441 // general vector shifts, and (3) the shift amount is a select-of-splatted
7442 // values, hoist the shifts before the select:
7443 // shift Op0, (select Cond, TVal, FVal) -->
7444 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7445 //
7446 // This is inverting a generic IR transform when we know that the cost of a
7447 // general vector shift is more than the cost of 2 shift-by-scalars.
7448 // We can't do this effectively in SDAG because we may not be able to
7449 // determine if the select operands are splats from within a basic block.
7450 Type *Ty = Shift->getType();
7451 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7452 return false;
7453 Value *Cond, *TVal, *FVal;
7454 if (!match(Shift->getOperand(1),
7455 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7456 return false;
7457 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7458 return false;
7459
7460 IRBuilder<> Builder(Shift);
7461 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7462 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7463 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7464 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7465 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7466 Shift->eraseFromParent();
7467 return true;
7468}
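// Sketch of the hoisting above (hypothetical IR; assumes
// TTI->isVectorShiftByScalarCheap(<4 x i32>) is true):
//
//   ; before: one variable vector shift
//   %amt = select i1 %c, <4 x i32> splat (i32 2), <4 x i32> splat (i32 3)
//   %r = shl <4 x i32> %x, %amt
//
//   ; after: two shift-by-splat operations and a select of the results
//   %t = shl <4 x i32> %x, splat (i32 2)
//   %f = shl <4 x i32> %x, splat (i32 3)
//   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f
//
// optimizeFunnelShift() below applies the same idea to the shift amount of
// llvm.fshl / llvm.fshr.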
7469
7470bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7471 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7472 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7473 "Expected a funnel shift");
7474
7475 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7476 // than general vector shifts, and (3) the shift amount is select-of-splatted
7477 // values, hoist the funnel shifts before the select:
7478 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7479 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7480 //
7481 // This is inverting a generic IR transform when we know that the cost of a
7482 // general vector shift is more than the cost of 2 shift-by-scalars.
7483 // We can't do this effectively in SDAG because we may not be able to
7484 // determine if the select operands are splats from within a basic block.
7485 Type *Ty = Fsh->getType();
7486 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7487 return false;
7488 Value *Cond, *TVal, *FVal;
7489 if (!match(Fsh->getOperand(2),
7490 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7491 return false;
7492 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7493 return false;
7494
7495 IRBuilder<> Builder(Fsh);
7496 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7497 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7498 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7499 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7500 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7501 Fsh->eraseFromParent();
7502 return true;
7503}
7504
7505/// If we have a SelectInst that will likely profit from branch prediction,
7506/// turn it into a branch.
7507bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7508 if (DisableSelectToBranch)
7509 return false;
7510
7511 // If the SelectOptimize pass is enabled, selects have already been optimized.
7512 if (!getCGPassBuilderOption().DisableSelectOptimize)
7513 return false;
7514
7515 // Find all consecutive select instructions that share the same condition.
7516 SmallVector<SelectInst *, 2> ASI;
7517 ASI.push_back(SI);
7518 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7519 It != SI->getParent()->end(); ++It) {
7520 SelectInst *I = dyn_cast<SelectInst>(&*It);
7521 if (I && SI->getCondition() == I->getCondition()) {
7522 ASI.push_back(I);
7523 } else {
7524 break;
7525 }
7526 }
7527
7528 SelectInst *LastSI = ASI.back();
7529 // Increment the current iterator to skip the rest of the select instructions,
7530 // because they will be either "not lowered" or "all lowered" to branches.
7531 CurInstIterator = std::next(LastSI->getIterator());
7532 // Examine debug-info attached to the consecutive select instructions. They
7533 // won't be individually optimised by optimizeInst, so we need to perform
7534 // DbgVariableRecord maintenance here instead.
7535 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7536 fixupDbgVariableRecordsOnInst(*SI);
7537
7538 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7539
7540 // Can we convert the 'select' to CF ?
7541 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7542 return false;
7543
7544 TargetLowering::SelectSupportKind SelectKind;
7545 if (SI->getType()->isVectorTy())
7546 SelectKind = TargetLowering::ScalarCondVectorVal;
7547 else
7548 SelectKind = TargetLowering::ScalarValSelect;
7549
7550 if (TLI->isSelectSupported(SelectKind) &&
7551 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
7552 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7553 return false;
7554
7555 // The DominatorTree needs to be rebuilt by any consumers after this
7556 // transformation. We simply reset here rather than setting the ModifiedDT
7557 // flag to avoid restarting the function walk in runOnFunction for each
7558 // select optimized.
7559 DT.reset();
7560
7561 // Transform a sequence like this:
7562 // start:
7563 // %cmp = cmp uge i32 %a, %b
7564 // %sel = select i1 %cmp, i32 %c, i32 %d
7565 //
7566 // Into:
7567 // start:
7568 // %cmp = cmp uge i32 %a, %b
7569 // %cmp.frozen = freeze %cmp
7570 // br i1 %cmp.frozen, label %select.true, label %select.false
7571 // select.true:
7572 // br label %select.end
7573 // select.false:
7574 // br label %select.end
7575 // select.end:
7576 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7577 //
7578 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7579 // In addition, we may sink instructions that produce %c or %d from
7580 // the entry block into the destination(s) of the new branch.
7581 // If the true or false blocks do not contain a sunken instruction, that
7582 // block and its branch may be optimized away. In that case, one side of the
7583 // first branch will point directly to select.end, and the corresponding PHI
7584 // predecessor block will be the start block.
7585
7586 // Collect values that go on the true side and the values that go on the false
7587 // side.
7588 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7589 for (SelectInst *SI : ASI) {
7590 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7591 TrueInstrs.push_back(cast<Instruction>(V));
7592 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7593 FalseInstrs.push_back(cast<Instruction>(V));
7594 }
7595
7596 // Split the select block, according to how many (if any) values go on each
7597 // side.
7598 BasicBlock *StartBlock = SI->getParent();
7599 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7600 // We should split before any debug-info.
7601 SplitPt.setHeadBit(true);
7602
7603 IRBuilder<> IB(SI);
7604 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7605
7606 BasicBlock *TrueBlock = nullptr;
7607 BasicBlock *FalseBlock = nullptr;
7608 BasicBlock *EndBlock = nullptr;
7609 BranchInst *TrueBranch = nullptr;
7610 BranchInst *FalseBranch = nullptr;
7611 if (TrueInstrs.size() == 0) {
7612 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7613 CondFr, SplitPt, false, nullptr, nullptr, LI));
7614 FalseBlock = FalseBranch->getParent();
7615 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7616 } else if (FalseInstrs.size() == 0) {
7617 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7618 CondFr, SplitPt, false, nullptr, nullptr, LI));
7619 TrueBlock = TrueBranch->getParent();
7620 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7621 } else {
7622 Instruction *ThenTerm = nullptr;
7623 Instruction *ElseTerm = nullptr;
7624 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7625 nullptr, nullptr, LI);
7626 TrueBranch = cast<BranchInst>(ThenTerm);
7627 FalseBranch = cast<BranchInst>(ElseTerm);
7628 TrueBlock = TrueBranch->getParent();
7629 FalseBlock = FalseBranch->getParent();
7630 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7631 }
7632
7633 EndBlock->setName("select.end");
7634 if (TrueBlock)
7635 TrueBlock->setName("select.true.sink");
7636 if (FalseBlock)
7637 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7638 : "select.false.sink");
7639
7640 if (IsHugeFunc) {
7641 if (TrueBlock)
7642 FreshBBs.insert(TrueBlock);
7643 if (FalseBlock)
7644 FreshBBs.insert(FalseBlock);
7645 FreshBBs.insert(EndBlock);
7646 }
7647
7648 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7649
7650 static const unsigned MD[] = {
7651 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7652 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7653 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7654
7655 // Sink expensive instructions into the conditional blocks to avoid executing
7656 // them speculatively.
7657 for (Instruction *I : TrueInstrs)
7658 I->moveBefore(TrueBranch->getIterator());
7659 for (Instruction *I : FalseInstrs)
7660 I->moveBefore(FalseBranch->getIterator());
7661
7662 // If we did not create a new block for one of the 'true' or 'false' paths
7663 // of the condition, it means that side of the branch goes to the end block
7664 // directly and the path originates from the start block from the point of
7665 // view of the new PHI.
7666 if (TrueBlock == nullptr)
7667 TrueBlock = StartBlock;
7668 else if (FalseBlock == nullptr)
7669 FalseBlock = StartBlock;
7670
7671 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7672 // Use a reverse iterator because a later select may use the value of an
7673 // earlier select, and we need to propagate the value through the earlier
7674 // select to get the PHI operand.
7675 for (SelectInst *SI : llvm::reverse(ASI)) {
7676 // The select itself is replaced with a PHI Node.
7677 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7678 PN->insertBefore(EndBlock->begin());
7679 PN->takeName(SI);
7680 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7681 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7682 PN->setDebugLoc(SI->getDebugLoc());
7683
7684 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7685 SI->eraseFromParent();
7686 INS.erase(SI);
7687 ++NumSelectsExpanded;
7688 }
7689
7690 // Instruct OptimizeBlock to skip to the next block.
7691 CurInstIterator = StartBlock->end();
7692 return true;
7693}
7694
7695/// Some targets only accept certain types for splat inputs. For example a VDUP
7696/// in MVE takes a GPR (integer) register, and the instructions that incorporate
7697/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
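///
/// As a sketch of the rewrite this routine performs (illustrative IR, assuming
/// the target returns an i32 splat type from shouldConvertSplatType):
///   %ins   = insertelement <4 x float> poison, float %val, i64 0
///   %splat = shufflevector <4 x float> %ins, <4 x float> poison, zeroinitializer
/// becomes
///   %bc1    = bitcast float %val to i32
///   %splat2 = splat of %bc1 (insertelement + shufflevector on <4 x i32>)
///   %bc2    = bitcast <4 x i32> %splat2 to <4 x float>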
7698bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7699 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7700 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7701 m_Undef(), m_ZeroMask())))
7702 return false;
7703 Type *NewType = TLI->shouldConvertSplatType(SVI);
7704 if (!NewType)
7705 return false;
7706
7707 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7708 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7709 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7710 "Expected a type of the same size!");
7711 auto *NewVecType =
7712 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7713
7714 // Create a bitcast (shuffle (insert (bitcast(..))))
7715 IRBuilder<> Builder(SVI->getContext());
7716 Builder.SetInsertPoint(SVI);
7717 Value *BC1 = Builder.CreateBitCast(
7718 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7719 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7720 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7721
7722 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7723 RecursivelyDeleteTriviallyDeadInstructions(
7724 SVI, TLInfo, nullptr,
7725 [&](Value *V) { removeAllAssertingVHReferences(V); });
7726
7727 // Also hoist the bitcast up to its operand if they are not in the same
7728 // block.
7729 if (auto *BCI = dyn_cast<Instruction>(BC1))
7730 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7731 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7732 !Op->isTerminator() && !Op->isEHPad())
7733 BCI->moveAfter(Op);
7734
7735 return true;
7736}
7737
7738bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7739 // If the operands of I can be folded into a target instruction together with
7740 // I, duplicate and sink them.
7741 SmallVector<Use *, 4> OpsToSink;
7742 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7743 return false;
7744
7745 // OpsToSink can contain multiple uses in a use chain (e.g.
7746 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7747 // uses must come first, so we process the ops in reverse order so as to not
7748 // create invalid IR.
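  // For example (hypothetical chain): with %s = shufflevector %v and
  // %z = zext %s both feeding I from another block, %s dominates %z, so
  // OpsToSink lists %s before %z; walking in reverse clones %z first (inserted
  // just before I) and then %s (inserted just above the cloned %z).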
7749 BasicBlock *TargetBB = I->getParent();
7750 bool Changed = false;
7751 SmallVector<Use *, 4> ToReplace;
7752 Instruction *InsertPoint = I;
7753 DenseMap<const Instruction *, unsigned long> InstOrdering;
7754 unsigned long InstNumber = 0;
7755 for (const auto &I : *TargetBB)
7756 InstOrdering[&I] = InstNumber++;
7757
7758 for (Use *U : reverse(OpsToSink)) {
7759 auto *UI = cast<Instruction>(U->get());
7760 if (isa<PHINode>(UI))
7761 continue;
7762 if (UI->getParent() == TargetBB) {
7763 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7764 InsertPoint = UI;
7765 continue;
7766 }
7767 ToReplace.push_back(U);
7768 }
7769
7770 SetVector<Instruction *> MaybeDead;
7771 DenseMap<Instruction *, Instruction *> NewInstructions;
7772 for (Use *U : ToReplace) {
7773 auto *UI = cast<Instruction>(U->get());
7774 Instruction *NI = UI->clone();
7775
7776 if (IsHugeFunc) {
7777 // Now that we have cloned an instruction, its operands' defs may sink to
7778 // this BB. So we put the operand defs' BBs into FreshBBs for optimization.
7779 for (Value *Op : NI->operands())
7780 if (auto *OpDef = dyn_cast<Instruction>(Op))
7781 FreshBBs.insert(OpDef->getParent());
7782 }
7783
7784 NewInstructions[UI] = NI;
7785 MaybeDead.insert(UI);
7786 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7787 NI->insertBefore(InsertPoint->getIterator());
7788 InsertPoint = NI;
7789 InsertedInsts.insert(NI);
7790
7791 // Update the use for the new instruction, making sure that we update the
7792 // sunk instruction uses, if it is part of a chain that has already been
7793 // sunk.
7794 Instruction *OldI = cast<Instruction>(U->getUser());
7795 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7796 It->second->setOperand(U->getOperandNo(), NI);
7797 else
7798 U->set(NI);
7799 Changed = true;
7800 }
7801
7802 // Remove instructions that are dead after sinking.
7803 for (auto *I : MaybeDead) {
7804 if (!I->hasNUsesOrMore(1)) {
7805 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7806 I->eraseFromParent();
7807 }
7808 }
7809
7810 return Changed;
7811}
7812
7813bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7814 Value *Cond = SI->getCondition();
7815 Type *OldType = Cond->getType();
7816 LLVMContext &Context = Cond->getContext();
7817 EVT OldVT = TLI->getValueType(*DL, OldType);
7818 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7819 unsigned RegWidth = RegType.getSizeInBits();
7820
7821 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7822 return false;
7823
7824 // If the register width is greater than the type width, expand the condition
7825 // of the switch instruction and each case constant to the width of the
7826 // register. By widening the type of the switch condition, subsequent
7827 // comparisons (for case comparisons) will not need to be extended to the
7828 // preferred register width, so we will potentially eliminate N-1 extends,
7829 // where N is the number of cases in the switch.
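  // For example (illustrative only, assuming the preferred switch condition
  // type is i32), a `switch i8 %x` with cases 1 and 2 becomes
  //   %x.ext = zext i8 %x to i32
  //   switch i32 %x.ext, label %default [ i32 1, label %bb1
  //                                        i32 2, label %bb2 ]
  // so every case comparison is already performed at the i32 register width.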
7830 auto *NewType = Type::getIntNTy(Context, RegWidth);
7831
7832 // Extend the switch condition and case constants using the target preferred
7833 // extend unless the switch condition is a function argument with an extend
7834 // attribute. In that case, we can avoid an unnecessary mask/extension by
7835 // matching the argument extension instead.
7836 Instruction::CastOps ExtType = Instruction::ZExt;
7837 // Some targets prefer SExt over ZExt.
7838 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7839 ExtType = Instruction::SExt;
7840
7841 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7842 if (Arg->hasSExtAttr())
7843 ExtType = Instruction::SExt;
7844 if (Arg->hasZExtAttr())
7845 ExtType = Instruction::ZExt;
7846 }
7847
7848 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7849 ExtInst->insertBefore(SI->getIterator());
7850 ExtInst->setDebugLoc(SI->getDebugLoc());
7851 SI->setCondition(ExtInst);
7852 for (auto Case : SI->cases()) {
7853 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7854 APInt WideConst = (ExtType == Instruction::ZExt)
7855 ? NarrowConst.zext(RegWidth)
7856 : NarrowConst.sext(RegWidth);
7857 Case.setValue(ConstantInt::get(Context, WideConst));
7858 }
7859
7860 return true;
7861}
7862
7863bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7864 // The SCCP optimization tends to produce code like this:
7865 // switch(x) { case 42: phi(42, ...) }
7866 // Materializing the constant for the phi-argument needs instructions, so we
7867 // change the code to:
7868 // switch(x) { case 42: phi(x, ...) }
7869
7870 Value *Condition = SI->getCondition();
7871 // Avoid endless loop in degenerate case.
7872 if (isa<ConstantInt>(*Condition))
7873 return false;
7874
7875 bool Changed = false;
7876 BasicBlock *SwitchBB = SI->getParent();
7877 Type *ConditionType = Condition->getType();
7878
7879 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7880 ConstantInt *CaseValue = Case.getCaseValue();
7881 BasicBlock *CaseBB = Case.getCaseSuccessor();
7882 // Set to true if we previously checked that `CaseBB` is only reached by
7883 // a single case from this switch.
7884 bool CheckedForSinglePred = false;
7885 for (PHINode &PHI : CaseBB->phis()) {
7886 Type *PHIType = PHI.getType();
7887 // If ZExt is free then we can also catch patterns like this:
7888 // switch((i32)x) { case 42: phi((i64)42, ...); }
7889 // and replace `(i64)42` with `zext i32 %x to i64`.
7890 bool TryZExt =
7891 PHIType->isIntegerTy() &&
7892 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7893 TLI->isZExtFree(ConditionType, PHIType);
7894 if (PHIType == ConditionType || TryZExt) {
7895 // Set to true to skip this case because of multiple preds.
7896 bool SkipCase = false;
7897 Value *Replacement = nullptr;
7898 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7899 Value *PHIValue = PHI.getIncomingValue(I);
7900 if (PHIValue != CaseValue) {
7901 if (!TryZExt)
7902 continue;
7903 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7904 if (!PHIValueInt ||
7905 PHIValueInt->getValue() !=
7906 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7907 continue;
7908 }
7909 if (PHI.getIncomingBlock(I) != SwitchBB)
7910 continue;
7911 // We cannot optimize if there are multiple case labels jumping to
7912 // this block. This check may get expensive when there are many
7913 // case labels so we test for it last.
7914 if (!CheckedForSinglePred) {
7915 CheckedForSinglePred = true;
7916 if (SI->findCaseDest(CaseBB) == nullptr) {
7917 SkipCase = true;
7918 break;
7919 }
7920 }
7921
7922 if (Replacement == nullptr) {
7923 if (PHIValue == CaseValue) {
7924 Replacement = Condition;
7925 } else {
7926 IRBuilder<> Builder(SI);
7927 Replacement = Builder.CreateZExt(Condition, PHIType);
7928 }
7929 }
7930 PHI.setIncomingValue(I, Replacement);
7931 Changed = true;
7932 }
7933 if (SkipCase)
7934 break;
7935 }
7936 }
7937 }
7938 return Changed;
7939}
7940
7941bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7942 bool Changed = optimizeSwitchType(SI);
7943 Changed |= optimizeSwitchPhiConstants(SI);
7944 return Changed;
7945}
7946
7947namespace {
7948
7949/// Helper class to promote a scalar operation to a vector one.
7950/// This class is used to move an extractelement transition downward.
7951/// E.g.,
7952/// a = vector_op <2 x i32>
7953/// b = extractelement <2 x i32> a, i32 0
7954/// c = scalar_op b
7955/// store c
7956///
7957/// =>
7958/// a = vector_op <2 x i32>
7959/// c = vector_op a (equivalent to scalar_op on the related lane)
7960/// * d = extractelement <2 x i32> c, i32 0
7961/// * store d
7962/// Assuming both extractelement and store can be combined, we get rid of the
7963/// transition.
7964class VectorPromoteHelper {
7965 /// DataLayout associated with the current module.
7966 const DataLayout &DL;
7967
7968 /// Used to perform some checks on the legality of vector operations.
7969 const TargetLowering &TLI;
7970
7971 /// Used to estimate the cost of the promoted chain.
7972 const TargetTransformInfo &TTI;
7973
7974 /// The transition being moved downwards.
7975 Instruction *Transition;
7976
7977 /// The sequence of instructions to be promoted.
7978 SmallVector<Instruction *, 4> InstsToBePromoted;
7979
7980 /// Cost of combining a store and an extract.
7981 unsigned StoreExtractCombineCost;
7982
7983 /// Instruction that will be combined with the transition.
7984 Instruction *CombineInst = nullptr;
7985
7986 /// The instruction that represents the current end of the transition.
7987 /// Since we are faking the promotion until we reach the end of the chain
7988 /// of computation, we need a way to get the current end of the transition.
7989 Instruction *getEndOfTransition() const {
7990 if (InstsToBePromoted.empty())
7991 return Transition;
7992 return InstsToBePromoted.back();
7993 }
7994
7995 /// Return the index of the original value in the transition.
7996 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7997 /// c, is at index 0.
7998 unsigned getTransitionOriginalValueIdx() const {
7999 assert(isa<ExtractElementInst>(Transition) &&
8000 "Other kind of transitions are not supported yet");
8001 return 0;
8002 }
8003
8004 /// Return the index of the index in the transition.
8005 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8006 /// is at index 1.
8007 unsigned getTransitionIdx() const {
8008 assert(isa<ExtractElementInst>(Transition) &&
8009 "Other kind of transitions are not supported yet");
8010 return 1;
8011 }
8012
8013 /// Get the type of the transition.
8014 /// This is the type of the original value.
8015 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8016 /// transition is <2 x i32>.
8017 Type *getTransitionType() const {
8018 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8019 }
8020
8021 /// Promote \p ToBePromoted by moving \p Def downward through.
8022 /// I.e., we have the following sequence:
8023 /// Def = Transition <ty1> a to <ty2>
8024 /// b = ToBePromoted <ty2> Def, ...
8025 /// =>
8026 /// b = ToBePromoted <ty1> a, ...
8027 /// Def = Transition <ty1> ToBePromoted to <ty2>
8028 void promoteImpl(Instruction *ToBePromoted);
8029
8030 /// Check whether or not it is profitable to promote all the
8031 /// instructions enqueued to be promoted.
8032 bool isProfitableToPromote() {
8033 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8034 unsigned Index = isa<ConstantInt>(ValIdx)
8035 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8036 : -1;
8037 Type *PromotedType = getTransitionType();
8038
8039 StoreInst *ST = cast<StoreInst>(CombineInst);
8040 unsigned AS = ST->getPointerAddressSpace();
8041 // Check if this store is supported.
8042 if (!TLI.allowsMisalignedMemoryAccesses(
8043 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8044 ST->getAlign())) {
8045 // If this is not supported, there is no way we can combine
8046 // the extract with the store.
8047 return false;
8048 }
8049
8050 // The scalar chain of computation has to pay for the transition
8051 // scalar to vector.
8052 // The vector chain has to account for the combining cost.
8053 enum TargetTransformInfo::TargetCostKind CostKind =
8054 TargetTransformInfo::TCK_RecipThroughput;
8055 InstructionCost ScalarCost =
8056 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8057 InstructionCost VectorCost = StoreExtractCombineCost;
8058 for (const auto &Inst : InstsToBePromoted) {
8059 // Compute the cost.
8060 // By construction, all instructions being promoted are arithmetic ones.
8061 // Moreover, one argument is a constant that can be viewed as a splat
8062 // constant.
8063 Value *Arg0 = Inst->getOperand(0);
8064 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8065 isa<ConstantFP>(Arg0);
8066 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8067 if (IsArg0Constant)
8068 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8069 else
8070 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8071
8072 ScalarCost += TTI.getArithmeticInstrCost(
8073 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8074 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8075 CostKind, Arg0Info, Arg1Info);
8076 }
8077 LLVM_DEBUG(
8078 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8079 << ScalarCost << "\nVector: " << VectorCost << '\n');
8080 return ScalarCost > VectorCost;
8081 }
8082
8083 /// Generate a constant vector with \p Val with the same
8084 /// number of elements as the transition.
8085 /// \p UseSplat defines whether or not \p Val should be replicated
8086 /// across the whole vector.
8087 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8088 /// otherwise we generate a vector with as many poison as possible:
8089 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8090 /// used at the index of the extract.
8091 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8092 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8093 if (!UseSplat) {
8094 // If we cannot determine where the constant must be, we have to
8095 // use a splat constant.
8096 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8097 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8098 ExtractIdx = CstVal->getSExtValue();
8099 else
8100 UseSplat = true;
8101 }
8102
8103 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8104 if (UseSplat)
8105 return ConstantVector::getSplat(EC, Val);
8106
8107 if (!EC.isScalable()) {
8108 SmallVector<Constant *, 4> ConstVec;
8109 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8110 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8111 if (Idx == ExtractIdx)
8112 ConstVec.push_back(Val);
8113 else
8114 ConstVec.push_back(PoisonVal);
8115 }
8116 return ConstantVector::get(ConstVec);
8117 } else
8118 llvm_unreachable(
8119 "Generate scalable vector for non-splat is unimplemented");
8120 }
8121
8122 /// Check if promoting to a vector type an operand at \p OperandIdx
8123 /// in \p Use can trigger undefined behavior.
8124 static bool canCauseUndefinedBehavior(const Instruction *Use,
8125 unsigned OperandIdx) {
8126 // It is not safe to introduce undef when the operand is on
8127 // the right-hand side of a division-like instruction.
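    // For example, if the chain contains `udiv %a, %transition`, padding the
    // promoted constant vector with poison in the unused lanes (see
    // getConstantVector) could introduce a divide-by-poison in those lanes,
    // so a splat constant has to be used instead.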
8128 if (OperandIdx != 1)
8129 return false;
8130 switch (Use->getOpcode()) {
8131 default:
8132 return false;
8133 case Instruction::SDiv:
8134 case Instruction::UDiv:
8135 case Instruction::SRem:
8136 case Instruction::URem:
8137 return true;
8138 case Instruction::FDiv:
8139 case Instruction::FRem:
8140 return !Use->hasNoNaNs();
8141 }
8142 llvm_unreachable(nullptr);
8143 }
8144
8145public:
8146 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8147 const TargetTransformInfo &TTI, Instruction *Transition,
8148 unsigned CombineCost)
8149 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8150 StoreExtractCombineCost(CombineCost) {
8151 assert(Transition && "Do not know how to promote null");
8152 }
8153
8154 /// Check if we can promote \p ToBePromoted to \p Type.
8155 bool canPromote(const Instruction *ToBePromoted) const {
8156 // We could support CastInst too.
8157 return isa<BinaryOperator>(ToBePromoted);
8158 }
8159
8160 /// Check if it is profitable to promote \p ToBePromoted
8161 /// by moving downward the transition through.
8162 bool shouldPromote(const Instruction *ToBePromoted) const {
8163 // Promote only if all the operands can be statically expanded.
8164 // Indeed, we do not want to introduce any new kind of transitions.
8165 for (const Use &U : ToBePromoted->operands()) {
8166 const Value *Val = U.get();
8167 if (Val == getEndOfTransition()) {
8168 // If the use is a division and the transition is on the rhs,
8169 // we cannot promote the operation, otherwise we may create a
8170 // division by zero.
8171 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8172 return false;
8173 continue;
8174 }
8175 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8176 !isa<ConstantFP>(Val))
8177 return false;
8178 }
8179 // Check that the resulting operation is legal.
8180 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8181 if (!ISDOpcode)
8182 return false;
8183 return StressStoreExtract ||
8184 TLI.isOperationLegalOrCustom(
8185 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8186 }
8187
8188 /// Check whether or not \p Use can be combined
8189 /// with the transition.
8190 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8191 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8192
8193 /// Record \p ToBePromoted as part of the chain to be promoted.
8194 void enqueueForPromotion(Instruction *ToBePromoted) {
8195 InstsToBePromoted.push_back(ToBePromoted);
8196 }
8197
8198 /// Set the instruction that will be combined with the transition.
8199 void recordCombineInstruction(Instruction *ToBeCombined) {
8200 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8201 CombineInst = ToBeCombined;
8202 }
8203
8204 /// Promote all the instructions enqueued for promotion if it is
8205 /// profitable.
8206 /// \return True if the promotion happened, false otherwise.
8207 bool promote() {
8208 // Check if there is something to promote.
8209 // Right now, if we do not have anything to combine with,
8210 // we assume the promotion is not profitable.
8211 if (InstsToBePromoted.empty() || !CombineInst)
8212 return false;
8213
8214 // Check cost.
8215 if (!StressStoreExtract && !isProfitableToPromote())
8216 return false;
8217
8218 // Promote.
8219 for (auto &ToBePromoted : InstsToBePromoted)
8220 promoteImpl(ToBePromoted);
8221 InstsToBePromoted.clear();
8222 return true;
8223 }
8224};
8225
8226} // end anonymous namespace
8227
8228void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8229 // At this point, we know that all the operands of ToBePromoted but Def
8230 // can be statically promoted.
8231 // For Def, we need to use its parameter in ToBePromoted:
8232 // b = ToBePromoted ty1 a
8233 // Def = Transition ty1 b to ty2
8234 // Move the transition down.
8235 // 1. Replace all uses of the promoted operation by the transition.
8236 // = ... b => = ... Def.
8237 assert(ToBePromoted->getType() == Transition->getType() &&
8238 "The type of the result of the transition does not match "
8239 "the final type");
8240 ToBePromoted->replaceAllUsesWith(Transition);
8241 // 2. Update the type of the uses.
8242 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8243 Type *TransitionTy = getTransitionType();
8244 ToBePromoted->mutateType(TransitionTy);
8245 // 3. Update all the operands of the promoted operation with promoted
8246 // operands.
8247 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8248 for (Use &U : ToBePromoted->operands()) {
8249 Value *Val = U.get();
8250 Value *NewVal = nullptr;
8251 if (Val == Transition)
8252 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8253 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8254 isa<ConstantFP>(Val)) {
8255 // Use a splat constant if it is not safe to use undef.
8256 NewVal = getConstantVector(
8257 cast<Constant>(Val),
8258 isa<UndefValue>(Val) ||
8259 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8260 } else
8261 llvm_unreachable("Did you modify shouldPromote and forget to update "
8262 "this?");
8263 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8264 }
8265 Transition->moveAfter(ToBePromoted);
8266 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8267}
8268
8269/// Some targets can do store(extractelement) with one instruction.
8270/// Try to push the extractelement towards the stores when the target
8271/// has this feature and this is profitable.
8272bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8273 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8274 if (DisableStoreExtract ||
8275 (!StressStoreExtract &&
8276 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8277 Inst->getOperand(1), CombineCost)))
8278 return false;
8279
8280 // At this point we know that Inst is a vector to scalar transition.
8281 // Try to move it down the def-use chain, until:
8282 // - We can combine the transition with its single use
8283 // => we got rid of the transition.
8284 // - We escape the current basic block
8285 // => we would need to check that we are moving it to a cheaper place and
8286 // we do not do that for now.
8287 BasicBlock *Parent = Inst->getParent();
8288 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8289 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8290 // If the transition has more than one use, assume this is not going to be
8291 // beneficial.
8292 while (Inst->hasOneUse()) {
8293 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8294 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8295
8296 if (ToBePromoted->getParent() != Parent) {
8297 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8298 << ToBePromoted->getParent()->getName()
8299 << ") than the transition (" << Parent->getName()
8300 << ").\n");
8301 return false;
8302 }
8303
8304 if (VPH.canCombine(ToBePromoted)) {
8305 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8306 << "will be combined with: " << *ToBePromoted << '\n');
8307 VPH.recordCombineInstruction(ToBePromoted);
8308 bool Changed = VPH.promote();
8309 NumStoreExtractExposed += Changed;
8310 return Changed;
8311 }
8312
8313 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8314 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8315 return false;
8316
8317 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8318
8319 VPH.enqueueForPromotion(ToBePromoted);
8320 Inst = ToBePromoted;
8321 }
8322 return false;
8323}
8324
8325/// For the store instruction sequence below, F and I values
8326/// are bundled together as an i64 value before being stored into memory.
8327/// Sometimes it is more efficient to generate separate stores for F and I,
8328/// which can remove the bitwise instructions or sink them to colder places.
8329///
8330/// (store (or (zext (bitcast F to i32) to i64),
8331/// (shl (zext I to i64), 32)), addr) -->
8332/// (store F, addr) and (store I, addr+4)
8333///
8334/// Similarly, splitting for other merged stores can also be beneficial, like:
8335/// For pair of {i32, i32}, i64 store --> two i32 stores.
8336/// For pair of {i32, i16}, i64 store --> two i32 stores.
8337/// For pair of {i16, i16}, i32 store --> two i16 stores.
8338/// For pair of {i16, i8}, i32 store --> two i16 stores.
8339/// For pair of {i8, i8}, i16 store --> two i8 stores.
8340///
8341/// We allow each target to determine specifically which kind of splitting is
8342/// supported.
8343///
8344/// The store patterns are commonly seen from the simple code snippet below
8345/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8346/// void goo(const std::pair<int, float> &);
8347/// hoo() {
8348/// ...
8349/// goo(std::make_pair(tmp, ftmp));
8350/// ...
8351/// }
8352///
8353/// Although we already have similar splitting in DAG Combine, we duplicate
8354/// it in CodeGenPrepare to catch the case in which the pattern is across
8355/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8356/// during code expansion.
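///
/// As a concrete sketch (types and offsets illustrative, little-endian layout
/// assumed):
///   %b  = bitcast float %f to i32
///   %lo = zext i32 %b to i64
///   %hi = shl i64 (zext i32 %i to i64), 32
///   store i64 (or i64 %lo, %hi), ptr %addr
/// becomes one i32 store of %b to %addr and one i32 store of %i to %addr + 4.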
8357static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8358 const TargetLowering &TLI) {
8359 // Handle simple but common cases only.
8360 Type *StoreType = SI.getValueOperand()->getType();
8361
8362 // The code below assumes shifting a value by <number of bits>,
8363 // whereas scalable vectors would have to be shifted by
8364 // <2log(vscale) + number of bits> in order to store the
8365 // low/high parts. Bailing out for now.
8366 if (StoreType->isScalableTy())
8367 return false;
8368
8369 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8370 DL.getTypeSizeInBits(StoreType) == 0)
8371 return false;
8372
8373 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8374 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8375 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8376 return false;
8377
8378 // Don't split the store if it is volatile.
8379 if (SI.isVolatile())
8380 return false;
8381
8382 // Match the following patterns:
8383 // (store (or (zext LValue to i64),
8384 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8385 // or
8386 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8387 // (zext LValue to i64),
8388 // Expect both operands of OR and the first operand of SHL have only
8389 // one use.
8390 Value *LValue, *HValue;
8391 if (!match(SI.getValueOperand(),
8392 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8393 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8394 m_SpecificInt(HalfValBitSize))))))
8395 return false;
8396
8397 // Check LValue and HValue are int with size less or equal than 32.
8398 if (!LValue->getType()->isIntegerTy() ||
8399 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8400 !HValue->getType()->isIntegerTy() ||
8401 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8402 return false;
8403
8404 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8405 // as the input of target query.
8406 auto *LBC = dyn_cast<BitCastInst>(LValue);
8407 auto *HBC = dyn_cast<BitCastInst>(HValue);
8408 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8409 : EVT::getEVT(LValue->getType());
8410 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8411 : EVT::getEVT(HValue->getType());
8412 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8413 return false;
8414
8415 // Start to split store.
8416 IRBuilder<> Builder(SI.getContext());
8417 Builder.SetInsertPoint(&SI);
8418
8419 // If LValue/HValue is a bitcast in another BB, create a new one in current
8420 // BB so it may be merged with the split stores by the DAG combiner.
8421 if (LBC && LBC->getParent() != SI.getParent())
8422 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8423 if (HBC && HBC->getParent() != SI.getParent())
8424 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8425
8426 bool IsLE = SI.getDataLayout().isLittleEndian();
8427 auto CreateSplitStore = [&](Value *V, bool Upper) {
8428 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8429 Value *Addr = SI.getPointerOperand();
8430 Align Alignment = SI.getAlign();
8431 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8432 if (IsOffsetStore) {
8433 Addr = Builder.CreateGEP(
8434 SplitStoreType, Addr,
8435 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8436
8437 // When splitting the store in half, naturally one half will retain the
8438 // alignment of the original wider store, regardless of whether it was
8439 // over-aligned or not, while the other will require adjustment.
8440 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8441 }
8442 Builder.CreateAlignedStore(V, Addr, Alignment);
8443 };
8444
8445 CreateSplitStore(LValue, false);
8446 CreateSplitStore(HValue, true);
8447
8448 // Delete the old store.
8449 SI.eraseFromParent();
8450 return true;
8451}
8452
8453// Return true if the GEP has two operands, the first operand is of a sequential
8454// type, and the second operand is a constant.
8455static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8456 gep_type_iterator I = gep_type_begin(*GEP);
8457 return GEP->getNumOperands() == 2 && I.isSequential() &&
8458 isa<ConstantInt>(GEP->getOperand(1));
8459}
8460
8461// Try unmerging GEPs to reduce liveness interference (register pressure) across
8462// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8463// reducing liveness interference across those edges benefits global register
8464// allocation. Currently handles only certain cases.
8465//
8466// For example, unmerge %GEPI and %UGEPI as below.
8467//
8468// ---------- BEFORE ----------
8469// SrcBlock:
8470// ...
8471// %GEPIOp = ...
8472// ...
8473// %GEPI = gep %GEPIOp, Idx
8474// ...
8475// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8476// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8477 // (* %GEPIOp is alive on the indirectbr edges only because it's used by
8478// %UGEPI)
8479//
8480// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8481// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8482// ...
8483//
8484// DstBi:
8485// ...
8486// %UGEPI = gep %GEPIOp, UIdx
8487// ...
8488// ---------------------------
8489//
8490// ---------- AFTER ----------
8491// SrcBlock:
8492// ... (same as above)
8493// (* %GEPI is still alive on the indirectbr edges)
8494// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8495// unmerging)
8496// ...
8497//
8498// DstBi:
8499// ...
8500// %UGEPI = gep %GEPI, (UIdx-Idx)
8501// ...
8502// ---------------------------
8503//
8504// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8505// no longer alive on them.
8506//
8507 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8508// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8509 // not to disable further simplifications and optimizations as a result of GEP
8510// merging.
8511//
8512// Note this unmerging may increase the length of the data flow critical path
8513// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8514// between the register pressure and the length of data-flow critical
8515// path. Restricting this to the uncommon IndirectBr case would minimize the
8516// impact of potentially longer critical path, if any, and the impact on compile
8517// time.
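//
// As a small numeric instance of the rewrite above (values illustrative):
// with %GEPI = gep i8, ptr %GEPIOp, i64 4 in SrcBlock and
// %UGEPI = gep i8, ptr %GEPIOp, i64 20 in DstBi, the unmerged form is
// %UGEPI = gep i8, ptr %GEPI, i64 16, so only %GEPI (not %GEPIOp) stays live
// across the indirectbr edges.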
8518static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8519 const TargetTransformInfo *TTI) {
8520 BasicBlock *SrcBlock = GEPI->getParent();
8521 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8522 // (non-IndirectBr) cases exit early here.
8523 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8524 return false;
8525 // Check that GEPI is a simple gep with a single constant index.
8526 if (!GEPSequentialConstIndexed(GEPI))
8527 return false;
8528 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8529 // Check that GEPI is a cheap one.
8530 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8531 TargetTransformInfo::TCK_SizeAndLatency) >
8532 TargetTransformInfo::TCC_Basic)
8533 return false;
8534 Value *GEPIOp = GEPI->getOperand(0);
8535 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8536 if (!isa<Instruction>(GEPIOp))
8537 return false;
8538 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8539 if (GEPIOpI->getParent() != SrcBlock)
8540 return false;
8541 // Check that GEP is used outside the block, meaning it's alive on the
8542 // IndirectBr edge(s).
8543 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8544 if (auto *I = dyn_cast<Instruction>(Usr)) {
8545 if (I->getParent() != SrcBlock) {
8546 return true;
8547 }
8548 }
8549 return false;
8550 }))
8551 return false;
8552 // The second elements of the GEP chains to be unmerged.
8553 std::vector<GetElementPtrInst *> UGEPIs;
8554 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8555 // on IndirectBr edges.
8556 for (User *Usr : GEPIOp->users()) {
8557 if (Usr == GEPI)
8558 continue;
8559 // Check if Usr is an Instruction. If not, give up.
8560 if (!isa<Instruction>(Usr))
8561 return false;
8562 auto *UI = cast<Instruction>(Usr);
8563 // Check if Usr is in the same block as GEPIOp, which is fine, so skip it.
8564 if (UI->getParent() == SrcBlock)
8565 continue;
8566 // Check if Usr is a GEP. If not, give up.
8567 if (!isa<GetElementPtrInst>(Usr))
8568 return false;
8569 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8570 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8571 // the pointer operand to it. If so, record it in the vector. If not, give
8572 // up.
8573 if (!GEPSequentialConstIndexed(UGEPI))
8574 return false;
8575 if (UGEPI->getOperand(0) != GEPIOp)
8576 return false;
8577 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8578 return false;
8579 if (GEPIIdx->getType() !=
8580 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8581 return false;
8582 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8583 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8584 TargetTransformInfo::TCK_SizeAndLatency) >
8585 TargetTransformInfo::TCC_Basic)
8586 return false;
8587 UGEPIs.push_back(UGEPI);
8588 }
8589 if (UGEPIs.size() == 0)
8590 return false;
8591 // Check the materializing cost of (Uidx-Idx).
8592 for (GetElementPtrInst *UGEPI : UGEPIs) {
8593 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8594 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8595 InstructionCost ImmCost = TTI->getIntImmCost(
8596 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8597 if (ImmCost > TargetTransformInfo::TCC_Basic)
8598 return false;
8599 }
8600 // Now unmerge between GEPI and UGEPIs.
8601 for (GetElementPtrInst *UGEPI : UGEPIs) {
8602 UGEPI->setOperand(0, GEPI);
8603 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8604 Constant *NewUGEPIIdx = ConstantInt::get(
8605 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8606 UGEPI->setOperand(1, NewUGEPIIdx);
8607 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8608 // inbounds to avoid UB.
8609 if (!GEPI->isInBounds()) {
8610 UGEPI->setIsInBounds(false);
8611 }
8612 }
8613 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8614 // alive on IndirectBr edges).
8615 assert(llvm::none_of(GEPIOp->users(),
8616 [&](User *Usr) {
8617 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8618 }) &&
8619 "GEPIOp is used outside SrcBlock");
8620 return true;
8621}
8622
8623static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8624 SmallSet<BasicBlock *, 32> &FreshBBs,
8625 bool IsHugeFunc) {
8626 // Try and convert
8627 // %c = icmp ult %x, 8
8628 // br %c, bla, blb
8629 // %tc = lshr %x, 3
8630 // to
8631 // %tc = lshr %x, 3
8632 // %c = icmp eq %tc, 0
8633 // br %c, bla, blb
8634 // Creating the cmp to zero can be better for the backend, especially if the
8635 // lshr produces flags that can be used automatically.
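  // The equality form handled below is analogous (sketch only, values
  // hypothetical):
  //   %c = icmp eq %x, 7            -->   %s = sub %x, 7
  //   br %c, bla, blb                     %c = icmp eq %s, 0
  //   %s = sub %x, 7                      br %c, bla, blb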
8636 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8637 return false;
8638
8639 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8640 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8641 return false;
8642
8643 Value *X = Cmp->getOperand(0);
8644 if (!X->hasUseList())
8645 return false;
8646
8647 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8648
8649 for (auto *U : X->users()) {
8650 Instruction *UI = dyn_cast<Instruction>(U);
8651 // A quick dominance check
8652 if (!UI ||
8653 (UI->getParent() != Branch->getParent() &&
8654 UI->getParent() != Branch->getSuccessor(0) &&
8655 UI->getParent() != Branch->getSuccessor(1)) ||
8656 (UI->getParent() != Branch->getParent() &&
8657 !UI->getParent()->getSinglePredecessor()))
8658 continue;
8659
8660 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8661 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8662 IRBuilder<> Builder(Branch);
8663 if (UI->getParent() != Branch->getParent())
8664 UI->moveBefore(Branch->getIterator());
8665 UI->dropPoisonGeneratingFlags();
8666 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8667 ConstantInt::get(UI->getType(), 0));
8668 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8669 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8670 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8671 return true;
8672 }
8673 if (Cmp->isEquality() &&
8674 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8675 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8676 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8677 IRBuilder<> Builder(Branch);
8678 if (UI->getParent() != Branch->getParent())
8679 UI->moveBefore(Branch->getIterator());
8680 UI->dropPoisonGeneratingFlags();
8681 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8682 ConstantInt::get(UI->getType(), 0));
8683 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8684 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8685 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8686 return true;
8687 }
8688 }
8689 return false;
8690}
8691
8692bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8693 bool AnyChange = false;
8694 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8695
8696 // Bail out if we inserted the instruction to prevent optimizations from
8697 // stepping on each other's toes.
8698 if (InsertedInsts.count(I))
8699 return AnyChange;
8700
8701 // TODO: Move into the switch on opcode below here.
8702 if (PHINode *P = dyn_cast<PHINode>(I)) {
8703 // It is possible for very late stage optimizations (such as SimplifyCFG)
8704 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8705 // trivial PHI, go ahead and zap it here.
8706 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8707 LargeOffsetGEPMap.erase(P);
8708 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8709 P->eraseFromParent();
8710 ++NumPHIsElim;
8711 return true;
8712 }
8713 return AnyChange;
8714 }
8715
8716 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8717 // If the source of the cast is a constant, then this should have
8718 // already been constant folded. The only reason NOT to constant fold
8719 // it is if something (e.g. LSR) was careful to place the constant
8720 // evaluation in a block other than the one that uses it (e.g. to hoist
8721 // the address of globals out of a loop). If this is the case, we don't
8722 // want to forward-subst the cast.
8723 if (isa<Constant>(CI->getOperand(0)))
8724 return AnyChange;
8725
8726 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8727 return true;
8728
8729 if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8730 isa<TruncInst>(I)) &&
8731 TLI->optimizeExtendOrTruncateConversion(
8732 I, LI->getLoopFor(I->getParent()), *TTI))
8733 return true;
8734
8735 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8736 /// Sink a zext or sext into its user blocks if the target type doesn't
8737 /// fit in one register
8738 if (TLI->getTypeAction(CI->getContext(),
8739 TLI->getValueType(*DL, CI->getType())) ==
8740 TargetLowering::TypeExpandInteger) {
8741 return SinkCast(CI);
8742 } else {
8743 if (TLI->optimizeExtendOrTruncateConversion(
8744 I, LI->getLoopFor(I->getParent()), *TTI))
8745 return true;
8746
8747 bool MadeChange = optimizeExt(I);
8748 return MadeChange | optimizeExtUses(I);
8749 }
8750 }
8751 return AnyChange;
8752 }
8753
8754 if (auto *Cmp = dyn_cast<CmpInst>(I))
8755 if (optimizeCmp(Cmp, ModifiedDT))
8756 return true;
8757
8758 if (match(I, m_URem(m_Value(), m_Value())))
8759 if (optimizeURem(I))
8760 return true;
8761
8762 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8763 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8764 bool Modified = optimizeLoadExt(LI);
8765 unsigned AS = LI->getPointerAddressSpace();
8766 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8767 return Modified;
8768 }
8769
8770 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8771 if (splitMergedValStore(*SI, *DL, *TLI))
8772 return true;
8773 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8774 unsigned AS = SI->getPointerAddressSpace();
8775 return optimizeMemoryInst(I, SI->getOperand(1),
8776 SI->getOperand(0)->getType(), AS);
8777 }
8778
8779 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8780 unsigned AS = RMW->getPointerAddressSpace();
8781 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8782 }
8783
8784 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8785 unsigned AS = CmpX->getPointerAddressSpace();
8786 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8787 CmpX->getCompareOperand()->getType(), AS);
8788 }
8789
8790 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8791
8792 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8793 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8794 return true;
8795
8796 // TODO: Move this into the switch on opcode - it handles shifts already.
8797 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8798 BinOp->getOpcode() == Instruction::LShr)) {
8799 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8800 if (CI && TLI->hasExtractBitsInsn())
8801 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8802 return true;
8803 }
8804
8805 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8806 if (GEPI->hasAllZeroIndices()) {
8807 /// The GEP operand must be a pointer, so must its result -> BitCast
8808 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8809 GEPI->getName(), GEPI->getIterator());
8810 NC->setDebugLoc(GEPI->getDebugLoc());
8811 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8812 RecursivelyDeleteTriviallyDeadInstructions(
8813 GEPI, TLInfo, nullptr,
8814 [&](Value *V) { removeAllAssertingVHReferences(V); });
8815 ++NumGEPsElim;
8816 optimizeInst(NC, ModifiedDT);
8817 return true;
8818 }
8819 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8820 return true;
8821 }
8822 }
8823
8824 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8825 // freeze(icmp a, const)) -> icmp (freeze a), const
8826 // This helps generate efficient conditional jumps.
8827 Instruction *CmpI = nullptr;
8828 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8829 CmpI = II;
8830 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8831 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8832
8833 if (CmpI && CmpI->hasOneUse()) {
8834 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8835 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8836 isa<ConstantPointerNull>(Op0);
8837 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8838 isa<ConstantPointerNull>(Op1);
8839 if (Const0 || Const1) {
8840 if (!Const0 || !Const1) {
8841 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8842 F->takeName(FI);
8843 CmpI->setOperand(Const0 ? 1 : 0, F);
8844 }
8845 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8846 FI->eraseFromParent();
8847 return true;
8848 }
8849 }
8850 return AnyChange;
8851 }
8852
8853 if (tryToSinkFreeOperands(I))
8854 return true;
8855
8856 switch (I->getOpcode()) {
8857 case Instruction::Shl:
8858 case Instruction::LShr:
8859 case Instruction::AShr:
8860 return optimizeShiftInst(cast<BinaryOperator>(I));
8861 case Instruction::Call:
8862 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8863 case Instruction::Select:
8864 return optimizeSelectInst(cast<SelectInst>(I));
8865 case Instruction::ShuffleVector:
8866 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8867 case Instruction::Switch:
8868 return optimizeSwitchInst(cast<SwitchInst>(I));
8869 case Instruction::ExtractElement:
8870 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8871 case Instruction::Br:
8872 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8873 }
8874
8875 return AnyChange;
8876}
8877
8878/// Given an OR instruction, check to see if this is a bitreverse
8879/// idiom. If so, insert the new intrinsic and return true.
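///
/// For instance (illustrative only), a shift-and-mask chain such as
///   %t1 = lshr i8 %x, 4
///   %t2 = shl i8 %x, 4
///   %t3 = or i8 %t1, %t2
///   ... further mask/shift steps that swap pairs and then single bits ...
/// which ends up reversing every bit of %x can be replaced by a single call to
/// llvm.bitreverse.i8, provided the target lowers ISD::BITREVERSE for i8.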
8880bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8881 if (!I.getType()->isIntegerTy() ||
8882 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8883 TLI->getValueType(*DL, I.getType(), true)))
8884 return false;
8885
8886 SmallVector<Instruction *, 4> Insts;
8887 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8888 return false;
8889 Instruction *LastInst = Insts.back();
8890 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8891 RecursivelyDeleteTriviallyDeadInstructions(
8892 &I, TLInfo, nullptr,
8893 [&](Value *V) { removeAllAssertingVHReferences(V); });
8894 return true;
8895}
8896
8897// In this pass we look for GEP and cast instructions that are used
8898// across basic blocks and rewrite them to improve basic-block-at-a-time
8899// selection.
8900bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8901 SunkAddrs.clear();
8902 bool MadeChange = false;
8903
8904 do {
8905 CurInstIterator = BB.begin();
8906 ModifiedDT = ModifyDT::NotModifyDT;
8907 while (CurInstIterator != BB.end()) {
8908 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8909 if (ModifiedDT != ModifyDT::NotModifyDT) {
8910 // For huge functions we tend to quickly go through the inner optimization
8911 // opportunities in the BB. So we go back to the BB head to re-optimize
8912 // each instruction instead of going back to the function head.
8913 if (IsHugeFunc) {
8914 DT.reset();
8915 getDT(*BB.getParent());
8916 break;
8917 } else {
8918 return true;
8919 }
8920 }
8921 }
8922 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8923
8924 bool MadeBitReverse = true;
8925 while (MadeBitReverse) {
8926 MadeBitReverse = false;
8927 for (auto &I : reverse(BB)) {
8928 if (makeBitReverse(I)) {
8929 MadeBitReverse = MadeChange = true;
8930 break;
8931 }
8932 }
8933 }
8934 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8935
8936 return MadeChange;
8937}
8938
8939bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8940 bool AnyChange = false;
8941 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8942 AnyChange |= fixupDbgVariableRecord(DVR);
8943 return AnyChange;
8944}
8945
8946// FIXME: should updating debug-info really cause the "changed" flag to fire,
8947// which can cause a function to be reprocessed?
8948bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8949 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8950 DVR.Type != DbgVariableRecord::LocationType::Assign)
8951 return false;
8952
8953 // Does this DbgVariableRecord refer to a sunk address calculation?
8954 bool AnyChange = false;
8955 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8956 DVR.location_ops().end());
8957 for (Value *Location : LocationOps) {
8958 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8959 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8960 if (SunkAddr) {
8961 // Point dbg.value at locally computed address, which should give the best
8962 // opportunity to be accurately lowered. This update may change the type
8963 // of pointer being referred to; however this makes no difference to
8964 // debugging information, and we can't generate bitcasts that may affect
8965 // codegen.
8966 DVR.replaceVariableLocationOp(Location, SunkAddr);
8967 AnyChange = true;
8968 }
8969 }
8970 return AnyChange;
8971}
8972
8973static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
8974 DVR->removeFromParent();
8975 BasicBlock *VIBB = VI->getParent();
8976 if (isa<PHINode>(VI))
8977 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8978 else
8979 VIBB->insertDbgRecordAfter(DVR, &*VI);
8980}
8981
8982// A llvm.dbg.value may be using a value before its definition, due to
8983// optimizations in this pass and others. Scan for such dbg.values, and rescue
8984// them by moving the dbg.value to immediately after the value definition.
8985// FIXME: Ideally this should never be necessary, and this has the potential
8986// to re-order dbg.value intrinsics.
8987bool CodeGenPrepare::placeDbgValues(Function &F) {
8988 bool MadeChange = false;
8989 DominatorTree DT(F);
8990
8991 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
8992 SmallVector<Instruction *, 4> VIs;
8993 for (Value *V : DbgItem->location_ops())
8994 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8995 VIs.push_back(VI);
8996
8997 // This item may depend on multiple instructions, complicating any
8998 // potential sink. This block takes the defensive approach, opting to
8999 // "undef" the item if it has more than one instruction and any of them do
9000 // not dominate it.
9001 for (Instruction *VI : VIs) {
9002 if (VI->isTerminator())
9003 continue;
9004
9005 // If VI is a phi in a block with an EHPad terminator, we can't insert
9006 // after it.
9007 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9008 continue;
9009
9010 // If the defining instruction dominates the dbg.value, we do not need
9011 // to move the dbg.value.
9012 if (DT.dominates(VI, Position))
9013 continue;
9014
9015 // If we depend on multiple instructions and any of them doesn't
9016 // dominate this DVI, we probably can't salvage it: moving it to
9017 // after any of the instructions could cause us to lose the others.
9018 if (VIs.size() > 1) {
9019 LLVM_DEBUG(
9020 dbgs()
9021 << "Unable to find valid location for Debug Value, undefing:\n"
9022 << *DbgItem);
9023 DbgItem->setKillLocation();
9024 break;
9025 }
9026
9027 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9028 << *DbgItem << ' ' << *VI);
9029 DbgInserterHelper(DbgItem, VI->getIterator());
9030 MadeChange = true;
9031 ++NumDbgValueMoved;
9032 }
9033 };
9034
9035 for (BasicBlock &BB : F) {
9036 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9037 // Process any DbgVariableRecord records attached to this
9038 // instruction.
9039 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9040 filterDbgVars(Insn.getDbgRecordRange()))) {
9041 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9042 continue;
9043 DbgProcessor(&DVR, &Insn);
9044 }
9045 }
9046 }
9047
9048 return MadeChange;
9049}
9050
9051// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9052// probes can be chained dependencies of other regular DAG nodes and block DAG
9053// combine optimizations.
9054bool CodeGenPrepare::placePseudoProbes(Function &F) {
9055 bool MadeChange = false;
9056 for (auto &Block : F) {
9057 // Move the rest of the probes to the beginning of the block.
9058 auto FirstInst = Block.getFirstInsertionPt();
9059 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9060 ++FirstInst;
9061 BasicBlock::iterator I(FirstInst);
9062 I++;
9063 while (I != Block.end()) {
9064 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9065 II->moveBefore(FirstInst);
9066 MadeChange = true;
9067 }
9068 }
9069 }
9070 return MadeChange;
9071}
9072
9073/// Scale down both weights to fit into uint32_t.
9074static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9075 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9076 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9077 NewTrue = NewTrue / Scale;
9078 NewFalse = NewFalse / Scale;
9079}
9080
9081/// Some targets prefer to split a conditional branch like:
9082/// \code
9083/// %0 = icmp ne i32 %a, 0
9084/// %1 = icmp ne i32 %b, 0
9085/// %or.cond = or i1 %0, %1
9086/// br i1 %or.cond, label %TrueBB, label %FalseBB
9087/// \endcode
9088/// into multiple branch instructions like:
9089/// \code
9090/// bb1:
9091/// %0 = icmp ne i32 %a, 0
9092/// br i1 %0, label %TrueBB, label %bb2
9093/// bb2:
9094/// %1 = icmp ne i32 %b, 0
9095/// br i1 %1, label %TrueBB, label %FalseBB
9096/// \endcode
9097/// This usually allows instruction selection to do even further optimizations
9098/// and combine the compare with the branch instruction. Currently this is
9099/// applied for targets which have "cheap" jump instructions.
9100///
9101/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9102///
9103bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9104 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9105 return false;
9106
9107 bool MadeChange = false;
9108 for (auto &BB : F) {
9109 // Does this BB end with the following?
9110 // %cond1 = icmp|fcmp|binary instruction ...
9111 // %cond2 = icmp|fcmp|binary instruction ...
9112 // %cond.or = or|and i1 %cond1, cond2
9113 // br i1 %cond.or label %dest1, label %dest2"
9114 Instruction *LogicOp;
9115 BasicBlock *TBB, *FBB;
9116 if (!match(BB.getTerminator(),
9117 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9118 continue;
9119
9120 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9121 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9122 continue;
9123
9124 // The merging of mostly empty BBs can cause a degenerate branch.
9125 if (TBB == FBB)
9126 continue;
9127
9128 unsigned Opc;
9129 Value *Cond1, *Cond2;
9130 if (match(LogicOp,
9131 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9132 Opc = Instruction::And;
9133 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9134 m_OneUse(m_Value(Cond2)))))
9135 Opc = Instruction::Or;
9136 else
9137 continue;
9138
9139 auto IsGoodCond = [](Value *Cond) {
9140 return match(
9141 Cond,
9142 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9143 m_LogicalOr(m_Value(), m_Value()))));
9144 };
9145 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9146 continue;
9147
9148 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9149
9150 // Create a new BB.
9151 auto *TmpBB =
9152 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9153 BB.getParent(), BB.getNextNode());
9154 if (IsHugeFunc)
9155 FreshBBs.insert(TmpBB);
9156
9157 // Update original basic block by using the first condition directly by the
9158 // branch instruction and removing the no longer needed and/or instruction.
9159 Br1->setCondition(Cond1);
9160 LogicOp->eraseFromParent();
9161
9162 // Depending on the condition we have to either replace the true or the
9163 // false successor of the original branch instruction.
9164 if (Opc == Instruction::And)
9165 Br1->setSuccessor(0, TmpBB);
9166 else
9167 Br1->setSuccessor(1, TmpBB);
9168
9169 // Fill in the new basic block.
9170 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9171 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9172 I->removeFromParent();
9173 I->insertBefore(Br2->getIterator());
9174 }
9175
9176    // Update PHI nodes in both successors. The original BB needs to be
9177    // replaced in one successor's PHI nodes, because the branch now comes from
9178    // the newly generated BB (TmpBB). In the other successor we need to add one
9179    // incoming edge to the PHI nodes, because both branch instructions now
9180    // target the same successor. Depending on the original branch condition
9181    // (and/or) we have to swap the successors (TBB, FBB), so that we perform
9182    // the correct update for the PHI nodes.
9183    // This doesn't change the successor order of the just-created branch
9184    // instruction (or any other instruction).
9185 if (Opc == Instruction::Or)
9186 std::swap(TBB, FBB);
9187
9188 // Replace the old BB with the new BB.
9189 TBB->replacePhiUsesWith(&BB, TmpBB);
9190
9191 // Add another incoming edge from the new BB.
9192 for (PHINode &PN : FBB->phis()) {
9193 auto *Val = PN.getIncomingValueForBlock(&BB);
9194 PN.addIncoming(Val, TmpBB);
9195 }
9196
9197 // Update the branch weights (from SelectionDAGBuilder::
9198 // FindMergedConditions).
9199 if (Opc == Instruction::Or) {
9200 // Codegen X | Y as:
9201 // BB1:
9202 // jmp_if_X TBB
9203 // jmp TmpBB
9204 // TmpBB:
9205 // jmp_if_Y TBB
9206 // jmp FBB
9207 //
9208
9209 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
9210 // The requirement is that
9211 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9212 // = TrueProb for original BB.
9213 // Assuming the original weights are A and B, one choice is to set BB1's
9214 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9215 // assumes that
9216 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9217 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9218 // TmpBB, but the math is more complicated.
9219 uint64_t TrueWeight, FalseWeight;
9220 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9221 uint64_t NewTrueWeight = TrueWeight;
9222 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9223 scaleWeights(NewTrueWeight, NewFalseWeight);
9224 Br1->setMetadata(LLVMContext::MD_prof,
9225 MDBuilder(Br1->getContext())
9226                             .createBranchWeights(NewTrueWeight, NewFalseWeight,
9227 hasBranchWeightOrigin(*Br1)));
9228
9229 NewTrueWeight = TrueWeight;
9230 NewFalseWeight = 2 * FalseWeight;
9231 scaleWeights(NewTrueWeight, NewFalseWeight);
9232 Br2->setMetadata(LLVMContext::MD_prof,
9233 MDBuilder(Br2->getContext())
9234                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9235 }
9236 } else {
9237 // Codegen X & Y as:
9238 // BB1:
9239 // jmp_if_X TmpBB
9240 // jmp FBB
9241 // TmpBB:
9242 // jmp_if_Y TBB
9243 // jmp FBB
9244 //
9245 // This requires creation of TmpBB after CurBB.
9246
9247 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9248 // The requirement is that
9249 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9250 // = FalseProb for original BB.
9251 // Assuming the original weights are A and B, one choice is to set BB1's
9252 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9253 // assumes that
9254 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9255 uint64_t TrueWeight, FalseWeight;
9256 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9257 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9258 uint64_t NewFalseWeight = FalseWeight;
9259 scaleWeights(NewTrueWeight, NewFalseWeight);
9260 Br1->setMetadata(LLVMContext::MD_prof,
9261 MDBuilder(Br1->getContext())
9262                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9263
9264 NewTrueWeight = 2 * TrueWeight;
9265 NewFalseWeight = FalseWeight;
9266 scaleWeights(NewTrueWeight, NewFalseWeight);
9267 Br2->setMetadata(LLVMContext::MD_prof,
9268 MDBuilder(Br2->getContext())
9269                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9270 }
9271 }
9272
9273 ModifiedDT = ModifyDT::ModifyBBDT;
9274 MadeChange = true;
9275
9276 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9277 TmpBB->dump());
9278 }
9279 return MadeChange;
9280}
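
A minimal standalone check, not part of CodeGenPrepare.cpp: the concrete weights A = 300, B = 100 and the main() driver are invented for illustration. It numerically confirms the identities quoted in the comments above, i.e. for the "or" case TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB) equals the original TrueProb when BB1 gets weights {A, A + 2B} and TmpBB gets {A, 2B}, and for the "and" case FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB) equals the original FalseProb when BB1 gets {2A + B, B} and TmpBB gets {2A, B}.

    #include <cassert>
    #include <cmath>

    int main() {
      // Example original weights: A = taken ("true") weight, B = not-taken weight.
      const double A = 300.0, B = 100.0;

      // Or case: BB1 gets {A, A + 2B}, TmpBB gets {A, 2B}.
      {
        const double OrigTrue = A / (A + B);
        const double BB1True  = A / (A + (A + 2 * B));
        const double BB1False = (A + 2 * B) / (A + (A + 2 * B));
        const double TmpTrue  = A / (A + 2 * B);
        assert(std::fabs(BB1True + BB1False * TmpTrue - OrigTrue) < 1e-12);
      }

      // And case: BB1 gets {2A + B, B}, TmpBB gets {2A, B}.
      {
        const double OrigFalse = B / (A + B);
        const double BB1True   = (2 * A + B) / ((2 * A + B) + B);
        const double BB1False  = B / ((2 * A + B) + B);
        const double TmpFalse  = B / (2 * A + B);
        assert(std::fabs(BB1False + BB1True * TmpFalse - OrigFalse) < 1e-12);
      }
      return 0;
    }

The algebra works out because, under the stated assumption that one path's probability factors through the other block, both terms reduce to A/(2A + 2B) (or B/(2A + 2B)), and their sum collapses back to the original A/(A + B) (or B/(A + B)).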
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
#define LLVM_ATTRIBUTE_UNUSED
Definition Compiler.h:298
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1451
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:167
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1182
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:380
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:435
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1531
unsigned logBase2() const
Definition APInt.h:1761
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:440
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1562
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:666
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:678
@ ICMP_SLT
signed less than
Definition InstrTypes.h:707
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:701
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:705
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:703
@ ICMP_NE
not equal
Definition InstrTypes.h:700
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:704
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:829
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:767
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
LLVM_ABI IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:165
bool erase(const KeyT &Val)
Definition DenseMap.h:303
unsigned size() const
Definition DenseMap.h:108
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:57
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:803
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:807
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:330
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Type * getValueType() const
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2780
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:167
iterator find(const KeyT &Key)
Definition MapVector.h:141
bool empty() const
Definition MapVector.h:75
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:115
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
void clear()
Completely clear the SetVector.
Definition SetVector.h:284
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:279
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:99
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:168
value_type pop_back_val()
Definition SetVector.h:296
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:197
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:181
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:652
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:62
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:231
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:301
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:292
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:242
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1101
iterator_range< use_iterator > uses()
Definition Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:838
user_iterator_impl< User > user_iterator
Definition Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:156
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:134
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:359
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
apint_match m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
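The matchers listed above come from llvm/IR/PatternMatch.h and compose into pattern trees handed to match(). A minimal sketch, not taken from this pass; the function name and bound variables are illustrative:
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

// Bind X and the (possibly splatted) shift amount if V is a right shift
// by a constant; m_Shr accepts both lshr and ashr.
static bool matchShrByConstant(llvm::Value *V, llvm::Value *&X,
                               const llvm::APInt *&ShAmt) {
  using namespace llvm::PatternMatch;
  return match(V, m_Shr(m_Value(X), m_APInt(ShAmt)));
}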
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:310
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:477
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1707
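A small sketch of these range-based wrappers, here checking a block's predecessors; the helper name and the predicate are illustrative, not part of this pass:
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Instructions.h"

// True if every predecessor of BB ends in an unconditional branch.
static bool allPredsBranchUnconditionally(llvm::BasicBlock *BB) {
  return llvm::all_of(llvm::predecessors(BB), [](llvm::BasicBlock *Pred) {
    auto *BI = llvm::dyn_cast<llvm::BranchInst>(Pred->getTerminator());
    return BI && BI->isUnconditional();
  });
}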
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1665
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
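A hedged sketch of the usual pattern around this helper (variable names are illustrative): forward an instruction's uses, then let the utility clean up the now-dead chain.
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

static void replaceAndCleanUp(llvm::Instruction *Old, llvm::Value *New) {
  Old->replaceAllUsesWith(New);
  // Old now has no uses; if it is trivially dead this erases it and,
  // recursively, any operands that become dead as a result.
  llvm::RecursivelyDeleteTriviallyDeadInstructions(Old);
}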
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branch.
Definition Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:649
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2235
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:145
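A one-line usage sketch (the function name is illustrative): an A-aligned pointer stays A-aligned after adding Offset bytes exactly when Offset is a multiple of A.
#include <cstdint>
#include "llvm/Support/Alignment.h"

// True if adding Offset bytes preserves alignment A.
static bool offsetPreservesAlignment(llvm::Align A, uint64_t Offset) {
  return llvm::isAligned(A, Offset);
}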
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing the effect of MI in a DIExpression.
Definition Utils.cpp:1725
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditional branch.
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2113
constexpr from_range_t from_range
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2118
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:626
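A minimal sketch, assuming we want to erase instructions while walking a block; the early-increment adapter advances past the current element before the loop body can invalidate it:
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// Erase unused, side-effect-free, non-terminator instructions from BB.
static void dropUnusedInstructions(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (I.use_empty() && !I.isTerminator() && !I.mayHaveSideEffects())
      I.eraseFromParent();
}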
auto cast_or_null(const Y &Val)
Definition Casting.h:720
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2058
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
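A short sketch, assuming only a DataLayout is at hand (richer SimplifyQuery setups with AC/DT/TLI are also possible); the function name is illustrative:
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"

// If instsimplify can fold I, forward its uses to the simplified value.
static bool foldIfSimplifiable(llvm::Instruction *I,
                               const llvm::DataLayout &DL) {
  if (llvm::Value *V = llvm::simplifyInstruction(I, llvm::SimplifyQuery(DL))) {
    I->replaceAllUsesWith(V);
    return true;
  }
  return false;
}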
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:759
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2110
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1714
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:400
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3728
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1632
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1721
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1407
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instruction selection.
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point condition code.
Definition Analysis.cpp:207
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:548
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if, given that the input instruction is in the tail call position, there is an attribute mismatch between the caller and the callee that would inhibit tail call optimizations.
Definition Analysis.cpp:592
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition STLExtras.h:1936
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
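A hedged sketch of reading the two weights off a conditional branch's !prof metadata; the function name and the two-operand check are illustrative:
#include <cstdint>
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ProfDataUtils.h"

// Returns true and fills TrueW/FalseW when BI carries two branch weights.
static bool readBranchWeights(const llvm::BranchInst &BI, uint32_t &TrueW,
                              uint32_t &FalseW) {
  const llvm::MDNode *Prof = BI.getMetadata(llvm::LLVMContext::MD_prof);
  llvm::SmallVector<uint32_t, 2> Weights;
  if (!Prof || !llvm::extractBranchWeights(Prof, Weights) ||
      Weights.size() != 2)
    return false;
  TrueW = Weights[0];
  FalseW = Weights[1];
  return true;
}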
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:565
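A small sketch of the casting idioms in this index: isa only tests, cast asserts on a type mismatch, and dyn_cast returns null instead. The helper below is illustrative.
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

// isa-then-cast is equivalent to a checked dyn_cast; cast would assert
// if V were not actually a LoadInst.
static const llvm::Value *loadedPointer(const llvm::Value *V) {
  if (!llvm::isa<llvm::LoadInst>(V))
    return nullptr;
  return llvm::cast<llvm::LoadInst>(V)->getPointerOperand();
}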
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out with a shorter, faster divide.
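A hedged sketch of driving this helper by hand; the 64-to-32-bit mapping is illustrative (in-tree callers take the bypass widths from the target lowering):
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

// Rewrite eligible 64-bit divisions/remainders in BB into a guarded
// 32-bit fast path when the operands fit.
static bool bypassWideDivides(llvm::BasicBlock &BB) {
  llvm::DenseMap<unsigned, unsigned> BypassWidths;
  BypassWidths[64] = 32; // illustrative mapping only
  return llvm::bypassSlowDivision(&BB, BypassWidths);
}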
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which is equivalent to C.erase(remove_if(C, pred), C.end()).
Definition STLExtras.h:2102
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:212
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow occurred.
Definition MathExtras.h:712
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
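A minimal sketch: split before InsertPt, branch into a new block when Cond is true, and emit a call there. SlowPath is a hypothetical callee, not something taken from this pass.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

// Emit a call to SlowPath that executes only when Cond is true.
static void emitGuardedCall(llvm::Value *Cond, llvm::Instruction *InsertPt,
                            llvm::FunctionCallee SlowPath) {
  // ThenTerm is the unconditional branch terminating the new "then" block.
  llvm::Instruction *ThenTerm = llvm::SplitBlockAndInsertIfThen(
      Cond, InsertPt->getIterator(), /*Unreachable=*/false);
  llvm::IRBuilder<> B(ThenTerm);
  B.CreateCall(SlowPath); // SlowPath is illustrative
}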
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with those operands.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:853
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
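A short sketch using the EVT queries above on an IR type; the predicate is illustrative:
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

// True if Ty maps to an integer EVT that is strictly wider than Other.
static bool isWiderIntegerThan(llvm::Type *Ty, llvm::EVT Other) {
  llvm::EVT VT = llvm::EVT::getEVT(Ty);
  return VT.isInteger() && VT.bitsGT(Other);
}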
const DataLayout & DL
This contains information for each constraint that we are lowering.