CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/Argument.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InlineAsm.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/MDBuilder.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Operator.h"
73#include "llvm/IR/Statepoint.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/Use.h"
76#include "llvm/IR/User.h"
77#include "llvm/IR/Value.h"
78#include "llvm/IR/ValueHandle.h"
79#include "llvm/IR/ValueMap.h"
81#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of selects created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
137static cl::opt<bool> DisableBranchOpts(
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
158static cl::opt<bool> DisableStoreExtract(
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
162static cl::opt<bool> StressStoreExtract(
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
166static cl::opt<bool> DisableExtLdPromotion(
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
171static cl::opt<bool> StressExtLdPromotion(
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
176static cl::opt<bool> DisablePreheaderProtect(
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
180static cl::opt<bool> ProfileGuidedSectionPrefix(
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
184static cl::opt<bool> ProfileUnknownInSpecialSection(
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In a profiling mode like sampleFDO, if a function has no "
187 "profile, we cannot tell for sure that it is cold, because it "
188 "may have been newly added and never sampled. With this flag "
189 "enabled, the compiler puts such profile-unknown functions into "
190 "a special section, so the runtime system can choose to handle "
191 "them differently from the .text section, for example to save "
192 "RAM."));
193
194static cl::opt<bool> BBSectionsGuidedSectionPrefix(
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
203static cl::opt<uint64_t> FreqRatioToSkipMerge(
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
208static cl::opt<bool> ForceSplitStore(
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
212static cl::opt<bool> EnableTypePromotionMerge(
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
218static cl::opt<bool> DisableComplexAddrModes(
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
227static cl::opt<bool> AddrSinkNewSelects(
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
231static cl::opt<bool> AddrSinkCombineBaseReg(
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
235static cl::opt<bool> AddrSinkCombineBaseGV(
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
239static cl::opt<bool> AddrSinkCombineBaseOffs(
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
243static cl::opt<bool> AddrSinkCombineScaledReg(
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
252static cl::opt<bool> EnableICMP_EQToICMP_ST(
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
265static cl::opt<uint64_t>
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
269static cl::opt<unsigned>
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::Hidden,
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290 NotModifyDT, // Do not modify any dominator tree.
291 ModifyBBDT,  // Modify the basic-block-level dominator tree.
292 ModifyInstDT // Modify the instruction dominance within a basic block.
293              // This usually means we move/delete/insert an instruction
294              // in a basic block, so we should re-iterate the instructions
295              // in such a basic block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using SExts = SmallVector<Instruction *, 16>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
329 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the types of the related instructions before their
335 /// promotion for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
347 MapVector<AssertingVH<Value>,
348 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP bases after splitting GEPs that have large offsets.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map serial numbers to Large offset GEPs.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of promoted SExts.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare() = default;
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM) {}
373 /// If we encounter a huge function, we need to limit the compile time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs is like a worklist: it collects the updated BBs that need
377 /// to be optimized again.
378 /// Note: To bound compile time in this pass, when a BB is updated we need
379 /// to insert such a BB into FreshBBs for huge functions.
380 SmallPtrSet<BasicBlock *, 32> FreshBBs;
381
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
391 bool run(Function &F, FunctionAnalysisManager &AM);
392
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeInlineAsmInst(CallInst *CS);
435 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436 bool optimizeExt(Instruction *&I);
437 bool optimizeExtUses(Instruction *I);
438 bool optimizeLoadExt(LoadInst *Load);
439 bool optimizeShiftInst(BinaryOperator *BO);
440 bool optimizeFunnelShift(IntrinsicInst *Fsh);
441 bool optimizeSelectInst(SelectInst *SI);
442 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443 bool optimizeSwitchType(SwitchInst *SI);
444 bool optimizeSwitchPhiConstants(SwitchInst *SI);
445 bool optimizeSwitchInst(SwitchInst *SI);
446 bool optimizeExtractElementInst(Instruction *Inst);
447 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
455 const SmallVectorImpl<Instruction *> &Exts,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool optimizeURem(Instruction *Rem);
475 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 bool unfoldPowerOf2Test(CmpInst *Cmp);
478 void verifyBFIUpdates(Function &F);
479 bool _run(Function &F);
480};
481
482class CodeGenPrepareLegacyPass : public FunctionPass {
483public:
484 static char ID; // Pass identification, replacement for typeid
485
486 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
487 initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry());
488 }
489
490 bool runOnFunction(Function &F) override;
491
492 StringRef getPassName() const override { return "CodeGen Prepare"; }
493
494 void getAnalysisUsage(AnalysisUsage &AU) const override {
495 // FIXME: When we can selectively preserve passes, preserve the domtree.
496 AU.addRequired<ProfileSummaryInfoWrapperPass>();
497 AU.addRequired<TargetLibraryInfoWrapperPass>();
498 AU.addRequired<TargetPassConfig>();
499 AU.addRequired<TargetTransformInfoWrapperPass>();
500 AU.addRequired<LoopInfoWrapperPass>();
501 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
522 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540 "Optimize for code generation", false, false)
541
542FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
543 return new CodeGenPrepareLegacyPass();
544}
545
546PreservedAnalyses CodeGenPreparePass::run(Function &F,
547 FunctionAnalysisManager &AM) {
548 CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
554 PreservedAnalyses PA;
555 PA.preserve<TargetLibraryAnalysis>();
556 PA.preserve<TargetIRAnalysis>();
557 PA.preserve<LoopAnalysis>();
558 return PA;
559}
560
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
567 TTI = &AM.getResult<TargetIRAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
569 BPI.reset(new BranchProbabilityInfo(F, *LI));
570 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 BBSectionsProfileReader =
574 AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F);
575 return _run(F);
576}
577
578bool CodeGenPrepare::_run(Function &F) {
579 bool EverMadeChange = false;
580
581 OptSize = F.hasOptSize();
582 // Use the basic-block-sections profile to promote hot functions to .text.hot
583 // if requested.
584 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
585 BBSectionsProfileReader->isFunctionHot(F.getName())) {
586 (void)F.setSectionPrefix("hot");
587 } else if (ProfileGuidedSectionPrefix) {
588 // The hot attribute overrides profile-count-based hotness, while
589 // profile-count-based hotness overrides the cold attribute.
590 // This is conservative behavior.
591 if (F.hasFnAttribute(Attribute::Hot) ||
592 PSI->isFunctionHotInCallGraph(&F, *BFI))
593 (void)F.setSectionPrefix("hot");
594 // If PSI shows this function is not hot, we place the function
595 // into the unlikely section if (1) PSI shows this is a cold function, or
596 // (2) the function has the cold attribute.
597 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
598 F.hasFnAttribute(Attribute::Cold))
599 (void)F.setSectionPrefix("unlikely");
600 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
601 PSI->isFunctionHotnessUnknown(F))
602 (void)F.setSectionPrefix("unknown");
603 }
604
605 /// This optimization identifies DIV instructions that can be
606 /// profitably bypassed and carried out with a shorter, faster divide.
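 // For illustration only (a rough sketch of what bypassSlowDivision produces;
 // not code from this pass): on a target that reports 64-bit division as slow,
 //   %q = udiv i64 %a, %b
 // becomes a runtime check on the high bits of %a and %b selecting between a
 // 32-bit udiv/zext fast path and the original 64-bit udiv on the slow path.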
607 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
608 const DenseMap<unsigned int, unsigned int> &BypassWidths =
609 TLI->getBypassSlowDivWidths();
610 BasicBlock *BB = &*F.begin();
611 while (BB != nullptr) {
612 // bypassSlowDivision may create new BBs, but we don't want to reapply the
613 // optimization to those blocks.
614 BasicBlock *Next = BB->getNextNode();
615 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
616 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
617 BB = Next;
618 }
619 }
620
621 // Get rid of @llvm.assume builtins before attempting to eliminate empty
622 // blocks, since there might be blocks that only contain @llvm.assume calls
623 // (plus arguments that we can get rid of).
624 EverMadeChange |= eliminateAssumptions(F);
625
626 // Eliminate blocks that contain only PHI nodes and an
627 // unconditional branch.
628 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
629
630 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 if (!DisableBranchOpts)
632 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
633
634 // Split some critical edges where one of the sources is an indirect branch,
635 // to help generate sane code for PHIs involving such edges.
636 EverMadeChange |=
637 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
638
639 // If we are optimizing a huge function, we need to consider the compile
640 // time, because the basic algorithm's complexity is nearly O(N!).
641 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
642
643 // Transformations above may invalidate dominator tree and/or loop info.
644 DT.reset();
645 LI->releaseMemory();
646 LI->analyze(getDT(F));
647
648 bool MadeChange = true;
649 bool FuncIterated = false;
650 while (MadeChange) {
651 MadeChange = false;
652
653 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
654 if (FuncIterated && !FreshBBs.contains(&BB))
655 continue;
656
657 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
658 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
659
660 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
661 DT.reset();
662
663 MadeChange |= Changed;
664 if (IsHugeFunc) {
665 // If the BB is updated, it may still have a chance to be optimized.
666 // This usually happens with sinking optimizations.
667 // For example:
668 //
669 // bb0:
670 // %and = and i32 %a, 4
671 // %cmp = icmp eq i32 %and, 0
672 //
673 // If %cmp is sunk to another BB, %and will also have a chance to be sunk.
674 if (Changed)
675 FreshBBs.insert(&BB);
676 else if (FuncIterated)
677 FreshBBs.erase(&BB);
678 } else {
679 // For small/normal functions, we restart BB iteration if the dominator
680 // tree of the Function was changed.
681 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
682 break;
683 }
684 }
685 // We have iterated over all the BBs in the function (this only matters for huge functions).
686 FuncIterated = IsHugeFunc;
687
688 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
689 MadeChange |= mergeSExts(F);
690 if (!LargeOffsetGEPMap.empty())
691 MadeChange |= splitLargeGEPOffsets();
692 MadeChange |= optimizePhiTypes(F);
693
694 if (MadeChange)
695 eliminateFallThrough(F, DT.get());
696
697#ifndef NDEBUG
698 if (MadeChange && VerifyLoopInfo)
699 LI->verify(getDT(F));
700#endif
701
702 // Really free removed instructions during promotion.
703 for (Instruction *I : RemovedInsts)
704 I->deleteValue();
705
706 EverMadeChange |= MadeChange;
707 SeenChainsForSExt.clear();
708 ValToSExtendedUses.clear();
709 RemovedInsts.clear();
710 LargeOffsetGEPMap.clear();
711 LargeOffsetGEPID.clear();
712 }
713
714 NewGEPBases.clear();
715 SunkAddrs.clear();
716
717 if (!DisableBranchOpts) {
718 MadeChange = false;
719 // Use a set vector to get deterministic iteration order. The order the
720 // blocks are removed may affect whether or not PHI nodes in successors
721 // are removed.
722 SmallSetVector<BasicBlock *, 8> WorkList;
723 for (BasicBlock &BB : F) {
724 SmallVector<BasicBlock *, 2> Successors(successors(&BB));
725 MadeChange |= ConstantFoldTerminator(&BB, true);
726 if (!MadeChange)
727 continue;
728
729 for (BasicBlock *Succ : Successors)
730 if (pred_empty(Succ))
731 WorkList.insert(Succ);
732 }
733
734 // Delete the dead blocks and any of their dead successors.
735 MadeChange |= !WorkList.empty();
736 while (!WorkList.empty()) {
737 BasicBlock *BB = WorkList.pop_back_val();
738 SmallVector<BasicBlock *, 2> Successors(successors(BB));
739
740 DeleteDeadBlock(BB);
741
742 for (BasicBlock *Succ : Successors)
743 if (pred_empty(Succ))
744 WorkList.insert(Succ);
745 }
746
747 // Merge pairs of basic blocks with unconditional branches, connected by
748 // a single edge.
749 if (EverMadeChange || MadeChange)
750 MadeChange |= eliminateFallThrough(F);
751
752 EverMadeChange |= MadeChange;
753 }
754
755 if (!DisableGCOpts) {
756 SmallVector<GCStatepointInst *, 2> Statepoints;
757 for (BasicBlock &BB : F)
758 for (Instruction &I : BB)
759 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
760 Statepoints.push_back(SP);
761 for (auto &I : Statepoints)
762 EverMadeChange |= simplifyOffsetableRelocate(*I);
763 }
764
765 // Do this last to clean up use-before-def scenarios introduced by other
766 // preparatory transforms.
767 EverMadeChange |= placeDbgValues(F);
768 EverMadeChange |= placePseudoProbes(F);
769
770#ifndef NDEBUG
771 if (VerifyBFIUpdates)
772 verifyBFIUpdates(F);
773#endif
774
775 return EverMadeChange;
776}
777
778bool CodeGenPrepare::eliminateAssumptions(Function &F) {
779 bool MadeChange = false;
780 for (BasicBlock &BB : F) {
781 CurInstIterator = BB.begin();
782 while (CurInstIterator != BB.end()) {
783 Instruction *I = &*(CurInstIterator++);
784 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
785 MadeChange = true;
786 Value *Operand = Assume->getOperand(0);
787 Assume->eraseFromParent();
788
789 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
790 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
791 });
792 }
793 }
794 }
795 return MadeChange;
796}
797
798/// An instruction is about to be deleted, so remove all references to it in our
799/// GEP-tracking data structures.
800void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
801 LargeOffsetGEPMap.erase(V);
802 NewGEPBases.erase(V);
803
804 auto *GEP = dyn_cast<GetElementPtrInst>(V);
805 if (!GEP)
806 return;
807
808 LargeOffsetGEPID.erase(GEP);
809
810 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
811 if (VecI == LargeOffsetGEPMap.end())
812 return;
813
814 auto &GEPVector = VecI->second;
815 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
816
817 if (GEPVector.empty())
818 LargeOffsetGEPMap.erase(VecI);
819}
820
821// Verify BFI has been updated correctly by recomputing BFI and comparing them.
822[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
823 DominatorTree NewDT(F);
824 LoopInfo NewLI(NewDT);
825 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
826 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
827 NewBFI.verifyMatch(*BFI);
828}
829
830/// Merge basic blocks which are connected by a single edge, where one of the
831/// basic blocks has a single successor pointing to the other basic block,
832/// which has a single predecessor.
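///
/// For example (illustrative IR):
///   bb1:
///     br label %bb2
///   bb2:            ; bb1 is bb2's only predecessor
///     ...
/// is collapsed into a single basic block.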
833bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
834 bool Changed = false;
835 // Scan all of the blocks in the function, except for the entry block.
836 // Use a temporary array to avoid iterator being invalidated when
837 // deleting blocks.
840
841 SmallSet<WeakTrackingVH, 16> Preds;
842 for (auto &Block : Blocks) {
843 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
881 RemoveRedundantDbgInstrs(BB);
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is a mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
895 BasicBlock::iterator BBI = BI->getIterator();
896 if (BBI != BB->begin()) {
897 --BBI;
898 if (!isa<PHINode>(BBI))
899 return nullptr;
900 }
901
902 // Do not break infinite loops.
903 BasicBlock *DestBB = BI->getSuccessor(0);
904 if (DestBB == BB)
905 return nullptr;
906
907 if (!canMergeBlocks(BB, DestBB))
908 DestBB = nullptr;
909
910 return DestBB;
911}
912
913/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
914/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
915/// edges in ways that are non-optimal for isel. Start by eliminating these
916/// blocks so we can split them the way we want them.
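///
/// For example (illustrative IR), a block such as
///   bb:
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
/// is a candidate: bb is removed and the PHIs in %dest are updated to take
/// their incoming values directly from %a and %b.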
917bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
918 SmallPtrSet<BasicBlock *, 16> Preheaders;
919 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
920 while (!LoopList.empty()) {
921 Loop *L = LoopList.pop_back_val();
922 llvm::append_range(LoopList, *L);
923 if (BasicBlock *Preheader = L->getLoopPreheader())
924 Preheaders.insert(Preheader);
925 }
926
927 bool MadeChange = false;
928 // Copy blocks into a temporary array to avoid iterator invalidation issues
929 // as we remove them.
930 // Note that this intentionally skips the entry block.
931 SmallVector<WeakTrackingVH, 16> Blocks;
932 for (auto &Block : llvm::drop_begin(F)) {
933 // Delete phi nodes that could block deleting other empty blocks.
934 if (!DisableDeletePHIs)
935 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
936 Blocks.push_back(&Block);
937 }
938
939 for (auto &Block : Blocks) {
940 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
941 if (!BB)
942 continue;
943 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
944 if (!DestBB ||
945 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
946 continue;
947
948 eliminateMostlyEmptyBlock(BB);
949 MadeChange = true;
950 }
951 return MadeChange;
952}
953
954bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
955 BasicBlock *DestBB,
956 bool isPreheader) {
957 // Do not delete loop preheaders if doing so would create a critical edge.
958 // Loop preheaders can be good locations to spill registers. If the
959 // preheader is deleted and we create a critical edge, registers may be
960 // spilled in the loop body instead.
961 if (!DisablePreheaderProtect && isPreheader &&
962 !(BB->getSinglePredecessor() &&
963 BB->getSinglePredecessor()->getSingleSuccessor()))
964 return false;
965
966 // Skip merging if the block's successor is also a successor to any callbr
967 // that leads to this block.
968 // FIXME: Is this really needed? Is this a correctness issue?
969 for (BasicBlock *Pred : predecessors(BB)) {
970 if (isa<CallBrInst>(Pred->getTerminator()) &&
971 llvm::is_contained(successors(Pred), DestBB))
972 return false;
973 }
974
975 // Try to skip merging if the unique predecessor of BB is terminated by a
976 // switch or indirect branch instruction, and BB is used as an incoming block
977 // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
978 // add COPY instructions in the predecessor of BB instead of BB (if it is not
979 // merged). Note that the critical edge created by merging such blocks won't be
980 // split in MachineSink because the jump table is not analyzable. By keeping
981 // such empty block (BB), ISel will place COPY instructions in BB, not in the
982 // predecessor of BB.
983 BasicBlock *Pred = BB->getUniquePredecessor();
984 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
985 isa<IndirectBrInst>(Pred->getTerminator())))
986 return true;
987
988 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
989 return true;
990
991 // We use a simple cost heuristic which determines that skipping merging is
992 // profitable if the cost of skipping merging is less than the cost of
993 // merging : Cost(skipping merging) < Cost(merging BB), where the
994 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
995 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
996 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
997 // Freq(Pred) / Freq(BB) > 2.
998 // Note that if there are multiple empty blocks sharing the same incoming
999 // value for the PHIs in the DestBB, we consider them together. In such
1000 // case, Cost(merging BB) will be the sum of their frequencies.
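 // Worked example (illustrative numbers): with the default ratio of 2, if
 // Freq(Pred) = 8 and Freq(BB) = 2, then Freq(Pred) / Freq(BB) = 4 > 2, so we
 // skip merging and keep the empty block BB so the COPYs stay there.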
1001
1002 if (!isa<PHINode>(DestBB->begin()))
1003 return true;
1004
1005 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1006
1007 // Find all other incoming blocks from which incoming values of all PHIs in
1008 // DestBB are the same as the ones from BB.
1009 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1010 if (DestBBPred == BB)
1011 continue;
1012
1013 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1014 return DestPN.getIncomingValueForBlock(BB) ==
1015 DestPN.getIncomingValueForBlock(DestBBPred);
1016 }))
1017 SameIncomingValueBBs.insert(DestBBPred);
1018 }
1019
1020 // See if all of BB's incoming values are the same as the value from Pred. In
1021 // this case, there is no reason to skip merging because COPYs are expected to
1022 // be placed in Pred already.
1023 if (SameIncomingValueBBs.count(Pred))
1024 return true;
1025
1026 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1027 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1028
1029 for (auto *SameValueBB : SameIncomingValueBBs)
1030 if (SameValueBB->getUniquePredecessor() == Pred &&
1031 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1032 BBFreq += BFI->getBlockFreq(SameValueBB);
1033
1034 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1035 return !Limit || PredFreq <= *Limit;
1036}
1037
1038/// Return true if we can merge BB into DestBB if there is a single
1039/// unconditional branch between them, and BB contains no other non-phi
1040/// instructions.
1041bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1042 const BasicBlock *DestBB) const {
1043 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1044 // the successor. If there are more complex conditions (e.g. preheaders),
1045 // don't mess around with them.
1046 for (const PHINode &PN : BB->phis()) {
1047 for (const User *U : PN.users()) {
1048 const Instruction *UI = cast<Instruction>(U);
1049 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1050 return false;
1051 // If User is inside DestBB block and it is a PHINode then check
1052 // incoming value. If incoming value is not from BB then this is
1053 // a complex condition (e.g. preheaders) we want to avoid here.
1054 if (UI->getParent() == DestBB) {
1055 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1056 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1057 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1058 if (Insn && Insn->getParent() == BB &&
1059 Insn->getParent() != UPN->getIncomingBlock(I))
1060 return false;
1061 }
1062 }
1063 }
1064 }
1065
1066 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1067 // and DestBB may have conflicting incoming values for the block. If so, we
1068 // can't merge the block.
1069 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1070 if (!DestBBPN)
1071 return true; // no conflict.
1072
1073 // Collect the preds of BB.
1074 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1075 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1076 // It is faster to get preds from a PHI than with pred_iterator.
1077 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1078 BBPreds.insert(BBPN->getIncomingBlock(i));
1079 } else {
1080 BBPreds.insert_range(predecessors(BB));
1081 }
1082
1083 // Walk the preds of DestBB.
1084 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1085 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1086 if (BBPreds.count(Pred)) { // Common predecessor?
1087 for (const PHINode &PN : DestBB->phis()) {
1088 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1089 const Value *V2 = PN.getIncomingValueForBlock(BB);
1090
1091 // If V2 is a phi node in BB, look up what the mapped value will be.
1092 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1093 if (V2PN->getParent() == BB)
1094 V2 = V2PN->getIncomingValueForBlock(Pred);
1095
1096 // If there is a conflict, bail out.
1097 if (V1 != V2)
1098 return false;
1099 }
1100 }
1101 }
1102
1103 return true;
1104}
1105
1106/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1107static void replaceAllUsesWith(Value *Old, Value *New,
1108 SmallPtrSet<BasicBlock *, 32> &FreshBBs,
1109 bool IsHuge) {
1110 auto *OldI = dyn_cast<Instruction>(Old);
1111 if (OldI) {
1112 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1113 UI != E; ++UI) {
1114 Instruction *User = cast<Instruction>(*UI);
1115 if (IsHuge)
1116 FreshBBs.insert(User->getParent());
1117 }
1118 }
1119 Old->replaceAllUsesWith(New);
1120}
1121
1122/// Eliminate a basic block that has only phi's and an unconditional branch in
1123/// it.
1124void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1125 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1126 BasicBlock *DestBB = BI->getSuccessor(0);
1127
1128 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1129 << *BB << *DestBB);
1130
1131 // If the destination block has a single pred, then this is a trivial edge,
1132 // just collapse it.
1133 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1134 if (SinglePred != DestBB) {
1135 assert(SinglePred == BB &&
1136 "Single predecessor not the same as predecessor");
1137 // Merge DestBB into SinglePred/BB and delete it.
1138 MergeBasicBlockIntoOnlyPred(DestBB, /* DTU */ nullptr);
1139 // Note: BB(=SinglePred) will not be deleted on this path.
1140 // DestBB(=its single successor) is the one that was deleted.
1141 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1142
1143 if (IsHugeFunc) {
1144 // Update FreshBBs to optimize the merged BB.
1145 FreshBBs.insert(SinglePred);
1146 FreshBBs.erase(DestBB);
1147 }
1148 return;
1149 }
1150 }
1151
1152 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1153 // to handle the new incoming edges it is about to have.
1154 for (PHINode &PN : DestBB->phis()) {
1155 // Remove the incoming value for BB, and remember it.
1156 Value *InVal = PN.removeIncomingValue(BB, false);
1157
1158 // Two options: either the InVal is a phi node defined in BB or it is some
1159 // value that dominates BB.
1160 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1161 if (InValPhi && InValPhi->getParent() == BB) {
1162 // Add all of the input values of the input PHI as inputs of this phi.
1163 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1164 PN.addIncoming(InValPhi->getIncomingValue(i),
1165 InValPhi->getIncomingBlock(i));
1166 } else {
1167 // Otherwise, add one instance of the dominating value for each edge that
1168 // we will be adding.
1169 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1170 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1171 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1172 } else {
1173 for (BasicBlock *Pred : predecessors(BB))
1174 PN.addIncoming(InVal, Pred);
1175 }
1176 }
1177 }
1178
1179 // Preserve loop Metadata.
1180 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1181 for (auto *Pred : predecessors(BB))
1182 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1183 }
1184
1185 // The PHIs are now updated, change everything that refers to BB to use
1186 // DestBB and remove BB.
1187 BB->replaceAllUsesWith(DestBB);
1188 BB->eraseFromParent();
1189 ++NumBlocksElim;
1190
1191 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1192}
1193
1194// Computes a map of base pointer relocation instructions to corresponding
1195// derived pointer relocation instructions given a vector of all relocate calls
1196static void computeBaseDerivedRelocateMap(
1197 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1198 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
1199 &RelocateInstMap) {
1200 // Collect information in two maps: one primarily for locating the base object
1201 // while filling the second map; the second map is the final structure holding
1202 // a mapping between Base and corresponding Derived relocate calls
1203 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
1204 for (auto *ThisRelocate : AllRelocateCalls) {
1205 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1206 ThisRelocate->getDerivedPtrIndex());
1207 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1208 }
1209 for (auto &Item : RelocateIdxMap) {
1210 std::pair<unsigned, unsigned> Key = Item.first;
1211 if (Key.first == Key.second)
1212 // Base relocation: nothing to insert
1213 continue;
1214
1215 GCRelocateInst *I = Item.second;
1216 auto BaseKey = std::make_pair(Key.first, Key.first);
1217
1218 // We're iterating over RelocateIdxMap so we cannot modify it.
1219 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1220 if (MaybeBase == RelocateIdxMap.end())
1221 // TODO: We might want to insert a new base object relocate and gep off
1222 // that, if there are enough derived object relocates.
1223 continue;
1224
1225 RelocateInstMap[MaybeBase->second].push_back(I);
1226 }
1227}
1228
1229// Accepts a GEP and extracts the operands into a vector provided they're all
1230// small integer constants
1231static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1232 SmallVectorImpl<Value *> &OffsetV) {
1233 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1234 // Only accept small constant integer operands
1235 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1236 if (!Op || Op->getZExtValue() > 20)
1237 return false;
1238 }
1239
1240 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1241 OffsetV.push_back(GEP->getOperand(i));
1242 return true;
1243}
1244
1245// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1246// replace, computes a replacement, and applies it.
1247static bool
1248simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1249 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1250 bool MadeChange = false;
1251 // We must ensure the relocation of the derived pointer is defined after the
1252 // relocation of the base pointer. If we find a relocation of the same base
1253 // that is defined earlier than the relocation of the base, we move the
1254 // relocation of the base right before that relocation. We only consider
1255 // relocations in the same basic block as the relocation of the base;
1256 // relocations from other basic blocks are skipped by this optimization.
1257 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1258 &*R != RelocatedBase; ++R)
1259 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1260 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1261 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1262 RelocatedBase->moveBefore(RI->getIterator());
1263 MadeChange = true;
1264 break;
1265 }
1266
1267 for (GCRelocateInst *ToReplace : Targets) {
1268 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1269 "Not relocating a derived object of the original base object");
1270 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1271 // A duplicate relocate call. TODO: coalesce duplicates.
1272 continue;
1273 }
1274
1275 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1276 // Base and derived relocates are in different basic blocks.
1277 // In this case transform is only valid when base dominates derived
1278 // relocate. However it would be too expensive to check dominance
1279 // for each such relocate, so we skip the whole transformation.
1280 continue;
1281 }
1282
1283 Value *Base = ToReplace->getBasePtr();
1284 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1285 if (!Derived || Derived->getPointerOperand() != Base)
1286 continue;
1287
1288 SmallVector<Value *, 2> OffsetV;
1289 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1290 continue;
1291
1292 // Create a Builder and replace the target callsite with a gep
1293 assert(RelocatedBase->getNextNode() &&
1294 "Should always have one since it's not a terminator");
1295
1296 // Insert after RelocatedBase
1297 IRBuilder<> Builder(RelocatedBase->getNextNode());
1298 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1299
1300 // If gc_relocate does not match the actual type, cast it to the right type.
1301 // In theory, there must be a bitcast after gc_relocate if the type does not
1302 // match, and we should reuse it to get the derived pointer. But there could
1303 // be cases like this:
1304 // bb1:
1305 // ...
1306 // %g1 = call coldcc i8 addrspace(1)*
1307 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1308 //
1309 // bb2:
1310 // ...
1311 // %g2 = call coldcc i8 addrspace(1)*
1312 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1313 //
1314 // merge:
1315 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1316 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1317 //
1318 // In this case, we cannot find the bitcast anymore. So we insert a new
1319 // bitcast whether or not there is already one. In this way, we can handle
1320 // all cases, and the extra bitcast should be optimized away in later
1321 // passes.
1322 Value *ActualRelocatedBase = RelocatedBase;
1323 if (RelocatedBase->getType() != Base->getType()) {
1324 ActualRelocatedBase =
1325 Builder.CreateBitCast(RelocatedBase, Base->getType());
1326 }
1327 Value *Replacement =
1328 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1329 ArrayRef(OffsetV));
1330 Replacement->takeName(ToReplace);
1331 // If the newly generated derived pointer's type does not match the original
1332 // derived pointer's type, cast the new derived pointer to match it. Same
1333 // reasoning as above.
1334 Value *ActualReplacement = Replacement;
1335 if (Replacement->getType() != ToReplace->getType()) {
1336 ActualReplacement =
1337 Builder.CreateBitCast(Replacement, ToReplace->getType());
1338 }
1339 ToReplace->replaceAllUsesWith(ActualReplacement);
1340 ToReplace->eraseFromParent();
1341
1342 MadeChange = true;
1343 }
1344 return MadeChange;
1345}
1346
1347// Turns this:
1348//
1349// %base = ...
1350// %ptr = gep %base + 15
1351// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1352// %base' = relocate(%tok, i32 4, i32 4)
1353// %ptr' = relocate(%tok, i32 4, i32 5)
1354// %val = load %ptr'
1355//
1356// into this:
1357//
1358// %base = ...
1359// %ptr = gep %base + 15
1360// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1361// %base' = gc.relocate(%tok, i32 4, i32 4)
1362// %ptr' = gep %base' + 15
1363// %val = load %ptr'
1364bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1365 bool MadeChange = false;
1366 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1367 for (auto *U : I.users())
1368 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1369 // Collect all the relocate calls associated with a statepoint
1370 AllRelocateCalls.push_back(Relocate);
1371
1372 // We need at least one base pointer relocation + one derived pointer
1373 // relocation to mangle
1374 if (AllRelocateCalls.size() < 2)
1375 return false;
1376
1377 // RelocateInstMap is a mapping from the base relocate instruction to the
1378 // corresponding derived relocate instructions
1379 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1380 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1381 if (RelocateInstMap.empty())
1382 return false;
1383
1384 for (auto &Item : RelocateInstMap)
1385 // Item.first is the RelocatedBase to offset against
1386 // Item.second is the vector of Targets to replace
1387 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1388 return MadeChange;
1389}
1390
1391/// Sink the specified cast instruction into its user blocks.
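///
/// For example (illustrative IR), given a cast that is free on the target:
///   bb0:  %c = addrspacecast ptr %p to ptr addrspace(1)
///   bb1:  use of %c
///   bb2:  use of %c
/// a clone of the cast is inserted into bb1 and bb2, and the original cast is
/// erased once it has no remaining uses.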
1392static bool SinkCast(CastInst *CI) {
1393 BasicBlock *DefBB = CI->getParent();
1394
1395 /// InsertedCasts - Only insert a cast in each block once.
1396 DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1397
1398 bool MadeChange = false;
1399 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1400 UI != E;) {
1401 Use &TheUse = UI.getUse();
1402 Instruction *User = cast<Instruction>(*UI);
1403
1404 // Figure out which BB this cast is used in. For PHI's this is the
1405 // appropriate predecessor block.
1406 BasicBlock *UserBB = User->getParent();
1407 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1408 UserBB = PN->getIncomingBlock(TheUse);
1409 }
1410
1411 // Preincrement use iterator so we don't invalidate it.
1412 ++UI;
1413
1414 // The first insertion point of a block containing an EH pad is after the
1415 // pad. If the pad is the user, we cannot sink the cast past the pad.
1416 if (User->isEHPad())
1417 continue;
1418
1419 // If the block selected to receive the cast is an EH pad that does not
1420 // allow non-PHI instructions before the terminator, we can't sink the
1421 // cast.
1422 if (UserBB->getTerminator()->isEHPad())
1423 continue;
1424
1425 // If this user is in the same block as the cast, don't change the cast.
1426 if (UserBB == DefBB)
1427 continue;
1428
1429 // If we have already inserted a cast into this block, use it.
1430 CastInst *&InsertedCast = InsertedCasts[UserBB];
1431
1432 if (!InsertedCast) {
1433 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1434 assert(InsertPt != UserBB->end());
1435 InsertedCast = cast<CastInst>(CI->clone());
1436 InsertedCast->insertBefore(*UserBB, InsertPt);
1437 }
1438
1439 // Replace a use of the cast with a use of the new cast.
1440 TheUse = InsertedCast;
1441 MadeChange = true;
1442 ++NumCastUses;
1443 }
1444
1445 // If we removed all uses, nuke the cast.
1446 if (CI->use_empty()) {
1447 salvageDebugInfo(*CI);
1448 CI->eraseFromParent();
1449 MadeChange = true;
1450 }
1451
1452 return MadeChange;
1453}
1454
1455/// If the specified cast instruction is a noop copy (e.g. it's casting from
1456/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1457/// reduce the number of virtual registers that must be created and coalesced.
1458///
1459/// Return true if any changes are made.
1460static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1461 const DataLayout &DL) {
1462 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1463 // than sinking only nop casts, but is helpful on some platforms.
1464 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1465 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1466 ASC->getDestAddressSpace()))
1467 return false;
1468 }
1469
1470 // If this is a noop copy,
1471 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1472 EVT DstVT = TLI.getValueType(DL, CI->getType());
1473
1474 // This is an fp<->int conversion?
1475 if (SrcVT.isInteger() != DstVT.isInteger())
1476 return false;
1477
1478 // If this is an extension, it will be a zero or sign extension, which
1479 // isn't a noop.
1480 if (SrcVT.bitsLT(DstVT))
1481 return false;
1482
1483 // If these values will be promoted, find out what they will be promoted
1484 // to. This helps us consider truncates on PPC as noop copies when they
1485 // are.
1486 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1487 TargetLowering::TypePromoteInteger)
1488 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1489 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1490 TargetLowering::TypePromoteInteger)
1491 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1492
1493 // If, after promotion, these are the same types, this is a noop copy.
1494 if (SrcVT != DstVT)
1495 return false;
1496
1497 return SinkCast(CI);
1498}
1499
1500// Match a simple increment by constant operation. Note that if a sub is
1501// matched, the step is negated (as if the step had been canonicalized to
1502// an add, even though we leave the instruction alone.)
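// For example (illustrative): %iv.next = add i32 %iv, 4 matches with LHS = %iv
// and Step = 4, while %iv.next = sub i32 %iv, 4 matches with Step = -4.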
1503static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1504 Constant *&Step) {
1505 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1506 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1507 m_Instruction(LHS), m_Constant(Step)))))
1508 return true;
1509 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1510 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1511 m_Instruction(LHS), m_Constant(Step))))) {
1512 Step = ConstantExpr::getNeg(Step);
1513 return true;
1514 }
1515 return false;
1516}
1517
1518/// If given \p PN is an inductive variable with value IVInc coming from the
1519/// backedge, and on each iteration it gets increased by Step, return pair
1520/// <IVInc, Step>. Otherwise, return std::nullopt.
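///
/// For example (illustrative IR), for the canonical loop
///   loop:
///     %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
///     %iv.next = add nuw i32 %iv, 1
/// this returns the pair <%iv.next, i32 1>.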
1521static std::optional<std::pair<Instruction *, Constant *>>
1522getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1523 const Loop *L = LI->getLoopFor(PN->getParent());
1524 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1525 return std::nullopt;
1526 auto *IVInc =
1527 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1528 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1529 return std::nullopt;
1530 Instruction *LHS = nullptr;
1531 Constant *Step = nullptr;
1532 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1533 return std::make_pair(IVInc, Step);
1534 return std::nullopt;
1535}
1536
1537static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1538 auto *I = dyn_cast<Instruction>(V);
1539 if (!I)
1540 return false;
1541 Instruction *LHS = nullptr;
1542 Constant *Step = nullptr;
1543 if (!matchIncrement(I, LHS, Step))
1544 return false;
1545 if (auto *PN = dyn_cast<PHINode>(LHS))
1546 if (auto IVInc = getIVIncrement(PN, LI))
1547 return IVInc->first == I;
1548 return false;
1549}
1550
1551bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1552 Value *Arg0, Value *Arg1,
1553 CmpInst *Cmp,
1554 Intrinsic::ID IID) {
1555 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1556 if (!isIVIncrement(BO, LI))
1557 return false;
1558 const Loop *L = LI->getLoopFor(BO->getParent());
1559 assert(L && "L should not be null after isIVIncrement()");
1560 // Do not risk moving the increment into a child loop.
1561 if (LI->getLoopFor(Cmp->getParent()) != L)
1562 return false;
1563
1564 // Finally, we need to ensure that the insert point will dominate all
1565 // existing uses of the increment.
1566
1567 auto &DT = getDT(*BO->getParent()->getParent());
1568 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1569 // If we're moving up the dom tree, all uses are trivially dominated.
1570 // (This is the common case for code produced by LSR.)
1571 return true;
1572
1573 // Otherwise, special case the single use in the phi recurrence.
1574 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1575 };
1576 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1577 // We used to use a dominator tree here to allow multi-block optimization.
1578 // But that was problematic because:
1579 // 1. It could cause a perf regression by hoisting the math op into the
1580 // critical path.
1581 // 2. It could cause a perf regression by creating a value that was live
1582 // across multiple blocks and increasing register pressure.
1583 // 3. Use of a dominator tree could cause large compile-time regression.
1584 // This is because we recompute the DT on every change in the main CGP
1585 // run-loop. The recomputing is probably unnecessary in many cases, so if
1586 // that was fixed, using a DT here would be ok.
1587 //
1588 // There is one important particular case we still want to handle: if BO is
1589 // the IV increment. Important properties that make it profitable:
1590 // - We can speculate IV increment anywhere in the loop (as long as the
1591 // indvar Phi is its only user);
1592 // - Upon computing Cmp, we effectively compute something equivalent to the
1593 // IV increment (even though it looks different in the IR). So moving it up
1594 // to the cmp point does not really increase register pressure.
1595 return false;
1596 }
1597
1598 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1599 if (BO->getOpcode() == Instruction::Add &&
1600 IID == Intrinsic::usub_with_overflow) {
1601 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1602 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1603 }
1604
1605 // Insert at the first instruction of the pair.
1606 Instruction *InsertPt = nullptr;
1607 for (Instruction &Iter : *Cmp->getParent()) {
1608 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1609 // the overflow intrinsic are defined.
1610 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1611 InsertPt = &Iter;
1612 break;
1613 }
1614 }
1615 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1616
1617 IRBuilder<> Builder(InsertPt);
1618 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1619 if (BO->getOpcode() != Instruction::Xor) {
1620 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1621 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1622 } else
1623 assert(BO->hasOneUse() &&
1624 "Patterns with XOr should use the BO only in the compare");
1625 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1626 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1627 Cmp->eraseFromParent();
1628 BO->eraseFromParent();
1629 return true;
1630}
1631
1632/// Match special-case patterns that check for unsigned add overflow.
1633static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1634 BinaryOperator *&Add) {
1635 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1636 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1637 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1638
1639 // We are not expecting non-canonical/degenerate code. Just bail out.
1640 if (isa<Constant>(A))
1641 return false;
1642
1643 ICmpInst::Predicate Pred = Cmp->getPredicate();
1644 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1645 B = ConstantInt::get(B->getType(), 1);
1646 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1647 B = Constant::getAllOnesValue(B->getType());
1648 else
1649 return false;
1650
1651 // Check the users of the variable operand of the compare looking for an add
1652 // with the adjusted constant.
1653 for (User *U : A->users()) {
1654 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1655 Add = cast<BinaryOperator>(U);
1656 return true;
1657 }
1658 }
1659 return false;
1660}
1661
1662/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1663/// intrinsic. Return true if any changes were made.
1664bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1665 ModifyDT &ModifiedDT) {
1666 bool EdgeCase = false;
1667 Value *A, *B;
1668 BinaryOperator *Add;
1669 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1670 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1671 return false;
1672 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1673 A = Add->getOperand(0);
1674 B = Add->getOperand(1);
1675 EdgeCase = true;
1676 }
1677
1678 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1679 TLI->getValueType(*DL, Add->getType()),
1680 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1681 return false;
1682
1683 // We don't want to move around uses of condition values this late, so we
1684 // check if it is legal to create the call to the intrinsic in the basic
1685 // block containing the icmp.
1686 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1687 return false;
1688
1689 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1690 Intrinsic::uadd_with_overflow))
1691 return false;
1692
1693 // Reset callers - do not crash by iterating over a dead instruction.
1694 ModifiedDT = ModifyDT::ModifyInstDT;
1695 return true;
1696}
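// A minimal sketch of the rewrite performed above (hypothetical IR; names are
// made up). A compare recognized by m_UAddWithOverflow such as
//
//   %add = add i64 %a, %b
//   %ov  = icmp ult i64 %add, %a
//
// becomes, with both values fed from the intrinsic:
//
//   %m   = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
//   %add = extractvalue { i64, i1 } %m, 0
//   %ov  = extractvalue { i64, i1 } %m, 1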
1697
1698bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1699 ModifyDT &ModifiedDT) {
1700 // We are not expecting non-canonical/degenerate code. Just bail out.
1701 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1702 if (isa<Constant>(A) && isa<Constant>(B))
1703 return false;
1704
1705 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1706 ICmpInst::Predicate Pred = Cmp->getPredicate();
1707 if (Pred == ICmpInst::ICMP_UGT) {
1708 std::swap(A, B);
1709 Pred = ICmpInst::ICMP_ULT;
1710 }
1711 // Convert special-case: (A == 0) is the same as (A u< 1).
1712 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1713 B = ConstantInt::get(B->getType(), 1);
1714 Pred = ICmpInst::ICMP_ULT;
1715 }
1716 // Convert special-case: (A != 0) is the same as (0 u< A).
1717 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1718 std::swap(A, B);
1719 Pred = ICmpInst::ICMP_ULT;
1720 }
1721 if (Pred != ICmpInst::ICMP_ULT)
1722 return false;
1723
1724 // Walk the users of a variable operand of a compare looking for a subtract or
1725 // add with that same operand. Also match the 2nd operand of the compare to
1726 // the add/sub, but that may be a negated constant operand of an add.
1727 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1728 BinaryOperator *Sub = nullptr;
1729 for (User *U : CmpVariableOperand->users()) {
1730 // A - B, A u< B --> usubo(A, B)
1731 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1732 Sub = cast<BinaryOperator>(U);
1733 break;
1734 }
1735
1736 // A + (-C), A u< C (canonicalized form of (sub A, C))
1737 const APInt *CmpC, *AddC;
1738 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1739 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1740 Sub = cast<BinaryOperator>(U);
1741 break;
1742 }
1743 }
1744 if (!Sub)
1745 return false;
1746
1747 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1748 TLI->getValueType(*DL, Sub->getType()),
1749 Sub->hasNUsesOrMore(1)))
1750 return false;
1751
1752 // We don't want to move around uses of condition values this late, so we
1753 // check if it is legal to create the call to the intrinsic in the basic
1754 // block containing the icmp.
1755 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1756 return false;
1757
1758 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1759 Cmp, Intrinsic::usub_with_overflow))
1760 return false;
1761
1762 // Reset callers - do not crash by iterating over a dead instruction.
1763 ModifiedDT = ModifyDT::ModifyInstDT;
1764 return true;
1765}
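// A minimal sketch of the rewrite (hypothetical IR). An unsigned borrow check
// such as
//
//   %sub = sub i64 %a, %b
//   %ov  = icmp ult i64 %a, %b
//
// is replaced with
//
//   %m   = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
//   %sub = extractvalue { i64, i1 } %m, 0
//   %ov  = extractvalue { i64, i1 } %m, 1
//
// The canonical form 'add %a, -C' paired with 'icmp ult %a, C' is matched as
// well and mapped back to usub.with.overflow(%a, C).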
1766
1767// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1768// The same transformation exists in DAG combiner, but we repeat it here because
1769// DAG builder can break the pattern by moving icmp into a successor block.
1770bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1771 CmpPredicate Pred;
1772 Value *X;
1773 const APInt *C;
1774
1775 // (icmp (ctpop x), c)
1776 if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1777 m_APIntAllowPoison(C))))
1778 return false;
1779
1780 // We're only interested in "is power of 2 [or zero]" patterns.
1781 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1782 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1783 (Pred == CmpInst::ICMP_UGT && *C == 1);
1784 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1785 return false;
1786
1787 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1788 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1789 // and otherwise expand ctpop into a few simple instructions.
1790 Type *OpTy = X->getType();
1791 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1792 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1793 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1794 return false;
1795
1796 // ctpop(x) == 1 -> ctpop(x) u< 2
1797 // ctpop(x) != 1 -> ctpop(x) u> 1
1798 if (Pred == ICmpInst::ICMP_EQ) {
1799 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1800 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1801 } else {
1802 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1803 }
1804 return true;
1805 }
1806
1807 Value *NewCmp;
1808 if (IsPowerOf2OrZeroTest ||
1809 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1810 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1811 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1812 IRBuilder<> Builder(Cmp);
1813 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1814 Value *And = Builder.CreateAnd(X, Sub);
1815 CmpInst::Predicate NewPred =
1816 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1817 ? CmpInst::ICMP_EQ
1818 : CmpInst::ICMP_NE;
1819 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1820 } else {
1821 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1822 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1823 IRBuilder<> Builder(Cmp);
1824 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1825 Value *Xor = Builder.CreateXor(X, Sub);
1826 CmpInst::Predicate NewPred =
1827 Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1828 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1829 }
1830
1831 Cmp->replaceAllUsesWith(NewCmp);
1832 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1833 return true;
1834}
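// Sketch of the slow-ctpop path above (hypothetical IR). A power-of-2-or-zero
// test
//
//   %ct  = call i64 @llvm.ctpop.i64(i64 %x)
//   %cmp = icmp ult i64 %ct, 2
//
// is expanded to
//
//   %sub = add i64 %x, -1
//   %and = and i64 %x, %sub
//   %cmp = icmp eq i64 %and, 0
//
// while the strict 'ctpop(x) == 1' test (x not known non-zero) uses the xor
// form compared against x - 1, as the comments in the code show.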
1835
1836/// Sink the given CmpInst into user blocks to reduce the number of virtual
1837/// registers that must be created and coalesced. This is a clear win except on
1838/// targets with multiple condition code registers (PowerPC), where it might
1839/// lose; some adjustment may be wanted there.
1840///
1841/// Return true if any changes are made.
1842static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1843 const DataLayout &DL) {
1844 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1845 return false;
1846
1847 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1848 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1849 return false;
1850
1851 bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1852 return isa<PHINode>(U) ||
1853 cast<Instruction>(U)->getParent() == Cmp->getParent();
1854 });
1855
1856 // Avoid sinking larger-than-legal integer comparisons unless it's ONLY used in
1857 // another BB.
1858 if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1859 Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1860 DL.getLargestLegalIntTypeSizeInBits())
1861 return false;
1862
1863 // Only insert a cmp in each block once.
1864 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1865
1866 bool MadeChange = false;
1867 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1868 UI != E;) {
1869 Use &TheUse = UI.getUse();
1870 Instruction *User = cast<Instruction>(*UI);
1871
1872 // Preincrement use iterator so we don't invalidate it.
1873 ++UI;
1874
1875 // Don't bother for PHI nodes.
1876 if (isa<PHINode>(User))
1877 continue;
1878
1879 // Figure out which BB this cmp is used in.
1880 BasicBlock *UserBB = User->getParent();
1881 BasicBlock *DefBB = Cmp->getParent();
1882
1883 // If this user is in the same block as the cmp, don't change the cmp.
1884 if (UserBB == DefBB)
1885 continue;
1886
1887 // If we have already inserted a cmp into this block, use it.
1888 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1889
1890 if (!InsertedCmp) {
1891 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1892 assert(InsertPt != UserBB->end());
1893 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1894 Cmp->getOperand(0), Cmp->getOperand(1), "");
1895 InsertedCmp->insertBefore(*UserBB, InsertPt);
1896 // Propagate the debug info.
1897 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1898 }
1899
1900 // Replace a use of the cmp with a use of the new cmp.
1901 TheUse = InsertedCmp;
1902 MadeChange = true;
1903 ++NumCmpUses;
1904 }
1905
1906 // If we removed all uses, nuke the cmp.
1907 if (Cmp->use_empty()) {
1908 Cmp->eraseFromParent();
1909 MadeChange = true;
1910 }
1911
1912 return MadeChange;
1913}
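// Illustrative effect (hypothetical IR): a compare whose only users live in
// other blocks is re-materialized next to each user, so the flags producer and
// consumer end up in the same block for SelectionDAG:
//
//   bb0:
//     %c = icmp eq i32 %x, %y
//     br label %bb1
//   bb1:
//     br i1 %c, label %t, label %f
// =>
//   bb1:
//     %c.sunk = icmp eq i32 %x, %y
//     br i1 %c.sunk, label %t, label %f
//
// The original %c is erased once it has no remaining uses.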
1914
1915/// For pattern like:
1916///
1917/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1918/// ...
1919/// DomBB:
1920/// ...
1921/// br DomCond, TrueBB, CmpBB
1922/// CmpBB: (with DomBB being the single predecessor)
1923/// ...
1924/// Cmp = icmp eq CmpOp0, CmpOp1
1925/// ...
1926///
1927/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1928/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1929/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1930/// After that, DomCond and Cmp can use the same comparison, eliminating one
1931/// comparison.
1932///
1933/// Return true if any changes are made.
1934static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1935 const TargetLowering &TLI) {
1936 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1937 return false;
1938
1939 ICmpInst::Predicate Pred = Cmp->getPredicate();
1940 if (Pred != ICmpInst::ICMP_EQ)
1941 return false;
1942
1943 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1944 // icmp slt/sgt would introduce more redundant LLVM IR.
1945 for (User *U : Cmp->users()) {
1946 if (isa<BranchInst>(U))
1947 continue;
1948 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1949 continue;
1950 return false;
1951 }
1952
1953 // This is a cheap/incomplete check for dominance - just match a single
1954 // predecessor with a conditional branch.
1955 BasicBlock *CmpBB = Cmp->getParent();
1956 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1957 if (!DomBB)
1958 return false;
1959
1960 // We want to ensure that the only way control gets to the comparison of
1961 // interest is that a less/greater than comparison on the same operands is
1962 // false.
1963 Value *DomCond;
1964 BasicBlock *TrueBB, *FalseBB;
1965 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1966 return false;
1967 if (CmpBB != FalseBB)
1968 return false;
1969
1970 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1971 CmpPredicate DomPred;
1972 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1973 return false;
1974 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1975 return false;
1976
1977 // Convert the equality comparison to the opposite of the dominating
1978 // comparison and swap the direction for all branch/select users.
1979 // We have conceptually converted:
1980 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1981 // to
1982 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1983 // And similarly for branches.
1984 for (User *U : Cmp->users()) {
1985 if (auto *BI = dyn_cast<BranchInst>(U)) {
1986 assert(BI->isConditional() && "Must be conditional");
1987 BI->swapSuccessors();
1988 continue;
1989 }
1990 if (auto *SI = dyn_cast<SelectInst>(U)) {
1991 // Swap operands
1992 SI->swapValues();
1993 SI->swapProfMetadata();
1994 continue;
1995 }
1996 llvm_unreachable("Must be a branch or a select");
1997 }
1998 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1999 return true;
2000}
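// Concrete sketch of the pattern above (hypothetical IR):
//
//   DomBB:
//     %lt = icmp slt i32 %a, %b
//     br i1 %lt, label %TrueBB, label %CmpBB
//   CmpBB:
//     %eq = icmp eq i32 %a, %b
//     br i1 %eq, label %EqBB, label %GtBB
// =>
//   CmpBB:
//     %eq = icmp sgt i32 %a, %b             ; predicate swapped from the slt
//     br i1 %eq, label %GtBB, label %EqBB   ; successors swapped
//
// so both blocks now use comparisons with the same lowering.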
2001
2002/// Many architectures use the same instruction for both subtract and cmp. Try
2003/// to swap cmp operands to match subtract operations to allow for CSE.
2004static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
2005 Value *Op0 = Cmp->getOperand(0);
2006 Value *Op1 = Cmp->getOperand(1);
2007 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
2008 isa<Constant>(Op1) || Op0 == Op1)
2009 return false;
2010
2011 // If a subtract already has the same operands as a compare, swapping would be
2012 // bad. If a subtract has the same operands as a compare but in reverse order,
2013 // then swapping is good.
2014 int GoodToSwap = 0;
2015 unsigned NumInspected = 0;
2016 for (const User *U : Op0->users()) {
2017 // Avoid walking many users.
2018 if (++NumInspected > 128)
2019 return false;
2020 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2021 GoodToSwap++;
2022 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2023 GoodToSwap--;
2024 }
2025
2026 if (GoodToSwap > 0) {
2027 Cmp->swapOperands();
2028 return true;
2029 }
2030 return false;
2031}
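// Sketch (hypothetical IR): if a subtract already exists with the reversed
// operand order,
//
//   %d = sub i32 %b, %a
//   %c = icmp ult i32 %a, %b
//
// the compare is rewritten as 'icmp ugt i32 %b, %a', which is equivalent and
// lets targets that compute sub and cmp with one instruction CSE the pair.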
2032
2033static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2034 const DataLayout &DL) {
2035 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2036 if (!FCmp)
2037 return false;
2038
2039 // Don't fold if the target offers free fabs and the predicate is legal.
2040 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2041 if (TLI.isFAbsFree(VT) &&
2042 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
2043 VT.getSimpleVT()))
2044 return false;
2045
2046 // Reverse the canonicalization if it is a FP class test
2047 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2048 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2049 };
2050 auto [ClassVal, ClassTest] =
2051 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2052 FCmp->getOperand(0), FCmp->getOperand(1));
2053 if (!ClassVal)
2054 return false;
2055
2056 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2057 return false;
2058
2059 IRBuilder<> Builder(Cmp);
2060 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2061 Cmp->replaceAllUsesWith(IsFPClass);
2062 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
2063 return true;
2064}
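// Sketch of the reversed canonicalization (hypothetical IR): an infinity check
// written through fabs,
//
//   %fa  = call double @llvm.fabs.f64(double %x)
//   %cmp = fcmp oeq double %fa, 0x7FF0000000000000
//
// is turned back into a class test when fabs is not free:
//
//   %cmp = call i1 @llvm.is.fpclass.f64(double %x, i32 516)   ; fcInf (0x204)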
2065
2066static bool isRemOfLoopIncrementWithLoopInvariant(
2067 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2068 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2069 Value *Incr, *RemAmt;
2070 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2071 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2072 return false;
2073
2074 Value *AddInst, *AddOffset;
2075 // Find out loop increment PHI.
2076 auto *PN = dyn_cast<PHINode>(Incr);
2077 if (PN != nullptr) {
2078 AddInst = nullptr;
2079 AddOffset = nullptr;
2080 } else {
2081 // Search through a NUW add on top of the loop increment.
2082 Value *V0, *V1;
2083 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2084 return false;
2085
2086 AddInst = Incr;
2087 PN = dyn_cast<PHINode>(V0);
2088 if (PN != nullptr) {
2089 AddOffset = V1;
2090 } else {
2091 PN = dyn_cast<PHINode>(V1);
2092 AddOffset = V0;
2093 }
2094 }
2095
2096 if (!PN)
2097 return false;
2098
2099 // This isn't strictly necessary; what we really need is one increment and any
2100 // number of initial values, all being the same.
2101 if (PN->getNumIncomingValues() != 2)
2102 return false;
2103
2104 // Only trivially analyzable loops.
2105 Loop *L = LI->getLoopFor(PN->getParent());
2106 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2107 return false;
2108
2109 // Require that the remainder is in the loop.
2110 if (!L->contains(Rem))
2111 return false;
2112
2113 // Only works if the remainder amount is loop invariant.
2114 if (!L->isLoopInvariant(RemAmt))
2115 return false;
2116
2117 // Only works if the AddOffset is loop invariant.
2118 if (AddOffset && !L->isLoopInvariant(AddOffset))
2119 return false;
2120
2121 // Is the PHI a loop increment?
2122 auto LoopIncrInfo = getIVIncrement(PN, LI);
2123 if (!LoopIncrInfo)
2124 return false;
2125
2126 // We need remainder_amount % increment_amount to be zero. Increment of one
2127 // satisfies that without any special logic and is overwhelmingly the common
2128 // case.
2129 if (!match(LoopIncrInfo->second, m_One()))
2130 return false;
2131
2132 // Need the increment to not overflow.
2133 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2134 return false;
2135
2136 // Set output variables.
2137 RemAmtOut = RemAmt;
2138 LoopIncrPNOut = PN;
2139 AddInstOut = AddInst;
2140 AddOffsetOut = AddOffset;
2141
2142 return true;
2143}
2144
2145// Try to transform:
2146//
2147// for(i = Start; i < End; ++i)
2148// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2149//
2150// ->
2151//
2152// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2153// for(i = Start; i < End; ++i, ++rem)
2154// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2155static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2156 const LoopInfo *LI,
2157 SmallSet<BasicBlock *, 32> &FreshBBs,
2158 bool IsHuge) {
2159 Value *AddOffset, *RemAmt, *AddInst;
2160 PHINode *LoopIncrPN;
2161 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2162 AddOffset, LoopIncrPN))
2163 return false;
2164
2165 // Only non-constant remainder as the extra IV is probably not profitable
2166 // in that case.
2167 //
2168 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2169 // we can rule out register pressure and ensure this `urem` is executed each
2170 // iteration, it's probably profitable to handle the const case as well.
2171 //
2172 // Potential TODO(2): Should we have a check for how "nested" this remainder
2173 // operation is? The new code runs every iteration so if the remainder is
2174 // guarded behind unlikely conditions this might not be worth it.
2175 if (match(RemAmt, m_ImmConstant()))
2176 return false;
2177
2178 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2179 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2180 // If we have an add, create the initial value for the remainder.
2181 // The logic here is:
2182 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2183 //
2184 // Only proceed if the expression simplifies (otherwise we can't fully
2185 // optimize out the urem).
2186 if (AddInst) {
2187 assert(AddOffset && "We found an add but missing values");
2188 // Without dom-condition/assumption cache we aren't likely to get much out
2189 // of a context instruction.
2190 Start = simplifyAddInst(Start, AddOffset,
2191 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2192 /*IsNUW=*/true, *DL);
2193 if (!Start)
2194 return false;
2195 }
2196
2197 // If we can't fully optimize out the `rem`, skip this transform.
2198 Start = simplifyURemInst(Start, RemAmt, *DL);
2199 if (!Start)
2200 return false;
2201
2202 // Create new remainder with induction variable.
2203 Type *Ty = Rem->getType();
2204 IRBuilder<> Builder(Rem->getContext());
2205
2206 Builder.SetInsertPoint(LoopIncrPN);
2207 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2208
2209 Builder.SetInsertPoint(cast<Instruction>(
2210 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2211 // `(add (urem x, y), 1)` is always nuw.
2212 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2213 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2214 Value *RemSel =
2215 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2216
2217 NewRem->addIncoming(Start, L->getLoopPreheader());
2218 NewRem->addIncoming(RemSel, L->getLoopLatch());
2219
2220 // Insert all touched BBs.
2221 FreshBBs.insert(LoopIncrPN->getParent());
2222 FreshBBs.insert(L->getLoopLatch());
2223 FreshBBs.insert(Rem->getParent());
2224 if (AddInst)
2225 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2226 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2227 Rem->eraseFromParent();
2228 if (AddInst && AddInst->use_empty())
2229 cast<Instruction>(AddInst)->eraseFromParent();
2230 return true;
2231}
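// Sketch of the result (hypothetical IR), matching the pseudo-code above:
//
//   preheader:
//     ; Start % RemAmt must fold away, e.g. to 0 when Start == 0
//   loop:
//     %rem = phi i64 [ 0, %preheader ], [ %rem.sel, %latch ]
//     %i   = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
//     ...
//   latch:
//     %rem.next = add nuw i64 %rem, 1
//     %rem.wrap = icmp eq i64 %rem.next, %RemAmt
//     %rem.sel  = select i1 %rem.wrap, i64 0, i64 %rem.next
//     %i.next   = add nuw i64 %i, 1
//
// replacing the per-iteration 'urem %i, %RemAmt' with a cheap add/cmp/select.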
2232
2233bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2234 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2235 return true;
2236 return false;
2237}
2238
2239bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2240 if (sinkCmpExpression(Cmp, *TLI, *DL))
2241 return true;
2242
2243 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2244 return true;
2245
2246 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2247 return true;
2248
2249 if (unfoldPowerOf2Test(Cmp))
2250 return true;
2251
2252 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2253 return true;
2254
2255 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2256 return true;
2257
2258 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2259 return true;
2260
2261 return false;
2262}
2263
2264/// Duplicate and sink the given 'and' instruction into user blocks where it is
2265/// used in a compare to allow isel to generate better code for targets where
2266/// this operation can be combined.
2267///
2268/// Return true if any changes are made.
2269static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2270 SetOfInstrs &InsertedInsts) {
2271 // Double-check that we're not trying to optimize an instruction that was
2272 // already optimized by some other part of this pass.
2273 assert(!InsertedInsts.count(AndI) &&
2274 "Attempting to optimize already optimized and instruction");
2275 (void)InsertedInsts;
2276
2277 // Nothing to do for single use in same basic block.
2278 if (AndI->hasOneUse() &&
2279 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2280 return false;
2281
2282 // Try to avoid cases where sinking/duplicating is likely to increase register
2283 // pressure.
2284 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2285 !isa<ConstantInt>(AndI->getOperand(1)) &&
2286 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2287 return false;
2288
2289 for (auto *U : AndI->users()) {
2290 Instruction *User = cast<Instruction>(U);
2291
2292 // Only sink 'and' feeding icmp with 0.
2293 if (!isa<ICmpInst>(User))
2294 return false;
2295
2296 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2297 if (!CmpC || !CmpC->isZero())
2298 return false;
2299 }
2300
2301 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2302 return false;
2303
2304 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2305 LLVM_DEBUG(AndI->getParent()->dump());
2306
2307 // Push the 'and' into the same block as the icmp 0. There should only be
2308 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2309 // others, so we don't need to keep track of which BBs we insert into.
2310 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2311 UI != E;) {
2312 Use &TheUse = UI.getUse();
2313 Instruction *User = cast<Instruction>(*UI);
2314
2315 // Preincrement use iterator so we don't invalidate it.
2316 ++UI;
2317
2318 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2319
2320 // Keep the 'and' in the same place if the use is already in the same block.
2321 Instruction *InsertPt =
2322 User->getParent() == AndI->getParent() ? AndI : User;
2323 Instruction *InsertedAnd = BinaryOperator::Create(
2324 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2325 InsertPt->getIterator());
2326 // Propagate the debug info.
2327 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2328
2329 // Replace a use of the 'and' with a use of the new 'and'.
2330 TheUse = InsertedAnd;
2331 ++NumAndUses;
2332 LLVM_DEBUG(User->getParent()->dump());
2333 }
2334
2335 // We removed all uses, nuke the and.
2336 AndI->eraseFromParent();
2337 return true;
2338}
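// Sketch (hypothetical IR): an 'and' that only feeds 'icmp eq ..., 0' users in
// other blocks is duplicated next to each such compare,
//
//   bb0:
//     %m = and i32 %x, 8
//     br label %bb1
//   bb1:
//     %c = icmp eq i32 %m, 0
// =>
//   bb1:
//     %m.dup = and i32 %x, 8
//     %c     = icmp eq i32 %m.dup, 0
//
// so targets with combined test-under-mask instructions can match the pair;
// the original 'and' is then erased.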
2339
2340/// Check if the candidates could be combined with a shift instruction, which
2341/// includes:
2342/// 1. Truncate instruction
2343/// 2. An 'and' instruction whose immediate is a mask of the low bits:
2344/// imm & (imm+1) == 0
2345static bool isExtractBitsCandidateUse(Instruction *User) {
2346 if (!isa<TruncInst>(User)) {
2347 if (User->getOpcode() != Instruction::And ||
2348 !isa<ConstantInt>(User->getOperand(1)))
2349 return false;
2350
2351 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2352
2353 if ((Cimm & (Cimm + 1)).getBoolValue())
2354 return false;
2355 }
2356 return true;
2357}
2358
2359/// Sink both shift and truncate instruction to the use of truncate's BB.
2360static bool
2361SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2362 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2363 const TargetLowering &TLI, const DataLayout &DL) {
2364 BasicBlock *UserBB = User->getParent();
2365 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2366 auto *TruncI = cast<TruncInst>(User);
2367 bool MadeChange = false;
2368
2369 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2370 TruncE = TruncI->user_end();
2371 TruncUI != TruncE;) {
2372
2373 Use &TruncTheUse = TruncUI.getUse();
2374 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2375 // Preincrement use iterator so we don't invalidate it.
2376
2377 ++TruncUI;
2378
2379 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2380 if (!ISDOpcode)
2381 continue;
2382
2383 // If the use is actually a legal node, there will not be an
2384 // implicit truncate.
2385 // FIXME: always querying the result type is just an
2386 // approximation; some nodes' legality is determined by the
2387 // operand or other means. There's no good way to find out though.
2388 if (TLI.isOperationLegalOrCustom(
2389 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2390 continue;
2391
2392 // Don't bother for PHI nodes.
2393 if (isa<PHINode>(TruncUser))
2394 continue;
2395
2396 BasicBlock *TruncUserBB = TruncUser->getParent();
2397
2398 if (UserBB == TruncUserBB)
2399 continue;
2400
2401 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2402 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2403
2404 if (!InsertedShift && !InsertedTrunc) {
2405 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2406 assert(InsertPt != TruncUserBB->end());
2407 // Sink the shift
2408 if (ShiftI->getOpcode() == Instruction::AShr)
2409 InsertedShift =
2410 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2411 else
2412 InsertedShift =
2413 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2414 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2415 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2416
2417 // Sink the trunc
2418 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2419 TruncInsertPt++;
2420 // It will go ahead of any debug-info.
2421 TruncInsertPt.setHeadBit(true);
2422 assert(TruncInsertPt != TruncUserBB->end());
2423
2424 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2425 TruncI->getType(), "");
2426 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2427 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2428
2429 MadeChange = true;
2430
2431 TruncTheUse = InsertedTrunc;
2432 }
2433 }
2434 return MadeChange;
2435}
2436
2437/// Sink the shift *right* instruction into user blocks if the uses could
2438/// potentially be combined with this shift instruction to generate a
2439/// BitExtract instruction. It will only be applied if the architecture
2440/// supports the BitExtract instruction. Here is an example:
2441/// BB1:
2442/// %x.extract.shift = lshr i64 %arg1, 32
2443/// BB2:
2444/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2445/// ==>
2446///
2447/// BB2:
2448/// %x.extract.shift.1 = lshr i64 %arg1, 32
2449/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2450///
2451/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2452/// instruction.
2453/// Return true if any changes are made.
2454static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2455 const TargetLowering &TLI,
2456 const DataLayout &DL) {
2457 BasicBlock *DefBB = ShiftI->getParent();
2458
2459 /// Only insert instructions in each block once.
2460 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2461
2462 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2463
2464 bool MadeChange = false;
2465 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2466 UI != E;) {
2467 Use &TheUse = UI.getUse();
2468 Instruction *User = cast<Instruction>(*UI);
2469 // Preincrement use iterator so we don't invalidate it.
2470 ++UI;
2471
2472 // Don't bother for PHI nodes.
2473 if (isa<PHINode>(User))
2474 continue;
2475
2476 if (!isExtractBitsCandidateUse(User))
2477 continue;
2478
2479 BasicBlock *UserBB = User->getParent();
2480
2481 if (UserBB == DefBB) {
2482 // If the shift and truncate instructions are in the same BB, the use of
2483 // the truncate (TruncUse) may still introduce another truncate if not
2484 // legal. In this case, we would like to sink both shift and truncate
2485 // instruction to the BB of TruncUse.
2486 // for example:
2487 // BB1:
2488 // i64 shift.result = lshr i64 opnd, imm
2489 // trunc.result = trunc shift.result to i16
2490 //
2491 // BB2:
2492 // ----> We will have an implicit truncate here if the architecture does
2493 // not have i16 compare.
2494 // cmp i16 trunc.result, opnd2
2495 //
2496 if (isa<TruncInst>(User) &&
2497 shiftIsLegal
2498 // If the type of the truncate is legal, no truncate will be
2499 // introduced in other basic blocks.
2500 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2501 MadeChange =
2502 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2503
2504 continue;
2505 }
2506 // If we have already inserted a shift into this block, use it.
2507 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2508
2509 if (!InsertedShift) {
2510 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2511 assert(InsertPt != UserBB->end());
2512
2513 if (ShiftI->getOpcode() == Instruction::AShr)
2514 InsertedShift =
2515 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2516 else
2517 InsertedShift =
2518 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2519 InsertedShift->insertBefore(*UserBB, InsertPt);
2520 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2521
2522 MadeChange = true;
2523 }
2524
2525 // Replace a use of the shift with a use of the new shift.
2526 TheUse = InsertedShift;
2527 }
2528
2529 // If we removed all uses, or there are none, nuke the shift.
2530 if (ShiftI->use_empty()) {
2531 salvageDebugInfo(*ShiftI);
2532 ShiftI->eraseFromParent();
2533 MadeChange = true;
2534 }
2535
2536 return MadeChange;
2537}
2538
2539/// If counting leading or trailing zeros is an expensive operation and a zero
2540/// input is defined, add a check for zero to avoid calling the intrinsic.
2541///
2542/// We want to transform:
2543/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2544///
2545/// into:
2546/// entry:
2547/// %cmpz = icmp eq i64 %A, 0
2548/// br i1 %cmpz, label %cond.end, label %cond.false
2549/// cond.false:
2550/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2551/// br label %cond.end
2552/// cond.end:
2553/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2554///
2555/// If the transform is performed, return true and set ModifiedDT to true.
2556static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
2557 const TargetLowering *TLI,
2558 const DataLayout *DL, ModifyDT &ModifiedDT,
2559 SmallSet<BasicBlock *, 32> &FreshBBs,
2560 bool IsHugeFunc) {
2561 // If a zero input is undefined, it doesn't make sense to despeculate that.
2562 if (match(CountZeros->getOperand(1), m_One()))
2563 return false;
2564
2565 // If it's cheap to speculate, there's nothing to do.
2566 Type *Ty = CountZeros->getType();
2567 auto IntrinsicID = CountZeros->getIntrinsicID();
2568 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2569 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2570 return false;
2571
2572 // Only handle scalar cases. Anything else requires too much work.
2573 unsigned SizeInBits = Ty->getScalarSizeInBits();
2574 if (Ty->isVectorTy())
2575 return false;
2576
2577 // Bail if the value is never zero.
2578 Use &Op = CountZeros->getOperandUse(0);
2579 if (isKnownNonZero(Op, *DL))
2580 return false;
2581
2582 // The intrinsic will be sunk behind a compare against zero and branch.
2583 BasicBlock *StartBlock = CountZeros->getParent();
2584 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2585 if (IsHugeFunc)
2586 FreshBBs.insert(CallBlock);
2587
2588 // Create another block after the count zero intrinsic. A PHI will be added
2589 // in this block to select the result of the intrinsic or the bit-width
2590 // constant if the input to the intrinsic is zero.
2591 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2592 // Any debug-info after CountZeros should not be included.
2593 SplitPt.setHeadBit(true);
2594 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2595 if (IsHugeFunc)
2596 FreshBBs.insert(EndBlock);
2597
2598 // Update the LoopInfo. The new blocks are in the same loop as the start
2599 // block.
2600 if (Loop *L = LI.getLoopFor(StartBlock)) {
2601 L->addBasicBlockToLoop(CallBlock, LI);
2602 L->addBasicBlockToLoop(EndBlock, LI);
2603 }
2604
2605 // Set up a builder to create a compare, conditional branch, and PHI.
2606 IRBuilder<> Builder(CountZeros->getContext());
2607 Builder.SetInsertPoint(StartBlock->getTerminator());
2608 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2609
2610 // Replace the unconditional branch that was created by the first split with
2611 // a compare against zero and a conditional branch.
2612 Value *Zero = Constant::getNullValue(Ty);
2613 // Avoid introducing branch on poison. This also replaces the ctz operand.
2614 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2615 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2616 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2617 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2618 StartBlock->getTerminator()->eraseFromParent();
2619
2620 // Create a PHI in the end block to select either the output of the intrinsic
2621 // or the bit width of the operand.
2622 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2623 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2624 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2625 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2626 PN->addIncoming(BitWidth, StartBlock);
2627 PN->addIncoming(CountZeros, CallBlock);
2628
2629 // We are explicitly handling the zero case, so we can set the intrinsic's
2630 // undefined zero argument to 'true'. This will also prevent reprocessing the
2631 // intrinsic; we only despeculate when a zero input is defined.
2632 CountZeros->setArgOperand(1, Builder.getTrue());
2633 ModifiedDT = ModifyDT::ModifyBBDT;
2634 return true;
2635}
2636
2637bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2638 BasicBlock *BB = CI->getParent();
2639
2640 // Sink address computing for memory operands into the block.
2641 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2642 return true;
2643
2644 // Align the pointer arguments to this call if the target thinks it's a good
2645 // idea
2646 unsigned MinSize;
2647 Align PrefAlign;
2648 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2649 for (auto &Arg : CI->args()) {
2650 // We want to align both objects whose address is used directly and
2651 // objects whose address is used in casts and GEPs, though it only makes
2652 // sense for GEPs if the offset is a multiple of the desired alignment and
2653 // if size - offset meets the size threshold.
2654 if (!Arg->getType()->isPointerTy())
2655 continue;
2656 APInt Offset(DL->getIndexSizeInBits(
2657 cast<PointerType>(Arg->getType())->getAddressSpace()),
2658 0);
2659 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2660 uint64_t Offset2 = Offset.getLimitedValue();
2661 if (!isAligned(PrefAlign, Offset2))
2662 continue;
2663 AllocaInst *AI;
2664 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2665 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2666 AI->setAlignment(PrefAlign);
2667 // Global variables can only be aligned if they are defined in this
2668 // object (i.e. they are uniquely initialized in this object), and
2669 // over-aligning global variables that have an explicit section is
2670 // forbidden.
2671 GlobalVariable *GV;
2672 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2673 GV->getPointerAlignment(*DL) < PrefAlign &&
2674 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2675 GV->setAlignment(PrefAlign);
2676 }
2677 }
2678 // If this is a memcpy (or similar) then we may be able to improve the
2679 // alignment.
2680 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2681 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2682 MaybeAlign MIDestAlign = MI->getDestAlign();
2683 if (!MIDestAlign || DestAlign > *MIDestAlign)
2684 MI->setDestAlignment(DestAlign);
2685 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2686 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2687 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2688 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2689 MTI->setSourceAlignment(SrcAlign);
2690 }
2691 }
2692
2693 // If we have a cold call site, try to sink addressing computation into the
2694 // cold block. This interacts with our handling for loads and stores to
2695 // ensure that we can fold all uses of a potential addressing computation
2696 // into their uses. TODO: generalize this to work over profiling data
2697 if (CI->hasFnAttr(Attribute::Cold) &&
2698 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2699 for (auto &Arg : CI->args()) {
2700 if (!Arg->getType()->isPointerTy())
2701 continue;
2702 unsigned AS = Arg->getType()->getPointerAddressSpace();
2703 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2704 return true;
2705 }
2706
2707 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2708 if (II) {
2709 switch (II->getIntrinsicID()) {
2710 default:
2711 break;
2712 case Intrinsic::assume:
2713 llvm_unreachable("llvm.assume should have been removed already");
2714 case Intrinsic::allow_runtime_check:
2715 case Intrinsic::allow_ubsan_check:
2716 case Intrinsic::experimental_widenable_condition: {
2717 // Give up on future widening opportunities so that we can fold away dead
2718 // paths and merge blocks before going into block-local instruction
2719 // selection.
2720 if (II->use_empty()) {
2721 II->eraseFromParent();
2722 return true;
2723 }
2724 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2725 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2726 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2727 });
2728 return true;
2729 }
2730 case Intrinsic::objectsize:
2731 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2732 case Intrinsic::is_constant:
2733 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2734 case Intrinsic::aarch64_stlxr:
2735 case Intrinsic::aarch64_stxr: {
2736 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2737 if (!ExtVal || !ExtVal->hasOneUse() ||
2738 ExtVal->getParent() == CI->getParent())
2739 return false;
2740 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2741 ExtVal->moveBefore(CI->getIterator());
2742 // Mark this instruction as "inserted by CGP", so that other
2743 // optimizations don't touch it.
2744 InsertedInsts.insert(ExtVal);
2745 return true;
2746 }
2747
2748 case Intrinsic::launder_invariant_group:
2749 case Intrinsic::strip_invariant_group: {
2750 Value *ArgVal = II->getArgOperand(0);
2751 auto it = LargeOffsetGEPMap.find(II);
2752 if (it != LargeOffsetGEPMap.end()) {
2753 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2754 // Make sure not to have to deal with iterator invalidation
2755 // after possibly adding ArgVal to LargeOffsetGEPMap.
2756 auto GEPs = std::move(it->second);
2757 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2758 LargeOffsetGEPMap.erase(II);
2759 }
2760
2761 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2762 II->eraseFromParent();
2763 return true;
2764 }
2765 case Intrinsic::cttz:
2766 case Intrinsic::ctlz:
2767 // If counting zeros is expensive, try to avoid it.
2768 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2769 IsHugeFunc);
2770 case Intrinsic::fshl:
2771 case Intrinsic::fshr:
2772 return optimizeFunnelShift(II);
2773 case Intrinsic::masked_gather:
2774 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2775 case Intrinsic::masked_scatter:
2776 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2777 case Intrinsic::masked_load:
2778 // Treat v1X masked load as load X type.
2779 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2780 if (VT->getNumElements() == 1) {
2781 Value *PtrVal = II->getArgOperand(0);
2782 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2783 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2784 return true;
2785 }
2786 }
2787 return false;
2788 case Intrinsic::masked_store:
2789 // Treat v1X masked store as store X type.
2790 if (auto *VT =
2791 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2792 if (VT->getNumElements() == 1) {
2793 Value *PtrVal = II->getArgOperand(1);
2794 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2795 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2796 return true;
2797 }
2798 }
2799 return false;
2800 }
2801
2802 SmallVector<Value *, 2> PtrOps;
2803 Type *AccessTy;
2804 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2805 while (!PtrOps.empty()) {
2806 Value *PtrVal = PtrOps.pop_back_val();
2807 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2808 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2809 return true;
2810 }
2811 }
2812
2813 // From here on out we're working with named functions.
2814 auto *Callee = CI->getCalledFunction();
2815 if (!Callee)
2816 return false;
2817
2818 // Lower all default uses of _chk calls. This is very similar
2819 // to what InstCombineCalls does, but here we are only lowering calls
2820 // to fortified library functions (e.g. __memcpy_chk) that have the default
2821 // "don't know" as the objectsize. Anything else should be left alone.
2822 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2823 IRBuilder<> Builder(CI);
2824 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2825 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2826 CI->eraseFromParent();
2827 return true;
2828 }
2829
2830 // SCCP may have propagated, among other things, C++ static variables across
2831 // calls. If this happens to be the case, we may want to undo it in order to
2832 // avoid redundant pointer computation of the constant, as the function
2833 // returning the constant needs to be executed anyway.
2834 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2835 if (!F->getReturnType()->isPointerTy())
2836 return nullptr;
2837
2838 GlobalVariable *UniformValue = nullptr;
2839 for (auto &BB : *F) {
2840 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2841 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2842 if (!UniformValue)
2843 UniformValue = V;
2844 else if (V != UniformValue)
2845 return nullptr;
2846 } else {
2847 return nullptr;
2848 }
2849 }
2850 }
2851
2852 return UniformValue;
2853 };
2854
2855 if (Callee->hasExactDefinition()) {
2856 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2857 bool MadeChange = false;
2858 for (Use &U : make_early_inc_range(RV->uses())) {
2859 auto *I = dyn_cast<Instruction>(U.getUser());
2860 if (!I || I->getParent() != CI->getParent()) {
2861 // Limit to the same basic block to avoid extending the call-site live
2862 // range, which otherwise could increase register pressure.
2863 continue;
2864 }
2865 if (CI->comesBefore(I)) {
2866 U.set(CI);
2867 MadeChange = true;
2868 }
2869 }
2870
2871 return MadeChange;
2872 }
2873 }
2874
2875 return false;
2876}
2877
2878static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2879 const CallInst *CI) {
2880 assert(CI && CI->use_empty());
2881
2882 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2883 switch (II->getIntrinsicID()) {
2884 case Intrinsic::memset:
2885 case Intrinsic::memcpy:
2886 case Intrinsic::memmove:
2887 return true;
2888 default:
2889 return false;
2890 }
2891
2892 LibFunc LF;
2893 Function *Callee = CI->getCalledFunction();
2894 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2895 switch (LF) {
2896 case LibFunc_strcpy:
2897 case LibFunc_strncpy:
2898 case LibFunc_strcat:
2899 case LibFunc_strncat:
2900 return true;
2901 default:
2902 return false;
2903 }
2904
2905 return false;
2906}
2907
2908/// Look for opportunities to duplicate return instructions to the predecessor
2909/// to enable tail call optimizations. The case it is currently looking for is
2910/// the following one. Known intrinsics or library functions that may be tail
2911/// called are taken into account as well.
2912/// @code
2913/// bb0:
2914/// %tmp0 = tail call i32 @f0()
2915/// br label %return
2916/// bb1:
2917/// %tmp1 = tail call i32 @f1()
2918/// br label %return
2919/// bb2:
2920/// %tmp2 = tail call i32 @f2()
2921/// br label %return
2922/// return:
2923/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2924/// ret i32 %retval
2925/// @endcode
2926///
2927/// =>
2928///
2929/// @code
2930/// bb0:
2931/// %tmp0 = tail call i32 @f0()
2932/// ret i32 %tmp0
2933/// bb1:
2934/// %tmp1 = tail call i32 @f1()
2935/// ret i32 %tmp1
2936/// bb2:
2937/// %tmp2 = tail call i32 @f2()
2938/// ret i32 %tmp2
2939/// @endcode
2940bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2941 ModifyDT &ModifiedDT) {
2942 if (!BB->getTerminator())
2943 return false;
2944
2945 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2946 if (!RetI)
2947 return false;
2948
2949 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2950
2951 PHINode *PN = nullptr;
2952 ExtractValueInst *EVI = nullptr;
2953 BitCastInst *BCI = nullptr;
2954 Value *V = RetI->getReturnValue();
2955 if (V) {
2956 BCI = dyn_cast<BitCastInst>(V);
2957 if (BCI)
2958 V = BCI->getOperand(0);
2959
2960 EVI = dyn_cast<ExtractValueInst>(V);
2961 if (EVI) {
2962 V = EVI->getOperand(0);
2963 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2964 return false;
2965 }
2966
2967 PN = dyn_cast<PHINode>(V);
2968 }
2969
2970 if (PN && PN->getParent() != BB)
2971 return false;
2972
2973 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2974 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2975 if (BC && BC->hasOneUse())
2976 Inst = BC->user_back();
2977
2978 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2979 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2980 return false;
2981 };
2982
2983 SmallVector<const IntrinsicInst *, 4> FakeUses;
2984
2985 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2986 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2987 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2988 // Record the instruction so it can be preserved when the exit block is
2989 // removed. Do not preserve the fake use that uses the result of the
2990 // PHI instruction.
2991 // Do not copy fake uses that use the result of a PHI node.
2992 // FIXME: If we do want to copy the fake use into the return blocks, we
2993 // have to figure out which of the PHI node operands to use for each
2994 // copy.
2995 if (!isa<PHINode>(II->getOperand(0))) {
2996 FakeUses.push_back(II);
2997 }
2998 return true;
2999 }
3000
3001 return false;
3002 };
3003
3004 // Make sure there are no instructions between the first instruction
3005 // and return.
3006 BasicBlock::iterator BI = BB->getFirstNonPHIIt();
3007 // Skip over pseudo-probes and the bitcast.
3008 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3009 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3010 BI = std::next(BI);
3011 if (&*BI != RetI)
3012 return false;
3013
3014 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3015 /// call.
3016 const Function *F = BB->getParent();
3017 SmallVector<BasicBlock *, 4> TailCallBBs;
3018 // Record the call instructions so we can insert any fake uses
3019 // that need to be preserved before them.
3020 SmallVector<CallInst *, 4> CallInsts;
3021 if (PN) {
3022 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3023 // Look through bitcasts.
3024 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3025 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3026 BasicBlock *PredBB = PN->getIncomingBlock(I);
3027 // Make sure the phi value is indeed produced by the tail call.
3028 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3029 TLI->mayBeEmittedAsTailCall(CI) &&
3030 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3031 TailCallBBs.push_back(PredBB);
3032 CallInsts.push_back(CI);
3033 } else {
3034 // Consider the cases in which the phi value is indirectly produced by
3035 // the tail call, for example when encountering memset(), memmove(),
3036 // strcpy(), whose return value may have been optimized out. In such
3037 // cases, the value needs to be the first function argument.
3038 //
3039 // bb0:
3040 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3041 // br label %return
3042 // return:
3043 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3044 if (PredBB && PredBB->getSingleSuccessor() == BB)
3045 CI = dyn_cast_or_null<CallInst>(
3046 PredBB->getTerminator()->getPrevNode());
3047
3048 if (CI && CI->use_empty() &&
3049 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3050 IncomingVal == CI->getArgOperand(0) &&
3051 TLI->mayBeEmittedAsTailCall(CI) &&
3052 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3053 TailCallBBs.push_back(PredBB);
3054 CallInsts.push_back(CI);
3055 }
3056 }
3057 }
3058 } else {
3059 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3060 for (BasicBlock *Pred : predecessors(BB)) {
3061 if (!VisitedBBs.insert(Pred).second)
3062 continue;
3063 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3064 CallInst *CI = dyn_cast<CallInst>(I);
3065 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
3066 attributesPermitTailCall(F, CI, RetI, *TLI)) {
3067 // Either we return void or the return value must be the first
3068 // argument of a known intrinsic or library function.
3069 if (!V || isa<UndefValue>(V) ||
3070 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3071 V == CI->getArgOperand(0))) {
3072 TailCallBBs.push_back(Pred);
3073 CallInsts.push_back(CI);
3074 }
3075 }
3076 }
3077 }
3078 }
3079
3080 bool Changed = false;
3081 for (auto const &TailCallBB : TailCallBBs) {
3082 // Make sure the call instruction is followed by an unconditional branch to
3083 // the return block.
3084 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3085 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3086 continue;
3087
3088 // Duplicate the return into TailCallBB.
3089 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3090 assert(!VerifyBFIUpdates ||
3091 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3092 BFI->setBlockFreq(BB,
3093 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3094 ModifiedDT = ModifyDT::ModifyBBDT;
3095 Changed = true;
3096 ++NumRetsDup;
3097 }
3098
3099 // If we eliminated all predecessors of the block, delete the block now.
3100 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3101 // Copy the fake uses found in the original return block to all blocks
3102 // that contain tail calls.
3103 for (auto *CI : CallInsts) {
3104 for (auto const *FakeUse : FakeUses) {
3105 auto *ClonedInst = FakeUse->clone();
3106 ClonedInst->insertBefore(CI->getIterator());
3107 }
3108 }
3109 BB->eraseFromParent();
3110 }
3111
3112 return Changed;
3113}
3114
3115//===----------------------------------------------------------------------===//
3116// Memory Optimization
3117//===----------------------------------------------------------------------===//
3118
3119namespace {
3120
3121/// This is an extended version of TargetLowering::AddrMode
3122/// which holds actual Value*'s for register values.
3123struct ExtAddrMode : public TargetLowering::AddrMode {
3124 Value *BaseReg = nullptr;
3125 Value *ScaledReg = nullptr;
3126 Value *OriginalValue = nullptr;
3127 bool InBounds = true;
3128
3129 enum FieldName {
3130 NoField = 0x00,
3131 BaseRegField = 0x01,
3132 BaseGVField = 0x02,
3133 BaseOffsField = 0x04,
3134 ScaledRegField = 0x08,
3135 ScaleField = 0x10,
3136 MultipleFields = 0xff
3137 };
3138
3139 ExtAddrMode() = default;
3140
3141 void print(raw_ostream &OS) const;
3142 void dump() const;
3143
3144 // Replace From in ExtAddrMode with To.
3145 // E.g., SExt insts may be promoted and deleted. We should replace them with
3146 // the promoted values.
3147 void replaceWith(Value *From, Value *To) {
3148 if (ScaledReg == From)
3149 ScaledReg = To;
3150 }
3151
3152 FieldName compare(const ExtAddrMode &other) {
3153 // First check that the types are the same on each field, as differing types
3154 // is something we can't cope with later on.
3155 if (BaseReg && other.BaseReg &&
3156 BaseReg->getType() != other.BaseReg->getType())
3157 return MultipleFields;
3158 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3159 return MultipleFields;
3160 if (ScaledReg && other.ScaledReg &&
3161 ScaledReg->getType() != other.ScaledReg->getType())
3162 return MultipleFields;
3163
3164 // Conservatively reject 'inbounds' mismatches.
3165 if (InBounds != other.InBounds)
3166 return MultipleFields;
3167
3168 // Check each field to see if it differs.
3169 unsigned Result = NoField;
3170 if (BaseReg != other.BaseReg)
3171 Result |= BaseRegField;
3172 if (BaseGV != other.BaseGV)
3173 Result |= BaseGVField;
3174 if (BaseOffs != other.BaseOffs)
3175 Result |= BaseOffsField;
3176 if (ScaledReg != other.ScaledReg)
3177 Result |= ScaledRegField;
3178 // Don't count 0 as being a different scale, because that actually means
3179 // unscaled (which will already be counted by having no ScaledReg).
3180 if (Scale && other.Scale && Scale != other.Scale)
3181 Result |= ScaleField;
3182
3183 if (llvm::popcount(Result) > 1)
3184 return MultipleFields;
3185 else
3186 return static_cast<FieldName>(Result);
3187 }
3188
3189 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3190 // with no offset.
3191 bool isTrivial() {
3192 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3193 // trivial if at most one of these terms is nonzero, except that BaseGV and
3194 // BaseReg both being zero actually means a null pointer value, which we
3195 // consider to be 'non-zero' here.
3196 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3197 }
3198
3199 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3200 switch (Field) {
3201 default:
3202 return nullptr;
3203 case BaseRegField:
3204 return BaseReg;
3205 case BaseGVField:
3206 return BaseGV;
3207 case ScaledRegField:
3208 return ScaledReg;
3209 case BaseOffsField:
3210 return ConstantInt::getSigned(IntPtrTy, BaseOffs);
3211 }
3212 }
3213
3214 void SetCombinedField(FieldName Field, Value *V,
3215 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3216 switch (Field) {
3217 default:
3218 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3219 break;
3220 case ExtAddrMode::BaseRegField:
3221 BaseReg = V;
3222 break;
3223 case ExtAddrMode::BaseGVField:
3224 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3225 // in the BaseReg field.
3226 assert(BaseReg == nullptr);
3227 BaseReg = V;
3228 BaseGV = nullptr;
3229 break;
3230 case ExtAddrMode::ScaledRegField:
3231 ScaledReg = V;
3232 // If we have a mix of scaled and unscaled addrmodes then we want scale
3233 // to be the scale and not zero.
3234 if (!Scale)
3235 for (const ExtAddrMode &AM : AddrModes)
3236 if (AM.Scale) {
3237 Scale = AM.Scale;
3238 break;
3239 }
3240 break;
3241 case ExtAddrMode::BaseOffsField:
3242 // The offset is no longer a constant, so it goes in ScaledReg with a
3243 // scale of 1.
3244 assert(ScaledReg == nullptr);
3245 ScaledReg = V;
3246 Scale = 1;
3247 BaseOffs = 0;
3248 break;
3249 }
3250 }
3251};
3252
3253#ifndef NDEBUG
3254static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3255 AM.print(OS);
3256 return OS;
3257}
3258#endif
3259
3260#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3261void ExtAddrMode::print(raw_ostream &OS) const {
3262 bool NeedPlus = false;
3263 OS << "[";
3264 if (InBounds)
3265 OS << "inbounds ";
3266 if (BaseGV) {
3267 OS << "GV:";
3268 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3269 NeedPlus = true;
3270 }
3271
3272 if (BaseOffs) {
3273 OS << (NeedPlus ? " + " : "") << BaseOffs;
3274 NeedPlus = true;
3275 }
3276
3277 if (BaseReg) {
3278 OS << (NeedPlus ? " + " : "") << "Base:";
3279 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3280 NeedPlus = true;
3281 }
3282 if (Scale) {
3283 OS << (NeedPlus ? " + " : "") << Scale << "*";
3284 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3285 }
3286
3287 OS << ']';
3288}
3289
3290LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3291 print(dbgs());
3292 dbgs() << '\n';
3293}
3294#endif
3295
3296} // end anonymous namespace
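// Editor's note: the field-difference encoding used by ExtAddrMode::compare()
// above can be illustrated with a small standalone sketch. This is plain C++
// (not part of this file) and is wrapped in #if 0 so it is never compiled
// here; names and values are illustrative only. Each differing field sets one
// bit, and std::popcount > 1 collapses any multi-field difference into a
// single MultipleFields answer.
#if 0
#include <bit>
#include <cstdio>

enum Field : unsigned {
  NoField = 0x0,
  BaseRegField = 0x1,
  BaseOffsField = 0x2,
  MultipleFields = 0xff
};

static unsigned diff(unsigned RegA, unsigned RegB, int OffA, int OffB) {
  unsigned Result = NoField;
  if (RegA != RegB)
    Result |= BaseRegField;
  if (OffA != OffB)
    Result |= BaseOffsField;
  // More than one bit set means the modes differ in several dimensions.
  return std::popcount(Result) > 1 ? (unsigned)MultipleFields : Result;
}

int main() {
  std::printf("%u\n", diff(1, 1, 0, 8)); // Single-field difference: 0x2.
  std::printf("%u\n", diff(1, 2, 0, 8)); // Two fields differ: 0xff.
}
#endif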
3297
3298namespace {
3299
3300/// This class provides transaction-based operations on the IR.
3301/// Every change made through this class is recorded in the internal state and
3302/// can be undone (rollback) until commit is called.
3303/// CGP does not check if instructions could be speculatively executed when
3304/// moved. Preserving the original location would pessimize the debugging
3305/// experience, as well as negatively impact the quality of sample PGO.
3306class TypePromotionTransaction {
3307 /// This represents the common interface of the individual transaction.
3308 /// Each class implements the logic for doing one specific modification on
3309 /// the IR via the TypePromotionTransaction.
3310 class TypePromotionAction {
3311 protected:
3312 /// The Instruction modified.
3313 Instruction *Inst;
3314
3315 public:
3316 /// Constructor of the action.
3317 /// The constructor performs the related action on the IR.
3318 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3319
3320 virtual ~TypePromotionAction() = default;
3321
3322 /// Undo the modification done by this action.
3323 /// When this method is called, the IR must be in the same state as it was
3324 /// before this action was applied.
3325 /// \pre Undoing the action works if and only if the IR is in the exact same
3326 /// state as it was directly after this action was applied.
3327 virtual void undo() = 0;
3328
3329 /// Advocate every change made by this action.
3330 /// When the results on the IR of the action are to be kept, it is important
3331 /// to call this function, otherwise hidden information may be kept forever.
3332 virtual void commit() {
3333 // Nothing to be done, this action is not doing anything.
3334 }
3335 };
3336
3337 /// Utility to remember the position of an instruction.
3338 class InsertionHandler {
3339 /// Position of an instruction.
3340 /// Either an instruction:
3341 /// - Is the first in a basic block: BB is used.
3342 /// - Has a previous instruction: PrevInst is used.
3343 struct {
3344 BasicBlock::iterator PrevInst;
3345 BasicBlock *BB;
3346 } Point;
3347 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3348
3349 /// Remember whether or not the instruction had a previous instruction.
3350 bool HasPrevInstruction;
3351
3352 public:
3353 /// Record the position of \p Inst.
3354 InsertionHandler(Instruction *Inst) {
3355 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3356 BasicBlock *BB = Inst->getParent();
3357
3358 // Record where we would have to re-insert the instruction in the sequence
3359 // of DbgRecords, if we ended up reinserting.
3360 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3361
3362 if (HasPrevInstruction) {
3363 Point.PrevInst = std::prev(Inst->getIterator());
3364 } else {
3365 Point.BB = BB;
3366 }
3367 }
3368
3369 /// Insert \p Inst at the recorded position.
3370 void insert(Instruction *Inst) {
3371 if (HasPrevInstruction) {
3372 if (Inst->getParent())
3373 Inst->removeFromParent();
3374 Inst->insertAfter(Point.PrevInst);
3375 } else {
3376 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3377 if (Inst->getParent())
3378 Inst->moveBefore(*Point.BB, Position);
3379 else
3380 Inst->insertBefore(*Point.BB, Position);
3381 }
3382
3383 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3384 }
3385 };
3386
3387 /// Move an instruction before another.
3388 class InstructionMoveBefore : public TypePromotionAction {
3389 /// Original position of the instruction.
3390 InsertionHandler Position;
3391
3392 public:
3393 /// Move \p Inst before \p Before.
3394 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3395 : TypePromotionAction(Inst), Position(Inst) {
3396 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3397 << "\n");
3398 Inst->moveBefore(Before);
3399 }
3400
3401 /// Move the instruction back to its original position.
3402 void undo() override {
3403 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3404 Position.insert(Inst);
3405 }
3406 };
3407
3408 /// Set the operand of an instruction with a new value.
3409 class OperandSetter : public TypePromotionAction {
3410 /// Original operand of the instruction.
3411 Value *Origin;
3412
3413 /// Index of the modified operand.
3414 unsigned Idx;
3415
3416 public:
3417 /// Set \p Idx operand of \p Inst with \p NewVal.
3418 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3419 : TypePromotionAction(Inst), Idx(Idx) {
3420 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3421 << "for:" << *Inst << "\n"
3422 << "with:" << *NewVal << "\n");
3423 Origin = Inst->getOperand(Idx);
3424 Inst->setOperand(Idx, NewVal);
3425 }
3426
3427 /// Restore the original value of the instruction.
3428 void undo() override {
3429 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3430 << "for: " << *Inst << "\n"
3431 << "with: " << *Origin << "\n");
3432 Inst->setOperand(Idx, Origin);
3433 }
3434 };
3435
3436 /// Hide the operands of an instruction.
3437 /// Do as if this instruction was not using any of its operands.
3438 class OperandsHider : public TypePromotionAction {
3439 /// The list of original operands.
3440 SmallVector<Value *, 4> OriginalValues;
3441
3442 public:
3443 /// Remove \p Inst from the uses of the operands of \p Inst.
3444 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3445 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3446 unsigned NumOpnds = Inst->getNumOperands();
3447 OriginalValues.reserve(NumOpnds);
3448 for (unsigned It = 0; It < NumOpnds; ++It) {
3449 // Save the current operand.
3450 Value *Val = Inst->getOperand(It);
3451 OriginalValues.push_back(Val);
3452 // Set a dummy one.
3453 // We could use OperandSetter here, but that would imply an overhead
3454 // that we are not willing to pay.
3455 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3456 }
3457 }
3458
3459 /// Restore the original list of uses.
3460 void undo() override {
3461 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3462 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3463 Inst->setOperand(It, OriginalValues[It]);
3464 }
3465 };
3466
3467 /// Build a truncate instruction.
3468 class TruncBuilder : public TypePromotionAction {
3469 Value *Val;
3470
3471 public:
3472 /// Build a truncate instruction of \p Opnd producing a \p Ty
3473 /// result.
3474 /// trunc Opnd to Ty.
3475 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3476 IRBuilder<> Builder(Opnd);
3477 Builder.SetCurrentDebugLocation(DebugLoc());
3478 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3479 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3480 }
3481
3482 /// Get the built value.
3483 Value *getBuiltValue() { return Val; }
3484
3485 /// Remove the built instruction.
3486 void undo() override {
3487 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3488 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3489 IVal->eraseFromParent();
3490 }
3491 };
3492
3493 /// Build a sign extension instruction.
3494 class SExtBuilder : public TypePromotionAction {
3495 Value *Val;
3496
3497 public:
3498 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3499 /// result.
3500 /// sext Opnd to Ty.
3501 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3502 : TypePromotionAction(InsertPt) {
3503 IRBuilder<> Builder(InsertPt);
3504 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3505 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3506 }
3507
3508 /// Get the built value.
3509 Value *getBuiltValue() { return Val; }
3510
3511 /// Remove the built instruction.
3512 void undo() override {
3513 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3514 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3515 IVal->eraseFromParent();
3516 }
3517 };
3518
3519 /// Build a zero extension instruction.
3520 class ZExtBuilder : public TypePromotionAction {
3521 Value *Val;
3522
3523 public:
3524 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3525 /// result.
3526 /// zext Opnd to Ty.
3527 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3528 : TypePromotionAction(InsertPt) {
3529 IRBuilder<> Builder(InsertPt);
3530 Builder.SetCurrentDebugLocation(DebugLoc());
3531 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3532 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3533 }
3534
3535 /// Get the built value.
3536 Value *getBuiltValue() { return Val; }
3537
3538 /// Remove the built instruction.
3539 void undo() override {
3540 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3541 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3542 IVal->eraseFromParent();
3543 }
3544 };
3545
3546 /// Mutate an instruction to another type.
3547 class TypeMutator : public TypePromotionAction {
3548 /// Record the original type.
3549 Type *OrigTy;
3550
3551 public:
3552 /// Mutate the type of \p Inst into \p NewTy.
3553 TypeMutator(Instruction *Inst, Type *NewTy)
3554 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3555 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3556 << "\n");
3557 Inst->mutateType(NewTy);
3558 }
3559
3560 /// Mutate the instruction back to its original type.
3561 void undo() override {
3562 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3563 << "\n");
3564 Inst->mutateType(OrigTy);
3565 }
3566 };
3567
3568 /// Replace the uses of an instruction by another instruction.
3569 class UsesReplacer : public TypePromotionAction {
3570 /// Helper structure to keep track of the replaced uses.
3571 struct InstructionAndIdx {
3572 /// The instruction using the instruction.
3573 Instruction *Inst;
3574
3575 /// The index where this instruction is used for Inst.
3576 unsigned Idx;
3577
3578 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3579 : Inst(Inst), Idx(Idx) {}
3580 };
3581
3582 /// Keep track of the original uses (pair Instruction, Index).
3583 SmallVector<InstructionAndIdx, 4> OriginalUses;
3584 /// Keep track of the debug users.
3585 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3586
3587 /// Keep track of the new value so that we can undo it by replacing
3588 /// instances of the new value with the original value.
3589 Value *New;
3590
3592
3593 public:
3594 /// Replace all the use of \p Inst by \p New.
3595 UsesReplacer(Instruction *Inst, Value *New)
3596 : TypePromotionAction(Inst), New(New) {
3597 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3598 << "\n");
3599 // Record the original uses.
3600 for (Use &U : Inst->uses()) {
3601 Instruction *UserI = cast<Instruction>(U.getUser());
3602 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3603 }
3604 // Record the debug uses separately. They are not in the instruction's
3605 // use list, but they are replaced by RAUW.
3606 findDbgValues(Inst, DbgVariableRecords);
3607
3608 // Now, we can replace the uses.
3609 Inst->replaceAllUsesWith(New);
3610 }
3611
3612 /// Reassign the original uses of Inst to Inst.
3613 void undo() override {
3614 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3615 for (InstructionAndIdx &Use : OriginalUses)
3616 Use.Inst->setOperand(Use.Idx, Inst);
3617 // RAUW has replaced all original uses with references to the new value,
3618 // including the debug uses. Since we are undoing the replacements,
3619 // the original debug uses must also be reinstated to maintain the
3620 // correctness and utility of debug value records.
3621 for (DbgVariableRecord *DVR : DbgVariableRecords)
3622 DVR->replaceVariableLocationOp(New, Inst);
3623 }
3624 };
3625
3626 /// Remove an instruction from the IR.
3627 class InstructionRemover : public TypePromotionAction {
3628 /// Original position of the instruction.
3629 InsertionHandler Inserter;
3630
3631 /// Helper structure to hide all the links to the instruction. In other
3632 /// words, this helps to act as if the instruction was removed.
3633 OperandsHider Hider;
3634
3635 /// Keep track of the uses replaced, if any.
3636 UsesReplacer *Replacer = nullptr;
3637
3638 /// Keep track of instructions removed.
3639 SetOfInstrs &RemovedInsts;
3640
3641 public:
3642 /// Remove all references to \p Inst and optionally replace all its
3643 /// uses with New.
3644 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3645 /// \pre If !Inst->use_empty(), then New != nullptr
3646 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3647 Value *New = nullptr)
3648 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3649 RemovedInsts(RemovedInsts) {
3650 if (New)
3651 Replacer = new UsesReplacer(Inst, New);
3652 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3653 RemovedInsts.insert(Inst);
3654 /// The instructions removed here will be freed after completing
3655 /// optimizeBlock() for all blocks as we need to keep track of the
3656 /// removed instructions during promotion.
3657 Inst->removeFromParent();
3658 }
3659
3660 ~InstructionRemover() override { delete Replacer; }
3661
3662 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3663 InstructionRemover(const InstructionRemover &other) = delete;
3664
3665 /// Resurrect the instruction and reassign it to the proper uses if
3666 /// a new value was provided when building this action.
3667 void undo() override {
3668 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3669 Inserter.insert(Inst);
3670 if (Replacer)
3671 Replacer->undo();
3672 Hider.undo();
3673 RemovedInsts.erase(Inst);
3674 }
3675 };
3676
3677public:
3678 /// Restoration point.
3679 /// The restoration point is a pointer to an action instead of an iterator
3680 /// because the iterator may be invalidated but not the pointer.
3681 using ConstRestorationPt = const TypePromotionAction *;
3682
3683 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3684 : RemovedInsts(RemovedInsts) {}
3685
3686 /// Advocate every change made in that transaction. Return true if any change
3687 /// happened.
3688 bool commit();
3689
3690 /// Undo all the changes made after the given point.
3691 void rollback(ConstRestorationPt Point);
3692
3693 /// Get the current restoration point.
3694 ConstRestorationPt getRestorationPoint() const;
3695
3696 /// \name API for IR modification with state keeping to support rollback.
3697 /// @{
3698 /// Same as Instruction::setOperand.
3699 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3700
3701 /// Same as Instruction::eraseFromParent.
3702 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3703
3704 /// Same as Value::replaceAllUsesWith.
3705 void replaceAllUsesWith(Instruction *Inst, Value *New);
3706
3707 /// Same as Value::mutateType.
3708 void mutateType(Instruction *Inst, Type *NewTy);
3709
3710 /// Same as IRBuilder::createTrunc.
3711 Value *createTrunc(Instruction *Opnd, Type *Ty);
3712
3713 /// Same as IRBuilder::createSExt.
3714 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3715
3716 /// Same as IRBuilder::createZExt.
3717 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3718
3719private:
3720 /// The ordered list of actions made so far.
3721 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3722
3723 using CommitPt =
3724 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3725
3726 SetOfInstrs &RemovedInsts;
3727};
3728
3729} // end anonymous namespace
3730
3731void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3732 Value *NewVal) {
3733 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3734 Inst, Idx, NewVal));
3735}
3736
3737void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3738 Value *NewVal) {
3739 Actions.push_back(
3740 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3741 Inst, RemovedInsts, NewVal));
3742}
3743
3744void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3745 Value *New) {
3746 Actions.push_back(
3747 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3748}
3749
3750void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3751 Actions.push_back(
3752 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3753}
3754
3755Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3756 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3757 Value *Val = Ptr->getBuiltValue();
3758 Actions.push_back(std::move(Ptr));
3759 return Val;
3760}
3761
3762Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3763 Type *Ty) {
3764 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3765 Value *Val = Ptr->getBuiltValue();
3766 Actions.push_back(std::move(Ptr));
3767 return Val;
3768}
3769
3770Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3771 Type *Ty) {
3772 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3773 Value *Val = Ptr->getBuiltValue();
3774 Actions.push_back(std::move(Ptr));
3775 return Val;
3776}
3777
3778TypePromotionTransaction::ConstRestorationPt
3779TypePromotionTransaction::getRestorationPoint() const {
3780 return !Actions.empty() ? Actions.back().get() : nullptr;
3781}
3782
3783bool TypePromotionTransaction::commit() {
3784 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3785 Action->commit();
3786 bool Modified = !Actions.empty();
3787 Actions.clear();
3788 return Modified;
3789}
3790
3791void TypePromotionTransaction::rollback(
3792 TypePromotionTransaction::ConstRestorationPt Point) {
3793 while (!Actions.empty() && Point != Actions.back().get()) {
3794 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3795 Curr->undo();
3796 }
3797}
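// Editor's note: a minimal standalone sketch (plain C++, not part of this
// file, wrapped in #if 0) of the undo discipline implemented by
// TypePromotionTransaction: actions are stacked as they are applied, and
// rollback() undoes them in reverse order until the restoration point is on
// top of the stack again. All names here are illustrative.
#if 0
#include <cstdio>
#include <memory>
#include <vector>

struct Action {
  virtual ~Action() = default;
  virtual void undo() = 0;
};

struct SetValue : Action {
  int &Slot;
  int Old;
  SetValue(int &Slot, int New) : Slot(Slot), Old(Slot) { Slot = New; }
  void undo() override { Slot = Old; }
};

struct Transaction {
  using RestorePt = const Action *;
  std::vector<std::unique_ptr<Action>> Actions;

  RestorePt restorePoint() const {
    return Actions.empty() ? nullptr : Actions.back().get();
  }
  void set(int &Slot, int New) {
    Actions.push_back(std::make_unique<SetValue>(Slot, New));
  }
  void rollback(RestorePt Pt) {
    while (!Actions.empty() && Actions.back().get() != Pt) {
      Actions.back()->undo();
      Actions.pop_back();
    }
  }
};

int main() {
  int X = 1;
  Transaction TPT;
  TPT.set(X, 2);
  Transaction::RestorePt Pt = TPT.restorePoint();
  TPT.set(X, 3);
  TPT.rollback(Pt);       // Undoes only the change made after Pt.
  std::printf("%d\n", X); // Prints 2.
}
#endif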
3798
3799namespace {
3800
3801/// A helper class for matching addressing modes.
3802///
3803/// This encapsulates the logic for matching the target-legal addressing modes.
3804class AddressingModeMatcher {
3805 SmallVectorImpl<Instruction *> &AddrModeInsts;
3806 const TargetLowering &TLI;
3807 const TargetRegisterInfo &TRI;
3808 const DataLayout &DL;
3809 const LoopInfo &LI;
3810 const std::function<const DominatorTree &()> getDTFn;
3811
3812 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3813 /// the memory instruction that we're computing this address for.
3814 Type *AccessTy;
3815 unsigned AddrSpace;
3816 Instruction *MemoryInst;
3817
3818 /// This is the addressing mode that we're building up. This is
3819 /// part of the return value of this addressing mode matching stuff.
3820 ExtAddrMode &AddrMode;
3821
3822 /// The instructions inserted by other CodeGenPrepare optimizations.
3823 const SetOfInstrs &InsertedInsts;
3824
3825 /// A map from the instructions to their type before promotion.
3826 InstrToOrigTy &PromotedInsts;
3827
3828 /// The ongoing transaction where every action should be registered.
3829 TypePromotionTransaction &TPT;
3830
3831 // A GEP which has too large offset to be folded into the addressing mode.
3832 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3833
3834 /// This is set to true when we should not do profitability checks.
3835 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3836 bool IgnoreProfitability;
3837
3838 /// True if we are optimizing for size.
3839 bool OptSize = false;
3840
3841 ProfileSummaryInfo *PSI;
3842 BlockFrequencyInfo *BFI;
3843
3844 AddressingModeMatcher(
3845 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3846 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3847 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3848 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3849 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3850 TypePromotionTransaction &TPT,
3851 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3852 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3853 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3854 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3855 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3856 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3857 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3858 IgnoreProfitability = false;
3859 }
3860
3861public:
3862 /// Find the maximal addressing mode that a load/store of V can fold,
3863 /// given an access type of AccessTy. This returns a list of involved
3864 /// instructions in AddrModeInsts.
3865 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3866 /// optimizations.
3867 /// \p PromotedInsts maps the instructions to their type before promotion.
3868 /// \p TPT The ongoing transaction where every action should be registered.
3869 static ExtAddrMode
3870 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3871 SmallVectorImpl<Instruction *> &AddrModeInsts,
3872 const TargetLowering &TLI, const LoopInfo &LI,
3873 const std::function<const DominatorTree &()> getDTFn,
3874 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3875 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3876 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3877 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3878 ExtAddrMode Result;
3879
3880 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3881 AccessTy, AS, MemoryInst, Result,
3882 InsertedInsts, PromotedInsts, TPT,
3883 LargeOffsetGEP, OptSize, PSI, BFI)
3884 .matchAddr(V, 0);
3885 (void)Success;
3886 assert(Success && "Couldn't select *anything*?");
3887 return Result;
3888 }
3889
3890private:
3891 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3892 bool matchAddr(Value *Addr, unsigned Depth);
3893 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3894 bool *MovedAway = nullptr);
3895 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3896 ExtAddrMode &AMBefore,
3897 ExtAddrMode &AMAfter);
3898 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3899 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3900 Value *PromotedOperand) const;
3901};
3902
3903class PhiNodeSet;
3904
3905/// An iterator for PhiNodeSet.
3906class PhiNodeSetIterator {
3907 PhiNodeSet *const Set;
3908 size_t CurrentIndex = 0;
3909
3910public:
3911 /// The constructor. Start should point to either a valid element, or be equal
3912 /// to the size of the underlying SmallVector of the PhiNodeSet.
3913 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3914 PHINode *operator*() const;
3915 PhiNodeSetIterator &operator++();
3916 bool operator==(const PhiNodeSetIterator &RHS) const;
3917 bool operator!=(const PhiNodeSetIterator &RHS) const;
3918};
3919
3920/// Keeps a set of PHINodes.
3921///
3922/// This is a minimal set implementation for a specific use case:
3923/// It is very fast when there are very few elements, but also provides good
3924/// performance when there are many. It is similar to SmallPtrSet, but also
3925/// provides iteration by insertion order, which is deterministic and stable
3926/// across runs. It is also similar to SmallSetVector, but provides removing
3927/// elements in O(1) time. This is achieved by not actually removing the element
3928/// from the underlying vector, so comes at the cost of using more memory, but
3929/// that is fine, since PhiNodeSets are used as short lived objects.
3930class PhiNodeSet {
3931 friend class PhiNodeSetIterator;
3932
3933 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3934 using iterator = PhiNodeSetIterator;
3935
3936 /// Keeps the elements in the order of their insertion in the underlying
3937 /// vector. To achieve constant time removal, it never deletes any element.
3938 SmallVector<PHINode *, 32> NodeList;
3939
3940 /// Keeps the elements in the underlying set implementation. This (and not the
3941 /// NodeList defined above) is the source of truth on whether an element
3942 /// is actually in the collection.
3943 MapType NodeMap;
3944
3945 /// Points to the first valid (not deleted) element when the set is not empty
3946 /// and the value is not zero. Equals to the size of the underlying vector
3947 /// when the set is empty. When the value is 0, as in the beginning, the
3948 /// first element may or may not be valid.
3949 size_t FirstValidElement = 0;
3950
3951public:
3952 /// Inserts a new element to the collection.
3953 /// \returns true if the element is actually added, i.e. was not in the
3954 /// collection before the operation.
3955 bool insert(PHINode *Ptr) {
3956 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3957 NodeList.push_back(Ptr);
3958 return true;
3959 }
3960 return false;
3961 }
3962
3963 /// Removes the element from the collection.
3964 /// \returns whether the element is actually removed, i.e. was in the
3965 /// collection before the operation.
3966 bool erase(PHINode *Ptr) {
3967 if (NodeMap.erase(Ptr)) {
3968 SkipRemovedElements(FirstValidElement);
3969 return true;
3970 }
3971 return false;
3972 }
3973
3974 /// Removes all elements and clears the collection.
3975 void clear() {
3976 NodeMap.clear();
3977 NodeList.clear();
3978 FirstValidElement = 0;
3979 }
3980
3981 /// \returns an iterator that will iterate the elements in the order of
3982 /// insertion.
3983 iterator begin() {
3984 if (FirstValidElement == 0)
3985 SkipRemovedElements(FirstValidElement);
3986 return PhiNodeSetIterator(this, FirstValidElement);
3987 }
3988
3989 /// \returns an iterator that points to the end of the collection.
3990 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3991
3992 /// Returns the number of elements in the collection.
3993 size_t size() const { return NodeMap.size(); }
3994
3995 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
3996 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3997
3998private:
3999 /// Updates the CurrentIndex so that it will point to a valid element.
4000 ///
4001 /// If the element of NodeList at CurrentIndex is valid, it does not
4002 /// change it. If there are no more valid elements, it updates CurrentIndex
4003 /// to point to the end of the NodeList.
4004 void SkipRemovedElements(size_t &CurrentIndex) {
4005 while (CurrentIndex < NodeList.size()) {
4006 auto it = NodeMap.find(NodeList[CurrentIndex]);
4007 // If the element has been deleted and added again later, NodeMap will
4008 // point to a different index, so CurrentIndex will still be invalid.
4009 if (it != NodeMap.end() && it->second == CurrentIndex)
4010 break;
4011 ++CurrentIndex;
4012 }
4013 }
4014};
4015
4016PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4017 : Set(Set), CurrentIndex(Start) {}
4018
4019PHINode *PhiNodeSetIterator::operator*() const {
4020 assert(CurrentIndex < Set->NodeList.size() &&
4021 "PhiNodeSet access out of range");
4022 return Set->NodeList[CurrentIndex];
4023}
4024
4025PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4026 assert(CurrentIndex < Set->NodeList.size() &&
4027 "PhiNodeSet access out of range");
4028 ++CurrentIndex;
4029 Set->SkipRemovedElements(CurrentIndex);
4030 return *this;
4031}
4032
4033bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4034 return CurrentIndex == RHS.CurrentIndex;
4035}
4036
4037bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4038 return !((*this) == RHS);
4039}
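// Editor's note: a standalone sketch (plain C++, not part of this file,
// wrapped in #if 0) of the PhiNodeSet idea above: keep insertion order in a
// vector that is never shrunk, let a side map be the source of truth so erase
// is O(1), and have iteration skip entries whose recorded index no longer
// matches their vector slot. Names are illustrative.
#if 0
#include <cstdio>
#include <string>
#include <unordered_map>
#include <vector>

class OrderedSet {
  std::vector<std::string> List;               // Insertion order, never shrunk.
  std::unordered_map<std::string, size_t> Map; // Source of truth.

public:
  bool insert(const std::string &V) {
    if (Map.emplace(V, List.size()).second) {
      List.push_back(V);
      return true;
    }
    return false;
  }
  bool erase(const std::string &V) { return Map.erase(V) != 0; }
  template <typename Fn> void forEach(Fn F) const {
    for (size_t I = 0; I < List.size(); ++I) {
      auto It = Map.find(List[I]);
      if (It != Map.end() && It->second == I) // Skip stale (erased) slots.
        F(List[I]);
    }
  }
};

int main() {
  OrderedSet S;
  S.insert("a");
  S.insert("b");
  S.insert("c");
  S.erase("b"); // O(1): only the map entry goes away.
  S.forEach([](const std::string &V) { std::printf("%s\n", V.c_str()); });
}
#endif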
4040
4041/// Keep track of simplification of Phi nodes.
4042 /// Accept the set of all phi nodes and erase a phi node from this set
4043/// if it is simplified.
4044class SimplificationTracker {
4045 DenseMap<Value *, Value *> Storage;
4046 // Tracks newly created Phi nodes. The elements are iterated by insertion
4047 // order.
4048 PhiNodeSet AllPhiNodes;
4049 // Tracks newly created Select nodes.
4050 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4051
4052public:
4053 Value *Get(Value *V) {
4054 do {
4055 auto SV = Storage.find(V);
4056 if (SV == Storage.end())
4057 return V;
4058 V = SV->second;
4059 } while (true);
4060 }
4061
4062 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4063
4064 void ReplacePhi(PHINode *From, PHINode *To) {
4065 Value *OldReplacement = Get(From);
4066 while (OldReplacement != From) {
4067 From = To;
4068 To = dyn_cast<PHINode>(OldReplacement);
4069 OldReplacement = Get(From);
4070 }
4071 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4072 Put(From, To);
4073 From->replaceAllUsesWith(To);
4074 AllPhiNodes.erase(From);
4075 From->eraseFromParent();
4076 }
4077
4078 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4079
4080 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4081
4082 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4083
4084 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4085
4086 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4087
4088 void destroyNewNodes(Type *CommonType) {
4089 // For safe erasing, replace the uses with dummy value first.
4090 auto *Dummy = PoisonValue::get(CommonType);
4091 for (auto *I : AllPhiNodes) {
4092 I->replaceAllUsesWith(Dummy);
4093 I->eraseFromParent();
4094 }
4095 AllPhiNodes.clear();
4096 for (auto *I : AllSelectNodes) {
4097 I->replaceAllUsesWith(Dummy);
4098 I->eraseFromParent();
4099 }
4100 AllSelectNodes.clear();
4101 }
4102};
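// Editor's note: a standalone sketch (plain C++, not part of this file,
// wrapped in #if 0) of the replacement chasing done by
// SimplificationTracker::Get() above: follow the replacement map until a value
// is no longer a key, i.e. it is its own final representative.
#if 0
#include <cstdio>
#include <unordered_map>

static int get(const std::unordered_map<int, int> &Repl, int V) {
  for (auto It = Repl.find(V); It != Repl.end(); It = Repl.find(V))
    V = It->second; // Hop to the replacement and look it up again.
  return V;
}

int main() {
  // 1 was replaced by 2, and 2 was later replaced by 3.
  std::unordered_map<int, int> Repl = {{1, 2}, {2, 3}};
  std::printf("%d\n", get(Repl, 1)); // Prints 3.
}
#endif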
4103
4104/// A helper class for combining addressing modes.
4105class AddressingModeCombiner {
4106 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4107 typedef std::pair<PHINode *, PHINode *> PHIPair;
4108
4109private:
4110 /// The addressing modes we've collected.
4111 SmallVector<ExtAddrMode, 16> AddrModes;
4112
4113 /// The field in which the AddrModes differ, when we have more than one.
4114 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4115
4116 /// Are the AddrModes that we have all just equal to their original values?
4117 bool AllAddrModesTrivial = true;
4118
4119 /// Common Type for all different fields in addressing modes.
4120 Type *CommonType = nullptr;
4121
4122 const DataLayout &DL;
4123
4124 /// Original Address.
4125 Value *Original;
4126
4127 /// Common value among addresses
4128 Value *CommonValue = nullptr;
4129
4130public:
4131 AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
4132 : DL(DL), Original(OriginalValue) {}
4133
4134 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4135
4136 /// Get the combined AddrMode
4137 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4138
4139 /// Add a new AddrMode if it's compatible with the AddrModes we already
4140 /// have.
4141 /// \return True iff we succeeded in doing so.
4142 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4143 // Take note of if we have any non-trivial AddrModes, as we need to detect
4144 // when all AddrModes are trivial as then we would introduce a phi or select
4145 // which just duplicates what's already there.
4146 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4147
4148 // If this is the first addrmode then everything is fine.
4149 if (AddrModes.empty()) {
4150 AddrModes.emplace_back(NewAddrMode);
4151 return true;
4152 }
4153
4154 // Figure out how different this is from the other address modes, which we
4155 // can do just by comparing against the first one given that we only care
4156 // about the cumulative difference.
4157 ExtAddrMode::FieldName ThisDifferentField =
4158 AddrModes[0].compare(NewAddrMode);
4159 if (DifferentField == ExtAddrMode::NoField)
4160 DifferentField = ThisDifferentField;
4161 else if (DifferentField != ThisDifferentField)
4162 DifferentField = ExtAddrMode::MultipleFields;
4163
4164 // If NewAddrMode differs in more than one dimension we cannot handle it.
4165 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4166
4167 // If Scale Field is different then we reject.
4168 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4169
4170 // We must also reject the case when the base offset differs and the
4171 // scale reg is not null; we cannot handle this case because the merge
4172 // of different offsets would be used as the ScaleReg.
4173 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4174 !NewAddrMode.ScaledReg);
4175
4176 // We must also reject the case when the GV differs and a BaseReg is
4177 // installed, because we want to use the base reg as a merge of GV values.
4178 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4179 !NewAddrMode.HasBaseReg);
4180
4181 // Even if NewAddrMode is the same, we still need to collect it because
4182 // the original value is different; later we will need all original values
4183 // as anchors when finding the common Phi node.
4184 if (CanHandle)
4185 AddrModes.emplace_back(NewAddrMode);
4186 else
4187 AddrModes.clear();
4188
4189 return CanHandle;
4190 }
4191
4192 /// Combine the addressing modes we've collected into a single
4193 /// addressing mode.
4194 /// \return True iff we successfully combined them or we only had one so
4195 /// didn't need to combine them anyway.
4196 bool combineAddrModes() {
4197 // If we have no AddrModes then they can't be combined.
4198 if (AddrModes.size() == 0)
4199 return false;
4200
4201 // A single AddrMode can trivially be combined.
4202 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4203 return true;
4204
4205 // If the AddrModes we collected are all just equal to the value they are
4206 // derived from then combining them wouldn't do anything useful.
4207 if (AllAddrModesTrivial)
4208 return false;
4209
4210 if (!addrModeCombiningAllowed())
4211 return false;
4212
4213 // Build a map between <original value, basic block where we saw it> to
4214 // value of base register.
4215 // Bail out if there is no common type.
4216 FoldAddrToValueMapping Map;
4217 if (!initializeMap(Map))
4218 return false;
4219
4220 CommonValue = findCommon(Map);
4221 if (CommonValue)
4222 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4223 return CommonValue != nullptr;
4224 }
4225
4226private:
4227 /// `CommonValue` may be a placeholder inserted by us.
4228 /// If the placeholder is not used, we should remove this dead instruction.
4229 void eraseCommonValueIfDead() {
4230 if (CommonValue && CommonValue->use_empty())
4231 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4232 CommonInst->eraseFromParent();
4233 }
4234
4235 /// Initialize Map with anchor values. For each address seen,
4236 /// we set the value of the differing field seen in this address.
4237 /// At the same time we find a common type for the differing field, which
4238 /// we will use to create new Phi/Select nodes. Keep it in the CommonType field.
4239 /// Return false if there is no common type found.
4240 bool initializeMap(FoldAddrToValueMapping &Map) {
4241 // Keep track of keys where the value is null. We will need to replace it
4242 // with constant null when we know the common type.
4243 SmallVector<Value *, 2> NullValue;
4244 Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4245 for (auto &AM : AddrModes) {
4246 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4247 if (DV) {
4248 auto *Type = DV->getType();
4249 if (CommonType && CommonType != Type)
4250 return false;
4251 CommonType = Type;
4252 Map[AM.OriginalValue] = DV;
4253 } else {
4254 NullValue.push_back(AM.OriginalValue);
4255 }
4256 }
4257 assert(CommonType && "At least one non-null value must be!");
4258 for (auto *V : NullValue)
4259 Map[V] = Constant::getNullValue(CommonType);
4260 return true;
4261 }
4262
4263 /// We have a mapping between value A and other value B where B was a field in
4264 /// the addressing mode represented by A. We also have an original value C
4265 /// representing the address we start with. Traversing from C through phis and
4266 /// selects, we ended up with the A's in the map. This utility function tries to
4267 /// find a value V which is a field in addressing mode C such that, traversing
4268 /// through phi nodes and selects, we end up at the corresponding values B in
4269 /// the map. The utility will create new Phis/Selects if needed.
4270 // The simple example looks as follows:
4271 // BB1:
4272 // p1 = b1 + 40
4273 // br cond BB2, BB3
4274 // BB2:
4275 // p2 = b2 + 40
4276 // br BB3
4277 // BB3:
4278 // p = phi [p1, BB1], [p2, BB2]
4279 // v = load p
4280 // Map is
4281 // p1 -> b1
4282 // p2 -> b2
4283 // Request is
4284 // p -> ?
4285 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4286 Value *findCommon(FoldAddrToValueMapping &Map) {
4287 // Tracks the simplification of newly created phi nodes. The reason we use
4288 // this mapping is that we will add newly created Phi nodes to AddrToBase.
4289 // Simplification of Phi nodes is recursive, so some Phi node may
4290 // be simplified after we added it to AddrToBase. In reality this
4291 // simplification is possible only if original phi/selects were not
4292 // simplified yet.
4293 // Using this mapping we can find the current value in AddrToBase.
4294 SimplificationTracker ST;
4295
4296 // First step, DFS to create PHI nodes for all intermediate blocks.
4297 // Also fill traverse order for the second step.
4298 SmallVector<Value *, 32> TraverseOrder;
4299 InsertPlaceholders(Map, TraverseOrder, ST);
4300
4301 // Second Step, fill new nodes by merged values and simplify if possible.
4302 FillPlaceholders(Map, TraverseOrder, ST);
4303
4304 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4305 ST.destroyNewNodes(CommonType);
4306 return nullptr;
4307 }
4308
4309 // Now we'd like to match new Phi nodes to existing ones.
4310 unsigned PhiNotMatchedCount = 0;
4311 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4312 ST.destroyNewNodes(CommonType);
4313 return nullptr;
4314 }
4315
4316 auto *Result = ST.Get(Map.find(Original)->second);
4317 if (Result) {
4318 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4319 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4320 }
4321 return Result;
4322 }
4323
4324 /// Try to match PHI node to Candidate.
4325 /// Matcher tracks the matched Phi nodes.
4326 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4327 SmallSetVector<PHIPair, 8> &Matcher,
4328 PhiNodeSet &PhiNodesToMatch) {
4329 SmallVector<PHIPair, 8> WorkList;
4330 Matcher.insert({PHI, Candidate});
4331 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4332 MatchedPHIs.insert(PHI);
4333 WorkList.push_back({PHI, Candidate});
4334 SmallSet<PHIPair, 8> Visited;
4335 while (!WorkList.empty()) {
4336 auto Item = WorkList.pop_back_val();
4337 if (!Visited.insert(Item).second)
4338 continue;
4339 // We iterate over all incoming values of the Phi to compare them.
4340 // If the values are different, both of them are Phis, the first one is a
4341 // Phi we added (subject to match), and both of them are in the same basic
4342 // block, then we can match our pair if the values match. So we state that
4343 // these values match and add them to the work list to verify that.
4344 for (auto *B : Item.first->blocks()) {
4345 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4346 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4347 if (FirstValue == SecondValue)
4348 continue;
4349
4350 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4351 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4352
4353 // If one of them is not a Phi, or
4354 // the first one is not a Phi node from the set we'd like to match, or
4355 // the Phi nodes are from different basic blocks, then
4356 // we will not be able to match.
4357 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4358 FirstPhi->getParent() != SecondPhi->getParent())
4359 return false;
4360
4361 // If we already matched them then continue.
4362 if (Matcher.count({FirstPhi, SecondPhi}))
4363 continue;
4364 // So the values are different and do not match. So we need them to
4365 // match. (But we register no more than one match per PHI node, so that
4366 // we won't later try to replace them twice.)
4367 if (MatchedPHIs.insert(FirstPhi).second)
4368 Matcher.insert({FirstPhi, SecondPhi});
4369 // But we must check it.
4370 WorkList.push_back({FirstPhi, SecondPhi});
4371 }
4372 }
4373 return true;
4374 }
4375
4376 /// For the given set of PHI nodes (in the SimplificationTracker) try
4377 /// to find their equivalents.
4378 /// Returns false if this matching fails and creation of new Phi is disabled.
4379 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4380 unsigned &PhiNotMatchedCount) {
4381 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4382 // order, so the replacements (ReplacePhi) are also done in a deterministic
4383 // order.
4384 SmallSetVector<PHIPair, 8> Matched;
4385 SmallPtrSet<PHINode *, 8> WillNotMatch;
4386 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4387 while (PhiNodesToMatch.size()) {
4388 PHINode *PHI = *PhiNodesToMatch.begin();
4389
4390 // Add ourselves; if no Phi node in the basic block matches, we do not match.
4391 WillNotMatch.clear();
4392 WillNotMatch.insert(PHI);
4393
4394 // Traverse all Phis until we find an equivalent one or fail to do so.
4395 bool IsMatched = false;
4396 for (auto &P : PHI->getParent()->phis()) {
4397 // Skip new Phi nodes.
4398 if (PhiNodesToMatch.count(&P))
4399 continue;
4400 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4401 break;
4402 // If it does not match, collect all Phi nodes from the matcher.
4403 // If we end up with no match, then all these Phi nodes will not match
4404 // later.
4405 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4406 Matched.clear();
4407 }
4408 if (IsMatched) {
4409 // Replace all matched values and erase them.
4410 for (auto MV : Matched)
4411 ST.ReplacePhi(MV.first, MV.second);
4412 Matched.clear();
4413 continue;
4414 }
4415 // If we are not allowed to create new nodes then bail out.
4416 if (!AllowNewPhiNodes)
4417 return false;
4418 // Just remove all seen values in matcher. They will not match anything.
4419 PhiNotMatchedCount += WillNotMatch.size();
4420 for (auto *P : WillNotMatch)
4421 PhiNodesToMatch.erase(P);
4422 }
4423 return true;
4424 }
4425 /// Fill the placeholders with values from predecessors and simplify them.
4426 void FillPlaceholders(FoldAddrToValueMapping &Map,
4427 SmallVectorImpl<Value *> &TraverseOrder,
4428 SimplificationTracker &ST) {
4429 while (!TraverseOrder.empty()) {
4430 Value *Current = TraverseOrder.pop_back_val();
4431 assert(Map.contains(Current) && "No node to fill!!!");
4432 Value *V = Map[Current];
4433
4434 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4435 // CurrentValue also must be Select.
4436 auto *CurrentSelect = cast<SelectInst>(Current);
4437 auto *TrueValue = CurrentSelect->getTrueValue();
4438 assert(Map.contains(TrueValue) && "No True Value!");
4439 Select->setTrueValue(ST.Get(Map[TrueValue]));
4440 auto *FalseValue = CurrentSelect->getFalseValue();
4441 assert(Map.contains(FalseValue) && "No False Value!");
4442 Select->setFalseValue(ST.Get(Map[FalseValue]));
4443 } else {
4444 // Must be a Phi node then.
4445 auto *PHI = cast<PHINode>(V);
4446 // Fill the Phi node with values from predecessors.
4447 for (auto *B : predecessors(PHI->getParent())) {
4448 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4449 assert(Map.contains(PV) && "No predecessor Value!");
4450 PHI->addIncoming(ST.Get(Map[PV]), B);
4451 }
4452 }
4453 }
4454 }
4455
4456 /// Starting from the original value, recursively iterates over the def-use
4457 /// chain up to known ending values represented in a map. For each traversed
4458 /// phi/select, inserts a placeholder Phi or Select.
4459 /// Reports all newly created Phi/Select nodes by adding them to the set.
4460 /// Also reports the order in which values have been traversed.
4461 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4462 SmallVectorImpl<Value *> &TraverseOrder,
4463 SimplificationTracker &ST) {
4464 SmallVector<Value *, 32> Worklist;
4465 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4466 "Address must be a Phi or Select node");
4467 auto *Dummy = PoisonValue::get(CommonType);
4468 Worklist.push_back(Original);
4469 while (!Worklist.empty()) {
4470 Value *Current = Worklist.pop_back_val();
4471 // if it is already visited or it is an ending value then skip it.
4472 if (Map.contains(Current))
4473 continue;
4474 TraverseOrder.push_back(Current);
4475
4476 // CurrentValue must be a Phi node or select. All others must be covered
4477 // by anchors.
4478 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4479 // Is it OK to get metadata from OrigSelect?!
4480 // Create a Select placeholder with dummy value.
4481 SelectInst *Select =
4482 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4483 CurrentSelect->getName(),
4484 CurrentSelect->getIterator(), CurrentSelect);
4485 Map[Current] = Select;
4486 ST.insertNewSelect(Select);
4487 // We are interested in True and False values.
4488 Worklist.push_back(CurrentSelect->getTrueValue());
4489 Worklist.push_back(CurrentSelect->getFalseValue());
4490 } else {
4491 // It must be a Phi node then.
4492 PHINode *CurrentPhi = cast<PHINode>(Current);
4493 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4494 PHINode *PHI =
4495 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4496 Map[Current] = PHI;
4497 ST.insertNewPhi(PHI);
4498 append_range(Worklist, CurrentPhi->incoming_values());
4499 }
4500 }
4501 }
4502
4503 bool addrModeCombiningAllowed() {
4504 if (DisableComplexAddrModes)
4505 return false;
4506 switch (DifferentField) {
4507 default:
4508 return false;
4509 case ExtAddrMode::BaseRegField:
4510 return AddrSinkCombineBaseReg;
4511 case ExtAddrMode::BaseGVField:
4512 return AddrSinkCombineBaseGV;
4513 case ExtAddrMode::BaseOffsField:
4514 return AddrSinkCombineBaseOffs;
4515 case ExtAddrMode::ScaledRegField:
4516 return AddrSinkCombineScaledReg;
4517 }
4518 }
4519};
4520} // end anonymous namespace
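// Editor's note: a standalone sketch (plain C++, not part of this file,
// wrapped in #if 0) of the two-phase scheme used by AddressingModeCombiner:
// first walk the phi/select graph and create an empty placeholder for every
// intermediate node (cf. InsertPlaceholders), then fill the placeholders'
// operands from the map in a second pass (cf. FillPlaceholders), so cyclic
// references are never a problem. The Node type and values are illustrative.
#if 0
#include <cstdio>
#include <map>
#include <vector>

struct Node {     // Toy stand-in for a phi/select merging two operands.
  Node *Ops[2] = {nullptr, nullptr};
  int Leaf = 0;   // Non-zero for leaves (the "anchor" values).
};

int main() {
  Node B1, B2, P;
  B1.Leaf = 1;
  B2.Leaf = 2;
  P.Ops[0] = &B1; // P "merges" B1 and B2, like the phi in the example above.
  P.Ops[1] = &B2;

  std::map<Node *, Node *> Map = {{&B1, &B1}, {&B2, &B2}}; // Anchors.
  std::vector<Node *> Order, Worklist = {&P};
  std::vector<Node> Placeholders;
  Placeholders.reserve(8); // Keep addresses stable in this tiny example.

  // Phase 1: create an empty placeholder per intermediate node and remember
  // the traversal order.
  while (!Worklist.empty()) {
    Node *Cur = Worklist.back();
    Worklist.pop_back();
    if (Map.count(Cur))
      continue;
    Order.push_back(Cur);
    Placeholders.emplace_back();
    Map[Cur] = &Placeholders.back();
    Worklist.push_back(Cur->Ops[0]);
    Worklist.push_back(Cur->Ops[1]);
  }

  // Phase 2: fill each placeholder's operands from the map.
  while (!Order.empty()) {
    Node *Cur = Order.back();
    Order.pop_back();
    Map[Cur]->Ops[0] = Map[Cur->Ops[0]];
    Map[Cur]->Ops[1] = Map[Cur->Ops[1]];
  }
  std::printf("%d %d\n", Map[&P]->Ops[0]->Leaf, Map[&P]->Ops[1]->Leaf); // 1 2
}
#endif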
4521
4522/// Try adding ScaleReg*Scale to the current addressing mode.
4523/// Return true and update AddrMode if this addr mode is legal for the target,
4524/// false if not.
4525bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4526 unsigned Depth) {
4527 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4528 // mode. Just process that directly.
4529 if (Scale == 1)
4530 return matchAddr(ScaleReg, Depth);
4531
4532 // If the scale is 0, it takes nothing to add this.
4533 if (Scale == 0)
4534 return true;
4535
4536 // If we already have a scale of this value, we can add to it, otherwise, we
4537 // need an available scale field.
4538 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4539 return false;
4540
4541 ExtAddrMode TestAddrMode = AddrMode;
4542
4543 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4544 // [A+B + A*7] -> [B+A*8].
4545 TestAddrMode.Scale += Scale;
4546 TestAddrMode.ScaledReg = ScaleReg;
4547
4548 // If the new address isn't legal, bail out.
4549 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4550 return false;
4551
4552 // It was legal, so commit it.
4553 AddrMode = TestAddrMode;
4554
4555 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4556 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4557 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4558 // go any further: we can reuse it and cannot eliminate it.
4559 ConstantInt *CI = nullptr;
4560 Value *AddLHS = nullptr;
4561 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4562 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4563 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4564 TestAddrMode.InBounds = false;
4565 TestAddrMode.ScaledReg = AddLHS;
4566 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4567
4568 // If this addressing mode is legal, commit it and remember that we folded
4569 // this instruction.
4570 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4571 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4572 AddrMode = TestAddrMode;
4573 return true;
4574 }
4575 // Restore status quo.
4576 TestAddrMode = AddrMode;
4577 }
4578
4579 // If this is an add recurrence with a constant step, return the increment
4580 // instruction and the canonicalized step.
4581 auto GetConstantStep =
4582 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4583 auto *PN = dyn_cast<PHINode>(V);
4584 if (!PN)
4585 return std::nullopt;
4586 auto IVInc = getIVIncrement(PN, &LI);
4587 if (!IVInc)
4588 return std::nullopt;
4589 // TODO: The result of the intrinsics above is two's complement. However, when
4590 // the IV inc is expressed as add or sub, iv.next is potentially a poison value.
4591 // If it has nuw or nsw flags, we need to make sure that these flags are
4592 // inferrable at the point of the memory instruction. Otherwise we are replacing
4593 // a well-defined two's-complement computation with poison. Currently, to avoid
4594 // the potentially complex analysis needed to prove this, we reject such cases.
4595 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4596 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4597 return std::nullopt;
4598 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4599 return std::make_pair(IVInc->first, ConstantStep->getValue());
4600 return std::nullopt;
4601 };
4602
4603 // Try to account for the following special case:
4604 // 1. ScaleReg is an inductive variable;
4605 // 2. We use it with non-zero offset;
4606 // 3. IV's increment is available at the point of memory instruction.
4607 //
4608 // In this case, we may reuse the IV increment instead of the IV Phi to
4609 // achieve the following advantages:
4610 // 1. If IV step matches the offset, we will have no need in the offset;
4611 // 2. Even if they don't match, we will reduce the overlap of living IV
4612 // and IV increment, that will potentially lead to better register
4613 // assignment.
4614 if (AddrMode.BaseOffs) {
4615 if (auto IVStep = GetConstantStep(ScaleReg)) {
4616 Instruction *IVInc = IVStep->first;
4617 // The following assert is important to ensure a lack of infinite loops.
4618 // This transform is (intentionally) the inverse of the one just above.
4619 // If they don't agree on the definition of an increment, we'd alternate
4620 // back and forth indefinitely.
4621 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4622 APInt Step = IVStep->second;
4623 APInt Offset = Step * AddrMode.Scale;
4624 if (Offset.isSignedIntN(64)) {
4625 TestAddrMode.InBounds = false;
4626 TestAddrMode.ScaledReg = IVInc;
4627 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4628 // If this addressing mode is legal, commit it.
4629 // (Note that we defer the (expensive) domtree base legality check
4630 // to the very last possible point.)
4631 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4632 getDTFn().dominates(IVInc, MemoryInst)) {
4633 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4634 AddrMode = TestAddrMode;
4635 return true;
4636 }
4637 // Restore status quo.
4638 TestAddrMode = AddrMode;
4639 }
4640 }
4641 }
4642
4643 // Otherwise, just return what we have.
4644 return true;
4645}
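// Editor's note: a small standalone check (plain C++, not part of this file,
// wrapped in #if 0) of the algebra behind folding ScaleReg = AddLHS + C in
// matchScaledValue above:
//   Base + (AddLHS + C) * Scale == (Base + C * Scale) + AddLHS * Scale,
// which is why the constant migrates into BaseOffs while AddLHS becomes the
// scaled register. Values are illustrative.
#if 0
#include <cassert>
#include <cstdint>

int main() {
  int64_t Base = 100, AddLHS = 7, C = 3, Scale = 4;
  int64_t Before = Base + (AddLHS + C) * Scale;
  int64_t After = (Base + C * Scale) + AddLHS * Scale;
  assert(Before == After); // Both compute address 140 in this example.
  return Before == After ? 0 : 1;
}
#endif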
4646
4647/// This is a little filter, which returns true if an addressing computation
4648/// involving I might be folded into a load/store accessing it.
4649/// This doesn't need to be perfect, but needs to accept at least
4650/// the set of instructions that MatchOperationAddr can.
4651static bool MightBeFoldableInst(Instruction *I) {
4652 switch (I->getOpcode()) {
4653 case Instruction::BitCast:
4654 case Instruction::AddrSpaceCast:
4655 // Don't touch identity bitcasts.
4656 if (I->getType() == I->getOperand(0)->getType())
4657 return false;
4658 return I->getType()->isIntOrPtrTy();
4659 case Instruction::PtrToInt:
4660 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4661 return true;
4662 case Instruction::IntToPtr:
4663 // We know the input is intptr_t, so this is foldable.
4664 return true;
4665 case Instruction::Add:
4666 return true;
4667 case Instruction::Mul:
4668 case Instruction::Shl:
4669 // Can only handle X*C and X << C.
4670 return isa<ConstantInt>(I->getOperand(1));
4671 case Instruction::GetElementPtr:
4672 return true;
4673 default:
4674 return false;
4675 }
4676}
4677
4678/// Check whether or not \p Val is a legal instruction for \p TLI.
4679/// \note \p Val is assumed to be the product of some type promotion.
4680/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4681/// to be legal, as the non-promoted value would have had the same state.
4682static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4683 const DataLayout &DL, Value *Val) {
4684 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4685 if (!PromotedInst)
4686 return false;
4687 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4688 // If the ISDOpcode is undefined, it was undefined before the promotion.
4689 if (!ISDOpcode)
4690 return true;
4691 // Otherwise, check if the promoted instruction is legal or not.
4692 return TLI.isOperationLegalOrCustom(
4693 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4694}
4695
4696namespace {
4697
4698/// Helper class to perform type promotion.
4699class TypePromotionHelper {
4700 /// Utility function to add a promoted instruction \p ExtOpnd to
4701 /// \p PromotedInsts and record the type of extension we have seen.
4702 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4703 Instruction *ExtOpnd, bool IsSExt) {
4704 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4705 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4706 if (!Inserted) {
4707 // If the new extension is same as original, the information in
4708 // PromotedInsts[ExtOpnd] is still correct.
4709 if (It->second.getInt() == ExtTy)
4710 return;
4711
4712 // Now the new extension is different from old extension, we make
4713 // the type information invalid by setting extension type to
4714 // BothExtension.
4715 ExtTy = BothExtension;
4716 }
4717 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4718 }
4719
4720 /// Utility function to query the original type of instruction \p Opnd
4721 /// with a matched extension type. If the extension doesn't match, we
4722 /// cannot use the information we had on the original type.
4723 /// BothExtension doesn't match any extension type.
4724 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4725 Instruction *Opnd, bool IsSExt) {
4726 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4727 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4728 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4729 return It->second.getPointer();
4730 return nullptr;
4731 }
4732
4733 /// Utility function to check whether or not a sign or zero extension
4734 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4735 /// either using the operands of \p Inst or promoting \p Inst.
4736 /// The type of the extension is defined by \p IsSExt.
4737 /// In other words, check if:
4738 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4739 /// #1 Promotion applies:
4740 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4741 /// #2 Operand reuses:
4742 /// ext opnd1 to ConsideredExtType.
4743 /// \p PromotedInsts maps the instructions to their type before promotion.
4744 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4745 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4746
4747 /// Utility function to determine if \p OpIdx should be promoted when
4748 /// promoting \p Inst.
4749 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4750 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4751 }
4752
4753 /// Utility function to promote the operand of \p Ext when this
4754 /// operand is a promotable trunc or sext or zext.
4755 /// \p PromotedInsts maps the instructions to their type before promotion.
4756 /// \p CreatedInstsCost[out] contains the cost of all instructions
4757 /// created to promote the operand of Ext.
4758 /// Newly added extensions are inserted in \p Exts.
4759 /// Newly added truncates are inserted in \p Truncs.
4760 /// Should never be called directly.
4761 /// \return The promoted value which is used instead of Ext.
4762 static Value *promoteOperandForTruncAndAnyExt(
4763 Instruction *Ext, TypePromotionTransaction &TPT,
4764 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4765 SmallVectorImpl<Instruction *> *Exts,
4766 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4767
4768 /// Utility function to promote the operand of \p Ext when this
4769 /// operand is promotable and is not a supported trunc or sext.
4770 /// \p PromotedInsts maps the instructions to their type before promotion.
4771 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4772 /// created to promote the operand of Ext.
4773 /// Newly added extensions are inserted in \p Exts.
4774 /// Newly added truncates are inserted in \p Truncs.
4775 /// Should never be called directly.
4776 /// \return The promoted value which is used instead of Ext.
4777 static Value *promoteOperandForOther(Instruction *Ext,
4778 TypePromotionTransaction &TPT,
4779 InstrToOrigTy &PromotedInsts,
4780 unsigned &CreatedInstsCost,
4781 SmallVectorImpl<Instruction *> *Exts,
4782 SmallVectorImpl<Instruction *> *Truncs,
4783 const TargetLowering &TLI, bool IsSExt);
4784
4785 /// \see promoteOperandForOther.
4786 static Value *signExtendOperandForOther(
4787 Instruction *Ext, TypePromotionTransaction &TPT,
4788 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4789 SmallVectorImpl<Instruction *> *Exts,
4790 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4791 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4792 Exts, Truncs, TLI, true);
4793 }
4794
4795 /// \see promoteOperandForOther.
4796 static Value *zeroExtendOperandForOther(
4797 Instruction *Ext, TypePromotionTransaction &TPT,
4798 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4799 SmallVectorImpl<Instruction *> *Exts,
4800 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4801 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4802 Exts, Truncs, TLI, false);
4803 }
4804
4805public:
4806 /// Type for the utility function that promotes the operand of Ext.
4807 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4808 InstrToOrigTy &PromotedInsts,
4809 unsigned &CreatedInstsCost,
4810 SmallVectorImpl<Instruction *> *Exts,
4811 SmallVectorImpl<Instruction *> *Truncs,
4812 const TargetLowering &TLI);
4813
4814 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4815 /// action to promote the operand of \p Ext instead of using Ext.
4816 /// \return NULL if no promotable action is possible with the current
4817 /// sign extension.
4818 /// \p InsertedInsts keeps track of all the instructions inserted by the
4819 /// other CodeGenPrepare optimizations. This information is important
4820 /// because we do not want to promote these instructions as CodeGenPrepare
4821 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4822 /// \p PromotedInsts maps the instructions to their type before promotion.
4823 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4824 const TargetLowering &TLI,
4825 const InstrToOrigTy &PromotedInsts);
4826};
4827
4828} // end anonymous namespace
4829
4830bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4831 Type *ConsideredExtType,
4832 const InstrToOrigTy &PromotedInsts,
4833 bool IsSExt) {
4834 // The promotion helper does not know how to deal with vector types yet.
4835 // To be able to fix that, we would need to fix the places where we
4836 // statically extend, e.g., constants and such.
4837 if (Inst->getType()->isVectorTy())
4838 return false;
4839
4840 // We can always get through zext.
4841 if (isa<ZExtInst>(Inst))
4842 return true;
4843
4844 // sext(sext) is ok too.
4845 if (IsSExt && isa<SExtInst>(Inst))
4846 return true;
4847
4848 // We can get through a binary operator if it is legal, i.e., the binary
4849 // operator must have the matching nuw or nsw flag.
4850 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4851 if (isa<OverflowingBinaryOperator>(BinOp) &&
4852 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4853 (IsSExt && BinOp->hasNoSignedWrap())))
4854 return true;
4855
4856 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4857 if ((Inst->getOpcode() == Instruction::And ||
4858 Inst->getOpcode() == Instruction::Or))
4859 return true;
4860
4861 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4862 if (Inst->getOpcode() == Instruction::Xor) {
4863 // Make sure it is not a NOT.
4864 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4865 if (!Cst->getValue().isAllOnes())
4866 return true;
4867 }
4868
4869 // zext(lshr(opnd, cst)) --> lshr(zext(opnd), zext(cst))
4870 // It may change a poisoned value into a regular value, like
4871 //     zext i32 (lshr i8 %val, 12) --> lshr i32 (zext i8 %val), 12
4872 //     (poisoned value)                (regular value)
4873 // This is OK, since replacing a poisoned value with a regular value is a
4874 // valid refinement.
4874 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4875 return true;
4876
4877 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4878 // It may change a poisoned value into a regular value, like
4879 //     zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4880 //     (poisoned value)               (regular value)
4881 // This is OK, since replacing a poisoned value with a regular value is a
4882 // valid refinement.
4882 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4883 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4884 if (ExtInst->hasOneUse()) {
4885 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4886 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4887 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4888 if (Cst &&
4889 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4890 return true;
4891 }
4892 }
4893 }
4894
4895 // Check if we can do the following simplification.
4896 // ext(trunc(opnd)) --> ext(opnd)
4897 if (!isa<TruncInst>(Inst))
4898 return false;
4899
4900 Value *OpndVal = Inst->getOperand(0);
4901 // Check if we can use this operand in the extension.
4902 // If the type is larger than the result type of the extension, we cannot.
4903 if (!OpndVal->getType()->isIntegerTy() ||
4904 OpndVal->getType()->getIntegerBitWidth() >
4905 ConsideredExtType->getIntegerBitWidth())
4906 return false;
4907
4908 // If the operand of the truncate is not an instruction, we will not have
4909 // any information on the dropped bits.
4910 // (Actually we could for constant but it is not worth the extra logic).
4911 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4912 if (!Opnd)
4913 return false;
4914
4915 // Check if the source of the type is narrow enough.
4916 // I.e., check that trunc just drops extended bits of the same kind of
4917 // the extension.
4918 // #1 get the type of the operand and check the kind of the extended bits.
4919 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4920 if (OpndType)
4921 ;
4922 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4923 OpndType = Opnd->getOperand(0)->getType();
4924 else
4925 return false;
4926
4927 // #2 check that the truncate just drops extended bits.
4928 return Inst->getType()->getIntegerBitWidth() >=
4929 OpndType->getIntegerBitWidth();
4930}
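// Illustrative sketch (not from the upstream source) of the two ways an
// extension can "get through" an operand, assuming an i32 -> i64 zext:
//   ; #1 Promotion: the operand is an overflow-safe binary operator
//   %a = add nuw i32 %x, 1            -->   %x64 = zext i32 %x to i64
//   %e = zext i32 %a to i64                 %e   = add nuw i64 %x64, 1
//   ; #2 Operand reuse: the trunc only drops bits added by a matching ext
//   %t = trunc i64 %w to i32          -->   use %w (suitably extended) directly
//   %e = zext i32 %t to i64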
4931
4932TypePromotionHelper::Action TypePromotionHelper::getAction(
4933 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4934 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4935 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4936 "Unexpected instruction type");
4937 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4938 Type *ExtTy = Ext->getType();
4939 bool IsSExt = isa<SExtInst>(Ext);
4940 // If the operand of the extension is not an instruction, we cannot
4941 // get through.
4942 // If it is, check whether we can get through it.
4943 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4944 return nullptr;
4945
4946 // Do not promote if the operand has been added by codegenprepare.
4947 // Otherwise, it means we are undoing an optimization that is likely to be
4948 // redone, thus causing potential infinite loop.
4949 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4950 return nullptr;
4951
4952 // SExt, ZExt or Trunc instructions.
4953 // Return the related handler.
4954 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4955 isa<ZExtInst>(ExtOpnd))
4956 return promoteOperandForTruncAndAnyExt;
4957
4958 // Regular instruction.
4959 // Abort early if we will have to insert non-free instructions.
4960 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4961 return nullptr;
4962 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4963}
4964
4965Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4966 Instruction *SExt, TypePromotionTransaction &TPT,
4967 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4968 SmallVectorImpl<Instruction *> *Exts,
4969 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4970 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4971 // get through it and this method should not be called.
4972 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4973 Value *ExtVal = SExt;
4974 bool HasMergedNonFreeExt = false;
4975 if (isa<ZExtInst>(SExtOpnd)) {
4976 // Replace s|zext(zext(opnd))
4977 // => zext(opnd).
4978 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4979 Value *ZExt =
4980 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4981 TPT.replaceAllUsesWith(SExt, ZExt);
4982 TPT.eraseInstruction(SExt);
4983 ExtVal = ZExt;
4984 } else {
4985 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4986 // => z|sext(opnd).
4987 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4988 }
4989 CreatedInstsCost = 0;
4990
4991 // Remove dead code.
4992 if (SExtOpnd->use_empty())
4993 TPT.eraseInstruction(SExtOpnd);
4994
4995 // Check if the extension is still needed.
4996 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4997 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4998 if (ExtInst) {
4999 if (Exts)
5000 Exts->push_back(ExtInst);
5001 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5002 }
5003 return ExtVal;
5004 }
5005
5006 // At this point we have: ext ty opnd to ty.
5007 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5008 Value *NextVal = ExtInst->getOperand(0);
5009 TPT.eraseInstruction(ExtInst, NextVal);
5010 return NextVal;
5011}
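// Illustrative sketch (not from the upstream source): for
//   %z = zext i8 %v to i32
//   %e = sext i32 %z to i64
// the code above replaces %e with '%e2 = zext i8 %v to i64' and erases the
// now-dead instructions, so the whole chain costs at most one (possibly free)
// extension.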
5012
5013Value *TypePromotionHelper::promoteOperandForOther(
5014 Instruction *Ext, TypePromotionTransaction &TPT,
5015 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5016 SmallVectorImpl<Instruction *> *Exts,
5017 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5018 bool IsSExt) {
5019 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5020 // get through it and this method should not be called.
5021 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5022 CreatedInstsCost = 0;
5023 if (!ExtOpnd->hasOneUse()) {
5024 // ExtOpnd will be promoted.
5025 // All its uses, but Ext, will need to use a truncated value of the
5026 // promoted version.
5027 // Create the truncate now.
5028 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5029 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5030 // Insert it just after the definition.
5031 ITrunc->moveAfter(ExtOpnd);
5032 if (Truncs)
5033 Truncs->push_back(ITrunc);
5034 }
5035
5036 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5037 // Restore the operand of Ext (which has been replaced by the previous call
5038 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5039 TPT.setOperand(Ext, 0, ExtOpnd);
5040 }
5041
5042 // Get through the Instruction:
5043 // 1. Update its type.
5044 // 2. Replace the uses of Ext by Inst.
5045 // 3. Extend each operand that needs to be extended.
5046
5047 // Remember the original type of the instruction before promotion.
5048 // This is useful to know that the high bits are sign extended bits.
5049 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5050 // Step #1.
5051 TPT.mutateType(ExtOpnd, Ext->getType());
5052 // Step #2.
5053 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5054 // Step #3.
5055 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5056 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5057 ++OpIdx) {
5058 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5059 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5060 !shouldExtOperand(ExtOpnd, OpIdx)) {
5061 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5062 continue;
5063 }
5064 // Check if we can statically extend the operand.
5065 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5066 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5067 LLVM_DEBUG(dbgs() << "Statically extend\n");
5068 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5069 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5070 : Cst->getValue().zext(BitWidth);
5071 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5072 continue;
5073 }
5074 // UndefValues are typed, so we have to statically extend them.
5075 if (isa<UndefValue>(Opnd)) {
5076 LLVM_DEBUG(dbgs() << "Statically extend\n");
5077 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5078 continue;
5079 }
5080
5081 // Otherwise we have to explicitly sign- or zero-extend the operand.
5082 Value *ValForExtOpnd = IsSExt
5083 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5084 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5085 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5086 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5087 if (!InstForExtOpnd)
5088 continue;
5089
5090 if (Exts)
5091 Exts->push_back(InstForExtOpnd);
5092
5093 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5094 }
5095 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5096 TPT.eraseInstruction(Ext);
5097 return ExtOpnd;
5098}
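// Illustrative sketch (not from the upstream source), for a zext of an
// 'add nuw' that has a second user:
//   %a = add nuw i32 %x, 1                  %x64 = zext i32 %x to i64
//   %u = mul i32 %a, 3             -->      %a64 = add nuw i64 %x64, 1
//   %e = zext i32 %a to i64                 %t   = trunc i64 %a64 to i32
//                                           %u   = mul i32 %t, 3
// The add is mutated to the wider type, its non-constant operands get new
// extensions, other users are fed a trunc of the promoted value, and the
// original zext is erased.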
5099
5100/// Check whether or not promoting an instruction to a wider type is profitable.
5101/// \p NewCost gives the cost of extension instructions created by the
5102/// promotion.
5103/// \p OldCost gives the cost of extension instructions before the promotion,
5104/// plus the number of instructions that the promotion allowed to be
5105/// matched in the addressing mode.
5106/// \p PromotedOperand is the value that has been promoted.
5107/// \return True if the promotion is profitable, false otherwise.
5108bool AddressingModeMatcher::isPromotionProfitable(
5109 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5110 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5111 << '\n');
5112 // The cost of the new extensions is greater than the cost of the
5113 // old extension plus what we folded.
5114 // This is not profitable.
5115 if (NewCost > OldCost)
5116 return false;
5117 if (NewCost < OldCost)
5118 return true;
5119 // The promotion is neutral but it may help folding the sign extension in
5120 // loads for instance.
5121 // Check that we did not create an illegal instruction.
5122 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5123}
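// Illustrative sketch (not from the upstream source) of the comparison above:
// if promoting created one non-free zext (NewCost = 1) while removing one
// non-free ext and letting one extra instruction be folded into the addressing
// mode (OldCost = 1 + 1 = 2), the promotion is profitable; with NewCost equal
// to OldCost it is kept only if the promoted instruction is still legal.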
5124
5125/// Given an instruction or constant expr, see if we can fold the operation
5126/// into the addressing mode. If so, update the addressing mode and return
5127/// true, otherwise return false without modifying AddrMode.
5128/// If \p MovedAway is not NULL, it is set on success to indicate whether
5129/// \p AddrInst was moved away instead of being folded into the addressing mode.
5130/// If \p MovedAway == true, \p AddrInst will not be part of the addressing mode
5131/// because it has been moved away.
5132/// Thus AddrInst must not be added to the matched instructions.
5133/// This state can happen when AddrInst is a sext, since it may be moved away.
5134/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5135/// not be referenced anymore.
5136bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5137 unsigned Depth,
5138 bool *MovedAway) {
5139 // Avoid exponential behavior on extremely deep expression trees.
5140 if (Depth >= 5)
5141 return false;
5142
5143 // By default, all matched instructions stay in place.
5144 if (MovedAway)
5145 *MovedAway = false;
5146
5147 switch (Opcode) {
5148 case Instruction::PtrToInt:
5149 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5150 return matchAddr(AddrInst->getOperand(0), Depth);
5151 case Instruction::IntToPtr: {
5152 auto AS = AddrInst->getType()->getPointerAddressSpace();
5153 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5154 // This inttoptr is a no-op if the integer type is pointer sized.
5155 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5156 return matchAddr(AddrInst->getOperand(0), Depth);
5157 return false;
5158 }
5159 case Instruction::BitCast:
5160 // BitCast is always a noop, and we can handle it as long as it is
5161 // int->int or pointer->pointer (we don't want int<->fp or something).
5162 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5163 // Don't touch identity bitcasts. These were probably put here by LSR,
5164 // and we don't want to mess around with them. Assume it knows what it
5165 // is doing.
5166 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5167 return matchAddr(AddrInst->getOperand(0), Depth);
5168 return false;
5169 case Instruction::AddrSpaceCast: {
5170 unsigned SrcAS =
5171 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5172 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5173 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5174 return matchAddr(AddrInst->getOperand(0), Depth);
5175 return false;
5176 }
5177 case Instruction::Add: {
5178 // Check to see if we can merge in one operand, then the other. If so, we
5179 // win.
5180 ExtAddrMode BackupAddrMode = AddrMode;
5181 unsigned OldSize = AddrModeInsts.size();
5182 // Start a transaction at this point.
5183 // The LHS may match but not the RHS.
5184 // Therefore, we need a higher level restoration point to undo partially
5185 // matched operation.
5186 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5187 TPT.getRestorationPoint();
5188
5189 // Try to match an integer constant second to increase its chance of ending
5190 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5191 int First = 0, Second = 1;
5192 if (isa<ConstantInt>(AddrInst->getOperand(First))
5193 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5194 std::swap(First, Second);
5195 AddrMode.InBounds = false;
5196 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5197 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5198 return true;
5199
5200 // Restore the old addr mode info.
5201 AddrMode = BackupAddrMode;
5202 AddrModeInsts.resize(OldSize);
5203 TPT.rollback(LastKnownGood);
5204
5205 // Otherwise this was over-aggressive. Try merging operands in the opposite
5206 // order.
5207 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5208 matchAddr(AddrInst->getOperand(First), Depth + 1))
5209 return true;
5210
5211 // Otherwise we definitely can't merge the ADD in.
5212 AddrMode = BackupAddrMode;
5213 AddrModeInsts.resize(OldSize);
5214 TPT.rollback(LastKnownGood);
5215 break;
5216 }
5217 // case Instruction::Or:
5218 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5219 // break;
5220 case Instruction::Mul:
5221 case Instruction::Shl: {
5222 // Can only handle X*C and X << C.
5223 AddrMode.InBounds = false;
5224 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5225 if (!RHS || RHS->getBitWidth() > 64)
5226 return false;
5227 int64_t Scale = Opcode == Instruction::Shl
5228 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5229 : RHS->getSExtValue();
5230
5231 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5232 }
5233 case Instruction::GetElementPtr: {
5234 // Scan the GEP. We check whether it contains constant offsets and at most
5235 // one variable offset.
5236 int VariableOperand = -1;
5237 unsigned VariableScale = 0;
5238
5239 int64_t ConstantOffset = 0;
5240 gep_type_iterator GTI = gep_type_begin(AddrInst);
5241 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5242 if (StructType *STy = GTI.getStructTypeOrNull()) {
5243 const StructLayout *SL = DL.getStructLayout(STy);
5244 unsigned Idx =
5245 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5246 ConstantOffset += SL->getElementOffset(Idx);
5247 } else {
5248 TypeSize TS = GTI.getSequentialElementStride(DL);
5249 if (TS.isNonZero()) {
5250 // The optimisations below currently only work for fixed offsets.
5251 if (TS.isScalable())
5252 return false;
5253 int64_t TypeSize = TS.getFixedValue();
5254 if (ConstantInt *CI =
5255 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5256 const APInt &CVal = CI->getValue();
5257 if (CVal.getSignificantBits() <= 64) {
5258 ConstantOffset += CVal.getSExtValue() * TypeSize;
5259 continue;
5260 }
5261 }
5262 // We only allow one variable index at the moment.
5263 if (VariableOperand != -1)
5264 return false;
5265
5266 // Remember the variable index.
5267 VariableOperand = i;
5268 VariableScale = TypeSize;
5269 }
5270 }
5271 }
5272
5273 // A common case is for the GEP to only do a constant offset. In this case,
5274 // just add it to the disp field and check validity.
5275 if (VariableOperand == -1) {
5276 AddrMode.BaseOffs += ConstantOffset;
5277 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5278 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5279 AddrMode.InBounds = false;
5280 return true;
5281 }
5282 AddrMode.BaseOffs -= ConstantOffset;
5283
5284 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5285 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5286 ConstantOffset > 0) {
5287 // Record GEPs with non-zero offsets as candidates for splitting in
5288 // the event that the offset cannot fit into the r+i addressing mode.
5289 // Simple and common case that only one GEP is used in calculating the
5290 // address for the memory access.
5291 Value *Base = AddrInst->getOperand(0);
5292 auto *BaseI = dyn_cast<Instruction>(Base);
5293 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5294 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5295 (BaseI && !isa<CastInst>(BaseI) &&
5296 !isa<GetElementPtrInst>(BaseI))) {
5297 // Make sure the parent block allows inserting non-PHI instructions
5298 // before the terminator.
5299 BasicBlock *Parent = BaseI ? BaseI->getParent()
5300 : &GEP->getFunction()->getEntryBlock();
5301 if (!Parent->getTerminator()->isEHPad())
5302 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5303 }
5304 }
5305
5306 return false;
5307 }
5308
5309 // Save the valid addressing mode in case we can't match.
5310 ExtAddrMode BackupAddrMode = AddrMode;
5311 unsigned OldSize = AddrModeInsts.size();
5312
5313 // See if the scale and offset amount is valid for this target.
5314 AddrMode.BaseOffs += ConstantOffset;
5315 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5316 AddrMode.InBounds = false;
5317
5318 // Match the base operand of the GEP.
5319 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5320 // If it couldn't be matched, just stuff the value in a register.
5321 if (AddrMode.HasBaseReg) {
5322 AddrMode = BackupAddrMode;
5323 AddrModeInsts.resize(OldSize);
5324 return false;
5325 }
5326 AddrMode.HasBaseReg = true;
5327 AddrMode.BaseReg = AddrInst->getOperand(0);
5328 }
5329
5330 // Match the remaining variable portion of the GEP.
5331 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5332 Depth)) {
5333 // If it couldn't be matched, try stuffing the base into a register
5334 // instead of matching it, and retrying the match of the scale.
5335 AddrMode = BackupAddrMode;
5336 AddrModeInsts.resize(OldSize);
5337 if (AddrMode.HasBaseReg)
5338 return false;
5339 AddrMode.HasBaseReg = true;
5340 AddrMode.BaseReg = AddrInst->getOperand(0);
5341 AddrMode.BaseOffs += ConstantOffset;
5342 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5343 VariableScale, Depth)) {
5344 // If even that didn't work, bail.
5345 AddrMode = BackupAddrMode;
5346 AddrModeInsts.resize(OldSize);
5347 return false;
5348 }
5349 }
5350
5351 return true;
5352 }
5353 case Instruction::SExt:
5354 case Instruction::ZExt: {
5355 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5356 if (!Ext)
5357 return false;
5358
5359 // Try to move this ext out of the way of the addressing mode.
5360 // Ask for a method for doing so.
5361 TypePromotionHelper::Action TPH =
5362 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5363 if (!TPH)
5364 return false;
5365
5366 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5367 TPT.getRestorationPoint();
5368 unsigned CreatedInstsCost = 0;
5369 unsigned ExtCost = !TLI.isExtFree(Ext);
5370 Value *PromotedOperand =
5371 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5372 // SExt has been moved away.
5373 // Thus either it will be rematched later in the recursive calls or it is
5374 // gone. Either way, we must not fold it into the addressing mode at this point.
5375 // E.g.,
5376 // op = add opnd, 1
5377 // idx = ext op
5378 // addr = gep base, idx
5379 // is now:
5380 // promotedOpnd = ext opnd <- no match here
5381 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5382 // addr = gep base, op <- match
5383 if (MovedAway)
5384 *MovedAway = true;
5385
5386 assert(PromotedOperand &&
5387 "TypePromotionHelper should have filtered out those cases");
5388
5389 ExtAddrMode BackupAddrMode = AddrMode;
5390 unsigned OldSize = AddrModeInsts.size();
5391
5392 if (!matchAddr(PromotedOperand, Depth) ||
5393 // The total of the new cost is equal to the cost of the created
5394 // instructions.
5395 // The total of the old cost is equal to the cost of the extension plus
5396 // what we have saved in the addressing mode.
5397 !isPromotionProfitable(CreatedInstsCost,
5398 ExtCost + (AddrModeInsts.size() - OldSize),
5399 PromotedOperand)) {
5400 AddrMode = BackupAddrMode;
5401 AddrModeInsts.resize(OldSize);
5402 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5403 TPT.rollback(LastKnownGood);
5404 return false;
5405 }
5406
5407 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5408 AddrMode.replaceWith(Ext, PromotedOperand);
5409 return true;
5410 }
5411 case Instruction::Call:
5412 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5413 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5414 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5415 if (TLI.addressingModeSupportsTLS(GV))
5416 return matchAddr(AddrInst->getOperand(0), Depth);
5417 }
5418 }
5419 break;
5420 }
5421 return false;
5422}
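// Illustrative sketch (not from the upstream source): for the address
//   %p = getelementptr inbounds {i32, [8 x i64]}, ptr %base, i64 0, i32 1, i64 %i
// the GetElementPtr case above produces an ExtAddrMode with BaseReg = %base,
// BaseOffs = 8 (the struct field offset), ScaledReg = %i and Scale = 8,
// provided the target reports that combination as a legal addressing mode.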
5423
5424/// If we can, try to add the value of 'Addr' into the current addressing mode.
5425/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5426/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5427/// for the target.
5428///
5429bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5430 // Start a transaction at this point that we will rollback if the matching
5431 // fails.
5432 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5433 TPT.getRestorationPoint();
5434 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5435 if (CI->getValue().isSignedIntN(64)) {
5436 // Check if the addition would result in a signed overflow.
5437 int64_t Result;
5438 bool Overflow =
5439 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5440 if (!Overflow) {
5441 // Fold in immediates if legal for the target.
5442 AddrMode.BaseOffs = Result;
5443 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5444 return true;
5445 AddrMode.BaseOffs -= CI->getSExtValue();
5446 }
5447 }
5448 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5449 // If this is a global variable, try to fold it into the addressing mode.
5450 if (!AddrMode.BaseGV) {
5451 AddrMode.BaseGV = GV;
5452 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5453 return true;
5454 AddrMode.BaseGV = nullptr;
5455 }
5456 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5457 ExtAddrMode BackupAddrMode = AddrMode;
5458 unsigned OldSize = AddrModeInsts.size();
5459
5460 // Check to see if it is possible to fold this operation.
5461 bool MovedAway = false;
5462 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5463 // This instruction may have been moved away. If so, there is nothing
5464 // to check here.
5465 if (MovedAway)
5466 return true;
5467 // Okay, it's possible to fold this. Check to see if it is actually
5468 // *profitable* to do so. We use a simple cost model to avoid increasing
5469 // register pressure too much.
5470 if (I->hasOneUse() ||
5471 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5472 AddrModeInsts.push_back(I);
5473 return true;
5474 }
5475
5476 // It isn't profitable to do this, roll back.
5477 AddrMode = BackupAddrMode;
5478 AddrModeInsts.resize(OldSize);
5479 TPT.rollback(LastKnownGood);
5480 }
5481 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5482 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5483 return true;
5484 TPT.rollback(LastKnownGood);
5485 } else if (isa<ConstantPointerNull>(Addr)) {
5486 // Null pointer gets folded without affecting the addressing mode.
5487 return true;
5488 }
5489
5490 // Worst case, the target should support [reg] addressing modes. :)
5491 if (!AddrMode.HasBaseReg) {
5492 AddrMode.HasBaseReg = true;
5493 AddrMode.BaseReg = Addr;
5494 // Still check for legality in case the target supports [imm] but not [i+r].
5495 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5496 return true;
5497 AddrMode.HasBaseReg = false;
5498 AddrMode.BaseReg = nullptr;
5499 }
5500
5501 // If the base register is already taken, see if we can do [r+r].
5502 if (AddrMode.Scale == 0) {
5503 AddrMode.Scale = 1;
5504 AddrMode.ScaledReg = Addr;
5505 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5506 return true;
5507 AddrMode.Scale = 0;
5508 AddrMode.ScaledReg = nullptr;
5509 }
5510 // Couldn't match.
5511 TPT.rollback(LastKnownGood);
5512 return false;
5513}
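// Illustrative sketch (not from the upstream source): matchAddr folds simple
// leaves directly, e.g. a constant 40 becomes BaseOffs += 40 and a global @g
// becomes BaseGV = @g, in both cases only if isLegalAddressingMode still
// accepts the resulting mode; anything else ends up as BaseReg or ScaledReg.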
5514
5515/// Check to see if all uses of OpVal by the specified inline asm call are due
5516/// to memory operands. If so, return true, otherwise return false.
5517static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5518 const TargetLowering &TLI,
5519 const TargetRegisterInfo &TRI) {
5520 const Function *F = CI->getFunction();
5521 TargetLowering::AsmOperandInfoVector TargetConstraints =
5522 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5523
5524 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5525 // Compute the constraint code and ConstraintType to use.
5526 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5527
5528 // If this asm operand is our Value*, and if it isn't an indirect memory
5529 // operand, we can't fold it! TODO: Also handle C_Address?
5530 if (OpInfo.CallOperandVal == OpVal &&
5531 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5532 !OpInfo.isIndirect))
5533 return false;
5534 }
5535
5536 return true;
5537}
5538
5539/// Recursively walk all the uses of I until we find a memory use.
5540/// If we find an obviously non-foldable instruction, return true.
5541/// Add accessed addresses and types to MemoryUses.
5542static bool FindAllMemoryUses(
5543 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5544 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5545 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5546 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5547 // If we already considered this instruction, we're done.
5548 if (!ConsideredInsts.insert(I).second)
5549 return false;
5550
5551 // If this is an obviously unfoldable instruction, bail out.
5552 if (!MightBeFoldableInst(I))
5553 return true;
5554
5555 // Loop over all the uses, recursively processing them.
5556 for (Use &U : I->uses()) {
5557 // Conservatively return true if we're seeing a large number or a deep chain
5558 // of users. This avoids excessive compilation times in pathological cases.
5559 if (SeenInsts++ >= MaxAddressUsersToScan)
5560 return true;
5561
5562 Instruction *UserI = cast<Instruction>(U.getUser());
5563 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5564 MemoryUses.push_back({&U, LI->getType()});
5565 continue;
5566 }
5567
5568 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5569 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5570 return true; // Storing addr, not into addr.
5571 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5572 continue;
5573 }
5574
5575 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5576 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5577 return true; // Storing addr, not into addr.
5578 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5579 continue;
5580 }
5581
5581
5582 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5583 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5584 return true; // Storing addr, not into addr.
5585 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5586 continue;
5587 }
5588
5589 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
5590 SmallVector<Value *, 2> PtrOps;
5591 Type *AccessTy;
5592 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5593 return true;
5594
5595 if (!find(PtrOps, U.get()))
5596 return true;
5597
5598 MemoryUses.push_back({&U, AccessTy});
5599 continue;
5600 }
5601
5602 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5603 if (CI->hasFnAttr(Attribute::Cold)) {
5604 // If this is a cold call, we can sink the addressing calculation into
5605 // the cold path. See optimizeCallInst.
5606 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5607 continue;
5608 }
5609
5610 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5611 if (!IA)
5612 return true;
5613
5614 // If this is a memory operand, we're cool, otherwise bail out.
5615 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5616 return true;
5617 continue;
5618 }
5619
5620 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5621 PSI, BFI, SeenInsts))
5622 return true;
5623 }
5624
5625 return false;
5626}
5627
5628static bool FindAllMemoryUses(
5629 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5630 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5631 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5632 unsigned SeenInsts = 0;
5633 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5634 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5635 PSI, BFI, SeenInsts);
5636}
5637
5638
5639/// Return true if Val is already known to be live at the use site that we're
5640/// folding it into. If so, there is no cost to include it in the addressing
5641/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5642/// instruction already.
5643bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5644 Value *KnownLive1,
5645 Value *KnownLive2) {
5646 // If Val is either of the known-live values, we know it is live!
5647 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5648 return true;
5649
5650 // All values other than instructions and arguments (e.g. constants) are live.
5651 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5652 return true;
5653
5654 // If Val is a constant-sized alloca in the entry block, it is live; this is
5655 // because it is just a reference to the stack/frame pointer, which is
5656 // live for the whole function.
5657 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5658 if (AI->isStaticAlloca())
5659 return true;
5660
5661 // Check to see if this value is already used in the memory instruction's
5662 // block. If so, it's already live into the block at the very least, so we
5663 // can reasonably fold it.
5664 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5665}
5666
5667/// It is possible for the addressing mode of the machine to fold the specified
5668/// instruction into a load or store that ultimately uses it.
5669/// However, the specified instruction has multiple uses.
5670/// Given this, it may actually increase register pressure to fold it
5671/// into the load. For example, consider this code:
5672///
5673/// X = ...
5674/// Y = X+1
5675/// use(Y) -> nonload/store
5676/// Z = Y+1
5677/// load Z
5678///
5679/// In this case, Y has multiple uses, and can be folded into the load of Z
5680/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5681/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5682/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5683/// number of computations either.
5684///
5685/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5686/// X was live across 'load Z' for other reasons, we actually *would* want to
5687/// fold the addressing mode in the Z case. This would make Y die earlier.
5688bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5689 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5690 if (IgnoreProfitability)
5691 return true;
5692
5693 // AMBefore is the addressing mode before this instruction was folded into it,
5694 // and AMAfter is the addressing mode after the instruction was folded. Get
5695 // the set of registers referenced by AMAfter and subtract out those
5696 // referenced by AMBefore: this is the set of values which folding in this
5697 // address extends the lifetime of.
5698 //
5699 // Note that there are only two potential values being referenced here,
5700 // BaseReg and ScaleReg (global addresses are always available, as are any
5701 // folded immediates).
5702 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5703
5704 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5705 // lifetime wasn't extended by adding this instruction.
5706 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5707 BaseReg = nullptr;
5708 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5709 ScaledReg = nullptr;
5710
5711 // If folding this instruction (and its subexprs) didn't extend any live
5712 // ranges, we're ok with it.
5713 if (!BaseReg && !ScaledReg)
5714 return true;
5715
5716 // If all uses of this instruction can have the address mode sunk into them,
5717 // we can remove the addressing mode and effectively trade one live register
5718 // for another (at worst.) In this context, folding an addressing mode into
5719 // the use is just a particularly nice way of sinking it.
5720 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5721 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5722 return false; // Has a non-memory, non-foldable use!
5723
5724 // Now that we know that all uses of this instruction are part of a chain of
5725 // computation involving only operations that could theoretically be folded
5726 // into a memory use, loop over each of these memory operation uses and see
5727 // if they could *actually* fold the instruction. The assumption is that
5728 // addressing modes are cheap and that duplicating the computation involved
5729 // many times is worthwhile, even on a fastpath. For sinking candidates
5730 // (i.e. cold call sites), this serves as a way to prevent excessive code
5731 // growth since most architectures have some reasonably small and fast way to
5732 // compute an effective address (e.g., LEA on x86).
5733 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5734 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5735 Value *Address = Pair.first->get();
5736 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5737 Type *AddressAccessTy = Pair.second;
5738 unsigned AS = Address->getType()->getPointerAddressSpace();
5739
5740 // Do a match against the root of this address, ignoring profitability. This
5741 // will tell us if the addressing mode for the memory operation will
5742 // *actually* cover the shared instruction.
5743 ExtAddrMode Result;
5744 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5745 0);
5746 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5747 TPT.getRestorationPoint();
5748 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5749 AddressAccessTy, AS, UserI, Result,
5750 InsertedInsts, PromotedInsts, TPT,
5751 LargeOffsetGEP, OptSize, PSI, BFI);
5752 Matcher.IgnoreProfitability = true;
5753 bool Success = Matcher.matchAddr(Address, 0);
5754 (void)Success;
5755 assert(Success && "Couldn't select *anything*?");
5756
5757 // The match was to check the profitability, the changes made are not
5758 // part of the original matcher. Therefore, they should be dropped
5759 // otherwise the original matcher will not present the right state.
5760 TPT.rollback(LastKnownGood);
5761
5762 // If the match didn't cover I, then it won't be shared by it.
5763 if (!is_contained(MatchedAddrModeInsts, I))
5764 return false;
5765
5766 MatchedAddrModeInsts.clear();
5767 }
5768
5769 return true;
5770}
5771
5772/// Return true if the specified values are defined in a
5773/// different basic block than BB.
5774static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5775 if (Instruction *I = dyn_cast<Instruction>(V))
5776 return I->getParent() != BB;
5777 return false;
5778}
5779
5780// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5781// is the first instruction that will use Addr. So we need to find the first
5782// user of Addr in the current BB.
5783static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5784 Value *SunkAddr) {
5785 if (Addr->hasOneUse())
5786 return MemoryInst->getIterator();
5787
5788 // We already have a SunkAddr in the current BB, but we may need to insert a
5789 // cast instruction after it.
5790 if (SunkAddr) {
5791 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5792 return std::next(AddrInst->getIterator());
5793 }
5794
5795 // Find the first user of Addr in current BB.
5796 Instruction *Earliest = MemoryInst;
5797 for (User *U : Addr->users()) {
5798 Instruction *UserInst = dyn_cast<Instruction>(U);
5799 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5800 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5801 continue;
5802 if (UserInst->comesBefore(Earliest))
5803 Earliest = UserInst;
5804 }
5805 }
5806 return Earliest->getIterator();
5807}
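// Illustrative sketch (not from the upstream source) of what the sinking below
// is after: an address computed in another block,
//   bb1:  %addr = getelementptr i8, ptr %base, i64 %off
//         br label %bb2
//   bb2:  %v = load i32, ptr %addr
// is re-materialized next to the load, so that SelectionDAG, which only sees
// one block at a time, can fold the computation into the load's addressing
// mode.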
5808
5809/// Sink the addressing mode computation immediately before MemoryInst if doing so
5810/// can be done without increasing register pressure. The need for the
5811/// register pressure constraint means this can end up being an all or nothing
5812/// decision for all uses of the same addressing computation.
5813///
5814/// Load and Store Instructions often have addressing modes that can do
5815/// significant amounts of computation. As such, instruction selection will try
5816/// to get the load or store to do as much computation as possible for the
5817/// program. The problem is that isel can only see within a single block. As
5818/// such, we sink as much legal addressing mode work into the block as possible.
5819///
5820/// This method is used to optimize both load/store and inline asms with memory
5821/// operands. It's also used to sink addressing computations feeding into cold
5822/// call sites into their (cold) basic block.
5823///
5824/// The motivation for handling sinking into cold blocks is that doing so can
5825/// both enable other address mode sinking (by satisfying the register pressure
5826/// constraint above), and reduce register pressure globally (by removing the
5828/// addressing mode computation from the fast path entirely).
5828bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5829 Type *AccessTy, unsigned AddrSpace) {
5830 Value *Repl = Addr;
5831
5832 // Try to collapse single-value PHI nodes. This is necessary to undo
5833 // unprofitable PRE transformations.
5834 SmallVector<Value *, 8> worklist;
5835 SmallPtrSet<Value *, 16> Visited;
5836 worklist.push_back(Addr);
5837
5838 // Use a worklist to iteratively look through PHI and select nodes, and
5839 // ensure that the addressing mode obtained from the non-PHI/select roots of
5840 // the graph are compatible.
5841 bool PhiOrSelectSeen = false;
5842 SmallVector<Instruction *, 16> AddrModeInsts;
5843 AddressingModeCombiner AddrModes(*DL, Addr);
5844 TypePromotionTransaction TPT(RemovedInsts);
5845 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5846 TPT.getRestorationPoint();
5847 while (!worklist.empty()) {
5848 Value *V = worklist.pop_back_val();
5849
5850 // We allow traversing cyclic Phi nodes.
5851 // In case of success after this loop we ensure that traversing through
5852 // Phi nodes ends up with all cases to compute address of the form
5853 // BaseGV + Base + Scale * Index + Offset
5854 // where Scale and Offset are constants and BaseGV, Base and Index
5855 // are exactly the same Values in all cases.
5856 // It means that BaseGV, Scale and Offset dominate our memory instruction
5857 // and have the same value as they had in address computation represented
5858 // as Phi. So we can safely sink address computation to memory instruction.
5859 if (!Visited.insert(V).second)
5860 continue;
5861
5862 // For a PHI node, push all of its incoming values.
5863 if (PHINode *P = dyn_cast<PHINode>(V)) {
5864 append_range(worklist, P->incoming_values());
5865 PhiOrSelectSeen = true;
5866 continue;
5867 }
5868 // Similar for select.
5869 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5870 worklist.push_back(SI->getFalseValue());
5871 worklist.push_back(SI->getTrueValue());
5872 PhiOrSelectSeen = true;
5873 continue;
5874 }
5875
5876 // For non-PHIs, determine the addressing mode being computed. Note that
5877 // the result may differ depending on what other uses our candidate
5878 // addressing instructions might have.
5879 AddrModeInsts.clear();
5880 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5881 0);
5882 // Defer the query (and possible computation of) the dom tree to point of
5883 // actual use. It's expected that most address matches don't actually need
5884 // the domtree.
5885 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5886 Function *F = MemoryInst->getParent()->getParent();
5887 return this->getDT(*F);
5888 };
5889 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5890 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5891 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5892 BFI.get());
5893
5894 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5895 if (GEP && !NewGEPBases.count(GEP)) {
5896 // If splitting the underlying data structure can reduce the offset of a
5897 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5898 // previously split data structures.
5899 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5900 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5901 }
5902
5903 NewAddrMode.OriginalValue = V;
5904 if (!AddrModes.addNewAddrMode(NewAddrMode))
5905 break;
5906 }
5907
5908 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5909 // or we have multiple but either couldn't combine them or combining them
5910 // wouldn't do anything useful, bail out now.
5911 if (!AddrModes.combineAddrModes()) {
5912 TPT.rollback(LastKnownGood);
5913 return false;
5914 }
5915 bool Modified = TPT.commit();
5916
5917 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5918 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5919
5920 // If all the instructions matched are already in this BB, don't do anything.
5921 // If we saw a Phi node then it is definitely not local, and if we saw a
5922 // select then we want to push the address calculation past it even if it's
5923 // already in this BB.
5924 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5925 return IsNonLocalValue(V, MemoryInst->getParent());
5926 })) {
5927 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5928 << "\n");
5929 return Modified;
5930 }
5931
5932 // Now that we have determined the addressing expression we want to use, and
5933 // know that we have to sink it into this block, check to see if we have
5934 // already done this for some other load/store instr in this block. If so,
5935 // reuse the computation. Before attempting reuse, check if the address is
5936 // still valid, as it may have been erased.
5937
5938 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5939
5940 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5941 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5942
5943 // The current BB may be optimized multiple times, so we can't guarantee that
5944 // the reuse of Addr happens later; call findInsertPos to find an appropriate
5945 // insert position.
5946 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5947
5948 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5949 if (!SunkAddr) {
5950 auto &DT = getDT(*MemoryInst->getFunction());
5951 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5952 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5953 return Modified;
5954 }
5955
5956 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5957
5958 if (SunkAddr) {
5959 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5960 << " for " << *MemoryInst << "\n");
5961 if (SunkAddr->getType() != Addr->getType()) {
5962 if (SunkAddr->getType()->getPointerAddressSpace() !=
5963 Addr->getType()->getPointerAddressSpace() &&
5964 !DL->isNonIntegralPointerType(Addr->getType())) {
5965 // There are two reasons the address spaces might not match: a no-op
5966 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5967 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5968 // TODO: allow bitcast between different address space pointers with the
5969 // same size.
5970 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5971 SunkAddr =
5972 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5973 } else
5974 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5975 }
5976 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs && TM &&
5977 SubtargetInfo->addrSinkUsingGEPs())) {
5978 // By default, we use the GEP-based method when AA is used later. This
5979 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5980 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5981 << " for " << *MemoryInst << "\n");
5982 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5983
5984 // First, find the pointer.
5985 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5986 ResultPtr = AddrMode.BaseReg;
5987 AddrMode.BaseReg = nullptr;
5988 }
5989
5990 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5991 // We can't add more than one pointer together, nor can we scale a
5992 // pointer (both of which seem meaningless).
5993 if (ResultPtr || AddrMode.Scale != 1)
5994 return Modified;
5995
5996 ResultPtr = AddrMode.ScaledReg;
5997 AddrMode.Scale = 0;
5998 }
5999
6000 // It is only safe to sign extend the BaseReg if we know that the math
6001 // required to create it did not overflow before we extend it. Since
6002 // the original IR value was tossed in favor of a constant back when
6003 // the AddrMode was created we need to bail out gracefully if widths
6004 // do not match instead of extending it.
6005 //
6006 // (See below for code to add the scale.)
6007 if (AddrMode.Scale) {
6008 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6009 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6010 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6011 return Modified;
6012 }
6013
6014 GlobalValue *BaseGV = AddrMode.BaseGV;
6015 if (BaseGV != nullptr) {
6016 if (ResultPtr)
6017 return Modified;
6018
6019 if (BaseGV->isThreadLocal()) {
6020 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6021 } else {
6022 ResultPtr = BaseGV;
6023 }
6024 }
6025
6026 // If the real base value actually came from an inttoptr, then the matcher
6027 // will look through it and provide only the integer value. In that case,
6028 // use it here.
6029 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6030 if (!ResultPtr && AddrMode.BaseReg) {
6031 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6032 "sunkaddr");
6033 AddrMode.BaseReg = nullptr;
6034 } else if (!ResultPtr && AddrMode.Scale == 1) {
6035 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6036 "sunkaddr");
6037 AddrMode.Scale = 0;
6038 }
6039 }
6040
6041 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6042 !AddrMode.BaseOffs) {
6043 SunkAddr = Constant::getNullValue(Addr->getType());
6044 } else if (!ResultPtr) {
6045 return Modified;
6046 } else {
6047 Type *I8PtrTy =
6048 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6049
6050 // Start with the base register. Do this first so that subsequent address
6051 // matching finds it last, which will prevent it from trying to match it
6052 // as the scaled value in case it happens to be a mul. That would be
6053 // problematic if we've sunk a different mul for the scale, because then
6054 // we'd end up sinking both muls.
6055 if (AddrMode.BaseReg) {
6056 Value *V = AddrMode.BaseReg;
6057 if (V->getType() != IntPtrTy)
6058 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6059
6060 ResultIndex = V;
6061 }
6062
6063 // Add the scale value.
6064 if (AddrMode.Scale) {
6065 Value *V = AddrMode.ScaledReg;
6066 if (V->getType() == IntPtrTy) {
6067 // done.
6068 } else {
6069 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6070 cast<IntegerType>(V->getType())->getBitWidth() &&
6071 "We can't transform if ScaledReg is too narrow");
6072 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6073 }
6074
6075 if (AddrMode.Scale != 1)
6076 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6077 "sunkaddr");
6078 if (ResultIndex)
6079 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6080 else
6081 ResultIndex = V;
6082 }
6083
6084 // Add in the Base Offset if present.
6085 if (AddrMode.BaseOffs) {
6086 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6087 if (ResultIndex) {
6088 // We need to add this separately from the scale above to help with
6089 // SDAG consecutive load/store merging.
6090 if (ResultPtr->getType() != I8PtrTy)
6091 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6092 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6093 AddrMode.InBounds);
6094 }
6095
6096 ResultIndex = V;
6097 }
6098
6099 if (!ResultIndex) {
6100 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6101 // We know that we have a pointer without any offsets. If this pointer
6102 // originates from a different basic block than the current one, we
6103 // must be able to recreate it in the current basic block.
6104 // We do not support the recreation of any instructions yet.
6105 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6106 return Modified;
6107 SunkAddr = ResultPtr;
6108 } else {
6109 if (ResultPtr->getType() != I8PtrTy)
6110 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6111 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6112 AddrMode.InBounds);
6113 }
6114
6115 if (SunkAddr->getType() != Addr->getType()) {
6116 if (SunkAddr->getType()->getPointerAddressSpace() !=
6117 Addr->getType()->getPointerAddressSpace() &&
6118 !DL->isNonIntegralPointerType(Addr->getType())) {
6119 // There are two reasons the address spaces might not match: a no-op
6120 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6121 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6122 // TODO: allow bitcast between different address space pointers with
6123 // the same size.
6124 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6125 SunkAddr =
6126 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6127 } else
6128 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6129 }
6130 }
6131 } else {
6132 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6133 // non-integral pointers, so in that case bail out now.
6134 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6135 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6136 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6137 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6138 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6139 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6140 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6141 (AddrMode.BaseGV &&
6142 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6143 return Modified;
6144
6145 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6146 << " for " << *MemoryInst << "\n");
6147 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6148 Value *Result = nullptr;
6149
6150 // Start with the base register. Do this first so that subsequent address
6151 // matching finds it last, which will prevent it from trying to match it
6152 // as the scaled value in case it happens to be a mul. That would be
6153 // problematic if we've sunk a different mul for the scale, because then
6154 // we'd end up sinking both muls.
6155 if (AddrMode.BaseReg) {
6156 Value *V = AddrMode.BaseReg;
6157 if (V->getType()->isPointerTy())
6158 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6159 if (V->getType() != IntPtrTy)
6160 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6161 Result = V;
6162 }
6163
6164 // Add the scale value.
6165 if (AddrMode.Scale) {
6166 Value *V = AddrMode.ScaledReg;
6167 if (V->getType() == IntPtrTy) {
6168 // done.
6169 } else if (V->getType()->isPointerTy()) {
6170 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6171 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6172 cast<IntegerType>(V->getType())->getBitWidth()) {
6173 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6174 } else {
6175 // It is only safe to sign extend the BaseReg if we know that the math
6176 // required to create it did not overflow before we extend it. Since
6177 // the original IR value was tossed in favor of a constant back when
6178 // the AddrMode was created we need to bail out gracefully if widths
6179 // do not match instead of extending it.
6180 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6181 if (I && (Result != AddrMode.BaseReg))
6182 I->eraseFromParent();
6183 return Modified;
6184 }
6185 if (AddrMode.Scale != 1)
6186 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
6187 "sunkaddr");
6188 if (Result)
6189 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6190 else
6191 Result = V;
6192 }
6193
6194 // Add in the BaseGV if present.
6195 GlobalValue *BaseGV = AddrMode.BaseGV;
6196 if (BaseGV != nullptr) {
6197 Value *BaseGVPtr;
6198 if (BaseGV->isThreadLocal()) {
6199 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6200 } else {
6201 BaseGVPtr = BaseGV;
6202 }
6203 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6204 if (Result)
6205 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6206 else
6207 Result = V;
6208 }
6209
6210 // Add in the Base Offset if present.
6211 if (AddrMode.BaseOffs) {
6212 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6213 if (Result)
6214 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6215 else
6216 Result = V;
6217 }
6218
6219 if (!Result)
6220 SunkAddr = Constant::getNullValue(Addr->getType());
6221 else
6222 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6223 }
6224
6225 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6226 // Store the newly computed address into the cache. In the case we reused a
6227 // value, this should be idempotent.
6228 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6229
6230 // If we have no uses, recursively delete the value and all dead instructions
6231 // using it.
6232 if (Repl->use_empty()) {
6233 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6234 RecursivelyDeleteTriviallyDeadInstructions(
6235 Repl, TLInfo, nullptr,
6236 [&](Value *V) { removeAllAssertingVHReferences(V); });
6237 });
6238 }
6239 ++NumMemoryInsts;
6240 return true;
6241}
6242
6243/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6244/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6245/// only handle a 2 operand GEP in the same basic block or a splat constant
6246/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6247/// index.
6248///
6249/// If the existing GEP has a vector base pointer that is splat, we can look
6250/// through the splat to find the scalar pointer. If we can't find a scalar
6251/// pointer there's nothing we can do.
6252///
6253/// If we have a GEP with more than 2 indices where the middle indices are all
6254/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6255///
6256/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6257/// followed by a GEP with an all zeroes vector index. This will enable
6258/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6259/// zero index.
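///
/// As a hand-written sketch (not taken from this file or its tests), the
/// multi-index case described above would rewrite something like
/// \code
///   %gep = getelementptr [256 x i32], ptr %base, i64 0, <4 x i64> %idx
/// \endcode
/// into a scalar GEP feeding a two-operand vector GEP:
/// \code
///   %sbase = getelementptr [256 x i32], ptr %base, i64 0, i64 0
///   %gep = getelementptr i32, ptr %sbase, <4 x i64> %idx
/// \endcode
/// giving SelectionDAGBuilder the uniform base %sbase.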
6260bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6261 Value *Ptr) {
6262 Value *NewAddr;
6263
6264 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6265 // Don't optimize GEPs that don't have indices.
6266 if (!GEP->hasIndices())
6267 return false;
6268
6269 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6270 // FIXME: We should support this by sinking the GEP.
6271 if (MemoryInst->getParent() != GEP->getParent())
6272 return false;
6273
6274 SmallVector<Value *, 2> Ops(GEP->operands());
6275
6276 bool RewriteGEP = false;
6277
6278 if (Ops[0]->getType()->isVectorTy()) {
6279 Ops[0] = getSplatValue(Ops[0]);
6280 if (!Ops[0])
6281 return false;
6282 RewriteGEP = true;
6283 }
6284
6285 unsigned FinalIndex = Ops.size() - 1;
6286
6287 // Ensure all but the last index is 0.
6288 // FIXME: This isn't strictly required. All that's required is that they are
6289 // all scalars or splats.
6290 for (unsigned i = 1; i < FinalIndex; ++i) {
6291 auto *C = dyn_cast<Constant>(Ops[i]);
6292 if (!C)
6293 return false;
6294 if (isa<VectorType>(C->getType()))
6295 C = C->getSplatValue();
6296 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6297 if (!CI || !CI->isZero())
6298 return false;
6299 // Scalarize the index if needed.
6300 Ops[i] = CI;
6301 }
6302
6303 // Try to scalarize the final index.
6304 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6305 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6306 auto *C = dyn_cast<ConstantInt>(V);
6307 // Don't scalarize an all-zeros vector.
6308 if (!C || !C->isZero()) {
6309 Ops[FinalIndex] = V;
6310 RewriteGEP = true;
6311 }
6312 }
6313 }
6314
6315 // If we made any changes or we have extra operands, we need to generate
6316 // new instructions.
6317 if (!RewriteGEP && Ops.size() == 2)
6318 return false;
6319
6320 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6321
6322 IRBuilder<> Builder(MemoryInst);
6323
6324 Type *SourceTy = GEP->getSourceElementType();
6325 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6326
6327 // If the final index isn't a vector, emit a scalar GEP containing all ops
6328 // and a vector GEP with all zeroes final index.
6329 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6330 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6331 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6332 auto *SecondTy = GetElementPtrInst::getIndexedType(
6333 SourceTy, ArrayRef(Ops).drop_front());
6334 NewAddr =
6335 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6336 } else {
6337 Value *Base = Ops[0];
6338 Value *Index = Ops[FinalIndex];
6339
6340 // Create a scalar GEP if there are more than 2 operands.
6341 if (Ops.size() != 2) {
6342 // Replace the last index with 0.
6343 Ops[FinalIndex] =
6344 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6345 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6346 SourceTy = GetElementPtrInst::getIndexedType(
6347 SourceTy, ArrayRef(Ops).drop_front());
6348 }
6349
6350 // Now create the GEP with scalar pointer and vector index.
6351 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6352 }
6353 } else if (!isa<Constant>(Ptr)) {
6354 // Not a GEP, maybe it's a splat and we can create a GEP to enable
6355 // SelectionDAGBuilder to use it as a uniform base.
6356 Value *V = getSplatValue(Ptr);
6357 if (!V)
6358 return false;
6359
6360 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6361
6362 IRBuilder<> Builder(MemoryInst);
6363
6364 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6365 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6366 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6367 Type *ScalarTy;
6368 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6369 Intrinsic::masked_gather) {
6370 ScalarTy = MemoryInst->getType()->getScalarType();
6371 } else {
6372 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6373 Intrinsic::masked_scatter);
6374 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6375 }
6376 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6377 } else {
6378 // Constant, SelectionDAGBuilder knows to check if it's a splat.
6379 return false;
6380 }
6381
6382 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6383
6384 // If we have no uses, recursively delete the value and all dead instructions
6385 // using it.
6386 if (Ptr->use_empty())
6387 RecursivelyDeleteTriviallyDeadInstructions(
6388 Ptr, TLInfo, nullptr,
6389 [&](Value *V) { removeAllAssertingVHReferences(V); });
6390
6391 return true;
6392}
6393
6394/// If there are any memory operands, use OptimizeMemoryInst to sink their
6395/// address computing into the block when possible / profitable.
6396bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6397 bool MadeChange = false;
6398
6399 const TargetRegisterInfo *TRI =
6400 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6401 TargetLowering::AsmOperandInfoVector TargetConstraints =
6402 TLI->ParseConstraints(*DL, TRI, *CS);
6403 unsigned ArgNo = 0;
6404 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6405 // Compute the constraint code and ConstraintType to use.
6406 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6407
6408 // TODO: Also handle C_Address?
6409 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6410 OpInfo.isIndirect) {
6411 Value *OpVal = CS->getArgOperand(ArgNo++);
6412 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6413 } else if (OpInfo.Type == InlineAsm::isInput)
6414 ArgNo++;
6415 }
6416
6417 return MadeChange;
6418}
6419
6420/// Check if all the uses of \p Val are equivalent (or free) zero or
6421/// sign extensions.
6422static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6423 assert(!Val->use_empty() && "Input must have at least one use");
6424 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6425 bool IsSExt = isa<SExtInst>(FirstUser);
6426 Type *ExtTy = FirstUser->getType();
6427 for (const User *U : Val->users()) {
6428 const Instruction *UI = cast<Instruction>(U);
6429 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6430 return false;
6431 Type *CurTy = UI->getType();
6432 // Same input and output types: Same instruction after CSE.
6433 if (CurTy == ExtTy)
6434 continue;
6435
6436 // If IsSExt is true, we are in this situation:
6437 // a = Val
6438 // b = sext ty1 a to ty2
6439 // c = sext ty1 a to ty3
6440 // Assuming ty2 is shorter than ty3, this could be turned into:
6441 // a = Val
6442 // b = sext ty1 a to ty2
6443 // c = sext ty2 b to ty3
6444 // However, the last sext is not free.
6445 if (IsSExt)
6446 return false;
6447
6448 // This is a ZExt, maybe this is free to extend from one type to another.
6449 // In that case, we would not account for a different use.
6450 Type *NarrowTy;
6451 Type *LargeTy;
6452 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6453 CurTy->getScalarType()->getIntegerBitWidth()) {
6454 NarrowTy = CurTy;
6455 LargeTy = ExtTy;
6456 } else {
6457 NarrowTy = ExtTy;
6458 LargeTy = CurTy;
6459 }
6460
6461 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6462 return false;
6463 }
6464 // All uses are the same or can be derived from one another for free.
6465 return true;
6466}
6467
6468/// Try to speculatively promote extensions in \p Exts and continue
6469/// promoting through newly promoted operands recursively as far as doing so is
6470/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6471/// When some promotion happened, \p TPT contains the proper state to revert
6472/// them.
6473///
6474/// \return true if some promotion happened, false otherwise.
6475bool CodeGenPrepare::tryToPromoteExts(
6476 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6477 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6478 unsigned CreatedInstsCost) {
6479 bool Promoted = false;
6480
6481 // Iterate over all the extensions to try to promote them.
6482 for (auto *I : Exts) {
6483 // Early check if we directly have ext(load).
6484 if (isa<LoadInst>(I->getOperand(0))) {
6485 ProfitablyMovedExts.push_back(I);
6486 continue;
6487 }
6488
6489 // Check whether or not we want to do any promotion. The reason we have
6490 // this check inside the for loop is to catch the case where an extension
6491 // is directly fed by a load because in such a case the extension can be moved
6492 // up without any promotion on its operands.
6493 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6494 return false;
6495
6496 // Get the action to perform the promotion.
6497 TypePromotionHelper::Action TPH =
6498 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6499 // Check if we can promote.
6500 if (!TPH) {
6501 // Save the current extension as we cannot move up through its operand.
6502 ProfitablyMovedExts.push_back(I);
6503 continue;
6504 }
6505
6506 // Save the current state.
6507 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6508 TPT.getRestorationPoint();
6509 SmallVector<Instruction *, 4> NewExts;
6510 unsigned NewCreatedInstsCost = 0;
6511 unsigned ExtCost = !TLI->isExtFree(I);
6512 // Promote.
6513 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6514 &NewExts, nullptr, *TLI);
6515 assert(PromotedVal &&
6516 "TypePromotionHelper should have filtered out those cases");
6517
6518 // We would be able to merge only one extension into a load.
6519 // Therefore, if we have more than 1 new extension we heuristically
6520 // cut this search path, because it means we degrade the code quality.
6521 // With exactly 2, the transformation is neutral, because we will merge
6522 // one extension but leave one. However, we optimistically keep going,
6523 // because the new extension may be removed too. Also avoid replacing a
6524 // single free extension with multiple extensions, as this increases the
6525 // number of IR instructions while not providing any savings.
6526 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6527 // FIXME: It would be possible to propagate a negative value instead of
6528 // conservatively ceiling it to 0.
6529 TotalCreatedInstsCost =
6530 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6531 if (!StressExtLdPromotion &&
6532 (TotalCreatedInstsCost > 1 ||
6533 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6534 (ExtCost == 0 && NewExts.size() > 1))) {
6535 // This promotion is not profitable, rollback to the previous state, and
6536 // save the current extension in ProfitablyMovedExts as the latest
6537 // speculative promotion turned out to be unprofitable.
6538 TPT.rollback(LastKnownGood);
6539 ProfitablyMovedExts.push_back(I);
6540 continue;
6541 }
6542 // Continue promoting NewExts as far as doing so is profitable.
6543 SmallVector<Instruction *, 2> NewlyMovedExts;
6544 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6545 bool NewPromoted = false;
6546 for (auto *ExtInst : NewlyMovedExts) {
6547 Instruction *MovedExt = cast<Instruction>(ExtInst);
6548 Value *ExtOperand = MovedExt->getOperand(0);
6549 // If we have reached a load, we need this extra profitability check
6550 // as it could potentially be merged into an ext(load).
6551 if (isa<LoadInst>(ExtOperand) &&
6552 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6553 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6554 continue;
6555
6556 ProfitablyMovedExts.push_back(MovedExt);
6557 NewPromoted = true;
6558 }
6559
6560 // If none of speculative promotions for NewExts is profitable, rollback
6561 // and save the current extension (I) as the last profitable extension.
6562 if (!NewPromoted) {
6563 TPT.rollback(LastKnownGood);
6564 ProfitablyMovedExts.push_back(I);
6565 continue;
6566 }
6567 // The promotion is profitable.
6568 Promoted = true;
6569 }
6570 return Promoted;
6571}
6572
6573/// Merging redundant sexts when one dominates the other.
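///
/// For example (an illustrative sketch, not lifted from a test case), given
/// two identical extensions of the same value where one dominates the other:
/// \code
///   %s1 = sext i32 %a to i64
///   ...
///   %s2 = sext i32 %a to i64
/// \endcode
/// the dominated %s2 is removed and its uses are rewritten to use %s1.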
6574bool CodeGenPrepare::mergeSExts(Function &F) {
6575 bool Changed = false;
6576 for (auto &Entry : ValToSExtendedUses) {
6577 SExts &Insts = Entry.second;
6578 SExts CurPts;
6579 for (Instruction *Inst : Insts) {
6580 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6581 Inst->getOperand(0) != Entry.first)
6582 continue;
6583 bool inserted = false;
6584 for (auto &Pt : CurPts) {
6585 if (getDT(F).dominates(Inst, Pt)) {
6586 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6587 RemovedInsts.insert(Pt);
6588 Pt->removeFromParent();
6589 Pt = Inst;
6590 inserted = true;
6591 Changed = true;
6592 break;
6593 }
6594 if (!getDT(F).dominates(Pt, Inst))
6595 // Give up if we need to merge in a common dominator as the
6596 // experiments show it is not profitable.
6597 continue;
6598 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6599 RemovedInsts.insert(Inst);
6600 Inst->removeFromParent();
6601 inserted = true;
6602 Changed = true;
6603 break;
6604 }
6605 if (!inserted)
6606 CurPts.push_back(Inst);
6607 }
6608 }
6609 return Changed;
6610}
6611
6612// Splitting large data structures so that the GEPs accessing them can have
6613// smaller offsets so that they can be sunk to the same blocks as their users.
6614// For example, a large struct starting from %base is split into two parts
6615// where the second part starts from %new_base.
6616//
6617// Before:
6618// BB0:
6619// %base =
6620//
6621// BB1:
6622// %gep0 = gep %base, off0
6623// %gep1 = gep %base, off1
6624// %gep2 = gep %base, off2
6625//
6626// BB2:
6627// %load1 = load %gep0
6628// %load2 = load %gep1
6629// %load3 = load %gep2
6630//
6631// After:
6632// BB0:
6633// %base =
6634// %new_base = gep %base, off0
6635//
6636// BB1:
6637// %new_gep0 = %new_base
6638// %new_gep1 = gep %new_base, off1 - off0
6639// %new_gep2 = gep %new_base, off2 - off0
6640//
6641// BB2:
6642// %load1 = load i32, i32* %new_gep0
6643// %load2 = load i32, i32* %new_gep1
6644// %load3 = load i32, i32* %new_gep2
6645//
6646// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6647// their offsets are small enough to fit into the addressing mode.
6648bool CodeGenPrepare::splitLargeGEPOffsets() {
6649 bool Changed = false;
6650 for (auto &Entry : LargeOffsetGEPMap) {
6651 Value *OldBase = Entry.first;
6652 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6653 &LargeOffsetGEPs = Entry.second;
6654 auto compareGEPOffset =
6655 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6656 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6657 if (LHS.first == RHS.first)
6658 return false;
6659 if (LHS.second != RHS.second)
6660 return LHS.second < RHS.second;
6661 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6662 };
6663 // Sorting all the GEPs of the same data structures based on the offsets.
6664 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6665 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6666 // Skip if all the GEPs have the same offsets.
6667 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6668 continue;
6669 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6670 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6671 Value *NewBaseGEP = nullptr;
6672
6673 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6674 GetElementPtrInst *GEP) {
6675 LLVMContext &Ctx = GEP->getContext();
6676 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6677 Type *I8PtrTy =
6678 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6679
6680 BasicBlock::iterator NewBaseInsertPt;
6681 BasicBlock *NewBaseInsertBB;
6682 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6683 // If the base of the struct is an instruction, the new base will be
6684 // inserted close to it.
6685 NewBaseInsertBB = BaseI->getParent();
6686 if (isa<PHINode>(BaseI))
6687 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6688 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6689 NewBaseInsertBB =
6690 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6691 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6692 } else
6693 NewBaseInsertPt = std::next(BaseI->getIterator());
6694 } else {
6695 // If the current base is an argument or global value, the new base
6696 // will be inserted to the entry block.
6697 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6698 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6699 }
6700 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6701 // Create a new base.
6702 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6703 NewBaseGEP = OldBase;
6704 if (NewBaseGEP->getType() != I8PtrTy)
6705 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6706 NewBaseGEP =
6707 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6708 NewGEPBases.insert(NewBaseGEP);
6709 return;
6710 };
6711
6712 // Check whether all the offsets can be encoded with a preferred common base.
6713 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6714 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6715 BaseOffset = PreferBase;
6716 // Create a new base if the offset of the BaseGEP can be decoded with one
6717 // instruction.
6718 createNewBase(BaseOffset, OldBase, BaseGEP);
6719 }
6720
6721 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6722 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6723 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6724 int64_t Offset = LargeOffsetGEP->second;
6725 if (Offset != BaseOffset) {
6726 TargetLowering::AddrMode AddrMode;
6727 AddrMode.HasBaseReg = true;
6728 AddrMode.BaseOffs = Offset - BaseOffset;
6729 // The result type of the GEP might not be the type of the memory
6730 // access.
6731 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6732 GEP->getResultElementType(),
6733 GEP->getAddressSpace())) {
6734 // We need to create a new base if the offset to the current base is
6735 // too large to fit into the addressing mode. So, a very large struct
6736 // may be split into several parts.
6737 BaseGEP = GEP;
6738 BaseOffset = Offset;
6739 NewBaseGEP = nullptr;
6740 }
6741 }
6742
6743 // Generate a new GEP to replace the current one.
6744 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6745
6746 if (!NewBaseGEP) {
6747 // Create a new base if we don't have one yet. Find the insertion
6748 // pointer for the new base first.
6749 createNewBase(BaseOffset, OldBase, GEP);
6750 }
6751
6752 IRBuilder<> Builder(GEP);
6753 Value *NewGEP = NewBaseGEP;
6754 if (Offset != BaseOffset) {
6755 // Calculate the new offset for the new GEP.
6756 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6757 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6758 }
6759 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6760 LargeOffsetGEPID.erase(GEP);
6761 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6762 GEP->eraseFromParent();
6763 Changed = true;
6764 }
6765 }
6766 return Changed;
6767}
6768
6769bool CodeGenPrepare::optimizePhiType(
6770 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6771 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6772 // We are looking for a collection of interconnected phi nodes that together
6773 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6774 // are of the same type. Convert the whole set of nodes to the type of the
6775 // bitcast.
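// As a rough illustration (hand-written here, not from an actual test), a
// network such as
//   %f = bitcast i32 %i to float
//   %p = phi float [ %f, %bb1 ], [ %p2, %bb2 ]
//   %r = bitcast float %p to i32
// can be rewritten so that %p (and any connected phis like %p2) become i32
// phis, allowing both bitcasts to be removed.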
6776 Type *PhiTy = I->getType();
6777 Type *ConvertTy = nullptr;
6778 if (Visited.count(I) ||
6779 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6780 return false;
6781
6782 SmallVector<Instruction *, 4> Worklist;
6783 Worklist.push_back(cast<Instruction>(I));
6784 SmallPtrSet<PHINode *, 4> PhiNodes;
6785 SmallPtrSet<ConstantData *, 4> Constants;
6786 PhiNodes.insert(I);
6787 Visited.insert(I);
6788 SmallPtrSet<Instruction *, 4> Defs;
6789 SmallPtrSet<Instruction *, 4> Uses;
6790 // This works by adding extra bitcasts between load/stores and removing
6791 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6792 // we can get in the situation where we remove a bitcast in one iteration
6793 // just to add it again in the next. We need to ensure that at least one
6794 // bitcast we remove is anchored to something that will not change back.
6795 bool AnyAnchored = false;
6796
6797 while (!Worklist.empty()) {
6798 Instruction *II = Worklist.pop_back_val();
6799
6800 if (auto *Phi = dyn_cast<PHINode>(II)) {
6801 // Handle Defs, which might also be PHI's
6802 for (Value *V : Phi->incoming_values()) {
6803 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6804 if (!PhiNodes.count(OpPhi)) {
6805 if (!Visited.insert(OpPhi).second)
6806 return false;
6807 PhiNodes.insert(OpPhi);
6808 Worklist.push_back(OpPhi);
6809 }
6810 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6811 if (!OpLoad->isSimple())
6812 return false;
6813 if (Defs.insert(OpLoad).second)
6814 Worklist.push_back(OpLoad);
6815 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6816 if (Defs.insert(OpEx).second)
6817 Worklist.push_back(OpEx);
6818 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6819 if (!ConvertTy)
6820 ConvertTy = OpBC->getOperand(0)->getType();
6821 if (OpBC->getOperand(0)->getType() != ConvertTy)
6822 return false;
6823 if (Defs.insert(OpBC).second) {
6824 Worklist.push_back(OpBC);
6825 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6826 !isa<ExtractElementInst>(OpBC->getOperand(0));
6827 }
6828 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6829 Constants.insert(OpC);
6830 else
6831 return false;
6832 }
6833 }
6834
6835 // Handle uses which might also be phi's
6836 for (User *V : II->users()) {
6837 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6838 if (!PhiNodes.count(OpPhi)) {
6839 if (Visited.count(OpPhi))
6840 return false;
6841 PhiNodes.insert(OpPhi);
6842 Visited.insert(OpPhi);
6843 Worklist.push_back(OpPhi);
6844 }
6845 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6846 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6847 return false;
6848 Uses.insert(OpStore);
6849 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6850 if (!ConvertTy)
6851 ConvertTy = OpBC->getType();
6852 if (OpBC->getType() != ConvertTy)
6853 return false;
6854 Uses.insert(OpBC);
6855 AnyAnchored |=
6856 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6857 } else {
6858 return false;
6859 }
6860 }
6861 }
6862
6863 if (!ConvertTy || !AnyAnchored ||
6864 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6865 return false;
6866
6867 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6868 << *ConvertTy << "\n");
6869
6870 // Create all the new phi nodes of the new type, and bitcast any loads to the
6871 // correct type.
6872 ValueToValueMap ValMap;
6873 for (ConstantData *C : Constants)
6874 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6875 for (Instruction *D : Defs) {
6876 if (isa<BitCastInst>(D)) {
6877 ValMap[D] = D->getOperand(0);
6878 DeletedInstrs.insert(D);
6879 } else {
6880 BasicBlock::iterator insertPt = std::next(D->getIterator());
6881 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6882 }
6883 }
6884 for (PHINode *Phi : PhiNodes)
6885 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6886 Phi->getName() + ".tc", Phi->getIterator());
6887 // Pipe together all the PhiNodes.
6888 for (PHINode *Phi : PhiNodes) {
6889 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6890 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6891 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6892 Phi->getIncomingBlock(i));
6893 Visited.insert(NewPhi);
6894 }
6895 // And finally pipe up the stores and bitcasts
6896 for (Instruction *U : Uses) {
6897 if (isa<BitCastInst>(U)) {
6898 DeletedInstrs.insert(U);
6899 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6900 } else {
6901 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6902 U->getIterator()));
6903 }
6904 }
6905
6906 // Save the removed phis to be deleted later.
6907 DeletedInstrs.insert_range(PhiNodes);
6908 return true;
6909}
6910
6911bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6912 if (!OptimizePhiTypes)
6913 return false;
6914
6915 bool Changed = false;
6916 SmallPtrSet<PHINode *, 4> Visited;
6917 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6918
6919 // Attempt to optimize all the phis in the function to the correct type.
6920 for (auto &BB : F)
6921 for (auto &Phi : BB.phis())
6922 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6923
6924 // Remove any old phi's that have been converted.
6925 for (auto *I : DeletedInstrs) {
6926 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6927 I->eraseFromParent();
6928 }
6929
6930 return Changed;
6931}
6932
6933/// Return true, if an ext(load) can be formed from an extension in
6934/// \p MovedExts.
6935bool CodeGenPrepare::canFormExtLd(
6936 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6937 Instruction *&Inst, bool HasPromoted) {
6938 for (auto *MovedExtInst : MovedExts) {
6939 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6940 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6941 Inst = MovedExtInst;
6942 break;
6943 }
6944 }
6945 if (!LI)
6946 return false;
6947
6948 // If they're already in the same block, there's nothing to do.
6949 // Make the cheap checks first if we did not promote.
6950 // If we promoted, we need to check if it is indeed profitable.
6951 if (!HasPromoted && LI->getParent() == Inst->getParent())
6952 return false;
6953
6954 return TLI->isExtLoad(LI, Inst, *DL);
6955}
6956
6957/// Move a zext or sext fed by a load into the same basic block as the load,
6958/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6959/// extend into the load.
6960///
6961/// E.g.,
6962/// \code
6963/// %ld = load i32* %addr
6964/// %add = add nuw i32 %ld, 4
6965/// %zext = zext i32 %add to i64
6966/// \endcode
6967/// =>
6968/// \code
6969/// %ld = load i32* %addr
6970/// %zext = zext i32 %ld to i64
6971/// %add = add nuw i64 %zext, 4
6972/// \endcode
6973/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
6974/// allows us to match zext(load i32*) to i64.
6975///
6976/// Also, try to promote the computations used to obtain a sign extended
6977/// value used into memory accesses.
6978/// E.g.,
6979/// \code
6980/// a = add nsw i32 b, 3
6981/// d = sext i32 a to i64
6982/// e = getelementptr ..., i64 d
6983/// \endcode
6984/// =>
6985/// \code
6986/// f = sext i32 b to i64
6987/// a = add nsw i64 f, 3
6988/// e = getelementptr ..., i64 a
6989/// \endcode
6990///
6991/// \p Inst[in/out] the extension may be modified during the process if some
6992/// promotions apply.
6993bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6994 bool AllowPromotionWithoutCommonHeader = false;
6995 /// See if it is an interesting sext operation for the address type
6996 /// promotion before trying to promote it, e.g., the ones with the right
6997 /// type and used in memory accesses.
6998 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6999 *Inst, AllowPromotionWithoutCommonHeader);
7000 TypePromotionTransaction TPT(RemovedInsts);
7001 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7002 TPT.getRestorationPoint();
7003 SmallVector<Instruction *, 1> Exts;
7004 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7005 Exts.push_back(Inst);
7006
7007 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7008
7009 // Look for a load being extended.
7010 LoadInst *LI = nullptr;
7011 Instruction *ExtFedByLoad;
7012
7013 // Try to promote a chain of computation if it allows to form an extended
7014 // load.
7015 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7016 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7017 TPT.commit();
7018 // Move the extend into the same block as the load.
7019 ExtFedByLoad->moveAfter(LI);
7020 ++NumExtsMoved;
7021 Inst = ExtFedByLoad;
7022 return true;
7023 }
7024
7025 // Continue promoting SExts if the target considers it worthwhile.
7026 if (ATPConsiderable &&
7027 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7028 HasPromoted, TPT, SpeculativelyMovedExts))
7029 return true;
7030
7031 TPT.rollback(LastKnownGood);
7032 return false;
7033}
7034
7035// Perform address type promotion if doing so is profitable.
7036// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7037// instructions that sign extended the same initial value. However, if
7038// AllowPromotionWithoutCommonHeader == true, we expect promoting the
7039// extension by itself to be profitable.
7040bool CodeGenPrepare::performAddressTypePromotion(
7041 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7042 bool HasPromoted, TypePromotionTransaction &TPT,
7043 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7044 bool Promoted = false;
7045 SmallPtrSet<Instruction *, 1> UnhandledExts;
7046 bool AllSeenFirst = true;
7047 for (auto *I : SpeculativelyMovedExts) {
7048 Value *HeadOfChain = I->getOperand(0);
7049 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
7050 SeenChainsForSExt.find(HeadOfChain);
7051 // If there is an unhandled SExt which has the same header, try to promote
7052 // it as well.
7053 if (AlreadySeen != SeenChainsForSExt.end()) {
7054 if (AlreadySeen->second != nullptr)
7055 UnhandledExts.insert(AlreadySeen->second);
7056 AllSeenFirst = false;
7057 }
7058 }
7059
7060 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7061 SpeculativelyMovedExts.size() == 1)) {
7062 TPT.commit();
7063 if (HasPromoted)
7064 Promoted = true;
7065 for (auto *I : SpeculativelyMovedExts) {
7066 Value *HeadOfChain = I->getOperand(0);
7067 SeenChainsForSExt[HeadOfChain] = nullptr;
7068 ValToSExtendedUses[HeadOfChain].push_back(I);
7069 }
7070 // Update Inst since promotion happened.
7071 Inst = SpeculativelyMovedExts.pop_back_val();
7072 } else {
7073 // This is the first chain visited from the header, keep the current chain
7074 // as unhandled. Defer promoting it until we encounter another SExt
7075 // chain derived from the same header.
7076 for (auto *I : SpeculativelyMovedExts) {
7077 Value *HeadOfChain = I->getOperand(0);
7078 SeenChainsForSExt[HeadOfChain] = Inst;
7079 }
7080 return false;
7081 }
7082
7083 if (!AllSeenFirst && !UnhandledExts.empty())
7084 for (auto *VisitedSExt : UnhandledExts) {
7085 if (RemovedInsts.count(VisitedSExt))
7086 continue;
7087 TypePromotionTransaction TPT(RemovedInsts);
7088 SmallVector<Instruction *, 1> Exts;
7089 SmallVector<Instruction *, 2> Chains;
7090 Exts.push_back(VisitedSExt);
7091 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7092 TPT.commit();
7093 if (HasPromoted)
7094 Promoted = true;
7095 for (auto *I : Chains) {
7096 Value *HeadOfChain = I->getOperand(0);
7097 // Mark this as handled.
7098 SeenChainsForSExt[HeadOfChain] = nullptr;
7099 ValToSExtendedUses[HeadOfChain].push_back(I);
7100 }
7101 }
7102 return Promoted;
7103}
7104
7105bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7106 BasicBlock *DefBB = I->getParent();
7107
7108 // If the result of a {s|z}ext and its source are both live out, rewrite all
7109 // other uses of the source with result of extension.
7110 Value *Src = I->getOperand(0);
7111 if (Src->hasOneUse())
7112 return false;
7113
7114 // Only do this xform if truncating is free.
7115 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7116 return false;
7117
7118 // Only safe to perform the optimization if the source is also defined in
7119 // this block.
7120 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7121 return false;
7122
7123 bool DefIsLiveOut = false;
7124 for (User *U : I->users()) {
7125 Instruction *UI = cast<Instruction>(U);
7126
7127 // Figure out which BB this ext is used in.
7128 BasicBlock *UserBB = UI->getParent();
7129 if (UserBB == DefBB)
7130 continue;
7131 DefIsLiveOut = true;
7132 break;
7133 }
7134 if (!DefIsLiveOut)
7135 return false;
7136
7137 // Make sure none of the uses are PHI nodes.
7138 for (User *U : Src->users()) {
7139 Instruction *UI = cast<Instruction>(U);
7140 BasicBlock *UserBB = UI->getParent();
7141 if (UserBB == DefBB)
7142 continue;
7143 // Be conservative. We don't want this xform to end up introducing
7144 // reloads just before load / store instructions.
7145 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7146 return false;
7147 }
7148
7149 // InsertedTruncs - Insert only one trunc in each block.
7150 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7151
7152 bool MadeChange = false;
7153 for (Use &U : Src->uses()) {
7154 Instruction *User = cast<Instruction>(U.getUser());
7155
7156 // Figure out which BB this ext is used in.
7157 BasicBlock *UserBB = User->getParent();
7158 if (UserBB == DefBB)
7159 continue;
7160
7161 // Both src and def are live in this block. Rewrite the use.
7162 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7163
7164 if (!InsertedTrunc) {
7165 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7166 assert(InsertPt != UserBB->end());
7167 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7168 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7169 InsertedInsts.insert(InsertedTrunc);
7170 }
7171
7172 // Replace a use of the {s|z}ext source with a use of the result.
7173 U = InsertedTrunc;
7174 ++NumExtUses;
7175 MadeChange = true;
7176 }
7177
7178 return MadeChange;
7179}
7180
7181// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7182// just after the load if the target can fold this into one extload instruction,
7183// with the hope of eliminating some of the other later "and" instructions using
7184// the loaded value. "and"s that are made trivially redundant by the insertion
7185// of the new "and" are removed by this function, while others (e.g. those whose
7186// path from the load goes through a phi) are left for isel to potentially
7187// remove.
7188//
7189// For example:
7190//
7191// b0:
7192// x = load i32
7193// ...
7194// b1:
7195// y = and x, 0xff
7196// z = use y
7197//
7198// becomes:
7199//
7200// b0:
7201// x = load i32
7202// x' = and x, 0xff
7203// ...
7204// b1:
7205// z = use x'
7206//
7207// whereas:
7208//
7209// b0:
7210// x1 = load i32
7211// ...
7212// b1:
7213// x2 = load i32
7214// ...
7215// b2:
7216// x = phi x1, x2
7217// y = and x, 0xff
7218//
7219// becomes (after a call to optimizeLoadExt for each load):
7220//
7221// b0:
7222// x1 = load i32
7223// x1' = and x1, 0xff
7224// ...
7225// b1:
7226// x2 = load i32
7227// x2' = and x2, 0xff
7228// ...
7229// b2:
7230// x = phi x1', x2'
7231// y = and x, 0xff
7232bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7233 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7234 return false;
7235
7236 // Skip loads we've already transformed.
7237 if (Load->hasOneUse() &&
7238 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7239 return false;
7240
7241 // Look at all uses of Load, looking through phis, to determine how many bits
7242 // of the loaded value are needed.
7243 SmallVector<Instruction *, 8> WorkList;
7244 SmallPtrSet<Instruction *, 16> Visited;
7245 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7246 SmallVector<Instruction *, 8> DropFlags;
7247 for (auto *U : Load->users())
7248 WorkList.push_back(cast<Instruction>(U));
7249
7250 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7251 unsigned BitWidth = LoadResultVT.getSizeInBits();
7252 // If the BitWidth is 0, do not try to optimize the type
7253 if (BitWidth == 0)
7254 return false;
7255
7256 APInt DemandBits(BitWidth, 0);
7257 APInt WidestAndBits(BitWidth, 0);
7258
7259 while (!WorkList.empty()) {
7260 Instruction *I = WorkList.pop_back_val();
7261
7262 // Break use-def graph loops.
7263 if (!Visited.insert(I).second)
7264 continue;
7265
7266 // For a PHI node, push all of its users.
7267 if (auto *Phi = dyn_cast<PHINode>(I)) {
7268 for (auto *U : Phi->users())
7269 WorkList.push_back(cast<Instruction>(U));
7270 continue;
7271 }
7272
7273 switch (I->getOpcode()) {
7274 case Instruction::And: {
7275 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7276 if (!AndC)
7277 return false;
7278 APInt AndBits = AndC->getValue();
7279 DemandBits |= AndBits;
7280 // Keep track of the widest and mask we see.
7281 if (AndBits.ugt(WidestAndBits))
7282 WidestAndBits = AndBits;
7283 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7284 AndsToMaybeRemove.push_back(I);
7285 break;
7286 }
7287
7288 case Instruction::Shl: {
7289 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7290 if (!ShlC)
7291 return false;
7292 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7293 DemandBits.setLowBits(BitWidth - ShiftAmt);
7294 DropFlags.push_back(I);
7295 break;
7296 }
7297
7298 case Instruction::Trunc: {
7299 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7300 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7301 DemandBits.setLowBits(TruncBitWidth);
7302 DropFlags.push_back(I);
7303 break;
7304 }
7305
7306 default:
7307 return false;
7308 }
7309 }
7310
7311 uint32_t ActiveBits = DemandBits.getActiveBits();
7312 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7313 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7314 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7315 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7316 // followed by an AND.
7317 // TODO: Look into removing this restriction by fixing backends to either
7318 // return false for isLoadExtLegal for i1 or have them select this pattern to
7319 // a single instruction.
7320 //
7321 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7322 // mask, since these are the only ands that will be removed by isel.
7323 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7324 WidestAndBits != DemandBits)
7325 return false;
7326
7327 LLVMContext &Ctx = Load->getType()->getContext();
7328 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7329 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7330
7331 // Reject cases that won't be matched as extloads.
7332 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7333 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7334 return false;
7335
7336 IRBuilder<> Builder(Load->getNextNode());
7337 auto *NewAnd = cast<Instruction>(
7338 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7339 // Mark this instruction as "inserted by CGP", so that other
7340 // optimizations don't touch it.
7341 InsertedInsts.insert(NewAnd);
7342
7343 // Replace all uses of load with new and (except for the use of load in the
7344 // new and itself).
7345 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7346 NewAnd->setOperand(0, Load);
7347
7348 // Remove any and instructions that are now redundant.
7349 for (auto *And : AndsToMaybeRemove)
7350 // Check that the and mask is the same as the one we decided to put on the
7351 // new and.
7352 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7353 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7354 if (&*CurInstIterator == And)
7355 CurInstIterator = std::next(And->getIterator());
7356 And->eraseFromParent();
7357 ++NumAndUses;
7358 }
7359
7360 // NSW flags may no longer hold.
7361 for (auto *Inst : DropFlags)
7362 Inst->setHasNoSignedWrap(false);
7363
7364 ++NumAndsAdded;
7365 return true;
7366}
7367
7368/// Check if V (an operand of a select instruction) is an expensive instruction
7369/// that is only used once.
7370static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7371 auto *I = dyn_cast<Instruction>(V);
7372 // If it's safe to speculatively execute, then it should not have side
7373 // effects; therefore, it's safe to sink and possibly *not* execute.
7374 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7375 TTI->isExpensiveToSpeculativelyExecute(I);
7376}
7377
7378/// Returns true if a SelectInst should be turned into an explicit branch.
7379static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7380 const TargetLowering *TLI,
7381 SelectInst *SI) {
7382 // If even a predictable select is cheap, then a branch can't be cheaper.
7383 if (!TLI->isPredictableSelectExpensive())
7384 return false;
7385
7386 // FIXME: This should use the same heuristics as IfConversion to determine
7387 // whether a select is better represented as a branch.
7388
7389 // If metadata tells us that the select condition is obviously predictable,
7390 // then we want to replace the select with a branch.
7391 uint64_t TrueWeight, FalseWeight;
7392 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7393 uint64_t Max = std::max(TrueWeight, FalseWeight);
7394 uint64_t Sum = TrueWeight + FalseWeight;
7395 if (Sum != 0) {
7396 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7397 if (Probability > TTI->getPredictableBranchThreshold())
7398 return true;
7399 }
7400 }
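  // For illustration only: in the check above, a select annotated with, say,
  // !prof !{!"branch_weights", i32 1000, i32 1} yields a probability far above
  // any realistic predictable-branch threshold, so it is worth expanding into
  // a branch even before looking at the operands.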
7401
7402 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7403
7404 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7405 // comparison condition. If the compare has more than one use, there's
7406 // probably another cmov or setcc around, so it's not worth emitting a branch.
7407 if (!Cmp || !Cmp->hasOneUse())
7408 return false;
7409
7410 // If either operand of the select is expensive and only needed on one side
7411 // of the select, we should form a branch.
7412 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7413 sinkSelectOperand(TTI, SI->getFalseValue()))
7414 return true;
7415
7416 return false;
7417}
7418
7419/// If \p isTrue is true, return the true value of \p SI, otherwise return the
7420/// false value of \p SI. If the true/false value of \p SI is defined by any
7421/// select instructions in \p Selects, look through the defining select
7422/// instruction until the true/false value is not defined in \p Selects.
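///
/// For instance (an illustrative sketch only), given
/// \code
///   %sel1 = select i1 %c, i32 %a, i32 %b
///   %sel2 = select i1 %c, i32 %sel1, i32 %d
/// \endcode
/// with both selects in \p Selects, requesting the true value of %sel2 looks
/// through %sel1 and returns %a.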
7423static Value *
7424getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7425 const SmallPtrSet<const Instruction *, 2> &Selects) {
7426 Value *V = nullptr;
7427
7428 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7429 DefSI = dyn_cast<SelectInst>(V)) {
7430 assert(DefSI->getCondition() == SI->getCondition() &&
7431 "The condition of DefSI does not match with SI");
7432 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7433 }
7434
7435 assert(V && "Failed to get select true/false value");
7436 return V;
7437}
7438
7439bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7440 assert(Shift->isShift() && "Expected a shift");
7441
7442 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7443 // general vector shifts, and (3) the shift amount is a select-of-splatted
7444 // values, hoist the shifts before the select:
7445 // shift Op0, (select Cond, TVal, FVal) -->
7446 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7447 //
7448 // This is inverting a generic IR transform when we know that the cost of a
7449 // general vector shift is more than the cost of 2 shift-by-scalars.
7450 // We can't do this effectively in SDAG because we may not be able to
7451 // determine if the select operands are splats from within a basic block.
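  // As a hand-written sketch (assuming scalar shift amounts are cheap for
  // <4 x i32> on the target), the rewrite turns
  //   %amt = select i1 %c, <4 x i32> splat (i32 2), <4 x i32> splat (i32 3)
  //   %r = shl <4 x i32> %x, %amt
  // into
  //   %s2 = shl <4 x i32> %x, splat (i32 2)
  //   %s3 = shl <4 x i32> %x, splat (i32 3)
  //   %r = select i1 %c, <4 x i32> %s2, <4 x i32> %s3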
7452 Type *Ty = Shift->getType();
7453 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7454 return false;
7455 Value *Cond, *TVal, *FVal;
7456 if (!match(Shift->getOperand(1),
7457 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7458 return false;
7459 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7460 return false;
7461
7462 IRBuilder<> Builder(Shift);
7463 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7464 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7465 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7466 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7467 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7468 Shift->eraseFromParent();
7469 return true;
7470}
7471
7472bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7473 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7474 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7475 "Expected a funnel shift");
7476
7477 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7478 // than general vector shifts, and (3) the shift amount is select-of-splatted
7479 // values, hoist the funnel shifts before the select:
7480 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7481 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7482 //
7483 // This is inverting a generic IR transform when we know that the cost of a
7484 // general vector shift is more than the cost of 2 shift-by-scalars.
7485 // We can't do this effectively in SDAG because we may not be able to
7486 // determine if the select operands are splats from within a basic block.
7487 Type *Ty = Fsh->getType();
7488 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7489 return false;
7490 Value *Cond, *TVal, *FVal;
7491 if (!match(Fsh->getOperand(2),
7492 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7493 return false;
7494 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7495 return false;
7496
7497 IRBuilder<> Builder(Fsh);
7498 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7499 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7500 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7501 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7502 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7503 Fsh->eraseFromParent();
7504 return true;
7505}
7506
7507/// If we have a SelectInst that will likely profit from branch prediction,
7508/// turn it into a branch.
7509bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7510 if (DisableSelectToBranch)
7511 return false;
7512
7513 // If the SelectOptimize pass is enabled, selects have already been optimized.
7514 if (!getCGPassBuilderOption().DisableSelectOptimize)
7515 return false;
7516
7517 // Find all consecutive select instructions that share the same condition.
7518 SmallVector<SelectInst *, 2> ASI;
7519 ASI.push_back(SI);
7520 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7521 It != SI->getParent()->end(); ++It) {
7522 SelectInst *I = dyn_cast<SelectInst>(&*It);
7523 if (I && SI->getCondition() == I->getCondition()) {
7524 ASI.push_back(I);
7525 } else {
7526 break;
7527 }
7528 }
7529
7530 SelectInst *LastSI = ASI.back();
7531 // Increment the current iterator to skip the rest of the select instructions
7532 // because they will be either "not lowered" or "all lowered" to branch.
7533 CurInstIterator = std::next(LastSI->getIterator());
7534 // Examine debug-info attached to the consecutive select instructions. They
7535 // won't be individually optimised by optimizeInst, so we need to perform
7536 // DbgVariableRecord maintenance here instead.
7537 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7538 fixupDbgVariableRecordsOnInst(*SI);
7539
7540 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7541
7542 // Can we convert the 'select' to CF?
7543 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7544 return false;
7545
7546 TargetLowering::SelectSupportKind SelectKind;
7547 if (SI->getType()->isVectorTy())
7548 SelectKind = TargetLowering::ScalarCondVectorVal;
7549 else
7550 SelectKind = TargetLowering::ScalarValSelect;
7551
7552 if (TLI->isSelectSupported(SelectKind) &&
7553 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) ||
7554 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7555 return false;
7556
7557 // The DominatorTree needs to be rebuilt by any consumers after this
7558 // transformation. We simply reset here rather than setting the ModifiedDT
7559 // flag to avoid restarting the function walk in runOnFunction for each
7560 // select optimized.
7561 DT.reset();
7562
7563 // Transform a sequence like this:
7564 // start:
7565 // %cmp = cmp uge i32 %a, %b
7566 // %sel = select i1 %cmp, i32 %c, i32 %d
7567 //
7568 // Into:
7569 // start:
7570 // %cmp = cmp uge i32 %a, %b
7571 // %cmp.frozen = freeze %cmp
7572 // br i1 %cmp.frozen, label %select.true, label %select.false
7573 // select.true:
7574 // br label %select.end
7575 // select.false:
7576 // br label %select.end
7577 // select.end:
7578 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7579 //
7580 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7581 // In addition, we may sink instructions that produce %c or %d from
7582 // the entry block into the destination(s) of the new branch.
7583 // If the true or false blocks do not contain a sunken instruction, that
7584 // block and its branch may be optimized away. In that case, one side of the
7585 // first branch will point directly to select.end, and the corresponding PHI
7586 // predecessor block will be the start block.
7587
7588 // Collect values that go on the true side and the values that go on the false
7589 // side.
7590 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7591 for (SelectInst *SI : ASI) {
7592 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7593 TrueInstrs.push_back(cast<Instruction>(V));
7594 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7595 FalseInstrs.push_back(cast<Instruction>(V));
7596 }
7597
7598 // Split the select block, according to how many (if any) values go on each
7599 // side.
7600 BasicBlock *StartBlock = SI->getParent();
7601 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7602 // We should split before any debug-info.
7603 SplitPt.setHeadBit(true);
7604
7605 IRBuilder<> IB(SI);
7606 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7607
7608 BasicBlock *TrueBlock = nullptr;
7609 BasicBlock *FalseBlock = nullptr;
7610 BasicBlock *EndBlock = nullptr;
7611 BranchInst *TrueBranch = nullptr;
7612 BranchInst *FalseBranch = nullptr;
7613 if (TrueInstrs.size() == 0) {
7614 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7615 CondFr, SplitPt, false, nullptr, nullptr, LI));
7616 FalseBlock = FalseBranch->getParent();
7617 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7618 } else if (FalseInstrs.size() == 0) {
7619 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7620 CondFr, SplitPt, false, nullptr, nullptr, LI));
7621 TrueBlock = TrueBranch->getParent();
7622 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7623 } else {
7624 Instruction *ThenTerm = nullptr;
7625 Instruction *ElseTerm = nullptr;
7626 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7627 nullptr, nullptr, LI);
7628 TrueBranch = cast<BranchInst>(ThenTerm);
7629 FalseBranch = cast<BranchInst>(ElseTerm);
7630 TrueBlock = TrueBranch->getParent();
7631 FalseBlock = FalseBranch->getParent();
7632 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7633 }
7634
7635 EndBlock->setName("select.end");
7636 if (TrueBlock)
7637 TrueBlock->setName("select.true.sink");
7638 if (FalseBlock)
7639 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7640 : "select.false.sink");
7641
7642 if (IsHugeFunc) {
7643 if (TrueBlock)
7644 FreshBBs.insert(TrueBlock);
7645 if (FalseBlock)
7646 FreshBBs.insert(FalseBlock);
7647 FreshBBs.insert(EndBlock);
7648 }
7649
7650 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7651
7652 static const unsigned MD[] = {
7653 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7654 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7655 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7656
7657 // Sink expensive instructions into the conditional blocks to avoid executing
7658 // them speculatively.
7659 for (Instruction *I : TrueInstrs)
7660 I->moveBefore(TrueBranch->getIterator());
7661 for (Instruction *I : FalseInstrs)
7662 I->moveBefore(FalseBranch->getIterator());
7663
7664 // If we did not create a new block for one of the 'true' or 'false' paths
7665 // of the condition, it means that side of the branch goes to the end block
7666 // directly and the path originates from the start block from the point of
7667 // view of the new PHI.
7668 if (TrueBlock == nullptr)
7669 TrueBlock = StartBlock;
7670 else if (FalseBlock == nullptr)
7671 FalseBlock = StartBlock;
7672
7673 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7674 // Use a reverse iterator because a later select may use the value of an
7675 // earlier select, and we need to propagate the value through the earlier
7676 // select to get the PHI operand.
7677 for (SelectInst *SI : llvm::reverse(ASI)) {
7678 // The select itself is replaced with a PHI Node.
7679 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7680 PN->insertBefore(EndBlock->begin());
7681 PN->takeName(SI);
7682 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7683 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7684 PN->setDebugLoc(SI->getDebugLoc());
7685
7686 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7687 SI->eraseFromParent();
7688 INS.erase(SI);
7689 ++NumSelectsExpanded;
7690 }
7691
7692 // Instruct OptimizeBlock to skip to the next block.
7693 CurInstIterator = StartBlock->end();
7694 return true;
7695}
7696
7697/// Some targets only accept certain types for splat inputs. For example a VDUP
7698 /// in MVE takes a GPR (integer) register, and the instructions that
7699 /// incorporate a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
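///
/// As an illustrative sketch, assuming TLI->shouldConvertSplatType() returns
/// i32 for a <4 x float> splat, the rewrite below turns
///   %ins = insertelement <4 x float> poison, float %f, i64 0
///   %splat = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
/// into
///   %bc1 = bitcast float %f to i32
///   %ins2 = insertelement <4 x i32> poison, i32 %bc1, i64 0
///   %splat2 = shufflevector <4 x i32> %ins2, <4 x i32> poison, <4 x i32> zeroinitializer
///   %bc2 = bitcast <4 x i32> %splat2 to <4 x float>
/// so that the scalar input stays in a GPR.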
7700bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7701 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7702 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7703 m_Undef(), m_ZeroMask())))
7704 return false;
7705 Type *NewType = TLI->shouldConvertSplatType(SVI);
7706 if (!NewType)
7707 return false;
7708
7709 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7710 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7711 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7712 "Expected a type of the same size!");
7713 auto *NewVecType =
7714 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7715
7716 // Create a bitcast (shuffle (insert (bitcast(..))))
7717 IRBuilder<> Builder(SVI->getContext());
7718 Builder.SetInsertPoint(SVI);
7719 Value *BC1 = Builder.CreateBitCast(
7720 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7721 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7722 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7723
7724 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7725 RecursivelyDeleteTriviallyDeadInstructions(
7726 SVI, TLInfo, nullptr,
7727 [&](Value *V) { removeAllAssertingVHReferences(V); });
7728
7729 // Also hoist the bitcast up to its operand if they are not in the same
7730 // block.
7731 if (auto *BCI = dyn_cast<Instruction>(BC1))
7732 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7733 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7734 !Op->isTerminator() && !Op->isEHPad())
7735 BCI->moveAfter(Op);
7736
7737 return true;
7738}
7739
7740bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7741 // If the operands of I can be folded into a target instruction together with
7742 // I, duplicate and sink them.
7743 SmallVector<Use *, 4> OpsToSink;
7744 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7745 return false;
7746
7747 // OpsToSink can contain multiple uses in a use chain (e.g.
7748 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7749 // uses must come first, so we process the ops in reverse order so as to not
7750 // create invalid IR.
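// Illustrative sketch of the effect: if %s = shufflevector ... and
// %z = zext %s are defined in another block and both are reported as
// profitable to sink into I's target instruction, clones of %s and %z are
// inserted just before I (dominating def first), I and the cloned %z are
// rewired to use the clones, and the originals are erased if they become dead.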
7751 BasicBlock *TargetBB = I->getParent();
7752 bool Changed = false;
7753 SmallVector<Use *, 4> ToReplace;
7754 Instruction *InsertPoint = I;
7755 DenseMap<const Instruction *, unsigned long> InstOrdering;
7756 unsigned long InstNumber = 0;
7757 for (const auto &I : *TargetBB)
7758 InstOrdering[&I] = InstNumber++;
7759
7760 for (Use *U : reverse(OpsToSink)) {
7761 auto *UI = cast<Instruction>(U->get());
7762 if (isa<PHINode>(UI))
7763 continue;
7764 if (UI->getParent() == TargetBB) {
7765 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7766 InsertPoint = UI;
7767 continue;
7768 }
7769 ToReplace.push_back(U);
7770 }
7771
7772 SetVector<Instruction *> MaybeDead;
7773 DenseMap<Instruction *, Instruction *> NewInstructions;
7774 for (Use *U : ToReplace) {
7775 auto *UI = cast<Instruction>(U->get());
7776 Instruction *NI = UI->clone();
7777
7778 if (IsHugeFunc) {
7779 // Now that we have cloned an instruction, its operands' defs may sink to
7780 // this BB. So we put the operand defs' BBs into FreshBBs to do optimization.
7781 for (Value *Op : NI->operands())
7782 if (auto *OpDef = dyn_cast<Instruction>(Op))
7783 FreshBBs.insert(OpDef->getParent());
7784 }
7785
7786 NewInstructions[UI] = NI;
7787 MaybeDead.insert(UI);
7788 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7789 NI->insertBefore(InsertPoint->getIterator());
7790 InsertPoint = NI;
7791 InsertedInsts.insert(NI);
7792
7793 // Update the use for the new instruction, making sure that we update the
7794 // sunk instruction uses, if it is part of a chain that has already been
7795 // sunk.
7796 Instruction *OldI = cast<Instruction>(U->getUser());
7797 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7798 It->second->setOperand(U->getOperandNo(), NI);
7799 else
7800 U->set(NI);
7801 Changed = true;
7802 }
7803
7804 // Remove instructions that are dead after sinking.
7805 for (auto *I : MaybeDead) {
7806 if (!I->hasNUsesOrMore(1)) {
7807 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7808 I->eraseFromParent();
7809 }
7810 }
7811
7812 return Changed;
7813}
7814
7815bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7816 Value *Cond = SI->getCondition();
7817 Type *OldType = Cond->getType();
7818 LLVMContext &Context = Cond->getContext();
7819 EVT OldVT = TLI->getValueType(*DL, OldType);
7820 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7821 unsigned RegWidth = RegType.getSizeInBits();
7822
7823 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7824 return false;
7825
7826 // If the register width is greater than the type width, expand the condition
7827 // of the switch instruction and each case constant to the width of the
7828 // register. By widening the type of the switch condition, subsequent
7829 // comparisons (for case comparisons) will not need to be extended to the
7830 // preferred register width, so we will potentially eliminate N-1 extends,
7831 // where N is the number of cases in the switch.
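// Illustrative example, assuming a 32-bit preferred condition type and
// zero-extension:
// switch i8 %c, label %def [ i8 1, label %a
// i8 2, label %b ]
// becomes
// %c.ext = zext i8 %c to i32
// switch i32 %c.ext, label %def [ i32 1, label %a
// i32 2, label %b ]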
7832 auto *NewType = Type::getIntNTy(Context, RegWidth);
7833
7834 // Extend the switch condition and case constants using the target preferred
7835 // extend unless the switch condition is a function argument with an extend
7836 // attribute. In that case, we can avoid an unnecessary mask/extension by
7837 // matching the argument extension instead.
7838 Instruction::CastOps ExtType = Instruction::ZExt;
7839 // Some targets prefer SExt over ZExt.
7840 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7841 ExtType = Instruction::SExt;
7842
7843 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7844 if (Arg->hasSExtAttr())
7845 ExtType = Instruction::SExt;
7846 if (Arg->hasZExtAttr())
7847 ExtType = Instruction::ZExt;
7848 }
7849
7850 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7851 ExtInst->insertBefore(SI->getIterator());
7852 ExtInst->setDebugLoc(SI->getDebugLoc());
7853 SI->setCondition(ExtInst);
7854 for (auto Case : SI->cases()) {
7855 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7856 APInt WideConst = (ExtType == Instruction::ZExt)
7857 ? NarrowConst.zext(RegWidth)
7858 : NarrowConst.sext(RegWidth);
7859 Case.setValue(ConstantInt::get(Context, WideConst));
7860 }
7861
7862 return true;
7863}
7864
7865bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7866 // The SCCP optimization tends to produce code like this:
7867 // switch(x) { case 42: phi(42, ...) }
7868 // Materializing the constant for the phi-argument needs instructions; So we
7869 // change the code to:
7870 // switch(x) { case 42: phi(x, ...) }
7871
7872 Value *Condition = SI->getCondition();
7873 // Avoid endless loop in degenerate case.
7874 if (isa<ConstantInt>(*Condition))
7875 return false;
7876
7877 bool Changed = false;
7878 BasicBlock *SwitchBB = SI->getParent();
7879 Type *ConditionType = Condition->getType();
7880
7881 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7882 ConstantInt *CaseValue = Case.getCaseValue();
7883 BasicBlock *CaseBB = Case.getCaseSuccessor();
7884 // Set to true if we previously checked that `CaseBB` is only reached by
7885 // a single case from this switch.
7886 bool CheckedForSinglePred = false;
7887 for (PHINode &PHI : CaseBB->phis()) {
7888 Type *PHIType = PHI.getType();
7889 // If ZExt is free then we can also catch patterns like this:
7890 // switch((i32)x) { case 42: phi((i64)42, ...); }
7891 // and replace `(i64)42` with `zext i32 %x to i64`.
7892 bool TryZExt =
7893 PHIType->isIntegerTy() &&
7894 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7895 TLI->isZExtFree(ConditionType, PHIType);
7896 if (PHIType == ConditionType || TryZExt) {
7897 // Set to true to skip this case because of multiple preds.
7898 bool SkipCase = false;
7899 Value *Replacement = nullptr;
7900 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7901 Value *PHIValue = PHI.getIncomingValue(I);
7902 if (PHIValue != CaseValue) {
7903 if (!TryZExt)
7904 continue;
7905 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7906 if (!PHIValueInt ||
7907 PHIValueInt->getValue() !=
7908 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7909 continue;
7910 }
7911 if (PHI.getIncomingBlock(I) != SwitchBB)
7912 continue;
7913 // We cannot optimize if there are multiple case labels jumping to
7914 // this block. This check may get expensive when there are many
7915 // case labels so we test for it last.
7916 if (!CheckedForSinglePred) {
7917 CheckedForSinglePred = true;
7918 if (SI->findCaseDest(CaseBB) == nullptr) {
7919 SkipCase = true;
7920 break;
7921 }
7922 }
7923
7924 if (Replacement == nullptr) {
7925 if (PHIValue == CaseValue) {
7926 Replacement = Condition;
7927 } else {
7928 IRBuilder<> Builder(SI);
7929 Replacement = Builder.CreateZExt(Condition, PHIType);
7930 }
7931 }
7932 PHI.setIncomingValue(I, Replacement);
7933 Changed = true;
7934 }
7935 if (SkipCase)
7936 break;
7937 }
7938 }
7939 }
7940 return Changed;
7941}
7942
7943bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7944 bool Changed = optimizeSwitchType(SI);
7945 Changed |= optimizeSwitchPhiConstants(SI);
7946 return Changed;
7947}
7948
7949namespace {
7950
7951/// Helper class to promote a scalar operation to a vector one.
7952 /// This class is used to move an extractelement transition downward.
7953/// E.g.,
7954/// a = vector_op <2 x i32>
7955/// b = extractelement <2 x i32> a, i32 0
7956/// c = scalar_op b
7957/// store c
7958///
7959/// =>
7960/// a = vector_op <2 x i32>
7961/// c = vector_op a (equivalent to scalar_op on the related lane)
7962/// * d = extractelement <2 x i32> c, i32 0
7963/// * store d
7964 /// Assuming both extractelement and store can be combined, we get rid of the
7965/// transition.
7966class VectorPromoteHelper {
7967 /// DataLayout associated with the current module.
7968 const DataLayout &DL;
7969
7970 /// Used to perform some checks on the legality of vector operations.
7971 const TargetLowering &TLI;
7972
7973 /// Used to estimate the cost of the promoted chain.
7974 const TargetTransformInfo &TTI;
7975
7976 /// The transition being moved downwards.
7977 Instruction *Transition;
7978
7979 /// The sequence of instructions to be promoted.
7980 SmallVector<Instruction *, 4> InstsToBePromoted;
7981
7982 /// Cost of combining a store and an extract.
7983 unsigned StoreExtractCombineCost;
7984
7985 /// Instruction that will be combined with the transition.
7986 Instruction *CombineInst = nullptr;
7987
7988 /// The instruction that represents the current end of the transition.
7989 /// Since we are faking the promotion until we reach the end of the chain
7990 /// of computation, we need a way to get the current end of the transition.
7991 Instruction *getEndOfTransition() const {
7992 if (InstsToBePromoted.empty())
7993 return Transition;
7994 return InstsToBePromoted.back();
7995 }
7996
7997 /// Return the index of the original value in the transition.
7998 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7999 /// c, is at index 0.
8000 unsigned getTransitionOriginalValueIdx() const {
8001 assert(isa<ExtractElementInst>(Transition) &&
8002 "Other kind of transitions are not supported yet");
8003 return 0;
8004 }
8005
8006 /// Return the index of the index in the transition.
8007 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8008 /// is at index 1.
8009 unsigned getTransitionIdx() const {
8010 assert(isa<ExtractElementInst>(Transition) &&
8011 "Other kind of transitions are not supported yet");
8012 return 1;
8013 }
8014
8015 /// Get the type of the transition.
8016 /// This is the type of the original value.
8017 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8018 /// transition is <2 x i32>.
8019 Type *getTransitionType() const {
8020 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8021 }
8022
8023 /// Promote \p ToBePromoted by moving \p Def downward through it.
8024 /// I.e., we have the following sequence:
8025 /// Def = Transition <ty1> a to <ty2>
8026 /// b = ToBePromoted <ty2> Def, ...
8027 /// =>
8028 /// b = ToBePromoted <ty1> a, ...
8029 /// Def = Transition <ty1> ToBePromoted to <ty2>
8030 void promoteImpl(Instruction *ToBePromoted);
8031
8032 /// Check whether or not it is profitable to promote all the
8033 /// instructions enqueued to be promoted.
8034 bool isProfitableToPromote() {
8035 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8036 unsigned Index = isa<ConstantInt>(ValIdx)
8037 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8038 : -1;
8039 Type *PromotedType = getTransitionType();
8040
8041 StoreInst *ST = cast<StoreInst>(CombineInst);
8042 unsigned AS = ST->getPointerAddressSpace();
8043 // Check if this store is supported.
8044 if (!TLI.allowsMisalignedMemoryAccesses(
8045 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8046 ST->getAlign())) {
8047 // If this is not supported, there is no way we can combine
8048 // the extract with the store.
8049 return false;
8050 }
8051
8052 // The scalar chain of computation has to pay for the transition
8053 // scalar to vector.
8054 // The vector chain has to account for the combining cost.
8055 enum TargetTransformInfo::TargetCostKind CostKind =
8056 TargetTransformInfo::TCK_RecipThroughput;
8057 InstructionCost ScalarCost =
8058 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8059 InstructionCost VectorCost = StoreExtractCombineCost;
8060 for (const auto &Inst : InstsToBePromoted) {
8061 // Compute the cost.
8062 // By construction, all instructions being promoted are arithmetic ones.
8063 // Moreover, one argument is a constant that can be viewed as a splat
8064 // constant.
8065 Value *Arg0 = Inst->getOperand(0);
8066 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8067 isa<ConstantFP>(Arg0);
8068 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8069 if (IsArg0Constant)
8070 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8071 else
8072 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
8073
8074 ScalarCost += TTI.getArithmeticInstrCost(
8075 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8076 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8077 CostKind, Arg0Info, Arg1Info);
8078 }
8079 LLVM_DEBUG(
8080 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8081 << ScalarCost << "\nVector: " << VectorCost << '\n');
8082 return ScalarCost > VectorCost;
8083 }
8084
8085 /// Generate a constant vector with \p Val with the same
8086 /// number of elements as the transition.
8087 /// \p UseSplat defines whether or not \p Val should be replicated
8088 /// across the whole vector.
8089 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8090 /// otherwise we generate a vector with as many poison as possible:
8091 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8092 /// used at the index of the extract.
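///
/// For example, for a <4 x i32> transition extracting index 2 and Val == 7,
/// UseSplat == false yields <i32 poison, i32 poison, i32 7, i32 poison>,
/// while UseSplat == true yields <i32 7, i32 7, i32 7, i32 7>.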
8093 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8094 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8095 if (!UseSplat) {
8096 // If we cannot determine where the constant must be, we have to
8097 // use a splat constant.
8098 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8099 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8100 ExtractIdx = CstVal->getSExtValue();
8101 else
8102 UseSplat = true;
8103 }
8104
8105 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8106 if (UseSplat)
8107 return ConstantVector::getSplat(EC, Val);
8108
8109 if (!EC.isScalable()) {
8110 SmallVector<Constant *, 4> ConstVec;
8111 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8112 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8113 if (Idx == ExtractIdx)
8114 ConstVec.push_back(Val);
8115 else
8116 ConstVec.push_back(PoisonVal);
8117 }
8118 return ConstantVector::get(ConstVec);
8119 } else
8121 "Generate scalable vector for non-splat is unimplemented");
8122 }
8123
8124 /// Check if promoting the operand at \p OperandIdx in \p Use to a vector
8125 /// type can trigger undefined behavior.
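///
/// For example, promoting the divisor of 'udiv i32 %x, %extracted' would put
/// poison into the lanes that are never extracted, which could introduce a
/// division by poison; such operands are either rejected (shouldPromote) or
/// rebuilt as a splat constant (getConstantVector).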
8126 static bool canCauseUndefinedBehavior(const Instruction *Use,
8127 unsigned OperandIdx) {
8128 // It is not safe to introduce undef when the operand is on
8129 // the right hand side of a division-like instruction.
8130 if (OperandIdx != 1)
8131 return false;
8132 switch (Use->getOpcode()) {
8133 default:
8134 return false;
8135 case Instruction::SDiv:
8136 case Instruction::UDiv:
8137 case Instruction::SRem:
8138 case Instruction::URem:
8139 return true;
8140 case Instruction::FDiv:
8141 case Instruction::FRem:
8142 return !Use->hasNoNaNs();
8143 }
8144 llvm_unreachable(nullptr);
8145 }
8146
8147public:
8148 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8149 const TargetTransformInfo &TTI, Instruction *Transition,
8150 unsigned CombineCost)
8151 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8152 StoreExtractCombineCost(CombineCost) {
8153 assert(Transition && "Do not know how to promote null");
8154 }
8155
8156 /// Check if we can promote \p ToBePromoted to \p Type.
8157 bool canPromote(const Instruction *ToBePromoted) const {
8158 // We could support CastInst too.
8159 return isa<BinaryOperator>(ToBePromoted);
8160 }
8161
8162 /// Check if it is profitable to promote \p ToBePromoted
8163 /// by moving the transition downward through it.
8164 bool shouldPromote(const Instruction *ToBePromoted) const {
8165 // Promote only if all the operands can be statically expanded.
8166 // Indeed, we do not want to introduce any new kind of transitions.
8167 for (const Use &U : ToBePromoted->operands()) {
8168 const Value *Val = U.get();
8169 if (Val == getEndOfTransition()) {
8170 // If the use is a division and the transition is on the rhs,
8171 // we cannot promote the operation, otherwise we may create a
8172 // division by zero.
8173 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8174 return false;
8175 continue;
8176 }
8177 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8178 !isa<ConstantFP>(Val))
8179 return false;
8180 }
8181 // Check that the resulting operation is legal.
8182 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8183 if (!ISDOpcode)
8184 return false;
8185 return StressStoreExtract ||
8186 TLI.isOperationLegalOrCustom(
8187 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8188 }
8189
8190 /// Check whether or not \p Use can be combined
8191 /// with the transition.
8192 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8193 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8194
8195 /// Record \p ToBePromoted as part of the chain to be promoted.
8196 void enqueueForPromotion(Instruction *ToBePromoted) {
8197 InstsToBePromoted.push_back(ToBePromoted);
8198 }
8199
8200 /// Set the instruction that will be combined with the transition.
8201 void recordCombineInstruction(Instruction *ToBeCombined) {
8202 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8203 CombineInst = ToBeCombined;
8204 }
8205
8206 /// Promote all the instructions enqueued for promotion if it is
8207 /// profitable.
8208 /// \return True if the promotion happened, false otherwise.
8209 bool promote() {
8210 // Check if there is something to promote.
8211 // Right now, if we do not have anything to combine with,
8212 // we assume the promotion is not profitable.
8213 if (InstsToBePromoted.empty() || !CombineInst)
8214 return false;
8215
8216 // Check cost.
8217 if (!StressStoreExtract && !isProfitableToPromote())
8218 return false;
8219
8220 // Promote.
8221 for (auto &ToBePromoted : InstsToBePromoted)
8222 promoteImpl(ToBePromoted);
8223 InstsToBePromoted.clear();
8224 return true;
8225 }
8226};
8227
8228} // end anonymous namespace
8229
8230void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8231 // At this point, we know that all the operands of ToBePromoted but Def
8232 // can be statically promoted.
8233 // For Def, we need to use its parameter in ToBePromoted:
8234 // b = ToBePromoted ty1 a
8235 // Def = Transition ty1 b to ty2
8236 // Move the transition down.
8237 // 1. Replace all uses of the promoted operation by the transition.
8238 // = ... b => = ... Def.
8239 assert(ToBePromoted->getType() == Transition->getType() &&
8240 "The type of the result of the transition does not match "
8241 "the final type");
8242 ToBePromoted->replaceAllUsesWith(Transition);
8243 // 2. Update the type of the uses.
8244 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8245 Type *TransitionTy = getTransitionType();
8246 ToBePromoted->mutateType(TransitionTy);
8247 // 3. Update all the operands of the promoted operation with promoted
8248 // operands.
8249 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8250 for (Use &U : ToBePromoted->operands()) {
8251 Value *Val = U.get();
8252 Value *NewVal = nullptr;
8253 if (Val == Transition)
8254 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8255 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8256 isa<ConstantFP>(Val)) {
8257 // Use a splat constant if it is not safe to use undef.
8258 NewVal = getConstantVector(
8259 cast<Constant>(Val),
8260 isa<UndefValue>(Val) ||
8261 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8262 } else
8263 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8264 "this?");
8265 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8266 }
8267 Transition->moveAfter(ToBePromoted);
8268 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8269}
8270
8271/// Some targets can do store(extractelement) with one instruction.
8272/// Try to push the extractelement towards the stores when the target
8273/// has this feature and this is profitable.
8274bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8275 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8276 if (DisableStoreExtract ||
8278 (!StressStoreExtract && !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8279 Inst->getOperand(1), CombineCost)))
8280 return false;
8281
8282 // At this point we know that Inst is a vector to scalar transition.
8283 // Try to move it down the def-use chain, until:
8284 // - We can combine the transition with its single use
8285 // => we got rid of the transition.
8286 // - We escape the current basic block
8287 // => we would need to check that we are moving it at a cheaper place and
8288 // we do not do that for now.
8289 BasicBlock *Parent = Inst->getParent();
8290 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8291 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8292 // If the transition has more than one use, assume this is not going to be
8293 // beneficial.
8294 while (Inst->hasOneUse()) {
8295 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8296 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8297
8298 if (ToBePromoted->getParent() != Parent) {
8299 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8300 << ToBePromoted->getParent()->getName()
8301 << ") than the transition (" << Parent->getName()
8302 << ").\n");
8303 return false;
8304 }
8305
8306 if (VPH.canCombine(ToBePromoted)) {
8307 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8308 << "will be combined with: " << *ToBePromoted << '\n');
8309 VPH.recordCombineInstruction(ToBePromoted);
8310 bool Changed = VPH.promote();
8311 NumStoreExtractExposed += Changed;
8312 return Changed;
8313 }
8314
8315 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8316 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8317 return false;
8318
8319 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8320
8321 VPH.enqueueForPromotion(ToBePromoted);
8322 Inst = ToBePromoted;
8323 }
8324 return false;
8325}
8326
8327/// For the instruction sequence of store below, F and I values
8328/// are bundled together as an i64 value before being stored into memory.
8329/// Sometimes it is more efficient to generate separate stores for F and I,
8330/// which can remove the bitwise instructions or sink them to colder places.
8331///
8332/// (store (or (zext (bitcast F to i32) to i64),
8333/// (shl (zext I to i64), 32)), addr) -->
8334/// (store F, addr) and (store I, addr+4)
8335///
8336 /// Similarly, splitting for other merged stores can also be beneficial, like:
8337/// For pair of {i32, i32}, i64 store --> two i32 stores.
8338/// For pair of {i32, i16}, i64 store --> two i32 stores.
8339/// For pair of {i16, i16}, i32 store --> two i16 stores.
8340/// For pair of {i16, i8}, i32 store --> two i16 stores.
8341/// For pair of {i8, i8}, i16 store --> two i8 stores.
8342///
8343/// We allow each target to determine specifically which kind of splitting is
8344/// supported.
8345///
8346/// The store patterns are commonly seen from the simple code snippet below
8347 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8348/// void goo(const std::pair<int, float> &);
8349/// hoo() {
8350/// ...
8351/// goo(std::make_pair(tmp, ftmp));
8352/// ...
8353/// }
8354///
8355/// Although we already have similar splitting in DAG Combine, we duplicate
8356 /// it in CodeGenPrepare to catch cases in which the pattern spans
8357 /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8358/// during code expansion.
8359 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8360 const TargetLowering &TLI) {
8361 // Handle simple but common cases only.
8362 Type *StoreType = SI.getValueOperand()->getType();
8363
8364 // The code below assumes shifting a value by <number of bits>,
8365 // whereas scalable vectors would have to be shifted by
8366 // <2log(vscale) + number of bits> in order to store the
8367 // low/high parts. Bailing out for now.
8368 if (StoreType->isScalableTy())
8369 return false;
8370
8371 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8372 DL.getTypeSizeInBits(StoreType) == 0)
8373 return false;
8374
8375 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8376 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8377 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8378 return false;
8379
8380 // Don't split the store if it is volatile.
8381 if (SI.isVolatile())
8382 return false;
8383
8384 // Match the following patterns:
8385 // (store (or (zext LValue to i64),
8386 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8387 // or
8388 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8389 // (zext LValue to i64),
8390 // Expect both operands of OR and the first operand of SHL have only
8391 // one use.
8392 Value *LValue, *HValue;
8393 if (!match(SI.getValueOperand(),
8394 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8395 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8396 m_SpecificInt(HalfValBitSize))))))
8397 return false;
8398
8399 // Check that LValue and HValue are integers no wider than HalfValBitSize.
8400 if (!LValue->getType()->isIntegerTy() ||
8401 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8402 !HValue->getType()->isIntegerTy() ||
8403 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8404 return false;
8405
8406 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8407 // as the input of target query.
8408 auto *LBC = dyn_cast<BitCastInst>(LValue);
8409 auto *HBC = dyn_cast<BitCastInst>(HValue);
8410 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8411 : EVT::getEVT(LValue->getType());
8412 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8413 : EVT::getEVT(HValue->getType());
8414 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8415 return false;
8416
8417 // Start to split store.
8418 IRBuilder<> Builder(SI.getContext());
8419 Builder.SetInsertPoint(&SI);
8420
8421 // If LValue/HValue is a bitcast in another BB, create a new one in current
8422 // BB so it may be merged with the split stores by the DAG combiner.
8423 if (LBC && LBC->getParent() != SI.getParent())
8424 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8425 if (HBC && HBC->getParent() != SI.getParent())
8426 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8427
8428 bool IsLE = SI.getDataLayout().isLittleEndian();
8429 auto CreateSplitStore = [&](Value *V, bool Upper) {
8430 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8431 Value *Addr = SI.getPointerOperand();
8432 Align Alignment = SI.getAlign();
8433 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8434 if (IsOffsetStore) {
8435 Addr = Builder.CreateGEP(
8436 SplitStoreType, Addr,
8437 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8438
8439 // When splitting the store in half, naturally one half will retain the
8440 // alignment of the original wider store, regardless of whether it was
8441 // over-aligned or not, while the other will require adjustment.
8442 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8443 }
8444 Builder.CreateAlignedStore(V, Addr, Alignment);
8445 };
8446
8447 CreateSplitStore(LValue, false);
8448 CreateSplitStore(HValue, true);
8449
8450 // Delete the old store.
8451 SI.eraseFromParent();
8452 return true;
8453}
8454
8455// Return true if the GEP has two operands, the first operand is of a sequential
8456// type, and the second operand is a constant.
8457 static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8458 gep_type_iterator I = gep_type_begin(*GEP);
8459 return GEP->getNumOperands() == 2 && I.isSequential() &&
8460 isa<ConstantInt>(GEP->getOperand(1));
8461}
8462
8463// Try unmerging GEPs to reduce liveness interference (register pressure) across
8464// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8465// reducing liveness interference across those edges benefits global register
8466// allocation. Currently handles only certain cases.
8467//
8468// For example, unmerge %GEPI and %UGEPI as below.
8469//
8470// ---------- BEFORE ----------
8471// SrcBlock:
8472// ...
8473// %GEPIOp = ...
8474// ...
8475// %GEPI = gep %GEPIOp, Idx
8476// ...
8477// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8478// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8479 // (* %GEPIOp is alive on the indirectbr edges only because it is used by
8480// %UGEPI)
8481//
8482// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8483// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8484// ...
8485//
8486// DstBi:
8487// ...
8488// %UGEPI = gep %GEPIOp, UIdx
8489// ...
8490// ---------------------------
8491//
8492// ---------- AFTER ----------
8493// SrcBlock:
8494// ... (same as above)
8495// (* %GEPI is still alive on the indirectbr edges)
8496// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8497// unmerging)
8498// ...
8499//
8500// DstBi:
8501// ...
8502// %UGEPI = gep %GEPI, (UIdx-Idx)
8503// ...
8504// ---------------------------
8505//
8506// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8507// no longer alive on them.
8508//
8509 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8510// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8511 // not to disable further simplifications and optimizations as a result of GEP
8512// merging.
8513//
8514// Note this unmerging may increase the length of the data flow critical path
8515// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8516// between the register pressure and the length of data-flow critical
8517// path. Restricting this to the uncommon IndirectBr case would minimize the
8518// impact of potentially longer critical path, if any, and the impact on compile
8519// time.
8520 static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8521 const TargetTransformInfo *TTI) {
8522 BasicBlock *SrcBlock = GEPI->getParent();
8523 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8524 // (non-IndirectBr) cases exit early here.
8525 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8526 return false;
8527 // Check that GEPI is a simple gep with a single constant index.
8528 if (!GEPSequentialConstIndexed(GEPI))
8529 return false;
8530 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8531 // Check that GEPI is a cheap one.
8532 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8533 TargetTransformInfo::TCK_SizeAndLatency) >
8534 TargetTransformInfo::TCC_Basic)
8535 return false;
8536 Value *GEPIOp = GEPI->getOperand(0);
8537 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8538 if (!isa<Instruction>(GEPIOp))
8539 return false;
8540 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8541 if (GEPIOpI->getParent() != SrcBlock)
8542 return false;
8543 // Check that GEP is used outside the block, meaning it's alive on the
8544 // IndirectBr edge(s).
8545 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8546 if (auto *I = dyn_cast<Instruction>(Usr)) {
8547 if (I->getParent() != SrcBlock) {
8548 return true;
8549 }
8550 }
8551 return false;
8552 }))
8553 return false;
8554 // The second elements of the GEP chains to be unmerged.
8555 std::vector<GetElementPtrInst *> UGEPIs;
8556 // Check each user of GEPIOp to see if unmerging would make GEPIOp not alive
8557 // on IndirectBr edges.
8558 for (User *Usr : GEPIOp->users()) {
8559 if (Usr == GEPI)
8560 continue;
8561 // Check if Usr is an Instruction. If not, give up.
8562 if (!isa<Instruction>(Usr))
8563 return false;
8564 auto *UI = cast<Instruction>(Usr);
8565 // If Usr is in the same block as GEPIOp, that is fine; skip it.
8566 if (UI->getParent() == SrcBlock)
8567 continue;
8568 // Check if Usr is a GEP. If not, give up.
8569 if (!isa<GetElementPtrInst>(Usr))
8570 return false;
8571 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8572 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8573 // the pointer operand to it. If so, record it in the vector. If not, give
8574 // up.
8575 if (!GEPSequentialConstIndexed(UGEPI))
8576 return false;
8577 if (UGEPI->getOperand(0) != GEPIOp)
8578 return false;
8579 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8580 return false;
8581 if (GEPIIdx->getType() !=
8582 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8583 return false;
8584 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8585 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8586 TargetTransformInfo::TCK_SizeAndLatency) >
8587 TargetTransformInfo::TCC_Basic)
8588 return false;
8589 UGEPIs.push_back(UGEPI);
8590 }
8591 if (UGEPIs.size() == 0)
8592 return false;
8593 // Check the materializing cost of (Uidx-Idx).
8594 for (GetElementPtrInst *UGEPI : UGEPIs) {
8595 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8596 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8597 InstructionCost ImmCost = TTI->getIntImmCost(
8598 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8599 if (ImmCost > TargetTransformInfo::TCC_Basic)
8600 return false;
8601 }
8602 // Now unmerge between GEPI and UGEPIs.
8603 for (GetElementPtrInst *UGEPI : UGEPIs) {
8604 UGEPI->setOperand(0, GEPI);
8605 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8606 Constant *NewUGEPIIdx = ConstantInt::get(
8607 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8608 UGEPI->setOperand(1, NewUGEPIIdx);
8609 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8610 // inbounds to avoid UB.
8611 if (!GEPI->isInBounds()) {
8612 UGEPI->setIsInBounds(false);
8613 }
8614 }
8615 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8616 // alive on IndirectBr edges).
8617 assert(llvm::none_of(GEPIOp->users(),
8618 [&](User *Usr) {
8619 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8620 }) &&
8621 "GEPIOp is used outside SrcBlock");
8622 return true;
8623}
8624
8625static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8626 SmallSet<BasicBlock *, 32> &FreshBBs,
8627 bool IsHugeFunc) {
8628 // Try and convert
8629 // %c = icmp ult %x, 8
8630 // br %c, bla, blb
8631 // %tc = lshr %x, 3
8632 // to
8633 // %tc = lshr %x, 3
8634 // %c = icmp eq %tc, 0
8635 // br %c, bla, blb
8636 // Creating the cmp to zero can be better for the backend, especially if the
8637 // lshr produces flags that can be used automatically.
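// The equality case handled below reuses an existing add/sub/xor instead,
// for example (illustrative):
// %c = icmp eq i32 %x, 7
// %t = xor i32 %x, 7
// becomes
// %t = xor i32 %x, 7
// %c = icmp eq i32 %t, 0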
8638 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8639 return false;
8640
8641 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8642 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8643 return false;
8644
8645 Value *X = Cmp->getOperand(0);
8646 if (!X->hasUseList())
8647 return false;
8648
8649 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8650
8651 for (auto *U : X->users()) {
8652 Instruction *UI = dyn_cast<Instruction>(U);
8653 // A quick dominance check
8654 if (!UI ||
8655 (UI->getParent() != Branch->getParent() &&
8656 UI->getParent() != Branch->getSuccessor(0) &&
8657 UI->getParent() != Branch->getSuccessor(1)) ||
8658 (UI->getParent() != Branch->getParent() &&
8659 !UI->getParent()->getSinglePredecessor()))
8660 continue;
8661
8662 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8663 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8664 IRBuilder<> Builder(Branch);
8665 if (UI->getParent() != Branch->getParent())
8666 UI->moveBefore(Branch->getIterator());
8667 UI->dropPoisonGeneratingFlags();
8668 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8669 ConstantInt::get(UI->getType(), 0));
8670 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8671 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8672 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8673 return true;
8674 }
8675 if (Cmp->isEquality() &&
8676 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8677 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8678 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8679 IRBuilder<> Builder(Branch);
8680 if (UI->getParent() != Branch->getParent())
8681 UI->moveBefore(Branch->getIterator());
8682 UI->dropPoisonGeneratingFlags();
8683 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8684 ConstantInt::get(UI->getType(), 0));
8685 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8686 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8687 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8688 return true;
8689 }
8690 }
8691 return false;
8692}
8693
8694bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8695 bool AnyChange = false;
8696 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8697
8698 // Bail out if we inserted the instruction to prevent optimizations from
8699 // stepping on each other's toes.
8700 if (InsertedInsts.count(I))
8701 return AnyChange;
8702
8703 // TODO: Move into the switch on opcode below here.
8704 if (PHINode *P = dyn_cast<PHINode>(I)) {
8705 // It is possible for very late stage optimizations (such as SimplifyCFG)
8706 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8707 // trivial PHI, go ahead and zap it here.
8708 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8709 LargeOffsetGEPMap.erase(P);
8710 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8711 P->eraseFromParent();
8712 ++NumPHIsElim;
8713 return true;
8714 }
8715 return AnyChange;
8716 }
8717
8718 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8719 // If the source of the cast is a constant, then this should have
8720 // already been constant folded. The only reason NOT to constant fold
8721 // it is if something (e.g. LSR) was careful to place the constant
8722 // evaluation in a block other than then one that uses it (e.g. to hoist
8723 // the address of globals out of a loop). If this is the case, we don't
8724 // want to forward-subst the cast.
8725 if (isa<Constant>(CI->getOperand(0)))
8726 return AnyChange;
8727
8728 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8729 return true;
8730
8731 if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8732 isa<TruncInst>(I)) &&
8733 TLI->optimizeExtendOrTruncateConversion(
8734 I, LI->getLoopFor(I->getParent()), *TTI))
8735 return true;
8736
8737 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8738 /// Sink a zext or sext into its user blocks if the target type doesn't
8739 /// fit in one register
8740 if (TLI->getTypeAction(CI->getContext(),
8741 TLI->getValueType(*DL, CI->getType())) ==
8742 TargetLowering::TypeExpandInteger) {
8743 return SinkCast(CI);
8744 } else {
8745 if (TLI->optimizeExtendOrTruncateConversion(
8746 I, LI->getLoopFor(I->getParent()), *TTI))
8747 return true;
8748
8749 bool MadeChange = optimizeExt(I);
8750 return MadeChange | optimizeExtUses(I);
8751 }
8752 }
8753 return AnyChange;
8754 }
8755
8756 if (auto *Cmp = dyn_cast<CmpInst>(I))
8757 if (optimizeCmp(Cmp, ModifiedDT))
8758 return true;
8759
8760 if (match(I, m_URem(m_Value(), m_Value())))
8761 if (optimizeURem(I))
8762 return true;
8763
8764 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8765 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8766 bool Modified = optimizeLoadExt(LI);
8767 unsigned AS = LI->getPointerAddressSpace();
8768 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8769 return Modified;
8770 }
8771
8772 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8773 if (splitMergedValStore(*SI, *DL, *TLI))
8774 return true;
8775 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8776 unsigned AS = SI->getPointerAddressSpace();
8777 return optimizeMemoryInst(I, SI->getOperand(1),
8778 SI->getOperand(0)->getType(), AS);
8779 }
8780
8781 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8782 unsigned AS = RMW->getPointerAddressSpace();
8783 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8784 }
8785
8786 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8787 unsigned AS = CmpX->getPointerAddressSpace();
8788 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8789 CmpX->getCompareOperand()->getType(), AS);
8790 }
8791
8792 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8793
8794 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8795 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8796 return true;
8797
8798 // TODO: Move this into the switch on opcode - it handles shifts already.
8799 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8800 BinOp->getOpcode() == Instruction::LShr)) {
8801 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8802 if (CI && TLI->hasExtractBitsInsn())
8803 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8804 return true;
8805 }
8806
8807 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8808 if (GEPI->hasAllZeroIndices()) {
8809 /// The GEP operand must be a pointer, so must its result -> BitCast
8810 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8811 GEPI->getName(), GEPI->getIterator());
8812 NC->setDebugLoc(GEPI->getDebugLoc());
8813 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8814 RecursivelyDeleteTriviallyDeadInstructions(
8815 GEPI, TLInfo, nullptr,
8816 [&](Value *V) { removeAllAssertingVHReferences(V); });
8817 ++NumGEPsElim;
8818 optimizeInst(NC, ModifiedDT);
8819 return true;
8820 }
8821 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8822 return true;
8823 }
8824 }
8825
8826 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8827 // freeze(icmp a, const)) -> icmp (freeze a), const
8828 // This helps generate efficient conditional jumps.
8829 Instruction *CmpI = nullptr;
8830 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8831 CmpI = II;
8832 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8833 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8834
8835 if (CmpI && CmpI->hasOneUse()) {
8836 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8837 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8838 isa<ConstantPointerNull>(Op0);
8839 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8840 isa<ConstantPointerNull>(Op1);
8841 if (Const0 || Const1) {
8842 if (!Const0 || !Const1) {
8843 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8844 F->takeName(FI);
8845 CmpI->setOperand(Const0 ? 1 : 0, F);
8846 }
8847 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8848 FI->eraseFromParent();
8849 return true;
8850 }
8851 }
8852 return AnyChange;
8853 }
8854
8855 if (tryToSinkFreeOperands(I))
8856 return true;
8857
8858 switch (I->getOpcode()) {
8859 case Instruction::Shl:
8860 case Instruction::LShr:
8861 case Instruction::AShr:
8862 return optimizeShiftInst(cast<BinaryOperator>(I));
8863 case Instruction::Call:
8864 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8865 case Instruction::Select:
8866 return optimizeSelectInst(cast<SelectInst>(I));
8867 case Instruction::ShuffleVector:
8868 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8869 case Instruction::Switch:
8870 return optimizeSwitchInst(cast<SwitchInst>(I));
8871 case Instruction::ExtractElement:
8872 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8873 case Instruction::Br:
8874 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8875 }
8876
8877 return AnyChange;
8878}
8879
8880/// Given an OR instruction, check to see if this is a bitreverse
8881/// idiom. If so, insert the new intrinsic and return true.
8882bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8883 if (!I.getType()->isIntegerTy() ||
8884 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8885 TLI->getValueType(*DL, I.getType(), true)))
8886 return false;
8887
8888 SmallVector<Instruction *, 4> Insts;
8889 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8890 return false;
8891 Instruction *LastInst = Insts.back();
8892 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8893 RecursivelyDeleteTriviallyDeadInstructions(
8894 &I, TLInfo, nullptr,
8895 [&](Value *V) { removeAllAssertingVHReferences(V); });
8896 return true;
8897}
8898
8899// In this pass we look for GEP and cast instructions that are used
8900// across basic blocks and rewrite them to improve basic-block-at-a-time
8901// selection.
8902bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8903 SunkAddrs.clear();
8904 bool MadeChange = false;
8905
8906 do {
8907 CurInstIterator = BB.begin();
8908 ModifiedDT = ModifyDT::NotModifyDT;
8909 while (CurInstIterator != BB.end()) {
8910 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8911 if (ModifiedDT != ModifyDT::NotModifyDT) {
8912 // For huge functions we tend to quickly go through the inner optimization
8913 // opportunities in the BB. So we go back to the BB head to re-optimize
8914 // each instruction instead of going back to the function head.
8915 if (IsHugeFunc) {
8916 DT.reset();
8917 getDT(*BB.getParent());
8918 break;
8919 } else {
8920 return true;
8921 }
8922 }
8923 }
8924 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8925
8926 bool MadeBitReverse = true;
8927 while (MadeBitReverse) {
8928 MadeBitReverse = false;
8929 for (auto &I : reverse(BB)) {
8930 if (makeBitReverse(I)) {
8931 MadeBitReverse = MadeChange = true;
8932 break;
8933 }
8934 }
8935 }
8936 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8937
8938 return MadeChange;
8939}
8940
8941bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8942 bool AnyChange = false;
8943 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8944 AnyChange |= fixupDbgVariableRecord(DVR);
8945 return AnyChange;
8946}
8947
8948// FIXME: should updating debug-info really cause the "changed" flag to fire,
8949// which can cause a function to be reprocessed?
8950bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8951 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8952 DVR.Type != DbgVariableRecord::LocationType::Assign)
8953 return false;
8954
8955 // Does this DbgVariableRecord refer to a sunk address calculation?
8956 bool AnyChange = false;
8957 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8958 DVR.location_ops().end());
8959 for (Value *Location : LocationOps) {
8960 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8961 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8962 if (SunkAddr) {
8963 // Point dbg.value at locally computed address, which should give the best
8964 // opportunity to be accurately lowered. This update may change the type
8965 // of pointer being referred to; however this makes no difference to
8966 // debugging information, and we can't generate bitcasts that may affect
8967 // codegen.
8968 DVR.replaceVariableLocationOp(Location, SunkAddr);
8969 AnyChange = true;
8970 }
8971 }
8972 return AnyChange;
8973}
8974
8975 static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
8976 DVR->removeFromParent();
8977 BasicBlock *VIBB = VI->getParent();
8978 if (isa<PHINode>(VI))
8979 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8980 else
8981 VIBB->insertDbgRecordAfter(DVR, &*VI);
8982}
8983
8984// A llvm.dbg.value may be using a value before its definition, due to
8985// optimizations in this pass and others. Scan for such dbg.values, and rescue
8986// them by moving the dbg.value to immediately after the value definition.
8987// FIXME: Ideally this should never be necessary, and this has the potential
8988// to re-order dbg.value intrinsics.
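// Illustrative example: a record such as
// #dbg_value(i32 %x, ...)
// %x = add i32 %a, %b
// is moved to immediately after the definition of %x:
// %x = add i32 %a, %b
// #dbg_value(i32 %x, ...)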
8989bool CodeGenPrepare::placeDbgValues(Function &F) {
8990 bool MadeChange = false;
8991 DominatorTree DT(F);
8992
8993 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
8994 SmallVector<Instruction *, 4> VIs;
8995 for (Value *V : DbgItem->location_ops())
8996 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8997 VIs.push_back(VI);
8998
8999 // This item may depend on multiple instructions, complicating any
9000 // potential sink. This block takes the defensive approach, opting to
9001 // "undef" the item if it has more than one instruction and any of them do
9002 // not dominate it.
9003 for (Instruction *VI : VIs) {
9004 if (VI->isTerminator())
9005 continue;
9006
9007 // If VI is a phi in a block with an EHPad terminator, we can't insert
9008 // after it.
9009 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9010 continue;
9011
9012 // If the defining instruction dominates the dbg.value, we do not need
9013 // to move the dbg.value.
9014 if (DT.dominates(VI, Position))
9015 continue;
9016
9017 // If we depend on multiple instructions and any of them doesn't
9018 // dominate this DVI, we probably can't salvage it: moving it to
9019 // after any of the instructions could cause us to lose the others.
9020 if (VIs.size() > 1) {
9021 LLVM_DEBUG(
9022 dbgs()
9023 << "Unable to find valid location for Debug Value, undefing:\n"
9024 << *DbgItem);
9025 DbgItem->setKillLocation();
9026 break;
9027 }
9028
9029 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9030 << *DbgItem << ' ' << *VI);
9031 DbgInserterHelper(DbgItem, VI->getIterator());
9032 MadeChange = true;
9033 ++NumDbgValueMoved;
9034 }
9035 };
9036
9037 for (BasicBlock &BB : F) {
9038 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9039 // Process any DbgVariableRecord records attached to this
9040 // instruction.
9041 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9042 filterDbgVars(Insn.getDbgRecordRange()))) {
9043 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9044 continue;
9045 DbgProcessor(&DVR, &Insn);
9046 }
9047 }
9048 }
9049
9050 return MadeChange;
9051}
9052
9053// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9054// probes can be chained dependencies of other regular DAG nodes and block DAG
9055// combine optimizations.
9056bool CodeGenPrepare::placePseudoProbes(Function &F) {
9057 bool MadeChange = false;
9058 for (auto &Block : F) {
9059 // Move the remaining probes to the beginning of the block.
9060 auto FirstInst = Block.getFirstInsertionPt();
9061 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9062 ++FirstInst;
9063 BasicBlock::iterator I(FirstInst);
9064 I++;
9065 while (I != Block.end()) {
9066 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9067 II->moveBefore(FirstInst);
9068 MadeChange = true;
9069 }
9070 }
9071 }
9072 return MadeChange;
9073}
9074
9075/// Scale down both weights to fit into uint32_t.
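///
/// For example (illustrative): NewTrue = 6e9 and NewFalse = 2e9 give
/// Scale = 6e9 / UINT32_MAX + 1 = 2, so the stored weights become 3e9 and
/// 1e9, both of which fit in uint32_t.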
9076static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9077 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9078 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9079 NewTrue = NewTrue / Scale;
9080 NewFalse = NewFalse / Scale;
9081}
9082
9083/// Some targets prefer to split a conditional branch like:
9084/// \code
9085/// %0 = icmp ne i32 %a, 0
9086/// %1 = icmp ne i32 %b, 0
9087/// %or.cond = or i1 %0, %1
9088/// br i1 %or.cond, label %TrueBB, label %FalseBB
9089/// \endcode
9090/// into multiple branch instructions like:
9091/// \code
9092/// bb1:
9093/// %0 = icmp ne i32 %a, 0
9094/// br i1 %0, label %TrueBB, label %bb2
9095/// bb2:
9096/// %1 = icmp ne i32 %b, 0
9097/// br i1 %1, label %TrueBB, label %FalseBB
9098/// \endcode
9099/// This usually allows instruction selection to do even further optimizations
9100/// and combine the compare with the branch instruction. Currently this is
9101/// applied for targets which have "cheap" jump instructions.
9102///
9103/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9104///
9105bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9106 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9107 return false;
9108
9109 bool MadeChange = false;
9110 for (auto &BB : F) {
9111 // Does this BB end with the following?
9112 // %cond1 = icmp|fcmp|binary instruction ...
9113 // %cond2 = icmp|fcmp|binary instruction ...
9114 // %cond.or = or|and i1 %cond1, cond2
9115 // br i1 %cond.or label %dest1, label %dest2"
9116 Instruction *LogicOp;
9117 BasicBlock *TBB, *FBB;
9118 if (!match(BB.getTerminator(),
9119 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9120 continue;
9121
9122 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9123 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9124 continue;
9125
9126 // The merging of mostly empty BB can cause a degenerate branch.
9127 if (TBB == FBB)
9128 continue;
9129
9130 unsigned Opc;
9131 Value *Cond1, *Cond2;
9132 if (match(LogicOp,
9133 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9134 Opc = Instruction::And;
9135 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9136 m_OneUse(m_Value(Cond2)))))
9137 Opc = Instruction::Or;
9138 else
9139 continue;
9140
9141 auto IsGoodCond = [](Value *Cond) {
9142 return match(
9143 Cond,
9144 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9145 m_LogicalOr(m_Value(), m_Value()))));
9146 };
9147 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9148 continue;
9149
9150 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9151
9152 // Create a new BB.
9153 auto *TmpBB =
9154 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9155 BB.getParent(), BB.getNextNode());
9156 if (IsHugeFunc)
9157 FreshBBs.insert(TmpBB);
9158
9159 // Update original basic block by using the first condition directly by the
9160 // branch instruction and removing the no longer needed and/or instruction.
9161 Br1->setCondition(Cond1);
9162 LogicOp->eraseFromParent();
9163
9164 // Depending on the condition we have to either replace the true or the
9165 // false successor of the original branch instruction.
9166 if (Opc == Instruction::And)
9167 Br1->setSuccessor(0, TmpBB);
9168 else
9169 Br1->setSuccessor(1, TmpBB);
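    // In IR terms, the rewired branch in the original block is now:
    //   and:  br i1 %cond1, label %TmpBB, label %FalseBB
    //   or:   br i1 %cond1, label %TrueBB, label %TmpBB
    // because for 'and' the second condition only matters when the first one
    // is true, and for 'or' only when it is false.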
9170
9171 // Fill in the new basic block.
9172 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9173 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9174 I->removeFromParent();
9175 I->insertBefore(Br2->getIterator());
9176 }
9177
9178    // Update PHI nodes in both successors. The original BB needs to be
9179    // replaced in one successor's PHI nodes, because the branch now comes from
9180    // the newly generated BB (TmpBB). In the other successor we need to add one
9181    // incoming edge to the PHI nodes, because both branch instructions now
9182    // target the same successor. Depending on the original branch condition
9183    // (and/or) we have to swap the successors (TBB, FBB), so that we perform
9184    // the correct update for the PHI nodes.
9185    // This doesn't change the successor order of the just-created branch
9186    // instruction (or any other instruction).
9187 if (Opc == Instruction::Or)
9188 std::swap(TBB, FBB);
9189
9190 // Replace the old BB with the new BB.
9191 TBB->replacePhiUsesWith(&BB, TmpBB);
9192
9193 // Add another incoming edge from the new BB.
9194 for (PHINode &PN : FBB->phis()) {
9195 auto *Val = PN.getIncomingValueForBlock(&BB);
9196 PN.addIncoming(Val, TmpBB);
9197 }
9198
9199 // Update the branch weights (from SelectionDAGBuilder::
9200 // FindMergedConditions).
9201 if (Opc == Instruction::Or) {
9202 // Codegen X | Y as:
9203 // BB1:
9204 // jmp_if_X TBB
9205 // jmp TmpBB
9206 // TmpBB:
9207 // jmp_if_Y TBB
9208 // jmp FBB
9209 //
9210
9211      // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9212 // The requirement is that
9213 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9214 // = TrueProb for original BB.
9215 // Assuming the original weights are A and B, one choice is to set BB1's
9216 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9217 // assumes that
9218 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9219 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9220 // TmpBB, but the math is more complicated.
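      // Illustrative check of this choice (numbers made up): with A = 48 and
      // B = 16, BB1 gets weights 48:80, so TrueProb(BB1) = 3/8 and
      // FalseProb(BB1) = 5/8; TmpBB gets weights 48:32, so TrueProb(TmpBB) =
      // 3/5. Then 3/8 + 5/8 * 3/5 = 3/4 = A / (A + B), the original TrueProb.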
9221 uint64_t TrueWeight, FalseWeight;
9222 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9223 uint64_t NewTrueWeight = TrueWeight;
9224 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9225 scaleWeights(NewTrueWeight, NewFalseWeight);
9226 Br1->setMetadata(LLVMContext::MD_prof,
9227 MDBuilder(Br1->getContext())
9228                             .createBranchWeights(NewTrueWeight, NewFalseWeight,
9229 hasBranchWeightOrigin(*Br1)));
9230
9231 NewTrueWeight = TrueWeight;
9232 NewFalseWeight = 2 * FalseWeight;
9233 scaleWeights(NewTrueWeight, NewFalseWeight);
9234 Br2->setMetadata(LLVMContext::MD_prof,
9235 MDBuilder(Br2->getContext())
9236                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9237 }
9238 } else {
9239 // Codegen X & Y as:
9240 // BB1:
9241 // jmp_if_X TmpBB
9242 // jmp FBB
9243 // TmpBB:
9244 // jmp_if_Y TBB
9245 // jmp FBB
9246 //
9247      // This requires creation of TmpBB after the original block.
9248
9249 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9250 // The requirement is that
9251 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9252 // = FalseProb for original BB.
9253 // Assuming the original weights are A and B, one choice is to set BB1's
9254 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9255 // assumes that
9256 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
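      // Illustrative check of this choice (numbers made up): with A = 48 and
      // B = 16, BB1 gets weights 112:16, so FalseProb(BB1) = 1/8 and
      // TrueProb(BB1) = 7/8; TmpBB gets weights 96:16, so FalseProb(TmpBB) =
      // 1/7. Then 1/8 + 7/8 * 1/7 = 1/4 = B / (A + B), the original FalseProb.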
9257 uint64_t TrueWeight, FalseWeight;
9258 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9259 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9260 uint64_t NewFalseWeight = FalseWeight;
9261 scaleWeights(NewTrueWeight, NewFalseWeight);
9262 Br1->setMetadata(LLVMContext::MD_prof,
9263 MDBuilder(Br1->getContext())
9264                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9265
9266 NewTrueWeight = 2 * TrueWeight;
9267 NewFalseWeight = FalseWeight;
9268 scaleWeights(NewTrueWeight, NewFalseWeight);
9269 Br2->setMetadata(LLVMContext::MD_prof,
9270 MDBuilder(Br2->getContext())
9271                             .createBranchWeights(NewTrueWeight, NewFalseWeight));
9272 }
9273 }
9274
9275 ModifiedDT = ModifyDT::ModifyBBDT;
9276 MadeChange = true;
9277
9278 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9279 TmpBB->dump());
9280 }
9281 return MadeChange;
9282}
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:638
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1450
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1012
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1532
unsigned logBase2() const
Definition APInt.h:1762
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:985
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1563
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:472
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:690
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:214
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:131
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:169
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:154
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:322
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:57
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:807
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:342
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Type * getValueType() const
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:175
iterator find(const KeyT &Key)
Definition MapVector.h:149
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:119
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
void clear()
Completely clear the SetVector.
Definition SetVector.h:265
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:260
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:98
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:149
value_type pop_back_val()
Definition SetVector.h:277
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:743
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:292
const Use & getOperandUse(unsigned i) const
Definition User.h:245
void setOperand(unsigned i, Value *Val)
Definition User.h:237
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:24
Value * getOperand(unsigned i) const
Definition User.h:232
unsigned getNumOperands() const
Definition User.h:254
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:390
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:242
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:701
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
iterator_range< use_iterator > uses()
Definition Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:838
user_iterator_impl< User > user_iterator
Definition Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:396
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Match any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
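A sketch of how m_UAddWithOverflow is typically applied to an icmp; matchUAddOverflowCheck is a hypothetical helper and the captured roles follow the matcher's L, R, S parameters:

#include "llvm/IR/PatternMatch.h"

// Detect an unsigned-overflow check such as "(A + B) u< A", capturing the two
// addends and the add itself.
static bool matchUAddOverflowCheck(llvm::Value *Cmp, llvm::Value *&A,
                                   llvm::Value *&B, llvm::Value *&Sum) {
  using namespace llvm::PatternMatch;
  return match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)));
}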
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:477
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
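A small sketch of the range-based predicate style; onlyUsedByLoads is a hypothetical helper, and any_of/none_of below follow the same shape:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instructions.h"

// True if every user of V is a load; no explicit begin()/end() needed.
static bool onlyUsedByLoads(const llvm::Value *V) {
  return llvm::all_of(V->users(), [](const llvm::User *U) {
    return llvm::isa<llvm::LoadInst>(U);
  });
}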
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1655
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
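A sketch of the common replace-then-clean-up pattern built on this helper; replaceAndClean is a hypothetical wrapper:

#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

// After rerouting I's uses to NewV, I (and anything that only fed it) is
// typically trivially dead; delete the whole dead chain.
static void replaceAndClean(llvm::Instruction *I, llvm::Value *NewV,
                            const llvm::TargetLibraryInfo *TLI = nullptr) {
  I->replaceAllUsesWith(NewV);
  llvm::RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
}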
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
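A minimal sketch of the dyn_cast idiom; loadedPointerOrNull is a hypothetical helper:

#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

// dyn_cast returns nullptr on a type mismatch, so it doubles as both the test
// and the downcast (cast<> would assert instead of returning null).
static llvm::Value *loadedPointerOrNull(llvm::Value *V) {
  if (auto *LI = llvm::dyn_cast<llvm::LoadInst>(V))
    return LI->getPointerOperand();
  return nullptr;
}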
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2236
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1724
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2114
constexpr from_range_t from_range
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
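A sketch of erasing while iterating with make_early_inc_range; dropUnusedSideEffectFreeInsts is a hypothetical helper and the deletion criterion is only illustrative:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"

// The early-increment range advances past I before the loop body runs, so
// erasing I does not invalidate the traversal of the block.
static void dropUnusedSideEffectFreeInsts(llvm::BasicBlock &BB) {
  for (llvm::Instruction &I : llvm::make_early_inc_range(BB))
    if (I.use_empty() && !I.isTerminator() && !I.mayHaveSideEffects())
      I.eraseFromParent();
}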
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2076
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
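A sketch of a typical simplifyInstruction call site; trySimplify is a hypothetical helper, assuming DL is the module's data layout and the caller deletes I once its uses are gone:

#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instruction.h"

// Fold I to an existing simpler value if possible and reroute its uses.
static bool trySimplify(llvm::Instruction *I, const llvm::DataLayout &DL) {
  if (llvm::Value *V = llvm::simplifyInstruction(I, llvm::SimplifyQuery(DL))) {
    I->replaceAllUsesWith(V);
    return true;
  }
  return false;
}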
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2128
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3761
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition Analysis.cpp:207
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:71
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition Analysis.cpp:592
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
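A sketch of driving this helper over a whole function; mergeSimpleBlocks is a hypothetical wrapper, and the early-increment range matters because the merged block is deleted on success:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

// Try to fold each block into its unique predecessor; the helper returns
// false and leaves the IR untouched when a merge is not legal.
static bool mergeSimpleBlocks(llvm::Function &F) {
  bool Changed = false;
  for (llvm::BasicBlock &BB : llvm::make_early_inc_range(F))
    Changed |= llvm::MergeBlockIntoPredecessor(&BB);
  return Changed;
}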
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:1954
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
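A sketch of reading branch weights off a conditional branch; getCondBranchWeights is a hypothetical helper:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ProfDataUtils.h"

// Read the taken/not-taken weights if BI carries MD_prof branch-weight
// metadata; returns false when no usable weights are attached.
static bool getCondBranchWeights(const llvm::BranchInst *BI, uint32_t &TrueW,
                                 uint32_t &FalseW) {
  llvm::SmallVector<uint32_t, 2> Weights;
  if (!llvm::extractBranchWeights(BI->getMetadata(llvm::LLVMContext::MD_prof),
                                  Weights) ||
      Weights.size() != 2)
    return false;
  TrueW = Weights[0];
  FalseW = Weights[1];
  return true;
}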
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:363
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow ...
Definition MathExtras.h:701
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
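A sketch of guarding a slow path with SplitBlockAndInsertIfThen; emitGuardedCall and SlowPath are hypothetical, and the optional analysis-update arguments are omitted:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

// Split the block before InsertPt, branch on Cond, and build the guarded code
// in the new "then" block whose terminator is returned.
static void emitGuardedCall(llvm::Instruction *InsertPt, llvm::Value *Cond,
                            llvm::FunctionCallee SlowPath) {
  llvm::Instruction *ThenTerm = llvm::SplitBlockAndInsertIfThen(
      Cond, InsertPt->getIterator(), /*Unreachable=*/false);
  llvm::IRBuilder<> Builder(ThenTerm);
  Builder.CreateCall(SlowPath);
}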
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:869
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
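A minimal sketch of the width queries above; needsIntegerExtension is a hypothetical helper showing how bitsLT() and isInteger() are typically combined:

#include "llvm/CodeGen/ValueTypes.h"

// True when an integer value of type SrcVT must be extended to reach DstVT.
static bool needsIntegerExtension(llvm::EVT SrcVT, llvm::EVT DstVT) {
  return SrcVT.isInteger() && DstVT.isInteger() && SrcVT.bitsLT(DstVT);
}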
This contains information for each constraint that we are lowering.