LLVM 23.0.0git
CodeGenPrepare.cpp
Go to the documentation of this file.
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/Argument.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InlineAsm.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/MDBuilder.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Operator.h"
73#include "llvm/IR/Statepoint.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/Use.h"
76#include "llvm/IR/User.h"
77#include "llvm/IR/Value.h"
78#include "llvm/IR/ValueHandle.h"
79#include "llvm/IR/ValueMap.h"
81#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of select created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
187 "profile, we cannot tell the function is cold for sure because "
188 "it may be a function newly added without ever being sampled. "
189 "With the flag enabled, compiler can put such profile unknown "
190 "functions into a special section, so runtime system can choose "
191 "to handle it in a different way than .text section, to save "
192 "RAM for example. "));
193
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
/// Kind of extension observed for a promoted instruction. Packed next to the
/// original type in TypeIsSExt (a PointerIntPair with a 2-bit integer field).
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // Both kinds have been seen: a sext was observed after
284 // ZeroExtension had been set, or a zext was observed after
285 // SignExtension had been set. This makes the type
286 // information of a promoted instruction invalid.
287};
288
/// Describes how much dominance information a transformation invalidated, so
/// that the caller can decide what to rebuild or which block to re-scan.
289enum ModifyDT {
290 NotModifyDT, // The dominator tree was not modified.
291 ModifyBBDT, // The basic-block dominator tree was modified.
292 ModifyInstDT // Instruction-level dominance inside a basic block was
293 // modified. This usually means an instruction was moved,
294 // deleted or inserted in the block, so the instructions
295 // of that basic block should be iterated again.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
/// Implementation of the CodeGenPrepare transformation. It holds the analysis
/// pointers and the per-function bookkeeping used while optimizing, and is
/// driven either through run(Function &, FunctionAnalysisManager &) or by
/// CodeGenPrepareLegacyPass, which is a friend and fills in the analysis
/// members directly before calling _run().
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
  // NOTE(review): unlike the neighbouring analysis pointers, BFI and BPI have
  // no default initializer -- confirm every entry point assigns them before
  // _run() reads them.
316 BlockFrequencyInfo *BFI;
317 BranchProbabilityInfo *BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
329 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the type of the related instruction before their
335 /// promotion for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
347 MapVector<AssertingVH<Value>,
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP base after splitting the GEPs having large offset.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map serial numbers to Large offset GEPs.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of SExt promoted.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
  /// No default initializer; _run() assigns it from F.hasOptSize() first.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare() = default;
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
373 /// True when the function is considered huge (its block count exceeds
  /// HugeFuncThresholdInCGPP); in that case we need to limit the build time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs works like a worklist: it collects the updated BBs which need
377 /// to be optimized again.
378 /// Note: to keep build time in check in this pass, when a BB is updated we
379 /// need to insert that BB into FreshBBs for huge functions.
380 SmallPtrSet<BasicBlock *, 32> FreshBBs;
381
  /// Clears InsertedInsts, PromotedInsts and FreshBBs.
  /// NOTE(review): the other per-function containers of this class are not
  /// touched here; _run() clears several of them itself.
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 }
388
  /// Entry point taking a FunctionAnalysisManager: fills in the analysis
  /// members for \p F and then calls _run(). Defined out of line.
389 bool run(Function &F, FunctionAnalysisManager &AM);
390
391private:
  /// Invokes \p f and, if the instruction CurInstIterator pointed at is no
  /// longer the value tracked by the handle afterwards, restarts iteration at
  /// the beginning of \p BB and drops the SunkAddrs cache.
392 template <typename F>
393 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
394 // Substituting can cause recursive simplifications, which can invalidate
395 // our iterator. Use a WeakTrackingVH to hold onto it in case this
396 // happens.
397 Value *CurValue = &*CurInstIterator;
398 WeakTrackingVH IterHandle(CurValue);
399
400 f();
401
402 // If the iterator instruction was recursively deleted, start over at the
403 // start of the block.
404 if (IterHandle != CurValue) {
405 CurInstIterator = BB->begin();
406 SunkAddrs.clear();
407 }
408 }
409
410 // Get the DominatorTree, building if necessary.
411 DominatorTree &getDT(Function &F) {
412 if (!DT)
413 DT = std::make_unique<DominatorTree>(F);
414 return *DT;
415 }
416
417 void removeAllAssertingVHReferences(Value *V);
418 bool eliminateAssumptions(Function &F);
419 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
420 bool eliminateMostlyEmptyBlocks(Function &F);
421 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
422 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
423 void eliminateMostlyEmptyBlock(BasicBlock *BB);
424 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
425 bool isPreheader);
426 bool makeBitReverse(Instruction &I);
427 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
428 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
429 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
430 unsigned AddrSpace);
431 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
432 bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
433 ModifyDT &ModifiedDT);
434 bool optimizeInlineAsmInst(CallInst *CS);
435 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436 bool optimizeExt(Instruction *&I);
437 bool optimizeExtUses(Instruction *I);
438 bool optimizeLoadExt(LoadInst *Load);
439 bool optimizeShiftInst(BinaryOperator *BO);
440 bool optimizeFunnelShift(IntrinsicInst *Fsh);
441 bool optimizeSelectInst(SelectInst *SI);
442 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443 bool optimizeSwitchType(SwitchInst *SI);
444 bool optimizeSwitchPhiConstants(SwitchInst *SI);
445 bool optimizeSwitchInst(SwitchInst *SI);
446 bool optimizeExtractElementInst(Instruction *Inst);
447 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
455 const SmallVectorImpl<Instruction *> &Exts,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool optimizeURem(Instruction *Rem);
475 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 bool unfoldPowerOf2Test(CmpInst *Cmp);
478 void verifyBFIUpdates(Function &F);
  /// Shared driver invoked by both entry points once the analysis members
  /// have been set. Returns true if the function was changed.
479 bool _run(Function &F);
480};
481
/// Legacy pass manager wrapper around CodeGenPrepare: declares the analyses
/// it needs and runs the transformation on one function at a time.
482class CodeGenPrepareLegacyPass : public FunctionPass {
483public:
484 static char ID; // Pass identification, replacement for typeid
485
486 CodeGenPrepareLegacyPass() : FunctionPass(ID) {}
487
  /// Runs CodeGenPrepare on \p F. Defined out of line.
488 bool runOnFunction(Function &F) override;
489
490 StringRef getPassName() const override { return "CodeGen Prepare"; }
491
  /// Declares the analyses used by runOnFunction(). The basic-block-sections
  /// profile reader is optional (used only if available); all the others are
  /// required.
492 void getAnalysisUsage(AnalysisUsage &AU) const override {
493 // FIXME: When we can selectively preserve passes, preserve the domtree.
494 AU.addRequired<ProfileSummaryInfoWrapperPass>();
495 AU.addRequired<TargetLibraryInfoWrapperPass>();
496 AU.addRequired<TargetPassConfig>();
497 AU.addRequired<TargetTransformInfoWrapperPass>();
498 AU.addRequired<LoopInfoWrapperPass>();
499 AU.addRequired<BranchProbabilityInfoWrapperPass>();
500 AU.addRequired<BlockFrequencyInfoWrapperPass>();
501 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
/// Legacy pass manager entry point. Unless the function is skipped, builds a
/// CodeGenPrepare instance for the current TargetMachine, populates its
/// analysis members from the legacy analysis wrappers, and runs it on \p F.
/// Returns false when the function is skipped, otherwise whatever _run()
/// reports.
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
  // The members below are private; this class is a friend of CodeGenPrepare.
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
522 CGP.BFI = &getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  // The basic-block-sections profile reader is optional; leave the member
  // null when the wrapper pass is not available.
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540 "Optimize for code generation", false, false)
541
543 return new CodeGenPrepareLegacyPass();
544}
545
548 CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
557 return PA;
558}
559
/// Entry point taking a FunctionAnalysisManager: populates the target and
/// analysis members for \p F from \p AM and then delegates to _run().
/// Returns true if the function was changed.
560bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
561 DL = &F.getDataLayout();
562 SubtargetInfo = TM->getSubtargetImpl(F);
563 TLI = SubtargetInfo->getTargetLowering();
564 TRI = SubtargetInfo->getRegisterInfo();
565 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
567 LI = &AM.getResult<LoopAnalysis>(F);
  // PSI is a module-level analysis, so it is only looked up as a cached
  // result through the module analysis manager proxy.
  // NOTE(review): presumably getCachedResult yields null when the analysis
  // has not been computed; _run() dereferences PSI without a null check --
  // confirm the pipeline always computes it beforehand.
570 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
571 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
572 BBSectionsProfileReader =
574 return _run(F);
575}
576
/// Core driver shared by both entry points; the analysis members must have
/// been set by the caller. In order, it:
///   1. assigns a section prefix to the function,
///   2. bypasses slow divisions where the target asks for it,
///   3. removes assumptions and mostly-empty blocks, splits branch conditions
///      and indirect-branch critical edges,
///   4. repeatedly runs optimizeBlock() over the function until nothing
///      changes,
///   5. performs the optional branch and GC clean-ups, and
///   6. places debug values and pseudo probes.
/// Returns true if anything in \p F was changed.
577bool CodeGenPrepare::_run(Function &F) {
578 bool EverMadeChange = false;
579
580 OptSize = F.hasOptSize();
581 // Use the basic-block-sections profile to promote hot functions to .text.hot
582 // if requested.
583 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
584 BBSectionsProfileReader->isFunctionHot(F.getName())) {
585 (void)F.setSectionPrefix("hot");
586 } else if (ProfileGuidedSectionPrefix) {
587 // The hot attribute overwrites profile count based hotness while profile
588 // counts based hotness overwrite the cold attribute.
589 // This is a conservative behavior.
590 if (F.hasFnAttribute(Attribute::Hot) ||
591 PSI->isFunctionHotInCallGraph(&F, *BFI))
592 (void)F.setSectionPrefix("hot");
593 // If PSI shows this function is not hot, we place the function into the
594 // unlikely section if (1) PSI shows this is a cold function, or
595 // (2) the function has the cold attribute.
596 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
597 F.hasFnAttribute(Attribute::Cold))
598 (void)F.setSectionPrefix("unlikely");
601 (void)F.setSectionPrefix("unknown");
602 }
603
604 /// This optimization identifies DIV instructions that can be
605 /// profitably bypassed and carried out with a shorter, faster divide.
606 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
607 const DenseMap<unsigned int, unsigned int> &BypassWidths =
609 BasicBlock *BB = &*F.begin();
610 while (BB != nullptr) {
611 // bypassSlowDivision may create new BBs, but we don't want to reapply the
612 // optimization to those blocks.
613 BasicBlock *Next = BB->getNextNode();
614 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI))
615 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620 // Get rid of @llvm.assume builtins before attempting to eliminate empty
621 // blocks, since there might be blocks that only contain @llvm.assume calls
622 // (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625 // Eliminate blocks that contain only PHI nodes and an
626 // unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633 // Split some critical edges where one of the sources is an indirect branch,
634 // to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
637
638 // If we are optimizing a huge function, we need to consider the build time,
639 // because the basic algorithm's complexity is near O(N!).
640 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
641
642 // Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
  // Iterate to a fixed point: keep sweeping the function as long as the
  // previous sweep changed something.
647 bool MadeChange = true;
648 bool FuncIterated = false;
649 while (MadeChange) {
650 MadeChange = false;
651
652 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
  // After the first full sweep of a huge function, only revisit blocks that
  // were recorded in FreshBBs.
653 if (FuncIterated && !FreshBBs.contains(&BB))
654 continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
658
  // Drop the lazily built dominator tree; getDT() rebuilds it on demand.
659 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663 if (IsHugeFunc) {
664 // If the BB was updated, it may still have a chance to be optimized.
665 // This usually happens with sink optimizations.
666 // For example:
667 //
668 // bb0:
669 // %and = and i32 %a, 4
670 // %cmp = icmp eq i32 %and, 0
671 //
672 // If %cmp is sunk to another BB, %and gets a chance to be sunk too.
673 if (Changed)
674 FreshBBs.insert(&BB);
675 else if (FuncIterated)
676 FreshBBs.erase(&BB);
677 } else {
678 // For small/normal functions, we restart BB iteration if the dominator
679 // tree of the Function was changed.
680 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681 break;
682 }
683 }
684 // For huge functions only: every BB has now been visited at least once.
685 FuncIterated = IsHugeFunc;
686
687 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689 if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693 if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697 if (MadeChange && VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701 // Really free removed instructions during promotion.
702 for (Instruction *I : RemovedInsts)
703 I->deleteValue();
704
  // Reset the per-sweep bookkeeping before the next iteration.
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716 if (!DisableBranchOpts) {
717 MadeChange = false;
718 // Use a set vector to get deterministic iteration order. The order the
719 // blocks are removed may affect whether or not PHI nodes in successors
720 // are removed.
721 SmallSetVector<BasicBlock *, 8> WorkList;
722 for (BasicBlock &BB : F) {
  // NOTE(review): MadeChange accumulates across blocks, so once any
  // terminator has been folded the `continue` below is no longer taken for
  // later blocks, even if their own terminator was not folded -- confirm
  // this is intended.
724 MadeChange |= ConstantFoldTerminator(&BB, true);
725 if (!MadeChange)
726 continue;
727
728 for (BasicBlock *Succ : Successors)
729 if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733 // Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735 while (!WorkList.empty()) {
736 BasicBlock *BB = WorkList.pop_back_val();
738
739 DeleteDeadBlock(BB);
740
741 for (BasicBlock *Succ : Successors)
742 if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746 // Merge pairs of basic blocks with unconditional branches, connected by
747 // a single edge.
748 if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754 if (!DisableGCOpts) {
  // Collect the statepoints first, then process them in a second loop.
756 for (BasicBlock &BB : F)
757 for (Instruction &I : BB)
758 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760 for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764 // Do this last to clean up use-before-def scenarios introduced by other
765 // preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
771 verifyBFIUpdates(F);
772#endif
773
774 return EverMadeChange;
775}
/// Erases every AssumeInst in \p F and then recursively deletes its operand
/// if that operand has become trivially dead. Returns true if at least one
/// assume was removed.
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778 bool MadeChange = false;
779 for (BasicBlock &BB : F) {
780 CurInstIterator = BB.begin();
781 while (CurInstIterator != BB.end()) {
  // Advance the iterator before possibly erasing the current instruction.
782 Instruction *I = &*(CurInstIterator++);
783 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange = true;
  // Grab the operand before the assume is erased.
785 Value *Operand = Assume->getOperand(0);
786 Assume->eraseFromParent();
787
  // The recursive deletion may remove the instruction CurInstIterator points
  // at; the helper restarts at the beginning of the block in that case.
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
790 });
791 }
792 }
793 }
794 return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data structures.
/// \p V is dropped as a key of LargeOffsetGEPMap and from NewGEPBases. If it
/// is a GEP, it is also dropped from LargeOffsetGEPID and from the vector
/// stored in LargeOffsetGEPMap under its pointer operand.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
  // Nothing more to do unless V is a GEP.
804 if (!GEP)
805 return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810 if (VecI == LargeOffsetGEPMap.end())
811 return;
812
813 auto &GEPVector = VecI->second;
814 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
815
  // Remove the map entry altogether once its vector has become empty.
816 if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
// A fresh dominator tree, loop info and branch probability info are built for
// F so that the recomputed BFI does not depend on the cached analyses.
821[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
822 DominatorTree NewDT(F);
823 LoopInfo NewLI(NewDT);
824 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
/// \param DT Optional dominator tree. When non-null it is used to skip
///           blocks that are unreachable from the entry, and it is passed on
///           to MergeBlockIntoPredecessor.
/// \returns true if at least one block was merged into its predecessor.
832bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
833 bool Changed = false;
834 // Scan all of the blocks in the function, except for the entry block.
835 // Use a temporary array to avoid iterator being invalidated when
836 // deleting blocks.
839
  // Predecessors that absorbed a block; revisited after the main loop.
840 SmallSet<WeakTrackingVH, 16> Preds;
841 for (auto &Block : Blocks) {
843 if (!BB)
844 continue;
845 // If the destination block has a single pred, then this is a trivial
846 // edge, just collapse it.
847 BasicBlock *SinglePred = BB->getSinglePredecessor();
848
849 // Don't merge if BB's address is taken.
850 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
851 continue;
852
853 // Make an effort to skip unreachable blocks.
854 if (DT && !DT->isReachableFromEntry(BB))
855 continue;
856
  // Only merge when the predecessor ends in an unconditional branch.
857 if (isa<UncondBrInst>(SinglePred->getTerminator())) {
858 Changed = true;
859 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
860
861 // Merge BB into SinglePred and delete it.
862 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
863 /* MemDep */ nullptr,
864 /* PredecessorWithTwoSuccessors */ false, DT);
865 Preds.insert(SinglePred);
866
867 if (IsHugeFunc) {
868 // Update FreshBBs to optimize the merged BB.
869 FreshBBs.insert(SinglePred);
870 FreshBBs.erase(BB);
871 }
872 }
873 }
874
875 // (Repeatedly) merging blocks into their predecessors can create redundant
876 // debug intrinsics.
877 for (const auto &Pred : Preds)
878 if (auto *BB = cast_or_null<BasicBlock>(Pred))
880
881 return Changed;
882}
883
884/// Find a destination block from BB if BB is mergeable empty block.
/// \returns the successor of \p BB's unconditional branch, or nullptr when
/// \p BB does not end in an unconditional branch, contains something other
/// than PHI nodes before that branch, branches to itself, or
/// canMergeBlocks() rejects the pair.
885BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
886 // If this block doesn't end with an uncond branch, ignore it.
887 UncondBrInst *BI = dyn_cast<UncondBrInst>(BB->getTerminator());
888 if (!BI)
889 return nullptr;
890
891 // If the instruction before the branch (skipping debug info) isn't a phi
892 // node, then other stuff is happening here.
894 if (BBI != BB->begin()) {
895 --BBI;
896 if (!isa<PHINode>(BBI))
897 return nullptr;
898 }
899
900 // Do not break infinite loops.
901 BasicBlock *DestBB = BI->getSuccessor();
902 if (DestBB == BB)
903 return nullptr;
904
905 if (!canMergeBlocks(BB, DestBB))
906 DestBB = nullptr;
907
908 return DestBB;
909}
910
911/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
912/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
913/// edges in ways that are non-optimal for isel. Start by eliminating these
914/// blocks so we can split them the way we want them.
/// \returns true if a block was eliminated or dead PHI nodes were deleted.
915bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
  // Collect the preheaders of all loops, nested loops included, using
  // LoopList as a worklist. The set is consulted by the profitability check
  // below.
916 SmallPtrSet<BasicBlock *, 16> Preheaders;
917 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
918 while (!LoopList.empty()) {
919 Loop *L = LoopList.pop_back_val();
  // Queue the sub-loops of L.
920 llvm::append_range(LoopList, *L);
921 if (BasicBlock *Preheader = L->getLoopPreheader())
922 Preheaders.insert(Preheader);
923 }
924
925 bool MadeChange = false;
926 // Copy blocks into a temporary array to avoid iterator invalidation issues
927 // as we remove them.
928 // Note that this intentionally skips the entry block.
930 for (auto &Block : llvm::drop_begin(F)) {
931 // Delete phi nodes that could block deleting other empty blocks.
933 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
934 Blocks.push_back(&Block);
935 }
936
937 for (auto &Block : Blocks) {
939 if (!BB)
940 continue;
  // Skip blocks that have no mergeable destination or whose merge is not
  // considered profitable.
941 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
942 if (!DestBB ||
943 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
944 continue;
945
946 eliminateMostlyEmptyBlock(BB);
947 MadeChange = true;
948 }
949 return MadeChange;
950}
951
952bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
953 BasicBlock *DestBB,
954 bool isPreheader) {
955 // Do not delete loop preheaders if doing so would create a critical edge.
956 // Loop preheaders can be good locations to spill registers. If the
957 // preheader is deleted and we create a critical edge, registers may be
958 // spilled in the loop body instead.
959 if (!DisablePreheaderProtect && isPreheader &&
960 !(BB->getSinglePredecessor() &&
962 return false;
963
964 // Skip merging if the block's successor is also a successor to any callbr
965 // that leads to this block.
966 // FIXME: Is this really needed? Is this a correctness issue?
967 for (BasicBlock *Pred : predecessors(BB)) {
968 if (isa<CallBrInst>(Pred->getTerminator()) &&
969 llvm::is_contained(successors(Pred), DestBB))
970 return false;
971 }
972
973 // Try to skip merging if the unique predecessor of BB is terminated by a
974 // switch or indirect branch instruction, and BB is used as an incoming block
975 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
976 // add COPY instructions in the predecessor of BB instead of BB (if it is not
977 // merged). Note that the critical edge created by merging such blocks wont be
978 // split in MachineSink because the jump table is not analyzable. By keeping
979 // such empty block (BB), ISel will place COPY instructions in BB, not in the
980 // predecessor of BB.
981 BasicBlock *Pred = BB->getUniquePredecessor();
982 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
984 return true;
985
986 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
987 return true;
988
989 // We use a simple cost heuristic which determine skipping merging is
990 // profitable if the cost of skipping merging is less than the cost of
991 // merging : Cost(skipping merging) < Cost(merging BB), where the
992 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
993 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
994 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
995 // Freq(Pred) / Freq(BB) > 2.
996 // Note that if there are multiple empty blocks sharing the same incoming
997 // value for the PHIs in the DestBB, we consider them together. In such
998 // case, Cost(merging BB) will be the sum of their frequencies.
999
1000 if (!isa<PHINode>(DestBB->begin()))
1001 return true;
1002
1003 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1004
1005 // Find all other incoming blocks from which incoming values of all PHIs in
1006 // DestBB are the same as the ones from BB.
1007 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1008 if (DestBBPred == BB)
1009 continue;
1010
1011 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1012 return DestPN.getIncomingValueForBlock(BB) ==
1013 DestPN.getIncomingValueForBlock(DestBBPred);
1014 }))
1015 SameIncomingValueBBs.insert(DestBBPred);
1016 }
1017
1018 // See if all BB's incoming values are same as the value from Pred. In this
1019 // case, no reason to skip merging because COPYs are expected to be place in
1020 // Pred already.
1021 if (SameIncomingValueBBs.count(Pred))
1022 return true;
1023
1024 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1025 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1026
1027 for (auto *SameValueBB : SameIncomingValueBBs)
1028 if (SameValueBB->getUniquePredecessor() == Pred &&
1029 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1030 BBFreq += BFI->getBlockFreq(SameValueBB);
1031
1032 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1033 return !Limit || PredFreq <= *Limit;
1034}
1035
1036/// Return true if we can merge BB into DestBB if there is a single
1037/// unconditional branch between them, and BB contains no other non-phi
1038/// instructions.
1039bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1040 const BasicBlock *DestBB) const {
1041 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1042 // the successor. If there are more complex condition (e.g. preheaders),
1043 // don't mess around with them.
1044 for (const PHINode &PN : BB->phis()) {
1045 for (const User *U : PN.users()) {
1046 const Instruction *UI = cast<Instruction>(U);
1047 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1048 return false;
1049 // If User is inside DestBB block and it is a PHINode then check
1050 // incoming value. If incoming value is not from BB then this is
1051 // a complex condition (e.g. preheaders) we want to avoid here.
1052 if (UI->getParent() == DestBB) {
1053 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1054 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1055 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1056 if (Insn && Insn->getParent() == BB &&
1057 Insn->getParent() != UPN->getIncomingBlock(I))
1058 return false;
1059 }
1060 }
1061 }
1062 }
1063
1064 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1065 // and DestBB may have conflicting incoming values for the block. If so, we
1066 // can't merge the block.
1067 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1068 if (!DestBBPN)
1069 return true; // no conflict.
1070
1071 // Collect the preds of BB.
1072 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1073 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1074 // It is faster to get preds from a PHI than with pred_iterator.
1075 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1076 BBPreds.insert(BBPN->getIncomingBlock(i));
1077 } else {
1078 BBPreds.insert_range(predecessors(BB));
1079 }
1080
1081 // Walk the preds of DestBB.
1082 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1083 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1084 if (BBPreds.count(Pred)) { // Common predecessor?
1085 for (const PHINode &PN : DestBB->phis()) {
1086 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1087 const Value *V2 = PN.getIncomingValueForBlock(BB);
1088
1089 // If V2 is a phi node in BB, look up what the mapped value will be.
1090 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1091 if (V2PN->getParent() == BB)
1092 V2 = V2PN->getIncomingValueForBlock(Pred);
1093
1094 // If there is a conflict, bail out.
1095 if (V1 != V2)
1096 return false;
1097 }
1098 }
1099 }
1100
1101 return true;
1102}
1103
1104/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1105static void replaceAllUsesWith(Value *Old, Value *New,
1107 bool IsHuge) {
1108 auto *OldI = dyn_cast<Instruction>(Old);
1109 if (OldI) {
1110 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1111 UI != E; ++UI) {
1113 if (IsHuge)
1114 FreshBBs.insert(User->getParent());
1115 }
1116 }
1117 Old->replaceAllUsesWith(New);
1118}
1119
1120/// Eliminate a basic block that has only phi's and an unconditional branch in
1121/// it.
1122void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1123 UncondBrInst *BI = cast<UncondBrInst>(BB->getTerminator());
1124 BasicBlock *DestBB = BI->getSuccessor();
1125
1126 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1127 << *BB << *DestBB);
1128
1129 // If the destination block has a single pred, then this is a trivial edge,
1130 // just collapse it.
1131 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1132 if (SinglePred != DestBB) {
1133 assert(SinglePred == BB &&
1134 "Single predecessor not the same as predecessor");
1135 // Merge DestBB into SinglePred/BB and delete it.
1137 // Note: BB(=SinglePred) will not be deleted on this path.
1138 // DestBB(=its single successor) is the one that was deleted.
1139 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1140
1141 if (IsHugeFunc) {
1142 // Update FreshBBs to optimize the merged BB.
1143 FreshBBs.insert(SinglePred);
1144 FreshBBs.erase(DestBB);
1145 }
1146 return;
1147 }
1148 }
1149
1150 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1151 // to handle the new incoming edges it is about to have.
1152 for (PHINode &PN : DestBB->phis()) {
1153 // Remove the incoming value for BB, and remember it.
1154 Value *InVal = PN.removeIncomingValue(BB, false);
1155
1156 // Two options: either the InVal is a phi node defined in BB or it is some
1157 // value that dominates BB.
1158 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1159 if (InValPhi && InValPhi->getParent() == BB) {
1160 // Add all of the input values of the input PHI as inputs of this phi.
1161 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1162 PN.addIncoming(InValPhi->getIncomingValue(i),
1163 InValPhi->getIncomingBlock(i));
1164 } else {
1165 // Otherwise, add one instance of the dominating value for each edge that
1166 // we will be adding.
1167 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1168 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1169 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1170 } else {
1171 for (BasicBlock *Pred : predecessors(BB))
1172 PN.addIncoming(InVal, Pred);
1173 }
1174 }
1175 }
1176
1177 // Preserve loop Metadata.
1178 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1179 for (auto *Pred : predecessors(BB))
1180 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1181 }
1182
1183 // The PHIs are now updated, change everything that refers to BB to use
1184 // DestBB and remove BB.
1185 BB->replaceAllUsesWith(DestBB);
1186 BB->eraseFromParent();
1187 ++NumBlocksElim;
1188
1189 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1190}
1191
1192// Computes a map of base pointer relocation instructions to corresponding
1193// derived pointer relocation instructions given a vector of all relocate calls
1195 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1197 &RelocateInstMap) {
1198 // Collect information in two maps: one primarily for locating the base object
1199 // while filling the second map; the second map is the final structure holding
1200 // a mapping between Base and corresponding Derived relocate calls
1202 for (auto *ThisRelocate : AllRelocateCalls) {
1203 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1204 ThisRelocate->getDerivedPtrIndex());
1205 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1206 }
1207 for (auto &Item : RelocateIdxMap) {
1208 std::pair<unsigned, unsigned> Key = Item.first;
1209 if (Key.first == Key.second)
1210 // Base relocation: nothing to insert
1211 continue;
1212
1213 GCRelocateInst *I = Item.second;
1214 auto BaseKey = std::make_pair(Key.first, Key.first);
1215
1216 // We're iterating over RelocateIdxMap so we cannot modify it.
1217 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1218 if (MaybeBase == RelocateIdxMap.end())
1219 // TODO: We might want to insert a new base object relocate and gep off
1220 // that, if there are enough derived object relocates.
1221 continue;
1222
1223 RelocateInstMap[MaybeBase->second].push_back(I);
1224 }
1225}
1226
1227// Accepts a GEP and extracts the operands into a vector provided they're all
1228// small integer constants
1230 SmallVectorImpl<Value *> &OffsetV) {
1231 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1232 // Only accept small constant integer operands
1233 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1234 if (!Op || Op->getZExtValue() > 20)
1235 return false;
1236 }
1237
1238 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1239 OffsetV.push_back(GEP->getOperand(i));
1240 return true;
1241}
1242
1243// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1244// replace, computes a replacement, and affects it.
1245static bool
1247 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1248 bool MadeChange = false;
1249 // We must ensure the relocation of derived pointer is defined after
1250 // relocation of base pointer. If we find a relocation corresponding to base
1251 // defined earlier than relocation of base then we move relocation of base
1252 // right before found relocation. We consider only relocation in the same
1253 // basic block as relocation of base. Relocations from other basic block will
1254 // be skipped by optimization and we do not care about them.
1255 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1256 &*R != RelocatedBase; ++R)
1257 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1258 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1259 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1260 RelocatedBase->moveBefore(RI->getIterator());
1261 MadeChange = true;
1262 break;
1263 }
1264
1265 for (GCRelocateInst *ToReplace : Targets) {
1266 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1267 "Not relocating a derived object of the original base object");
1268 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1269 // A duplicate relocate call. TODO: coalesce duplicates.
1270 continue;
1271 }
1272
1273 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1274 // Base and derived relocates are in different basic blocks.
1275 // In this case transform is only valid when base dominates derived
1276 // relocate. However it would be too expensive to check dominance
1277 // for each such relocate, so we skip the whole transformation.
1278 continue;
1279 }
1280
1281 Value *Base = ToReplace->getBasePtr();
1282 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1283 if (!Derived || Derived->getPointerOperand() != Base)
1284 continue;
1285
1287 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1288 continue;
1289
1290 // Create a Builder and replace the target callsite with a gep
1291 assert(RelocatedBase->getNextNode() &&
1292 "Should always have one since it's not a terminator");
1293
1294 // Insert after RelocatedBase
1295 IRBuilder<> Builder(RelocatedBase->getNextNode());
1296 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1297
1298 // If gc_relocate does not match the actual type, cast it to the right type.
1299 // In theory, there must be a bitcast after gc_relocate if the type does not
1300 // match, and we should reuse it to get the derived pointer. But it could be
1301 // cases like this:
1302 // bb1:
1303 // ...
1304 // %g1 = call coldcc i8 addrspace(1)*
1305 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1306 //
1307 // bb2:
1308 // ...
1309 // %g2 = call coldcc i8 addrspace(1)*
1310 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1311 //
1312 // merge:
1313 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1314 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1315 //
1316 // In this case, we can not find the bitcast any more. So we insert a new
1317 // bitcast no matter there is already one or not. In this way, we can handle
1318 // all cases, and the extra bitcast should be optimized away in later
1319 // passes.
1320 Value *ActualRelocatedBase = RelocatedBase;
1321 if (RelocatedBase->getType() != Base->getType()) {
1322 ActualRelocatedBase =
1323 Builder.CreateBitCast(RelocatedBase, Base->getType());
1324 }
1325 Value *Replacement =
1326 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1327 ArrayRef(OffsetV));
1328 Replacement->takeName(ToReplace);
1329 // If the newly generated derived pointer's type does not match the original
1330 // derived pointer's type, cast the new derived pointer to match it. Same
1331 // reasoning as above.
1332 Value *ActualReplacement = Replacement;
1333 if (Replacement->getType() != ToReplace->getType()) {
1334 ActualReplacement =
1335 Builder.CreateBitCast(Replacement, ToReplace->getType());
1336 }
1337 ToReplace->replaceAllUsesWith(ActualReplacement);
1338 ToReplace->eraseFromParent();
1339
1340 MadeChange = true;
1341 }
1342 return MadeChange;
1343}
1344
1345// Turns this:
1346//
1347// %base = ...
1348// %ptr = gep %base + 15
1349// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1350// %base' = relocate(%tok, i32 4, i32 4)
1351// %ptr' = relocate(%tok, i32 4, i32 5)
1352// %val = load %ptr'
1353//
1354// into this:
1355//
1356// %base = ...
1357// %ptr = gep %base + 15
1358// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1359// %base' = gc.relocate(%tok, i32 4, i32 4)
1360// %ptr' = gep %base' + 15
1361// %val = load %ptr'
1362bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1363 bool MadeChange = false;
1364 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1365 for (auto *U : I.users())
1366 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1367 // Collect all the relocate calls associated with a statepoint
1368 AllRelocateCalls.push_back(Relocate);
1369
1370 // We need at least one base pointer relocation + one derived pointer
1371 // relocation to mangle
1372 if (AllRelocateCalls.size() < 2)
1373 return false;
1374
1375 // RelocateInstMap is a mapping from the base relocate instruction to the
1376 // corresponding derived relocate instructions
1377 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1378 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1379 if (RelocateInstMap.empty())
1380 return false;
1381
1382 for (auto &Item : RelocateInstMap)
1383 // Item.first is the RelocatedBase to offset against
1384 // Item.second is the vector of Targets to replace
1385 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1386 return MadeChange;
1387}
1388
1389/// Sink the specified cast instruction into its user blocks.
1390static bool SinkCast(CastInst *CI) {
1391 BasicBlock *DefBB = CI->getParent();
1392
1393 /// InsertedCasts - Only insert a cast in each block once.
1395
1396 bool MadeChange = false;
1397 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1398 UI != E;) {
1399 Use &TheUse = UI.getUse();
1401
1402 // Figure out which BB this cast is used in. For PHI's this is the
1403 // appropriate predecessor block.
1404 BasicBlock *UserBB = User->getParent();
1405 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1406 UserBB = PN->getIncomingBlock(TheUse);
1407 }
1408
1409 // Preincrement use iterator so we don't invalidate it.
1410 ++UI;
1411
1412 // The first insertion point of a block containing an EH pad is after the
1413 // pad. If the pad is the user, we cannot sink the cast past the pad.
1414 if (User->isEHPad())
1415 continue;
1416
1417 // If the block selected to receive the cast is an EH pad that does not
1418 // allow non-PHI instructions before the terminator, we can't sink the
1419 // cast.
1420 if (UserBB->getTerminator()->isEHPad())
1421 continue;
1422
1423 // If this user is in the same block as the cast, don't change the cast.
1424 if (UserBB == DefBB)
1425 continue;
1426
1427 // If we have already inserted a cast into this block, use it.
1428 CastInst *&InsertedCast = InsertedCasts[UserBB];
1429
1430 if (!InsertedCast) {
1431 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1432 assert(InsertPt != UserBB->end());
1433 InsertedCast = cast<CastInst>(CI->clone());
1434 InsertedCast->insertBefore(*UserBB, InsertPt);
1435 }
1436
1437 // Replace a use of the cast with a use of the new cast.
1438 TheUse = InsertedCast;
1439 MadeChange = true;
1440 ++NumCastUses;
1441 }
1442
1443 // If we removed all uses, nuke the cast.
1444 if (CI->use_empty()) {
1445 salvageDebugInfo(*CI);
1446 CI->eraseFromParent();
1447 MadeChange = true;
1448 }
1449
1450 return MadeChange;
1451}
1452
1453/// If the specified cast instruction is a noop copy (e.g. it's casting from
1454/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1455/// reduce the number of virtual registers that must be created and coalesced.
1456///
1457/// Return true if any changes are made.
1459 const DataLayout &DL) {
1460 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1461 // than sinking only nop casts, but is helpful on some platforms.
1462 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1463 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1464 ASC->getDestAddressSpace()))
1465 return false;
1466 }
1467
1468 // If this is a noop copy,
1469 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1470 EVT DstVT = TLI.getValueType(DL, CI->getType());
1471
1472 // This is an fp<->int conversion?
1473 if (SrcVT.isInteger() != DstVT.isInteger())
1474 return false;
1475
1476 // If this is an extension, it will be a zero or sign extension, which
1477 // isn't a noop.
1478 if (SrcVT.bitsLT(DstVT))
1479 return false;
1480
1481 // If these values will be promoted, find out what they will be promoted
1482 // to. This helps us consider truncates on PPC as noop copies when they
1483 // are.
1484 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1486 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1487 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1489 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1490
1491 // If, after promotion, these are the same types, this is a noop copy.
1492 if (SrcVT != DstVT)
1493 return false;
1494
1495 return SinkCast(CI);
1496}
1497
1498// Match a simple increment by constant operation. Note that if a sub is
1499// matched, the step is negated (as if the step had been canonicalized to
1500// an add, even though we leave the instruction alone.)
1501static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1502 Constant *&Step) {
1503 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1505 m_Instruction(LHS), m_Constant(Step)))))
1506 return true;
1507 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1509 m_Instruction(LHS), m_Constant(Step))))) {
1510 Step = ConstantExpr::getNeg(Step);
1511 return true;
1512 }
1513 return false;
1514}
1515
1516/// If given \p PN is an inductive variable with value IVInc coming from the
1517/// backedge, and on each iteration it gets increased by Step, return pair
1518/// <IVInc, Step>. Otherwise, return std::nullopt.
1519static std::optional<std::pair<Instruction *, Constant *>>
1520getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1521 const Loop *L = LI->getLoopFor(PN->getParent());
1522 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1523 return std::nullopt;
1524 auto *IVInc =
1525 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1526 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1527 return std::nullopt;
1528 Instruction *LHS = nullptr;
1529 Constant *Step = nullptr;
1530 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1531 return std::make_pair(IVInc, Step);
1532 return std::nullopt;
1533}
1534
1535static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1536 auto *I = dyn_cast<Instruction>(V);
1537 if (!I)
1538 return false;
1539 Instruction *LHS = nullptr;
1540 Constant *Step = nullptr;
1541 if (!matchIncrement(I, LHS, Step))
1542 return false;
1543 if (auto *PN = dyn_cast<PHINode>(LHS))
1544 if (auto IVInc = getIVIncrement(PN, LI))
1545 return IVInc->first == I;
1546 return false;
1547}
1548
1549bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1550 Value *Arg0, Value *Arg1,
1551 CmpInst *Cmp,
1552 Intrinsic::ID IID) {
1553 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1554 if (!isIVIncrement(BO, LI))
1555 return false;
1556 const Loop *L = LI->getLoopFor(BO->getParent());
1557 assert(L && "L should not be null after isIVIncrement()");
1558 // Do not risk on moving increment into a child loop.
1559 if (LI->getLoopFor(Cmp->getParent()) != L)
1560 return false;
1561
1562 // Finally, we need to ensure that the insert point will dominate all
1563 // existing uses of the increment.
1564
1565 auto &DT = getDT(*BO->getParent()->getParent());
1566 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1567 // If we're moving up the dom tree, all uses are trivially dominated.
1568 // (This is the common case for code produced by LSR.)
1569 return true;
1570
1571 // Otherwise, special case the single use in the phi recurrence.
1572 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1573 };
1574 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1575 // We used to use a dominator tree here to allow multi-block optimization.
1576 // But that was problematic because:
1577 // 1. It could cause a perf regression by hoisting the math op into the
1578 // critical path.
1579 // 2. It could cause a perf regression by creating a value that was live
1580 // across multiple blocks and increasing register pressure.
1581 // 3. Use of a dominator tree could cause large compile-time regression.
1582 // This is because we recompute the DT on every change in the main CGP
1583 // run-loop. The recomputing is probably unnecessary in many cases, so if
1584 // that was fixed, using a DT here would be ok.
1585 //
1586 // There is one important particular case we still want to handle: if BO is
1587 // the IV increment. Important properties that make it profitable:
1588 // - We can speculate IV increment anywhere in the loop (as long as the
1589 // indvar Phi is its only user);
1590 // - Upon computing Cmp, we effectively compute something equivalent to the
1591 // IV increment (despite it loops differently in the IR). So moving it up
1592 // to the cmp point does not really increase register pressure.
1593 return false;
1594 }
1595
1596 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1597 if (BO->getOpcode() == Instruction::Add &&
1598 IID == Intrinsic::usub_with_overflow) {
1599 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1601 }
1602
1603 // Insert at the first instruction of the pair.
1604 Instruction *InsertPt = nullptr;
1605 for (Instruction &Iter : *Cmp->getParent()) {
1606 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1607 // the overflow intrinsic are defined.
1608 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1609 InsertPt = &Iter;
1610 break;
1611 }
1612 }
1613 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1614
1615 IRBuilder<> Builder(InsertPt);
1616 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1617 if (BO->getOpcode() != Instruction::Xor) {
1618 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1619 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1620 } else
1621 assert(BO->hasOneUse() &&
1622 "Patterns with XOr should use the BO only in the compare");
1623 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1624 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1625 Cmp->eraseFromParent();
1626 BO->eraseFromParent();
1627 return true;
1628}
1629
1630/// Match special-case patterns that check for unsigned add overflow.
1632 BinaryOperator *&Add) {
1633 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1634 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1635 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1636
1637 // We are not expecting non-canonical/degenerate code. Just bail out.
1638 if (isa<Constant>(A))
1639 return false;
1640
1641 ICmpInst::Predicate Pred = Cmp->getPredicate();
1642 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1643 B = ConstantInt::get(B->getType(), 1);
1644 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1645 B = Constant::getAllOnesValue(B->getType());
1646 else
1647 return false;
1648
1649 // Check the users of the variable operand of the compare looking for an add
1650 // with the adjusted constant.
1651 for (User *U : A->users()) {
1652 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1654 return true;
1655 }
1656 }
1657 return false;
1658}
1659
1660/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1661/// intrinsic. Return true if any changes were made.
1662bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1663 ModifyDT &ModifiedDT) {
1664 bool EdgeCase = false;
1665 Value *A, *B;
1666 BinaryOperator *Add;
1667 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1669 return false;
1670 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1671 A = Add->getOperand(0);
1672 B = Add->getOperand(1);
1673 EdgeCase = true;
1674 }
1675
1677 TLI->getValueType(*DL, Add->getType()),
1678 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1679 return false;
1680
1681 // We don't want to move around uses of condition values this late, so we
1682 // check if it is legal to create the call to the intrinsic in the basic
1683 // block containing the icmp.
1684 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1685 return false;
1686
1687 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1688 Intrinsic::uadd_with_overflow))
1689 return false;
1690
1691 // Reset callers - do not crash by iterating over a dead instruction.
1692 ModifiedDT = ModifyDT::ModifyInstDT;
1693 return true;
1694}
1695
1696bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1697 ModifyDT &ModifiedDT) {
1698 // We are not expecting non-canonical/degenerate code. Just bail out.
1699 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1700 if (isa<Constant>(A) && isa<Constant>(B))
1701 return false;
1702
1703 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1704 ICmpInst::Predicate Pred = Cmp->getPredicate();
1705 if (Pred == ICmpInst::ICMP_UGT) {
1706 std::swap(A, B);
1707 Pred = ICmpInst::ICMP_ULT;
1708 }
1709 // Convert special-case: (A == 0) is the same as (A u< 1).
1710 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1711 B = ConstantInt::get(B->getType(), 1);
1712 Pred = ICmpInst::ICMP_ULT;
1713 }
1714 // Convert special-case: (A != 0) is the same as (0 u< A).
1715 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1716 std::swap(A, B);
1717 Pred = ICmpInst::ICMP_ULT;
1718 }
1719 if (Pred != ICmpInst::ICMP_ULT)
1720 return false;
1721
1722 // Walk the users of a variable operand of a compare looking for a subtract or
1723 // add with that same operand. Also match the 2nd operand of the compare to
1724 // the add/sub, but that may be a negated constant operand of an add.
1725 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1726 BinaryOperator *Sub = nullptr;
1727 for (User *U : CmpVariableOperand->users()) {
1728 // A - B, A u< B --> usubo(A, B)
1729 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1731 break;
1732 }
1733
1734 // A + (-C), A u< C (canonicalized form of (sub A, C))
1735 const APInt *CmpC, *AddC;
1736 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1737 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1739 break;
1740 }
1741 }
1742 if (!Sub)
1743 return false;
1744
1746 TLI->getValueType(*DL, Sub->getType()),
1747 Sub->hasNUsesOrMore(1)))
1748 return false;
1749
1750 // We don't want to move around uses of condition values this late, so we
1751 // check if it is legal to create the call to the intrinsic in the basic
1752 // block containing the icmp.
1753 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1754 return false;
1755
1756 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1757 Cmp, Intrinsic::usub_with_overflow))
1758 return false;
1759
1760 // Reset callers - do not crash by iterating over a dead instruction.
1761 ModifiedDT = ModifyDT::ModifyInstDT;
1762 return true;
1763}
1764
1765// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1766// The same transformation exists in DAG combiner, but we repeat it here because
1767// DAG builder can break the pattern by moving icmp into a successor block.
//
// Returns true if `Cmp` was changed, either by rewriting its predicate (and
// possibly its constant) in place on the fast-ctpop path, or by replacing all
// of its uses with a newly built compare. Returns false when no rewrite is
// applied.
1768bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1769 CmpPredicate Pred;
1770 Value *X;
1771 const APInt *C;
1772
1773 // (icmp (ctpop x), c)
     // NOTE(review): the embedded numbering jumps from 1773 to 1776, so the
     // condition guarding this early return is not visible in this listing.
     // Presumably it is the pattern match that binds Pred, X and C - confirm
     // against the full file.
1776 return false;
1777
1778 // We're only interested in "is power of 2 [or zero]" patterns.
1779 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1780 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1781 (Pred == CmpInst::ICMP_UGT && *C == 1);
1782 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1783 return false;
1784
1785 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1786 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1787 // and otherwise expand ctpop into a few simple instructions.
1788 Type *OpTy = X->getType();
1789 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1790 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1791 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1792 return false;
1793
1794 // ctpop(x) == 1 -> ctpop(x) u< 2
1795 // ctpop(x) != 1 -> ctpop(x) u> 1
     // The `!=` case keeps the constant 1, so only the predicate changes.
1796 if (Pred == ICmpInst::ICMP_EQ) {
1797 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1798 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1799 } else {
1800 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1801 }
1802 return true;
1803 }
1804
     // Slow ctpop: build a replacement compare that does not use ctpop.
1805 Value *NewCmp;
1806 if (IsPowerOf2OrZeroTest ||
1807 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1808 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1809 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1810 IRBuilder<> Builder(Cmp);
     // `x - 1` is emitted as `x + all-ones`.
1811 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1812 Value *And = Builder.CreateAnd(X, Sub);
1813 CmpInst::Predicate NewPred =
1814 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
     // NOTE(review): line 1815 (the true arm of this conditional) is not
     // visible in this listing; per the comment above it should select the
     // `== 0` form - confirm.
1816 : CmpInst::ICMP_NE;
1817 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1818 } else {
1819 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1820 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1821 IRBuilder<> Builder(Cmp);
1822 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1823 Value *Xor = Builder.CreateXor(X, Sub);
1824 CmpInst::Predicate NewPred =
     // NOTE(review): line 1825 (the initializer of NewPred) is not visible in
     // this listing; per the comment above it should pick u> for `==` and u<=
     // for `!=` - confirm.
1826 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1827 }
1828
1829 Cmp->replaceAllUsesWith(NewCmp);
     // NOTE(review): line 1830 is not visible in this listing; whatever cleanup
     // of the now-unused `Cmp` happens there cannot be confirmed from here.
1831 return true;
1832}
1833
1834/// Sink the given CmpInst into user blocks to reduce the number of virtual
1835/// registers that must be created and coalesced. This is a clear win except on
1836/// targets with multiple condition code registers (PowerPC), where it might
1837/// lose; some adjustment may be wanted there.
1838///
/// Nothing is sunk (and false is returned) when the target reports multiple
/// condition registers for the compare's type, when the compare is an FCmp on
/// a soft-float target, or when an integer compare wider than the largest
/// legal integer type is used by a PHI or inside its own block.
///
1839/// Return true if any changes are made.
1840static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1841 const DataLayout &DL) {
1842 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1843 return false;
1844
1845 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1846 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1847 return false;
1848
     // True if some user is a PHI or lives in the same block as Cmp; those
     // uses are skipped by the loop below, so the original compare would
     // remain alive alongside the sunk copies.
1849 bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1850 return isa<PHINode>(U) ||
1851 cast<Instruction>(U)->getParent() == Cmp->getParent();
1852 });
1853
1854 // Avoid sinking larger than legal integer comparisons unless it's ONLY used
1855 // in another BB.
1856 if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1857 Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1858 DL.getLargestLegalIntTypeSizeInBits())
1859 return false;
1860
1861 // Only insert a cmp in each block once.
     // NOTE(review): line 1862 is not visible in this listing; it presumably
     // declares `InsertedCmps`, which is indexed by user block below - confirm.
1863
1864 bool MadeChange = false;
1865 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1866 UI != E;) {
1867 Use &TheUse = UI.getUse();
     // NOTE(review): line 1868 is not visible in this listing; it presumably
     // declares `User` as the instruction behind `*UI` - confirm.
1869
1870 // Preincrement use iterator so we don't invalidate it.
1871 ++UI;
1872
1873 // Don't bother for PHI nodes.
1874 if (isa<PHINode>(User))
1875 continue;
1876
1877 // Figure out which BB this cmp is used in.
1878 BasicBlock *UserBB = User->getParent();
1879 BasicBlock *DefBB = Cmp->getParent();
1880
1881 // If this user is in the same block as the cmp, don't change the cmp.
1882 if (UserBB == DefBB)
1883 continue;
1884
1885 // If we have already inserted a cmp into this block, use it.
1886 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1887
1888 if (!InsertedCmp) {
     // Clone the compare (same opcode, predicate and operands) at the first
     // insertion point of the user's block.
1889 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1890 assert(InsertPt != UserBB->end());
1891 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1892 Cmp->getOperand(0), Cmp->getOperand(1), "");
1893 InsertedCmp->insertBefore(*UserBB, InsertPt);
1894 // Propagate the debug info.
1895 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1896 }
1897
1898 // Replace a use of the cmp with a use of the new cmp.
1899 TheUse = InsertedCmp;
1900 MadeChange = true;
1901 ++NumCmpUses;
1902 }
1903
1904 // If we removed all uses, nuke the cmp.
1905 if (Cmp->use_empty()) {
1906 Cmp->eraseFromParent();
1907 MadeChange = true;
1908 }
1909
1910 return MadeChange;
1911}
1912
1913/// For pattern like:
1914///
1915/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1916/// ...
1917/// DomBB:
1918/// ...
1919/// br DomCond, TrueBB, CmpBB
1920/// CmpBB: (with DomBB being the single predecessor)
1921/// ...
1922/// Cmp = icmp eq CmpOp0, CmpOp1
1923/// ...
1924///
1925/// This costs two comparisons on targets where icmp sgt/slt is lowered
1926/// differently from icmp eq (PowerPC). This function tries to convert
1927/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1928/// After that, DomCond and Cmp can share the same comparison, saving one
1929/// comparison.
1930///
1931/// Return true if any changes are made.
// NOTE(review): line 1932 (the start of the signature) is not visible in this
// listing. The body uses a `Cmp` parameter in addition to the `TLI` parameter
// shown below; the only visible call passes `(Cmp, *TLI)`.
1933 const TargetLowering &TLI) {
     // NOTE(review): line 1934 (the condition guarding this early return) is
     // not visible in this listing.
1935 return false;
1936
     // Only equality compares are rewritten.
1937 ICmpInst::Predicate Pred = Cmp->getPredicate();
1938 if (Pred != ICmpInst::ICMP_EQ)
1939 return false;
1940
1941 // If icmp eq has users other than CondBrInst and SelectInst, converting it to
1942 // icmp slt/sgt would introduce more redundant LLVM IR.
1943 for (User *U : Cmp->users()) {
1944 if (isa<CondBrInst>(U))
1945 continue;
1946 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1947 continue;
1948 return false;
1949 }
1950
1951 // This is a cheap/incomplete check for dominance - just match a single
1952 // predecessor with a conditional branch.
1953 BasicBlock *CmpBB = Cmp->getParent();
1954 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1955 if (!DomBB)
1956 return false;
1957
1958 // We want to ensure that the only way control gets to the comparison of
1959 // interest is that a less/greater than comparison on the same operands is
1960 // false.
1961 Value *DomCond;
1962 BasicBlock *TrueBB, *FalseBB;
1963 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1964 return false;
1965 if (CmpBB != FalseBB)
1966 return false;
1967
     // The dominating condition must be a signed less/greater compare of the
     // same two operands, in the same order.
1968 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1969 CmpPredicate DomPred;
1970 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1971 return false;
1972 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1973 return false;
1974
1975 // Convert the equality comparison to the opposite of the dominating
1976 // comparison and swap the direction for all branch/select users.
1977 // We have conceptually converted:
1978 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1979 // to
1980 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1981 // And similarly for branches.
1982 for (User *U : Cmp->users()) {
1983 if (auto *BI = dyn_cast<CondBrInst>(U)) {
1984 BI->swapSuccessors();
1985 continue;
1986 }
1987 if (auto *SI = dyn_cast<SelectInst>(U)) {
1988 // Swap operands
1989 SI->swapValues();
1990 SI->swapProfMetadata();
1991 continue;
1992 }
     // The user scan above already rejected every other kind of user.
1993 llvm_unreachable("Must be a branch or a select");
1994 }
1995 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1996 return true;
1997}
1998
1999/// Many architectures use the same instruction for both subtract and cmp. Try
2000/// to swap cmp operands to match subtract operations to allow for CSE.
///
/// Returns true if the operands of `Cmp` were swapped, false otherwise.
// NOTE(review): line 2001 (the function signature) is not visible in this
// listing; the body only relies on a `Cmp` parameter providing getOperand()
// and swapOperands().
2002 Value *Op0 = Cmp->getOperand(0);
2003 Value *Op1 = Cmp->getOperand(1);
     // Only integer compares of two distinct, non-constant operands qualify.
2004 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
2005 isa<Constant>(Op1) || Op0 == Op1)
2006 return false;
2007
2008 // If a subtract already has the same operands as a compare, swapping would be
2009 // bad. If a subtract has the same operands as a compare but in reverse order,
2010 // then swapping is good.
     // GoodToSwap is the net vote: +1 per `Op1 - Op0`, -1 per `Op0 - Op1`.
2011 int GoodToSwap = 0;
2012 unsigned NumInspected = 0;
2013 for (const User *U : Op0->users()) {
2014 // Avoid walking many users.
     // Gives up entirely (no swap) once more than 128 users have been seen.
2015 if (++NumInspected > 128)
2016 return false;
2017 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2018 GoodToSwap++;
2019 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2020 GoodToSwap--;
2021 }
2022
2023 if (GoodToSwap > 0) {
2024 Cmp->swapOperands();
2025 return true;
2026 }
2027 return false;
2028}
2029
/// If `Cmp` is an FCmp that fcmpToClassTest() recognizes as a test for
/// infinity, for infinity-or-NaN, or for the complement of either, replace its
/// uses with the value built by IRBuilder::createIsFPClass.
///
/// Return true if the uses of `Cmp` were replaced.
2030static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2031 const DataLayout &DL) {
2032 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2033 if (!FCmp)
2034 return false;
2035
2036 // Don't fold if the target offers free fabs and the predicate is legal.
2037 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2038 if (TLI.isFAbsFree(VT) &&
     // NOTE(review): line 2039 (the start of the second condition, whose last
     // argument appears on the next line) is not visible in this listing.
2040 VT.getSimpleVT()))
2041 return false;
2042
2043 // Reverse the canonicalization if it is a FP class test
2044 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2045 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2046 };
2047 auto [ClassVal, ClassTest] =
2048 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2049 FCmp->getOperand(0), FCmp->getOperand(1));
     // A null ClassVal means the compare was not recognized as a class test.
2050 if (!ClassVal)
2051 return false;
2052
     // Accept the mask itself or its complement.
2053 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2054 return false;
2055
2056 IRBuilder<> Builder(Cmp);
2057 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2058 Cmp->replaceAllUsesWith(IsFPClass);
     // NOTE(review): line 2059 is not visible in this listing; any cleanup of
     // the now-unused `Cmp` done there cannot be confirmed from here.
2060 return true;
2061}
2062
// Matches `Rem` as `urem Incr, RemAmt`, where `Incr` is either a PHI or a nuw
// add that has a PHI as one operand, and the PHI is a loop induction variable
// stepping by one with a nuw increment. On success returns true and sets:
//   RemAmtOut     - the urem divisor (checked to be loop invariant),
//   LoopIncrPNOut - the induction PHI,
//   AddInstOut    - the nuw add feeding the urem, or null if the urem operand
//                   is the PHI itself,
//   AddOffsetOut  - the non-PHI operand of that add (checked to be loop
//                   invariant), or null if there is no add.
// The outputs are left untouched when false is returned.
//
// NOTE(review): line 2063 (the start of the signature) is not visible in this
// listing; the name used here is taken from the call site in
// foldURemOfLoopIncrement.
2064 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2065 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2066 Value *Incr, *RemAmt;
2067 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2068 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2069 return false;
2070
2071 Value *AddInst, *AddOffset;
2072 // Find out loop increment PHI.
2073 auto *PN = dyn_cast<PHINode>(Incr);
2074 if (PN != nullptr) {
2075 AddInst = nullptr;
2076 AddOffset = nullptr;
2077 } else {
2078 // Search through a NUW add on top of the loop increment.
2079 Value *V0, *V1;
2080 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2081 return false;
2082
     // Either operand of the add may be the PHI; the other one becomes the
     // offset. If neither is a PHI, PN stays null and we bail out below.
2083 AddInst = Incr;
2084 PN = dyn_cast<PHINode>(V0);
2085 if (PN != nullptr) {
2086 AddOffset = V1;
2087 } else {
2088 PN = dyn_cast<PHINode>(V1);
2089 AddOffset = V0;
2090 }
2091 }
2092
2093 if (!PN)
2094 return false;
2095
2096 // This isn't strictly necessary, what we really need is one increment and any
2097 // amount of initial values all being the same.
2098 if (PN->getNumIncomingValues() != 2)
2099 return false;
2100
2101 // Only trivially analyzable loops.
2102 Loop *L = LI->getLoopFor(PN->getParent());
2103 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2104 return false;
2105
2106 // Require that the remainder is in the loop.
2107 if (!L->contains(Rem))
2108 return false;
2109
2110 // Only works if the remainder amount is loop invariant.
2111 if (!L->isLoopInvariant(RemAmt))
2112 return false;
2113
2114 // Only works if the AddOffset is loop invariant.
2115 if (AddOffset && !L->isLoopInvariant(AddOffset))
2116 return false;
2117
2118 // Is the PHI a loop increment?
     // On success, `first` is the increment instruction and `second` the step,
     // as used by the two checks below.
2119 auto LoopIncrInfo = getIVIncrement(PN, LI);
2120 if (!LoopIncrInfo)
2121 return false;
2122
2123 // We need remainder_amount % increment_amount to be zero. Increment of one
2124 // satisfies that without any special logic and is overwhelmingly the common
2125 // case.
2126 if (!match(LoopIncrInfo->second, m_One()))
2127 return false;
2128
2129 // Need the increment to not overflow.
2130 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2131 return false;
2132
2133 // Set output variables.
2134 RemAmtOut = RemAmt;
2135 LoopIncrPNOut = PN;
2136 AddInstOut = AddInst;
2137 AddOffsetOut = AddOffset;
2138
2139 return true;
2140}
2141
2142// Try to transform:
2143//
2144// for(i = Start; i < End; ++i)
2145// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2146//
2147// ->
2148//
2149// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2150// for(i = Start; i < End; ++i, ++rem)
2151// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
//
// Returns true if `Rem` was replaced by a new PHI and erased; returns false
// (leaving the IR untouched) when the pattern does not match, the divisor is
// an immediate constant, or the start value cannot be simplified.
//
// NOTE(review): lines 2152 and 2154 of the signature are not visible in this
// listing. The body uses parameters `Rem`, `DL` and `FreshBBs` in addition to
// the `LI` and `IsHuge` parameters shown below.
2153 const LoopInfo *LI,
2155 bool IsHuge) {
2156 Value *AddOffset, *RemAmt, *AddInst;
2157 PHINode *LoopIncrPN;
2158 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2159 AddOffset, LoopIncrPN))
2160 return false;
2161
2162 // Only non-constant remainder as the extra IV is probably not profitable
2163 // in that case.
2164 //
2165 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2166 // we can rule out register pressure and ensure this `urem` is executed each
2167 // iteration, it's probably profitable to handle the const case as well.
2168 //
2169 // Potential TODO(2): Should we have a check for how "nested" this remainder
2170 // operation is? The new code runs every iteration so if the remainder is
2171 // guarded behind unlikely conditions this might not be worth it.
2172 if (match(RemAmt, m_ImmConstant()))
2173 return false;
2174
2175 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2176 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2177 // If we have add create initial value for remainder.
2178 // The logic here is:
2179 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2180 //
2181 // Only proceed if the expression simplifies (otherwise we can't fully
2182 // optimize out the urem).
2183 if (AddInst) {
2184 assert(AddOffset && "We found an add but missing values");
2185 // Without dom-condition/assumption cache we aren't likely to get much out
2186 // of a context instruction.
2187 Start = simplifyAddInst(Start, AddOffset,
2188 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2189 /*IsNUW=*/true, *DL);
2190 if (!Start)
2191 return false;
2192 }
2193
2194 // If we can't fully optimize out the `rem`, skip this transform.
2195 Start = simplifyURemInst(Start, RemAmt, *DL);
2196 if (!Start)
2197 return false;
2198
2199 // Create new remainder with induction variable.
2200 Type *Ty = Rem->getType();
2201 IRBuilder<> Builder(Rem->getContext());
2202
     // The new PHI goes next to the induction PHI.
2203 Builder.SetInsertPoint(LoopIncrPN);
2204 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2205
     // The add/compare/select chain is inserted at the instruction that
     // provides the induction PHI's incoming value from the latch.
2206 Builder.SetInsertPoint(cast<Instruction>(
2207 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2208 // `(add (urem x, y), 1)` is always nuw.
2209 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2210 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2211 Value *RemSel =
2212 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2213
2214 NewRem->addIncoming(Start, L->getLoopPreheader());
2215 NewRem->addIncoming(RemSel, L->getLoopLatch());
2216
2217 // Insert all touched BBs.
2218 FreshBBs.insert(LoopIncrPN->getParent());
2219 FreshBBs.insert(L->getLoopLatch());
2220 FreshBBs.insert(Rem->getParent());
2221 if (AddInst)
2222 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2223 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2224 Rem->eraseFromParent();
     // The add that fed the urem is erased too if nothing else uses it.
2225 if (AddInst && AddInst->use_empty())
2226 cast<Instruction>(AddInst)->eraseFromParent();
2227 return true;
2228}
2229
2230bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2231 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2232 return true;
2233 return false;
2234}
2235
/// Run the compare optimizations on `Cmp` one after another, stopping at the
/// first one that reports a change. `ModifiedDT` is handed on to the two
/// overflow-intrinsic combines.
///
/// Return true if any of the optimizations changed the IR.
2236bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2237 if (sinkCmpExpression(Cmp, *TLI, *DL))
2238 return true;
2239
2240 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2241 return true;
2242
2243 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2244 return true;
2245
2246 if (unfoldPowerOf2Test(Cmp))
2247 return true;
2248
2249 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2250 return true;
2251
     // NOTE(review): line 2252 (the condition guarding the next return) is not
     // visible in this listing.
2253 return true;
2254
2255 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2256 return true;
2257
2258 return false;
2259}
2260
2261/// Duplicate and sink the given 'and' instruction into user blocks where it is
2262/// used in a compare to allow isel to generate better code for targets where
2263/// this operation can be combined.
2264///
/// The transform only fires when every user of the 'and' is an icmp whose
/// second operand is the constant zero and the target reports the fold as
/// beneficial. In that case each use gets its own copy of the 'and' and the
/// original is erased.
///
2265/// Return true if any changes are made.
// NOTE(review): line 2266 (the start of the signature) is not visible in this
// listing. The body uses parameters `AndI` and `TLI` in addition to the
// `InsertedInsts` parameter shown below.
2267 SetOfInstrs &InsertedInsts) {
2268 // Double-check that we're not trying to optimize an instruction that was
2269 // already optimized by some other part of this pass.
2270 assert(!InsertedInsts.count(AndI) &&
2271 "Attempting to optimize already optimized and instruction");
     // InsertedInsts is only read by the assert above; the cast silences the
     // unused-parameter warning when asserts are compiled out.
2272 (void)InsertedInsts;
2273
2274 // Nothing to do for single use in same basic block.
2275 if (AndI->hasOneUse() &&
2276 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2277 return false;
2278
2279 // Try to avoid cases where sinking/duplicating is likely to increase register
2280 // pressure.
2281 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2282 !isa<ConstantInt>(AndI->getOperand(1)) &&
2283 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2284 return false;
2285
2286 for (auto *U : AndI->users()) {
     // NOTE(review): line 2287 is not visible in this listing; it presumably
     // declares `User` as the instruction behind `U` - confirm.
2288
2289 // Only sink 'and' feeding icmp with 0.
2290 if (!isa<ICmpInst>(User))
2291 return false;
2292
2293 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2294 if (!CmpC || !CmpC->isZero())
2295 return false;
2296 }
2297
2298 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2299 return false;
2300
2301 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2302 LLVM_DEBUG(AndI->getParent()->dump());
2303
2304 // Push the 'and' into the same block as the icmp 0. There should only be
2305 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2306 // others, so we don't need to keep track of which BBs we insert into.
2307 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2308 UI != E;) {
2309 Use &TheUse = UI.getUse();
     // NOTE(review): line 2310 is not visible in this listing; it presumably
     // declares `User` as the instruction behind `*UI` - confirm.
2311
2312 // Preincrement use iterator so we don't invalidate it.
2313 ++UI;
2314
2315 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2316
2317 // Keep the 'and' in the same place if the use is already in the same block.
2318 Instruction *InsertPt =
2319 User->getParent() == AndI->getParent() ? AndI : User;
2320 Instruction *InsertedAnd = BinaryOperator::Create(
2321 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2322 InsertPt->getIterator());
2323 // Propagate the debug info.
2324 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2325
2326 // Replace a use of the 'and' with a use of the new 'and'.
2327 TheUse = InsertedAnd;
2328 ++NumAndUses;
2329 LLVM_DEBUG(User->getParent()->dump());
2330 }
2331
2332 // We removed all uses, nuke the and.
2333 AndI->eraseFromParent();
2334 return true;
2335}
2336
2337/// Check if the candidates could be combined with a shift instruction, which
2338/// includes:
2339/// 1. Truncate instruction
2340/// 2. And instruction and the imm is a mask of the low bits:
2341/// imm & (imm+1) == 0
///
/// Returns true for a truncate, or for an 'and' whose constant second operand
/// satisfies the mask test above; false otherwise.
// NOTE(review): line 2342 (the function signature) is not visible in this
// listing; the body operates on a parameter named `User`.
2343 if (!isa<TruncInst>(User)) {
2344 if (User->getOpcode() != Instruction::And ||
     // NOTE(review): line 2345 (the second half of this condition) is not
     // visible in this listing. The cast below requires operand 1 to be a
     // ConstantInt, so that is presumably what it checks - confirm.
2346 return false;
2347
2348 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2349
     // A non-zero `imm & (imm + 1)` means imm is not a mask of the low bits.
2350 if ((Cimm & (Cimm + 1)).getBoolValue())
2351 return false;
2352 }
2353 return true;
2354}
2355
2356/// Sink both shift and truncate instruction to the use of truncate's BB.
///
/// For each user of the truncate that lives in a different block and is not
/// skipped by the checks below, a copy of the shift followed by a copy of the
/// truncate is inserted at the start of that user's block and the use is
/// redirected to the copy.
///
/// Returns true if any copy was inserted.
2357static bool
// NOTE(review): lines 2358-2359 (the function name and leading parameters) are
// not visible in this listing. The body uses `ShiftI`, `User`, `CI` and
// `InsertedShifts`, and the visible call site passes
// (ShiftI, User, CI, InsertedShifts, TLI, DL).
2360 const TargetLowering &TLI, const DataLayout &DL) {
2361 BasicBlock *UserBB = User->getParent();
     // NOTE(review): line 2362 is not visible in this listing; it presumably
     // declares `InsertedTruncs`, which is indexed by block below - confirm.
2363 auto *TruncI = cast<TruncInst>(User);
2364 bool MadeChange = false;
2365
2366 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2367 TruncE = TruncI->user_end();
2368 TruncUI != TruncE;) {
2369
2370 Use &TruncTheUse = TruncUI.getUse();
2371 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2372 // Preincrement use iterator so we don't invalidate it.
2373
2374 ++TruncUI;
2375
     // Skip users whose opcode has no ISD equivalent.
2376 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2377 if (!ISDOpcode)
2378 continue;
2379
2380 // If the use is actually a legal node, there will not be an
2381 // implicit truncate.
2382 // FIXME: always querying the result type is just an
2383 // approximation; some nodes' legality is determined by the
2384 // operand or other means. There's no good way to find out though.
     // NOTE(review): line 2385 (the start of this condition) is not visible in
     // this listing; only its argument list appears on the next line.
2386 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2387 continue;
2388
2389 // Don't bother for PHI nodes.
2390 if (isa<PHINode>(TruncUser))
2391 continue;
2392
2393 BasicBlock *TruncUserBB = TruncUser->getParent();
2394
2395 if (UserBB == TruncUserBB)
2396 continue;
2397
2398 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2399 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2400
     // Note that the use is only redirected inside this branch, i.e. when
     // neither a shift nor a trunc has been recorded for this block yet.
2401 if (!InsertedShift && !InsertedTrunc) {
2402 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2403 assert(InsertPt != TruncUserBB->end());
2404 // Sink the shift
2405 if (ShiftI->getOpcode() == Instruction::AShr)
2406 InsertedShift =
2407 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2408 else
2409 InsertedShift =
2410 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2411 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2412 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2413
2414 // Sink the trunc
     // The shift now sits at the first insertion point, so stepping one past
     // it places the trunc right after the shift.
2415 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2416 TruncInsertPt++;
2417 // It will go ahead of any debug-info.
2418 TruncInsertPt.setHeadBit(true);
2419 assert(TruncInsertPt != TruncUserBB->end());
2420
2421 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2422 TruncI->getType(), "");
2423 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2424 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2425
2426 MadeChange = true;
2427
2428 TruncTheUse = InsertedTrunc;
2429 }
2430 }
2431 return MadeChange;
2432}
2433
2434/// Sink the shift *right* instruction into user blocks if the uses could
2435/// potentially be combined with this shift instruction and generate BitExtract
2436/// instruction. It will only be applied if the architecture supports BitExtract
2437/// instruction. Here is an example:
2438/// BB1:
2439/// %x.extract.shift = lshr i64 %arg1, 32
2440/// BB2:
2441/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2442/// ==>
2443///
2444/// BB2:
2445/// %x.extract.shift.1 = lshr i64 %arg1, 32
2446/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2447///
2448/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2449/// instruction.
2450/// Return true if any changes are made.
// NOTE(review): line 2451 (the start of the signature) is not visible in this
// listing. The body uses parameters `ShiftI` and `CI` in addition to the
// `TLI` and `DL` parameters shown below.
2452 const TargetLowering &TLI,
2453 const DataLayout &DL) {
2454 BasicBlock *DefBB = ShiftI->getParent();
2455
2456 /// Only insert instructions in each block once.
     // NOTE(review): line 2457 is not visible in this listing; it presumably
     // declares `InsertedShifts`, which is indexed by block below - confirm.
2458
2459 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2460
2461 bool MadeChange = false;
2462 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2463 UI != E;) {
2464 Use &TheUse = UI.getUse();
     // NOTE(review): line 2465 is not visible in this listing; it presumably
     // declares `User` as the instruction behind `*UI` - confirm.
2466 // Preincrement use iterator so we don't invalidate it.
2467 ++UI;
2468
2469 // Don't bother for PHI nodes.
2470 if (isa<PHINode>(User))
2471 continue;
2472
     // NOTE(review): line 2473 (the condition guarding the next `continue`) is
     // not visible in this listing.
2474 continue;
2475
2476 BasicBlock *UserBB = User->getParent();
2477
2478 if (UserBB == DefBB) {
2479 // If the shift and truncate instruction are in the same BB. The use of
2480 // the truncate(TruncUse) may still introduce another truncate if not
2481 // legal. In this case, we would like to sink both shift and truncate
2482 // instruction to the BB of TruncUse.
2483 // for example:
2484 // BB1:
2485 // i64 shift.result = lshr i64 opnd, imm
2486 // trunc.result = trunc shift.result to i16
2487 //
2488 // BB2:
2489 // ----> We will have an implicit truncate here if the architecture does
2490 // not have i16 compare.
2491 // cmp i16 trunc.result, opnd2
2492 //
2493 if (isa<TruncInst>(User) &&
2494 shiftIsLegal
2495 // If the type of the truncate is legal, no truncate will be
2496 // introduced in other basic blocks.
2497 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
     // NOTE(review): this assignment overwrites MadeChange rather than
     // OR-ing into it, so a `true` recorded for an earlier user is lost
     // if this call returns false - confirm whether that is intended.
2498 MadeChange =
2499 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2500
2501 continue;
2502 }
2503 // If we have already inserted a shift into this block, use it.
2504 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2505
2506 if (!InsertedShift) {
2507 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2508 assert(InsertPt != UserBB->end());
2509
     // Recreate the same kind of right shift (arithmetic or logical) with
     // the same operand and shift amount.
2510 if (ShiftI->getOpcode() == Instruction::AShr)
2511 InsertedShift =
2512 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2513 else
2514 InsertedShift =
2515 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2516 InsertedShift->insertBefore(*UserBB, InsertPt);
2517 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2518
2519 MadeChange = true;
2520 }
2521
2522 // Replace a use of the shift with a use of the new shift.
2523 TheUse = InsertedShift;
2524 }
2525
2526 // If we removed all uses, or there are none, nuke the shift.
2527 if (ShiftI->use_empty()) {
2528 salvageDebugInfo(*ShiftI);
2529 ShiftI->eraseFromParent();
2530 MadeChange = true;
2531 }
2532
2533 return MadeChange;
2534}
2535
2536/// If counting leading or trailing zeros is an expensive operation and a zero
2537/// input is defined, add a check for zero to avoid calling the intrinsic.
2538///
2539/// We want to transform:
2540/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2541///
2542/// into:
2543/// entry:
2544/// %cmpz = icmp eq i64 %A, 0
2545/// br i1 %cmpz, label %cond.end, label %cond.false
2546/// cond.false:
2547/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2548/// br label %cond.end
2549/// cond.end:
2550/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2551///
2552/// If the transform is performed, return true and set ModifiedDT to
/// ModifyDT::ModifyBBDT. The two blocks created by the split are added to the
/// loop containing the original block (if any) and, when IsHugeFunc is set, to
/// FreshBBs.
2553static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
2554 const TargetLowering *TLI,
2555 const DataLayout *DL, ModifyDT &ModifiedDT,
     // NOTE(review): line 2556 of the signature is not visible in this listing;
     // it presumably declares the `FreshBBs` parameter used below - confirm.
2557 bool IsHugeFunc) {
2558 // If a zero input is undefined, it doesn't make sense to despeculate that.
2559 if (match(CountZeros->getOperand(1), m_One()))
2560 return false;
2561
2562 // If it's cheap to speculate, there's nothing to do.
2563 Type *Ty = CountZeros->getType();
2564 auto IntrinsicID = CountZeros->getIntrinsicID();
2565 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2566 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2567 return false;
2568
2569 // Only handle scalar cases. Anything else requires too much work.
2570 unsigned SizeInBits = Ty->getScalarSizeInBits();
2571 if (Ty->isVectorTy())
2572 return false;
2573
2574 // Bail if the value is never zero.
2575 Use &Op = CountZeros->getOperandUse(0);
2576 if (isKnownNonZero(Op, *DL))
2577 return false;
2578
2579 // The intrinsic will be sunk behind a compare against zero and branch.
2580 BasicBlock *StartBlock = CountZeros->getParent();
2581 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2582 if (IsHugeFunc)
2583 FreshBBs.insert(CallBlock);
2584
2585 // Create another block after the count zero intrinsic. A PHI will be added
2586 // in this block to select the result of the intrinsic or the bit-width
2587 // constant if the input to the intrinsic is zero.
2588 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2589 // Any debug-info after CountZeros should not be included.
2590 SplitPt.setHeadBit(true);
2591 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2592 if (IsHugeFunc)
2593 FreshBBs.insert(EndBlock);
2594
2595 // Update the LoopInfo. The new blocks are in the same loop as the start
2596 // block.
2597 if (Loop *L = LI.getLoopFor(StartBlock)) {
2598 L->addBasicBlockToLoop(CallBlock, LI);
2599 L->addBasicBlockToLoop(EndBlock, LI);
2600 }
2601
2602 // Set up a builder to create a compare, conditional branch, and PHI.
2603 IRBuilder<> Builder(CountZeros->getContext());
2604 Builder.SetInsertPoint(StartBlock->getTerminator());
2605 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2606
2607 // Replace the unconditional branch that was created by the first split with
2608 // a compare against zero and a conditional branch.
2609 Value *Zero = Constant::getNullValue(Ty);
2610 // Avoid introducing branch on poison. This also replaces the ctz operand.
     // NOTE(review): line 2611 is not visible in this listing; it presumably
     // makes the freeze below conditional - confirm.
2612 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2613 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
     // A zero input skips the intrinsic and goes straight to the end block.
2614 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2615 StartBlock->getTerminator()->eraseFromParent();
2616
2617 // Create a PHI in the end block to select either the output of the intrinsic
2618 // or the bit width of the operand.
2619 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2620 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
     // Uses are redirected before the intrinsic is added as an incoming value,
     // so the PHI's own operand is not rewritten to the PHI itself.
2621 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2622 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2623 PN->addIncoming(BitWidth, StartBlock);
2624 PN->addIncoming(CountZeros, CallBlock);
2625
2626 // We are explicitly handling the zero case, so we can set the intrinsic's
2627 // undefined zero argument to 'true'. This will also prevent reprocessing the
2628 // intrinsic; we only despeculate when a zero input is defined.
2629 CountZeros->setArgOperand(1, Builder.getTrue());
2630 ModifiedDT = ModifyDT::ModifyBBDT;
2631 return true;
2632}
2633
2634bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2635 BasicBlock *BB = CI->getParent();
2636
2637 // Sink address computing for memory operands into the block.
2638 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2639 return true;
2640
2641 // Align the pointer arguments to this call if the target thinks it's a good
2642 // idea
2643 unsigned MinSize;
2644 Align PrefAlign;
2645 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2646 for (auto &Arg : CI->args()) {
2647 // We want to align both objects whose address is used directly and
2648 // objects whose address is used in casts and GEPs, though it only makes
2649 // sense for GEPs if the offset is a multiple of the desired alignment and
2650 // if size - offset meets the size threshold.
2651 if (!Arg->getType()->isPointerTy())
2652 continue;
2653 APInt Offset(DL->getIndexSizeInBits(
2654 cast<PointerType>(Arg->getType())->getAddressSpace()),
2655 0);
2656 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2657 uint64_t Offset2 = Offset.getLimitedValue();
2658 if (!isAligned(PrefAlign, Offset2))
2659 continue;
2660 AllocaInst *AI;
2661 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign) {
2662 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(*DL);
2663 if (AllocaSize && AllocaSize->getKnownMinValue() >= MinSize + Offset2)
2664 AI->setAlignment(PrefAlign);
2665 }
2666 // Global variables can only be aligned if they are defined in this
2667 // object (i.e. they are uniquely initialized in this object), and
2668 // over-aligning global variables that have an explicit section is
2669 // forbidden.
2670 GlobalVariable *GV;
2671 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2672 GV->getPointerAlignment(*DL) < PrefAlign &&
2673 GV->getGlobalSize(*DL) >= MinSize + Offset2)
2674 GV->setAlignment(PrefAlign);
2675 }
2676 }
2677 // If this is a memcpy (or similar) then we may be able to improve the
2678 // alignment.
2679 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2680 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2681 MaybeAlign MIDestAlign = MI->getDestAlign();
2682 if (!MIDestAlign || DestAlign > *MIDestAlign)
2683 MI->setDestAlignment(DestAlign);
2684 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2685 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2686 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2687 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2688 MTI->setSourceAlignment(SrcAlign);
2689 }
2690 }
2691
2692 // If we have a cold call site, try to sink addressing computation into the
2693 // cold block. This interacts with our handling for loads and stores to
2694 // ensure that we can fold all uses of a potential addressing computation
2695 // into their uses. TODO: generalize this to work over profiling data
2696 if (CI->hasFnAttr(Attribute::Cold) &&
2697 !llvm::shouldOptimizeForSize(BB, PSI, BFI))
2698 for (auto &Arg : CI->args()) {
2699 if (!Arg->getType()->isPointerTy())
2700 continue;
2701 unsigned AS = Arg->getType()->getPointerAddressSpace();
2702 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2703 return true;
2704 }
2705
2706 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2707 if (II) {
2708 switch (II->getIntrinsicID()) {
2709 default:
2710 break;
2711 case Intrinsic::assume:
2712 llvm_unreachable("llvm.assume should have been removed already");
2713 case Intrinsic::allow_runtime_check:
2714 case Intrinsic::allow_ubsan_check:
2715 case Intrinsic::experimental_widenable_condition: {
2716 // Give up on future widening opportunities so that we can fold away dead
2717 // paths and merge blocks before going into block-local instruction
2718 // selection.
2719 if (II->use_empty()) {
2720 II->eraseFromParent();
2721 return true;
2722 }
2723 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2724 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2725 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2726 });
2727 return true;
2728 }
2729 case Intrinsic::objectsize:
2730 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2731 case Intrinsic::is_constant:
2732 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2733 case Intrinsic::aarch64_stlxr:
2734 case Intrinsic::aarch64_stxr: {
2735 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2736 if (!ExtVal || !ExtVal->hasOneUse() ||
2737 ExtVal->getParent() == CI->getParent())
2738 return false;
2739 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2740 ExtVal->moveBefore(CI->getIterator());
2741 // Mark this instruction as "inserted by CGP", so that other
2742 // optimizations don't touch it.
2743 InsertedInsts.insert(ExtVal);
2744 return true;
2745 }
2746
2747 case Intrinsic::launder_invariant_group:
2748 case Intrinsic::strip_invariant_group: {
2749 Value *ArgVal = II->getArgOperand(0);
2750 auto it = LargeOffsetGEPMap.find(II);
2751 if (it != LargeOffsetGEPMap.end()) {
2752 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2753 // Make sure not to have to deal with iterator invalidation
2754 // after possibly adding ArgVal to LargeOffsetGEPMap.
2755 auto GEPs = std::move(it->second);
2756 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2757 LargeOffsetGEPMap.erase(II);
2758 }
2759
2760 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2761 II->eraseFromParent();
2762 return true;
2763 }
2764 case Intrinsic::cttz:
2765 case Intrinsic::ctlz:
2766 // If counting zeros is expensive, try to avoid it.
2767 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2768 IsHugeFunc);
2769 case Intrinsic::fshl:
2770 case Intrinsic::fshr:
2771 return optimizeFunnelShift(II);
2772 case Intrinsic::masked_gather:
2773 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2774 case Intrinsic::masked_scatter:
2775 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2776 case Intrinsic::masked_load:
2777 // Treat v1X masked load as load X type.
2778 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2779 if (VT->getNumElements() == 1) {
2780 Value *PtrVal = II->getArgOperand(0);
2781 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2782 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2783 return true;
2784 }
2785 }
2786 return false;
2787 case Intrinsic::masked_store:
2788 // Treat v1X masked store as store X type.
2789 if (auto *VT =
2790 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2791 if (VT->getNumElements() == 1) {
2792 Value *PtrVal = II->getArgOperand(1);
2793 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2794 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2795 return true;
2796 }
2797 }
2798 return false;
2799 case Intrinsic::umul_with_overflow:
2800 return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
2801 case Intrinsic::smul_with_overflow:
2802 return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
2803 }
2804
2805 SmallVector<Value *, 2> PtrOps;
2806 Type *AccessTy;
2807 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2808 while (!PtrOps.empty()) {
2809 Value *PtrVal = PtrOps.pop_back_val();
2810 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2811 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2812 return true;
2813 }
2814 }
2815
2816 // From here on out we're working with named functions.
2817 auto *Callee = CI->getCalledFunction();
2818 if (!Callee)
2819 return false;
2820
2821 // Lower all default uses of _chk calls. This is very similar
2822 // to what InstCombineCalls does, but here we are only lowering calls
2823 // to fortified library functions (e.g. __memcpy_chk) that have the default
2824 // "don't know" as the objectsize. Anything else should be left alone.
2825 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2826 IRBuilder<> Builder(CI);
2827 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2828 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2829 CI->eraseFromParent();
2830 return true;
2831 }
2832
2833 // SCCP may have propagated, among other things, C++ static variables across
2834 // calls. If this happens to be the case, we may want to undo it in order to
2835 // avoid redundant pointer computation of the constant, as the function method
2836 // returning the constant needs to be executed anyways.
2837 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2838 if (!F->getReturnType()->isPointerTy())
2839 return nullptr;
2840
2841 GlobalVariable *UniformValue = nullptr;
2842 for (auto &BB : *F) {
2843 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2844 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2845 if (!UniformValue)
2846 UniformValue = V;
2847 else if (V != UniformValue)
2848 return nullptr;
2849 } else {
2850 return nullptr;
2851 }
2852 }
2853 }
2854
2855 return UniformValue;
2856 };
2857
2858 if (Callee->hasExactDefinition()) {
2859 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2860 bool MadeChange = false;
2861 for (Use &U : make_early_inc_range(RV->uses())) {
2862 auto *I = dyn_cast<Instruction>(U.getUser());
2863 if (!I || I->getParent() != CI->getParent()) {
2864 // Limit to the same basic block to avoid extending the call-site live
2865 // range, which otherwise could increase register pressure.
2866 continue;
2867 }
2868 if (CI->comesBefore(I)) {
2869 U.set(CI);
2870 MadeChange = true;
2871 }
2872 }
2873
2874 return MadeChange;
2875 }
2876 }
2877
2878 return false;
2879}
2880
2882 const CallInst *CI) {
// NOTE(review): the line that opens this definition (return type, function
// name and the parameter that declares TLInfo) is not present in this
// listing. Callers in this file invoke it as
// isIntrinsicOrLFToBeTailCalled(TLInfo, CI) and use the result as a boolean.
//
// Returns true when CI is one of the memset/memcpy/memmove intrinsics, or a
// call that TLInfo recognises as strcpy/strncpy/strcat/strncat; returns false
// for every other call. CI must be non-null and its result must be unused.
2883 assert(CI && CI->use_empty());
2884
// Intrinsic calls are decided purely by their intrinsic ID; an intrinsic that
// is not listed never falls through to the library-function check below.
2885 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2886 switch (II->getIntrinsicID()) {
2887 case Intrinsic::memset:
2888 case Intrinsic::memcpy:
2889 case Intrinsic::memmove:
2890 return true;
2891 default:
2892 return false;
2893 }
2894
// Otherwise the callee has to be a directly called function that TLInfo maps
// to a LibFunc. A null TLInfo or an indirect call yields false.
2895 LibFunc LF;
2896 Function *Callee = CI->getCalledFunction();
2897 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2898 switch (LF) {
2899 case LibFunc_strcpy:
2900 case LibFunc_strncpy:
2901 case LibFunc_strcat:
2902 case LibFunc_strncat:
2903 return true;
2904 default:
2905 return false;
2906 }
2907
2908 return false;
2909}
2910
2911/// Look for opportunities to duplicate return instructions to the predecessor
2912/// to enable tail call optimizations. The case it is currently looking for is
2913/// the following one. Known intrinsics or library function that may be tail
2914/// called are taken into account as well.
2915/// @code
2916/// bb0:
2917/// %tmp0 = tail call i32 @f0()
2918/// br label %return
2919/// bb1:
2920/// %tmp1 = tail call i32 @f1()
2921/// br label %return
2922/// bb2:
2923/// %tmp2 = tail call i32 @f2()
2924/// br label %return
2925/// return:
2926/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2927/// ret i32 %retval
2928/// @endcode
2929///
2930/// =>
2931///
2932/// @code
2933/// bb0:
2934/// %tmp0 = tail call i32 @f0()
2935/// ret i32 %tmp0
2936/// bb1:
2937/// %tmp1 = tail call i32 @f1()
2938/// ret i32 %tmp1
2939/// bb2:
2940/// %tmp2 = tail call i32 @f2()
2941/// ret i32 %tmp2
2942/// @endcode
///
/// \param BB the candidate return block; nothing is done unless its terminator
/// is a ReturnInst.
/// \param ModifiedDT set to ModifyDT::ModifyBBDT each time a return is
/// duplicated into a predecessor.
/// \returns true if at least one predecessor received its own copy of the
/// return instruction.
2943bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2944 ModifyDT &ModifiedDT) {
2945 if (!BB->getTerminator())
2946 return false;
2947
2948 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2949 if (!RetI)
2950 return false;
2951
2952 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2953
// Look through an optional bitcast and an optional extractvalue on the
// returned value to find the PHI node (if any) that feeds the return.
2954 PHINode *PN = nullptr;
2955 ExtractValueInst *EVI = nullptr;
2956 BitCastInst *BCI = nullptr;
2957 Value *V = RetI->getReturnValue();
2958 if (V) {
2959 BCI = dyn_cast<BitCastInst>(V);
2960 if (BCI)
2961 V = BCI->getOperand(0);
2962
// NOTE(review): listing line 2963 is absent here. As shown, EVI is never
// assigned before the test below; presumably the missing line assigns it from
// V. Confirm against the full source.
2964 if (EVI) {
2965 V = EVI->getOperand(0);
// Only an extractvalue whose indices are all zero is looked through.
2966 if (!llvm::all_of(EVI->indices(), equal_to(0)))
2967 return false;
2968 }
2969
2970 PN = dyn_cast<PHINode>(V);
2971 }
2972
// A PHI that lives in another block cannot be resolved per predecessor of BB.
2973 if (PN && PN->getParent() != BB)
2974 return false;
2975
// True for a lifetime.end intrinsic, or for a single-use bitcast whose only
// user is a lifetime.end intrinsic.
2976 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2977 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2978 if (BC && BC->hasOneUse())
2979 Inst = BC->user_back();
2980
2981 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2982 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2983 return false;
2984 };
2985
// NOTE(review): listing line 2986 is absent here. FakeUses, captured by
// reference below, is presumably declared on that line as a container of
// fake_use intrinsics. Confirm against the full source.
2987
// True for any llvm.fake.use intrinsic. As a side effect, remembers those
// whose operand is not a PHI node so they can be cloned later.
2988 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2989 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2990 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2991 // Record the instruction so it can be preserved when the exit block is
2992 // removed. A fake use whose operand is a PHI node is skipped over like
2993 // any other fake use, but it is not recorded and therefore never copied
2994 // into the blocks that receive a duplicated return.
2995 // FIXME: If we do want to copy the fake use into the return blocks, we
2996 // have to figure out which of the PHI node operands to use for each
2997 // copy.
2998 if (!isa<PHINode>(II->getOperand(0))) {
2999 FakeUses.push_back(II);
3000 }
3001 return true;
3002 }
3003
3004 return false;
3005 };
3006
3007 // Make sure there are no instructions between the first instruction
3008 // and return.
// NOTE(review): listing line 3009 is absent here. The iterator BI used below
// is presumably initialised on that line to the first instruction that must
// be inspected in BB. Confirm against the full source.
3010 // Skip over pseudo-probes and the bitcast.
// The extractvalue, lifetime.end markers and fake uses are skipped as well.
3011 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3012 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3013 BI = std::next(BI);
3014 if (&*BI != RetI)
3015 return false;
3016
3017 // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3018 // call.
3019 auto MayBePermittedAsTailCall = [&](const auto *CI) {
3020 return TLI->mayBeEmittedAsTailCall(CI) &&
3021 attributesPermitTailCall(BB->getParent(), CI, RetI, *TLI);
3022 };
3023
3024 SmallVector<BasicBlock *, 4> TailCallBBs;
3025 // Record the call instructions so we can insert any fake uses
3026 // that need to be preserved before them.
// NOTE(review): listing line 3027 is absent here. CallInsts, filled below
// alongside TailCallBBs, is presumably declared on that line. Confirm against
// the full source.
3028 if (PN) {
// PHI case: examine every incoming value and its predecessor block.
3029 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3030 // Look through bitcasts.
3031 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3032 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3033 BasicBlock *PredBB = PN->getIncomingBlock(I);
3034 // Make sure the phi value is indeed produced by the tail call.
3035 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3036 MayBePermittedAsTailCall(CI)) {
3037 TailCallBBs.push_back(PredBB);
3038 CallInsts.push_back(CI);
3039 } else {
3040 // Consider the cases in which the phi value is indirectly produced by
3041 // the tail call, for example when encountering memset(), memmove(),
3042 // strcpy(), whose return value may have been optimized out. In such
3043 // cases, the value needs to be the first function argument.
3044 //
3045 // bb0:
3046 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3047 // br label %return
3048 // return:
3049 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3050 if (PredBB && PredBB->getSingleSuccessor() == BB)
// NOTE(review): listing line 3051 is absent here. The next line is the tail of
// a statement that presumably re-assigns CI from the instruction preceding
// PredBB's terminator. Confirm against the full source.
3052 PredBB->getTerminator()->getPrevNode());
3053
3054 if (CI && CI->use_empty() &&
3055 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3056 IncomingVal == CI->getArgOperand(0) &&
3057 MayBePermittedAsTailCall(CI)) {
3058 TailCallBBs.push_back(PredBB);
3059 CallInsts.push_back(CI);
3060 }
3061 }
3062 }
3063 } else {
// No PHI: inspect the instruction just before each distinct predecessor's
// last instruction.
3064 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3065 for (BasicBlock *Pred : predecessors(BB)) {
3066 if (!VisitedBBs.insert(Pred).second)
3067 continue;
3068 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3069 CallInst *CI = dyn_cast<CallInst>(I);
3070 if (CI && CI->use_empty() && MayBePermittedAsTailCall(CI)) {
3071 // Either we return void or the return value must be the first
3072 // argument of a known intrinsic or library function.
3073 if (!V || isa<UndefValue>(V) ||
3074 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3075 V == CI->getArgOperand(0))) {
3076 TailCallBBs.push_back(Pred);
3077 CallInsts.push_back(CI);
3078 }
3079 }
3080 }
3081 }
3082 }
3083
3084 bool Changed = false;
3085 for (auto const &TailCallBB : TailCallBBs) {
3086 // Make sure the call instruction is followed by an unconditional branch to
3087 // the return block.
3088 UncondBrInst *BI = dyn_cast<UncondBrInst>(TailCallBB->getTerminator());
3089 if (!BI || BI->getSuccessor() != BB)
3090 continue;
3091
3092 // Duplicate the return into TailCallBB.
3093 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
// NOTE(review): listing line 3094 is absent here. The next line is the tail of
// a statement (presumably an assert) checking that BB's frequency is at least
// TailCallBB's. Confirm against the full source.
3095 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
// TailCallBB no longer flows into BB, so subtract its frequency from BB's.
3096 BFI->setBlockFreq(BB,
3097 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3098 ModifiedDT = ModifyDT::ModifyBBDT;
3099 Changed = true;
3100 ++NumRetsDup;
3101 }
3102
3103 // If we eliminated all predecessors of the block, delete the block now.
3104 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3105 // Copy the fake uses found in the original return block to all blocks
3106 // that contain tail calls.
3107 for (auto *CI : CallInsts) {
3108 for (auto const *FakeUse : FakeUses) {
// Each clone is placed immediately before the recorded call.
3109 auto *ClonedInst = FakeUse->clone();
3110 ClonedInst->insertBefore(CI->getIterator());
3111 }
3112 }
3113 BB->eraseFromParent();
3114 }
3115
3116 return Changed;
3117}
3118
3119//===----------------------------------------------------------------------===//
3120// Memory Optimization
3121//===----------------------------------------------------------------------===//
3122
3123namespace {
3124
3125/// This is an extended version of TargetLowering::AddrMode
3126/// which holds actual Value*'s for register values.
3127struct ExtAddrMode : public TargetLowering::AddrMode {
3128 Value *BaseReg = nullptr;
3129 Value *ScaledReg = nullptr;
3130 Value *OriginalValue = nullptr;
3131 bool InBounds = true;
3132
3133 enum FieldName {
3134 NoField = 0x00,
3135 BaseRegField = 0x01,
3136 BaseGVField = 0x02,
3137 BaseOffsField = 0x04,
3138 ScaledRegField = 0x08,
3139 ScaleField = 0x10,
3140 MultipleFields = 0xff
3141 };
3142
3143 ExtAddrMode() = default;
3144
3145 void print(raw_ostream &OS) const;
3146 void dump() const;
3147
3148 // Replace From in ExtAddrMode with To.
3149 // E.g., SExt insts may be promoted and deleted. We should replace them with
3150 // the promoted values.
3151 void replaceWith(Value *From, Value *To) {
3152 if (ScaledReg == From)
3153 ScaledReg = To;
3154 }
3155
3156 FieldName compare(const ExtAddrMode &other) {
3157 // First check that the types are the same on each field, as differing types
3158 // is something we can't cope with later on.
3159 if (BaseReg && other.BaseReg &&
3160 BaseReg->getType() != other.BaseReg->getType())
3161 return MultipleFields;
3162 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3163 return MultipleFields;
3164 if (ScaledReg && other.ScaledReg &&
3165 ScaledReg->getType() != other.ScaledReg->getType())
3166 return MultipleFields;
3167
3168 // Conservatively reject 'inbounds' mismatches.
3169 if (InBounds != other.InBounds)
3170 return MultipleFields;
3171
3172 // Check each field to see if it differs.
3173 unsigned Result = NoField;
3174 if (BaseReg != other.BaseReg)
3175 Result |= BaseRegField;
3176 if (BaseGV != other.BaseGV)
3177 Result |= BaseGVField;
3178 if (BaseOffs != other.BaseOffs)
3179 Result |= BaseOffsField;
3180 if (ScaledReg != other.ScaledReg)
3181 Result |= ScaledRegField;
3182 // Don't count 0 as being a different scale, because that actually means
3183 // unscaled (which will already be counted by having no ScaledReg).
3184 if (Scale && other.Scale && Scale != other.Scale)
3185 Result |= ScaleField;
3186
3187 if (llvm::popcount(Result) > 1)
3188 return MultipleFields;
3189 else
3190 return static_cast<FieldName>(Result);
3191 }
3192
3193 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3194 // with no offset.
3195 bool isTrivial() {
3196 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3197 // trivial if at most one of these terms is nonzero, except that BaseGV and
3198 // BaseReg both being zero actually means a null pointer value, which we
3199 // consider to be 'non-zero' here.
3200 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3201 }
3202
3203 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3204 switch (Field) {
3205 default:
3206 return nullptr;
3207 case BaseRegField:
3208 return BaseReg;
3209 case BaseGVField:
3210 return BaseGV;
3211 case ScaledRegField:
3212 return ScaledReg;
3213 case BaseOffsField:
3214 return ConstantInt::getSigned(IntPtrTy, BaseOffs);
3215 }
3216 }
3217
3218 void SetCombinedField(FieldName Field, Value *V,
3219 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3220 switch (Field) {
3221 default:
3222 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3223 break;
3224 case ExtAddrMode::BaseRegField:
3225 BaseReg = V;
3226 break;
3227 case ExtAddrMode::BaseGVField:
3228 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3229 // in the BaseReg field.
3230 assert(BaseReg == nullptr);
3231 BaseReg = V;
3232 BaseGV = nullptr;
3233 break;
3234 case ExtAddrMode::ScaledRegField:
3235 ScaledReg = V;
3236 // If we have a mix of scaled and unscaled addrmodes then we want scale
3237 // to be the scale and not zero.
3238 if (!Scale)
3239 for (const ExtAddrMode &AM : AddrModes)
3240 if (AM.Scale) {
3241 Scale = AM.Scale;
3242 break;
3243 }
3244 break;
3245 case ExtAddrMode::BaseOffsField:
3246 // The offset is no longer a constant, so it goes in ScaledReg with a
3247 // scale of 1.
3248 assert(ScaledReg == nullptr);
3249 ScaledReg = V;
3250 Scale = 1;
3251 BaseOffs = 0;
3252 break;
3253 }
3254 }
3255};
3256
3257#ifndef NDEBUG
3258static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3259 AM.print(OS);
3260 return OS;
3261}
3262#endif
3263
3264#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3265void ExtAddrMode::print(raw_ostream &OS) const {
3266 bool NeedPlus = false;
3267 OS << "[";
3268 if (InBounds)
3269 OS << "inbounds ";
3270 if (BaseGV) {
3271 OS << "GV:";
3272 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3273 NeedPlus = true;
3274 }
3275
3276 if (BaseOffs) {
3277 OS << (NeedPlus ? " + " : "") << BaseOffs;
3278 NeedPlus = true;
3279 }
3280
3281 if (BaseReg) {
3282 OS << (NeedPlus ? " + " : "") << "Base:";
3283 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3284 NeedPlus = true;
3285 }
3286 if (Scale) {
3287 OS << (NeedPlus ? " + " : "") << Scale << "*";
3288 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3289 }
3290
3291 OS << ']';
3292}
3293
3294LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3295 print(dbgs());
3296 dbgs() << '\n';
3297}
3298#endif
3299
3300} // end anonymous namespace
3301
3302namespace {
3303
3304/// This class provides transaction based operation on the IR.
3305/// Every change made through this class is recorded in the internal state and
3306/// can be undone (rollback) until commit is called.
3307/// CGP does not check if instructions could be speculatively executed when
3308/// moved. Preserving the original location would pessimize the debugging
3309/// experience, as well as negatively impact the quality of sample PGO.
3310class TypePromotionTransaction {
3311 /// This represents the common interface of the individual transaction.
3312 /// Each class implements the logic for doing one specific modification on
3313 /// the IR via the TypePromotionTransaction.
3314 class TypePromotionAction {
3315 protected:
3316 /// The Instruction modified.
3317 Instruction *Inst;
3318
3319 public:
3320 /// Constructor of the action.
3321 /// The constructor performs the related action on the IR.
3322 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3323
3324 virtual ~TypePromotionAction() = default;
3325
3326 /// Undo the modification done by this action.
3327 /// When this method is called, the IR must be in the same state as it was
3328 /// before this action was applied.
3329 /// \pre Undoing the action works if and only if the IR is in the exact same
3330 /// state as it was directly after this action was applied.
3331 virtual void undo() = 0;
3332
3333 /// Advocate every change made by this action.
3334 /// When the results on the IR of the action are to be kept, it is important
3335 /// to call this function, otherwise hidden information may be kept forever.
3336 virtual void commit() {
3337 // Nothing to be done, this action is not doing anything.
3338 }
3339 };
3340
3341 /// Utility to remember the position of an instruction.
3342 class InsertionHandler {
3343 /// Position of an instruction.
3344 /// Either an instruction:
3345 /// - Is the first in a basic block: BB is used.
3346 /// - Has a previous instruction: PrevInst is used.
3347 struct {
3348 BasicBlock::iterator PrevInst;
3349 BasicBlock *BB;
3350 } Point;
3351 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3352
3353 /// Remember whether or not the instruction had a previous instruction.
3354 bool HasPrevInstruction;
3355
3356 public:
3357 /// Record the position of \p Inst.
3358 InsertionHandler(Instruction *Inst) {
3359 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3360 BasicBlock *BB = Inst->getParent();
3361
3362 // Record where we would have to re-insert the instruction in the sequence
3363 // of DbgRecords, if we ended up reinserting.
3364 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3365
3366 if (HasPrevInstruction) {
3367 Point.PrevInst = std::prev(Inst->getIterator());
3368 } else {
3369 Point.BB = BB;
3370 }
3371 }
3372
3373 /// Insert \p Inst at the recorded position.
3374 void insert(Instruction *Inst) {
3375 if (HasPrevInstruction) {
3376 if (Inst->getParent())
3377 Inst->removeFromParent();
3378 Inst->insertAfter(Point.PrevInst);
3379 } else {
3380 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3381 if (Inst->getParent())
3382 Inst->moveBefore(*Point.BB, Position);
3383 else
3384 Inst->insertBefore(*Point.BB, Position);
3385 }
3386
3387 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3388 }
3389 };
3390
3391 /// Move an instruction before another.
3392 class InstructionMoveBefore : public TypePromotionAction {
3393 /// Original position of the instruction.
3394 InsertionHandler Position;
3395
3396 public:
3397 /// Move \p Inst before \p Before.
3398 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3399 : TypePromotionAction(Inst), Position(Inst) {
3400 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3401 << "\n");
3402 Inst->moveBefore(Before);
3403 }
3404
3405 /// Move the instruction back to its original position.
3406 void undo() override {
3407 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3408 Position.insert(Inst);
3409 }
3410 };
3411
3412 /// Set the operand of an instruction with a new value.
3413 class OperandSetter : public TypePromotionAction {
3414 /// Original operand of the instruction.
3415 Value *Origin;
3416
3417 /// Index of the modified instruction.
3418 unsigned Idx;
3419
3420 public:
3421 /// Set \p Idx operand of \p Inst with \p NewVal.
3422 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3423 : TypePromotionAction(Inst), Idx(Idx) {
3424 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3425 << "for:" << *Inst << "\n"
3426 << "with:" << *NewVal << "\n");
3427 Origin = Inst->getOperand(Idx);
3428 Inst->setOperand(Idx, NewVal);
3429 }
3430
3431 /// Restore the original value of the instruction.
3432 void undo() override {
3433 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3434 << "for: " << *Inst << "\n"
3435 << "with: " << *Origin << "\n");
3436 Inst->setOperand(Idx, Origin);
3437 }
3438 };
3439
3440 /// Hide the operands of an instruction.
3441 /// Do as if this instruction was not using any of its operands.
3442 class OperandsHider : public TypePromotionAction {
3443 /// The list of original operands.
3444 SmallVector<Value *, 4> OriginalValues;
3445
3446 public:
3447 /// Remove \p Inst from the uses of the operands of \p Inst.
3448 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3449 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3450 unsigned NumOpnds = Inst->getNumOperands();
3451 OriginalValues.reserve(NumOpnds);
3452 for (unsigned It = 0; It < NumOpnds; ++It) {
3453 // Save the current operand.
3454 Value *Val = Inst->getOperand(It);
3455 OriginalValues.push_back(Val);
3456 // Set a dummy one.
3457 // We could use OperandSetter here, but that would imply an overhead
3458 // that we are not willing to pay.
3459 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3460 }
3461 }
3462
3463 /// Restore the original list of uses.
3464 void undo() override {
3465 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3466 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3467 Inst->setOperand(It, OriginalValues[It]);
3468 }
3469 };
3470
3471 /// Build a truncate instruction.
3472 class TruncBuilder : public TypePromotionAction {
3473 Value *Val;
3474
3475 public:
3476 /// Build a truncate instruction of \p Opnd producing a \p Ty
3477 /// result.
3478 /// trunc Opnd to Ty.
3479 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3480 IRBuilder<> Builder(Opnd);
3481 Builder.SetCurrentDebugLocation(DebugLoc());
3482 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3483 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3484 }
3485
3486 /// Get the built value.
3487 Value *getBuiltValue() { return Val; }
3488
3489 /// Remove the built instruction.
3490 void undo() override {
3491 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3492 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3493 IVal->eraseFromParent();
3494 }
3495 };
3496
3497 /// Build a sign extension instruction.
3498 class SExtBuilder : public TypePromotionAction {
3499 Value *Val;
3500
3501 public:
3502 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3503 /// result.
3504 /// sext Opnd to Ty.
3505 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3506 : TypePromotionAction(InsertPt) {
3507 IRBuilder<> Builder(InsertPt);
3508 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3509 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3510 }
3511
3512 /// Get the built value.
3513 Value *getBuiltValue() { return Val; }
3514
3515 /// Remove the built instruction.
3516 void undo() override {
3517 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3518 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3519 IVal->eraseFromParent();
3520 }
3521 };
3522
3523 /// Build a zero extension instruction.
3524 class ZExtBuilder : public TypePromotionAction {
3525 Value *Val;
3526
3527 public:
3528 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3529 /// result.
3530 /// zext Opnd to Ty.
3531 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3532 : TypePromotionAction(InsertPt) {
3533 IRBuilder<> Builder(InsertPt);
3534 Builder.SetCurrentDebugLocation(DebugLoc());
3535 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3536 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3537 }
3538
3539 /// Get the built value.
3540 Value *getBuiltValue() { return Val; }
3541
3542 /// Remove the built instruction.
3543 void undo() override {
3544 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3545 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3546 IVal->eraseFromParent();
3547 }
3548 };
3549
3550 /// Mutate an instruction to another type.
3551 class TypeMutator : public TypePromotionAction {
3552 /// Record the original type.
3553 Type *OrigTy;
3554
3555 public:
3556 /// Mutate the type of \p Inst into \p NewTy.
3557 TypeMutator(Instruction *Inst, Type *NewTy)
3558 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3559 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3560 << "\n");
3561 Inst->mutateType(NewTy);
3562 }
3563
3564 /// Mutate the instruction back to its original type.
3565 void undo() override {
3566 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3567 << "\n");
3568 Inst->mutateType(OrigTy);
3569 }
3570 };
3571
3572 /// Replace the uses of an instruction by another instruction.
/// The (user, operand index) pairs and the debug records that referred to
/// the instruction are remembered so that undo() can restore them.
3573 class UsesReplacer : public TypePromotionAction {
3574 /// Helper structure to keep track of the replaced uses.
3575 struct InstructionAndIdx {
3576 /// The instruction using the instruction.
3577 Instruction *Inst;
3578
3579 /// The index where this instruction is used for Inst.
3580 unsigned Idx;
3581
3582 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3583 : Inst(Inst), Idx(Idx) {}
3584 };
3585
3586 /// Keep track of the original uses (pair Instruction, Index).
// NOTE(review): listing line 3587 is absent here. OriginalUses, which the
// constructor fills and undo() iterates, is presumably declared on that line
// as a container of InstructionAndIdx. Confirm against the full source.
3588 /// Keep track of the debug users.
3589 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3590
3591 /// Keep track of the new value so that we can undo it by replacing
3592 /// instances of the new value with the original value.
3593 Value *New;
3594
// NOTE(review): listing line 3595 is absent here; its content cannot be
// determined from this view.
3596
3597 public:
3598 /// Replace all the use of \p Inst by \p New.
3599 UsesReplacer(Instruction *Inst, Value *New)
3600 : TypePromotionAction(Inst), New(New) {
3601 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3602 << "\n");
3603 // Record the original uses.
// cast<> requires every user of Inst to be an Instruction.
3604 for (Use &U : Inst->uses()) {
3605 Instruction *UserI = cast<Instruction>(U.getUser());
3606 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3607 }
3608 // Record the debug uses separately. They are not in the instruction's
3609 // use list, but they are replaced by RAUW.
3610 findDbgValues(Inst, DbgVariableRecords);
3611
3612 // Now, we can replace the uses.
3613 Inst->replaceAllUsesWith(New);
3614 }
3615
3616 /// Reassign the original uses of Inst to Inst.
3617 void undo() override {
3618 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
// Point each recorded operand slot back at Inst.
3619 for (InstructionAndIdx &Use : OriginalUses)
3620 Use.Inst->setOperand(Use.Idx, Inst);
3621 // RAUW has replaced all original uses with references to the new value,
3622 // including the debug uses. Since we are undoing the replacements,
3623 // the original debug uses must also be reinstated to maintain the
3624 // correctness and utility of debug value records.
3625 for (DbgVariableRecord *DVR : DbgVariableRecords)
3626 DVR->replaceVariableLocationOp(New, Inst);
3627 }
3628 };
3629
3630 /// Remove an instruction from the IR.
3631 class InstructionRemover : public TypePromotionAction {
3632 /// Original position of the instruction.
3633 InsertionHandler Inserter;
3634
3635 /// Helper structure to hide all the link to the instruction. In other
3636 /// words, this helps to do as if the instruction was removed.
3637 OperandsHider Hider;
3638
3639 /// Keep track of the uses replaced, if any.
3640 UsesReplacer *Replacer = nullptr;
3641
3642 /// Keep track of instructions removed.
3643 SetOfInstrs &RemovedInsts;
3644
3645 public:
3646 /// Remove all reference of \p Inst and optionally replace all its
3647 /// uses with New.
3648 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3649 /// \pre If !Inst->use_empty(), then New != nullptr
3650 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3651 Value *New = nullptr)
3652 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3653 RemovedInsts(RemovedInsts) {
3654 if (New)
3655 Replacer = new UsesReplacer(Inst, New);
3656 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3657 RemovedInsts.insert(Inst);
3658 /// The instructions removed here will be freed after completing
3659 /// optimizeBlock() for all blocks as we need to keep track of the
3660 /// removed instructions during promotion.
3661 Inst->removeFromParent();
3662 }
3663
3664 ~InstructionRemover() override { delete Replacer; }
3665
3666 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3667 InstructionRemover(const InstructionRemover &other) = delete;
3668
3669 /// Resurrect the instruction and reassign it to the proper uses if
3670 /// new value was provided when build this action.
3671 void undo() override {
3672 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3673 Inserter.insert(Inst);
3674 if (Replacer)
3675 Replacer->undo();
3676 Hider.undo();
3677 RemovedInsts.erase(Inst);
3678 }
3679 };
3680
3681public:
3682 /// Restoration point.
3683 /// The restoration point is a pointer to an action instead of an iterator
3684 /// because the iterator may be invalidated but not the pointer.
3685 using ConstRestorationPt = const TypePromotionAction *;
3686
3687 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3688 : RemovedInsts(RemovedInsts) {}
3689
3690 /// Advocate every changes made in that transaction. Return true if any change
3691 /// happen.
3692 bool commit();
3693
3694 /// Undo all the changes made after the given point.
3695 void rollback(ConstRestorationPt Point);
3696
3697 /// Get the current restoration point.
3698 ConstRestorationPt getRestorationPoint() const;
3699
3700 /// \name API for IR modification with state keeping to support rollback.
3701 /// @{
3702 /// Same as Instruction::setOperand.
3703 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3704
3705 /// Same as Instruction::eraseFromParent.
3706 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3707
3708 /// Same as Value::replaceAllUsesWith.
3709 void replaceAllUsesWith(Instruction *Inst, Value *New);
3710
3711 /// Same as Value::mutateType.
3712 void mutateType(Instruction *Inst, Type *NewTy);
3713
3714 /// Same as IRBuilder::createTrunc.
3715 Value *createTrunc(Instruction *Opnd, Type *Ty);
3716
3717 /// Same as IRBuilder::createSExt.
3718 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3719
3720 /// Same as IRBuilder::createZExt.
3721 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3722
3723private:
3724 /// The ordered list of actions made so far.
3726
3727 using CommitPt =
3728 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3729
3730 SetOfInstrs &RemovedInsts;
3731};
3732
3733} // end anonymous namespace
3734
3735void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3736 Value *NewVal) {
3737 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3738 Inst, Idx, NewVal));
3739}
3740
3741void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3742 Value *NewVal) {
3743 Actions.push_back(
3744 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3745 Inst, RemovedInsts, NewVal));
3746}
3747
3748void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3749 Value *New) {
3750 Actions.push_back(
3751 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3752}
3753
3754void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3755 Actions.push_back(
3756 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3757}
3758
3759Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3760 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3761 Value *Val = Ptr->getBuiltValue();
3762 Actions.push_back(std::move(Ptr));
3763 return Val;
3764}
3765
3766Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3767 Type *Ty) {
3768 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3769 Value *Val = Ptr->getBuiltValue();
3770 Actions.push_back(std::move(Ptr));
3771 return Val;
3772}
3773
3774Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3775 Type *Ty) {
3776 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3777 Value *Val = Ptr->getBuiltValue();
3778 Actions.push_back(std::move(Ptr));
3779 return Val;
3780}
3781
3782TypePromotionTransaction::ConstRestorationPt
3783TypePromotionTransaction::getRestorationPoint() const {
3784 return !Actions.empty() ? Actions.back().get() : nullptr;
3785}
3786
3787bool TypePromotionTransaction::commit() {
3788 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3789 Action->commit();
3790 bool Modified = !Actions.empty();
3791 Actions.clear();
3792 return Modified;
3793}
3794
3795void TypePromotionTransaction::rollback(
3796 TypePromotionTransaction::ConstRestorationPt Point) {
3797 while (!Actions.empty() && Point != Actions.back().get()) {
3798 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3799 Curr->undo();
3800 }
3801}
3802
3803namespace {
3804
3805/// A helper class for matching addressing modes.
3806///
3807/// This encapsulates the logic for matching the target-legal addressing modes.
3808class AddressingModeMatcher {
3809 SmallVectorImpl<Instruction *> &AddrModeInsts;
3810 const TargetLowering &TLI;
3811 const TargetRegisterInfo &TRI;
3812 const DataLayout &DL;
3813 const LoopInfo &LI;
3814 const std::function<const DominatorTree &()> getDTFn;
3815
3816 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3817 /// the memory instruction that we're computing this address for.
3818 Type *AccessTy;
3819 unsigned AddrSpace;
3820 Instruction *MemoryInst;
3821
3822 /// This is the addressing mode that we're building up. This is
3823 /// part of the return value of this addressing mode matching stuff.
3824 ExtAddrMode &AddrMode;
3825
3826 /// The instructions inserted by other CodeGenPrepare optimizations.
3827 const SetOfInstrs &InsertedInsts;
3828
3829 /// A map from the instructions to their type before promotion.
3830 InstrToOrigTy &PromotedInsts;
3831
3832 /// The ongoing transaction where every action should be registered.
3833 TypePromotionTransaction &TPT;
3834
3835 // A GEP which has too large offset to be folded into the addressing mode.
3836 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3837
3838 /// This is set to true when we should not do profitability checks.
3839 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3840 bool IgnoreProfitability;
3841
3842 /// True if we are optimizing for size.
3843 bool OptSize = false;
3844
3845 ProfileSummaryInfo *PSI;
3846 BlockFrequencyInfo *BFI;
3847
3848 AddressingModeMatcher(
3849 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3850 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3851 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3852 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3853 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3854 TypePromotionTransaction &TPT,
3855 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3856 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3857 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3858 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3859 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3860 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3861 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3862 IgnoreProfitability = false;
3863 }
3864
3865public:
3866 /// Find the maximal addressing mode that a load/store of V can fold,
3867 /// give an access type of AccessTy. This returns a list of involved
3868 /// instructions in AddrModeInsts.
3869 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3870 /// optimizations.
3871 /// \p PromotedInsts maps the instructions to their type before promotion.
3872 /// \p The ongoing transaction where every action should be registered.
3873 static ExtAddrMode
3874 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3875 SmallVectorImpl<Instruction *> &AddrModeInsts,
3876 const TargetLowering &TLI, const LoopInfo &LI,
3877 const std::function<const DominatorTree &()> getDTFn,
3878 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3879 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3880 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3881 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3882 ExtAddrMode Result;
3883
3884 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3885 AccessTy, AS, MemoryInst, Result,
3886 InsertedInsts, PromotedInsts, TPT,
3887 LargeOffsetGEP, OptSize, PSI, BFI)
3888 .matchAddr(V, 0);
3889 (void)Success;
3890 assert(Success && "Couldn't select *anything*?");
3891 return Result;
3892 }
3893
3894private:
3895 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3896 bool matchAddr(Value *Addr, unsigned Depth);
3897 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3898 bool *MovedAway = nullptr);
3899 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3900 ExtAddrMode &AMBefore,
3901 ExtAddrMode &AMAfter);
3902 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3903 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3904 Value *PromotedOperand) const;
3905};
3906
3907class PhiNodeSet;
3908
3909/// An iterator for PhiNodeSet.
3910class PhiNodeSetIterator {
3911 PhiNodeSet *const Set;
3912 size_t CurrentIndex = 0;
3913
3914public:
3915 /// The constructor. Start should point to either a valid element, or be equal
3916 /// to the size of the underlying SmallVector of the PhiNodeSet.
3917 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3918 PHINode *operator*() const;
3919 PhiNodeSetIterator &operator++();
3920 bool operator==(const PhiNodeSetIterator &RHS) const;
3921 bool operator!=(const PhiNodeSetIterator &RHS) const;
3922};
3923
3924/// Keeps a set of PHINodes.
3925///
3926/// This is a minimal set implementation for a specific use case:
3927/// It is very fast when there are very few elements, but also provides good
3928/// performance when there are many. It is similar to SmallPtrSet, but also
3929/// provides iteration by insertion order, which is deterministic and stable
3930/// across runs. It is also similar to SmallSetVector, but provides removing
3931/// elements in O(1) time. This is achieved by not actually removing the element
3932/// from the underlying vector, so comes at the cost of using more memory, but
3933/// that is fine, since PhiNodeSets are used as short lived objects.
3934class PhiNodeSet {
3935 friend class PhiNodeSetIterator;
3936
3937 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3938 using iterator = PhiNodeSetIterator;
3939
3940 /// Keeps the elements in the order of their insertion in the underlying
3941 /// vector. To achieve constant time removal, it never deletes any element.
3943
3944 /// Keeps the elements in the underlying set implementation. This (and not the
3945 /// NodeList defined above) is the source of truth on whether an element
3946 /// is actually in the collection.
3947 MapType NodeMap;
3948
3949 /// Points to the first valid (not deleted) element when the set is not empty
3950 /// and the value is not zero. Equals to the size of the underlying vector
3951 /// when the set is empty. When the value is 0, as in the beginning, the
3952 /// first element may or may not be valid.
3953 size_t FirstValidElement = 0;
3954
3955public:
3956 /// Inserts a new element to the collection.
3957 /// \returns true if the element is actually added, i.e. was not in the
3958 /// collection before the operation.
3959 bool insert(PHINode *Ptr) {
3960 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3961 NodeList.push_back(Ptr);
3962 return true;
3963 }
3964 return false;
3965 }
3966
3967 /// Removes the element from the collection.
3968 /// \returns whether the element is actually removed, i.e. was in the
3969 /// collection before the operation.
3970 bool erase(PHINode *Ptr) {
3971 if (NodeMap.erase(Ptr)) {
3972 SkipRemovedElements(FirstValidElement);
3973 return true;
3974 }
3975 return false;
3976 }
3977
3978 /// Removes all elements and clears the collection.
3979 void clear() {
3980 NodeMap.clear();
3981 NodeList.clear();
3982 FirstValidElement = 0;
3983 }
3984
3985 /// \returns an iterator that will iterate the elements in the order of
3986 /// insertion.
3987 iterator begin() {
3988 if (FirstValidElement == 0)
3989 SkipRemovedElements(FirstValidElement);
3990 return PhiNodeSetIterator(this, FirstValidElement);
3991 }
3992
3993 /// \returns an iterator that points to the end of the collection.
3994 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3995
3996 /// Returns the number of elements in the collection.
3997 size_t size() const { return NodeMap.size(); }
3998
3999 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
4000 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
4001
4002private:
4003 /// Updates the CurrentIndex so that it will point to a valid element.
4004 ///
4005 /// If the element of NodeList at CurrentIndex is valid, it does not
4006 /// change it. If there are no more valid elements, it updates CurrentIndex
4007 /// to point to the end of the NodeList.
4008 void SkipRemovedElements(size_t &CurrentIndex) {
4009 while (CurrentIndex < NodeList.size()) {
4010 auto it = NodeMap.find(NodeList[CurrentIndex]);
4011 // If the element has been deleted and added again later, NodeMap will
4012 // point to a different index, so CurrentIndex will still be invalid.
4013 if (it != NodeMap.end() && it->second == CurrentIndex)
4014 break;
4015 ++CurrentIndex;
4016 }
4017 }
4018};
4019
4020PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4021 : Set(Set), CurrentIndex(Start) {}
4022
4023PHINode *PhiNodeSetIterator::operator*() const {
4024 assert(CurrentIndex < Set->NodeList.size() &&
4025 "PhiNodeSet access out of range");
4026 return Set->NodeList[CurrentIndex];
4027}
4028
4029PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4030 assert(CurrentIndex < Set->NodeList.size() &&
4031 "PhiNodeSet access out of range");
4032 ++CurrentIndex;
4033 Set->SkipRemovedElements(CurrentIndex);
4034 return *this;
4035}
4036
4037bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4038 return CurrentIndex == RHS.CurrentIndex;
4039}
4040
4041bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4042 return !((*this) == RHS);
4043}
4044
4045/// Keep track of simplification of Phi nodes.
4046/// Accept the set of all phi nodes and erase phi node from this set
4047/// if it is simplified.
4048class SimplificationTracker {
4049 DenseMap<Value *, Value *> Storage;
4050 // Tracks newly created Phi nodes. The elements are iterated by insertion
4051 // order.
4052 PhiNodeSet AllPhiNodes;
4053 // Tracks newly created Select nodes.
4054 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4055
4056public:
4057 Value *Get(Value *V) {
4058 do {
4059 auto SV = Storage.find(V);
4060 if (SV == Storage.end())
4061 return V;
4062 V = SV->second;
4063 } while (true);
4064 }
4065
4066 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4067
4068 void ReplacePhi(PHINode *From, PHINode *To) {
4069 Value *OldReplacement = Get(From);
4070 while (OldReplacement != From) {
4071 From = To;
4072 To = dyn_cast<PHINode>(OldReplacement);
4073 OldReplacement = Get(From);
4074 }
4075 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4076 Put(From, To);
4077 From->replaceAllUsesWith(To);
4078 AllPhiNodes.erase(From);
4079 From->eraseFromParent();
4080 }
4081
4082 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4083
4084 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4085
4086 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4087
4088 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4089
4090 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4091
4092 void destroyNewNodes(Type *CommonType) {
4093 // For safe erasing, replace the uses with dummy value first.
4094 auto *Dummy = PoisonValue::get(CommonType);
4095 for (auto *I : AllPhiNodes) {
4096 I->replaceAllUsesWith(Dummy);
4097 I->eraseFromParent();
4098 }
4099 AllPhiNodes.clear();
4100 for (auto *I : AllSelectNodes) {
4101 I->replaceAllUsesWith(Dummy);
4102 I->eraseFromParent();
4103 }
4104 AllSelectNodes.clear();
4105 }
4106};
4107
4108/// A helper class for combining addressing modes.
4109class AddressingModeCombiner {
4110 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4111 typedef std::pair<PHINode *, PHINode *> PHIPair;
4112
4113private:
4114 /// The addressing modes we've collected.
4116
4117 /// The field in which the AddrModes differ, when we have more than one.
4118 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4119
4120 /// Are the AddrModes that we have all just equal to their original values?
4121 bool AllAddrModesTrivial = true;
4122
4123 /// Common Type for all different fields in addressing modes.
4124 Type *CommonType = nullptr;
4125
4126 const DataLayout &DL;
4127
4128 /// Original Address.
4129 Value *Original;
4130
4131 /// Common value among addresses
4132 Value *CommonValue = nullptr;
4133
4134public:
4135 AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
4136 : DL(DL), Original(OriginalValue) {}
4137
4138 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4139
4140 /// Get the combined AddrMode
4141 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4142
4143 /// Add a new AddrMode if it's compatible with the AddrModes we already
4144 /// have.
4145 /// \return True iff we succeeded in doing so.
4146 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4147 // Take note of if we have any non-trivial AddrModes, as we need to detect
4148 // when all AddrModes are trivial as then we would introduce a phi or select
4149 // which just duplicates what's already there.
4150 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4151
4152 // If this is the first addrmode then everything is fine.
4153 if (AddrModes.empty()) {
4154 AddrModes.emplace_back(NewAddrMode);
4155 return true;
4156 }
4157
4158 // Figure out how different this is from the other address modes, which we
4159 // can do just by comparing against the first one given that we only care
4160 // about the cumulative difference.
4161 ExtAddrMode::FieldName ThisDifferentField =
4162 AddrModes[0].compare(NewAddrMode);
4163 if (DifferentField == ExtAddrMode::NoField)
4164 DifferentField = ThisDifferentField;
4165 else if (DifferentField != ThisDifferentField)
4166 DifferentField = ExtAddrMode::MultipleFields;
4167
4168 // If NewAddrMode differs in more than one dimension we cannot handle it.
4169 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4170
4171 // If Scale Field is different then we reject.
4172 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4173
4174 // We also must reject the case when base offset is different and
4175 // scale reg is not null, we cannot handle this case due to merge of
4176 // different offsets will be used as ScaleReg.
4177 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4178 !NewAddrMode.ScaledReg);
4179
4180 // We also must reject the case when GV is different and BaseReg installed
4181 // due to we want to use base reg as a merge of GV values.
4182 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4183 !NewAddrMode.HasBaseReg);
4184
4185 // Even if NewAddMode is the same we still need to collect it due to
4186 // original value is different. And later we will need all original values
4187 // as anchors during finding the common Phi node.
4188 if (CanHandle)
4189 AddrModes.emplace_back(NewAddrMode);
4190 else
4191 AddrModes.clear();
4192
4193 return CanHandle;
4194 }
4195
4196 /// Combine the addressing modes we've collected into a single
4197 /// addressing mode.
4198 /// \return True iff we successfully combined them or we only had one so
4199 /// didn't need to combine them anyway.
4200 bool combineAddrModes() {
4201 // If we have no AddrModes then they can't be combined.
4202 if (AddrModes.size() == 0)
4203 return false;
4204
4205 // A single AddrMode can trivially be combined.
4206 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4207 return true;
4208
4209 // If the AddrModes we collected are all just equal to the value they are
4210 // derived from then combining them wouldn't do anything useful.
4211 if (AllAddrModesTrivial)
4212 return false;
4213
4214 if (!addrModeCombiningAllowed())
4215 return false;
4216
4217 // Build a map between <original value, basic block where we saw it> to
4218 // value of base register.
4219 // Bail out if there is no common type.
4220 FoldAddrToValueMapping Map;
4221 if (!initializeMap(Map))
4222 return false;
4223
4224 CommonValue = findCommon(Map);
4225 if (CommonValue)
4226 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4227 return CommonValue != nullptr;
4228 }
4229
4230private:
4231 /// `CommonValue` may be a placeholder inserted by us.
4232 /// If the placeholder is not used, we should remove this dead instruction.
4233 void eraseCommonValueIfDead() {
4234 if (CommonValue && CommonValue->use_empty())
4235 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4236 CommonInst->eraseFromParent();
4237 }
4238
4239 /// Initialize Map with anchor values. For address seen
4240 /// we set the value of different field saw in this address.
4241 /// At the same time we find a common type for different field we will
4242 /// use to create new Phi/Select nodes. Keep it in CommonType field.
4243 /// Return false if there is no common type found.
4244 bool initializeMap(FoldAddrToValueMapping &Map) {
4245 // Keep track of keys where the value is null. We will need to replace it
4246 // with constant null when we know the common type.
4247 SmallVector<Value *, 2> NullValue;
4248 Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4249 for (auto &AM : AddrModes) {
4250 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4251 if (DV) {
4252 auto *Type = DV->getType();
4253 if (CommonType && CommonType != Type)
4254 return false;
4255 CommonType = Type;
4256 Map[AM.OriginalValue] = DV;
4257 } else {
4258 NullValue.push_back(AM.OriginalValue);
4259 }
4260 }
4261 assert(CommonType && "At least one non-null value must be!");
4262 for (auto *V : NullValue)
4263 Map[V] = Constant::getNullValue(CommonType);
4264 return true;
4265 }
4266
4267 /// We have mapping between value A and other value B where B was a field in
4268 /// addressing mode represented by A. Also we have an original value C
4269 /// representing an address we start with. Traversing from C through phi and
4270 /// selects we ended up with A's in a map. This utility function tries to find
4271 /// a value V which is a field in addressing mode C and traversing through phi
4272 /// nodes and selects we will end up in corresponded values B in a map.
4273 /// The utility will create a new Phi/Selects if needed.
4274 // The simple example looks as follows:
4275 // BB1:
4276 // p1 = b1 + 40
4277 // br cond BB2, BB3
4278 // BB2:
4279 // p2 = b2 + 40
4280 // br BB3
4281 // BB3:
4282 // p = phi [p1, BB1], [p2, BB2]
4283 // v = load p
4284 // Map is
4285 // p1 -> b1
4286 // p2 -> b2
4287 // Request is
4288 // p -> ?
4289 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4290 Value *findCommon(FoldAddrToValueMapping &Map) {
4291 // Tracks the simplification of newly created phi nodes. The reason we use
4292 // this mapping is because we will add new created Phi nodes in AddrToBase.
4293 // Simplification of Phi nodes is recursive, so some Phi node may
4294 // be simplified after we added it to AddrToBase. In reality this
4295 // simplification is possible only if original phi/selects were not
4296 // simplified yet.
4297 // Using this mapping we can find the current value in AddrToBase.
4298 SimplificationTracker ST;
4299
4300 // First step, DFS to create PHI nodes for all intermediate blocks.
4301 // Also fill traverse order for the second step.
4302 SmallVector<Value *, 32> TraverseOrder;
4303 InsertPlaceholders(Map, TraverseOrder, ST);
4304
4305 // Second Step, fill new nodes by merged values and simplify if possible.
4306 FillPlaceholders(Map, TraverseOrder, ST);
4307
4308 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4309 ST.destroyNewNodes(CommonType);
4310 return nullptr;
4311 }
4312
4313 // Now we'd like to match New Phi nodes to existed ones.
4314 unsigned PhiNotMatchedCount = 0;
4315 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4316 ST.destroyNewNodes(CommonType);
4317 return nullptr;
4318 }
4319
4320 auto *Result = ST.Get(Map.find(Original)->second);
4321 if (Result) {
4322 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4323 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4324 }
4325 return Result;
4326 }
4327
4328 /// Try to match PHI node to Candidate.
4329 /// Matcher tracks the matched Phi nodes.
4330 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4331 SmallSetVector<PHIPair, 8> &Matcher,
4332 PhiNodeSet &PhiNodesToMatch) {
4333 SmallVector<PHIPair, 8> WorkList;
4334 Matcher.insert({PHI, Candidate});
4335 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4336 MatchedPHIs.insert(PHI);
4337 WorkList.push_back({PHI, Candidate});
4338 SmallSet<PHIPair, 8> Visited;
4339 while (!WorkList.empty()) {
4340 auto Item = WorkList.pop_back_val();
4341 if (!Visited.insert(Item).second)
4342 continue;
4343 // We iterate over all incoming values to Phi to compare them.
4344 // If values are different and both of them Phi and the first one is a
4345 // Phi we added (subject to match) and both of them is in the same basic
4346 // block then we can match our pair if values match. So we state that
4347 // these values match and add it to work list to verify that.
4348 for (auto *B : Item.first->blocks()) {
4349 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4350 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4351 if (FirstValue == SecondValue)
4352 continue;
4353
4354 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4355 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4356
4357 // One of them is not Phi or
4358 // The first one is not Phi node from the set we'd like to match or
4359 // Phi nodes from different basic blocks then
4360 // we will not be able to match.
4361 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4362 FirstPhi->getParent() != SecondPhi->getParent())
4363 return false;
4364
4365 // If we already matched them then continue.
4366 if (Matcher.count({FirstPhi, SecondPhi}))
4367 continue;
4368 // So the values are different and does not match. So we need them to
4369 // match. (But we register no more than one match per PHI node, so that
4370 // we won't later try to replace them twice.)
4371 if (MatchedPHIs.insert(FirstPhi).second)
4372 Matcher.insert({FirstPhi, SecondPhi});
4373 // But me must check it.
4374 WorkList.push_back({FirstPhi, SecondPhi});
4375 }
4376 }
4377 return true;
4378 }
4379
4380 /// For the given set of PHI nodes (in the SimplificationTracker) try
4381 /// to find their equivalents.
4382 /// Returns false if this matching fails and creation of new Phi is disabled.
4383 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4384 unsigned &PhiNotMatchedCount) {
4385 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4386 // order, so the replacements (ReplacePhi) are also done in a deterministic
4387 // order.
4388 SmallSetVector<PHIPair, 8> Matched;
4389 SmallPtrSet<PHINode *, 8> WillNotMatch;
4390 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4391 while (PhiNodesToMatch.size()) {
4392 PHINode *PHI = *PhiNodesToMatch.begin();
4393
4394 // Add us, if no Phi nodes in the basic block we do not match.
4395 WillNotMatch.clear();
4396 WillNotMatch.insert(PHI);
4397
4398 // Traverse all Phis until we found equivalent or fail to do that.
4399 bool IsMatched = false;
4400 for (auto &P : PHI->getParent()->phis()) {
4401 // Skip new Phi nodes.
4402 if (PhiNodesToMatch.count(&P))
4403 continue;
4404 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4405 break;
4406 // If it does not match, collect all Phi nodes from matcher.
4407 // if we end up with no match, them all these Phi nodes will not match
4408 // later.
4409 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4410 Matched.clear();
4411 }
4412 if (IsMatched) {
4413 // Replace all matched values and erase them.
4414 for (auto MV : Matched)
4415 ST.ReplacePhi(MV.first, MV.second);
4416 Matched.clear();
4417 continue;
4418 }
4419 // If we are not allowed to create new nodes then bail out.
4420 if (!AllowNewPhiNodes)
4421 return false;
4422 // Just remove all seen values in matcher. They will not match anything.
4423 PhiNotMatchedCount += WillNotMatch.size();
4424 for (auto *P : WillNotMatch)
4425 PhiNodesToMatch.erase(P);
4426 }
4427 return true;
4428 }
4429 /// Fill the placeholders with values from predecessors and simplify them.
4430 void FillPlaceholders(FoldAddrToValueMapping &Map,
4431 SmallVectorImpl<Value *> &TraverseOrder,
4432 SimplificationTracker &ST) {
4433 while (!TraverseOrder.empty()) {
4434 Value *Current = TraverseOrder.pop_back_val();
4435 assert(Map.contains(Current) && "No node to fill!!!");
4436 Value *V = Map[Current];
4437
4438 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4439 // CurrentValue also must be Select.
4440 auto *CurrentSelect = cast<SelectInst>(Current);
4441 auto *TrueValue = CurrentSelect->getTrueValue();
4442 assert(Map.contains(TrueValue) && "No True Value!");
4443 Select->setTrueValue(ST.Get(Map[TrueValue]));
4444 auto *FalseValue = CurrentSelect->getFalseValue();
4445 assert(Map.contains(FalseValue) && "No False Value!");
4446 Select->setFalseValue(ST.Get(Map[FalseValue]));
4447 } else {
4448 // Must be a Phi node then.
4449 auto *PHI = cast<PHINode>(V);
4450 // Fill the Phi node with values from predecessors.
4451 for (auto *B : predecessors(PHI->getParent())) {
4452 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4453 assert(Map.contains(PV) && "No predecessor Value!");
4454 PHI->addIncoming(ST.Get(Map[PV]), B);
4455 }
4456 }
4457 }
4458 }
4459
4460 /// Starting from original value recursively iterates over def-use chain up to
4461 /// known ending values represented in a map. For each traversed phi/select
4462 /// inserts a placeholder Phi or Select.
4463 /// Reports all new created Phi/Select nodes by adding them to set.
4464 /// Also reports and order in what values have been traversed.
4465 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4466 SmallVectorImpl<Value *> &TraverseOrder,
4467 SimplificationTracker &ST) {
4468 SmallVector<Value *, 32> Worklist;
4469 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4470 "Address must be a Phi or Select node");
4471 auto *Dummy = PoisonValue::get(CommonType);
4472 Worklist.push_back(Original);
4473 while (!Worklist.empty()) {
4474 Value *Current = Worklist.pop_back_val();
4475 // if it is already visited or it is an ending value then skip it.
4476 if (Map.contains(Current))
4477 continue;
4478 TraverseOrder.push_back(Current);
4479
4480 // CurrentValue must be a Phi node or select. All others must be covered
4481 // by anchors.
4482 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4483 // Is it OK to get metadata from OrigSelect?!
4484 // Create a Select placeholder with dummy value.
4485 SelectInst *Select =
4486 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4487 CurrentSelect->getName(),
4488 CurrentSelect->getIterator(), CurrentSelect);
4489 Map[Current] = Select;
4490 ST.insertNewSelect(Select);
4491 // We are interested in True and False values.
4492 Worklist.push_back(CurrentSelect->getTrueValue());
4493 Worklist.push_back(CurrentSelect->getFalseValue());
4494 } else {
4495 // It must be a Phi node then.
4496 PHINode *CurrentPhi = cast<PHINode>(Current);
4497 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4498 PHINode *PHI =
4499 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4500 Map[Current] = PHI;
4501 ST.insertNewPhi(PHI);
4502 append_range(Worklist, CurrentPhi->incoming_values());
4503 }
4504 }
4505 }
4506
4507 bool addrModeCombiningAllowed() {
4509 return false;
4510 switch (DifferentField) {
4511 default:
4512 return false;
4513 case ExtAddrMode::BaseRegField:
4515 case ExtAddrMode::BaseGVField:
4516 return AddrSinkCombineBaseGV;
4517 case ExtAddrMode::BaseOffsField:
4519 case ExtAddrMode::ScaledRegField:
4521 }
4522 }
4523};
4524} // end anonymous namespace
4525
4526/// Try adding ScaleReg*Scale to the current addressing mode.
4527/// Return true and update AddrMode if this addr mode is legal for the target,
4528/// false if not.
4529bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4530 unsigned Depth) {
4531 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4532 // mode. Just process that directly.
4533 if (Scale == 1)
4534 return matchAddr(ScaleReg, Depth);
4535
4536 // If the scale is 0, it takes nothing to add this.
4537 if (Scale == 0)
4538 return true;
4539
4540 // If we already have a scale of this value, we can add to it, otherwise, we
4541 // need an available scale field.
4542 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4543 return false;
4544
4545 ExtAddrMode TestAddrMode = AddrMode;
4546
4547 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4548 // [A+B + A*7] -> [B+A*8].
4549 TestAddrMode.Scale += Scale;
4550 TestAddrMode.ScaledReg = ScaleReg;
4551
4552 // If the new address isn't legal, bail out.
4553 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4554 return false;
4555
4556 // It was legal, so commit it.
4557 AddrMode = TestAddrMode;
4558
4559 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4560 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4561 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4562 // go any further: we can reuse it and cannot eliminate it.
4563 ConstantInt *CI = nullptr;
4564 Value *AddLHS = nullptr;
4565 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4566 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4567 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4568 TestAddrMode.InBounds = false;
4569 TestAddrMode.ScaledReg = AddLHS;
4570 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4571
4572 // If this addressing mode is legal, commit it and remember that we folded
4573 // this instruction.
4574 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4575 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4576 AddrMode = TestAddrMode;
4577 return true;
4578 }
4579 // Restore status quo.
4580 TestAddrMode = AddrMode;
4581 }
4582
4583 // If this is an add recurrence with a constant step, return the increment
4584 // instruction and the canonicalized step.
4585 auto GetConstantStep =
4586 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4587 auto *PN = dyn_cast<PHINode>(V);
4588 if (!PN)
4589 return std::nullopt;
4590 auto IVInc = getIVIncrement(PN, &LI);
4591 if (!IVInc)
4592 return std::nullopt;
4593 // TODO: The result of the intrinsics above is two-complement. However when
4594 // IV inc is expressed as add or sub, iv.next is potentially a poison value.
4595 // If it has nuw or nsw flags, we need to make sure that these flags are
4596 // inferrable at the point of memory instruction. Otherwise we are replacing
4597 // well-defined two-complement computation with poison. Currently, to avoid
4598 // potentially complex analysis needed to prove this, we reject such cases.
4599 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4600 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4601 return std::nullopt;
4602 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4603 return std::make_pair(IVInc->first, ConstantStep->getValue());
4604 return std::nullopt;
4605 };
4606
4607 // Try to account for the following special case:
4608 // 1. ScaleReg is an inductive variable;
4609 // 2. We use it with non-zero offset;
4610 // 3. IV's increment is available at the point of memory instruction.
4611 //
4612 // In this case, we may reuse the IV increment instead of the IV Phi to
4613 // achieve the following advantages:
4614 // 1. If IV step matches the offset, we will have no need in the offset;
4615 // 2. Even if they don't match, we will reduce the overlap of living IV
4616 // and IV increment, that will potentially lead to better register
4617 // assignment.
4618 if (AddrMode.BaseOffs) {
4619 if (auto IVStep = GetConstantStep(ScaleReg)) {
4620 Instruction *IVInc = IVStep->first;
4621 // The following assert is important to ensure a lack of infinite loops.
4622 // This transforms is (intentionally) the inverse of the one just above.
4623 // If they don't agree on the definition of an increment, we'd alternate
4624 // back and forth indefinitely.
4625 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4626 APInt Step = IVStep->second;
4627 APInt Offset = Step * AddrMode.Scale;
4628 if (Offset.isSignedIntN(64)) {
4629 TestAddrMode.InBounds = false;
4630 TestAddrMode.ScaledReg = IVInc;
4631 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4632 // If this addressing mode is legal, commit it..
4633 // (Note that we defer the (expensive) domtree base legality check
4634 // to the very last possible point.)
4635 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4636 getDTFn().dominates(IVInc, MemoryInst)) {
4637 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4638 AddrMode = TestAddrMode;
4639 return true;
4640 }
4641 // Restore status quo.
4642 TestAddrMode = AddrMode;
4643 }
4644 }
4645 }
4646
4647 // Otherwise, just return what we have.
4648 return true;
4649}
4650
4651/// This is a little filter, which returns true if an addressing computation
4652/// involving I might be folded into a load/store accessing it.
4653/// This doesn't need to be perfect, but needs to accept at least
4654/// the set of instructions that MatchOperationAddr can.
4656 switch (I->getOpcode()) {
4657 case Instruction::BitCast:
4658 case Instruction::AddrSpaceCast:
4659 // Don't touch identity bitcasts.
4660 if (I->getType() == I->getOperand(0)->getType())
4661 return false;
4662 return I->getType()->isIntOrPtrTy();
4663 case Instruction::PtrToInt:
4664 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4665 return true;
4666 case Instruction::IntToPtr:
4667 // We know the input is intptr_t, so this is foldable.
4668 return true;
4669 case Instruction::Add:
4670 return true;
4671 case Instruction::Mul:
4672 case Instruction::Shl:
4673 // Can only handle X*C and X << C.
4674 return isa<ConstantInt>(I->getOperand(1));
4675 case Instruction::GetElementPtr:
4676 return true;
4677 default:
4678 return false;
4679 }
4680}
4681
4682/// Check whether or not \p Val is a legal instruction for \p TLI.
4683/// \note \p Val is assumed to be the product of some type promotion.
4684/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4685/// to be legal, as the non-promoted value would have had the same state.
4687 const DataLayout &DL, Value *Val) {
4688 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4689 if (!PromotedInst)
4690 return false;
4691 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4692 // If the ISDOpcode is undefined, it was undefined before the promotion.
4693 if (!ISDOpcode)
4694 return true;
4695 // Otherwise, check if the promoted instruction is legal or not.
4696 return TLI.isOperationLegalOrCustom(
4697 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4698}
4699
4700namespace {
4701
4702/// Hepler class to perform type promotion.
4703class TypePromotionHelper {
4704 /// Utility function to add a promoted instruction \p ExtOpnd to
4705 /// \p PromotedInsts and record the type of extension we have seen.
4706 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4707 Instruction *ExtOpnd, bool IsSExt) {
4708 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4709 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4710 if (!Inserted) {
4711 // If the new extension is same as original, the information in
4712 // PromotedInsts[ExtOpnd] is still correct.
4713 if (It->second.getInt() == ExtTy)
4714 return;
4715
4716 // Now the new extension is different from old extension, we make
4717 // the type information invalid by setting extension type to
4718 // BothExtension.
4719 ExtTy = BothExtension;
4720 }
4721 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4722 }
4723
4724 /// Utility function to query the original type of instruction \p Opnd
4725 /// with a matched extension type. If the extension doesn't match, we
4726 /// cannot use the information we had on the original type.
4727 /// BothExtension doesn't match any extension type.
4728 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4729 Instruction *Opnd, bool IsSExt) {
4730 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4731 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4732 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4733 return It->second.getPointer();
4734 return nullptr;
4735 }
4736
4737 /// Utility function to check whether or not a sign or zero extension
4738 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4739 /// either using the operands of \p Inst or promoting \p Inst.
4740 /// The type of the extension is defined by \p IsSExt.
4741 /// In other words, check if:
4742 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4743 /// #1 Promotion applies:
4744 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4745 /// #2 Operand reuses:
4746 /// ext opnd1 to ConsideredExtType.
4747 /// \p PromotedInsts maps the instructions to their type before promotion.
4748 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4749 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4750
4751 /// Utility function to determine if \p OpIdx should be promoted when
4752 /// promoting \p Inst.
4753 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4754 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4755 }
4756
4757 /// Utility function to promote the operand of \p Ext when this
4758 /// operand is a promotable trunc or sext or zext.
4759 /// \p PromotedInsts maps the instructions to their type before promotion.
4760 /// \p CreatedInstsCost[out] contains the cost of all instructions
4761 /// created to promote the operand of Ext.
4762 /// Newly added extensions are inserted in \p Exts.
4763 /// Newly added truncates are inserted in \p Truncs.
4764 /// Should never be called directly.
4765 /// \return The promoted value which is used instead of Ext.
4766 static Value *promoteOperandForTruncAndAnyExt(
4767 Instruction *Ext, TypePromotionTransaction &TPT,
4768 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4769 SmallVectorImpl<Instruction *> *Exts,
4770 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4771
4772 /// Utility function to promote the operand of \p Ext when this
4773 /// operand is promotable and is not a supported trunc or sext.
4774 /// \p PromotedInsts maps the instructions to their type before promotion.
4775 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4776 /// created to promote the operand of Ext.
4777 /// Newly added extensions are inserted in \p Exts.
4778 /// Newly added truncates are inserted in \p Truncs.
4779 /// Should never be called directly.
4780 /// \return The promoted value which is used instead of Ext.
4781 static Value *promoteOperandForOther(Instruction *Ext,
4782 TypePromotionTransaction &TPT,
4783 InstrToOrigTy &PromotedInsts,
4784 unsigned &CreatedInstsCost,
4785 SmallVectorImpl<Instruction *> *Exts,
4786 SmallVectorImpl<Instruction *> *Truncs,
4787 const TargetLowering &TLI, bool IsSExt);
4788
4789 /// \see promoteOperandForOther.
4790 static Value *signExtendOperandForOther(
4791 Instruction *Ext, TypePromotionTransaction &TPT,
4792 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4793 SmallVectorImpl<Instruction *> *Exts,
4794 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4795 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4796 Exts, Truncs, TLI, true);
4797 }
4798
4799 /// \see promoteOperandForOther.
4800 static Value *zeroExtendOperandForOther(
4801 Instruction *Ext, TypePromotionTransaction &TPT,
4802 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4803 SmallVectorImpl<Instruction *> *Exts,
4804 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4805 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4806 Exts, Truncs, TLI, false);
4807 }
4808
4809public:
4810 /// Type for the utility function that promotes the operand of Ext.
4811 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4812 InstrToOrigTy &PromotedInsts,
4813 unsigned &CreatedInstsCost,
4814 SmallVectorImpl<Instruction *> *Exts,
4815 SmallVectorImpl<Instruction *> *Truncs,
4816 const TargetLowering &TLI);
4817
4818 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4819 /// action to promote the operand of \p Ext instead of using Ext.
4820 /// \return NULL if no promotable action is possible with the current
4821 /// sign extension.
4822 /// \p InsertedInsts keeps track of all the instructions inserted by the
4823 /// other CodeGenPrepare optimizations. This information is important
4824 /// because we do not want to promote these instructions as CodeGenPrepare
4825 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4826 /// \p PromotedInsts maps the instructions to their type before promotion.
4827 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4828 const TargetLowering &TLI,
4829 const InstrToOrigTy &PromotedInsts);
4830};
4831
4832} // end anonymous namespace
4833
4834bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4835 Type *ConsideredExtType,
4836 const InstrToOrigTy &PromotedInsts,
4837 bool IsSExt) {
4838 // The promotion helper does not know how to deal with vector types yet.
4839 // To be able to fix that, we would need to fix the places where we
4840 // statically extend, e.g., constants and such.
4841 if (Inst->getType()->isVectorTy())
4842 return false;
4843
4844 // We can always get through zext.
4845 if (isa<ZExtInst>(Inst))
4846 return true;
4847
4848 // sext(sext) is ok too.
4849 if (IsSExt && isa<SExtInst>(Inst))
4850 return true;
4851
4852 // We can get through binary operator, if it is legal. In other words, the
4853 // binary operator must have a nuw or nsw flag.
4854 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4855 if (isa<OverflowingBinaryOperator>(BinOp) &&
4856 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4857 (IsSExt && BinOp->hasNoSignedWrap())))
4858 return true;
4859
4860 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4861 if ((Inst->getOpcode() == Instruction::And ||
4862 Inst->getOpcode() == Instruction::Or))
4863 return true;
4864
4865 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4866 if (Inst->getOpcode() == Instruction::Xor) {
4867 // Make sure it is not a NOT.
4868 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4869 if (!Cst->getValue().isAllOnes())
4870 return true;
4871 }
4872
4873 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4874 // It may change a poisoned value into a regular value, like
4875 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4876 // poisoned value regular value
4877 // It should be OK since undef covers valid value.
4878 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4879 return true;
4880
4881 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4882 // It may change a poisoned value into a regular value, like
4883 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4884 // poisoned value regular value
4885 // It should be OK since undef covers valid value.
4886 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4887 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4888 if (ExtInst->hasOneUse()) {
4889 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4890 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4891 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4892 if (Cst &&
4893 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4894 return true;
4895 }
4896 }
4897 }
4898
4899 // Check if we can do the following simplification.
4900 // ext(trunc(opnd)) --> ext(opnd)
4901 if (!isa<TruncInst>(Inst))
4902 return false;
4903
4904 Value *OpndVal = Inst->getOperand(0);
4905 // Check if we can use this operand in the extension.
4906 // If the type is larger than the result type of the extension, we cannot.
4907 if (!OpndVal->getType()->isIntegerTy() ||
4908 OpndVal->getType()->getIntegerBitWidth() >
4909 ConsideredExtType->getIntegerBitWidth())
4910 return false;
4911
4912 // If the operand of the truncate is not an instruction, we will not have
4913 // any information on the dropped bits.
4914 // (Actually we could for constant but it is not worth the extra logic).
4915 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4916 if (!Opnd)
4917 return false;
4918
4919 // Check if the source of the type is narrow enough.
4920 // I.e., check that trunc just drops extended bits of the same kind of
4921 // the extension.
4922 // #1 get the type of the operand and check the kind of the extended bits.
4923 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4924 if (OpndType)
4925 ;
4926 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4927 OpndType = Opnd->getOperand(0)->getType();
4928 else
4929 return false;
4930
4931 // #2 check that the truncate just drops extended bits.
4932 return Inst->getType()->getIntegerBitWidth() >=
4933 OpndType->getIntegerBitWidth();
4934}
4935
4936TypePromotionHelper::Action TypePromotionHelper::getAction(
4937 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4938 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4939 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4940 "Unexpected instruction type");
4941 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4942 Type *ExtTy = Ext->getType();
4943 bool IsSExt = isa<SExtInst>(Ext);
4944 // If the operand of the extension is not an instruction, we cannot
4945 // get through.
4946 // If it, check we can get through.
4947 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4948 return nullptr;
4949
4950 // Do not promote if the operand has been added by codegenprepare.
4951 // Otherwise, it means we are undoing an optimization that is likely to be
4952 // redone, thus causing potential infinite loop.
4953 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4954 return nullptr;
4955
4956 // SExt or Trunc instructions.
4957 // Return the related handler.
4958 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4959 isa<ZExtInst>(ExtOpnd))
4960 return promoteOperandForTruncAndAnyExt;
4961
4962 // Regular instruction.
4963 // Abort early if we will have to insert non-free instructions.
4964 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4965 return nullptr;
4966 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4967}
4968
4969Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4970 Instruction *SExt, TypePromotionTransaction &TPT,
4971 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4972 SmallVectorImpl<Instruction *> *Exts,
4973 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4974 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4975 // get through it and this method should not be called.
4976 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4977 Value *ExtVal = SExt;
4978 bool HasMergedNonFreeExt = false;
4979 if (isa<ZExtInst>(SExtOpnd)) {
4980 // Replace s|zext(zext(opnd))
4981 // => zext(opnd).
4982 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4983 Value *ZExt =
4984 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4985 TPT.replaceAllUsesWith(SExt, ZExt);
4986 TPT.eraseInstruction(SExt);
4987 ExtVal = ZExt;
4988 } else {
4989 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4990 // => z|sext(opnd).
4991 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4992 }
4993 CreatedInstsCost = 0;
4994
4995 // Remove dead code.
4996 if (SExtOpnd->use_empty())
4997 TPT.eraseInstruction(SExtOpnd);
4998
4999 // Check if the extension is still needed.
5000 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5001 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5002 if (ExtInst) {
5003 if (Exts)
5004 Exts->push_back(ExtInst);
5005 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5006 }
5007 return ExtVal;
5008 }
5009
5010 // At this point we have: ext ty opnd to ty.
5011 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5012 Value *NextVal = ExtInst->getOperand(0);
5013 TPT.eraseInstruction(ExtInst, NextVal);
5014 return NextVal;
5015}
5016
5017Value *TypePromotionHelper::promoteOperandForOther(
5018 Instruction *Ext, TypePromotionTransaction &TPT,
5019 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5020 SmallVectorImpl<Instruction *> *Exts,
5021 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5022 bool IsSExt) {
5023 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5024 // get through it and this method should not be called.
5025 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5026 CreatedInstsCost = 0;
5027 if (!ExtOpnd->hasOneUse()) {
5028 // ExtOpnd will be promoted.
5029 // All its uses, but Ext, will need to use a truncated value of the
5030 // promoted version.
5031 // Create the truncate now.
5032 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5033 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5034 // Insert it just after the definition.
5035 ITrunc->moveAfter(ExtOpnd);
5036 if (Truncs)
5037 Truncs->push_back(ITrunc);
5038 }
5039
5040 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5041 // Restore the operand of Ext (which has been replaced by the previous call
5042 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5043 TPT.setOperand(Ext, 0, ExtOpnd);
5044 }
5045
5046 // Get through the Instruction:
5047 // 1. Update its type.
5048 // 2. Replace the uses of Ext by Inst.
5049 // 3. Extend each operand that needs to be extended.
5050
5051 // Remember the original type of the instruction before promotion.
5052 // This is useful to know that the high bits are sign extended bits.
5053 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5054 // Step #1.
5055 TPT.mutateType(ExtOpnd, Ext->getType());
5056 // Step #2.
5057 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5058 // Step #3.
5059 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5060 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5061 ++OpIdx) {
5062 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5063 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5064 !shouldExtOperand(ExtOpnd, OpIdx)) {
5065 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5066 continue;
5067 }
5068 // Check if we can statically extend the operand.
5069 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5070 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5071 LLVM_DEBUG(dbgs() << "Statically extend\n");
5072 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5073 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5074 : Cst->getValue().zext(BitWidth);
5075 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5076 continue;
5077 }
5078 // UndefValue are typed, so we have to statically sign extend them.
5079 if (isa<UndefValue>(Opnd)) {
5080 LLVM_DEBUG(dbgs() << "Statically extend\n");
5081 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5082 continue;
5083 }
5084
5085 // Otherwise we have to explicitly sign extend the operand.
5086 Value *ValForExtOpnd = IsSExt
5087 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5088 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5089 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5090 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5091 if (!InstForExtOpnd)
5092 continue;
5093
5094 if (Exts)
5095 Exts->push_back(InstForExtOpnd);
5096
5097 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5098 }
5099 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5100 TPT.eraseInstruction(Ext);
5101 return ExtOpnd;
5102}
5103
5104/// Check whether or not promoting an instruction to a wider type is profitable.
5105/// \p NewCost gives the cost of extension instructions created by the
5106/// promotion.
5107/// \p OldCost gives the cost of extension instructions before the promotion
5108/// plus the number of instructions that have been
5109/// matched in the addressing mode the promotion.
5110/// \p PromotedOperand is the value that has been promoted.
5111/// \return True if the promotion is profitable, false otherwise.
5112bool AddressingModeMatcher::isPromotionProfitable(
5113 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5114 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5115 << '\n');
5116 // The cost of the new extensions is greater than the cost of the
5117 // old extension plus what we folded.
5118 // This is not profitable.
5119 if (NewCost > OldCost)
5120 return false;
5121 if (NewCost < OldCost)
5122 return true;
5123 // The promotion is neutral but it may help folding the sign extension in
5124 // loads for instance.
5125 // Check that we did not create an illegal instruction.
5126 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5127}
5128
5129/// Given an instruction or constant expr, see if we can fold the operation
5130/// into the addressing mode. If so, update the addressing mode and return
5131/// true, otherwise return false without modifying AddrMode.
5132/// If \p MovedAway is not NULL, it contains the information of whether or
5133/// not AddrInst has to be folded into the addressing mode on success.
5134/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5135/// because it has been moved away.
5136/// Thus AddrInst must not be added in the matched instructions.
5137/// This state can happen when AddrInst is a sext, since it may be moved away.
5138/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5139/// not be referenced anymore.
5140bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5141 unsigned Depth,
5142 bool *MovedAway) {
5143 // Avoid exponential behavior on extremely deep expression trees.
5144 if (Depth >= 5)
5145 return false;
5146
5147 // By default, all matched instructions stay in place.
5148 if (MovedAway)
5149 *MovedAway = false;
5150
5151 switch (Opcode) {
5152 case Instruction::PtrToInt:
5153 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5154 return matchAddr(AddrInst->getOperand(0), Depth);
5155 case Instruction::IntToPtr: {
5156 auto AS = AddrInst->getType()->getPointerAddressSpace();
5157 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5158 // This inttoptr is a no-op if the integer type is pointer sized.
5159 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5160 return matchAddr(AddrInst->getOperand(0), Depth);
5161 return false;
5162 }
5163 case Instruction::BitCast:
5164 // BitCast is always a noop, and we can handle it as long as it is
5165 // int->int or pointer->pointer (we don't want int<->fp or something).
5166 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5167 // Don't touch identity bitcasts. These were probably put here by LSR,
5168 // and we don't want to mess around with them. Assume it knows what it
5169 // is doing.
5170 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5171 return matchAddr(AddrInst->getOperand(0), Depth);
5172 return false;
5173 case Instruction::AddrSpaceCast: {
5174 unsigned SrcAS =
5175 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5176 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5177 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5178 return matchAddr(AddrInst->getOperand(0), Depth);
5179 return false;
5180 }
5181 case Instruction::Add: {
5182 // Check to see if we can merge in one operand, then the other. If so, we
5183 // win.
5184 ExtAddrMode BackupAddrMode = AddrMode;
5185 unsigned OldSize = AddrModeInsts.size();
5186 // Start a transaction at this point.
5187 // The LHS may match but not the RHS.
5188 // Therefore, we need a higher level restoration point to undo partially
5189 // matched operation.
5190 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5191 TPT.getRestorationPoint();
5192
5193 // Try to match an integer constant second to increase its chance of ending
5194 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5195 int First = 0, Second = 1;
5196 if (isa<ConstantInt>(AddrInst->getOperand(First))
5197 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5198 std::swap(First, Second);
5199 AddrMode.InBounds = false;
5200 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5201 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5202 return true;
5203
5204 // Restore the old addr mode info.
5205 AddrMode = BackupAddrMode;
5206 AddrModeInsts.resize(OldSize);
5207 TPT.rollback(LastKnownGood);
5208
5209 // Otherwise this was over-aggressive. Try merging operands in the opposite
5210 // order.
5211 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5212 matchAddr(AddrInst->getOperand(First), Depth + 1))
5213 return true;
5214
5215 // Otherwise we definitely can't merge the ADD in.
5216 AddrMode = BackupAddrMode;
5217 AddrModeInsts.resize(OldSize);
5218 TPT.rollback(LastKnownGood);
5219 break;
5220 }
5221 // case Instruction::Or:
5222 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5223 // break;
5224 case Instruction::Mul:
5225 case Instruction::Shl: {
5226 // Can only handle X*C and X << C.
5227 AddrMode.InBounds = false;
5228 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5229 if (!RHS || RHS->getBitWidth() > 64)
5230 return false;
5231 int64_t Scale = Opcode == Instruction::Shl
5232 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5233 : RHS->getSExtValue();
5234
5235 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5236 }
5237 case Instruction::GetElementPtr: {
5238 // Scan the GEP. We check it if it contains constant offsets and at most
5239 // one variable offset.
5240 int VariableOperand = -1;
5241 unsigned VariableScale = 0;
5242
5243 int64_t ConstantOffset = 0;
5244 gep_type_iterator GTI = gep_type_begin(AddrInst);
5245 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5246 if (StructType *STy = GTI.getStructTypeOrNull()) {
5247 const StructLayout *SL = DL.getStructLayout(STy);
5248 unsigned Idx =
5249 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5250 ConstantOffset += SL->getElementOffset(Idx);
5251 } else {
5252 TypeSize TS = GTI.getSequentialElementStride(DL);
5253 if (TS.isNonZero()) {
5254 // The optimisations below currently only work for fixed offsets.
5255 if (TS.isScalable())
5256 return false;
5257 int64_t TypeSize = TS.getFixedValue();
5258 if (ConstantInt *CI =
5259 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5260 const APInt &CVal = CI->getValue();
5261 if (CVal.getSignificantBits() <= 64) {
5262 ConstantOffset += CVal.getSExtValue() * TypeSize;
5263 continue;
5264 }
5265 }
5266 // We only allow one variable index at the moment.
5267 if (VariableOperand != -1)
5268 return false;
5269
5270 // Remember the variable index.
5271 VariableOperand = i;
5272 VariableScale = TypeSize;
5273 }
5274 }
5275 }
5276
5277 // A common case is for the GEP to only do a constant offset. In this case,
5278 // just add it to the disp field and check validity.
5279 if (VariableOperand == -1) {
5280 AddrMode.BaseOffs += ConstantOffset;
5281 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5282 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5283 AddrMode.InBounds = false;
5284 return true;
5285 }
5286 AddrMode.BaseOffs -= ConstantOffset;
5287
5289 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5290 ConstantOffset > 0) {
5291 // Record GEPs with non-zero offsets as candidates for splitting in
5292 // the event that the offset cannot fit into the r+i addressing mode.
5293 // Simple and common case that only one GEP is used in calculating the
5294 // address for the memory access.
5295 Value *Base = AddrInst->getOperand(0);
5296 auto *BaseI = dyn_cast<Instruction>(Base);
5297 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5299 (BaseI && !isa<CastInst>(BaseI) &&
5300 !isa<GetElementPtrInst>(BaseI))) {
5301 // Make sure the parent block allows inserting non-PHI instructions
5302 // before the terminator.
5303 BasicBlock *Parent = BaseI ? BaseI->getParent()
5304 : &GEP->getFunction()->getEntryBlock();
5305 if (!Parent->getTerminator()->isEHPad())
5306 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5307 }
5308 }
5309
5310 return false;
5311 }
5312
5313 // Save the valid addressing mode in case we can't match.
5314 ExtAddrMode BackupAddrMode = AddrMode;
5315 unsigned OldSize = AddrModeInsts.size();
5316
5317 // See if the scale and offset amount is valid for this target.
5318 AddrMode.BaseOffs += ConstantOffset;
5319 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5320 AddrMode.InBounds = false;
5321
5322 // Match the base operand of the GEP.
5323 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5324 // If it couldn't be matched, just stuff the value in a register.
5325 if (AddrMode.HasBaseReg) {
5326 AddrMode = BackupAddrMode;
5327 AddrModeInsts.resize(OldSize);
5328 return false;
5329 }
5330 AddrMode.HasBaseReg = true;
5331 AddrMode.BaseReg = AddrInst->getOperand(0);
5332 }
5333
5334 // Match the remaining variable portion of the GEP.
5335 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5336 Depth)) {
5337 // If it couldn't be matched, try stuffing the base into a register
5338 // instead of matching it, and retrying the match of the scale.
5339 AddrMode = BackupAddrMode;
5340 AddrModeInsts.resize(OldSize);
5341 if (AddrMode.HasBaseReg)
5342 return false;
5343 AddrMode.HasBaseReg = true;
5344 AddrMode.BaseReg = AddrInst->getOperand(0);
5345 AddrMode.BaseOffs += ConstantOffset;
5346 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5347 VariableScale, Depth)) {
5348 // If even that didn't work, bail.
5349 AddrMode = BackupAddrMode;
5350 AddrModeInsts.resize(OldSize);
5351 return false;
5352 }
5353 }
5354
5355 return true;
5356 }
5357 case Instruction::SExt:
5358 case Instruction::ZExt: {
5359 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5360 if (!Ext)
5361 return false;
5362
5363 // Try to move this ext out of the way of the addressing mode.
5364 // Ask for a method for doing so.
5365 TypePromotionHelper::Action TPH =
5366 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5367 if (!TPH)
5368 return false;
5369
5370 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5371 TPT.getRestorationPoint();
5372 unsigned CreatedInstsCost = 0;
5373 unsigned ExtCost = !TLI.isExtFree(Ext);
5374 Value *PromotedOperand =
5375 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5376 // SExt has been moved away.
5377 // Thus either it will be rematched later in the recursive calls or it is
5378 // gone. Anyway, we must not fold it into the addressing mode at this point.
5379 // E.g.,
5380 // op = add opnd, 1
5381 // idx = ext op
5382 // addr = gep base, idx
5383 // is now:
5384 // promotedOpnd = ext opnd <- no match here
5385 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5386 // addr = gep base, op <- match
5387 if (MovedAway)
5388 *MovedAway = true;
5389
5390 assert(PromotedOperand &&
5391 "TypePromotionHelper should have filtered out those cases");
5392
5393 ExtAddrMode BackupAddrMode = AddrMode;
5394 unsigned OldSize = AddrModeInsts.size();
5395
5396 if (!matchAddr(PromotedOperand, Depth) ||
5397 // The total of the new cost is equal to the cost of the created
5398 // instructions.
5399 // The total of the old cost is equal to the cost of the extension plus
5400 // what we have saved in the addressing mode.
5401 !isPromotionProfitable(CreatedInstsCost,
5402 ExtCost + (AddrModeInsts.size() - OldSize),
5403 PromotedOperand)) {
5404 AddrMode = BackupAddrMode;
5405 AddrModeInsts.resize(OldSize);
5406 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5407 TPT.rollback(LastKnownGood);
5408 return false;
5409 }
5410
5411 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5412 AddrMode.replaceWith(Ext, PromotedOperand);
5413 return true;
5414 }
5415 case Instruction::Call:
5416 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5417 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5418 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5419 if (TLI.addressingModeSupportsTLS(GV))
5420 return matchAddr(AddrInst->getOperand(0), Depth);
5421 }
5422 }
5423 break;
5424 }
5425 return false;
5426}
5427
5428/// If we can, try to add the value of 'Addr' into the current addressing mode.
5429/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5430/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5431/// for the target.
5432///
5433bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5434 // Start a transaction at this point that we will rollback if the matching
5435 // fails.
5436 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5437 TPT.getRestorationPoint();
5438 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5439 if (CI->getValue().isSignedIntN(64)) {
5440 // Check if the addition would result in a signed overflow.
5441 int64_t Result;
5442 bool Overflow =
5443 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5444 if (!Overflow) {
5445 // Fold in immediates if legal for the target.
5446 AddrMode.BaseOffs = Result;
5447 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5448 return true;
5449 AddrMode.BaseOffs -= CI->getSExtValue();
5450 }
5451 }
5452 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5453 // If this is a global variable, try to fold it into the addressing mode.
5454 if (!AddrMode.BaseGV) {
5455 AddrMode.BaseGV = GV;
5456 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5457 return true;
5458 AddrMode.BaseGV = nullptr;
5459 }
5460 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5461 ExtAddrMode BackupAddrMode = AddrMode;
5462 unsigned OldSize = AddrModeInsts.size();
5463
5464 // Check to see if it is possible to fold this operation.
5465 bool MovedAway = false;
5466 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5467 // This instruction may have been moved away. If so, there is nothing
5468 // to check here.
5469 if (MovedAway)
5470 return true;
5471 // Okay, it's possible to fold this. Check to see if it is actually
5472 // *profitable* to do so. We use a simple cost model to avoid increasing
5473 // register pressure too much.
5474 if (I->hasOneUse() ||
5475 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5476 AddrModeInsts.push_back(I);
5477 return true;
5478 }
5479
5480 // It isn't profitable to do this, roll back.
5481 AddrMode = BackupAddrMode;
5482 AddrModeInsts.resize(OldSize);
5483 TPT.rollback(LastKnownGood);
5484 }
5485 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5486 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5487 return true;
5488 TPT.rollback(LastKnownGood);
5489 } else if (isa<ConstantPointerNull>(Addr)) {
5490 // Null pointer gets folded without affecting the addressing mode.
5491 return true;
5492 }
5493
5494 // Worse case, the target should support [reg] addressing modes. :)
5495 if (!AddrMode.HasBaseReg) {
5496 AddrMode.HasBaseReg = true;
5497 AddrMode.BaseReg = Addr;
5498 // Still check for legality in case the target supports [imm] but not [i+r].
5499 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5500 return true;
5501 AddrMode.HasBaseReg = false;
5502 AddrMode.BaseReg = nullptr;
5503 }
5504
5505 // If the base register is already taken, see if we can do [r+r].
5506 if (AddrMode.Scale == 0) {
5507 AddrMode.Scale = 1;
5508 AddrMode.ScaledReg = Addr;
5509 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5510 return true;
5511 AddrMode.Scale = 0;
5512 AddrMode.ScaledReg = nullptr;
5513 }
5514 // Couldn't match.
5515 TPT.rollback(LastKnownGood);
5516 return false;
5517}
5518
5519/// Check to see if all uses of OpVal by the specified inline asm call are due
5520/// to memory operands. If so, return true, otherwise return false.
5522 const TargetLowering &TLI,
5523 const TargetRegisterInfo &TRI) {
5524 const Function *F = CI->getFunction();
5525 TargetLowering::AsmOperandInfoVector TargetConstraints =
5526 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5527
5528 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5529 // Compute the constraint code and ConstraintType to use.
5530 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5531
5532 // If this asm operand is our Value*, and if it isn't an indirect memory
5533 // operand, we can't fold it! TODO: Also handle C_Address?
5534 if (OpInfo.CallOperandVal == OpVal &&
5535 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5536 !OpInfo.isIndirect))
5537 return false;
5538 }
5539
5540 return true;
5541}
5542
5543/// Recursively walk all the uses of I until we find a memory use.
5544/// If we find an obviously non-foldable instruction, return true.
5545/// Add accessed addresses and types to MemoryUses.
5547 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5548 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5549 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5550 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5551 // If we already considered this instruction, we're done.
5552 if (!ConsideredInsts.insert(I).second)
5553 return false;
5554
5555 // If this is an obviously unfoldable instruction, bail out.
5556 if (!MightBeFoldableInst(I))
5557 return true;
5558
5559 // Loop over all the uses, recursively processing them.
5560 for (Use &U : I->uses()) {
5561 // Conservatively return true if we're seeing a large number or a deep chain
5562 // of users. This avoids excessive compilation times in pathological cases.
5563 if (SeenInsts++ >= MaxAddressUsersToScan)
5564 return true;
5565
5566 Instruction *UserI = cast<Instruction>(U.getUser());
5567 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5568 MemoryUses.push_back({&U, LI->getType()});
5569 continue;
5570 }
5571
5572 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5573 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5574 return true; // Storing addr, not into addr.
5575 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5576 continue;
5577 }
5578
5579 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5580 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5581 return true; // Storing addr, not into addr.
5582 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5583 continue;
5584 }
5585
5587 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5588 return true; // Storing addr, not into addr.
5589 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5590 continue;
5591 }
5592
5595 Type *AccessTy;
5596 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5597 return true;
5598
5599 if (!find(PtrOps, U.get()))
5600 return true;
5601
5602 MemoryUses.push_back({&U, AccessTy});
5603 continue;
5604 }
5605
5606 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5607 if (CI->hasFnAttr(Attribute::Cold)) {
5608 // If this is a cold call, we can sink the addressing calculation into
5609 // the cold path. See optimizeCallInst
5610 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5611 continue;
5612 }
5613
5614 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5615 if (!IA)
5616 return true;
5617
5618 // If this is a memory operand, we're cool, otherwise bail out.
5619 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5620 return true;
5621 continue;
5622 }
5623
5624 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5625 PSI, BFI, SeenInsts))
5626 return true;
5627 }
5628
5629 return false;
5630}
5631
5633 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5634 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5636 unsigned SeenInsts = 0;
5637 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5638 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5639 PSI, BFI, SeenInsts);
5640}
5641
5642
5643/// Return true if Val is already known to be live at the use site that we're
5644/// folding it into. If so, there is no cost to include it in the addressing
5645/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5646/// instruction already.
5647bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5648 Value *KnownLive1,
5649 Value *KnownLive2) {
5650 // If Val is either of the known-live values, we know it is live!
5651 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5652 return true;
5653
5654 // All values other than instructions and arguments (e.g. constants) are live.
5655 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5656 return true;
5657
5658 // If Val is a constant sized alloca in the entry block, it is live, this is
5659 // true because it is just a reference to the stack/frame pointer, which is
5660 // live for the whole function.
5661 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5662 if (AI->isStaticAlloca())
5663 return true;
5664
5665 // Check to see if this value is already used in the memory instruction's
5666 // block. If so, it's already live into the block at the very least, so we
5667 // can reasonably fold it.
5668 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5669}
5670
5671/// It is possible for the addressing mode of the machine to fold the specified
5672/// instruction into a load or store that ultimately uses it.
5673/// However, the specified instruction has multiple uses.
5674/// Given this, it may actually increase register pressure to fold it
5675/// into the load. For example, consider this code:
5676///
5677/// X = ...
5678/// Y = X+1
5679/// use(Y) -> nonload/store
5680/// Z = Y+1
5681/// load Z
5682///
5683/// In this case, Y has multiple uses, and can be folded into the load of Z
5684/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5685/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5686/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5687/// number of computations either.
5688///
5689/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5690/// X was live across 'load Z' for other reasons, we actually *would* want to
5691/// fold the addressing mode in the Z case. This would make Y die earlier.
5692bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5693 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5694 if (IgnoreProfitability)
5695 return true;
5696
5697 // AMBefore is the addressing mode before this instruction was folded into it,
5698 // and AMAfter is the addressing mode after the instruction was folded. Get
5699 // the set of registers referenced by AMAfter and subtract out those
5700 // referenced by AMBefore: this is the set of values which folding in this
5701 // address extends the lifetime of.
5702 //
5703 // Note that there are only two potential values being referenced here,
5704 // BaseReg and ScaleReg (global addresses are always available, as are any
5705 // folded immediates).
5706 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5707
5708 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5709 // lifetime wasn't extended by adding this instruction.
5710 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5711 BaseReg = nullptr;
5712 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5713 ScaledReg = nullptr;
5714
5715 // If folding this instruction (and it's subexprs) didn't extend any live
5716 // ranges, we're ok with it.
5717 if (!BaseReg && !ScaledReg)
5718 return true;
5719
5720 // If all uses of this instruction can have the address mode sunk into them,
5721 // we can remove the addressing mode and effectively trade one live register
5722 // for another (at worst.) In this context, folding an addressing mode into
5723 // the use is just a particularly nice way of sinking it.
5725 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5726 return false; // Has a non-memory, non-foldable use!
5727
5728 // Now that we know that all uses of this instruction are part of a chain of
5729 // computation involving only operations that could theoretically be folded
5730 // into a memory use, loop over each of these memory operation uses and see
5731 // if they could *actually* fold the instruction. The assumption is that
5732 // addressing modes are cheap and that duplicating the computation involved
5733 // many times is worthwhile, even on a fastpath. For sinking candidates
5734 // (i.e. cold call sites), this serves as a way to prevent excessive code
5735 // growth since most architectures have some reasonable small and fast way to
5736 // compute an effective address. (i.e LEA on x86)
5737 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5738 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5739 Value *Address = Pair.first->get();
5740 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5741 Type *AddressAccessTy = Pair.second;
5742 unsigned AS = Address->getType()->getPointerAddressSpace();
5743
5744 // Do a match against the root of this address, ignoring profitability. This
5745 // will tell us if the addressing mode for the memory operation will
5746 // *actually* cover the shared instruction.
5747 ExtAddrMode Result;
5748 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5749 0);
5750 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5751 TPT.getRestorationPoint();
5752 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5753 AddressAccessTy, AS, UserI, Result,
5754 InsertedInsts, PromotedInsts, TPT,
5755 LargeOffsetGEP, OptSize, PSI, BFI);
5756 Matcher.IgnoreProfitability = true;
5757 bool Success = Matcher.matchAddr(Address, 0);
5758 (void)Success;
5759 assert(Success && "Couldn't select *anything*?");
5760
5761 // The match was to check the profitability, the changes made are not
5762 // part of the original matcher. Therefore, they should be dropped
5763 // otherwise the original matcher will not present the right state.
5764 TPT.rollback(LastKnownGood);
5765
5766 // If the match didn't cover I, then it won't be shared by it.
5767 if (!is_contained(MatchedAddrModeInsts, I))
5768 return false;
5769
5770 MatchedAddrModeInsts.clear();
5771 }
5772
5773 return true;
5774}
5775
5776/// Return true if the specified values are defined in a
5777/// different basic block than BB.
5778static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5780 return I->getParent() != BB;
5781 return false;
5782}
5783
5784// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5785// is the first instruction that will use Addr. So we need to find the first
5786// user of Addr in current BB.
5788 Value *SunkAddr) {
5789 if (Addr->hasOneUse())
5790 return MemoryInst->getIterator();
5791
5792 // We already have a SunkAddr in current BB, but we may need to insert cast
5793 // instruction after it.
5794 if (SunkAddr) {
5795 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5796 return std::next(AddrInst->getIterator());
5797 }
5798
5799 // Find the first user of Addr in current BB.
5800 Instruction *Earliest = MemoryInst;
5801 for (User *U : Addr->users()) {
5802 Instruction *UserInst = dyn_cast<Instruction>(U);
5803 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5804 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5805 continue;
5806 if (UserInst->comesBefore(Earliest))
5807 Earliest = UserInst;
5808 }
5809 }
5810 return Earliest->getIterator();
5811}
5812
5813/// Sink addressing mode computation immediate before MemoryInst if doing so
5814/// can be done without increasing register pressure. The need for the
5815/// register pressure constraint means this can end up being an all or nothing
5816/// decision for all uses of the same addressing computation.
5817///
5818/// Load and Store Instructions often have addressing modes that can do
5819/// significant amounts of computation. As such, instruction selection will try
5820/// to get the load or store to do as much computation as possible for the
5821/// program. The problem is that isel can only see within a single block. As
5822/// such, we sink as much legal addressing mode work into the block as possible.
5823///
5824/// This method is used to optimize both load/store and inline asms with memory
5825/// operands. It's also used to sink addressing computations feeding into cold
5826/// call sites into their (cold) basic block.
5827///
5828/// The motivation for handling sinking into cold blocks is that doing so can
5829/// both enable other address mode sinking (by satisfying the register pressure
5830/// constraint above), and reduce register pressure globally (by removing the
5831/// addressing mode computation from the fast path entirely.).
5832bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5833 Type *AccessTy, unsigned AddrSpace) {
5834 Value *Repl = Addr;
5835
5836 // Try to collapse single-value PHI nodes. This is necessary to undo
5837 // unprofitable PRE transformations.
5838 SmallVector<Value *, 8> worklist;
5839 SmallPtrSet<Value *, 16> Visited;
5840 worklist.push_back(Addr);
5841
5842 // Use a worklist to iteratively look through PHI and select nodes, and
5843 // ensure that the addressing mode obtained from the non-PHI/select roots of
5844 // the graph are compatible.
5845 bool PhiOrSelectSeen = false;
5846 SmallVector<Instruction *, 16> AddrModeInsts;
5847 AddressingModeCombiner AddrModes(*DL, Addr);
5848 TypePromotionTransaction TPT(RemovedInsts);
5849 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5850 TPT.getRestorationPoint();
5851 while (!worklist.empty()) {
5852 Value *V = worklist.pop_back_val();
5853
5854 // We allow traversing cyclic Phi nodes.
5855 // In case of success after this loop we ensure that traversing through
5856 // Phi nodes ends up with all cases to compute address of the form
5857 // BaseGV + Base + Scale * Index + Offset
5858 // where Scale and Offset are constants and BaseGV, Base and Index
5859 // are exactly the same Values in all cases.
5860 // It means that BaseGV, Scale and Offset dominate our memory instruction
5861 // and have the same value as they had in address computation represented
5862 // as Phi. So we can safely sink address computation to memory instruction.
5863 if (!Visited.insert(V).second)
5864 continue;
5865
5866 // For a PHI node, push all of its incoming values.
5867 if (PHINode *P = dyn_cast<PHINode>(V)) {
5868 append_range(worklist, P->incoming_values());
5869 PhiOrSelectSeen = true;
5870 continue;
5871 }
5872 // Similar for select.
5873 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5874 worklist.push_back(SI->getFalseValue());
5875 worklist.push_back(SI->getTrueValue());
5876 PhiOrSelectSeen = true;
5877 continue;
5878 }
5879
5880 // For non-PHIs, determine the addressing mode being computed. Note that
5881 // the result may differ depending on what other uses our candidate
5882 // addressing instructions might have.
5883 AddrModeInsts.clear();
5884 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5885 0);
5886 // Defer the query (and possible computation of) the dom tree to point of
5887 // actual use. It's expected that most address matches don't actually need
5888 // the domtree.
5889 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5890 Function *F = MemoryInst->getParent()->getParent();
5891 return this->getDT(*F);
5892 };
5893 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5894 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5895 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5896 BFI);
5897
5898 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5899 if (GEP && !NewGEPBases.count(GEP)) {
5900 // If splitting the underlying data structure can reduce the offset of a
5901 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5902 // previously split data structures.
5903 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5904 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5905 }
5906
5907 NewAddrMode.OriginalValue = V;
5908 if (!AddrModes.addNewAddrMode(NewAddrMode))
5909 break;
5910 }
5911
5912 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5913 // or we have multiple but either couldn't combine them or combining them
5914 // wouldn't do anything useful, bail out now.
5915 if (!AddrModes.combineAddrModes()) {
5916 TPT.rollback(LastKnownGood);
5917 return false;
5918 }
5919 bool Modified = TPT.commit();
5920
5921 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5922 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5923
5924 // If all the instructions matched are already in this BB, don't do anything.
5925 // If we saw a Phi node then it is not local definitely, and if we saw a
5926 // select then we want to push the address calculation past it even if it's
5927 // already in this BB.
5928 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5929 return IsNonLocalValue(V, MemoryInst->getParent());
5930 })) {
5931 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5932 << "\n");
5933 return Modified;
5934 }
5935
5936 // Now that we determined the addressing expression we want to use and know
5937 // that we have to sink it into this block. Check to see if we have already
5938 // done this for some other load/store instr in this block. If so, reuse
5939 // the computation. Before attempting reuse, check if the address is valid
5940 // as it may have been erased.
5941
5942 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5943
5944 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5945 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5946
5947 // The current BB may be optimized multiple times, we can't guarantee the
5948 // reuse of Addr happens later, call findInsertPos to find an appropriate
5949 // insert position.
5950 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5951
5952 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5953 if (!SunkAddr) {
5954 auto &DT = getDT(*MemoryInst->getFunction());
5955 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5956 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5957 return Modified;
5958 }
5959
5960 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5961
5962 if (SunkAddr) {
5963 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5964 << " for " << *MemoryInst << "\n");
5965 if (SunkAddr->getType() != Addr->getType()) {
5966 if (SunkAddr->getType()->getPointerAddressSpace() !=
5967 Addr->getType()->getPointerAddressSpace() &&
5968 !DL->isNonIntegralPointerType(Addr->getType())) {
5969 // There are two reasons the address spaces might not match: a no-op
5970 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5971 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5972 // TODO: allow bitcast between different address space pointers with the
5973 // same size.
5974 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5975 SunkAddr =
5976 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5977 } else
5978 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5979 }
5981 SubtargetInfo->addrSinkUsingGEPs())) {
5982 // By default, we use the GEP-based method when AA is used later. This
5983 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5984 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5985 << " for " << *MemoryInst << "\n");
5986 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5987
5988 // First, find the pointer.
5989 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5990 ResultPtr = AddrMode.BaseReg;
5991 AddrMode.BaseReg = nullptr;
5992 }
5993
5994 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5995 // We can't add more than one pointer together, nor can we scale a
5996 // pointer (both of which seem meaningless).
5997 if (ResultPtr || AddrMode.Scale != 1)
5998 return Modified;
5999
6000 ResultPtr = AddrMode.ScaledReg;
6001 AddrMode.Scale = 0;
6002 }
6003
6004 // It is only safe to sign extend the BaseReg if we know that the math
6005 // required to create it did not overflow before we extend it. Since
6006 // the original IR value was tossed in favor of a constant back when
6007 // the AddrMode was created we need to bail out gracefully if widths
6008 // do not match instead of extending it.
6009 //
6010 // (See below for code to add the scale.)
6011 if (AddrMode.Scale) {
6012 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6013 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6014 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6015 return Modified;
6016 }
6017
6018 GlobalValue *BaseGV = AddrMode.BaseGV;
6019 if (BaseGV != nullptr) {
6020 if (ResultPtr)
6021 return Modified;
6022
6023 if (BaseGV->isThreadLocal()) {
6024 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6025 } else {
6026 ResultPtr = BaseGV;
6027 }
6028 }
6029
6030 // If the real base value actually came from an inttoptr, then the matcher
6031 // will look through it and provide only the integer value. In that case,
6032 // use it here.
6033 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6034 if (!ResultPtr && AddrMode.BaseReg) {
6035 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6036 "sunkaddr");
6037 AddrMode.BaseReg = nullptr;
6038 } else if (!ResultPtr && AddrMode.Scale == 1) {
6039 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6040 "sunkaddr");
6041 AddrMode.Scale = 0;
6042 }
6043 }
6044
6045 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6046 !AddrMode.BaseOffs) {
6047 SunkAddr = Constant::getNullValue(Addr->getType());
6048 } else if (!ResultPtr) {
6049 return Modified;
6050 } else {
6051 Type *I8PtrTy =
6052 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6053
6054 // Start with the base register. Do this first so that subsequent address
6055 // matching finds it last, which will prevent it from trying to match it
6056 // as the scaled value in case it happens to be a mul. That would be
6057 // problematic if we've sunk a different mul for the scale, because then
6058 // we'd end up sinking both muls.
6059 if (AddrMode.BaseReg) {
6060 Value *V = AddrMode.BaseReg;
6061 if (V->getType() != IntPtrTy)
6062 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6063
6064 ResultIndex = V;
6065 }
6066
6067 // Add the scale value.
6068 if (AddrMode.Scale) {
6069 Value *V = AddrMode.ScaledReg;
6070 if (V->getType() == IntPtrTy) {
6071 // done.
6072 } else {
6073 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6074 cast<IntegerType>(V->getType())->getBitWidth() &&
6075 "We can't transform if ScaledReg is too narrow");
6076 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6077 }
6078
6079 if (AddrMode.Scale != 1)
6080 V = Builder.CreateMul(
6081 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6082 if (ResultIndex)
6083 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6084 else
6085 ResultIndex = V;
6086 }
6087
6088 // Add in the Base Offset if present.
6089 if (AddrMode.BaseOffs) {
6090 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6091 if (ResultIndex) {
6092 // We need to add this separately from the scale above to help with
6093 // SDAG consecutive load/store merging.
6094 if (ResultPtr->getType() != I8PtrTy)
6095 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6096 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6097 AddrMode.InBounds);
6098 }
6099
6100 ResultIndex = V;
6101 }
6102
6103 if (!ResultIndex) {
6104 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6105 // We know that we have a pointer without any offsets. If this pointer
6106 // originates from a different basic block than the current one, we
6107 // must be able to recreate it in the current basic block.
6108 // We do not support the recreation of any instructions yet.
6109 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6110 return Modified;
6111 SunkAddr = ResultPtr;
6112 } else {
6113 if (ResultPtr->getType() != I8PtrTy)
6114 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6115 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6116 AddrMode.InBounds);
6117 }
6118
6119 if (SunkAddr->getType() != Addr->getType()) {
6120 if (SunkAddr->getType()->getPointerAddressSpace() !=
6121 Addr->getType()->getPointerAddressSpace() &&
6122 !DL->isNonIntegralPointerType(Addr->getType())) {
6123 // There are two reasons the address spaces might not match: a no-op
6124 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6125 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6126 // TODO: allow bitcast between different address space pointers with
6127 // the same size.
6128 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6129 SunkAddr =
6130 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6131 } else
6132 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6133 }
6134 }
6135 } else {
6136 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6137 // non-integral pointers, so in that case bail out now.
6138 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6139 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6140 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6141 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6142 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6143 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6144 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6145 (AddrMode.BaseGV &&
6146 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6147 return Modified;
6148
6149 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6150 << " for " << *MemoryInst << "\n");
6151 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6152 Value *Result = nullptr;
6153
6154 // Start with the base register. Do this first so that subsequent address
6155 // matching finds it last, which will prevent it from trying to match it
6156 // as the scaled value in case it happens to be a mul. That would be
6157 // problematic if we've sunk a different mul for the scale, because then
6158 // we'd end up sinking both muls.
6159 if (AddrMode.BaseReg) {
6160 Value *V = AddrMode.BaseReg;
6161 if (V->getType()->isPointerTy())
6162 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6163 if (V->getType() != IntPtrTy)
6164 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6165 Result = V;
6166 }
6167
6168 // Add the scale value.
6169 if (AddrMode.Scale) {
6170 Value *V = AddrMode.ScaledReg;
6171 if (V->getType() == IntPtrTy) {
6172 // done.
6173 } else if (V->getType()->isPointerTy()) {
6174 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6175 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6176 cast<IntegerType>(V->getType())->getBitWidth()) {
6177 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6178 } else {
6179 // It is only safe to sign extend the BaseReg if we know that the math
6180 // required to create it did not overflow before we extend it. Since
6181 // the original IR value was tossed in favor of a constant back when
6182 // the AddrMode was created we need to bail out gracefully if widths
6183 // do not match instead of extending it.
6185 if (I && (Result != AddrMode.BaseReg))
6186 I->eraseFromParent();
6187 return Modified;
6188 }
6189 if (AddrMode.Scale != 1)
6190 V = Builder.CreateMul(
6191 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6192 if (Result)
6193 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6194 else
6195 Result = V;
6196 }
6197
6198 // Add in the BaseGV if present.
6199 GlobalValue *BaseGV = AddrMode.BaseGV;
6200 if (BaseGV != nullptr) {
6201 Value *BaseGVPtr;
6202 if (BaseGV->isThreadLocal()) {
6203 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6204 } else {
6205 BaseGVPtr = BaseGV;
6206 }
6207 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6208 if (Result)
6209 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6210 else
6211 Result = V;
6212 }
6213
6214 // Add in the Base Offset if present.
6215 if (AddrMode.BaseOffs) {
6216 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6217 if (Result)
6218 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6219 else
6220 Result = V;
6221 }
6222
6223 if (!Result)
6224 SunkAddr = Constant::getNullValue(Addr->getType());
6225 else
6226 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6227 }
6228
6229 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6230 // Store the newly computed address into the cache. In the case we reused a
6231 // value, this should be idempotent.
6232 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6233
6234 // If we have no uses, recursively delete the value and all dead instructions
6235 // using it.
6236 if (Repl->use_empty()) {
6237 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6238 RecursivelyDeleteTriviallyDeadInstructions(
6239 Repl, TLInfo, nullptr,
6240 [&](Value *V) { removeAllAssertingVHReferences(V); });
6241 });
6242 }
6243 ++NumMemoryInsts;
6244 return true;
6245}
6246
6247/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6248/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6249/// only handle a 2 operand GEP in the same basic block or a splat constant
6250/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6251/// index.
6252///
6253/// If the existing GEP has a vector base pointer that is splat, we can look
6254/// through the splat to find the scalar pointer. If we can't find a scalar
6255/// pointer there's nothing we can do.
6256///
6257/// If we have a GEP with more than 2 indices where the middle indices are all
6258/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6259///
6260/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6261/// followed by a GEP with an all zeroes vector index. This will enable
6262/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6263/// zero index.
6264bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6265 Value *Ptr) {
6266 Value *NewAddr;
6267
6268 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6269 // Don't optimize GEPs that don't have indices.
6270 if (!GEP->hasIndices())
6271 return false;
6272
6273 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6274 // FIXME: We should support this by sinking the GEP.
6275 if (MemoryInst->getParent() != GEP->getParent())
6276 return false;
6277
6278 SmallVector<Value *, 2> Ops(GEP->operands());
6279
6280 bool RewriteGEP = false;
6281
6282 if (Ops[0]->getType()->isVectorTy()) {
6283 Ops[0] = getSplatValue(Ops[0]);
6284 if (!Ops[0])
6285 return false;
6286 RewriteGEP = true;
6287 }
6288
6289 unsigned FinalIndex = Ops.size() - 1;
6290
6291 // Ensure all but the last index is 0.
6292 // FIXME: This isn't strictly required. All that's required is that they are
6293 // all scalars or splats.
6294 for (unsigned i = 1; i < FinalIndex; ++i) {
6295 auto *C = dyn_cast<Constant>(Ops[i]);
6296 if (!C)
6297 return false;
6298 if (isa<VectorType>(C->getType()))
6299 C = C->getSplatValue();
6300 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6301 if (!CI || !CI->isZero())
6302 return false;
6303 // Scalarize the index if needed.
6304 Ops[i] = CI;
6305 }
6306
6307 // Try to scalarize the final index.
6308 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6309 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6310 auto *C = dyn_cast<ConstantInt>(V);
6311 // Don't scalarize all zeros vector.
6312 if (!C || !C->isZero()) {
6313 Ops[FinalIndex] = V;
6314 RewriteGEP = true;
6315 }
6316 }
6317 }
6318
6319 // If we made any changes or the we have extra operands, we need to generate
6320 // new instructions.
6321 if (!RewriteGEP && Ops.size() == 2)
6322 return false;
6323
6324 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6325
6326 IRBuilder<> Builder(MemoryInst);
6327
6328 Type *SourceTy = GEP->getSourceElementType();
6329 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6330
6331 // If the final index isn't a vector, emit a scalar GEP containing all ops
6332 // and a vector GEP with all zeroes final index.
6333 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6334 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6335 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6336 auto *SecondTy = GetElementPtrInst::getIndexedType(
6337 SourceTy, ArrayRef(Ops).drop_front());
6338 NewAddr =
6339 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6340 } else {
6341 Value *Base = Ops[0];
6342 Value *Index = Ops[FinalIndex];
6343
6344 // Create a scalar GEP if there are more than 2 operands.
6345 if (Ops.size() != 2) {
6346 // Replace the last index with 0.
6347 Ops[FinalIndex] =
6348 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6349 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6351 SourceTy, ArrayRef(Ops).drop_front());
6352 }
6353
6354 // Now create the GEP with scalar pointer and vector index.
6355 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6356 }
6357 } else if (!isa<Constant>(Ptr)) {
6358 // Not a GEP, maybe its a splat and we can create a GEP to enable
6359 // SelectionDAGBuilder to use it as a uniform base.
6360 Value *V = getSplatValue(Ptr);
6361 if (!V)
6362 return false;
6363
6364 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6365
6366 IRBuilder<> Builder(MemoryInst);
6367
6368 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6369 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6370 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6371 Type *ScalarTy;
6372 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6373 Intrinsic::masked_gather) {
6374 ScalarTy = MemoryInst->getType()->getScalarType();
6375 } else {
6376 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6377 Intrinsic::masked_scatter);
6378 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6379 }
6380 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6381 } else {
6382 // Constant, SelectionDAGBuilder knows to check if its a splat.
6383 return false;
6384 }
6385
6386 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6387
6388 // If we have no uses, recursively delete the value and all dead instructions
6389 // using it.
6390 if (Ptr->use_empty())
6392 Ptr, TLInfo, nullptr,
6393 [&](Value *V) { removeAllAssertingVHReferences(V); });
6394
6395 return true;
6396}
6397
6398// This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
6399// Check the pattern we are interested in where there are maximum 2 uses
6400// of the intrinsic which are the extract instructions.
6402 ExtractValueInst *&OverflowExtract) {
6403 // Bail out if it's more than 2 users:
6404 if (I->hasNUsesOrMore(3))
6405 return false;
6406
6407 for (User *U : I->users()) {
6408 auto *Extract = dyn_cast<ExtractValueInst>(U);
6409 if (!Extract || Extract->getNumIndices() != 1)
6410 return false;
6411
6412 unsigned Index = Extract->getIndices()[0];
6413 if (Index == 0)
6414 MulExtract = Extract;
6415 else if (Index == 1)
6416 OverflowExtract = Extract;
6417 else
6418 return false;
6419 }
6420 return true;
6421}
6422
6423// Rewrite the mul_with_overflow intrinsic by checking if both of the
6424// operands' value ranges are within the legal type. If so, we can optimize the
6425// multiplication algorithm. This code is supposed to be written during the step
6426// of type legalization, but given that we need to reconstruct the IR which is
6427// not doable there, we do it here.
6428// The IR after the optimization will look like:
6429// entry:
6430// if signed:
6431// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
6432// overflow_no
6433// else:
6434// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
6435// overflow_no:
6436// overflow:
6437// overflow.res:
6438// \returns true if optimization was applied
6439// TODO: This optimization can be further improved to optimize branching on
6440// overflow where the 'overflow_no' BB can branch directly to the false
6441// successor of overflow, but that would add additional complexity so we leave
6442// it for future work.
6443bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
6444 ModifyDT &ModifiedDT) {
6445 // Check if target supports this optimization.
6447 I->getContext(),
6448 TLI->getValueType(*DL, I->getType()->getContainedType(0))))
6449 return false;
6450
6451 ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
6452 if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
6453 return false;
6454
6455 // Keep track of the instruction to stop reoptimizing it again.
6456 InsertedInsts.insert(I);
6457
6458 Value *LHS = I->getOperand(0);
6459 Value *RHS = I->getOperand(1);
6460 Type *Ty = LHS->getType();
6461 unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
6462 Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
6463
6464 // New BBs:
6465 BasicBlock *OverflowEntryBB = I->getParent()->splitBasicBlockBefore(I, "");
6466 OverflowEntryBB->takeName(I->getParent());
6467 // Keep the 'br' instruction that is generated as a result of the split to be
6468 // erased/replaced later.
6469 Instruction *OldTerminator = OverflowEntryBB->getTerminator();
6470 BasicBlock *NoOverflowBB =
6471 BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
6472 NoOverflowBB->moveAfter(OverflowEntryBB);
6473 BasicBlock *OverflowBB =
6474 BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
6475 OverflowBB->moveAfter(NoOverflowBB);
6476
6477 // BB overflow.entry:
6478 IRBuilder<> Builder(OverflowEntryBB);
6479 // Extract low and high halves of LHS:
6480 Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
6481 Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
6482 HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
6483
6484 // Extract low and high halves of RHS:
6485 Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
6486 Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
6487 HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
6488
6489 Value *IsAnyBitTrue;
6490 if (IsSigned) {
6491 Value *SignLoLHS =
6492 Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
6493 Value *SignLoRHS =
6494 Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
6495 Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
6496 Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
6497 Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
6498 IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
6499 ConstantInt::getNullValue(Or->getType()));
6500 } else {
6501 Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
6502 ConstantInt::getNullValue(LegalTy));
6503 Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
6504 ConstantInt::getNullValue(LegalTy));
6505 IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
6506 }
6507 Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
6508
6509 // BB overflow.no:
6510 Builder.SetInsertPoint(NoOverflowBB);
6511 Value *ExtLoLHS, *ExtLoRHS;
6512 if (IsSigned) {
6513 ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
6514 ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
6515 } else {
6516 ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
6517 ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
6518 }
6519
6520 Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
6521
6522 // Create the 'overflow.res' BB to merge the results of
6523 // the two paths:
6524 BasicBlock *OverflowResBB = I->getParent();
6525 OverflowResBB->setName("overflow.res");
6526
6527 // BB overflow.no: jump to overflow.res BB
6528 Builder.CreateBr(OverflowResBB);
6529 // No we don't need the old terminator in overflow.entry BB, erase it:
6530 OldTerminator->eraseFromParent();
6531
6532 // BB overflow.res:
6533 Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6534 // Create PHI nodes to merge results from no.overflow BB and overflow BB to
6535 // replace the extract instructions.
6536 PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
6537 *OverflowFlagPHI =
6538 Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6539
6540 // Add the incoming values from no.overflow BB and later from overflow BB.
6541 OverflowResPHI->addIncoming(Mul, NoOverflowBB);
6542 OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
6543 NoOverflowBB);
6544
6545 // Replace all users of MulExtract and OverflowExtract to use the PHI nodes.
6546 if (MulExtract) {
6547 MulExtract->replaceAllUsesWith(OverflowResPHI);
6548 MulExtract->eraseFromParent();
6549 }
6550 if (OverflowExtract) {
6551 OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
6552 OverflowExtract->eraseFromParent();
6553 }
6554
6555 // Remove the intrinsic from parent (overflow.res BB) as it will be part of
6556 // overflow BB
6557 I->removeFromParent();
6558 // BB overflow:
6559 I->insertInto(OverflowBB, OverflowBB->end());
6560 Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6561 Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6562 Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
6563 Builder.CreateBr(OverflowResBB);
6564
6565 // Add The Extracted values to the PHINodes in the overflow.res BB.
6566 OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
6567 OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
6568
6569 ModifiedDT = ModifyDT::ModifyBBDT;
6570 return true;
6571}
6572
6573/// If there are any memory operands, use OptimizeMemoryInst to sink their
6574/// address computing into the block when possible / profitable.
6575bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6576 bool MadeChange = false;
6577
6578 const TargetRegisterInfo *TRI =
6580 TargetLowering::AsmOperandInfoVector TargetConstraints =
6581 TLI->ParseConstraints(*DL, TRI, *CS);
6582 unsigned ArgNo = 0;
6583 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6584 // Compute the constraint code and ConstraintType to use.
6585 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6586
6587 // TODO: Also handle C_Address?
6588 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6589 OpInfo.isIndirect) {
6590 Value *OpVal = CS->getArgOperand(ArgNo++);
6591 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6592 } else if (OpInfo.Type == InlineAsm::isInput)
6593 ArgNo++;
6594 }
6595
6596 return MadeChange;
6597}
6598
6599/// Check if all the uses of \p Val are equivalent (or free) zero or
6600/// sign extensions.
6601static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6602 assert(!Val->use_empty() && "Input must have at least one use");
6603 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6604 bool IsSExt = isa<SExtInst>(FirstUser);
6605 Type *ExtTy = FirstUser->getType();
6606 for (const User *U : Val->users()) {
6607 const Instruction *UI = cast<Instruction>(U);
6608 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6609 return false;
6610 Type *CurTy = UI->getType();
6611 // Same input and output types: Same instruction after CSE.
6612 if (CurTy == ExtTy)
6613 continue;
6614
6615 // If IsSExt is true, we are in this situation:
6616 // a = Val
6617 // b = sext ty1 a to ty2
6618 // c = sext ty1 a to ty3
6619 // Assuming ty2 is shorter than ty3, this could be turned into:
6620 // a = Val
6621 // b = sext ty1 a to ty2
6622 // c = sext ty2 b to ty3
6623 // However, the last sext is not free.
6624 if (IsSExt)
6625 return false;
6626
6627 // This is a ZExt, maybe this is free to extend from one type to another.
6628 // In that case, we would not account for a different use.
6629 Type *NarrowTy;
6630 Type *LargeTy;
6631 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6632 CurTy->getScalarType()->getIntegerBitWidth()) {
6633 NarrowTy = CurTy;
6634 LargeTy = ExtTy;
6635 } else {
6636 NarrowTy = ExtTy;
6637 LargeTy = CurTy;
6638 }
6639
6640 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6641 return false;
6642 }
6643 // All uses are the same or can be derived from one another for free.
6644 return true;
6645}
6646
6647/// Try to speculatively promote extensions in \p Exts and continue
6648/// promoting through newly promoted operands recursively as far as doing so is
6649/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6650/// When some promotion happened, \p TPT contains the proper state to revert
6651/// them.
6652///
6653/// \return true if some promotion happened, false otherwise.
6654bool CodeGenPrepare::tryToPromoteExts(
6655 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6656 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6657 unsigned CreatedInstsCost) {
6658 bool Promoted = false;
6659
6660 // Iterate over all the extensions to try to promote them.
6661 for (auto *I : Exts) {
6662 // Early check if we directly have ext(load).
6663 if (isa<LoadInst>(I->getOperand(0))) {
6664 ProfitablyMovedExts.push_back(I);
6665 continue;
6666 }
6667
6668 // Check whether or not we want to do any promotion. The reason we have
6669 // this check inside the for loop is to catch the case where an extension
6670 // is directly fed by a load because in such case the extension can be moved
6671 // up without any promotion on its operands.
6673 return false;
6674
6675 // Get the action to perform the promotion.
6676 TypePromotionHelper::Action TPH =
6677 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6678 // Check if we can promote.
6679 if (!TPH) {
6680 // Save the current extension as we cannot move up through its operand.
6681 ProfitablyMovedExts.push_back(I);
6682 continue;
6683 }
6684
6685 // Save the current state.
6686 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6687 TPT.getRestorationPoint();
6688 SmallVector<Instruction *, 4> NewExts;
6689 unsigned NewCreatedInstsCost = 0;
6690 unsigned ExtCost = !TLI->isExtFree(I);
6691 // Promote.
6692 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6693 &NewExts, nullptr, *TLI);
6694 assert(PromotedVal &&
6695 "TypePromotionHelper should have filtered out those cases");
6696
6697 // We would be able to merge only one extension in a load.
6698 // Therefore, if we have more than 1 new extension we heuristically
6699 // cut this search path, because it means we degrade the code quality.
6700 // With exactly 2, the transformation is neutral, because we will merge
6701 // one extension but leave one. However, we optimistically keep going,
6702 // because the new extension may be removed too. Also avoid replacing a
6703 // single free extension with multiple extensions, as this increases the
6704 // number of IR instructions while not providing any savings.
6705 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6706 // FIXME: It would be possible to propagate a negative value instead of
6707 // conservatively ceiling it to 0.
6708 TotalCreatedInstsCost =
6709 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6710 if (!StressExtLdPromotion &&
6711 (TotalCreatedInstsCost > 1 ||
6712 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6713 (ExtCost == 0 && NewExts.size() > 1))) {
6714 // This promotion is not profitable, rollback to the previous state, and
6715 // save the current extension in ProfitablyMovedExts as the latest
6716 // speculative promotion turned out to be unprofitable.
6717 TPT.rollback(LastKnownGood);
6718 ProfitablyMovedExts.push_back(I);
6719 continue;
6720 }
6721 // Continue promoting NewExts as far as doing so is profitable.
6722 SmallVector<Instruction *, 2> NewlyMovedExts;
6723 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6724 bool NewPromoted = false;
6725 for (auto *ExtInst : NewlyMovedExts) {
6726 Instruction *MovedExt = cast<Instruction>(ExtInst);
6727 Value *ExtOperand = MovedExt->getOperand(0);
6728 // If we have reached to a load, we need this extra profitability check
6729 // as it could potentially be merged into an ext(load).
6730 if (isa<LoadInst>(ExtOperand) &&
6731 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6732 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6733 continue;
6734
6735 ProfitablyMovedExts.push_back(MovedExt);
6736 NewPromoted = true;
6737 }
6738
6739 // If none of speculative promotions for NewExts is profitable, rollback
6740 // and save the current extension (I) as the last profitable extension.
6741 if (!NewPromoted) {
6742 TPT.rollback(LastKnownGood);
6743 ProfitablyMovedExts.push_back(I);
6744 continue;
6745 }
6746 // The promotion is profitable.
6747 Promoted = true;
6748 }
6749 return Promoted;
6750}
6751
6752/// Merging redundant sexts when one is dominating the other.
6753bool CodeGenPrepare::mergeSExts(Function &F) {
6754 bool Changed = false;
6755 for (auto &Entry : ValToSExtendedUses) {
6756 SExts &Insts = Entry.second;
6757 SExts CurPts;
6758 for (Instruction *Inst : Insts) {
6759 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6760 Inst->getOperand(0) != Entry.first)
6761 continue;
6762 bool inserted = false;
6763 for (auto &Pt : CurPts) {
6764 if (getDT(F).dominates(Inst, Pt)) {
6765 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6766 RemovedInsts.insert(Pt);
6767 Pt->removeFromParent();
6768 Pt = Inst;
6769 inserted = true;
6770 Changed = true;
6771 break;
6772 }
6773 if (!getDT(F).dominates(Pt, Inst))
6774 // Give up if we need to merge in a common dominator as the
6775 // experiments show it is not profitable.
6776 continue;
6777 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6778 RemovedInsts.insert(Inst);
6779 Inst->removeFromParent();
6780 inserted = true;
6781 Changed = true;
6782 break;
6783 }
6784 if (!inserted)
6785 CurPts.push_back(Inst);
6786 }
6787 }
6788 return Changed;
6789}
6790
6791// Splitting large data structures so that the GEPs accessing them can have
6792// smaller offsets so that they can be sunk to the same blocks as their users.
6793// For example, a large struct starting from %base is split into two parts
6794// where the second part starts from %new_base.
6795//
6796// Before:
6797// BB0:
6798// %base =
6799//
6800// BB1:
6801// %gep0 = gep %base, off0
6802// %gep1 = gep %base, off1
6803// %gep2 = gep %base, off2
6804//
6805// BB2:
6806// %load1 = load %gep0
6807// %load2 = load %gep1
6808// %load3 = load %gep2
6809//
6810// After:
6811// BB0:
6812// %base =
6813// %new_base = gep %base, off0
6814//
6815// BB1:
6816// %new_gep0 = %new_base
6817// %new_gep1 = gep %new_base, off1 - off0
6818// %new_gep2 = gep %new_base, off2 - off0
6819//
6820// BB2:
6821// %load1 = load i32, i32* %new_gep0
6822// %load2 = load i32, i32* %new_gep1
6823// %load3 = load i32, i32* %new_gep2
6824//
6825// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6826// their offsets are small enough to fit into the addressing mode.
6827bool CodeGenPrepare::splitLargeGEPOffsets() {
6828 bool Changed = false;
6829 for (auto &Entry : LargeOffsetGEPMap) {
6830 Value *OldBase = Entry.first;
6831 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6832 &LargeOffsetGEPs = Entry.second;
6833 auto compareGEPOffset =
6834 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6835 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6836 if (LHS.first == RHS.first)
6837 return false;
6838 if (LHS.second != RHS.second)
6839 return LHS.second < RHS.second;
6840 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6841 };
6842 // Sorting all the GEPs of the same data structures based on the offsets.
6843 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6844 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6845 // Skip if all the GEPs have the same offsets.
6846 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6847 continue;
6848 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6849 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6850 Value *NewBaseGEP = nullptr;
6851
6852 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6853 GetElementPtrInst *GEP) {
6854 LLVMContext &Ctx = GEP->getContext();
6855 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6856 Type *I8PtrTy =
6857 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6858
6859 BasicBlock::iterator NewBaseInsertPt;
6860 BasicBlock *NewBaseInsertBB;
6861 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6862 // If the base of the struct is an instruction, the new base will be
6863 // inserted close to it.
6864 NewBaseInsertBB = BaseI->getParent();
6865 if (isa<PHINode>(BaseI))
6866 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6867 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6868 NewBaseInsertBB =
6869 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(),
6870 &getDT(*NewBaseInsertBB->getParent()), LI);
6871 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6872 } else
6873 NewBaseInsertPt = std::next(BaseI->getIterator());
6874 } else {
6875 // If the current base is an argument or global value, the new base
6876 // will be inserted to the entry block.
6877 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6878 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6879 }
6880 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6881 // Create a new base.
6882 // TODO: Avoid implicit trunc?
6883 // See https://github.com/llvm/llvm-project/issues/112510.
6884 Value *BaseIndex =
6885 ConstantInt::getSigned(PtrIdxTy, BaseOffset, /*ImplicitTrunc=*/true);
6886 NewBaseGEP = OldBase;
6887 if (NewBaseGEP->getType() != I8PtrTy)
6888 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6889 NewBaseGEP =
6890 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6891 NewGEPBases.insert(NewBaseGEP);
6892 return;
6893 };
6894
6895 // Check whether all the offsets can be encoded with prefered common base.
6896 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6897 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6898 BaseOffset = PreferBase;
6899 // Create a new base if the offset of the BaseGEP can be decoded with one
6900 // instruction.
6901 createNewBase(BaseOffset, OldBase, BaseGEP);
6902 }
6903
6904 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6905 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6906 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6907 int64_t Offset = LargeOffsetGEP->second;
6908 if (Offset != BaseOffset) {
6909 TargetLowering::AddrMode AddrMode;
6910 AddrMode.HasBaseReg = true;
6911 AddrMode.BaseOffs = Offset - BaseOffset;
6912 // The result type of the GEP might not be the type of the memory
6913 // access.
6914 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6915 GEP->getResultElementType(),
6916 GEP->getAddressSpace())) {
6917 // We need to create a new base if the offset to the current base is
6918 // too large to fit into the addressing mode. So, a very large struct
6919 // may be split into several parts.
6920 BaseGEP = GEP;
6921 BaseOffset = Offset;
6922 NewBaseGEP = nullptr;
6923 }
6924 }
6925
6926 // Generate a new GEP to replace the current one.
6927 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6928
6929 if (!NewBaseGEP) {
6930 // Create a new base if we don't have one yet. Find the insertion
6931 // pointer for the new base first.
6932 createNewBase(BaseOffset, OldBase, GEP);
6933 }
6934
6935 IRBuilder<> Builder(GEP);
6936 Value *NewGEP = NewBaseGEP;
6937 if (Offset != BaseOffset) {
6938 // Calculate the new offset for the new GEP.
6939 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6940 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6941 }
6942 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6943 LargeOffsetGEPID.erase(GEP);
6944 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6945 GEP->eraseFromParent();
6946 Changed = true;
6947 }
6948 }
6949 return Changed;
6950}
6951
6952bool CodeGenPrepare::optimizePhiType(
6953 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6954 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6955 // We are looking for a collection on interconnected phi nodes that together
6956 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6957 // are of the same type. Convert the whole set of nodes to the type of the
6958 // bitcast.
6959 Type *PhiTy = I->getType();
6960 Type *ConvertTy = nullptr;
6961 if (Visited.count(I) ||
6962 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6963 return false;
6964
6965 SmallVector<Instruction *, 4> Worklist;
6966 Worklist.push_back(cast<Instruction>(I));
6967 SmallPtrSet<PHINode *, 4> PhiNodes;
6968 SmallPtrSet<ConstantData *, 4> Constants;
6969 PhiNodes.insert(I);
6970 Visited.insert(I);
6971 SmallPtrSet<Instruction *, 4> Defs;
6972 SmallPtrSet<Instruction *, 4> Uses;
6973 // This works by adding extra bitcasts between load/stores and removing
6974 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6975 // we can get in the situation where we remove a bitcast in one iteration
6976 // just to add it again in the next. We need to ensure that at least one
6977 // bitcast we remove are anchored to something that will not change back.
6978 bool AnyAnchored = false;
6979
6980 while (!Worklist.empty()) {
6981 Instruction *II = Worklist.pop_back_val();
6982
6983 if (auto *Phi = dyn_cast<PHINode>(II)) {
6984 // Handle Defs, which might also be PHI's
6985 for (Value *V : Phi->incoming_values()) {
6986 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6987 if (!PhiNodes.count(OpPhi)) {
6988 if (!Visited.insert(OpPhi).second)
6989 return false;
6990 PhiNodes.insert(OpPhi);
6991 Worklist.push_back(OpPhi);
6992 }
6993 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6994 if (!OpLoad->isSimple())
6995 return false;
6996 if (Defs.insert(OpLoad).second)
6997 Worklist.push_back(OpLoad);
6998 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6999 if (Defs.insert(OpEx).second)
7000 Worklist.push_back(OpEx);
7001 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7002 if (!ConvertTy)
7003 ConvertTy = OpBC->getOperand(0)->getType();
7004 if (OpBC->getOperand(0)->getType() != ConvertTy)
7005 return false;
7006 if (Defs.insert(OpBC).second) {
7007 Worklist.push_back(OpBC);
7008 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
7009 !isa<ExtractElementInst>(OpBC->getOperand(0));
7010 }
7011 } else if (auto *OpC = dyn_cast<ConstantData>(V))
7012 Constants.insert(OpC);
7013 else
7014 return false;
7015 }
7016 }
7017
7018 // Handle uses which might also be phi's
7019 for (User *V : II->users()) {
7020 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7021 if (!PhiNodes.count(OpPhi)) {
7022 if (Visited.count(OpPhi))
7023 return false;
7024 PhiNodes.insert(OpPhi);
7025 Visited.insert(OpPhi);
7026 Worklist.push_back(OpPhi);
7027 }
7028 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
7029 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
7030 return false;
7031 Uses.insert(OpStore);
7032 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7033 if (!ConvertTy)
7034 ConvertTy = OpBC->getType();
7035 if (OpBC->getType() != ConvertTy)
7036 return false;
7037 Uses.insert(OpBC);
7038 AnyAnchored |=
7039 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
7040 } else {
7041 return false;
7042 }
7043 }
7044 }
7045
7046 if (!ConvertTy || !AnyAnchored || PhiTy == ConvertTy ||
7047 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
7048 return false;
7049
7050 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
7051 << *ConvertTy << "\n");
7052
7053 // Create all the new phi nodes of the new type, and bitcast any loads to the
7054 // correct type.
7055 ValueToValueMap ValMap;
7056 for (ConstantData *C : Constants)
7057 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
7058 for (Instruction *D : Defs) {
7059 if (isa<BitCastInst>(D)) {
7060 ValMap[D] = D->getOperand(0);
7061 DeletedInstrs.insert(D);
7062 } else {
7063 BasicBlock::iterator insertPt = std::next(D->getIterator());
7064 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
7065 }
7066 }
7067 for (PHINode *Phi : PhiNodes)
7068 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
7069 Phi->getName() + ".tc", Phi->getIterator());
7070 // Pipe together all the PhiNodes.
7071 for (PHINode *Phi : PhiNodes) {
7072 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
7073 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
7074 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
7075 Phi->getIncomingBlock(i));
7076 Visited.insert(NewPhi);
7077 }
7078 // And finally pipe up the stores and bitcasts
7079 for (Instruction *U : Uses) {
7080 if (isa<BitCastInst>(U)) {
7081 DeletedInstrs.insert(U);
7082 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
7083 } else {
7084 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
7085 U->getIterator()));
7086 }
7087 }
7088
7089 // Save the removed phis to be deleted later.
7090 DeletedInstrs.insert_range(PhiNodes);
7091 return true;
7092}
7093
7094bool CodeGenPrepare::optimizePhiTypes(Function &F) {
7095 if (!OptimizePhiTypes)
7096 return false;
7097
7098 bool Changed = false;
7099 SmallPtrSet<PHINode *, 4> Visited;
7100 SmallPtrSet<Instruction *, 4> DeletedInstrs;
7101
7102 // Attempt to optimize all the phis in the functions to the correct type.
7103 for (auto &BB : F)
7104 for (auto &Phi : BB.phis())
7105 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
7106
7107 // Remove any old phi's that have been converted.
7108 for (auto *I : DeletedInstrs) {
7109 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
7110 I->eraseFromParent();
7111 }
7112
7113 return Changed;
7114}
7115
7116/// Return true, if an ext(load) can be formed from an extension in
7117/// \p MovedExts.
7118bool CodeGenPrepare::canFormExtLd(
7119 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
7120 Instruction *&Inst, bool HasPromoted) {
7121 for (auto *MovedExtInst : MovedExts) {
7122 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
7123 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
7124 Inst = MovedExtInst;
7125 break;
7126 }
7127 }
7128 if (!LI)
7129 return false;
7130
7131 // If they're already in the same block, there's nothing to do.
7132 // Make the cheap checks first if we did not promote.
7133 // If we promoted, we need to check if it is indeed profitable.
7134 if (!HasPromoted && LI->getParent() == Inst->getParent())
7135 return false;
7136
7137 return TLI->isExtLoad(LI, Inst, *DL);
7138}
7139
7140/// Move a zext or sext fed by a load into the same basic block as the load,
7141/// unless conditions are unfavorable. This allows SelectionDAG to fold the
7142/// extend into the load.
7143///
7144/// E.g.,
7145/// \code
7146/// %ld = load i32* %addr
7147/// %add = add nuw i32 %ld, 4
7148/// %zext = zext i32 %add to i64
7149// \endcode
7150/// =>
7151/// \code
7152/// %ld = load i32* %addr
7153/// %zext = zext i32 %ld to i64
7154/// %add = add nuw i64 %zext, 4
7155/// \encode
7156/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
7157/// allow us to match zext(load i32*) to i64.
7158///
7159/// Also, try to promote the computations used to obtain a sign extended
7160/// value used into memory accesses.
7161/// E.g.,
7162/// \code
7163/// a = add nsw i32 b, 3
7164/// d = sext i32 a to i64
7165/// e = getelementptr ..., i64 d
7166/// \endcode
7167/// =>
7168/// \code
7169/// f = sext i32 b to i64
7170/// a = add nsw i64 f, 3
7171/// e = getelementptr ..., i64 a
7172/// \endcode
7173///
7174/// \p Inst[in/out] the extension may be modified during the process if some
7175/// promotions apply.
7176bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7177 bool AllowPromotionWithoutCommonHeader = false;
7178 /// See if it is an interesting sext operations for the address type
7179 /// promotion before trying to promote it, e.g., the ones with the right
7180 /// type and used in memory accesses.
7181 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7182 *Inst, AllowPromotionWithoutCommonHeader);
7183 TypePromotionTransaction TPT(RemovedInsts);
7184 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7185 TPT.getRestorationPoint();
7187 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7188 Exts.push_back(Inst);
7189
7190 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7191
7192 // Look for a load being extended.
7193 LoadInst *LI = nullptr;
7194 Instruction *ExtFedByLoad;
7195
7196 // Try to promote a chain of computation if it allows to form an extended
7197 // load.
7198 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7199 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7200 TPT.commit();
7201 // Move the extend into the same block as the load.
7202 ExtFedByLoad->moveAfter(LI);
7203 ++NumExtsMoved;
7204 Inst = ExtFedByLoad;
7205 return true;
7206 }
7207
7208 // Continue promoting SExts if known as considerable depending on targets.
7209 if (ATPConsiderable &&
7210 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7211 HasPromoted, TPT, SpeculativelyMovedExts))
7212 return true;
7213
7214 TPT.rollback(LastKnownGood);
7215 return false;
7216}
7217
7218// Perform address type promotion if doing so is profitable.
7219// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7220// instructions that sign extended the same initial value. However, if
7221// AllowPromotionWithoutCommonHeader == true, we expect promoting the
7222// extension is just profitable.
7223bool CodeGenPrepare::performAddressTypePromotion(
7224 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7225 bool HasPromoted, TypePromotionTransaction &TPT,
7226 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7227 bool Promoted = false;
7228 SmallPtrSet<Instruction *, 1> UnhandledExts;
7229 bool AllSeenFirst = true;
7230 for (auto *I : SpeculativelyMovedExts) {
7231 Value *HeadOfChain = I->getOperand(0);
7232 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
7233 SeenChainsForSExt.find(HeadOfChain);
7234 // If there is an unhandled SExt which has the same header, try to promote
7235 // it as well.
7236 if (AlreadySeen != SeenChainsForSExt.end()) {
7237 if (AlreadySeen->second != nullptr)
7238 UnhandledExts.insert(AlreadySeen->second);
7239 AllSeenFirst = false;
7240 }
7241 }
7242
7243 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7244 SpeculativelyMovedExts.size() == 1)) {
7245 TPT.commit();
7246 if (HasPromoted)
7247 Promoted = true;
7248 for (auto *I : SpeculativelyMovedExts) {
7249 Value *HeadOfChain = I->getOperand(0);
7250 SeenChainsForSExt[HeadOfChain] = nullptr;
7251 ValToSExtendedUses[HeadOfChain].push_back(I);
7252 }
7253 // Update Inst as promotion happen.
7254 Inst = SpeculativelyMovedExts.pop_back_val();
7255 } else {
7256 // This is the first chain visited from the header, keep the current chain
7257 // as unhandled. Defer to promote this until we encounter another SExt
7258 // chain derived from the same header.
7259 for (auto *I : SpeculativelyMovedExts) {
7260 Value *HeadOfChain = I->getOperand(0);
7261 SeenChainsForSExt[HeadOfChain] = Inst;
7262 }
7263 return false;
7264 }
7265
7266 if (!AllSeenFirst && !UnhandledExts.empty())
7267 for (auto *VisitedSExt : UnhandledExts) {
7268 if (RemovedInsts.count(VisitedSExt))
7269 continue;
7270 TypePromotionTransaction TPT(RemovedInsts);
7272 SmallVector<Instruction *, 2> Chains;
7273 Exts.push_back(VisitedSExt);
7274 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7275 TPT.commit();
7276 if (HasPromoted)
7277 Promoted = true;
7278 for (auto *I : Chains) {
7279 Value *HeadOfChain = I->getOperand(0);
7280 // Mark this as handled.
7281 SeenChainsForSExt[HeadOfChain] = nullptr;
7282 ValToSExtendedUses[HeadOfChain].push_back(I);
7283 }
7284 }
7285 return Promoted;
7286}
7287
7288bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7289 BasicBlock *DefBB = I->getParent();
7290
7291 // If the result of a {s|z}ext and its source are both live out, rewrite all
7292 // other uses of the source with result of extension.
7293 Value *Src = I->getOperand(0);
7294 if (Src->hasOneUse())
7295 return false;
7296
7297 // Only do this xform if truncating is free.
7298 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7299 return false;
7300
7301 // Only safe to perform the optimization if the source is also defined in
7302 // this block.
7303 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7304 return false;
7305
7306 bool DefIsLiveOut = false;
7307 for (User *U : I->users()) {
7309
7310 // Figure out which BB this ext is used in.
7311 BasicBlock *UserBB = UI->getParent();
7312 if (UserBB == DefBB)
7313 continue;
7314 DefIsLiveOut = true;
7315 break;
7316 }
7317 if (!DefIsLiveOut)
7318 return false;
7319
7320 // Make sure none of the uses are PHI nodes.
7321 for (User *U : Src->users()) {
7323 BasicBlock *UserBB = UI->getParent();
7324 if (UserBB == DefBB)
7325 continue;
7326 // Be conservative. We don't want this xform to end up introducing
7327 // reloads just before load / store instructions.
7328 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7329 return false;
7330 }
7331
7332 // InsertedTruncs - Only insert one trunc in each block once.
7333 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7334
7335 bool MadeChange = false;
7336 for (Use &U : Src->uses()) {
7337 Instruction *User = cast<Instruction>(U.getUser());
7338
7339 // Figure out which BB this ext is used in.
7340 BasicBlock *UserBB = User->getParent();
7341 if (UserBB == DefBB)
7342 continue;
7343
7344 // Both src and def are live in this block. Rewrite the use.
7345 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7346
7347 if (!InsertedTrunc) {
7348 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7349 assert(InsertPt != UserBB->end());
7350 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7351 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7352 InsertedInsts.insert(InsertedTrunc);
7353 }
7354
7355 // Replace a use of the {s|z}ext source with a use of the result.
7356 U = InsertedTrunc;
7357 ++NumExtUses;
7358 MadeChange = true;
7359 }
7360
7361 return MadeChange;
7362}
7363
7364// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7365// just after the load if the target can fold this into one extload instruction,
7366// with the hope of eliminating some of the other later "and" instructions using
7367// the loaded value. "and"s that are made trivially redundant by the insertion
7368// of the new "and" are removed by this function, while others (e.g. those whose
7369// path from the load goes through a phi) are left for isel to potentially
7370// remove.
7371//
7372// For example:
7373//
7374// b0:
7375// x = load i32
7376// ...
7377// b1:
7378// y = and x, 0xff
7379// z = use y
7380//
7381// becomes:
7382//
7383// b0:
7384// x = load i32
7385// x' = and x, 0xff
7386// ...
7387// b1:
7388// z = use x'
7389//
7390// whereas:
7391//
7392// b0:
7393// x1 = load i32
7394// ...
7395// b1:
7396// x2 = load i32
7397// ...
7398// b2:
7399// x = phi x1, x2
7400// y = and x, 0xff
7401//
7402// becomes (after a call to optimizeLoadExt for each load):
7403//
7404// b0:
7405// x1 = load i32
7406// x1' = and x1, 0xff
7407// ...
7408// b1:
7409// x2 = load i32
7410// x2' = and x2, 0xff
7411// ...
7412// b2:
7413// x = phi x1', x2'
7414// y = and x, 0xff
7415bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7416 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7417 return false;
7418
7419 // Skip loads we've already transformed.
7420 if (Load->hasOneUse() &&
7421 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7422 return false;
7423
7424 // Look at all uses of Load, looking through phis, to determine how many bits
7425 // of the loaded value are needed.
7426 SmallVector<Instruction *, 8> WorkList;
7427 SmallPtrSet<Instruction *, 16> Visited;
7428 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7429 SmallVector<Instruction *, 8> DropFlags;
7430 for (auto *U : Load->users())
7431 WorkList.push_back(cast<Instruction>(U));
7432
7433 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7434 unsigned BitWidth = LoadResultVT.getSizeInBits();
7435 // If the BitWidth is 0, do not try to optimize the type
7436 if (BitWidth == 0)
7437 return false;
7438
7439 APInt DemandBits(BitWidth, 0);
7440 APInt WidestAndBits(BitWidth, 0);
7441
7442 while (!WorkList.empty()) {
7443 Instruction *I = WorkList.pop_back_val();
7444
7445 // Break use-def graph loops.
7446 if (!Visited.insert(I).second)
7447 continue;
7448
7449 // For a PHI node, push all of its users.
7450 if (auto *Phi = dyn_cast<PHINode>(I)) {
7451 for (auto *U : Phi->users())
7452 WorkList.push_back(cast<Instruction>(U));
7453 continue;
7454 }
7455
7456 switch (I->getOpcode()) {
7457 case Instruction::And: {
7458 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7459 if (!AndC)
7460 return false;
7461 APInt AndBits = AndC->getValue();
7462 DemandBits |= AndBits;
7463 // Keep track of the widest and mask we see.
7464 if (AndBits.ugt(WidestAndBits))
7465 WidestAndBits = AndBits;
7466 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7467 AndsToMaybeRemove.push_back(I);
7468 break;
7469 }
7470
7471 case Instruction::Shl: {
7472 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7473 if (!ShlC)
7474 return false;
7475 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7476 DemandBits.setLowBits(BitWidth - ShiftAmt);
7477 DropFlags.push_back(I);
7478 break;
7479 }
7480
7481 case Instruction::Trunc: {
7482 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7483 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7484 DemandBits.setLowBits(TruncBitWidth);
7485 DropFlags.push_back(I);
7486 break;
7487 }
7488
7489 default:
7490 return false;
7491 }
7492 }
7493
7494 uint32_t ActiveBits = DemandBits.getActiveBits();
7495 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7496 // target even if isLoadLegal says an i1 EXTLOAD is valid. For example,
7497 // for the AArch64 target isLoadLegal(i32, i1, ..., ZEXTLOAD, false) returns
7498 // true, but (and (load x) 1) is not matched as a single instruction, rather
7499 // as a LDR followed by an AND.
7500 // TODO: Look into removing this restriction by fixing backends to either
7501 // return false for isLoadLegal for i1 or have them select this pattern to
7502 // a single instruction.
7503 //
7504 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7505 // mask, since these are the only ands that will be removed by isel.
7506 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7507 WidestAndBits != DemandBits)
7508 return false;
7509
7510 LLVMContext &Ctx = Load->getType()->getContext();
7511 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7512 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7513
7514 // Reject cases that won't be matched as extloads.
7515 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7516 !TLI->isLoadLegal(LoadResultVT, TruncVT, Load->getAlign(),
7517 Load->getPointerAddressSpace(), ISD::ZEXTLOAD, false))
7518 return false;
7519
7520 IRBuilder<> Builder(Load->getNextNode());
7521 auto *NewAnd = cast<Instruction>(
7522 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7523 // Mark this instruction as "inserted by CGP", so that other
7524 // optimizations don't touch it.
7525 InsertedInsts.insert(NewAnd);
7526
7527 // Replace all uses of load with new and (except for the use of load in the
7528 // new and itself).
7529 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7530 NewAnd->setOperand(0, Load);
7531
7532 // Remove any and instructions that are now redundant.
7533 for (auto *And : AndsToMaybeRemove)
7534 // Check that the and mask is the same as the one we decided to put on the
7535 // new and.
7536 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7537 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7538 if (&*CurInstIterator == And)
7539 CurInstIterator = std::next(And->getIterator());
7540 And->eraseFromParent();
7541 ++NumAndUses;
7542 }
7543
7544 // NSW flags may not longer hold.
7545 for (auto *Inst : DropFlags)
7546 Inst->setHasNoSignedWrap(false);
7547
7548 ++NumAndsAdded;
7549 return true;
7550}
7551
7552/// Check if V (an operand of a select instruction) is an expensive instruction
7553/// that is only used once.
7555 auto *I = dyn_cast<Instruction>(V);
7556 // If it's safe to speculatively execute, then it should not have side
7557 // effects; therefore, it's safe to sink and possibly *not* execute.
7558 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7559 TTI->isExpensiveToSpeculativelyExecute(I);
7560}
7561
7562/// Returns true if a SelectInst should be turned into an explicit branch.
7564 const TargetLowering *TLI,
7565 SelectInst *SI) {
7566 // If even a predictable select is cheap, then a branch can't be cheaper.
7567 if (!TLI->isPredictableSelectExpensive())
7568 return false;
7569
7570 // FIXME: This should use the same heuristics as IfConversion to determine
7571 // whether a select is better represented as a branch.
7572
7573 // If metadata tells us that the select condition is obviously predictable,
7574 // then we want to replace the select with a branch.
7575 uint64_t TrueWeight, FalseWeight;
7576 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7577 uint64_t Max = std::max(TrueWeight, FalseWeight);
7578 uint64_t Sum = TrueWeight + FalseWeight;
7579 if (Sum != 0) {
7580 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7581 if (Probability > TTI->getPredictableBranchThreshold())
7582 return true;
7583 }
7584 }
7585
7586 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7587
7588 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7589 // comparison condition. If the compare has more than one use, there's
7590 // probably another cmov or setcc around, so it's not worth emitting a branch.
7591 if (!Cmp || !Cmp->hasOneUse())
7592 return false;
7593
7594 // If either operand of the select is expensive and only needed on one side
7595 // of the select, we should form a branch.
7596 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7597 sinkSelectOperand(TTI, SI->getFalseValue()))
7598 return true;
7599
7600 return false;
7601}
7602
7603/// If \p isTrue is true, return the true value of \p SI, otherwise return
7604/// false value of \p SI. If the true/false value of \p SI is defined by any
7605/// select instructions in \p Selects, look through the defining select
7606/// instruction until the true/false value is not defined in \p Selects.
7607static Value *
7609 const SmallPtrSet<const Instruction *, 2> &Selects) {
7610 Value *V = nullptr;
7611
7612 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7613 DefSI = dyn_cast<SelectInst>(V)) {
7614 assert(DefSI->getCondition() == SI->getCondition() &&
7615 "The condition of DefSI does not match with SI");
7616 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7617 }
7618
7619 assert(V && "Failed to get select true/false value");
7620 return V;
7621}
7622
7623bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7624 assert(Shift->isShift() && "Expected a shift");
7625
7626 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7627 // general vector shifts, and (3) the shift amount is a select-of-splatted
7628 // values, hoist the shifts before the select:
7629 // shift Op0, (select Cond, TVal, FVal) -->
7630 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7631 //
7632 // This is inverting a generic IR transform when we know that the cost of a
7633 // general vector shift is more than the cost of 2 shift-by-scalars.
7634 // We can't do this effectively in SDAG because we may not be able to
7635 // determine if the select operands are splats from within a basic block.
7636 Type *Ty = Shift->getType();
7637 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7638 return false;
7639 Value *Cond, *TVal, *FVal;
7640 if (!match(Shift->getOperand(1),
7641 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7642 return false;
7643 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7644 return false;
7645
7646 IRBuilder<> Builder(Shift);
7647 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7648 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7649 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7650 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7651 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7652 Shift->eraseFromParent();
7653 return true;
7654}
7655
7656bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7657 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7658 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7659 "Expected a funnel shift");
7660
7661 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7662 // than general vector shifts, and (3) the shift amount is select-of-splatted
7663 // values, hoist the funnel shifts before the select:
7664 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7665 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7666 //
7667 // This is inverting a generic IR transform when we know that the cost of a
7668 // general vector shift is more than the cost of 2 shift-by-scalars.
7669 // We can't do this effectively in SDAG because we may not be able to
7670 // determine if the select operands are splats from within a basic block.
7671 Type *Ty = Fsh->getType();
7672 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7673 return false;
7674 Value *Cond, *TVal, *FVal;
7675 if (!match(Fsh->getOperand(2),
7676 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7677 return false;
7678 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7679 return false;
7680
7681 IRBuilder<> Builder(Fsh);
7682 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7683 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7684 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7685 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7686 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7687 Fsh->eraseFromParent();
7688 return true;
7689}
7690
7691/// If we have a SelectInst that will likely profit from branch prediction,
7692/// turn it into a branch.
///
/// The run of consecutive selects that follows \p SI and shares its condition
/// is handled as one group. Returns true only when that group was rewritten
/// into a conditional branch plus PHI nodes.
///
/// NOTE(review): a few statement heads (the guards of the first two early
/// returns, the declaration of ASI, the head of the scanning loop and the
/// first lines of some multi-line calls) are not visible in this listing; the
/// comments below describe only what is shown.
7693bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7695 return false;
7696
7697 // If the SelectOptimize pass is enabled, selects have already been optimized.
7699 return false;
7700
7701 // Find all consecutive select instructions that share the same condition.
7703 ASI.push_back(SI);
7705 It != SI->getParent()->end(); ++It) {
7706 SelectInst *I = dyn_cast<SelectInst>(&*It);
7707 if (I && SI->getCondition() == I->getCondition()) {
7708 ASI.push_back(I);
7709 } else {
7710 break;
7711 }
7712 }
7713
7714 SelectInst *LastSI = ASI.back();
7715 // Increment the current iterator to skip all the rest of select instructions
7716 // because they will be either "not lowered" or "all lowered" to branch.
7717 CurInstIterator = std::next(LastSI->getIterator());
7718 // Examine debug-info attached to the consecutive select instructions. They
7719 // won't be individually optimised by optimizeInst, so we need to perform
7720 // DbgVariableRecord maintenance here instead.
7721 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7722 fixupDbgVariableRecordsOnInst(*SI);
7723
// Any condition type other than a scalar i1 is treated as a vector condition.
7724 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7725
7726 // Can we convert the 'select' to CF ?
7727 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7728 return false;
7729
// Pick the select kind to query the target about, based on whether the
// selected value (not the condition) is a vector.
7730 TargetLowering::SelectSupportKind SelectKind;
7731 if (SI->getType()->isVectorTy())
7732 SelectKind = TargetLowering::ScalarCondVectorVal;
7733 else
7734 SelectKind = TargetLowering::ScalarValSelect;
7735
7736 if (TLI->isSelectSupported(SelectKind) &&
7738 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI)))
7739 return false;
7740
7741 // The DominatorTree needs to be rebuilt by any consumers after this
7742 // transformation. We simply reset here rather than setting the ModifiedDT
7743 // flag to avoid restarting the function walk in runOnFunction for each
7744 // select optimized.
7745 DT.reset();
7746
7747 // Transform a sequence like this:
7748 // start:
7749 // %cmp = cmp uge i32 %a, %b
7750 // %sel = select i1 %cmp, i32 %c, i32 %d
7751 //
7752 // Into:
7753 // start:
7754 // %cmp = cmp uge i32 %a, %b
7755 // %cmp.frozen = freeze %cmp
7756 // br i1 %cmp.frozen, label %select.true, label %select.false
7757 // select.true:
7758 // br label %select.end
7759 // select.false:
7760 // br label %select.end
7761 // select.end:
7762 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7763 //
7764 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7765 // In addition, we may sink instructions that produce %c or %d from
7766 // the entry block into the destination(s) of the new branch.
7767 // If the true or false blocks do not contain a sunken instruction, that
7768 // block and its branch may be optimized away. In that case, one side of the
7769 // first branch will point directly to select.end, and the corresponding PHI
7770 // predecessor block will be the start block.
7771
7772 // Collect values that go on the true side and the values that go on the false
7773 // side.
7774 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7775 for (SelectInst *SI : ASI) {
7776 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7777 TrueInstrs.push_back(cast<Instruction>(V));
7778 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7779 FalseInstrs.push_back(cast<Instruction>(V));
7780 }
7781
7782 // Split the select block, according to how many (if any) values go on each
7783 // side.
7784 BasicBlock *StartBlock = SI->getParent();
7785 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7786 // We should split before any debug-info.
7787 SplitPt.setHeadBit(true);
7788
// The frozen condition is created right before the first select of the group
// and is what gets handed to the block-splitting calls below.
7789 IRBuilder<> IB(SI);
7790 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7791
// Three shapes are distinguished: nothing to sink on the true side, nothing
// to sink on the false side, or instructions to sink on both sides. A block
// pointer left null here means no block was created for that side.
7792 BasicBlock *TrueBlock = nullptr;
7793 BasicBlock *FalseBlock = nullptr;
7794 BasicBlock *EndBlock = nullptr;
7795 UncondBrInst *TrueBranch = nullptr;
7796 UncondBrInst *FalseBranch = nullptr;
7797 if (TrueInstrs.size() == 0) {
7799 CondFr, SplitPt, false, nullptr, nullptr, LI));
7800 FalseBlock = FalseBranch->getParent();
7801 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7802 } else if (FalseInstrs.size() == 0) {
7804 CondFr, SplitPt, false, nullptr, nullptr, LI));
7805 TrueBlock = TrueBranch->getParent();
7806 EndBlock = TrueBranch->getSuccessor();
7807 } else {
7808 Instruction *ThenTerm = nullptr;
7809 Instruction *ElseTerm = nullptr;
7810 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7811 nullptr, nullptr, LI);
7812 TrueBranch = cast<UncondBrInst>(ThenTerm);
7813 FalseBranch = cast<UncondBrInst>(ElseTerm);
7814 TrueBlock = TrueBranch->getParent();
7815 FalseBlock = FalseBranch->getParent();
7816 EndBlock = TrueBranch->getSuccessor();
7817 }
7818
// A false block with no FalseInstrs exists only when the first branch above
// was taken with both lists empty; it then gets the plain "select.false"
// name instead of the ".sink" variant.
7819 EndBlock->setName("select.end");
7820 if (TrueBlock)
7821 TrueBlock->setName("select.true.sink");
7822 if (FalseBlock)
7823 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7824 : "select.false.sink");
7825
// For huge functions, record the newly created blocks in FreshBBs.
7826 if (IsHugeFunc) {
7827 if (TrueBlock)
7828 FreshBBs.insert(TrueBlock);
7829 if (FalseBlock)
7830 FreshBBs.insert(FalseBlock);
7831 FreshBBs.insert(EndBlock);
7832 }
7833
// The end block is given the same block frequency as the start block.
7834 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7835
// Copy the listed metadata kinds from the first select of the group onto the
// start block's new terminator.
7836 static const unsigned MD[] = {
7837 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7838 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7839 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7840
7841 // Sink expensive instructions into the conditional blocks to avoid executing
7842 // them speculatively.
7843 for (Instruction *I : TrueInstrs)
7844 I->moveBefore(TrueBranch->getIterator());
7845 for (Instruction *I : FalseInstrs)
7846 I->moveBefore(FalseBranch->getIterator());
7847
7848 // If we did not create a new block for one of the 'true' or 'false' paths
7849 // of the condition, it means that side of the branch goes to the end block
7850 // directly and the path originates from the start block from the point of
7851 // view of the new PHI.
7852 if (TrueBlock == nullptr)
7853 TrueBlock = StartBlock;
7854 else if (FalseBlock == nullptr)
7855 FalseBlock = StartBlock;
7856
// INS holds the selects of the group that have not been replaced yet; it is
// passed to getTrueOrFalseValue and shrinks as each select is erased.
7857 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7858 // Use reverse iterator because later select may use the value of the
7859 // earlier select, and we need to propagate value through earlier select
7860 // to get the PHI operand.
7861 for (SelectInst *SI : llvm::reverse(ASI)) {
7862 // The select itself is replaced with a PHI Node.
7863 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7864 PN->insertBefore(EndBlock->begin());
7865 PN->takeName(SI);
7866 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7867 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7868 PN->setDebugLoc(SI->getDebugLoc());
7869
7870 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7871 SI->eraseFromParent();
7872 INS.erase(SI);
7873 ++NumSelectsExpanded;
7874 }
7875
7876 // Instruct OptimizeBlock to skip to the next block.
7877 CurInstIterator = StartBlock->end();
7878 return true;
7879}
7880
7881/// Some targets only accept certain types for splat inputs. For example a VDUP
7882/// in MVE takes a GPR (integer) register, and the instruction that incorporate
7883/// a VDUP (such as a VADD qd, qm, rm) also require a gpr register.
///
/// Rewrites the splat \p SVI as bitcast(splat(bitcast(scalar))) using the
/// scalar type returned by TLI->shouldConvertSplatType. Returns true if the
/// rewrite was performed.
///
/// NOTE(review): the heads of the pattern-match condition and of the call that
/// cleans up \p SVI after replacement are not visible in this listing.
7884bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7885 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7887 m_Undef(), m_ZeroMask())))
7888 return false;
// A null result from the target hook means there is nothing to convert.
7889 Type *NewType = TLI->shouldConvertSplatType(SVI);
7890 if (!NewType)
7891 return false;
7892
// The replacement vector keeps the element count and element bit width of the
// original; only the element type changes.
7893 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7894 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7895 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7896 "Expected a type of the same size!");
7897 auto *NewVecType =
7898 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7899
7900 // Create a bitcast (shuffle (insert (bitcast(..))))
7901 IRBuilder<> Builder(SVI->getContext());
7902 Builder.SetInsertPoint(SVI);
// Operand 1 of the shuffle's first operand is the scalar being splatted (the
// 'val' of the pattern described above).
7903 Value *BC1 = Builder.CreateBitCast(
7904 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7905 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7906 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7907
7908 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7910 SVI, TLInfo, nullptr,
7911 [&](Value *V) { removeAllAssertingVHReferences(V); });
7912
7913 // Also hoist the bitcast up to its operand if they are not in the same
7914 // block.
// The hoist is skipped when the operand is a PHI, a terminator or an EH pad.
7915 if (auto *BCI = dyn_cast<Instruction>(BC1))
7916 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7917 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7918 !Op->isTerminator() && !Op->isEHPad())
7919 BCI->moveAfter(Op);
7920
7921 return true;
7922}
7923
7924bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7925 // If the operands of I can be folded into a target instruction together with
7926 // I, duplicate and sink them.
7927 SmallVector<Use *, 4> OpsToSink;
7928 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7929 return false;
7930
7931 // OpsToSink can contain multiple uses in a use chain (e.g.
7932 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7933 // uses must come first, so we process the ops in reverse order so as to not
7934 // create invalid IR.
7935 BasicBlock *TargetBB = I->getParent();
7936 bool Changed = false;
7937 SmallVector<Use *, 4> ToReplace;
7938 Instruction *InsertPoint = I;
7939 DenseMap<const Instruction *, unsigned long> InstOrdering;
7940 unsigned long InstNumber = 0;
7941 for (const auto &I : *TargetBB)
7942 InstOrdering[&I] = InstNumber++;
7943
7944 for (Use *U : reverse(OpsToSink)) {
7945 auto *UI = cast<Instruction>(U->get());
7946 if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
7947 continue;
7948 if (UI->getParent() == TargetBB) {
7949 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7950 InsertPoint = UI;
7951 continue;
7952 }
7953 ToReplace.push_back(U);
7954 }
7955
7956 SetVector<Instruction *> MaybeDead;
7957 DenseMap<Instruction *, Instruction *> NewInstructions;
7958 for (Use *U : ToReplace) {
7959 auto *UI = cast<Instruction>(U->get());
7960 Instruction *NI = UI->clone();
7961
7962 if (IsHugeFunc) {
7963 // Now we clone an instruction, its operands' defs may sink to this BB
7964 // now. So we put the operands defs' BBs into FreshBBs to do optimization.
7965 for (Value *Op : NI->operands())
7966 if (auto *OpDef = dyn_cast<Instruction>(Op))
7967 FreshBBs.insert(OpDef->getParent());
7968 }
7969
7970 NewInstructions[UI] = NI;
7971 MaybeDead.insert(UI);
7972 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7973 NI->insertBefore(InsertPoint->getIterator());
7974 InsertPoint = NI;
7975 InsertedInsts.insert(NI);
7976
7977 // Update the use for the new instruction, making sure that we update the
7978 // sunk instruction uses, if it is part of a chain that has already been
7979 // sunk.
7980 Instruction *OldI = cast<Instruction>(U->getUser());
7981 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7982 It->second->setOperand(U->getOperandNo(), NI);
7983 else
7984 U->set(NI);
7985 Changed = true;
7986 }
7987
7988 // Remove instructions that are dead after sinking.
7989 for (auto *I : MaybeDead) {
7990 if (!I->hasNUsesOrMore(1)) {
7991 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7992 I->eraseFromParent();
7993 }
7994 }
7995
7996 return Changed;
7997}
7998
// Widen the condition of SI, and every case constant, to RegWidth bits when
// RegWidth is larger than the bit width of the condition type. Returns true if
// the switch was rewritten, false if no widening was needed.
//
// NOTE(review): the definition of RegType is not visible in this listing;
// RegWidth is its size in bits.
7999bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
8000 Value *Cond = SI->getCondition();
8001 Type *OldType = Cond->getType();
8002 LLVMContext &Context = Cond->getContext();
8003 EVT OldVT = TLI->getValueType(*DL, OldType);
8005 unsigned RegWidth = RegType.getSizeInBits();
8006
8007 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
8008 return false;
8009
8010 // If the register width is greater than the type width, expand the condition
8011 // of the switch instruction and each case constant to the width of the
8012 // register. By widening the type of the switch condition, subsequent
8013 // comparisons (for case comparisons) will not need to be extended to the
8014 // preferred register width, so we will potentially eliminate N-1 extends,
8015 // where N is the number of cases in the switch.
8016 auto *NewType = Type::getIntNTy(Context, RegWidth);
8017
8018 // Extend the switch condition and case constants using the target preferred
8019 // extend unless the switch condition is a function argument with an extend
8020 // attribute. In that case, we can avoid an unnecessary mask/extension by
8021 // matching the argument extension instead.
8022 Instruction::CastOps ExtType = Instruction::ZExt;
8023 // Some targets prefer SExt over ZExt.
8024 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
8025 ExtType = Instruction::SExt;
8026
// The zeroext check runs last, so it decides the extension kind if both
// attribute checks succeed.
8027 if (auto *Arg = dyn_cast<Argument>(Cond)) {
8028 if (Arg->hasSExtAttr())
8029 ExtType = Instruction::SExt;
8030 if (Arg->hasZExtAttr())
8031 ExtType = Instruction::ZExt;
8032 }
8033
// Insert the extension right before the switch and give it the switch's debug
// location.
8034 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
8035 ExtInst->insertBefore(SI->getIterator());
8036 ExtInst->setDebugLoc(SI->getDebugLoc());
8037 SI->setCondition(ExtInst);
// Each case constant is extended the same way as the condition.
8038 for (auto Case : SI->cases()) {
8039 const APInt &NarrowConst = Case.getCaseValue()->getValue();
8040 APInt WideConst = (ExtType == Instruction::ZExt)
8041 ? NarrowConst.zext(RegWidth)
8042 : NarrowConst.sext(RegWidth);
8043 Case.setValue(ConstantInt::get(Context, WideConst));
8044 }
8045
8046 return true;
8047}
8048
8049bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
8050 // The SCCP optimization tends to produce code like this:
8051 // switch(x) { case 42: phi(42, ...) }
8052 // Materializing the constant for the phi-argument needs instructions; So we
8053 // change the code to:
8054 // switch(x) { case 42: phi(x, ...) }
8055
8056 Value *Condition = SI->getCondition();
8057 // Avoid endless loop in degenerate case.
8058 if (isa<ConstantInt>(*Condition))
8059 return false;
8060
8061 bool Changed = false;
8062 BasicBlock *SwitchBB = SI->getParent();
8063 Type *ConditionType = Condition->getType();
8064
8065 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
8066 ConstantInt *CaseValue = Case.getCaseValue();
8067 BasicBlock *CaseBB = Case.getCaseSuccessor();
8068 // Set to true if we previously checked that `CaseBB` is only reached by
8069 // a single case from this switch.
8070 bool CheckedForSinglePred = false;
8071 for (PHINode &PHI : CaseBB->phis()) {
8072 Type *PHIType = PHI.getType();
8073 // If ZExt is free then we can also catch patterns like this:
8074 // switch((i32)x) { case 42: phi((i64)42, ...); }
8075 // and replace `(i64)42` with `zext i32 %x to i64`.
8076 bool TryZExt =
8077 PHIType->isIntegerTy() &&
8078 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
8079 TLI->isZExtFree(ConditionType, PHIType);
8080 if (PHIType == ConditionType || TryZExt) {
8081 // Set to true to skip this case because of multiple preds.
8082 bool SkipCase = false;
8083 Value *Replacement = nullptr;
8084 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
8085 Value *PHIValue = PHI.getIncomingValue(I);
8086 if (PHIValue != CaseValue) {
8087 if (!TryZExt)
8088 continue;
8089 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
8090 if (!PHIValueInt ||
8091 PHIValueInt->getValue() !=
8092 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
8093 continue;
8094 }
8095 if (PHI.getIncomingBlock(I) != SwitchBB)
8096 continue;
8097 // We cannot optimize if there are multiple case labels jumping to
8098 // this block. This check may get expensive when there are many
8099 // case labels so we test for it last.
8100 if (!CheckedForSinglePred) {
8101 CheckedForSinglePred = true;
8102 if (SI->findCaseDest(CaseBB) == nullptr) {
8103 SkipCase = true;
8104 break;
8105 }
8106 }
8107
8108 if (Replacement == nullptr) {
8109 if (PHIValue == CaseValue) {
8110 Replacement = Condition;
8111 } else {
8112 IRBuilder<> Builder(SI);
8113 Replacement = Builder.CreateZExt(Condition, PHIType);
8114 }
8115 }
8116 PHI.setIncomingValue(I, Replacement);
8117 Changed = true;
8118 }
8119 if (SkipCase)
8120 break;
8121 }
8122 }
8123 }
8124 return Changed;
8125}
8126
8127bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
8128 bool Changed = optimizeSwitchType(SI);
8129 Changed |= optimizeSwitchPhiConstants(SI);
8130 return Changed;
8131}
8132
8133namespace {
8134
8135/// Helper class to promote a scalar operation to a vector one.
8136/// This class is used to move downward extractelement transition.
8137/// E.g.,
8138/// a = vector_op <2 x i32>
8139/// b = extractelement <2 x i32> a, i32 0
8140/// c = scalar_op b
8141/// store c
8142///
8143/// =>
8144/// a = vector_op <2 x i32>
8145/// c = vector_op a (equivalent to scalar_op on the related lane)
8146/// * d = extractelement <2 x i32> c, i32 0
8147/// * store d
8148/// Assuming both extractelement and store can be combined, we get rid of the
8149/// transition.
///
/// Usage as visible in this class: enqueue promotable instructions with
/// enqueueForPromotion(), record the final combinable user with
/// recordCombineInstruction(), then call promote().
///
/// NOTE(review): a few statement heads inside this class are not visible in
/// this listing (see the notes at the affected places).
8150class VectorPromoteHelper {
8151 /// DataLayout associated with the current module.
8152 const DataLayout &DL;
8153
8154 /// Used to perform some checks on the legality of vector operations.
8155 const TargetLowering &TLI;
8156
8157 /// Used to estimate the cost of the promoted chain.
8158 const TargetTransformInfo &TTI;
8159
8160 /// The transition being moved downwards.
8161 Instruction *Transition;
8162
8163 /// The sequence of instructions to be promoted.
8164 SmallVector<Instruction *, 4> InstsToBePromoted;
8165
8166 /// Cost of combining a store and an extract.
8167 unsigned StoreExtractCombineCost;
8168
8169 /// Instruction that will be combined with the transition.
8170 Instruction *CombineInst = nullptr;
8171
8172 /// The instruction that represents the current end of the transition.
8173 /// Since we are faking the promotion until we reach the end of the chain
8174 /// of computation, we need a way to get the current end of the transition.
8175 Instruction *getEndOfTransition() const {
8176 if (InstsToBePromoted.empty())
8177 return Transition;
8178 return InstsToBePromoted.back();
8179 }
8180
8181 /// Return the index of the original value in the transition.
8182 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8183 /// c, is at index 0.
8184 unsigned getTransitionOriginalValueIdx() const {
8185 assert(isa<ExtractElementInst>(Transition) &&
8186 "Other kind of transitions are not supported yet");
8187 return 0;
8188 }
8189
8190 /// Return the index of the index in the transition.
8191 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8192 /// is at index 1.
8193 unsigned getTransitionIdx() const {
8194 assert(isa<ExtractElementInst>(Transition) &&
8195 "Other kind of transitions are not supported yet");
8196 return 1;
8197 }
8198
8199 /// Get the type of the transition.
8200 /// This is the type of the original value.
8201 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8202 /// transition is <2 x i32>.
8203 Type *getTransitionType() const {
8204 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8205 }
8206
8207 /// Promote \p ToBePromoted by moving \p Def downward through.
8208 /// I.e., we have the following sequence:
8209 /// Def = Transition <ty1> a to <ty2>
8210 /// b = ToBePromoted <ty2> Def, ...
8211 /// =>
8212 /// b = ToBePromoted <ty1> a, ...
8213 /// Def = Transition <ty1> ToBePromoted to <ty2>
8214 void promoteImpl(Instruction *ToBePromoted);
8215
8216 /// Check whether or not it is profitable to promote all the
8217 /// instructions enqueued to be promoted.
/// Requires CombineInst to be a StoreInst (it is cast unconditionally).
/// Returns true when the accumulated scalar cost exceeds the vector cost.
8218 bool isProfitableToPromote() {
// NOTE(review): this reads the operand at getTransitionOriginalValueIdx()
// (documented above as the original value), not the one at
// getTransitionIdx(). As written, Index is -1 unless that operand is a
// ConstantInt; confirm whether the extract index was intended here.
8219 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8220 unsigned Index = isa<ConstantInt>(ValIdx)
8221 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8222 : -1;
8223 Type *PromotedType = getTransitionType();
8224
8225 StoreInst *ST = cast<StoreInst>(CombineInst);
8226 unsigned AS = ST->getPointerAddressSpace();
8227 // Check if this store is supported.
// NOTE(review): the head of the condition below is not visible in this
// listing.
8229 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8230 ST->getAlign())) {
8231 // If this is not supported, there is no way we can combine
8232 // the extract with the store.
8233 return false;
8234 }
8235
8236 // The scalar chain of computation has to pay for the transition
8237 // scalar to vector.
8238 // The vector chain has to account for the combining cost.
// NOTE(review): the definition of CostKind is not visible in this listing.
8241 InstructionCost ScalarCost =
8242 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8243 InstructionCost VectorCost = StoreExtractCombineCost;
8244 for (const auto &Inst : InstsToBePromoted) {
8245 // Compute the cost.
8246 // By construction, all instructions being promoted are arithmetic ones.
8247 // Moreover, one argument is a constant that can be viewed as a splat
8248 // constant.
8249 Value *Arg0 = Inst->getOperand(0);
8250 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8251 isa<ConstantFP>(Arg0);
8252 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
// NOTE(review): the statements controlled by this if/else are not visible in
// this listing.
8253 if (IsArg0Constant)
8255 else
8257
// The same operand info is used for both the scalar and the vector query.
8258 ScalarCost += TTI.getArithmeticInstrCost(
8259 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8260 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8261 CostKind, Arg0Info, Arg1Info);
8262 }
8263 LLVM_DEBUG(
8264 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8265 << ScalarCost << "\nVector: " << VectorCost << '\n');
8266 return ScalarCost > VectorCost;
8267 }
8268
8269 /// Generate a constant vector with \p Val with the same
8270 /// number of elements as the transition.
8271 /// \p UseSplat defines whether or not \p Val should be replicated
8272 /// across the whole vector.
8273 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8274 /// otherwise we generate a vector with as many poison as possible:
8275 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8276 /// used at the index of the extract.
8277 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8278 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8279 if (!UseSplat) {
8280 // If we cannot determine where the constant must be, we have to
8281 // use a splat constant.
8282 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8283 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8284 ExtractIdx = CstVal->getSExtValue();
8285 else
8286 UseSplat = true;
8287 }
8288
8289 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8290 if (UseSplat)
8291 return ConstantVector::getSplat(EC, Val);
8292
// Non-splat case: only fixed-width vectors are built, lane by lane.
8293 if (!EC.isScalable()) {
8294 SmallVector<Constant *, 4> ConstVec;
8295 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8296 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8297 if (Idx == ExtractIdx)
8298 ConstVec.push_back(Val);
8299 else
8300 ConstVec.push_back(PoisonVal);
8301 }
8302 return ConstantVector::get(ConstVec);
8303 } else
// NOTE(review): the head of the statement in this else branch is not visible
// in this listing; only its message string is shown.
8305 "Generate scalable vector for non-splat is unimplemented");
8306 }
8307
8308 /// Check if promoting to a vector type an operand at \p OperandIdx
8309 /// in \p Use can trigger undefined behavior.
8310 static bool canCauseUndefinedBehavior(const Instruction *Use,
8311 unsigned OperandIdx) {
8312 // This is not safe to introduce undef when the operand is on
8313 // the right hand side of a division-like instruction.
8314 if (OperandIdx != 1)
8315 return false;
8316 switch (Use->getOpcode()) {
8317 default:
8318 return false;
8319 case Instruction::SDiv:
8320 case Instruction::UDiv:
8321 case Instruction::SRem:
8322 case Instruction::URem:
8323 return true;
// Floating-point division/remainder only counts when the instruction does not
// carry the no-NaNs flag.
8324 case Instruction::FDiv:
8325 case Instruction::FRem:
8326 return !Use->hasNoNaNs();
8327 }
// Every switch path above returns.
8328 llvm_unreachable(nullptr);
8329 }
8330
8331public:
/// \p Transition must be non-null; \p CombineCost is stored as the cost of
/// combining a store and an extract.
8332 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8333 const TargetTransformInfo &TTI, Instruction *Transition,
8334 unsigned CombineCost)
8335 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8336 StoreExtractCombineCost(CombineCost) {
8337 assert(Transition && "Do not know how to promote null");
8338 }
8339
8340 /// Check if we can promote \p ToBePromoted to \p Type.
8341 bool canPromote(const Instruction *ToBePromoted) const {
8342 // We could support CastInst too.
8343 return isa<BinaryOperator>(ToBePromoted);
8344 }
8345
8346 /// Check if it is profitable to promote \p ToBePromoted
8347 /// by moving downward the transition through.
8348 bool shouldPromote(const Instruction *ToBePromoted) const {
8349 // Promote only if all the operands can be statically expanded.
8350 // Indeed, we do not want to introduce any new kind of transitions.
8351 for (const Use &U : ToBePromoted->operands()) {
8352 const Value *Val = U.get();
8353 if (Val == getEndOfTransition()) {
8354 // If the use is a division and the transition is on the rhs,
8355 // we cannot promote the operation, otherwise we may create a
8356 // division by zero.
8357 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8358 return false;
8359 continue;
8360 }
8361 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8362 !isa<ConstantFP>(Val))
8363 return false;
8364 }
8365 // Check that the resulting operation is legal.
8366 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8367 if (!ISDOpcode)
8368 return false;
// NOTE(review): the head of the call on the right-hand side of the || below
// is not visible in this listing.
8369 return StressStoreExtract ||
8371 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8372 }
8373
8374 /// Check whether or not \p Use can be combined
8375 /// with the transition.
8376 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8377 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8378
8379 /// Record \p ToBePromoted as part of the chain to be promoted.
8380 void enqueueForPromotion(Instruction *ToBePromoted) {
8381 InstsToBePromoted.push_back(ToBePromoted);
8382 }
8383
8384 /// Set the instruction that will be combined with the transition.
8385 void recordCombineInstruction(Instruction *ToBeCombined) {
8386 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8387 CombineInst = ToBeCombined;
8388 }
8389
8390 /// Promote all the instructions enqueued for promotion if it is
8391 /// profitable.
8392 /// \return True if the promotion happened, false otherwise.
8393 bool promote() {
8394 // Check if there is something to promote.
8395 // Right now, if we do not have anything to combine with,
8396 // we assume the promotion is not profitable.
8397 if (InstsToBePromoted.empty() || !CombineInst)
8398 return false;
8399
8400 // Check cost.
// StressStoreExtract bypasses the cost check.
8401 if (!StressStoreExtract && !isProfitableToPromote())
8402 return false;
8403
8404 // Promote.
8405 for (auto &ToBePromoted : InstsToBePromoted)
8406 promoteImpl(ToBePromoted);
8407 InstsToBePromoted.clear();
8408 return true;
8409 }
8410};
8411
8412} // end anonymous namespace
8413
8414void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8415 // At this point, we know that all the operands of ToBePromoted but Def
8416 // can be statically promoted.
8417 // For Def, we need to use its parameter in ToBePromoted:
8418 // b = ToBePromoted ty1 a
8419 // Def = Transition ty1 b to ty2
8420 // Move the transition down.
8421 // 1. Replace all uses of the promoted operation by the transition.
8422 // = ... b => = ... Def.
8423 assert(ToBePromoted->getType() == Transition->getType() &&
8424 "The type of the result of the transition does not match "
8425 "the final type");
8426 ToBePromoted->replaceAllUsesWith(Transition);
8427 // 2. Update the type of the uses.
8428 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8429 Type *TransitionTy = getTransitionType();
8430 ToBePromoted->mutateType(TransitionTy);
8431 // 3. Update all the operands of the promoted operation with promoted
8432 // operands.
8433 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8434 for (Use &U : ToBePromoted->operands()) {
8435 Value *Val = U.get();
8436 Value *NewVal = nullptr;
8437 if (Val == Transition)
8438 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8439 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8440 isa<ConstantFP>(Val)) {
8441 // Use a splat constant if it is not safe to use undef.
8442 NewVal = getConstantVector(
8443 cast<Constant>(Val),
8444 isa<UndefValue>(Val) ||
8445 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8446 } else
8447 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8448 "this?");
8449 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8450 }
8451 Transition->moveAfter(ToBePromoted);
8452 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8453}
8454
8455/// Some targets can do store(extractelement) with one instruction.
8456/// Try to push the extractelement towards the stores when the target
8457/// has this feature and this is profitable.
///
/// Returns true only if the chain of users was promoted by
/// VectorPromoteHelper::promote().
///
/// NOTE(review): part of the guard condition of the first early return is not
/// visible in this listing; it takes Inst's operand 1 and CombineCost as its
/// last arguments.
8458bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8459 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8460 if (DisableStoreExtract ||
8463 Inst->getOperand(1), CombineCost)))
8464 return false;
8465
8466 // At this point we know that Inst is a vector to scalar transition.
8467 // Try to move it down the def-use chain, until:
8468 // - We can combine the transition with its single use
8469 // => we got rid of the transition.
8470 // - We escape the current basic block
8471 // => we would need to check that we are moving it at a cheaper place and
8472 // we do not do that for now.
8473 BasicBlock *Parent = Inst->getParent();
8474 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8475 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8476 // If the transition has more than one use, assume this is not going to be
8477 // beneficial.
// Inst walks down the single-use chain: each promotable user is enqueued and
// then becomes the new Inst.
8478 while (Inst->hasOneUse()) {
8479 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8480 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8481
8482 if (ToBePromoted->getParent() != Parent) {
8483 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8484 << ToBePromoted->getParent()->getName()
8485 << ") than the transition (" << Parent->getName()
8486 << ").\n");
8487 return false;
8488 }
8489
// Reaching a combinable user ends the walk: promote what was enqueued so far
// and count the result in the statistic.
8490 if (VPH.canCombine(ToBePromoted)) {
8491 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8492 << "will be combined with: " << *ToBePromoted << '\n');
8493 VPH.recordCombineInstruction(ToBePromoted);
8494 bool Changed = VPH.promote();
8495 NumStoreExtractExposed += Changed;
8496 return Changed;
8497 }
8498
8499 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8500 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8501 return false;
8502
8503 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8504
8505 VPH.enqueueForPromotion(ToBePromoted);
8506 Inst = ToBePromoted;
8507 }
// The chain ended (no single user) before a combinable user was found.
8508 return false;
8509}
8510
8511/// For the instruction sequence of store below, F and I values
8512/// are bundled together as an i64 value before being stored into memory.
8513/// Sometimes it is more efficient to generate separate stores for F and I,
8514/// which can remove the bitwise instructions or sink them to colder places.
8515///
8516/// (store (or (zext (bitcast F to i32) to i64),
8517/// (shl (zext I to i64), 32)), addr) -->
8518/// (store F, addr) and (store I, addr+4)
8519///
8520/// Similarly, splitting other merged stores can also be beneficial, like:
8521/// For pair of {i32, i32}, i64 store --> two i32 stores.
8522/// For pair of {i32, i16}, i64 store --> two i32 stores.
8523/// For pair of {i16, i16}, i32 store --> two i16 stores.
8524/// For pair of {i16, i8}, i32 store --> two i16 stores.
8525/// For pair of {i8, i8}, i16 store --> two i8 stores.
8526///
8527/// We allow each target to determine specifically which kind of splitting is
8528/// supported.
8529///
8530/// The store patterns are commonly seen from the simple code snippet below
8531/// if only std::make_pair(...) is sroa transformed before inlined into hoo.
8532/// void goo(const std::pair<int, float> &);
8533/// hoo() {
8534/// ...
8535/// goo(std::make_pair(tmp, ftmp));
8536/// ...
8537/// }
8538///
8539/// Although we already have similar splitting in DAG Combine, we duplicate
8540/// it in CodeGenPrepare to catch the case in which the pattern spans
8541/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8542/// during code expansion.
///
/// Returns true when the store was replaced by two half-width stores (the
/// original store is erased in that case); returns false, leaving the IR
/// untouched, on every bail-out path.
// NOTE(review): the first line of this signature (function name and leading
// parameters) is absent from this listing. The body uses `SI` and `DL`, so
// they are presumably declared there -- confirm against the upstream file.
8544 const TargetLowering &TLI) {
8545 // Handle simple but common cases only.
8546 Type *StoreType = SI.getValueOperand()->getType();
8547
8548 // The code below assumes shifting a value by <number of bits>,
8549 // whereas scalable vectors would have to be shifted by
8550 // <2log(vscale) + number of bits> in order to store the
8551 // low/high parts. Bailing out for now.
8552 if (StoreType->isScalableTy())
8553 return false;
8554
 // Reject types whose in-register size differs from their store size, and
 // zero-sized types.
8555 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8556 DL.getTypeSizeInBits(StoreType) == 0)
8557 return false;
8558
 // Each half is stored as an integer of half the original bit width; that
 // integer type must itself have matching type size and store size.
8559 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8560 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8561 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8562 return false;
8563
8564 // Don't split the store if it is volatile.
8565 if (SI.isVolatile())
8566 return false;
8567
8568 // Match the following patterns:
8569 // (store (or (zext LValue to i64),
8570 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8571 // or
8572 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8573 // (zext LValue to i64),
8574 // Expect both operands of OR and the first operand of SHL have only
8575 // one use.
8576 Value *LValue, *HValue;
 // NOTE(review): the middle of this match(...) pattern (the part binding
 // LValue and HValue) is missing from this listing -- confirm upstream.
8577 if (!match(SI.getValueOperand(),
8580 m_SpecificInt(HalfValBitSize))))))
8581 return false;
8582
8583 // Check LValue and HValue are integers no wider than HalfValBitSize.
8584 if (!LValue->getType()->isIntegerTy() ||
8585 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8586 !HValue->getType()->isIntegerTy() ||
8587 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8588 return false;
8589
8590 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8591 // as the input of target query.
8592 auto *LBC = dyn_cast<BitCastInst>(LValue);
8593 auto *HBC = dyn_cast<BitCastInst>(HValue);
8594 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8595 : EVT::getEVT(LValue->getType());
8596 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8597 : EVT::getEVT(HValue->getType());
 // ForceSplitStore bypasses the target's profitability query.
8598 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8599 return false;
8600
8601 // Start to split store.
8602 IRBuilder<> Builder(SI.getContext());
8603 Builder.SetInsertPoint(&SI);
8604
8605 // If LValue/HValue is a bitcast in another BB, create a new one in current
8606 // BB so it may be merged with the split stores by dag combiner.
8607 if (LBC && LBC->getParent() != SI.getParent())
8608 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8609 if (HBC && HBC->getParent() != SI.getParent())
8610 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8611
8612 bool IsLE = SI.getDataLayout().isLittleEndian();
 // Emits one half-width store of V. `Upper` selects the high half of the
 // original value.
8613 auto CreateSplitStore = [&](Value *V, bool Upper) {
8614 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8615 Value *Addr = SI.getPointerOperand();
8616 Align Alignment = SI.getAlign();
 // The half placed one SplitStoreType element past the original address is
 // the upper half on little-endian layouts and the lower half otherwise.
8617 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8618 if (IsOffsetStore) {
8619 Addr = Builder.CreateGEP(
8620 SplitStoreType, Addr,
8621 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8622
8623 // When splitting the store in half, naturally one half will retain the
8624 // alignment of the original wider store, regardless of whether it was
8625 // over-aligned or not, while the other will require adjustment.
8626 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8627 }
8628 Builder.CreateAlignedStore(V, Addr, Alignment);
8629 };
8630
8631 CreateSplitStore(LValue, false);
8632 CreateSplitStore(HValue, true);
8633
8634 // Delete the old store.
8635 SI.eraseFromParent();
8636 return true;
8637}
8638
8639// Return true if the GEP has two operands, the first operand is of a sequential
8640// type, and the second operand is a constant.
// NOTE(review): the signature and the declaration of `I` are missing from this
// listing; `I` is presumably a type iterator over GEP's indices -- confirm
// against the upstream file.
8643 return GEP->getNumOperands() == 2 && I.isSequential() &&
8644 isa<ConstantInt>(GEP->getOperand(1));
8645}
8646
8647// Try unmerging GEPs to reduce liveness interference (register pressure) across
8648// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8649// reducing liveness interference across those edges benefits global register
8650// allocation. Currently handles only certain cases.
8651//
8652// For example, unmerge %GEPI and %UGEPI as below.
8653//
8654// ---------- BEFORE ----------
8655// SrcBlock:
8656// ...
8657// %GEPIOp = ...
8658// ...
8659// %GEPI = gep %GEPIOp, Idx
8660// ...
8661// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8662// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8663// (* %GEPIOp is alive on the indirectbr edges only because it is used by
8664// %UGEPI)
8665//
8666// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8667// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8668// ...
8669//
8670// DstBi:
8671// ...
8672// %UGEPI = gep %GEPIOp, UIdx
8673// ...
8674// ---------------------------
8675//
8676// ---------- AFTER ----------
8677// SrcBlock:
8678// ... (same as above)
8679// (* %GEPI is still alive on the indirectbr edges)
8680// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8681// unmerging)
8682// ...
8683//
8684// DstBi:
8685// ...
8686// %UGEPI = gep %GEPI, (UIdx-Idx)
8687// ...
8688// ---------------------------
8689//
8690// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8691// no longer alive on them.
8692//
8693// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8694// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8695// not to disable further simplifications and optimizations as a result of GEP
8696// merging.
8697//
8698// Note this unmerging may increase the length of the data flow critical path
8699// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8700// between the register pressure and the length of data-flow critical
8701// path. Restricting this to the uncommon IndirectBr case would minimize the
8702// impact of potentially longer critical path, if any, and the impact on compile
8703// time.
//
// Returns true if at least one GEP was rewritten to be based on GEPI, false if
// any precondition failed (no IR is modified in that case).
// NOTE(review): the first line of this signature (function name and the GEPI
// parameter) is missing from this listing -- confirm against the upstream file.
8705 const TargetTransformInfo *TTI) {
8706 BasicBlock *SrcBlock = GEPI->getParent();
8707 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8708 // (non-IndirectBr) cases exit early here.
8709 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8710 return false;
8711 // Check that GEPI is a simple gep with a single constant index.
8712 if (!GEPSequentialConstIndexed(GEPI))
8713 return false;
8714 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8715 // Check that GEPI is a cheap one.
 // NOTE(review): the remainder of this condition (cost kind and threshold) is
 // missing from this listing.
8716 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8719 return false;
8720 Value *GEPIOp = GEPI->getOperand(0);
8721 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8722 if (!isa<Instruction>(GEPIOp))
8723 return false;
8724 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8725 if (GEPIOpI->getParent() != SrcBlock)
8726 return false;
8727 // Check that GEP is used outside the block, meaning it's alive on the
8728 // IndirectBr edge(s).
8729 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8730 if (auto *I = dyn_cast<Instruction>(Usr)) {
8731 if (I->getParent() != SrcBlock) {
8732 return true;
8733 }
8734 }
8735 return false;
8736 }))
8737 return false;
8738 // The second elements of the GEP chains to be unmerged.
8739 std::vector<GetElementPtrInst *> UGEPIs;
8740 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8741 // on IndirectBr edges.
8742 for (User *Usr : GEPIOp->users()) {
8743 if (Usr == GEPI)
8744 continue;
8745 // Check if Usr is an Instruction. If not, give up.
8746 if (!isa<Instruction>(Usr))
8747 return false;
8748 auto *UI = cast<Instruction>(Usr);
8749 // Check if Usr in the same block as GEPIOp, which is fine, skip.
8750 if (UI->getParent() == SrcBlock)
8751 continue;
8752 // Check if Usr is a GEP. If not, give up.
8753 if (!isa<GetElementPtrInst>(Usr))
8754 return false;
8755 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8756 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8757 // the pointer operand to it. If so, record it in the vector. If not, give
8758 // up.
8759 if (!GEPSequentialConstIndexed(UGEPI))
8760 return false;
8761 if (UGEPI->getOperand(0) != GEPIOp)
8762 return false;
 // Both GEPs must step over the same element type and use the same index
 // type, so that the index difference computed below is meaningful.
8763 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8764 return false;
8765 if (GEPIIdx->getType() !=
8766 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8767 return false;
8768 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
 // NOTE(review): the remainder of this condition is missing from this listing.
8769 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8772 return false;
8773 UGEPIs.push_back(UGEPI);
8774 }
8775 if (UGEPIs.size() == 0)
8776 return false;
8777 // Check the materializing cost of (Uidx-Idx).
8778 for (GetElementPtrInst *UGEPI : UGEPIs) {
8779 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8780 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
 // NOTE(review): the line declaring ImmCost (the start of this call) is
 // missing from this listing.
8782 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8783 if (ImmCost > TargetTransformInfo::TCC_Basic)
8784 return false;
8785 }
8786 // Now unmerge between GEPI and UGEPIs.
8787 for (GetElementPtrInst *UGEPI : UGEPIs) {
8788 UGEPI->setOperand(0, GEPI);
8789 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8790 Constant *NewUGEPIIdx = ConstantInt::get(
8791 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8792 UGEPI->setOperand(1, NewUGEPIIdx);
8793 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8794 // inbounds to avoid UB.
8795 if (!GEPI->isInBounds()) {
8796 UGEPI->setIsInBounds(false);
8797 }
8798 }
8799 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8800 // alive on IndirectBr edges).
8801 assert(llvm::none_of(GEPIOp->users(),
8802 [&](User *Usr) {
8803 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8804 }) &&
8805 "GEPIOp is used outside SrcBlock");
8806 return true;
8807}
8808
// Rewrites the compare feeding a conditional branch into a compare against
// zero of an existing shift/add/sub/xor of the same value, when the target
// reports preferZeroCompareBranch(). Returns true if the compare was replaced.
// NOTE(review): one parameter line (presumably declaring FreshBBs, which is
// used below) is missing from this listing.
8809static bool optimizeBranch(CondBrInst *Branch, const TargetLowering &TLI,
8811 bool IsHugeFunc) {
8812 // Try and convert
8813 // %c = icmp ult %x, 8
8814 // br %c, bla, blb
8815 // %tc = lshr %x, 3
8816 // to
8817 // %tc = lshr %x, 3
8818 // %c = icmp eq %tc, 0
8819 // br %c, bla, blb
8820 // Creating the cmp to zero can be better for the backend, especially if the
8821 // lshr produces flags that can be used automatically.
8822 if (!TLI.preferZeroCompareBranch())
8823 return false;
8824
 // Only handle an integer compare against a constant whose sole user is this
 // branch.
8825 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8826 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8827 return false;
8828
8829 Value *X = Cmp->getOperand(0);
8830 if (!X->hasUseList())
8831 return false;
8832
8833 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8834
8835 for (auto *U : X->users()) {
 // NOTE(review): the declaration of UI is missing from this listing;
 // presumably it is U cast to an Instruction -- confirm upstream.
8837 // A quick dominance check
 // Skip users that are not in the branch's block or in one of its two
 // successors, and successor-block users whose block has more than one
 // predecessor.
8838 if (!UI ||
8839 (UI->getParent() != Branch->getParent() &&
8840 UI->getParent() != Branch->getSuccessor(0) &&
8841 UI->getParent() != Branch->getSuccessor(1)) ||
8842 (UI->getParent() != Branch->getParent() &&
8843 !UI->getParent()->getSinglePredecessor()))
8844 continue;
8845
 // x u< 2^k  -->  (x >> k) == 0
8846 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8847 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8848 IRBuilder<> Builder(Branch);
8849 if (UI->getParent() != Branch->getParent())
8850 UI->moveBefore(Branch->getIterator());
 // NOTE(review): one statement is missing from this listing here.
8852 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8853 ConstantInt::get(UI->getType(), 0));
8854 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8855 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8856 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8857 return true;
8858 }
 // x ==/!= C  -->  (x + -C), (x - C) or (x ^ C) ==/!= 0
8859 if (Cmp->isEquality() &&
8860 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8861 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8862 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8863 IRBuilder<> Builder(Branch);
8864 if (UI->getParent() != Branch->getParent())
8865 UI->moveBefore(Branch->getIterator());
 // NOTE(review): one statement is missing from this listing here.
8867 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8868 ConstantInt::get(UI->getType(), 0));
8869 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8870 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8871 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8872 return true;
8873 }
8874 }
8875 return false;
8876}
8877
// Per-instruction driver: fixes up the debug records attached to I, then
// dispatches on the kind of instruction to the matching optimization helper.
// Returns true if anything was changed. ModifiedDT is forwarded to the helpers
// that take it (optimizeCmp, optimizeCallInst, and the recursive call below).
8878bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8879 bool AnyChange = false;
8880 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8881
8882 // Bail out if we inserted the instruction to prevent optimizations from
8883 // stepping on each other's toes.
8884 if (InsertedInsts.count(I))
8885 return AnyChange;
8886
8887 // TODO: Move into the switch on opcode below here.
8888 if (PHINode *P = dyn_cast<PHINode>(I)) {
8889 // It is possible for very late stage optimizations (such as SimplifyCFG)
8890 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8891 // trivial PHI, go ahead and zap it here.
8892 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8893 LargeOffsetGEPMap.erase(P);
8894 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8895 P->eraseFromParent();
8896 ++NumPHIsElim;
8897 return true;
8898 }
8899 return AnyChange;
8900 }
8901
8902 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8903 // If the source of the cast is a constant, then this should have
8904 // already been constant folded. The only reason NOT to constant fold
8905 // it is if something (e.g. LSR) was careful to place the constant
8906 // evaluation in a block other than the one that uses it (e.g. to hoist
8907 // the address of globals out of a loop). If this is the case, we don't
8908 // want to forward-subst the cast.
8909 if (isa<Constant>(CI->getOperand(0)))
8910 return AnyChange;
8911
8912 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8913 return true;
8914
 // NOTE(review): the start of this condition and the name of the callee are
 // missing from this listing -- confirm against the upstream file.
8916 isa<TruncInst>(I)) &&
8918 I, LI->getLoopFor(I->getParent()), *TTI))
8919 return true;
8920
8921 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8922 /// Sink a zext or sext into its user blocks if the target type doesn't
8923 /// fit in one register
8924 if (TLI->getTypeAction(CI->getContext(),
8925 TLI->getValueType(*DL, CI->getType())) ==
8926 TargetLowering::TypeExpandInteger) {
8927 return SinkCast(CI);
8928 } else {
 // NOTE(review): the first line of this condition is missing from this
 // listing.
8930 I, LI->getLoopFor(I->getParent()), *TTI))
8931 return true;
8932
8933 bool MadeChange = optimizeExt(I);
8934 return MadeChange | optimizeExtUses(I);
8935 }
8936 }
8937 return AnyChange;
8938 }
8939
8940 if (auto *Cmp = dyn_cast<CmpInst>(I))
8941 if (optimizeCmp(Cmp, ModifiedDT))
8942 return true;
8943
8944 if (match(I, m_URem(m_Value(), m_Value())))
8945 if (optimizeURem(I))
8946 return true;
8947
8948 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
 // The load's invariant.group metadata is dropped before optimizing it.
8949 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8950 bool Modified = optimizeLoadExt(LI);
8951 unsigned AS = LI->getPointerAddressSpace();
8952 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8953 return Modified;
8954 }
8955
8956 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
 // Return right away when the store was split; splitMergedValStore erases
 // the original store in that case.
8957 if (splitMergedValStore(*SI, *DL, *TLI))
8958 return true;
8959 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8960 unsigned AS = SI->getPointerAddressSpace();
8961 return optimizeMemoryInst(I, SI->getOperand(1),
8962 SI->getOperand(0)->getType(), AS);
8963 }
8964
8965 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8966 unsigned AS = RMW->getPointerAddressSpace();
8967 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8968 }
8969
8970 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8971 unsigned AS = CmpX->getPointerAddressSpace();
8972 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8973 CmpX->getCompareOperand()->getType(), AS);
8974 }
8975
8976 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8977
8978 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8979 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8980 return true;
8981
8982 // TODO: Move this into the switch on opcode - it handles shifts already.
8983 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8984 BinOp->getOpcode() == Instruction::LShr)) {
8985 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8986 if (CI && TLI->hasExtractBitsInsn())
8987 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8988 return true;
8989 }
8990
8991 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8992 if (GEPI->hasAllZeroIndices()) {
8993 /// The GEP operand must be a pointer, so must its result -> BitCast
8994 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8995 GEPI->getName(), GEPI->getIterator());
8996 NC->setDebugLoc(GEPI->getDebugLoc());
8997 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
 // NOTE(review): the name of the function called with the arguments below
 // is missing from this listing.
8999 GEPI, TLInfo, nullptr,
9000 [&](Value *V) { removeAllAssertingVHReferences(V); });
9001 ++NumGEPsElim;
 // Run the optimizations again on the bitcast that replaced the GEP.
9002 optimizeInst(NC, ModifiedDT);
9003 return true;
9004 }
 // NOTE(review): the condition guarding the return below is missing from
 // this listing.
9006 return true;
9007 }
9008 }
9009
9010 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
9011 // freeze(icmp a, const)) -> icmp (freeze a), const
9012 // This helps generate efficient conditional jumps.
9013 Instruction *CmpI = nullptr;
9014 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
9015 CmpI = II;
 // An fcmp is only considered when it carries no fast-math flags.
9016 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
9017 CmpI = F->getFastMathFlags().none() ? F : nullptr;
9018
9019 if (CmpI && CmpI->hasOneUse()) {
9020 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
 // NOTE(review): the last alternative of each of the two constant checks
 // below is missing from this listing.
9021 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
9023 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
9025 if (Const0 || Const1) {
 // When exactly one operand is constant, freeze the other operand in
 // place; when both are constant no new freeze is created.
9026 if (!Const0 || !Const1) {
9027 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
9028 F->takeName(FI);
9029 CmpI->setOperand(Const0 ? 1 : 0, F);
9030 }
9031 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
9032 FI->eraseFromParent();
9033 return true;
9034 }
9035 }
9036 return AnyChange;
9037 }
9038
9039 if (tryToSinkFreeOperands(I))
9040 return true;
9041
9042 switch (I->getOpcode()) {
9043 case Instruction::Shl:
9044 case Instruction::LShr:
9045 case Instruction::AShr:
9046 return optimizeShiftInst(cast<BinaryOperator>(I));
9047 case Instruction::Call:
9048 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
9049 case Instruction::Select:
9050 return optimizeSelectInst(cast<SelectInst>(I));
9051 case Instruction::ShuffleVector:
9052 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
9053 case Instruction::Switch:
9054 return optimizeSwitchInst(cast<SwitchInst>(I));
9055 case Instruction::ExtractElement:
9056 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
9057 case Instruction::CondBr:
9058 return optimizeBranch(cast<CondBrInst>(I), *TLI, FreshBBs, IsHugeFunc);
9059 }
9060
9061 return AnyChange;
9062}
9063
9064/// Given an OR instruction, check to see if this is a bitreverse
9065/// idiom. If so, insert the new intrinsic and return true.
/// Returns false for non-integer types, when the (partly elided) target check
/// below fails, or when no idiom is recognized.
9066bool CodeGenPrepare::makeBitReverse(Instruction &I) {
 // NOTE(review): the middle line of this condition (the target query that
 // consumes the value type below) is missing from this listing.
9067 if (!I.getType()->isIntegerTy() ||
9069 TLI->getValueType(*DL, I.getType(), true)))
9070 return false;
9071
9072 SmallVector<Instruction *, 4> Insts;
9073 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
9074 return false;
 // The last instruction inserted by the recognizer replaces I.
9075 Instruction *LastInst = Insts.back();
9076 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
 // NOTE(review): the name of the function called with the arguments below is
 // missing from this listing.
9078 &I, TLInfo, nullptr,
9079 [&](Value *V) { removeAllAssertingVHReferences(V); });
9080 return true;
9081}
9082
9083// In this pass we look for GEP and cast instructions that are used
9084// across basic blocks and rewrite them to improve basic-block-at-a-time
9085// selection.
9086bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
9087 SunkAddrs.clear();
9088 bool MadeChange = false;
9089
9090 do {
9091 CurInstIterator = BB.begin();
9092 ModifiedDT = ModifyDT::NotModifyDT;
9093 while (CurInstIterator != BB.end()) {
9094 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
9095 if (ModifiedDT != ModifyDT::NotModifyDT) {
9096 // For huge function we tend to quickly go though the inner optmization
9097 // opportunities in the BB. So we go back to the BB head to re-optimize
9098 // each instruction instead of go back to the function head.
9099 if (IsHugeFunc) {
9100 DT.reset();
9101 getDT(*BB.getParent());
9102 break;
9103 } else {
9104 return true;
9105 }
9106 }
9107 }
9108 } while (ModifiedDT == ModifyDT::ModifyInstDT);
9109
9110 bool MadeBitReverse = true;
9111 while (MadeBitReverse) {
9112 MadeBitReverse = false;
9113 for (auto &I : reverse(BB)) {
9114 if (makeBitReverse(I)) {
9115 MadeBitReverse = MadeChange = true;
9116 break;
9117 }
9118 }
9119 }
9120 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
9121
9122 return MadeChange;
9123}
9124
9125bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
9126 bool AnyChange = false;
9127 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
9128 AnyChange |= fixupDbgVariableRecord(DVR);
9129 return AnyChange;
9130}
9131
9132// FIXME: should updating debug-info really cause the "changed" flag to fire,
9133// which can cause a function to be reprocessed?
9134bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
9135 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
9136 DVR.Type != DbgVariableRecord::LocationType::Assign)
9137 return false;
9138
9139 // Does this DbgVariableRecord refer to a sunk address calculation?
9140 bool AnyChange = false;
9141 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
9142 DVR.location_ops().end());
9143 for (Value *Location : LocationOps) {
9144 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
9145 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
9146 if (SunkAddr) {
9147 // Point dbg.value at locally computed address, which should give the best
9148 // opportunity to be accurately lowered. This update may change the type
9149 // of pointer being referred to; however this makes no difference to
9150 // debugging information, and we can't generate bitcasts that may affect
9151 // codegen.
9152 DVR.replaceVariableLocationOp(Location, SunkAddr);
9153 AnyChange = true;
9154 }
9155 }
9156 return AnyChange;
9157}
9158
// Detaches DVR from its current position and re-inserts it in VI's block: at
// the block's first insertion point when VI is a PHI node, otherwise directly
// after VI.
// NOTE(review): the signature line is missing from this listing; `DVR` and
// `VI` are presumably its parameters -- confirm against the upstream file.
9160 DVR->removeFromParent();
9161 BasicBlock *VIBB = VI->getParent();
9162 if (isa<PHINode>(VI))
9163 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
9164 else
9165 VIBB->insertDbgRecordAfter(DVR, &*VI);
9166}
9167
9168// A llvm.dbg.value may be using a value before its definition, due to
9169// optimizations in this pass and others. Scan for such dbg.values, and rescue
9170// them by moving the dbg.value to immediately after the value definition.
9171// FIXME: Ideally this should never be necessary, and this has the potential
9172// to re-order dbg.value intrinsics.
9173bool CodeGenPrepare::placeDbgValues(Function &F) {
9174 bool MadeChange = false;
9175 DominatorTree DT(F);
9176
9177 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
9178 SmallVector<Instruction *, 4> VIs;
9179 for (Value *V : DbgItem->location_ops())
9180 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9181 VIs.push_back(VI);
9182
9183 // This item may depend on multiple instructions, complicating any
9184 // potential sink. This block takes the defensive approach, opting to
9185 // "undef" the item if it has more than one instruction and any of them do
9186 // not dominate iem.
9187 for (Instruction *VI : VIs) {
9188 if (VI->isTerminator())
9189 continue;
9190
9191 // If VI is a phi in a block with an EHPad terminator, we can't insert
9192 // after it.
9193 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9194 continue;
9195
9196 // If the defining instruction dominates the dbg.value, we do not need
9197 // to move the dbg.value.
9198 if (DT.dominates(VI, Position))
9199 continue;
9200
9201 // If we depend on multiple instructions and any of them doesn't
9202 // dominate this DVI, we probably can't salvage it: moving it to
9203 // after any of the instructions could cause us to lose the others.
9204 if (VIs.size() > 1) {
9205 LLVM_DEBUG(
9206 dbgs()
9207 << "Unable to find valid location for Debug Value, undefing:\n"
9208 << *DbgItem);
9209 DbgItem->setKillLocation();
9210 break;
9211 }
9212
9213 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9214 << *DbgItem << ' ' << *VI);
9215 DbgInserterHelper(DbgItem, VI->getIterator());
9216 MadeChange = true;
9217 ++NumDbgValueMoved;
9218 }
9219 };
9220
9221 for (BasicBlock &BB : F) {
9222 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9223 // Process any DbgVariableRecord records attached to this
9224 // instruction.
9225 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9226 filterDbgVars(Insn.getDbgRecordRange()))) {
9227 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9228 continue;
9229 DbgProcessor(&DVR, &Insn);
9230 }
9231 }
9232 }
9233
9234 return MadeChange;
9235}
9236
9237// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9238// probes can be chained dependencies of other regular DAG nodes and block DAG
9239// combine optimizations.
9240bool CodeGenPrepare::placePseudoProbes(Function &F) {
9241 bool MadeChange = false;
9242 for (auto &Block : F) {
9243 // Move the rest probes to the beginning of the block.
9244 auto FirstInst = Block.getFirstInsertionPt();
9245 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9246 ++FirstInst;
9247 BasicBlock::iterator I(FirstInst);
9248 I++;
9249 while (I != Block.end()) {
9250 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9251 II->moveBefore(FirstInst);
9252 MadeChange = true;
9253 }
9254 }
9255 }
9256 return MadeChange;
9257}
9258
/// Scale down both weights to fit into uint32_t.
///
/// Both weights are divided by the same factor, chosen from the larger of the
/// two, so their ratio is preserved up to integer rounding. Weights that
/// already fit are left unchanged (the factor is 1).
///
/// \param NewTrue  in/out: weight of the true edge.
/// \param NewFalse in/out: weight of the false edge.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
  const uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  // Keep the factor 64 bits wide: for a maximum close to 2^64 the quotient
  // plus one exceeds 32 bits, and truncating it could produce zero (division
  // by zero) or a factor too small to bring the weights into range.
  const uint64_t Scale = NewMax / std::numeric_limits<uint32_t>::max() + 1;
  NewTrue /= Scale;
  NewFalse /= Scale;
}
9266
9267/// Some targets prefer to split a conditional branch like:
9268/// \code
9269/// %0 = icmp ne i32 %a, 0
9270/// %1 = icmp ne i32 %b, 0
9271/// %or.cond = or i1 %0, %1
9272/// br i1 %or.cond, label %TrueBB, label %FalseBB
9273/// \endcode
9274/// into multiple branch instructions like:
9275/// \code
9276/// bb1:
9277/// %0 = icmp ne i32 %a, 0
9278/// br i1 %0, label %TrueBB, label %bb2
9279/// bb2:
9280/// %1 = icmp ne i32 %b, 0
9281/// br i1 %1, label %TrueBB, label %FalseBB
9282/// \endcode
9283/// This usually allows instruction selection to do even further optimizations
9284/// and combine the compare with the branch instruction. Currently this is
9285/// applied for targets which have "cheap" jump instructions.
9286///
9287/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9288///
/// Returns true if at least one branch was split. ModifiedDT is set to
/// ModifyDT::ModifyBBDT whenever a split is performed.
9289bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
 // Nothing is done unless the EnableFastISel target option is set and the
 // target does not report jumps as expensive.
9290 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9291 return false;
9292
9293 bool MadeChange = false;
9294 for (auto &BB : F) {
9295 // Does this BB end with the following?
9296 // %cond1 = icmp|fcmp|binary instruction ...
9297 // %cond2 = icmp|fcmp|binary instruction ...
9298 // %cond.or = or|and i1 %cond1, cond2
9299 // br i1 %cond.or label %dest1, label %dest2
9300 Instruction *LogicOp;
9301 BasicBlock *TBB, *FBB;
9302 if (!match(BB.getTerminator(),
9303 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9304 continue;
9305
 // Branches marked unpredictable are left alone.
9306 auto *Br1 = cast<CondBrInst>(BB.getTerminator());
9307 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9308 continue;
9309
9310 // The merging of mostly empty BB can cause a degenerate branch.
9311 if (TBB == FBB)
9312 continue;
9313
9314 unsigned Opc;
9315 Value *Cond1, *Cond2;
9316 if (match(LogicOp,
9317 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9318 Opc = Instruction::And;
9319 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9320 m_OneUse(m_Value(Cond2)))))
9321 Opc = Instruction::Or;
9322 else
9323 continue;
9324
 // NOTE(review): the first line of the pattern passed to match() is missing
 // from this listing; only its final logical-or alternative is visible.
9325 auto IsGoodCond = [](Value *Cond) {
9326 return match(
9327 Cond,
9329 m_LogicalOr(m_Value(), m_Value()))));
9330 };
9331 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9332 continue;
9333
9334 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9335
9336 // Create a new BB.
9337 auto *TmpBB =
9338 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9339 BB.getParent(), BB.getNextNode());
9340 if (IsHugeFunc)
9341 FreshBBs.insert(TmpBB);
9342
9343 // Update original basic block by using the first condition directly by the
9344 // branch instruction and removing the no longer needed and/or instruction.
9345 Br1->setCondition(Cond1);
9346 LogicOp->eraseFromParent();
9347
9348 // Depending on the condition we have to either replace the true or the
9349 // false successor of the original branch instruction.
9350 if (Opc == Instruction::And)
9351 Br1->setSuccessor(0, TmpBB);
9352 else
9353 Br1->setSuccessor(1, TmpBB);
9354
9355 // Fill in the new basic block.
9356 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
 // If the second condition is an instruction, move it into the new block,
 // right before the new branch.
9357 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9358 I->removeFromParent();
9359 I->insertBefore(Br2->getIterator());
9360 }
9361
9362 // Update PHI nodes in both successors. The original BB needs to be
9363 // replaced in one successor's PHI nodes, because the branch comes now from
9364 // the newly generated BB (TmpBB). In the other successor we need to add one
9365 // incoming edge to the PHI nodes, because both branch instructions target
9366 // now the same successor. Depending on the original branch condition
9367 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9368 // we perform the correct update for the PHI nodes.
9369 // This doesn't change the successor order of the just created branch
9370 // instruction (or any other instruction).
9371 if (Opc == Instruction::Or)
9372 std::swap(TBB, FBB);
9373
9374 // Replace the old BB with the new BB.
9375 TBB->replacePhiUsesWith(&BB, TmpBB);
9376
9377 // Add another incoming edge from the new BB.
9378 for (PHINode &PN : FBB->phis()) {
9379 auto *Val = PN.getIncomingValueForBlock(&BB);
9380 PN.addIncoming(Val, TmpBB);
9381 }
9382
9383 // Update the branch weights (from SelectionDAGBuilder::
9384 // FindMergedConditions).
9385 if (Opc == Instruction::Or) {
9386 // Codegen X | Y as:
9387 // BB1:
9388 // jmp_if_X TBB
9389 // jmp TmpBB
9390 // TmpBB:
9391 // jmp_if_Y TBB
9392 // jmp FBB
9393 //
9394
9395 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9396 // The requirement is that
9397 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9398 // = TrueProb for original BB.
9399 // Assuming the original weights are A and B, one choice is to set BB1's
9400 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9401 // assumes that
9402 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9403 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
9404 // TmpBB, but the math is more complicated.
9405 uint64_t TrueWeight, FalseWeight;
9406 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
 // NOTE(review): NewTrueWeight/NewFalseWeight are computed and scaled
 // here, but both createBranchWeights calls below are passed the
 // original TrueWeight/FalseWeight, so the weights described in the
 // comment above are never applied -- looks unintended; confirm.
9407 uint64_t NewTrueWeight = TrueWeight;
9408 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9409 scaleWeights(NewTrueWeight, NewFalseWeight);
9410 Br1->setMetadata(LLVMContext::MD_prof,
9411 MDBuilder(Br1->getContext())
9412 .createBranchWeights(TrueWeight, FalseWeight,
9413 hasBranchWeightOrigin(*Br1)));
9414
9415 NewTrueWeight = TrueWeight;
9416 NewFalseWeight = 2 * FalseWeight;
9417 scaleWeights(NewTrueWeight, NewFalseWeight);
9418 Br2->setMetadata(LLVMContext::MD_prof,
9419 MDBuilder(Br2->getContext())
9420 .createBranchWeights(TrueWeight, FalseWeight));
9421 }
9422 } else {
9423 // Codegen X & Y as:
9424 // BB1:
9425 // jmp_if_X TmpBB
9426 // jmp FBB
9427 // TmpBB:
9428 // jmp_if_Y TBB
9429 // jmp FBB
9430 //
9431 // This requires creation of TmpBB after CurBB.
9432
9433 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9434 // The requirement is that
9435 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9436 // = FalseProb for original BB.
9437 // Assuming the original weights are A and B, one choice is to set BB1's
9438 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9439 // assumes that
9440 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9441 uint64_t TrueWeight, FalseWeight;
9442 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
 // NOTE(review): as in the Or case, the scaled NewTrueWeight and
 // NewFalseWeight are not the values passed to createBranchWeights
 // below -- confirm whether that is intended.
9443 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9444 uint64_t NewFalseWeight = FalseWeight;
9445 scaleWeights(NewTrueWeight, NewFalseWeight);
9446 Br1->setMetadata(LLVMContext::MD_prof,
9447 MDBuilder(Br1->getContext())
9448 .createBranchWeights(TrueWeight, FalseWeight));
9449
9450 NewTrueWeight = 2 * TrueWeight;
9451 NewFalseWeight = FalseWeight;
9452 scaleWeights(NewTrueWeight, NewFalseWeight);
9453 Br2->setMetadata(LLVMContext::MD_prof,
9454 MDBuilder(Br2->getContext())
9455 .createBranchWeights(TrueWeight, FalseWeight));
9456 }
9457 }
9458
9459 ModifiedDT = ModifyDT::ModifyBBDT;
9460 MadeChange = true;
9461
9462 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9463 TmpBB->dump());
9464 }
9465 return MadeChange;
9466}
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
#define X(NUM, ENUM, NAME)
Definition ELF.h:849
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(CondBrInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, ExtractValueInst *&OverflowExtract)
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1448
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
unsigned logBase2() const
Definition APInt.h:1776
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:518
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing function, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:675
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getBlockFreq - Return block frequency.
LLVM_ABI std::optional< BlockFrequency > mul(uint64_t Factor) const
Multiplies frequency with Factor. Returns nullopt in case of overflow.
Analysis pass which computes BranchProbabilityInfo.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Conditional Branch instruction.
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string, and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:60
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:809
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:343
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:561
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
void verify(const DominatorTreeBase< BlockT, false > &DomTree) const
void analyze(const DominatorTreeBase< BlockT, false > &DomTree)
Create the loop forest using a stable algorithm.
iterator end() const
iterator begin() const
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:194
iterator find(const KeyT &Key)
Definition MapVector.h:154
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
bool erase(const T &V)
Definition SmallSet.h:200
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:767
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
bool isLoadLegal(EVT ValVT, EVT MemVT, Align Alignment, unsigned AddrSpace, unsigned ExtType, bool Atomic) const
Return true if the specified load with extension is legal on this target.
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:65
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitWidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:272
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
BasicBlock * getSuccessor(unsigned i=0) const
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:403
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:967
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:242
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:347
user_iterator user_end()
Definition Value.h:411
iterator_range< use_iterator > uses()
Definition Value.h:381
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:840
user_iterator_impl< User > user_iterator
Definition Value.h:392
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
std::enable_if_t< std::is_signed_v< T >, T > MulOverflow(T X, T Y, T &Result)
Multiply two signed integers, computing the two's complement truncated result, returning true if an o...
Definition MathExtras.h:753
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1765
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1739
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:535
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2250
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1726
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2128
constexpr from_range_t from_range
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2134
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2173
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2200
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3785
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1399
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition Analysis.cpp:203
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:52
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition Analysis.cpp:588
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2012
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow occurred.
Definition MathExtras.h:701
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:308
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:256
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
This contains information for each constraint that we are lowering.