CodeGenPrepare.cpp
1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/Argument.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InlineAsm.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/MDBuilder.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Operator.h"
73#include "llvm/IR/Statepoint.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/Use.h"
76#include "llvm/IR/User.h"
77#include "llvm/IR/Value.h"
78#include "llvm/IR/ValueHandle.h"
79#include "llvm/IR/ValueMap.h"
81#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of select created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
187 "profile, we cannot tell the function is cold for sure because "
188 "it may be a function newly added without ever being sampled. "
189 "With the flag enabled, compiler can put such profile unknown "
190 "functions into a special section, so runtime system can choose "
191 "to handle it in a different way than .text section, to save "
192 "RAM for example. "));
193
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290 NotModifyDT, // Not modifying any DT.
291 ModifyBBDT, // Modify the Basic Block Dominator Tree.
292 ModifyInstDT // Modify instruction dominance within a Basic Block.
293 // This usually means we move/delete/insert an instruction
294 // in a Basic Block, so we should re-iterate the instructions
295 // in such a Basic Block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
329 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the types of the related instructions before their
335 /// promotion for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
347 MapVector<AssertingVH<Value>,
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP base after splitting the GEPs having large offset.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map serial numbers to Large offset GEPs.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of promoted SExts.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare() = default;
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM) {}
373 /// If we encounter a huge function, we need to limit the build time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs is like a worklist: it collects the updated BBs which need
377 /// to be optimized again.
378 /// Note: To keep the build time of this pass in check, whenever a BB is
379 /// updated in a huge function we insert it into FreshBBs.
380 SmallPtrSet<BasicBlock *, 32> FreshBBs;
381
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
391 bool run(Function &F, FunctionAnalysisManager &AM);
392
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
435 ModifyDT &ModifiedDT);
436 bool optimizeInlineAsmInst(CallInst *CS);
437 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
438 bool optimizeExt(Instruction *&I);
439 bool optimizeExtUses(Instruction *I);
440 bool optimizeLoadExt(LoadInst *Load);
441 bool optimizeShiftInst(BinaryOperator *BO);
442 bool optimizeFunnelShift(IntrinsicInst *Fsh);
443 bool optimizeSelectInst(SelectInst *SI);
444 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
445 bool optimizeSwitchType(SwitchInst *SI);
446 bool optimizeSwitchPhiConstants(SwitchInst *SI);
447 bool optimizeSwitchInst(SwitchInst *SI);
448 bool optimizeExtractElementInst(Instruction *Inst);
449 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
450 bool fixupDbgVariableRecord(DbgVariableRecord &I);
451 bool fixupDbgVariableRecordsOnInst(Instruction &I);
452 bool placeDbgValues(Function &F);
453 bool placePseudoProbes(Function &F);
454 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
455 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
456 bool tryToPromoteExts(TypePromotionTransaction &TPT,
457 const SmallVectorImpl<Instruction *> &Exts,
458 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
459 unsigned CreatedInstsCost = 0);
460 bool mergeSExts(Function &F);
461 bool splitLargeGEPOffsets();
462 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
463 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
464 bool optimizePhiTypes(Function &F);
465 bool performAddressTypePromotion(
466 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
467 bool HasPromoted, TypePromotionTransaction &TPT,
468 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
469 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
470 bool simplifyOffsetableRelocate(GCStatepointInst &I);
471
472 bool tryToSinkFreeOperands(Instruction *I);
473 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
474 CmpInst *Cmp, Intrinsic::ID IID);
475 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool optimizeURem(Instruction *Rem);
477 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
478 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
479 bool unfoldPowerOf2Test(CmpInst *Cmp);
480 void verifyBFIUpdates(Function &F);
481 bool _run(Function &F);
482};
483
484class CodeGenPrepareLegacyPass : public FunctionPass {
485public:
486 static char ID; // Pass identification, replacement for typeid
487
488 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
490 }
491
492 bool runOnFunction(Function &F) override;
493
494 StringRef getPassName() const override { return "CodeGen Prepare"; }
495
496 void getAnalysisUsage(AnalysisUsage &AU) const override {
497 // FIXME: When we can selectively preserve passes, preserve the domtree.
498 AU.addRequired<ProfileSummaryInfoWrapperPass>();
499 AU.addRequired<TargetLibraryInfoWrapperPass>();
500 AU.addRequired<TargetPassConfig>();
501 AU.addRequired<TargetTransformInfoWrapperPass>();
502 AU.addRequired<LoopInfoWrapperPass>();
503 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
504 }
505};
506
507} // end anonymous namespace
508
509char CodeGenPrepareLegacyPass::ID = 0;
510
511bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
512 if (skipFunction(F))
513 return false;
514 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
515 CodeGenPrepare CGP(TM);
516 CGP.DL = &F.getDataLayout();
517 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
518 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
519 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
520 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
521 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
522 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
523 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
524 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
525 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
526 auto BBSPRWP =
527 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
528 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
529
530 return CGP._run(F);
531}
532
533INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
534 "Optimize for code generation", false, false)
541INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
542 "Optimize for code generation", false, false)
543
545 return new CodeGenPrepareLegacyPass();
546}
547
550 CodeGenPrepare CGP(TM);
551
552 bool Changed = CGP.run(F, AM);
553 if (!Changed)
554 return PreservedAnalyses::all();
555
559 return PA;
560}
561
562bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
563 DL = &F.getDataLayout();
564 SubtargetInfo = TM->getSubtargetImpl(F);
565 TLI = SubtargetInfo->getTargetLowering();
566 TRI = SubtargetInfo->getRegisterInfo();
567 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
569 LI = &AM.getResult<LoopAnalysis>(F);
570 BPI.reset(new BranchProbabilityInfo(F, *LI));
571 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
572 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
573 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
574 BBSectionsProfileReader =
576 return _run(F);
577}
578
579bool CodeGenPrepare::_run(Function &F) {
580 bool EverMadeChange = false;
581
582 OptSize = F.hasOptSize();
583 // Use the basic-block-sections profile to promote hot functions to .text.hot
584 // if requested.
585 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
586 BBSectionsProfileReader->isFunctionHot(F.getName())) {
587 (void)F.setSectionPrefix("hot");
588 } else if (ProfileGuidedSectionPrefix) {
589 // The hot attribute overwrites profile-count-based hotness, while
590 // profile-count-based hotness overwrites the cold attribute.
591 // This is a conservative behavior.
592 if (F.hasFnAttribute(Attribute::Hot) ||
593 PSI->isFunctionHotInCallGraph(&F, *BFI))
594 (void)F.setSectionPrefix("hot");
595 // If PSI shows this function is not hot, we will place the function
596 // into the unlikely section if (1) PSI shows this is a cold function, or
597 // (2) the function has an attribute of cold.
598 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
599 F.hasFnAttribute(Attribute::Cold))
600 (void)F.setSectionPrefix("unlikely");
603 (void)F.setSectionPrefix("unknown");
604 }
605
606 /// This optimization identifies DIV instructions that can be
607 /// profitably bypassed and carried out with a shorter, faster divide.
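  // Illustrative sketch only (bypassSlowDivision chooses the exact names and
  // block layout; the bypass widths come from the target): on a target that
  // reports a 64->32 bypass width, a division such as
  //   %res = udiv i64 %a, %b
  // is guarded by a runtime check of whether both operands fit in 32 bits,
  // with a cheap 32-bit divide on the fast path, the original 64-bit divide on
  // the slow path, and a phi merging the two results.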
608 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
609 const DenseMap<unsigned int, unsigned int> &BypassWidths =
610 TLI->getBypassSlowDivWidths();
611 BasicBlock *BB = &*F.begin();
612 while (BB != nullptr) {
613 // bypassSlowDivision may create new BBs, but we don't want to reapply the
614 // optimization to those blocks.
615 BasicBlock *Next = BB->getNextNode();
616 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
617 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
618 BB = Next;
619 }
620 }
621
622 // Get rid of @llvm.assume builtins before attempting to eliminate empty
623 // blocks, since there might be blocks that only contain @llvm.assume calls
624 // (plus arguments that we can get rid of).
625 EverMadeChange |= eliminateAssumptions(F);
626
627 // Eliminate blocks that contain only PHI nodes and an
628 // unconditional branch.
629 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
630
631 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
633 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
634
635 // Split some critical edges where one of the sources is an indirect branch,
636 // to help generate sane code for PHIs involving such edges.
637 EverMadeChange |=
638 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
639
640 // If we are optimizing a huge function, we need to consider the build time
641 // because the basic algorithm's complexity is close to O(N!).
642 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
643
644 // Transformations above may invalidate dominator tree and/or loop info.
645 DT.reset();
646 LI->releaseMemory();
647 LI->analyze(getDT(F));
648
649 bool MadeChange = true;
650 bool FuncIterated = false;
651 while (MadeChange) {
652 MadeChange = false;
653
654 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
655 if (FuncIterated && !FreshBBs.contains(&BB))
656 continue;
657
658 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
659 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
660
661 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
662 DT.reset();
663
664 MadeChange |= Changed;
665 if (IsHugeFunc) {
666 // If the BB is updated, it may still have a chance to be optimized.
667 // This usually happens during sink optimization.
668 // For example:
669 //
670 // bb0:
671 // %and = and i32 %a, 4
672 // %cmp = icmp eq i32 %and, 0
673 //
674 // If %cmp is sunk to another BB, %and will also have a chance to sink.
675 if (Changed)
676 FreshBBs.insert(&BB);
677 else if (FuncIterated)
678 FreshBBs.erase(&BB);
679 } else {
680 // For small/normal functions, we restart BB iteration if the dominator
681 // tree of the Function was changed.
682 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
683 break;
684 }
685 }
686 // We have iterated over all the BBs in the function (only relevant for huge functions).
687 FuncIterated = IsHugeFunc;
688
689 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
690 MadeChange |= mergeSExts(F);
691 if (!LargeOffsetGEPMap.empty())
692 MadeChange |= splitLargeGEPOffsets();
693 MadeChange |= optimizePhiTypes(F);
694
695 if (MadeChange)
696 eliminateFallThrough(F, DT.get());
697
698#ifndef NDEBUG
699 if (MadeChange && VerifyLoopInfo)
700 LI->verify(getDT(F));
701#endif
702
703 // Really free removed instructions during promotion.
704 for (Instruction *I : RemovedInsts)
705 I->deleteValue();
706
707 EverMadeChange |= MadeChange;
708 SeenChainsForSExt.clear();
709 ValToSExtendedUses.clear();
710 RemovedInsts.clear();
711 LargeOffsetGEPMap.clear();
712 LargeOffsetGEPID.clear();
713 }
714
715 NewGEPBases.clear();
716 SunkAddrs.clear();
717
718 if (!DisableBranchOpts) {
719 MadeChange = false;
720 // Use a set vector to get deterministic iteration order. The order the
721 // blocks are removed may affect whether or not PHI nodes in successors
722 // are removed.
723 SmallSetVector<BasicBlock *, 8> WorkList;
724 for (BasicBlock &BB : F) {
726 MadeChange |= ConstantFoldTerminator(&BB, true);
727 if (!MadeChange)
728 continue;
729
730 for (BasicBlock *Succ : Successors)
731 if (pred_empty(Succ))
732 WorkList.insert(Succ);
733 }
734
735 // Delete the dead blocks and any of their dead successors.
736 MadeChange |= !WorkList.empty();
737 while (!WorkList.empty()) {
738 BasicBlock *BB = WorkList.pop_back_val();
740
741 DeleteDeadBlock(BB);
742
743 for (BasicBlock *Succ : Successors)
744 if (pred_empty(Succ))
745 WorkList.insert(Succ);
746 }
747
748 // Merge pairs of basic blocks with unconditional branches, connected by
749 // a single edge.
750 if (EverMadeChange || MadeChange)
751 MadeChange |= eliminateFallThrough(F);
752
753 EverMadeChange |= MadeChange;
754 }
755
756 if (!DisableGCOpts) {
758 for (BasicBlock &BB : F)
759 for (Instruction &I : BB)
760 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
761 Statepoints.push_back(SP);
762 for (auto &I : Statepoints)
763 EverMadeChange |= simplifyOffsetableRelocate(*I);
764 }
765
766 // Do this last to clean up use-before-def scenarios introduced by other
767 // preparatory transforms.
768 EverMadeChange |= placeDbgValues(F);
769 EverMadeChange |= placePseudoProbes(F);
770
771#ifndef NDEBUG
773 verifyBFIUpdates(F);
774#endif
775
776 return EverMadeChange;
777}
778
779bool CodeGenPrepare::eliminateAssumptions(Function &F) {
780 bool MadeChange = false;
781 for (BasicBlock &BB : F) {
782 CurInstIterator = BB.begin();
783 while (CurInstIterator != BB.end()) {
784 Instruction *I = &*(CurInstIterator++);
785 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
786 MadeChange = true;
787 Value *Operand = Assume->getOperand(0);
788 Assume->eraseFromParent();
789
790 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
791 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
792 });
793 }
794 }
795 }
796 return MadeChange;
797}
798
799/// An instruction is about to be deleted, so remove all references to it in our
800/// GEP-tracking data structures.
801void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
802 LargeOffsetGEPMap.erase(V);
803 NewGEPBases.erase(V);
804
806 if (!GEP)
807 return;
808
809 LargeOffsetGEPID.erase(GEP);
810
811 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
812 if (VecI == LargeOffsetGEPMap.end())
813 return;
814
815 auto &GEPVector = VecI->second;
816 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
817
818 if (GEPVector.empty())
819 LargeOffsetGEPMap.erase(VecI);
820}
821
822// Verify BFI has been updated correctly by recomputing BFI and comparing them.
823[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
824 DominatorTree NewDT(F);
825 LoopInfo NewLI(NewDT);
826 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
827 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
828 NewBFI.verifyMatch(*BFI);
829}
830
831/// Merge basic blocks which are connected by a single edge, where one of the
832/// basic blocks has a single successor pointing to the other basic block,
833/// which has a single predecessor.
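/// For example (illustrative IR), with %next having %pred as its only
/// predecessor and %pred ending in an unconditional branch:
///   pred:
///     %x = add i32 %a, %b
///     br label %next
///   next:
///     ret i32 %x
/// the two blocks are merged into a single block ending in the ret.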
834bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
835 bool Changed = false;
836 // Scan all of the blocks in the function, except for the entry block.
837 // Use a temporary array to avoid the iterator being invalidated when
838 // deleting blocks.
841
842 SmallSet<WeakTrackingVH, 16> Preds;
843 for (auto &Block : Blocks) {
845 if (!BB)
846 continue;
847 // If the destination block has a single pred, then this is a trivial
848 // edge, just collapse it.
849 BasicBlock *SinglePred = BB->getSinglePredecessor();
850
851 // Don't merge if BB's address is taken.
852 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
853 continue;
854
855 // Make an effort to skip unreachable blocks.
856 if (DT && !DT->isReachableFromEntry(BB))
857 continue;
858
859 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
860 if (Term && !Term->isConditional()) {
861 Changed = true;
862 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
863
864 // Merge BB into SinglePred and delete it.
865 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
866 /* MemDep */ nullptr,
867 /* PredecessorWithTwoSuccessors */ false, DT);
868 Preds.insert(SinglePred);
869
870 if (IsHugeFunc) {
871 // Update FreshBBs to optimize the merged BB.
872 FreshBBs.insert(SinglePred);
873 FreshBBs.erase(BB);
874 }
875 }
876 }
877
878 // (Repeatedly) merging blocks into their predecessors can create redundant
879 // debug intrinsics.
880 for (const auto &Pred : Preds)
881 if (auto *BB = cast_or_null<BasicBlock>(Pred))
883
884 return Changed;
885}
886
887/// Find a destination block from BB if BB is a mergeable empty block.
888BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
889 // If this block doesn't end with an uncond branch, ignore it.
890 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
891 if (!BI || !BI->isUnconditional())
892 return nullptr;
893
894 // If the instruction before the branch (skipping debug info) isn't a phi
895 // node, then other stuff is happening here.
897 if (BBI != BB->begin()) {
898 --BBI;
899 if (!isa<PHINode>(BBI))
900 return nullptr;
901 }
902
903 // Do not break infinite loops.
904 BasicBlock *DestBB = BI->getSuccessor(0);
905 if (DestBB == BB)
906 return nullptr;
907
908 if (!canMergeBlocks(BB, DestBB))
909 DestBB = nullptr;
910
911 return DestBB;
912}
913
914/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
915/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
916/// edges in ways that are non-optimal for isel. Start by eliminating these
917/// blocks so we can split them the way we want them.
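/// A "mostly empty" block in this sense looks like (illustrative IR):
///   bb:
///     %p = phi i32 [ 0, %a ], [ 1, %b ]
///     br label %dest
/// When merging is deemed profitable, the phi is folded into %dest's phis and
/// bb is removed.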
918bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
919 SmallPtrSet<BasicBlock *, 16> Preheaders;
920 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
921 while (!LoopList.empty()) {
922 Loop *L = LoopList.pop_back_val();
923 llvm::append_range(LoopList, *L);
924 if (BasicBlock *Preheader = L->getLoopPreheader())
925 Preheaders.insert(Preheader);
926 }
927
928 bool MadeChange = false;
929 // Copy blocks into a temporary array to avoid iterator invalidation issues
930 // as we remove them.
931 // Note that this intentionally skips the entry block.
933 for (auto &Block : llvm::drop_begin(F)) {
934 // Delete phi nodes that could block deleting other empty blocks.
936 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
937 Blocks.push_back(&Block);
938 }
939
940 for (auto &Block : Blocks) {
942 if (!BB)
943 continue;
944 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
945 if (!DestBB ||
946 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
947 continue;
948
949 eliminateMostlyEmptyBlock(BB);
950 MadeChange = true;
951 }
952 return MadeChange;
953}
954
955bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
956 BasicBlock *DestBB,
957 bool isPreheader) {
958 // Do not delete loop preheaders if doing so would create a critical edge.
959 // Loop preheaders can be good locations to spill registers. If the
960 // preheader is deleted and we create a critical edge, registers may be
961 // spilled in the loop body instead.
962 if (!DisablePreheaderProtect && isPreheader &&
963 !(BB->getSinglePredecessor() &&
965 return false;
966
967 // Skip merging if the block's successor is also a successor to any callbr
968 // that leads to this block.
969 // FIXME: Is this really needed? Is this a correctness issue?
970 for (BasicBlock *Pred : predecessors(BB)) {
971 if (isa<CallBrInst>(Pred->getTerminator()) &&
972 llvm::is_contained(successors(Pred), DestBB))
973 return false;
974 }
975
976 // Try to skip merging if the unique predecessor of BB is terminated by a
977 // switch or indirect branch instruction, and BB is used as an incoming block
978 // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
979 // add COPY instructions in the predecessor of BB instead of BB (if it is not
980 // merged). Note that the critical edge created by merging such blocks won't be
981 // split in MachineSink because the jump table is not analyzable. By keeping
982 // such empty block (BB), ISel will place COPY instructions in BB, not in the
983 // predecessor of BB.
984 BasicBlock *Pred = BB->getUniquePredecessor();
985 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
987 return true;
988
989 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
990 return true;
991
992 // We use a simple cost heuristic which determines that skipping merging is
993 // profitable if the cost of skipping merging is less than the cost of
994 // merging : Cost(skipping merging) < Cost(merging BB), where the
995 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
996 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
997 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
998 // Freq(Pred) / Freq(BB) > 2.
999 // Note that if there are multiple empty blocks sharing the same incoming
1000 // value for the PHIs in the DestBB, we consider them together. In such a
1001 // case, Cost(merging BB) will be the sum of their frequencies.
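  // Worked example with the default cgp-freq-ratio-to-skip-merge of 2: if
  // Freq(BB) = 100, the limit computed below is 200; a predecessor with
  // Freq(Pred) = 300 exceeds it, so we return false and keep BB, whereas
  // Freq(Pred) = 150 stays within the limit and merging is allowed.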
1002
1003 if (!isa<PHINode>(DestBB->begin()))
1004 return true;
1005
1006 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1007
1008 // Find all other incoming blocks from which incoming values of all PHIs in
1009 // DestBB are the same as the ones from BB.
1010 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1011 if (DestBBPred == BB)
1012 continue;
1013
1014 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1015 return DestPN.getIncomingValueForBlock(BB) ==
1016 DestPN.getIncomingValueForBlock(DestBBPred);
1017 }))
1018 SameIncomingValueBBs.insert(DestBBPred);
1019 }
1020
1021 // See if all of BB's incoming values are the same as the value from Pred. In this
1022 // case, there is no reason to skip merging because COPYs are expected to be placed in
1023 // Pred already.
1024 if (SameIncomingValueBBs.count(Pred))
1025 return true;
1026
1027 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1028 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1029
1030 for (auto *SameValueBB : SameIncomingValueBBs)
1031 if (SameValueBB->getUniquePredecessor() == Pred &&
1032 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1033 BBFreq += BFI->getBlockFreq(SameValueBB);
1034
1035 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1036 return !Limit || PredFreq <= *Limit;
1037}
1038
1039/// Return true if we can merge BB into DestBB if there is a single
1040/// unconditional branch between them, and BB contains no other non-phi
1041/// instructions.
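/// One case this rejects (illustrative IR): BB and DestBB share a predecessor
/// %p, and a phi in DestBB would need two different values for %p after the
/// merge:
///   destbb:
///     %v = phi i32 [ 1, %p ], [ 2, %bb ]   ; %bb is itself reachable from %p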
1042bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1043 const BasicBlock *DestBB) const {
1044 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1045 // the successor. If there are more complex conditions (e.g. preheaders),
1046 // don't mess around with them.
1047 for (const PHINode &PN : BB->phis()) {
1048 for (const User *U : PN.users()) {
1049 const Instruction *UI = cast<Instruction>(U);
1050 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1051 return false;
1052 // If User is inside DestBB block and it is a PHINode then check
1053 // incoming value. If incoming value is not from BB then this is
1054 // a complex condition (e.g. preheaders) we want to avoid here.
1055 if (UI->getParent() == DestBB) {
1056 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1057 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1058 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1059 if (Insn && Insn->getParent() == BB &&
1060 Insn->getParent() != UPN->getIncomingBlock(I))
1061 return false;
1062 }
1063 }
1064 }
1065 }
1066
1067 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1068 // and DestBB may have conflicting incoming values for the block. If so, we
1069 // can't merge the block.
1070 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1071 if (!DestBBPN)
1072 return true; // no conflict.
1073
1074 // Collect the preds of BB.
1075 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1076 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1077 // It is faster to get preds from a PHI than with pred_iterator.
1078 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1079 BBPreds.insert(BBPN->getIncomingBlock(i));
1080 } else {
1081 BBPreds.insert_range(predecessors(BB));
1082 }
1083
1084 // Walk the preds of DestBB.
1085 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1086 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1087 if (BBPreds.count(Pred)) { // Common predecessor?
1088 for (const PHINode &PN : DestBB->phis()) {
1089 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1090 const Value *V2 = PN.getIncomingValueForBlock(BB);
1091
1092 // If V2 is a phi node in BB, look up what the mapped value will be.
1093 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1094 if (V2PN->getParent() == BB)
1095 V2 = V2PN->getIncomingValueForBlock(Pred);
1096
1097 // If there is a conflict, bail out.
1098 if (V1 != V2)
1099 return false;
1100 }
1101 }
1102 }
1103
1104 return true;
1105}
1106
1107/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1108static void replaceAllUsesWith(Value *Old, Value *New,
1110 bool IsHuge) {
1111 auto *OldI = dyn_cast<Instruction>(Old);
1112 if (OldI) {
1113 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1114 UI != E; ++UI) {
1116 if (IsHuge)
1117 FreshBBs.insert(User->getParent());
1118 }
1119 }
1120 Old->replaceAllUsesWith(New);
1121}
1122
1123/// Eliminate a basic block that has only phi's and an unconditional branch in
1124/// it.
1125void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1126 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1127 BasicBlock *DestBB = BI->getSuccessor(0);
1128
1129 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1130 << *BB << *DestBB);
1131
1132 // If the destination block has a single pred, then this is a trivial edge,
1133 // just collapse it.
1134 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1135 if (SinglePred != DestBB) {
1136 assert(SinglePred == BB &&
1137 "Single predecessor not the same as predecessor");
1138 // Merge DestBB into SinglePred/BB and delete it.
1140 // Note: BB(=SinglePred) will not be deleted on this path.
1141 // DestBB(=its single successor) is the one that was deleted.
1142 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1143
1144 if (IsHugeFunc) {
1145 // Update FreshBBs to optimize the merged BB.
1146 FreshBBs.insert(SinglePred);
1147 FreshBBs.erase(DestBB);
1148 }
1149 return;
1150 }
1151 }
1152
1153 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1154 // to handle the new incoming edges it is about to have.
1155 for (PHINode &PN : DestBB->phis()) {
1156 // Remove the incoming value for BB, and remember it.
1157 Value *InVal = PN.removeIncomingValue(BB, false);
1158
1159 // Two options: either the InVal is a phi node defined in BB or it is some
1160 // value that dominates BB.
1161 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1162 if (InValPhi && InValPhi->getParent() == BB) {
1163 // Add all of the input values of the input PHI as inputs of this phi.
1164 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1165 PN.addIncoming(InValPhi->getIncomingValue(i),
1166 InValPhi->getIncomingBlock(i));
1167 } else {
1168 // Otherwise, add one instance of the dominating value for each edge that
1169 // we will be adding.
1170 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1171 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1172 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1173 } else {
1174 for (BasicBlock *Pred : predecessors(BB))
1175 PN.addIncoming(InVal, Pred);
1176 }
1177 }
1178 }
1179
1180 // Preserve loop Metadata.
1181 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1182 for (auto *Pred : predecessors(BB))
1183 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1184 }
1185
1186 // The PHIs are now updated, change everything that refers to BB to use
1187 // DestBB and remove BB.
1188 BB->replaceAllUsesWith(DestBB);
1189 BB->eraseFromParent();
1190 ++NumBlocksElim;
1191
1192 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1193}
1194
1195// Computes a map of base pointer relocation instructions to corresponding
1196// derived pointer relocation instructions given a vector of all relocate calls
1198 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1200 &RelocateInstMap) {
1201 // Collect information in two maps: one primarily for locating the base object
1202 // while filling the second map; the second map is the final structure holding
1203 // a mapping between Base and corresponding Derived relocate calls
1205 for (auto *ThisRelocate : AllRelocateCalls) {
1206 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1207 ThisRelocate->getDerivedPtrIndex());
1208 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1209 }
1210 for (auto &Item : RelocateIdxMap) {
1211 std::pair<unsigned, unsigned> Key = Item.first;
1212 if (Key.first == Key.second)
1213 // Base relocation: nothing to insert
1214 continue;
1215
1216 GCRelocateInst *I = Item.second;
1217 auto BaseKey = std::make_pair(Key.first, Key.first);
1218
1219 // We're iterating over RelocateIdxMap so we cannot modify it.
1220 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1221 if (MaybeBase == RelocateIdxMap.end())
1222 // TODO: We might want to insert a new base object relocate and gep off
1223 // that, if there are enough derived object relocates.
1224 continue;
1225
1226 RelocateInstMap[MaybeBase->second].push_back(I);
1227 }
1228}
1229
1230// Accepts a GEP and extracts the operands into a vector provided they're all
1231// small integer constants
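// For example (illustrative), for
//   %p = getelementptr inbounds [32 x i32], ptr %base, i64 0, i64 15
// OffsetV becomes {0, 15}; a GEP with a non-constant index, or a constant
// index larger than 20, is rejected.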
1233 SmallVectorImpl<Value *> &OffsetV) {
1234 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1235 // Only accept small constant integer operands
1236 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1237 if (!Op || Op->getZExtValue() > 20)
1238 return false;
1239 }
1240
1241 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1242 OffsetV.push_back(GEP->getOperand(i));
1243 return true;
1244}
1245
1246// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1247// replace, computes a replacement, and applies it.
1248static bool
1250 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1251 bool MadeChange = false;
1252 // We must ensure that the relocation of the derived pointer is defined after
1253 // the relocation of the base pointer. If we find a relocation corresponding
1254 // to the base that is defined earlier than the relocation of the base, we
1255 // move the relocation of the base right before the found relocation. We only
1256 // consider relocations in the same basic block as the relocation of the base;
1257 // relocations from other basic blocks are skipped by this optimization.
1258 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1259 &*R != RelocatedBase; ++R)
1260 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1261 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1262 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1263 RelocatedBase->moveBefore(RI->getIterator());
1264 MadeChange = true;
1265 break;
1266 }
1267
1268 for (GCRelocateInst *ToReplace : Targets) {
1269 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1270 "Not relocating a derived object of the original base object");
1271 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1272 // A duplicate relocate call. TODO: coalesce duplicates.
1273 continue;
1274 }
1275
1276 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1277 // Base and derived relocates are in different basic blocks.
1278 // In this case transform is only valid when base dominates derived
1279 // relocate. However it would be too expensive to check dominance
1280 // for each such relocate, so we skip the whole transformation.
1281 continue;
1282 }
1283
1284 Value *Base = ToReplace->getBasePtr();
1285 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1286 if (!Derived || Derived->getPointerOperand() != Base)
1287 continue;
1288
1290 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1291 continue;
1292
1293 // Create a Builder and replace the target callsite with a gep
1294 assert(RelocatedBase->getNextNode() &&
1295 "Should always have one since it's not a terminator");
1296
1297 // Insert after RelocatedBase
1298 IRBuilder<> Builder(RelocatedBase->getNextNode());
1299 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1300
1301 // If gc_relocate does not match the actual type, cast it to the right type.
1302 // In theory, there must be a bitcast after gc_relocate if the type does not
1303 // match, and we should reuse it to get the derived pointer. But there could be
1304 // cases like this:
1305 // bb1:
1306 // ...
1307 // %g1 = call coldcc i8 addrspace(1)*
1308 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1309 //
1310 // bb2:
1311 // ...
1312 // %g2 = call coldcc i8 addrspace(1)*
1313 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1314 //
1315 // merge:
1316 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1317 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1318 //
1319 // In this case, we cannot find the bitcast anymore. So we insert a new
1320 // bitcast no matter whether there is already one or not. In this way, we can handle
1321 // all cases, and the extra bitcast should be optimized away in later
1322 // passes.
1323 Value *ActualRelocatedBase = RelocatedBase;
1324 if (RelocatedBase->getType() != Base->getType()) {
1325 ActualRelocatedBase =
1326 Builder.CreateBitCast(RelocatedBase, Base->getType());
1327 }
1328 Value *Replacement =
1329 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1330 ArrayRef(OffsetV));
1331 Replacement->takeName(ToReplace);
1332 // If the newly generated derived pointer's type does not match the original
1333 // derived pointer's type, cast the new derived pointer to match it. Same
1334 // reasoning as above.
1335 Value *ActualReplacement = Replacement;
1336 if (Replacement->getType() != ToReplace->getType()) {
1337 ActualReplacement =
1338 Builder.CreateBitCast(Replacement, ToReplace->getType());
1339 }
1340 ToReplace->replaceAllUsesWith(ActualReplacement);
1341 ToReplace->eraseFromParent();
1342
1343 MadeChange = true;
1344 }
1345 return MadeChange;
1346}
1347
1348// Turns this:
1349//
1350// %base = ...
1351// %ptr = gep %base + 15
1352// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1353// %base' = relocate(%tok, i32 4, i32 4)
1354// %ptr' = relocate(%tok, i32 4, i32 5)
1355// %val = load %ptr'
1356//
1357// into this:
1358//
1359// %base = ...
1360// %ptr = gep %base + 15
1361// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1362// %base' = gc.relocate(%tok, i32 4, i32 4)
1363// %ptr' = gep %base' + 15
1364// %val = load %ptr'
1365bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1366 bool MadeChange = false;
1367 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1368 for (auto *U : I.users())
1369 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1370 // Collect all the relocate calls associated with a statepoint
1371 AllRelocateCalls.push_back(Relocate);
1372
1373 // We need at least one base pointer relocation + one derived pointer
1374 // relocation to mangle
1375 if (AllRelocateCalls.size() < 2)
1376 return false;
1377
1378 // RelocateInstMap is a mapping from the base relocate instruction to the
1379 // corresponding derived relocate instructions
1380 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1381 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1382 if (RelocateInstMap.empty())
1383 return false;
1384
1385 for (auto &Item : RelocateInstMap)
1386 // Item.first is the RelocatedBase to offset against
1387 // Item.second is the vector of Targets to replace
1388 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1389 return MadeChange;
1390}
1391
1392/// Sink the specified cast instruction into its user blocks.
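/// For example (illustrative IR; block and value names are hypothetical), a
/// cast defined in %def and used in %use1 and %use2 ends up cloned into each
/// user block, and the original cast is erased once it has no remaining uses:
///   use1:
///     %z.1 = zext i8 %v to i32
///     call void @f(i32 %z.1)
///   use2:
///     %z.2 = zext i8 %v to i32
///     store i32 %z.2, ptr %p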
1393static bool SinkCast(CastInst *CI) {
1394 BasicBlock *DefBB = CI->getParent();
1395
1396 /// InsertedCasts - Only insert a cast in each block once.
1398
1399 bool MadeChange = false;
1400 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1401 UI != E;) {
1402 Use &TheUse = UI.getUse();
1404
1405 // Figure out which BB this cast is used in. For PHI's this is the
1406 // appropriate predecessor block.
1407 BasicBlock *UserBB = User->getParent();
1408 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1409 UserBB = PN->getIncomingBlock(TheUse);
1410 }
1411
1412 // Preincrement use iterator so we don't invalidate it.
1413 ++UI;
1414
1415 // The first insertion point of a block containing an EH pad is after the
1416 // pad. If the pad is the user, we cannot sink the cast past the pad.
1417 if (User->isEHPad())
1418 continue;
1419
1420 // If the block selected to receive the cast is an EH pad that does not
1421 // allow non-PHI instructions before the terminator, we can't sink the
1422 // cast.
1423 if (UserBB->getTerminator()->isEHPad())
1424 continue;
1425
1426 // If this user is in the same block as the cast, don't change the cast.
1427 if (UserBB == DefBB)
1428 continue;
1429
1430 // If we have already inserted a cast into this block, use it.
1431 CastInst *&InsertedCast = InsertedCasts[UserBB];
1432
1433 if (!InsertedCast) {
1434 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1435 assert(InsertPt != UserBB->end());
1436 InsertedCast = cast<CastInst>(CI->clone());
1437 InsertedCast->insertBefore(*UserBB, InsertPt);
1438 }
1439
1440 // Replace a use of the cast with a use of the new cast.
1441 TheUse = InsertedCast;
1442 MadeChange = true;
1443 ++NumCastUses;
1444 }
1445
1446 // If we removed all uses, nuke the cast.
1447 if (CI->use_empty()) {
1448 salvageDebugInfo(*CI);
1449 CI->eraseFromParent();
1450 MadeChange = true;
1451 }
1452
1453 return MadeChange;
1454}
1455
1456/// If the specified cast instruction is a noop copy (e.g. it's casting from
1457/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1458/// reduce the number of virtual registers that must be created and coalesced.
1459///
1460/// Return true if any changes are made.
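/// For instance (illustrative), an addrspacecast that the target reports as
/// free can qualify, and a trunc i32 -> i8 on a target that promotes both
/// types back to i32 compares equal after promotion and is treated as a noop
/// copy; an int<->fp conversion or a genuine zext/sext never does.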
1462 const DataLayout &DL) {
1463 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1464 // than sinking only nop casts, but is helpful on some platforms.
1465 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1466 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1467 ASC->getDestAddressSpace()))
1468 return false;
1469 }
1470
1471 // If this is a noop copy,
1472 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1473 EVT DstVT = TLI.getValueType(DL, CI->getType());
1474
1475 // This is an fp<->int conversion?
1476 if (SrcVT.isInteger() != DstVT.isInteger())
1477 return false;
1478
1479 // If this is an extension, it will be a zero or sign extension, which
1480 // isn't a noop.
1481 if (SrcVT.bitsLT(DstVT))
1482 return false;
1483
1484 // If these values will be promoted, find out what they will be promoted
1485 // to. This helps us consider truncates on PPC as noop copies when they
1486 // are.
1487 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1489 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1490 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1492 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1493
1494 // If, after promotion, these are the same types, this is a noop copy.
1495 if (SrcVT != DstVT)
1496 return false;
1497
1498 return SinkCast(CI);
1499}
1500
1501// Match a simple increment by constant operation. Note that if a sub is
1502// matched, the step is negated (as if the step had been canonicalized to
1503// an add, even though we leave the instruction alone.)
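// For example (illustrative), both of these match with LHS == %iv:
//   %iv.next = add i32 %iv, 1     ; Step == 1
//   %iv.next = sub i32 %iv, 4     ; Step == -4 after negation
// while a non-constant step such as "add i32 %iv, %n" does not match.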
1504static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1505 Constant *&Step) {
1506 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1508 m_Instruction(LHS), m_Constant(Step)))))
1509 return true;
1510 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1512 m_Instruction(LHS), m_Constant(Step))))) {
1513 Step = ConstantExpr::getNeg(Step);
1514 return true;
1515 }
1516 return false;
1517}
1518
1519/// If given \p PN is an inductive variable with value IVInc coming from the
1520/// backedge, and on each iteration it gets increased by Step, return pair
1521/// <IVInc, Step>. Otherwise, return std::nullopt.
1522static std::optional<std::pair<Instruction *, Constant *>>
1523getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1524 const Loop *L = LI->getLoopFor(PN->getParent());
1525 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1526 return std::nullopt;
1527 auto *IVInc =
1528 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1529 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1530 return std::nullopt;
1531 Instruction *LHS = nullptr;
1532 Constant *Step = nullptr;
1533 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1534 return std::make_pair(IVInc, Step);
1535 return std::nullopt;
1536}
1537
1538static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1539 auto *I = dyn_cast<Instruction>(V);
1540 if (!I)
1541 return false;
1542 Instruction *LHS = nullptr;
1543 Constant *Step = nullptr;
1544 if (!matchIncrement(I, LHS, Step))
1545 return false;
1546 if (auto *PN = dyn_cast<PHINode>(LHS))
1547 if (auto IVInc = getIVIncrement(PN, LI))
1548 return IVInc->first == I;
1549 return false;
1550}
1551
1552bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1553 Value *Arg0, Value *Arg1,
1554 CmpInst *Cmp,
1555 Intrinsic::ID IID) {
1556 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1557 if (!isIVIncrement(BO, LI))
1558 return false;
1559 const Loop *L = LI->getLoopFor(BO->getParent());
1560 assert(L && "L should not be null after isIVIncrement()");
1561 // Do not risk moving the increment into a child loop.
1562 if (LI->getLoopFor(Cmp->getParent()) != L)
1563 return false;
1564
1565 // Finally, we need to ensure that the insert point will dominate all
1566 // existing uses of the increment.
1567
1568 auto &DT = getDT(*BO->getParent()->getParent());
1569 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1570 // If we're moving up the dom tree, all uses are trivially dominated.
1571 // (This is the common case for code produced by LSR.)
1572 return true;
1573
1574 // Otherwise, special case the single use in the phi recurrence.
1575 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1576 };
1577 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1578 // We used to use a dominator tree here to allow multi-block optimization.
1579 // But that was problematic because:
1580 // 1. It could cause a perf regression by hoisting the math op into the
1581 // critical path.
1582 // 2. It could cause a perf regression by creating a value that was live
1583 // across multiple blocks and increasing register pressure.
1584 // 3. Use of a dominator tree could cause large compile-time regression.
1585 // This is because we recompute the DT on every change in the main CGP
1586 // run-loop. The recomputing is probably unnecessary in many cases, so if
1587 // that was fixed, using a DT here would be ok.
1588 //
1589 // There is one important particular case we still want to handle: if BO is
1590 // the IV increment. Important properties that make it profitable:
1591 // - We can speculate IV increment anywhere in the loop (as long as the
1592 // indvar Phi is its only user);
1593 // - Upon computing Cmp, we effectively compute something equivalent to the
1594 // IV increment (even though it looks different in the IR). So moving it up
1595 // to the cmp point does not really increase register pressure.
1596 return false;
1597 }
1598
1599 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
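// For example (illustrative): %a = add i32 %x, -8 paired with
// %c = icmp ult i32 %x, 8 becomes usub.with.overflow(i32 %x, i32 8).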
1600 if (BO->getOpcode() == Instruction::Add &&
1601 IID == Intrinsic::usub_with_overflow) {
1602 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1603 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1604 }
1605
1606 // Insert at the first instruction of the pair.
1607 Instruction *InsertPt = nullptr;
1608 for (Instruction &Iter : *Cmp->getParent()) {
1609 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1610 // the overflow intrinsic are defined.
1611 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1612 InsertPt = &Iter;
1613 break;
1614 }
1615 }
1616 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1617
1618 IRBuilder<> Builder(InsertPt);
1619 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1620 if (BO->getOpcode() != Instruction::Xor) {
1621 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1622 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1623 } else
1624 assert(BO->hasOneUse() &&
1625 "Patterns with XOr should use the BO only in the compare");
1626 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1627 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1628 Cmp->eraseFromParent();
1629 BO->eraseFromParent();
1630 return true;
1631}
1632
1633/// Match special-case patterns that check for unsigned add overflow.
1634static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1635 BinaryOperator *&Add) {
1636 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1637 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1638 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1639
1640 // We are not expecting non-canonical/degenerate code. Just bail out.
1641 if (isa<Constant>(A))
1642 return false;
1643
1644 ICmpInst::Predicate Pred = Cmp->getPredicate();
1645 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1646 B = ConstantInt::get(B->getType(), 1);
1647 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1648 B = Constant::getAllOnesValue(B->getType());
1649 else
1650 return false;
1651
1652 // Check the users of the variable operand of the compare looking for an add
1653 // with the adjusted constant.
1654 for (User *U : A->users()) {
1655 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1656 Add = cast<BinaryOperator>(U);
1657 return true;
1658 }
1659 }
1660 return false;
1661}
1662
1663/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1664/// intrinsic. Return true if any changes were made.
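/// A typical input (illustrative sketch) is:
///   %add = add i32 %a, %b
///   %ov  = icmp ult i32 %add, %a   ; unsigned overflow check
/// which is rewritten to a single call to llvm.uadd.with.overflow.i32.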
1665bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1666 ModifyDT &ModifiedDT) {
1667 bool EdgeCase = false;
1668 Value *A, *B;
1669 BinaryOperator *Add;
1670 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1671 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1672 return false;
1673 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1674 A = Add->getOperand(0);
1675 B = Add->getOperand(1);
1676 EdgeCase = true;
1677 }
1678
1679 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1680 TLI->getValueType(*DL, Add->getType()),
1681 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1682 return false;
1683
1684 // We don't want to move around uses of condition values this late, so we
1685 // check if it is legal to create the call to the intrinsic in the basic
1686 // block containing the icmp.
1687 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1688 return false;
1689
1690 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1691 Intrinsic::uadd_with_overflow))
1692 return false;
1693
1694 // Reset callers - do not crash by iterating over a dead instruction.
1695 ModifiedDT = ModifyDT::ModifyInstDT;
1696 return true;
1697}
1698
1699bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1700 ModifyDT &ModifiedDT) {
1701 // We are not expecting non-canonical/degenerate code. Just bail out.
1702 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1703 if (isa<Constant>(A) && isa<Constant>(B))
1704 return false;
1705
1706 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1707 ICmpInst::Predicate Pred = Cmp->getPredicate();
1708 if (Pred == ICmpInst::ICMP_UGT) {
1709 std::swap(A, B);
1710 Pred = ICmpInst::ICMP_ULT;
1711 }
1712 // Convert special-case: (A == 0) is the same as (A u< 1).
1713 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1714 B = ConstantInt::get(B->getType(), 1);
1715 Pred = ICmpInst::ICMP_ULT;
1716 }
1717 // Convert special-case: (A != 0) is the same as (0 u< A).
1718 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1719 std::swap(A, B);
1720 Pred = ICmpInst::ICMP_ULT;
1721 }
1722 if (Pred != ICmpInst::ICMP_ULT)
1723 return false;
1724
1725 // Walk the users of a variable operand of a compare looking for a subtract or
1726 // add with that same operand. Also match the 2nd operand of the compare to
1727 // the add/sub, but that may be a negated constant operand of an add.
1728 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1729 BinaryOperator *Sub = nullptr;
1730 for (User *U : CmpVariableOperand->users()) {
1731 // A - B, A u< B --> usubo(A, B)
1732 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1733 Sub = cast<BinaryOperator>(U);
1734 break;
1735 }
1736
1737 // A + (-C), A u< C (canonicalized form of (sub A, C))
1738 const APInt *CmpC, *AddC;
1739 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1740 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1741 Sub = cast<BinaryOperator>(U);
1742 break;
1743 }
1744 }
1745 if (!Sub)
1746 return false;
1747
1748 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1749 TLI->getValueType(*DL, Sub->getType()),
1750 Sub->hasNUsesOrMore(1)))
1751 return false;
1752
1753 // We don't want to move around uses of condition values this late, so we
1754 // check if it is legal to create the call to the intrinsic in the basic
1755 // block containing the icmp.
1756 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1757 return false;
1758
1759 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1760 Cmp, Intrinsic::usub_with_overflow))
1761 return false;
1762
1763 // Reset callers - do not crash by iterating over a dead instruction.
1764 ModifiedDT = ModifyDT::ModifyInstDT;
1765 return true;
1766}
1767
1768// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1769// The same transformation exists in the DAG combiner, but we repeat it here
1770// because the DAG builder can break the pattern by moving the icmp into a
1770// successor block.
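// Sketch (illustrative): with a slow ctpop,
//   %pop = call i32 @llvm.ctpop.i32(i32 %x) ; %c = icmp eq i32 %pop, 1
// is expanded along the lines of (x ^ (x - 1)) u> (x - 1), avoiding the ctpop.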
1771bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1772 CmpPredicate Pred;
1773 Value *X;
1774 const APInt *C;
1775
1776 // (icmp (ctpop x), c)
1777 if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1778 m_APInt(C))))
1779 return false;
1780
1781 // We're only interested in "is power of 2 [or zero]" patterns.
1782 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1783 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1784 (Pred == CmpInst::ICMP_UGT && *C == 1);
1785 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1786 return false;
1787
1788 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1789 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1790 // and otherwise expand ctpop into a few simple instructions.
1791 Type *OpTy = X->getType();
1792 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1793 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1794 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1795 return false;
1796
1797 // ctpop(x) == 1 -> ctpop(x) u< 2
1798 // ctpop(x) != 1 -> ctpop(x) u> 1
1799 if (Pred == ICmpInst::ICMP_EQ) {
1800 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1801 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1802 } else {
1803 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1804 }
1805 return true;
1806 }
1807
1808 Value *NewCmp;
1809 if (IsPowerOf2OrZeroTest ||
1810 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1811 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1812 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1813 IRBuilder<> Builder(Cmp);
1814 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1815 Value *And = Builder.CreateAnd(X, Sub);
1816 CmpInst::Predicate NewPred =
1817 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1818 ? CmpInst::ICMP_EQ
1819 : CmpInst::ICMP_NE;
1820 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1821 } else {
1822 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1823 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1824 IRBuilder<> Builder(Cmp);
1825 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1826 Value *Xor = Builder.CreateXor(X, Sub);
1827 CmpInst::Predicate NewPred =
1828 Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1829 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1830 }
1831
1832 Cmp->replaceAllUsesWith(NewCmp);
1833 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1834 return true;
1835}
1836
1837/// Sink the given CmpInst into user blocks to reduce the number of virtual
1838/// registers that must be created and coalesced. This is a clear win except on
1839/// targets with multiple condition code registers (PowerPC), where it might
1840/// lose; some adjustment may be wanted there.
1841///
1842/// Return true if any changes are made.
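/// Sketch (illustrative):
///   BB1: %c = icmp eq i32 %x, 0
///   BB2: br i1 %c, ...
/// ==>
///   BB2: %c.sunk = icmp eq i32 %x, 0
///        br i1 %c.sunk, ...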
1843static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1844 const DataLayout &DL) {
1845 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1846 return false;
1847
1848 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1849 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1850 return false;
1851
1852 bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1853 return isa<PHINode>(U) ||
1854 cast<Instruction>(U)->getParent() == Cmp->getParent();
1855 });
1856
1857 // Avoid sinking larger-than-legal integer comparisons unless it is ONLY used in
1858 // another BB.
1859 if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1860 Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1861 DL.getLargestLegalIntTypeSizeInBits())
1862 return false;
1863
1864 // Only insert a cmp in each block once.
1865 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1866
1867 bool MadeChange = false;
1868 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1869 UI != E;) {
1870 Use &TheUse = UI.getUse();
1871 Instruction *User = cast<Instruction>(*UI);
1872
1873 // Preincrement use iterator so we don't invalidate it.
1874 ++UI;
1875
1876 // Don't bother for PHI nodes.
1877 if (isa<PHINode>(User))
1878 continue;
1879
1880 // Figure out which BB this cmp is used in.
1881 BasicBlock *UserBB = User->getParent();
1882 BasicBlock *DefBB = Cmp->getParent();
1883
1884 // If this user is in the same block as the cmp, don't change the cmp.
1885 if (UserBB == DefBB)
1886 continue;
1887
1888 // If we have already inserted a cmp into this block, use it.
1889 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1890
1891 if (!InsertedCmp) {
1892 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1893 assert(InsertPt != UserBB->end());
1894 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1895 Cmp->getOperand(0), Cmp->getOperand(1), "");
1896 InsertedCmp->insertBefore(*UserBB, InsertPt);
1897 // Propagate the debug info.
1898 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1899 }
1900
1901 // Replace a use of the cmp with a use of the new cmp.
1902 TheUse = InsertedCmp;
1903 MadeChange = true;
1904 ++NumCmpUses;
1905 }
1906
1907 // If we removed all uses, nuke the cmp.
1908 if (Cmp->use_empty()) {
1909 Cmp->eraseFromParent();
1910 MadeChange = true;
1911 }
1912
1913 return MadeChange;
1914}
1915
1916/// For pattern like:
1917///
1918/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1919/// ...
1920/// DomBB:
1921/// ...
1922/// br DomCond, TrueBB, CmpBB
1923/// CmpBB: (with DomBB being the single predecessor)
1924/// ...
1925/// Cmp = icmp eq CmpOp0, CmpOp1
1926/// ...
1927///
1928/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1929/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1930/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1931/// After that, DomCond and Cmp can use the same comparison, saving one
1932/// comparison.
1933///
1934/// Return true if any changes are made.
1935static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1936 const TargetLowering &TLI) {
1937 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1938 return false;
1939
1940 ICmpInst::Predicate Pred = Cmp->getPredicate();
1941 if (Pred != ICmpInst::ICMP_EQ)
1942 return false;
1943
1944 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1945 // icmp slt/sgt would introduce more redundant LLVM IR.
1946 for (User *U : Cmp->users()) {
1947 if (isa<BranchInst>(U))
1948 continue;
1949 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1950 continue;
1951 return false;
1952 }
1953
1954 // This is a cheap/incomplete check for dominance - just match a single
1955 // predecessor with a conditional branch.
1956 BasicBlock *CmpBB = Cmp->getParent();
1957 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1958 if (!DomBB)
1959 return false;
1960
1961 // We want to ensure that the only way control gets to the comparison of
1962 // interest is that a less/greater than comparison on the same operands is
1963 // false.
1964 Value *DomCond;
1965 BasicBlock *TrueBB, *FalseBB;
1966 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1967 return false;
1968 if (CmpBB != FalseBB)
1969 return false;
1970
1971 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1972 CmpPredicate DomPred;
1973 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1974 return false;
1975 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1976 return false;
1977
1978 // Convert the equality comparison to the opposite of the dominating
1979 // comparison and swap the direction for all branch/select users.
1980 // We have conceptually converted:
1981 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1982 // to
1983 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1984 // And similarly for branches.
1985 for (User *U : Cmp->users()) {
1986 if (auto *BI = dyn_cast<BranchInst>(U)) {
1987 assert(BI->isConditional() && "Must be conditional");
1988 BI->swapSuccessors();
1989 continue;
1990 }
1991 if (auto *SI = dyn_cast<SelectInst>(U)) {
1992 // Swap operands
1993 SI->swapValues();
1994 SI->swapProfMetadata();
1995 continue;
1996 }
1997 llvm_unreachable("Must be a branch or a select");
1998 }
1999 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
2000 return true;
2001}
2002
2003/// Many architectures use the same instruction for both subtract and cmp. Try
2004/// to swap cmp operands to match subtract operations to allow for CSE.
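/// Sketch (illustrative): if %s = sub i32 %b, %a already exists, rewrite
/// %c = icmp ult i32 %a, %b as %c = icmp ugt i32 %b, %a so both can share
/// one subtract-style instruction.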
2005static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
2006 Value *Op0 = Cmp->getOperand(0);
2007 Value *Op1 = Cmp->getOperand(1);
2008 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
2009 isa<Constant>(Op1) || Op0 == Op1)
2010 return false;
2011
2012 // If a subtract already has the same operands as a compare, swapping would be
2013 // bad. If a subtract has the same operands as a compare but in reverse order,
2014 // then swapping is good.
2015 int GoodToSwap = 0;
2016 unsigned NumInspected = 0;
2017 for (const User *U : Op0->users()) {
2018 // Avoid walking many users.
2019 if (++NumInspected > 128)
2020 return false;
2021 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2022 GoodToSwap++;
2023 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2024 GoodToSwap--;
2025 }
2026
2027 if (GoodToSwap > 0) {
2028 Cmp->swapOperands();
2029 return true;
2030 }
2031 return false;
2032}
2033
2034static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2035 const DataLayout &DL) {
2036 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2037 if (!FCmp)
2038 return false;
2039
2040 // Don't fold if the target offers free fabs and the predicate is legal.
2041 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2042 if (TLI.isFAbsFree(VT) &&
2043 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
2044 VT.getSimpleVT()))
2045 return false;
2046
2047 // Reverse the canonicalization if it is a FP class test
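// e.g. (illustrative) a compare that is equivalent to "is %x an infinity
// (or NaN)" becomes llvm.is.fpclass(%x, fcInf) or
// llvm.is.fpclass(%x, fcInf|fcNan).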
2048 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2049 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2050 };
2051 auto [ClassVal, ClassTest] =
2052 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2053 FCmp->getOperand(0), FCmp->getOperand(1));
2054 if (!ClassVal)
2055 return false;
2056
2057 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2058 return false;
2059
2060 IRBuilder<> Builder(Cmp);
2061 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2062 Cmp->replaceAllUsesWith(IsFPClass);
2063 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
2064 return true;
2065}
2066
2067static bool isRemOfLoopIncrementWithLoopInvariant(
2068 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2069 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2070 Value *Incr, *RemAmt;
2071 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2072 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2073 return false;
2074
2075 Value *AddInst, *AddOffset;
2076 // Find out loop increment PHI.
2077 auto *PN = dyn_cast<PHINode>(Incr);
2078 if (PN != nullptr) {
2079 AddInst = nullptr;
2080 AddOffset = nullptr;
2081 } else {
2082 // Search through a NUW add on top of the loop increment.
2083 Value *V0, *V1;
2084 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2085 return false;
2086
2087 AddInst = Incr;
2088 PN = dyn_cast<PHINode>(V0);
2089 if (PN != nullptr) {
2090 AddOffset = V1;
2091 } else {
2092 PN = dyn_cast<PHINode>(V1);
2093 AddOffset = V0;
2094 }
2095 }
2096
2097 if (!PN)
2098 return false;
2099
2100 // This isn't strictly necessary; what we really need is one increment and any
2101 // number of initial values, all being the same.
2102 if (PN->getNumIncomingValues() != 2)
2103 return false;
2104
2105 // Only trivially analyzable loops.
2106 Loop *L = LI->getLoopFor(PN->getParent());
2107 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2108 return false;
2109
2110 // Require that the remainder is in the loop.
2111 if (!L->contains(Rem))
2112 return false;
2113
2114 // Only works if the remainder amount is loop-invariant.
2115 if (!L->isLoopInvariant(RemAmt))
2116 return false;
2117
2118 // Only works if the AddOffset is loop-invariant.
2119 if (AddOffset && !L->isLoopInvariant(AddOffset))
2120 return false;
2121
2122 // Is the PHI a loop increment?
2123 auto LoopIncrInfo = getIVIncrement(PN, LI);
2124 if (!LoopIncrInfo)
2125 return false;
2126
2127 // We need remainder_amount % increment_amount to be zero. Increment of one
2128 // satisfies that without any special logic and is overwhelmingly the common
2129 // case.
2130 if (!match(LoopIncrInfo->second, m_One()))
2131 return false;
2132
2133 // Need the increment to not overflow.
2134 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2135 return false;
2136
2137 // Set output variables.
2138 RemAmtOut = RemAmt;
2139 LoopIncrPNOut = PN;
2140 AddInstOut = AddInst;
2141 AddOffsetOut = AddOffset;
2142
2143 return true;
2144}
2145
2146// Try to transform:
2147//
2148// for(i = Start; i < End; ++i)
2149// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2150//
2151// ->
2152//
2153// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2154// for(i = Start; i < End; ++i, ++rem)
2155// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2156static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2157 const LoopInfo *LI,
2158 SmallSet<BasicBlock *, 32> &FreshBBs,
2159 bool IsHuge) {
2160 Value *AddOffset, *RemAmt, *AddInst;
2161 PHINode *LoopIncrPN;
2162 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2163 AddOffset, LoopIncrPN))
2164 return false;
2165
2166 // Only non-constant remainder as the extra IV is probably not profitable
2167 // in that case.
2168 //
2169 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2170 // we can rule out register pressure and ensure this `urem` is executed each
2171 // iteration, it's probably profitable to handle the const case as well.
2172 //
2173 // Potential TODO(2): Should we have a check for how "nested" this remainder
2174 // operation is? The new code runs every iteration so if the remainder is
2175 // guarded behind unlikely conditions this might not be worth it.
2176 if (match(RemAmt, m_ImmConstant()))
2177 return false;
2178
2179 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2180 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2181 // If we have an add, create the initial value for the remainder.
2182 // The logic here is:
2183 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2184 //
2185 // Only proceed if the expression simplifies (otherwise we can't fully
2186 // optimize out the urem).
2187 if (AddInst) {
2188 assert(AddOffset && "We found an add but missing values");
2189 // Without dom-condition/assumption cache we aren't likely to get much out
2190 // of a context instruction.
2191 Start = simplifyAddInst(Start, AddOffset,
2192 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2193 /*IsNUW=*/true, *DL);
2194 if (!Start)
2195 return false;
2196 }
2197
2198 // If we can't fully optimize out the `rem`, skip this transform.
2199 Start = simplifyURemInst(Start, RemAmt, *DL);
2200 if (!Start)
2201 return false;
2202
2203 // Create new remainder with induction variable.
2204 Type *Ty = Rem->getType();
2205 IRBuilder<> Builder(Rem->getContext());
2206
2207 Builder.SetInsertPoint(LoopIncrPN);
2208 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2209
2210 Builder.SetInsertPoint(cast<Instruction>(
2211 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2212 // `(add (urem x, y), 1)` is always nuw.
2213 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2214 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2215 Value *RemSel =
2216 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2217
2218 NewRem->addIncoming(Start, L->getLoopPreheader());
2219 NewRem->addIncoming(RemSel, L->getLoopLatch());
2220
2221 // Insert all touched BBs.
2222 FreshBBs.insert(LoopIncrPN->getParent());
2223 FreshBBs.insert(L->getLoopLatch());
2224 FreshBBs.insert(Rem->getParent());
2225 if (AddInst)
2226 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2227 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2228 Rem->eraseFromParent();
2229 if (AddInst && AddInst->use_empty())
2230 cast<Instruction>(AddInst)->eraseFromParent();
2231 return true;
2232}
2233
2234bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2235 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2236 return true;
2237 return false;
2238}
2239
2240bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2241 if (sinkCmpExpression(Cmp, *TLI, *DL))
2242 return true;
2243
2244 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2245 return true;
2246
2247 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2248 return true;
2249
2250 if (unfoldPowerOf2Test(Cmp))
2251 return true;
2252
2253 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2254 return true;
2255
2256 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2257 return true;
2258
2259 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2260 return true;
2261
2262 return false;
2263}
2264
2265/// Duplicate and sink the given 'and' instruction into user blocks where it is
2266/// used in a compare to allow isel to generate better code for targets where
2267/// this operation can be combined.
2268///
2269/// Return true if any changes are made.
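/// Sketch (illustrative):
///   BB1: %m = and i64 %x, 255
///   BB2: %c = icmp eq i64 %m, 0
/// ==>
///   BB2: %m.sunk = and i64 %x, 255
///        %c = icmp eq i64 %m.sunk, 0
/// On some targets this lets isel fold the mask and the zero-compare into a
/// single test-and-branch sequence.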
2270static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2271 SetOfInstrs &InsertedInsts) {
2272 // Double-check that we're not trying to optimize an instruction that was
2273 // already optimized by some other part of this pass.
2274 assert(!InsertedInsts.count(AndI) &&
2275 "Attempting to optimize already optimized and instruction");
2276 (void)InsertedInsts;
2277
2278 // Nothing to do for single use in same basic block.
2279 if (AndI->hasOneUse() &&
2280 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2281 return false;
2282
2283 // Try to avoid cases where sinking/duplicating is likely to increase register
2284 // pressure.
2285 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2286 !isa<ConstantInt>(AndI->getOperand(1)) &&
2287 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2288 return false;
2289
2290 for (auto *U : AndI->users()) {
2291 Instruction *User = cast<Instruction>(U);
2292
2293 // Only sink 'and' feeding icmp with 0.
2294 if (!isa<ICmpInst>(User))
2295 return false;
2296
2297 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2298 if (!CmpC || !CmpC->isZero())
2299 return false;
2300 }
2301
2302 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2303 return false;
2304
2305 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2306 LLVM_DEBUG(AndI->getParent()->dump());
2307
2308 // Push the 'and' into the same block as the icmp 0. There should only be
2309 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2310 // others, so we don't need to keep track of which BBs we insert into.
2311 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2312 UI != E;) {
2313 Use &TheUse = UI.getUse();
2314 Instruction *User = cast<Instruction>(*UI);
2315
2316 // Preincrement use iterator so we don't invalidate it.
2317 ++UI;
2318
2319 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2320
2321 // Keep the 'and' in the same place if the use is already in the same block.
2322 Instruction *InsertPt =
2323 User->getParent() == AndI->getParent() ? AndI : User;
2324 Instruction *InsertedAnd = BinaryOperator::Create(
2325 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2326 InsertPt->getIterator());
2327 // Propagate the debug info.
2328 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2329
2330 // Replace a use of the 'and' with a use of the new 'and'.
2331 TheUse = InsertedAnd;
2332 ++NumAndUses;
2333 LLVM_DEBUG(User->getParent()->dump());
2334 }
2335
2336 // We removed all uses, nuke the and.
2337 AndI->eraseFromParent();
2338 return true;
2339}
2340
2341/// Check if the candidates could be combined with a shift instruction, which
2342/// includes:
2343/// 1. Truncate instruction
2344/// 2. An 'and' instruction where the immediate is a mask of the low bits:
2345/// imm & (imm+1) == 0
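/// e.g. (illustrative) imm = 0x00ff qualifies (0x00ff & 0x0100 == 0), while
/// imm = 0x0180 does not (0x0180 & 0x0181 == 0x0180).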
2346static bool isExtractBitsCandidateUse(Instruction *User) {
2347 if (!isa<TruncInst>(User)) {
2348 if (User->getOpcode() != Instruction::And ||
2349 !isa<ConstantInt>(User->getOperand(1)))
2350 return false;
2351
2352 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2353
2354 if ((Cimm & (Cimm + 1)).getBoolValue())
2355 return false;
2356 }
2357 return true;
2358}
2359
2360/// Sink both shift and truncate instruction to the use of truncate's BB.
2361static bool
2362SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2363 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2364 const TargetLowering &TLI, const DataLayout &DL) {
2365 BasicBlock *UserBB = User->getParent();
2366 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2367 auto *TruncI = cast<TruncInst>(User);
2368 bool MadeChange = false;
2369
2370 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2371 TruncE = TruncI->user_end();
2372 TruncUI != TruncE;) {
2373
2374 Use &TruncTheUse = TruncUI.getUse();
2375 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2376 // Preincrement use iterator so we don't invalidate it.
2377
2378 ++TruncUI;
2379
2380 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2381 if (!ISDOpcode)
2382 continue;
2383
2384 // If the use is actually a legal node, there will not be an
2385 // implicit truncate.
2386 // FIXME: always querying the result type is just an
2387 // approximation; some nodes' legality is determined by the
2388 // operand or other means. There's no good way to find out though.
2389 if (TLI.isOperationLegalOrCustom(
2390 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2391 continue;
2392
2393 // Don't bother for PHI nodes.
2394 if (isa<PHINode>(TruncUser))
2395 continue;
2396
2397 BasicBlock *TruncUserBB = TruncUser->getParent();
2398
2399 if (UserBB == TruncUserBB)
2400 continue;
2401
2402 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2403 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2404
2405 if (!InsertedShift && !InsertedTrunc) {
2406 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2407 assert(InsertPt != TruncUserBB->end());
2408 // Sink the shift
2409 if (ShiftI->getOpcode() == Instruction::AShr)
2410 InsertedShift =
2411 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2412 else
2413 InsertedShift =
2414 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2415 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2416 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2417
2418 // Sink the trunc
2419 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2420 TruncInsertPt++;
2421 // It will go ahead of any debug-info.
2422 TruncInsertPt.setHeadBit(true);
2423 assert(TruncInsertPt != TruncUserBB->end());
2424
2425 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2426 TruncI->getType(), "");
2427 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2428 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2429
2430 MadeChange = true;
2431
2432 TruncTheUse = InsertedTrunc;
2433 }
2434 }
2435 return MadeChange;
2436}
2437
2438/// Sink the shift *right* instruction into user blocks if the uses could
2439/// potentially be combined with this shift instruction to generate a BitExtract
2440/// instruction. It will only be applied if the architecture supports the
2441/// BitExtract instruction. Here is an example:
2442/// BB1:
2443/// %x.extract.shift = lshr i64 %arg1, 32
2444/// BB2:
2445/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2446/// ==>
2447///
2448/// BB2:
2449/// %x.extract.shift.1 = lshr i64 %arg1, 32
2450/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2451///
2452/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2453/// instruction.
2454/// Return true if any changes are made.
2455static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2456 const TargetLowering &TLI,
2457 const DataLayout &DL) {
2458 BasicBlock *DefBB = ShiftI->getParent();
2459
2460 /// Only insert instructions in each block once.
2461 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2462
2463 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2464
2465 bool MadeChange = false;
2466 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2467 UI != E;) {
2468 Use &TheUse = UI.getUse();
2469 Instruction *User = cast<Instruction>(*UI);
2470 // Preincrement use iterator so we don't invalidate it.
2471 ++UI;
2472
2473 // Don't bother for PHI nodes.
2474 if (isa<PHINode>(User))
2475 continue;
2476
2477 if (!isExtractBitsCandidateUse(User))
2478 continue;
2479
2480 BasicBlock *UserBB = User->getParent();
2481
2482 if (UserBB == DefBB) {
2483 // If the shift and truncate instructions are in the same BB, the use of
2484 // the truncate (TruncUse) may still introduce another truncate if it is
2485 // not legal. In this case, we would like to sink both the shift and the
2486 // truncate instruction to the BB of TruncUse.
2487 // for example:
2488 // BB1:
2489 // i64 shift.result = lshr i64 opnd, imm
2490 // trunc.result = trunc shift.result to i16
2491 //
2492 // BB2:
2493 // ----> We will have an implicit truncate here if the architecture does
2494 // not have i16 compare.
2495 // cmp i16 trunc.result, opnd2
2496 //
2497 if (isa<TruncInst>(User) &&
2498 shiftIsLegal
2499 // If the type of the truncate is legal, no truncate will be
2500 // introduced in other basic blocks.
2501 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2502 MadeChange =
2503 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2504
2505 continue;
2506 }
2507 // If we have already inserted a shift into this block, use it.
2508 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2509
2510 if (!InsertedShift) {
2511 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2512 assert(InsertPt != UserBB->end());
2513
2514 if (ShiftI->getOpcode() == Instruction::AShr)
2515 InsertedShift =
2516 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2517 else
2518 InsertedShift =
2519 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2520 InsertedShift->insertBefore(*UserBB, InsertPt);
2521 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2522
2523 MadeChange = true;
2524 }
2525
2526 // Replace a use of the shift with a use of the new shift.
2527 TheUse = InsertedShift;
2528 }
2529
2530 // If we removed all uses, or there are none, nuke the shift.
2531 if (ShiftI->use_empty()) {
2532 salvageDebugInfo(*ShiftI);
2533 ShiftI->eraseFromParent();
2534 MadeChange = true;
2535 }
2536
2537 return MadeChange;
2538}
2539
2540/// If counting leading or trailing zeros is an expensive operation and a zero
2541/// input is defined, add a check for zero to avoid calling the intrinsic.
2542///
2543/// We want to transform:
2544/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2545///
2546/// into:
2547/// entry:
2548/// %cmpz = icmp eq i64 %A, 0
2549/// br i1 %cmpz, label %cond.end, label %cond.false
2550/// cond.false:
2551/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2552/// br label %cond.end
2553/// cond.end:
2554/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2555///
2556/// If the transform is performed, return true and set ModifiedDT to true.
2557static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
2558 const TargetLowering *TLI,
2559 const DataLayout *DL, ModifyDT &ModifiedDT,
2560 SmallSet<BasicBlock *, 32> &FreshBBs,
2561 bool IsHugeFunc) {
2562 // If a zero input is undefined, it doesn't make sense to despeculate that.
2563 if (match(CountZeros->getOperand(1), m_One()))
2564 return false;
2565
2566 // If it's cheap to speculate, there's nothing to do.
2567 Type *Ty = CountZeros->getType();
2568 auto IntrinsicID = CountZeros->getIntrinsicID();
2569 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2570 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2571 return false;
2572
2573 // Only handle scalar cases. Anything else requires too much work.
2574 unsigned SizeInBits = Ty->getScalarSizeInBits();
2575 if (Ty->isVectorTy())
2576 return false;
2577
2578 // Bail if the value is never zero.
2579 Use &Op = CountZeros->getOperandUse(0);
2580 if (isKnownNonZero(Op, *DL))
2581 return false;
2582
2583 // The intrinsic will be sunk behind a compare against zero and branch.
2584 BasicBlock *StartBlock = CountZeros->getParent();
2585 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2586 if (IsHugeFunc)
2587 FreshBBs.insert(CallBlock);
2588
2589 // Create another block after the count zero intrinsic. A PHI will be added
2590 // in this block to select the result of the intrinsic or the bit-width
2591 // constant if the input to the intrinsic is zero.
2592 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2593 // Any debug-info after CountZeros should not be included.
2594 SplitPt.setHeadBit(true);
2595 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2596 if (IsHugeFunc)
2597 FreshBBs.insert(EndBlock);
2598
2599 // Update the LoopInfo. The new blocks are in the same loop as the start
2600 // block.
2601 if (Loop *L = LI.getLoopFor(StartBlock)) {
2602 L->addBasicBlockToLoop(CallBlock, LI);
2603 L->addBasicBlockToLoop(EndBlock, LI);
2604 }
2605
2606 // Set up a builder to create a compare, conditional branch, and PHI.
2607 IRBuilder<> Builder(CountZeros->getContext());
2608 Builder.SetInsertPoint(StartBlock->getTerminator());
2609 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2610
2611 // Replace the unconditional branch that was created by the first split with
2612 // a compare against zero and a conditional branch.
2613 Value *Zero = Constant::getNullValue(Ty);
2614 // Avoid introducing branch on poison. This also replaces the ctz operand.
2615 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2616 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2617 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2618 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2619 StartBlock->getTerminator()->eraseFromParent();
2620
2621 // Create a PHI in the end block to select either the output of the intrinsic
2622 // or the bit width of the operand.
2623 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2624 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2625 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2626 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2627 PN->addIncoming(BitWidth, StartBlock);
2628 PN->addIncoming(CountZeros, CallBlock);
2629
2630 // We are explicitly handling the zero case, so we can set the intrinsic's
2631 // undefined zero argument to 'true'. This will also prevent reprocessing the
2632 // intrinsic; we only despeculate when a zero input is defined.
2633 CountZeros->setArgOperand(1, Builder.getTrue());
2634 ModifiedDT = ModifyDT::ModifyBBDT;
2635 return true;
2636}
2637
2638bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2639 BasicBlock *BB = CI->getParent();
2640
2641 // Sink address computing for memory operands into the block.
2642 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2643 return true;
2644
2645 // Align the pointer arguments to this call if the target thinks it's a good
2646 // idea
2647 unsigned MinSize;
2648 Align PrefAlign;
2649 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2650 for (auto &Arg : CI->args()) {
2651 // We want to align both objects whose address is used directly and
2652 // objects whose address is used in casts and GEPs, though it only makes
2653 // sense for GEPs if the offset is a multiple of the desired alignment and
2654 // if size - offset meets the size threshold.
2655 if (!Arg->getType()->isPointerTy())
2656 continue;
2657 APInt Offset(DL->getIndexSizeInBits(
2658 cast<PointerType>(Arg->getType())->getAddressSpace()),
2659 0);
2660 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2661 uint64_t Offset2 = Offset.getLimitedValue();
2662 if (!isAligned(PrefAlign, Offset2))
2663 continue;
2664 AllocaInst *AI;
2665 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign) {
2666 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(*DL);
2667 if (AllocaSize && AllocaSize->getKnownMinValue() >= MinSize + Offset2)
2668 AI->setAlignment(PrefAlign);
2669 }
2670 // Global variables can only be aligned if they are defined in this
2671 // object (i.e. they are uniquely initialized in this object), and
2672 // over-aligning global variables that have an explicit section is
2673 // forbidden.
2674 GlobalVariable *GV;
2675 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2676 GV->getPointerAlignment(*DL) < PrefAlign &&
2677 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2678 GV->setAlignment(PrefAlign);
2679 }
2680 }
2681 // If this is a memcpy (or similar) then we may be able to improve the
2682 // alignment.
2683 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2684 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2685 MaybeAlign MIDestAlign = MI->getDestAlign();
2686 if (!MIDestAlign || DestAlign > *MIDestAlign)
2687 MI->setDestAlignment(DestAlign);
2688 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2689 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2690 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2691 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2692 MTI->setSourceAlignment(SrcAlign);
2693 }
2694 }
2695
2696 // If we have a cold call site, try to sink addressing computation into the
2697 // cold block. This interacts with our handling for loads and stores to
2698 // ensure that we can fold all uses of a potential addressing computation
2699 // into their uses. TODO: generalize this to work over profiling data
2700 if (CI->hasFnAttr(Attribute::Cold) &&
2701 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2702 for (auto &Arg : CI->args()) {
2703 if (!Arg->getType()->isPointerTy())
2704 continue;
2705 unsigned AS = Arg->getType()->getPointerAddressSpace();
2706 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2707 return true;
2708 }
2709
2710 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2711 if (II) {
2712 switch (II->getIntrinsicID()) {
2713 default:
2714 break;
2715 case Intrinsic::assume:
2716 llvm_unreachable("llvm.assume should have been removed already");
2717 case Intrinsic::allow_runtime_check:
2718 case Intrinsic::allow_ubsan_check:
2719 case Intrinsic::experimental_widenable_condition: {
2720 // Give up on future widening opportunities so that we can fold away dead
2721 // paths and merge blocks before going into block-local instruction
2722 // selection.
2723 if (II->use_empty()) {
2724 II->eraseFromParent();
2725 return true;
2726 }
2727 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2728 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2729 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2730 });
2731 return true;
2732 }
2733 case Intrinsic::objectsize:
2734 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2735 case Intrinsic::is_constant:
2736 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2737 case Intrinsic::aarch64_stlxr:
2738 case Intrinsic::aarch64_stxr: {
2739 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2740 if (!ExtVal || !ExtVal->hasOneUse() ||
2741 ExtVal->getParent() == CI->getParent())
2742 return false;
2743 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2744 ExtVal->moveBefore(CI->getIterator());
2745 // Mark this instruction as "inserted by CGP", so that other
2746 // optimizations don't touch it.
2747 InsertedInsts.insert(ExtVal);
2748 return true;
2749 }
2750
2751 case Intrinsic::launder_invariant_group:
2752 case Intrinsic::strip_invariant_group: {
2753 Value *ArgVal = II->getArgOperand(0);
2754 auto it = LargeOffsetGEPMap.find(II);
2755 if (it != LargeOffsetGEPMap.end()) {
2756 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2757 // Make sure not to have to deal with iterator invalidation
2758 // after possibly adding ArgVal to LargeOffsetGEPMap.
2759 auto GEPs = std::move(it->second);
2760 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2761 LargeOffsetGEPMap.erase(II);
2762 }
2763
2764 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2765 II->eraseFromParent();
2766 return true;
2767 }
2768 case Intrinsic::cttz:
2769 case Intrinsic::ctlz:
2770 // If counting zeros is expensive, try to avoid it.
2771 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2772 IsHugeFunc);
2773 case Intrinsic::fshl:
2774 case Intrinsic::fshr:
2775 return optimizeFunnelShift(II);
2776 case Intrinsic::masked_gather:
2777 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2778 case Intrinsic::masked_scatter:
2779 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2780 case Intrinsic::masked_load:
2781 // Treat v1X masked load as load X type.
2782 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2783 if (VT->getNumElements() == 1) {
2784 Value *PtrVal = II->getArgOperand(0);
2785 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2786 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2787 return true;
2788 }
2789 }
2790 return false;
2791 case Intrinsic::masked_store:
2792 // Treat v1X masked store as store X type.
2793 if (auto *VT =
2794 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2795 if (VT->getNumElements() == 1) {
2796 Value *PtrVal = II->getArgOperand(1);
2797 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2798 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2799 return true;
2800 }
2801 }
2802 return false;
2803 case Intrinsic::umul_with_overflow:
2804 return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
2805 case Intrinsic::smul_with_overflow:
2806 return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
2807 }
2808
2809 SmallVector<Value *, 2> PtrOps;
2810 Type *AccessTy;
2811 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2812 while (!PtrOps.empty()) {
2813 Value *PtrVal = PtrOps.pop_back_val();
2814 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2815 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2816 return true;
2817 }
2818 }
2819
2820 // From here on out we're working with named functions.
2821 auto *Callee = CI->getCalledFunction();
2822 if (!Callee)
2823 return false;
2824
2825 // Lower all default uses of _chk calls. This is very similar
2826 // to what InstCombineCalls does, but here we are only lowering calls
2827 // to fortified library functions (e.g. __memcpy_chk) that have the default
2828 // "don't know" as the objectsize. Anything else should be left alone.
2829 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2830 IRBuilder<> Builder(CI);
2831 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2832 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2833 CI->eraseFromParent();
2834 return true;
2835 }
2836
2837 // SCCP may have propagated, among other things, C++ static variables across
2838 // calls. If this happens to be the case, we may want to undo it in order to
2839 // avoid redundant pointer computation of the constant, as the function
2840 // returning the constant needs to be executed anyway.
2841 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2842 if (!F->getReturnType()->isPointerTy())
2843 return nullptr;
2844
2845 GlobalVariable *UniformValue = nullptr;
2846 for (auto &BB : *F) {
2847 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2848 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2849 if (!UniformValue)
2850 UniformValue = V;
2851 else if (V != UniformValue)
2852 return nullptr;
2853 } else {
2854 return nullptr;
2855 }
2856 }
2857 }
2858
2859 return UniformValue;
2860 };
2861
2862 if (Callee->hasExactDefinition()) {
2863 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2864 bool MadeChange = false;
2865 for (Use &U : make_early_inc_range(RV->uses())) {
2866 auto *I = dyn_cast<Instruction>(U.getUser());
2867 if (!I || I->getParent() != CI->getParent()) {
2868 // Limit to the same basic block to avoid extending the call-site live
2869 // range, which otherwise could increase register pressure.
2870 continue;
2871 }
2872 if (CI->comesBefore(I)) {
2873 U.set(CI);
2874 MadeChange = true;
2875 }
2876 }
2877
2878 return MadeChange;
2879 }
2880 }
2881
2882 return false;
2883}
2884
2885static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2886 const CallInst *CI) {
2887 assert(CI && CI->use_empty());
2888
2889 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2890 switch (II->getIntrinsicID()) {
2891 case Intrinsic::memset:
2892 case Intrinsic::memcpy:
2893 case Intrinsic::memmove:
2894 return true;
2895 default:
2896 return false;
2897 }
2898
2899 LibFunc LF;
2900 Function *Callee = CI->getCalledFunction();
2901 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2902 switch (LF) {
2903 case LibFunc_strcpy:
2904 case LibFunc_strncpy:
2905 case LibFunc_strcat:
2906 case LibFunc_strncat:
2907 return true;
2908 default:
2909 return false;
2910 }
2911
2912 return false;
2913}
2914
2915/// Look for opportunities to duplicate return instructions to the predecessor
2916/// to enable tail call optimizations. The case it is currently looking for is
2917/// the following one. Known intrinsics or library function that may be tail
2918/// called are taken into account as well.
2919/// @code
2920/// bb0:
2921/// %tmp0 = tail call i32 @f0()
2922/// br label %return
2923/// bb1:
2924/// %tmp1 = tail call i32 @f1()
2925/// br label %return
2926/// bb2:
2927/// %tmp2 = tail call i32 @f2()
2928/// br label %return
2929/// return:
2930/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2931/// ret i32 %retval
2932/// @endcode
2933///
2934/// =>
2935///
2936/// @code
2937/// bb0:
2938/// %tmp0 = tail call i32 @f0()
2939/// ret i32 %tmp0
2940/// bb1:
2941/// %tmp1 = tail call i32 @f1()
2942/// ret i32 %tmp1
2943/// bb2:
2944/// %tmp2 = tail call i32 @f2()
2945/// ret i32 %tmp2
2946/// @endcode
2947bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2948 ModifyDT &ModifiedDT) {
2949 if (!BB->getTerminator())
2950 return false;
2951
2952 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2953 if (!RetI)
2954 return false;
2955
2956 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2957
2958 PHINode *PN = nullptr;
2959 ExtractValueInst *EVI = nullptr;
2960 BitCastInst *BCI = nullptr;
2961 Value *V = RetI->getReturnValue();
2962 if (V) {
2963 BCI = dyn_cast<BitCastInst>(V);
2964 if (BCI)
2965 V = BCI->getOperand(0);
2966
2967 EVI = dyn_cast<ExtractValueInst>(V);
2968 if (EVI) {
2969 V = EVI->getOperand(0);
2970 if (!llvm::all_of(EVI->indices(), equal_to(0)))
2971 return false;
2972 }
2973
2974 PN = dyn_cast<PHINode>(V);
2975 }
2976
2977 if (PN && PN->getParent() != BB)
2978 return false;
2979
2980 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2981 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2982 if (BC && BC->hasOneUse())
2983 Inst = BC->user_back();
2984
2985 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2986 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2987 return false;
2988 };
2989
2990 SmallVector<const IntrinsicInst *, 1> FakeUses;
2991
2992 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2993 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2994 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2995 // Record the instruction so it can be preserved when the exit block is
2996 // removed. Do not preserve the fake use that uses the result of the
2997 // PHI instruction.
2998 // Do not copy fake uses that use the result of a PHI node.
2999 // FIXME: If we do want to copy the fake use into the return blocks, we
3000 // have to figure out which of the PHI node operands to use for each
3001 // copy.
3002 if (!isa<PHINode>(II->getOperand(0))) {
3003 FakeUses.push_back(II);
3004 }
3005 return true;
3006 }
3007
3008 return false;
3009 };
3010
3011 // Make sure there are no instructions between the first instruction
3012 // and return.
3013 BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
3014 // Skip over pseudo-probes and the bitcast.
3015 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3016 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3017 BI = std::next(BI);
3018 if (&*BI != RetI)
3019 return false;
3020
3021 // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3022 // call.
3023 auto MayBePermittedAsTailCall = [&](const auto *CI) {
3024 return TLI->mayBeEmittedAsTailCall(CI) &&
3025 attributesPermitTailCall(BB->getParent(), CI, RetI, *TLI);
3026 };
3027
3028 SmallVector<BasicBlock *, 4> TailCallBBs;
3029 // Record the call instructions so we can insert any fake uses
3030 // that need to be preserved before them.
3031 SmallVector<CallInst *, 4> CallInsts;
3032 if (PN) {
3033 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3034 // Look through bitcasts.
3035 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3036 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3037 BasicBlock *PredBB = PN->getIncomingBlock(I);
3038 // Make sure the phi value is indeed produced by the tail call.
3039 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3040 MayBePermittedAsTailCall(CI)) {
3041 TailCallBBs.push_back(PredBB);
3042 CallInsts.push_back(CI);
3043 } else {
3044 // Consider the cases in which the phi value is indirectly produced by
3045 // the tail call, for example when encountering memset(), memmove(),
3046 // strcpy(), whose return value may have been optimized out. In such
3047 // cases, the value needs to be the first function argument.
3048 //
3049 // bb0:
3050 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3051 // br label %return
3052 // return:
3053 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3054 if (PredBB && PredBB->getSingleSuccessor() == BB)
3055 CI = dyn_cast_or_null<CallInst>(
3056 PredBB->getTerminator()->getPrevNode());
3057
3058 if (CI && CI->use_empty() &&
3059 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3060 IncomingVal == CI->getArgOperand(0) &&
3061 MayBePermittedAsTailCall(CI)) {
3062 TailCallBBs.push_back(PredBB);
3063 CallInsts.push_back(CI);
3064 }
3065 }
3066 }
3067 } else {
3068 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3069 for (BasicBlock *Pred : predecessors(BB)) {
3070 if (!VisitedBBs.insert(Pred).second)
3071 continue;
3072 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3073 CallInst *CI = dyn_cast<CallInst>(I);
3074 if (CI && CI->use_empty() && MayBePermittedAsTailCall(CI)) {
3075 // Either we return void or the return value must be the first
3076 // argument of a known intrinsic or library function.
3077 if (!V || isa<UndefValue>(V) ||
3078 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3079 V == CI->getArgOperand(0))) {
3080 TailCallBBs.push_back(Pred);
3081 CallInsts.push_back(CI);
3082 }
3083 }
3084 }
3085 }
3086 }
3087
3088 bool Changed = false;
3089 for (auto const &TailCallBB : TailCallBBs) {
3090 // Make sure the call instruction is followed by an unconditional branch to
3091 // the return block.
3092 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3093 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3094 continue;
3095
3096 // Duplicate the return into TailCallBB.
3097 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3098 assert(!VerifyBFIUpdates ||
3099 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3100 BFI->setBlockFreq(BB,
3101 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3102 ModifiedDT = ModifyDT::ModifyBBDT;
3103 Changed = true;
3104 ++NumRetsDup;
3105 }
3106
3107 // If we eliminated all predecessors of the block, delete the block now.
3108 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3109 // Copy the fake uses found in the original return block to all blocks
3110 // that contain tail calls.
3111 for (auto *CI : CallInsts) {
3112 for (auto const *FakeUse : FakeUses) {
3113 auto *ClonedInst = FakeUse->clone();
3114 ClonedInst->insertBefore(CI->getIterator());
3115 }
3116 }
3117 BB->eraseFromParent();
3118 }
3119
3120 return Changed;
3121}
3122
3123//===----------------------------------------------------------------------===//
3124// Memory Optimization
3125//===----------------------------------------------------------------------===//
3126
3127namespace {
3128
3129/// This is an extended version of TargetLowering::AddrMode
3130/// which holds actual Value*'s for register values.
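/// For example (illustrative), an address of the form %base + 4*%idx + 16
/// would be held as BaseReg = %base, ScaledReg = %idx, Scale = 4 and
/// BaseOffs = 16.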
3131struct ExtAddrMode : public TargetLowering::AddrMode {
3132 Value *BaseReg = nullptr;
3133 Value *ScaledReg = nullptr;
3134 Value *OriginalValue = nullptr;
3135 bool InBounds = true;
3136
3137 enum FieldName {
3138 NoField = 0x00,
3139 BaseRegField = 0x01,
3140 BaseGVField = 0x02,
3141 BaseOffsField = 0x04,
3142 ScaledRegField = 0x08,
3143 ScaleField = 0x10,
3144 MultipleFields = 0xff
3145 };
3146
3147 ExtAddrMode() = default;
3148
3149 void print(raw_ostream &OS) const;
3150 void dump() const;
3151
3152 // Replace From in ExtAddrMode with To.
3153 // E.g., SExt insts may be promoted and deleted. We should replace them with
3154 // the promoted values.
3155 void replaceWith(Value *From, Value *To) {
3156 if (ScaledReg == From)
3157 ScaledReg = To;
3158 }
3159
3160 FieldName compare(const ExtAddrMode &other) {
3161 // First check that the types are the same on each field, as differing types
3162 // are something we can't cope with later on.
3163 if (BaseReg && other.BaseReg &&
3164 BaseReg->getType() != other.BaseReg->getType())
3165 return MultipleFields;
3166 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3167 return MultipleFields;
3168 if (ScaledReg && other.ScaledReg &&
3169 ScaledReg->getType() != other.ScaledReg->getType())
3170 return MultipleFields;
3171
3172 // Conservatively reject 'inbounds' mismatches.
3173 if (InBounds != other.InBounds)
3174 return MultipleFields;
3175
3176 // Check each field to see if it differs.
3177 unsigned Result = NoField;
3178 if (BaseReg != other.BaseReg)
3179 Result |= BaseRegField;
3180 if (BaseGV != other.BaseGV)
3181 Result |= BaseGVField;
3182 if (BaseOffs != other.BaseOffs)
3183 Result |= BaseOffsField;
3184 if (ScaledReg != other.ScaledReg)
3185 Result |= ScaledRegField;
3186 // Don't count 0 as being a different scale, because that actually means
3187 // unscaled (which will already be counted by having no ScaledReg).
3188 if (Scale && other.Scale && Scale != other.Scale)
3189 Result |= ScaleField;
3190
3191 if (llvm::popcount(Result) > 1)
3192 return MultipleFields;
3193 else
3194 return static_cast<FieldName>(Result);
3195 }
3196
3197 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3198 // with no offset.
3199 bool isTrivial() {
3200 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3201 // trivial if at most one of these terms is nonzero, except that BaseGV and
3202 // BaseReg both being zero actually means a null pointer value, which we
3203 // consider to be 'non-zero' here.
3204 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3205 }
3206
3207 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3208 switch (Field) {
3209 default:
3210 return nullptr;
3211 case BaseRegField:
3212 return BaseReg;
3213 case BaseGVField:
3214 return BaseGV;
3215 case ScaledRegField:
3216 return ScaledReg;
3217 case BaseOffsField:
3218 return ConstantInt::getSigned(IntPtrTy, BaseOffs);
3219 }
3220 }
3221
3222 void SetCombinedField(FieldName Field, Value *V,
3223 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3224 switch (Field) {
3225 default:
3226 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3227 break;
3228 case ExtAddrMode::BaseRegField:
3229 BaseReg = V;
3230 break;
3231 case ExtAddrMode::BaseGVField:
3232 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3233 // in the BaseReg field.
3234 assert(BaseReg == nullptr);
3235 BaseReg = V;
3236 BaseGV = nullptr;
3237 break;
3238 case ExtAddrMode::ScaledRegField:
3239 ScaledReg = V;
3240 // If we have a mix of scaled and unscaled addrmodes then we want scale
3241 // to be the scale and not zero.
3242 if (!Scale)
3243 for (const ExtAddrMode &AM : AddrModes)
3244 if (AM.Scale) {
3245 Scale = AM.Scale;
3246 break;
3247 }
3248 break;
3249 case ExtAddrMode::BaseOffsField:
3250 // The offset is no longer a constant, so it goes in ScaledReg with a
3251 // scale of 1.
3252 assert(ScaledReg == nullptr);
3253 ScaledReg = V;
3254 Scale = 1;
3255 BaseOffs = 0;
3256 break;
3257 }
3258 }
3259};
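// Illustrative example (hypothetical IR, names are for exposition only): for a
// memory access such as
//   %addr = getelementptr inbounds i8, ptr %base, i64 %idx
//   %val  = load i32, ptr %addr
// a matched ExtAddrMode would roughly decompose the address as
//   [inbounds Base:%base + 1*%idx]
// i.e. BaseReg = %base, ScaledReg = %idx, Scale = 1, BaseOffs = 0 and
// BaseGV = nullptr, which is also the bracketed form print() emits.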
3260
3261#ifndef NDEBUG
3262static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3263 AM.print(OS);
3264 return OS;
3265}
3266#endif
3267
3268#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3269void ExtAddrMode::print(raw_ostream &OS) const {
3270 bool NeedPlus = false;
3271 OS << "[";
3272 if (InBounds)
3273 OS << "inbounds ";
3274 if (BaseGV) {
3275 OS << "GV:";
3276 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3277 NeedPlus = true;
3278 }
3279
3280 if (BaseOffs) {
3281 OS << (NeedPlus ? " + " : "") << BaseOffs;
3282 NeedPlus = true;
3283 }
3284
3285 if (BaseReg) {
3286 OS << (NeedPlus ? " + " : "") << "Base:";
3287 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3288 NeedPlus = true;
3289 }
3290 if (Scale) {
3291 OS << (NeedPlus ? " + " : "") << Scale << "*";
3292 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3293 }
3294
3295 OS << ']';
3296}
3297
3298LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3299 print(dbgs());
3300 dbgs() << '\n';
3301}
3302#endif
3303
3304} // end anonymous namespace
3305
3306namespace {
3307
3308/// This class provides transaction based operation on the IR.
3309/// Every change made through this class is recorded in the internal state and
3310/// can be undone (rollback) until commit is called.
3311/// CGP does not check if instructions could be speculatively executed when
3312/// moved. Preserving the original location would pessimize the debugging
3313/// experience, as well as negatively impact the quality of sample PGO.
3314class TypePromotionTransaction {
3315 /// This represents the common interface of the individual transaction.
3316 /// Each class implements the logic for doing one specific modification on
3317 /// the IR via the TypePromotionTransaction.
3318 class TypePromotionAction {
3319 protected:
3320 /// The Instruction modified.
3321 Instruction *Inst;
3322
3323 public:
3324 /// Constructor of the action.
3325 /// The constructor performs the related action on the IR.
3326 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3327
3328 virtual ~TypePromotionAction() = default;
3329
3330 /// Undo the modification done by this action.
3331 /// When this method is called, the IR must be in the same state as it was
3332 /// before this action was applied.
3333 /// \pre Undoing the action works if and only if the IR is in the exact same
3334 /// state as it was directly after this action was applied.
3335 virtual void undo() = 0;
3336
3337    /// Commit every change made by this action.
3338    /// When the effects of the action on the IR are to be kept, it is important
3339    /// to call this function; otherwise hidden information may be kept forever.
3340 virtual void commit() {
3341 // Nothing to be done, this action is not doing anything.
3342 }
3343 };
3344
3345 /// Utility to remember the position of an instruction.
3346 class InsertionHandler {
3347 /// Position of an instruction.
3348 /// Either an instruction:
3349 /// - Is the first in a basic block: BB is used.
3350 /// - Has a previous instruction: PrevInst is used.
3351 struct {
3352 BasicBlock::iterator PrevInst;
3353 BasicBlock *BB;
3354 } Point;
3355 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3356
3357 /// Remember whether or not the instruction had a previous instruction.
3358 bool HasPrevInstruction;
3359
3360 public:
3361 /// Record the position of \p Inst.
3362 InsertionHandler(Instruction *Inst) {
3363 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3364 BasicBlock *BB = Inst->getParent();
3365
3366 // Record where we would have to re-insert the instruction in the sequence
3367 // of DbgRecords, if we ended up reinserting.
3368 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3369
3370 if (HasPrevInstruction) {
3371 Point.PrevInst = std::prev(Inst->getIterator());
3372 } else {
3373 Point.BB = BB;
3374 }
3375 }
3376
3377 /// Insert \p Inst at the recorded position.
3378 void insert(Instruction *Inst) {
3379 if (HasPrevInstruction) {
3380 if (Inst->getParent())
3381 Inst->removeFromParent();
3382 Inst->insertAfter(Point.PrevInst);
3383 } else {
3384 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3385 if (Inst->getParent())
3386 Inst->moveBefore(*Point.BB, Position);
3387 else
3388 Inst->insertBefore(*Point.BB, Position);
3389 }
3390
3391 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3392 }
3393 };
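  // Minimal usage sketch (hypothetical insert point, for exposition only): the
  // handler captures either the previous instruction or the owning block at
  // construction time, so undo can restore the position even if the neighbours
  // changed in between:
  //   InsertionHandler Pos(I);          // remember where I currently lives
  //   I->moveBefore(SomeOtherInsertPt);
  //   ...
  //   Pos.insert(I);                    // put I back at the recorded position
  // Debug records around the original position are re-attached through
  // reinsertInstInDbgRecords().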
3394
3395 /// Move an instruction before another.
3396 class InstructionMoveBefore : public TypePromotionAction {
3397 /// Original position of the instruction.
3398 InsertionHandler Position;
3399
3400 public:
3401 /// Move \p Inst before \p Before.
3402 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3403 : TypePromotionAction(Inst), Position(Inst) {
3404 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3405 << "\n");
3406 Inst->moveBefore(Before);
3407 }
3408
3409 /// Move the instruction back to its original position.
3410 void undo() override {
3411 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3412 Position.insert(Inst);
3413 }
3414 };
3415
3416 /// Set the operand of an instruction with a new value.
3417 class OperandSetter : public TypePromotionAction {
3418 /// Original operand of the instruction.
3419 Value *Origin;
3420
3421 /// Index of the modified instruction.
3422 unsigned Idx;
3423
3424 public:
3425 /// Set \p Idx operand of \p Inst with \p NewVal.
3426 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3427 : TypePromotionAction(Inst), Idx(Idx) {
3428 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3429 << "for:" << *Inst << "\n"
3430 << "with:" << *NewVal << "\n");
3431 Origin = Inst->getOperand(Idx);
3432 Inst->setOperand(Idx, NewVal);
3433 }
3434
3435 /// Restore the original value of the instruction.
3436 void undo() override {
3437 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3438 << "for: " << *Inst << "\n"
3439 << "with: " << *Origin << "\n");
3440 Inst->setOperand(Idx, Origin);
3441 }
3442 };
3443
3444 /// Hide the operands of an instruction.
3445    /// Pretend this instruction does not use any of its operands.
3446 class OperandsHider : public TypePromotionAction {
3447 /// The list of original operands.
3448 SmallVector<Value *, 4> OriginalValues;
3449
3450 public:
3451 /// Remove \p Inst from the uses of the operands of \p Inst.
3452 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3453 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3454 unsigned NumOpnds = Inst->getNumOperands();
3455 OriginalValues.reserve(NumOpnds);
3456 for (unsigned It = 0; It < NumOpnds; ++It) {
3457 // Save the current operand.
3458 Value *Val = Inst->getOperand(It);
3459 OriginalValues.push_back(Val);
3460 // Set a dummy one.
3461 // We could use OperandSetter here, but that would imply an overhead
3462 // that we are not willing to pay.
3463 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3464 }
3465 }
3466
3467 /// Restore the original list of uses.
3468 void undo() override {
3469 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3470 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3471 Inst->setOperand(It, OriginalValues[It]);
3472 }
3473 };
3474
3475 /// Build a truncate instruction.
3476 class TruncBuilder : public TypePromotionAction {
3477 Value *Val;
3478
3479 public:
3480 /// Build a truncate instruction of \p Opnd producing a \p Ty
3481 /// result.
3482 /// trunc Opnd to Ty.
3483 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3484 IRBuilder<> Builder(Opnd);
3485 Builder.SetCurrentDebugLocation(DebugLoc());
3486 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3487 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3488 }
3489
3490 /// Get the built value.
3491 Value *getBuiltValue() { return Val; }
3492
3493 /// Remove the built instruction.
3494 void undo() override {
3495 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3496 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3497 IVal->eraseFromParent();
3498 }
3499 };
3500
3501 /// Build a sign extension instruction.
3502 class SExtBuilder : public TypePromotionAction {
3503 Value *Val;
3504
3505 public:
3506 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3507 /// result.
3508 /// sext Opnd to Ty.
3509 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3510 : TypePromotionAction(InsertPt) {
3511 IRBuilder<> Builder(InsertPt);
3512 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3513 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3514 }
3515
3516 /// Get the built value.
3517 Value *getBuiltValue() { return Val; }
3518
3519 /// Remove the built instruction.
3520 void undo() override {
3521 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3522 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3523 IVal->eraseFromParent();
3524 }
3525 };
3526
3527 /// Build a zero extension instruction.
3528 class ZExtBuilder : public TypePromotionAction {
3529 Value *Val;
3530
3531 public:
3532 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3533 /// result.
3534 /// zext Opnd to Ty.
3535 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3536 : TypePromotionAction(InsertPt) {
3537 IRBuilder<> Builder(InsertPt);
3538 Builder.SetCurrentDebugLocation(DebugLoc());
3539 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3540 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3541 }
3542
3543 /// Get the built value.
3544 Value *getBuiltValue() { return Val; }
3545
3546 /// Remove the built instruction.
3547 void undo() override {
3548 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3549 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3550 IVal->eraseFromParent();
3551 }
3552 };
3553
3554 /// Mutate an instruction to another type.
3555 class TypeMutator : public TypePromotionAction {
3556 /// Record the original type.
3557 Type *OrigTy;
3558
3559 public:
3560 /// Mutate the type of \p Inst into \p NewTy.
3561 TypeMutator(Instruction *Inst, Type *NewTy)
3562 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3563 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3564 << "\n");
3565 Inst->mutateType(NewTy);
3566 }
3567
3568 /// Mutate the instruction back to its original type.
3569 void undo() override {
3570 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3571 << "\n");
3572 Inst->mutateType(OrigTy);
3573 }
3574 };
3575
3576 /// Replace the uses of an instruction by another instruction.
3577 class UsesReplacer : public TypePromotionAction {
3578 /// Helper structure to keep track of the replaced uses.
3579 struct InstructionAndIdx {
3580      /// The instruction using the replaced instruction.
3581 Instruction *Inst;
3582
3583      /// The operand index at which the replaced instruction is used by Inst.
3584 unsigned Idx;
3585
3586 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3587 : Inst(Inst), Idx(Idx) {}
3588 };
3589
3590    /// Keep track of the original uses (pair Instruction, Index).
3591    SmallVector<InstructionAndIdx, 4> OriginalUses;
3592 /// Keep track of the debug users.
3593 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3594
3595 /// Keep track of the new value so that we can undo it by replacing
3596 /// instances of the new value with the original value.
3597 Value *New;
3598
3600
3601 public:
3602 /// Replace all the use of \p Inst by \p New.
3603 UsesReplacer(Instruction *Inst, Value *New)
3604 : TypePromotionAction(Inst), New(New) {
3605 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3606 << "\n");
3607 // Record the original uses.
3608 for (Use &U : Inst->uses()) {
3609 Instruction *UserI = cast<Instruction>(U.getUser());
3610 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3611 }
3612 // Record the debug uses separately. They are not in the instruction's
3613 // use list, but they are replaced by RAUW.
3614 findDbgValues(Inst, DbgVariableRecords);
3615
3616 // Now, we can replace the uses.
3617 Inst->replaceAllUsesWith(New);
3618 }
3619
3620 /// Reassign the original uses of Inst to Inst.
3621 void undo() override {
3622 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3623 for (InstructionAndIdx &Use : OriginalUses)
3624 Use.Inst->setOperand(Use.Idx, Inst);
3625 // RAUW has replaced all original uses with references to the new value,
3626 // including the debug uses. Since we are undoing the replacements,
3627 // the original debug uses must also be reinstated to maintain the
3628 // correctness and utility of debug value records.
3629 for (DbgVariableRecord *DVR : DbgVariableRecords)
3630 DVR->replaceVariableLocationOp(New, Inst);
3631 }
3632 };
3633
3634 /// Remove an instruction from the IR.
3635 class InstructionRemover : public TypePromotionAction {
3636 /// Original position of the instruction.
3637 InsertionHandler Inserter;
3638
3639    /// Helper structure to hide all the links to the instruction. In other
3640    /// words, this helps to pretend the instruction was removed.
3641 OperandsHider Hider;
3642
3643 /// Keep track of the uses replaced, if any.
3644 UsesReplacer *Replacer = nullptr;
3645
3646 /// Keep track of instructions removed.
3647 SetOfInstrs &RemovedInsts;
3648
3649 public:
3650    /// Remove all references to \p Inst and optionally replace all its
3651 /// uses with New.
3652 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3653 /// \pre If !Inst->use_empty(), then New != nullptr
3654 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3655 Value *New = nullptr)
3656 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3657 RemovedInsts(RemovedInsts) {
3658 if (New)
3659 Replacer = new UsesReplacer(Inst, New);
3660 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3661 RemovedInsts.insert(Inst);
3662 /// The instructions removed here will be freed after completing
3663 /// optimizeBlock() for all blocks as we need to keep track of the
3664 /// removed instructions during promotion.
3665 Inst->removeFromParent();
3666 }
3667
3668 ~InstructionRemover() override { delete Replacer; }
3669
3670 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3671 InstructionRemover(const InstructionRemover &other) = delete;
3672
3673    /// Resurrect the instruction and reassign it to the proper uses if a
3674    /// new value was provided when this action was built.
3675 void undo() override {
3676 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3677 Inserter.insert(Inst);
3678 if (Replacer)
3679 Replacer->undo();
3680 Hider.undo();
3681 RemovedInsts.erase(Inst);
3682 }
3683 };
3684
3685public:
3686 /// Restoration point.
3687 /// The restoration point is a pointer to an action instead of an iterator
3688 /// because the iterator may be invalidated but not the pointer.
3689 using ConstRestorationPt = const TypePromotionAction *;
3690
3691 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3692 : RemovedInsts(RemovedInsts) {}
3693
3694  /// Commit all the changes made in this transaction. Return true if any
3695  /// change happened.
3696 bool commit();
3697
3698 /// Undo all the changes made after the given point.
3699 void rollback(ConstRestorationPt Point);
3700
3701 /// Get the current restoration point.
3702 ConstRestorationPt getRestorationPoint() const;
3703
3704 /// \name API for IR modification with state keeping to support rollback.
3705 /// @{
3706 /// Same as Instruction::setOperand.
3707 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3708
3709 /// Same as Instruction::eraseFromParent.
3710 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3711
3712 /// Same as Value::replaceAllUsesWith.
3713 void replaceAllUsesWith(Instruction *Inst, Value *New);
3714
3715 /// Same as Value::mutateType.
3716 void mutateType(Instruction *Inst, Type *NewTy);
3717
3718 /// Same as IRBuilder::createTrunc.
3719 Value *createTrunc(Instruction *Opnd, Type *Ty);
3720
3721 /// Same as IRBuilder::createSExt.
3722 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3723
3724 /// Same as IRBuilder::createZExt.
3725 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3726
3727private:
3728  /// The ordered list of actions made so far.
3729  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3730
3731 using CommitPt =
3732 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3733
3734 SetOfInstrs &RemovedInsts;
3735};
3736
3737} // end anonymous namespace
3738
3739void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3740 Value *NewVal) {
3741 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3742 Inst, Idx, NewVal));
3743}
3744
3745void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3746 Value *NewVal) {
3747 Actions.push_back(
3748 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3749 Inst, RemovedInsts, NewVal));
3750}
3751
3752void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3753 Value *New) {
3754 Actions.push_back(
3755 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3756}
3757
3758void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3759 Actions.push_back(
3760 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3761}
3762
3763Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3764 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3765 Value *Val = Ptr->getBuiltValue();
3766 Actions.push_back(std::move(Ptr));
3767 return Val;
3768}
3769
3770Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3771 Type *Ty) {
3772 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3773 Value *Val = Ptr->getBuiltValue();
3774 Actions.push_back(std::move(Ptr));
3775 return Val;
3776}
3777
3778Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3779 Type *Ty) {
3780 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3781 Value *Val = Ptr->getBuiltValue();
3782 Actions.push_back(std::move(Ptr));
3783 return Val;
3784}
3785
3786TypePromotionTransaction::ConstRestorationPt
3787TypePromotionTransaction::getRestorationPoint() const {
3788 return !Actions.empty() ? Actions.back().get() : nullptr;
3789}
3790
3791bool TypePromotionTransaction::commit() {
3792 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3793 Action->commit();
3794 bool Modified = !Actions.empty();
3795 Actions.clear();
3796 return Modified;
3797}
3798
3799void TypePromotionTransaction::rollback(
3800 TypePromotionTransaction::ConstRestorationPt Point) {
3801 while (!Actions.empty() && Point != Actions.back().get()) {
3802 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3803 Curr->undo();
3804 }
3805}
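// Typical use of the transaction API in this file (sketch only; the condition
// and variable names are illustrative, not taken from the pass):
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   TPT.mutateType(I, NewTy);                  // recorded as a TypeMutator
//   Value *Trunc = TPT.createTrunc(I, OldTy);  // recorded as a TruncBuilder
//   if (!StillProfitable)
//     TPT.rollback(LastKnownGood);  // undo the recorded actions in reverse
//   else
//     TPT.commit();                 // keep the changes and drop the undo info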
3806
3807namespace {
3808
3809/// A helper class for matching addressing modes.
3810///
3811/// This encapsulates the logic for matching the target-legal addressing modes.
3812class AddressingModeMatcher {
3813 SmallVectorImpl<Instruction *> &AddrModeInsts;
3814 const TargetLowering &TLI;
3815 const TargetRegisterInfo &TRI;
3816 const DataLayout &DL;
3817 const LoopInfo &LI;
3818 const std::function<const DominatorTree &()> getDTFn;
3819
3820 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3821 /// the memory instruction that we're computing this address for.
3822 Type *AccessTy;
3823 unsigned AddrSpace;
3824 Instruction *MemoryInst;
3825
3826 /// This is the addressing mode that we're building up. This is
3827 /// part of the return value of this addressing mode matching stuff.
3828 ExtAddrMode &AddrMode;
3829
3830 /// The instructions inserted by other CodeGenPrepare optimizations.
3831 const SetOfInstrs &InsertedInsts;
3832
3833 /// A map from the instructions to their type before promotion.
3834 InstrToOrigTy &PromotedInsts;
3835
3836 /// The ongoing transaction where every action should be registered.
3837 TypePromotionTransaction &TPT;
3838
3839  // A GEP whose offset is too large to be folded into the addressing mode.
3840 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3841
3842 /// This is set to true when we should not do profitability checks.
3843 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3844 bool IgnoreProfitability;
3845
3846 /// True if we are optimizing for size.
3847 bool OptSize = false;
3848
3849 ProfileSummaryInfo *PSI;
3850 BlockFrequencyInfo *BFI;
3851
3852 AddressingModeMatcher(
3853 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3854 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3855 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3856 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3857 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3858 TypePromotionTransaction &TPT,
3859 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3860 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3861 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3862 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3863 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3864 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3865 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3866 IgnoreProfitability = false;
3867 }
3868
3869public:
3870 /// Find the maximal addressing mode that a load/store of V can fold,
3871  /// given an access type of AccessTy. This returns a list of involved
3872 /// instructions in AddrModeInsts.
3873 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3874 /// optimizations.
3875 /// \p PromotedInsts maps the instructions to their type before promotion.
3876  /// \p TPT The ongoing transaction where every action should be registered.
3877 static ExtAddrMode
3878 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3879 SmallVectorImpl<Instruction *> &AddrModeInsts,
3880 const TargetLowering &TLI, const LoopInfo &LI,
3881 const std::function<const DominatorTree &()> getDTFn,
3882 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3883 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3884 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3885 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3886 ExtAddrMode Result;
3887
3888 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3889 AccessTy, AS, MemoryInst, Result,
3890 InsertedInsts, PromotedInsts, TPT,
3891 LargeOffsetGEP, OptSize, PSI, BFI)
3892 .matchAddr(V, 0);
3893 (void)Success;
3894 assert(Success && "Couldn't select *anything*?");
3895 return Result;
3896 }
3897
3898private:
3899 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3900 bool matchAddr(Value *Addr, unsigned Depth);
3901 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3902 bool *MovedAway = nullptr);
3903 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3904 ExtAddrMode &AMBefore,
3905 ExtAddrMode &AMAfter);
3906 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3907 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3908 Value *PromotedOperand) const;
3909};
3910
3911class PhiNodeSet;
3912
3913/// An iterator for PhiNodeSet.
3914class PhiNodeSetIterator {
3915 PhiNodeSet *const Set;
3916 size_t CurrentIndex = 0;
3917
3918public:
3919 /// The constructor. Start should point to either a valid element, or be equal
3920 /// to the size of the underlying SmallVector of the PhiNodeSet.
3921 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3922 PHINode *operator*() const;
3923 PhiNodeSetIterator &operator++();
3924 bool operator==(const PhiNodeSetIterator &RHS) const;
3925 bool operator!=(const PhiNodeSetIterator &RHS) const;
3926};
3927
3928/// Keeps a set of PHINodes.
3929///
3930/// This is a minimal set implementation for a specific use case:
3931/// It is very fast when there are very few elements, but also provides good
3932/// performance when there are many. It is similar to SmallPtrSet, but also
3933/// provides iteration by insertion order, which is deterministic and stable
3934/// across runs. It is also similar to SmallSetVector, but provides removing
3935/// elements in O(1) time. This is achieved by not actually removing the element
3936/// from the underlying vector, so comes at the cost of using more memory, but
3937/// that is fine, since PhiNodeSets are used as short lived objects.
3938class PhiNodeSet {
3939 friend class PhiNodeSetIterator;
3940
3941 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3942 using iterator = PhiNodeSetIterator;
3943
3944 /// Keeps the elements in the order of their insertion in the underlying
3945  /// vector. To achieve constant time removal, it never deletes any element.
3946  SmallVector<PHINode *, 32> NodeList;
3947
3948 /// Keeps the elements in the underlying set implementation. This (and not the
3949 /// NodeList defined above) is the source of truth on whether an element
3950 /// is actually in the collection.
3951 MapType NodeMap;
3952
3953 /// Points to the first valid (not deleted) element when the set is not empty
3954 /// and the value is not zero. Equals to the size of the underlying vector
3955 /// when the set is empty. When the value is 0, as in the beginning, the
3956 /// first element may or may not be valid.
3957 size_t FirstValidElement = 0;
3958
3959public:
3960 /// Inserts a new element to the collection.
3961 /// \returns true if the element is actually added, i.e. was not in the
3962 /// collection before the operation.
3963 bool insert(PHINode *Ptr) {
3964 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3965 NodeList.push_back(Ptr);
3966 return true;
3967 }
3968 return false;
3969 }
3970
3971 /// Removes the element from the collection.
3972 /// \returns whether the element is actually removed, i.e. was in the
3973 /// collection before the operation.
3974 bool erase(PHINode *Ptr) {
3975 if (NodeMap.erase(Ptr)) {
3976 SkipRemovedElements(FirstValidElement);
3977 return true;
3978 }
3979 return false;
3980 }
3981
3982 /// Removes all elements and clears the collection.
3983 void clear() {
3984 NodeMap.clear();
3985 NodeList.clear();
3986 FirstValidElement = 0;
3987 }
3988
3989 /// \returns an iterator that will iterate the elements in the order of
3990 /// insertion.
3991 iterator begin() {
3992 if (FirstValidElement == 0)
3993 SkipRemovedElements(FirstValidElement);
3994 return PhiNodeSetIterator(this, FirstValidElement);
3995 }
3996
3997 /// \returns an iterator that points to the end of the collection.
3998 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3999
4000 /// Returns the number of elements in the collection.
4001 size_t size() const { return NodeMap.size(); }
4002
4003  /// \returns 1 if the given element is in the collection, and 0 otherwise.
4004 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
4005
4006private:
4007 /// Updates the CurrentIndex so that it will point to a valid element.
4008 ///
4009 /// If the element of NodeList at CurrentIndex is valid, it does not
4010 /// change it. If there are no more valid elements, it updates CurrentIndex
4011 /// to point to the end of the NodeList.
4012 void SkipRemovedElements(size_t &CurrentIndex) {
4013 while (CurrentIndex < NodeList.size()) {
4014 auto it = NodeMap.find(NodeList[CurrentIndex]);
4015 // If the element has been deleted and added again later, NodeMap will
4016 // point to a different index, so CurrentIndex will still be invalid.
4017 if (it != NodeMap.end() && it->second == CurrentIndex)
4018 break;
4019 ++CurrentIndex;
4020 }
4021 }
4022};
4023
4024PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4025 : Set(Set), CurrentIndex(Start) {}
4026
4027PHINode *PhiNodeSetIterator::operator*() const {
4028 assert(CurrentIndex < Set->NodeList.size() &&
4029 "PhiNodeSet access out of range");
4030 return Set->NodeList[CurrentIndex];
4031}
4032
4033PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4034 assert(CurrentIndex < Set->NodeList.size() &&
4035 "PhiNodeSet access out of range");
4036 ++CurrentIndex;
4037 Set->SkipRemovedElements(CurrentIndex);
4038 return *this;
4039}
4040
4041bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4042 return CurrentIndex == RHS.CurrentIndex;
4043}
4044
4045bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4046 return !((*this) == RHS);
4047}
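// Behavioural sketch of PhiNodeSet (hypothetical nodes, for exposition only):
//   PhiNodeSet S;
//   S.insert(P1); S.insert(P2); S.insert(P3);
//   S.erase(P2);              // O(1): P2 is only dropped from NodeMap
//   for (PHINode *P : S)      // visits P1 then P3, in insertion order,
//     ...                     // skipping the erased slot via SkipRemovedElements()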
4048
4049/// Keep track of simplification of Phi nodes.
4050/// Accepts the set of all phi nodes and erases a phi node from this set
4051/// if it is simplified.
4052class SimplificationTracker {
4053 DenseMap<Value *, Value *> Storage;
4054 // Tracks newly created Phi nodes. The elements are iterated by insertion
4055 // order.
4056 PhiNodeSet AllPhiNodes;
4057 // Tracks newly created Select nodes.
4058 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4059
4060public:
4061 Value *Get(Value *V) {
4062 do {
4063 auto SV = Storage.find(V);
4064 if (SV == Storage.end())
4065 return V;
4066 V = SV->second;
4067 } while (true);
4068 }
4069
4070 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4071
4072 void ReplacePhi(PHINode *From, PHINode *To) {
4073 Value *OldReplacement = Get(From);
4074 while (OldReplacement != From) {
4075 From = To;
4076 To = dyn_cast<PHINode>(OldReplacement);
4077 OldReplacement = Get(From);
4078 }
4079 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4080 Put(From, To);
4081 From->replaceAllUsesWith(To);
4082 AllPhiNodes.erase(From);
4083 From->eraseFromParent();
4084 }
4085
4086 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4087
4088 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4089
4090 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4091
4092 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4093
4094 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4095
4096 void destroyNewNodes(Type *CommonType) {
4097 // For safe erasing, replace the uses with dummy value first.
4098 auto *Dummy = PoisonValue::get(CommonType);
4099 for (auto *I : AllPhiNodes) {
4100 I->replaceAllUsesWith(Dummy);
4101 I->eraseFromParent();
4102 }
4103 AllPhiNodes.clear();
4104 for (auto *I : AllSelectNodes) {
4105 I->replaceAllUsesWith(Dummy);
4106 I->eraseFromParent();
4107 }
4108 AllSelectNodes.clear();
4109 }
4110};
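// Sketch of the replacement chaining (hypothetical nodes, for exposition only):
// if phi A was replaced by B and B was later replaced by C, then after
//   ST.Put(A, B); ST.Put(B, C);
// Get(A) returns C, because Get() follows the Storage chain to the final
// value. ReplacePhi() relies on this so that RAUW is always performed against
// a node that has not itself been simplified away.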
4111
4112/// A helper class for combining addressing modes.
4113class AddressingModeCombiner {
4114 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4115 typedef std::pair<PHINode *, PHINode *> PHIPair;
4116
4117private:
4118  /// The addressing modes we've collected.
4119  SmallVector<ExtAddrMode, 16> AddrModes;
4120
4121 /// The field in which the AddrModes differ, when we have more than one.
4122 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4123
4124 /// Are the AddrModes that we have all just equal to their original values?
4125 bool AllAddrModesTrivial = true;
4126
4127 /// Common Type for all different fields in addressing modes.
4128 Type *CommonType = nullptr;
4129
4130 const DataLayout &DL;
4131
4132 /// Original Address.
4133 Value *Original;
4134
4135 /// Common value among addresses
4136 Value *CommonValue = nullptr;
4137
4138public:
4139 AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
4140 : DL(DL), Original(OriginalValue) {}
4141
4142 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4143
4144 /// Get the combined AddrMode
4145 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4146
4147 /// Add a new AddrMode if it's compatible with the AddrModes we already
4148 /// have.
4149 /// \return True iff we succeeded in doing so.
4150 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4151    // Take note of whether we have any non-trivial AddrModes, as we need to detect
4152 // when all AddrModes are trivial as then we would introduce a phi or select
4153 // which just duplicates what's already there.
4154 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4155
4156 // If this is the first addrmode then everything is fine.
4157 if (AddrModes.empty()) {
4158 AddrModes.emplace_back(NewAddrMode);
4159 return true;
4160 }
4161
4162 // Figure out how different this is from the other address modes, which we
4163 // can do just by comparing against the first one given that we only care
4164 // about the cumulative difference.
4165 ExtAddrMode::FieldName ThisDifferentField =
4166 AddrModes[0].compare(NewAddrMode);
4167 if (DifferentField == ExtAddrMode::NoField)
4168 DifferentField = ThisDifferentField;
4169 else if (DifferentField != ThisDifferentField)
4170 DifferentField = ExtAddrMode::MultipleFields;
4171
4172 // If NewAddrMode differs in more than one dimension we cannot handle it.
4173 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4174
4175 // If Scale Field is different then we reject.
4176 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4177
4178    // We also must reject the case when the base offset is different and the
4179    // scaled register is not null: we cannot handle it because the merge of the
4180    // different offsets would have to be used as the ScaledReg.
4181 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4182 !NewAddrMode.ScaledReg);
4183
4184    // We also must reject the case when the GV is different and a BaseReg is
4185    // installed, because we want to use the base register as a merge of the GV values.
4186 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4187 !NewAddrMode.HasBaseReg);
4188
4189    // Even if NewAddrMode is the same we still need to collect it, because the
4190    // original value is different. Later we will need all the original values
4191    // as anchors when finding the common Phi node.
4192 if (CanHandle)
4193 AddrModes.emplace_back(NewAddrMode);
4194 else
4195 AddrModes.clear();
4196
4197 return CanHandle;
4198 }
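  // For example (illustrative only): two addressing modes that differ solely in
  // BaseReg, say [Base:%p1 + 8] and [Base:%p2 + 8], can still be combined by
  // phi-ing %p1 and %p2 later, whereas modes that differ in Scale, or in
  // BaseOffs while a ScaledReg is present, are rejected above and clear
  // AddrModes.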
4199
4200 /// Combine the addressing modes we've collected into a single
4201 /// addressing mode.
4202 /// \return True iff we successfully combined them or we only had one so
4203 /// didn't need to combine them anyway.
4204 bool combineAddrModes() {
4205 // If we have no AddrModes then they can't be combined.
4206 if (AddrModes.size() == 0)
4207 return false;
4208
4209 // A single AddrMode can trivially be combined.
4210 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4211 return true;
4212
4213 // If the AddrModes we collected are all just equal to the value they are
4214 // derived from then combining them wouldn't do anything useful.
4215 if (AllAddrModesTrivial)
4216 return false;
4217
4218 if (!addrModeCombiningAllowed())
4219 return false;
4220
4221 // Build a map between <original value, basic block where we saw it> to
4222 // value of base register.
4223 // Bail out if there is no common type.
4224 FoldAddrToValueMapping Map;
4225 if (!initializeMap(Map))
4226 return false;
4227
4228 CommonValue = findCommon(Map);
4229 if (CommonValue)
4230 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4231 return CommonValue != nullptr;
4232 }
4233
4234private:
4235 /// `CommonValue` may be a placeholder inserted by us.
4236 /// If the placeholder is not used, we should remove this dead instruction.
4237 void eraseCommonValueIfDead() {
4238 if (CommonValue && CommonValue->use_empty())
4239 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4240 CommonInst->eraseFromParent();
4241 }
4242
4243  /// Initialize Map with anchor values. For each address seen we record the
4244  /// value of its differing field in the map.
4245  /// At the same time we find a common type for the differing fields, which we
4246  /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
4247  /// Return false if there is no common type found.
4248 bool initializeMap(FoldAddrToValueMapping &Map) {
4249 // Keep track of keys where the value is null. We will need to replace it
4250 // with constant null when we know the common type.
4251 SmallVector<Value *, 2> NullValue;
4252 Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4253 for (auto &AM : AddrModes) {
4254 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4255 if (DV) {
4256 auto *Type = DV->getType();
4257 if (CommonType && CommonType != Type)
4258 return false;
4259 CommonType = Type;
4260 Map[AM.OriginalValue] = DV;
4261 } else {
4262 NullValue.push_back(AM.OriginalValue);
4263 }
4264 }
4265 assert(CommonType && "At least one non-null value must be!");
4266 for (auto *V : NullValue)
4267 Map[V] = Constant::getNullValue(CommonType);
4268 return true;
4269 }
4270
4271  /// We have a mapping between value A and another value B, where B was a field
4272  /// in the addressing mode represented by A. We also have an original value C
4273  /// representing the address we start with. Traversing from C through phis and
4274  /// selects we ended up with the A's in the map. This utility function tries to
4275  /// find a value V which is a field in addressing mode C such that, traversing
4276  /// through phi nodes and selects from V, we end up in the corresponding values
4277  /// B in the map. The utility will create new Phi/Selects if needed.
4278 // The simple example looks as follows:
4279 // BB1:
4280 // p1 = b1 + 40
4281 // br cond BB2, BB3
4282 // BB2:
4283 // p2 = b2 + 40
4284 // br BB3
4285 // BB3:
4286 // p = phi [p1, BB1], [p2, BB2]
4287 // v = load p
4288 // Map is
4289 // p1 -> b1
4290 // p2 -> b2
4291 // Request is
4292 // p -> ?
4293 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4294 Value *findCommon(FoldAddrToValueMapping &Map) {
4295 // Tracks the simplification of newly created phi nodes. The reason we use
4296 // this mapping is because we will add new created Phi nodes in AddrToBase.
4297 // Simplification of Phi nodes is recursive, so some Phi node may
4298 // be simplified after we added it to AddrToBase. In reality this
4299 // simplification is possible only if original phi/selects were not
4300 // simplified yet.
4301 // Using this mapping we can find the current value in AddrToBase.
4302 SimplificationTracker ST;
4303
4304 // First step, DFS to create PHI nodes for all intermediate blocks.
4305 // Also fill traverse order for the second step.
4306 SmallVector<Value *, 32> TraverseOrder;
4307 InsertPlaceholders(Map, TraverseOrder, ST);
4308
4309 // Second Step, fill new nodes by merged values and simplify if possible.
4310 FillPlaceholders(Map, TraverseOrder, ST);
4311
4312 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4313 ST.destroyNewNodes(CommonType);
4314 return nullptr;
4315 }
4316
4317    // Now we'd like to match the new Phi nodes to the existing ones.
4318 unsigned PhiNotMatchedCount = 0;
4319 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4320 ST.destroyNewNodes(CommonType);
4321 return nullptr;
4322 }
4323
4324 auto *Result = ST.Get(Map.find(Original)->second);
4325 if (Result) {
4326 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4327 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4328 }
4329 return Result;
4330 }
4331
4332 /// Try to match PHI node to Candidate.
4333 /// Matcher tracks the matched Phi nodes.
4334 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4335 SmallSetVector<PHIPair, 8> &Matcher,
4336 PhiNodeSet &PhiNodesToMatch) {
4337 SmallVector<PHIPair, 8> WorkList;
4338 Matcher.insert({PHI, Candidate});
4339 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4340 MatchedPHIs.insert(PHI);
4341 WorkList.push_back({PHI, Candidate});
4342 SmallSet<PHIPair, 8> Visited;
4343 while (!WorkList.empty()) {
4344 auto Item = WorkList.pop_back_val();
4345 if (!Visited.insert(Item).second)
4346 continue;
4347 // We iterate over all incoming values to Phi to compare them.
4348      // If the values are different, both of them are Phis, the first one is a
4349      // Phi we added (subject to match), and both are in the same basic block,
4350      // then we can match our pair if their incoming values match. So we state
4351      // that these values match and add them to the work list to verify that.
4352 for (auto *B : Item.first->blocks()) {
4353 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4354 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4355 if (FirstValue == SecondValue)
4356 continue;
4357
4358 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4359 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4360
4361        // If one of them is not a Phi, or
4362        // the first one is not a Phi node from the set we'd like to match, or
4363        // the Phi nodes are from different basic blocks, then
4364        // we will not be able to match.
4365 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4366 FirstPhi->getParent() != SecondPhi->getParent())
4367 return false;
4368
4369 // If we already matched them then continue.
4370 if (Matcher.count({FirstPhi, SecondPhi}))
4371 continue;
4372        // So the values are different and do not match. So we need them to
4373        // match. (But we register no more than one match per PHI node, so that
4374        // we won't later try to replace them twice.)
4375 if (MatchedPHIs.insert(FirstPhi).second)
4376 Matcher.insert({FirstPhi, SecondPhi});
4377        // But we must check it.
4378 WorkList.push_back({FirstPhi, SecondPhi});
4379 }
4380 }
4381 return true;
4382 }
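  // Illustrative case (hypothetical IR): a newly created phi matches an
  // existing one when their incoming values are pairwise equal, possibly via
  // further phi pairs that get pushed onto the work list:
  //   %old      = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
  //   %sunk_phi = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]  ; matches %old directly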
4383
4384 /// For the given set of PHI nodes (in the SimplificationTracker) try
4385 /// to find their equivalents.
4386 /// Returns false if this matching fails and creation of new Phi is disabled.
4387 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4388 unsigned &PhiNotMatchedCount) {
4389 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4390 // order, so the replacements (ReplacePhi) are also done in a deterministic
4391 // order.
4392 SmallSetVector<PHIPair, 8> Matched;
4393 SmallPtrSet<PHINode *, 8> WillNotMatch;
4394 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4395 while (PhiNodesToMatch.size()) {
4396 PHINode *PHI = *PhiNodesToMatch.begin();
4397
4398      // Add ourselves; if no Phi node in the basic block matches, we do not match.
4399 WillNotMatch.clear();
4400 WillNotMatch.insert(PHI);
4401
4402      // Traverse all Phis until we find an equivalent or fail to do so.
4403 bool IsMatched = false;
4404 for (auto &P : PHI->getParent()->phis()) {
4405 // Skip new Phi nodes.
4406 if (PhiNodesToMatch.count(&P))
4407 continue;
4408 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4409 break;
4410        // If it does not match, collect all Phi nodes from the matcher:
4411        // if we end up with no match, then all these Phi nodes will not match
4412        // later.
4413 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4414 Matched.clear();
4415 }
4416 if (IsMatched) {
4417 // Replace all matched values and erase them.
4418 for (auto MV : Matched)
4419 ST.ReplacePhi(MV.first, MV.second);
4420 Matched.clear();
4421 continue;
4422 }
4423 // If we are not allowed to create new nodes then bail out.
4424 if (!AllowNewPhiNodes)
4425 return false;
4426 // Just remove all seen values in matcher. They will not match anything.
4427 PhiNotMatchedCount += WillNotMatch.size();
4428 for (auto *P : WillNotMatch)
4429 PhiNodesToMatch.erase(P);
4430 }
4431 return true;
4432 }
4433 /// Fill the placeholders with values from predecessors and simplify them.
4434 void FillPlaceholders(FoldAddrToValueMapping &Map,
4435 SmallVectorImpl<Value *> &TraverseOrder,
4436 SimplificationTracker &ST) {
4437 while (!TraverseOrder.empty()) {
4438 Value *Current = TraverseOrder.pop_back_val();
4439 assert(Map.contains(Current) && "No node to fill!!!");
4440 Value *V = Map[Current];
4441
4442 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4443 // CurrentValue also must be Select.
4444 auto *CurrentSelect = cast<SelectInst>(Current);
4445 auto *TrueValue = CurrentSelect->getTrueValue();
4446 assert(Map.contains(TrueValue) && "No True Value!");
4447 Select->setTrueValue(ST.Get(Map[TrueValue]));
4448 auto *FalseValue = CurrentSelect->getFalseValue();
4449 assert(Map.contains(FalseValue) && "No False Value!");
4450 Select->setFalseValue(ST.Get(Map[FalseValue]));
4451 } else {
4452 // Must be a Phi node then.
4453 auto *PHI = cast<PHINode>(V);
4454 // Fill the Phi node with values from predecessors.
4455 for (auto *B : predecessors(PHI->getParent())) {
4456 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4457 assert(Map.contains(PV) && "No predecessor Value!");
4458 PHI->addIncoming(ST.Get(Map[PV]), B);
4459 }
4460 }
4461 }
4462 }
4463
4464  /// Starting from the original value, recursively iterates over the def-use
4465  /// chain up to known ending values represented in the map. For each traversed
4466  /// phi/select, inserts a placeholder Phi or Select.
4467  /// Reports all newly created Phi/Select nodes by adding them to the set.
4468  /// Also reports the order in which the values have been traversed.
4469 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4470 SmallVectorImpl<Value *> &TraverseOrder,
4471 SimplificationTracker &ST) {
4472 SmallVector<Value *, 32> Worklist;
4473 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4474 "Address must be a Phi or Select node");
4475 auto *Dummy = PoisonValue::get(CommonType);
4476 Worklist.push_back(Original);
4477 while (!Worklist.empty()) {
4478 Value *Current = Worklist.pop_back_val();
4479 // if it is already visited or it is an ending value then skip it.
4480 if (Map.contains(Current))
4481 continue;
4482 TraverseOrder.push_back(Current);
4483
4484 // CurrentValue must be a Phi node or select. All others must be covered
4485 // by anchors.
4486 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4487 // Is it OK to get metadata from OrigSelect?!
4488 // Create a Select placeholder with dummy value.
4489 SelectInst *Select =
4490 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4491 CurrentSelect->getName(),
4492 CurrentSelect->getIterator(), CurrentSelect);
4493 Map[Current] = Select;
4494 ST.insertNewSelect(Select);
4495 // We are interested in True and False values.
4496 Worklist.push_back(CurrentSelect->getTrueValue());
4497 Worklist.push_back(CurrentSelect->getFalseValue());
4498 } else {
4499 // It must be a Phi node then.
4500 PHINode *CurrentPhi = cast<PHINode>(Current);
4501 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4502 PHINode *PHI =
4503 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4504 Map[Current] = PHI;
4505 ST.insertNewPhi(PHI);
4506 append_range(Worklist, CurrentPhi->incoming_values());
4507 }
4508 }
4509 }
4510
4511  bool addrModeCombiningAllowed() {
4512    if (DisableComplexAddrModes)
4513      return false;
4514    switch (DifferentField) {
4515    default:
4516      return false;
4517    case ExtAddrMode::BaseRegField:
4518      return AddrSinkCombineBaseReg;
4519    case ExtAddrMode::BaseGVField:
4520      return AddrSinkCombineBaseGV;
4521    case ExtAddrMode::BaseOffsField:
4522      return AddrSinkCombineBaseOffs;
4523    case ExtAddrMode::ScaledRegField:
4524      return AddrSinkCombineScaledReg;
4525 }
4526 }
4527};
4528} // end anonymous namespace
4529
4530/// Try adding ScaleReg*Scale to the current addressing mode.
4531/// Return true and update AddrMode if this addr mode is legal for the target,
4532/// false if not.
4533bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4534 unsigned Depth) {
4535 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4536 // mode. Just process that directly.
4537 if (Scale == 1)
4538 return matchAddr(ScaleReg, Depth);
4539
4540 // If the scale is 0, it takes nothing to add this.
4541 if (Scale == 0)
4542 return true;
4543
4544 // If we already have a scale of this value, we can add to it, otherwise, we
4545 // need an available scale field.
4546 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4547 return false;
4548
4549 ExtAddrMode TestAddrMode = AddrMode;
4550
4551 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4552 // [A+B + A*7] -> [B+A*8].
4553 TestAddrMode.Scale += Scale;
4554 TestAddrMode.ScaledReg = ScaleReg;
4555
4556 // If the new address isn't legal, bail out.
4557 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4558 return false;
4559
4560 // It was legal, so commit it.
4561 AddrMode = TestAddrMode;
4562
4563 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4564 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4565  // X*Scale + C*Scale to the addr mode. If we find an available IV increment, do not
4566 // go any further: we can reuse it and cannot eliminate it.
4567 ConstantInt *CI = nullptr;
4568 Value *AddLHS = nullptr;
4569 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4570 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4571 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4572 TestAddrMode.InBounds = false;
4573 TestAddrMode.ScaledReg = AddLHS;
4574 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4575
4576 // If this addressing mode is legal, commit it and remember that we folded
4577 // this instruction.
4578 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4579 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4580 AddrMode = TestAddrMode;
4581 return true;
4582 }
4583 // Restore status quo.
4584 TestAddrMode = AddrMode;
4585 }
4586
4587 // If this is an add recurrence with a constant step, return the increment
4588 // instruction and the canonicalized step.
4589 auto GetConstantStep =
4590 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4591 auto *PN = dyn_cast<PHINode>(V);
4592 if (!PN)
4593 return std::nullopt;
4594 auto IVInc = getIVIncrement(PN, &LI);
4595 if (!IVInc)
4596 return std::nullopt;
4597    // TODO: The result of the intrinsics above is two's complement. However when
4598    // IV inc is expressed as add or sub, iv.next is potentially a poison value.
4599    // If it has nuw or nsw flags, we need to make sure that these flags are
4600    // inferrable at the point of the memory instruction. Otherwise we are
4601    // replacing a well-defined two's complement computation with poison.
4602    // Currently, to avoid potentially complex analysis needed to prove this, we reject such cases.
4603 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4604 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4605 return std::nullopt;
4606 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4607 return std::make_pair(IVInc->first, ConstantStep->getValue());
4608 return std::nullopt;
4609 };
4610
4611 // Try to account for the following special case:
4612  // 1. ScaleReg is an induction variable;
4613 // 2. We use it with non-zero offset;
4614 // 3. IV's increment is available at the point of memory instruction.
4615 //
4616 // In this case, we may reuse the IV increment instead of the IV Phi to
4617 // achieve the following advantages:
4618 // 1. If IV step matches the offset, we will have no need in the offset;
4619  // 2. Even if they don't match, we will reduce the overlap of the live IV
4620  //    and the IV increment, which will potentially lead to better register
4621  //    assignment.
4622 if (AddrMode.BaseOffs) {
4623 if (auto IVStep = GetConstantStep(ScaleReg)) {
4624 Instruction *IVInc = IVStep->first;
4625 // The following assert is important to ensure a lack of infinite loops.
4626      // This transform is (intentionally) the inverse of the one just above.
4627 // If they don't agree on the definition of an increment, we'd alternate
4628 // back and forth indefinitely.
4629 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4630 APInt Step = IVStep->second;
4631 APInt Offset = Step * AddrMode.Scale;
4632 if (Offset.isSignedIntN(64)) {
4633 TestAddrMode.InBounds = false;
4634 TestAddrMode.ScaledReg = IVInc;
4635 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4636        // If this addressing mode is legal, commit it.
4637 // (Note that we defer the (expensive) domtree base legality check
4638 // to the very last possible point.)
4639 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4640 getDTFn().dominates(IVInc, MemoryInst)) {
4641 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4642 AddrMode = TestAddrMode;
4643 return true;
4644 }
4645 // Restore status quo.
4646 TestAddrMode = AddrMode;
4647 }
4648 }
4649 }
4650
4651 // Otherwise, just return what we have.
4652 return true;
4653}
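// Worked example (illustrative): matching %x with Scale == 4 where
//   %x = add i64 %y, 3
// first commits [4*%x]; the add is then looked through as well, giving
//   ScaledReg = %y, Scale = 4, BaseOffs += 12
// provided the target reports the resulting mode as legal and %x is not an IV
// increment that we would rather reuse.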
4654
4655/// This is a little filter, which returns true if an addressing computation
4656/// involving I might be folded into a load/store accessing it.
4657/// This doesn't need to be perfect, but needs to accept at least
4658/// the set of instructions that MatchOperationAddr can.
4659static bool MightBeFoldableInst(Instruction *I) {
4660 switch (I->getOpcode()) {
4661 case Instruction::BitCast:
4662 case Instruction::AddrSpaceCast:
4663 // Don't touch identity bitcasts.
4664 if (I->getType() == I->getOperand(0)->getType())
4665 return false;
4666 return I->getType()->isIntOrPtrTy();
4667 case Instruction::PtrToInt:
4668 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4669 return true;
4670 case Instruction::IntToPtr:
4671 // We know the input is intptr_t, so this is foldable.
4672 return true;
4673 case Instruction::Add:
4674 return true;
4675 case Instruction::Mul:
4676 case Instruction::Shl:
4677 // Can only handle X*C and X << C.
4678 return isa<ConstantInt>(I->getOperand(1));
4679 case Instruction::GetElementPtr:
4680 return true;
4681 default:
4682 return false;
4683 }
4684}
4685
4686/// Check whether or not \p Val is a legal instruction for \p TLI.
4687/// \note \p Val is assumed to be the product of some type promotion.
4688/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4689/// to be legal, as the non-promoted value would have had the same state.
4690static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4691 const DataLayout &DL, Value *Val) {
4692 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4693 if (!PromotedInst)
4694 return false;
4695 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4696 // If the ISDOpcode is undefined, it was undefined before the promotion.
4697 if (!ISDOpcode)
4698 return true;
4699 // Otherwise, check if the promoted instruction is legal or not.
4700 return TLI.isOperationLegalOrCustom(
4701 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4702}
4703
4704namespace {
4705
4706/// Helper class to perform type promotion.
4707class TypePromotionHelper {
4708 /// Utility function to add a promoted instruction \p ExtOpnd to
4709 /// \p PromotedInsts and record the type of extension we have seen.
4710 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4711 Instruction *ExtOpnd, bool IsSExt) {
4712 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4713 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4714 if (!Inserted) {
4715 // If the new extension is same as original, the information in
4716 // PromotedInsts[ExtOpnd] is still correct.
4717 if (It->second.getInt() == ExtTy)
4718 return;
4719
4720 // Now the new extension is different from old extension, we make
4721 // the type information invalid by setting extension type to
4722 // BothExtension.
4723 ExtTy = BothExtension;
4724 }
4725 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4726 }
4727
4728 /// Utility function to query the original type of instruction \p Opnd
4729 /// with a matched extension type. If the extension doesn't match, we
4730 /// cannot use the information we had on the original type.
4731 /// BothExtension doesn't match any extension type.
4732 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4733 Instruction *Opnd, bool IsSExt) {
4734 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4735 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4736 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4737 return It->second.getPointer();
4738 return nullptr;
4739 }
4740
4741 /// Utility function to check whether or not a sign or zero extension
4742 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4743 /// either using the operands of \p Inst or promoting \p Inst.
4744 /// The type of the extension is defined by \p IsSExt.
4745 /// In other words, check if:
4746 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4747 /// #1 Promotion applies:
4748 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4749 /// #2 Operand reuses:
4750 /// ext opnd1 to ConsideredExtType.
4751 /// \p PromotedInsts maps the instructions to their type before promotion.
4752 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4753 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4754
4755 /// Utility function to determine if \p OpIdx should be promoted when
4756 /// promoting \p Inst.
4757 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4758 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4759 }
4760
4761 /// Utility function to promote the operand of \p Ext when this
4762 /// operand is a promotable trunc or sext or zext.
4763 /// \p PromotedInsts maps the instructions to their type before promotion.
4764 /// \p CreatedInstsCost[out] contains the cost of all instructions
4765 /// created to promote the operand of Ext.
4766 /// Newly added extensions are inserted in \p Exts.
4767 /// Newly added truncates are inserted in \p Truncs.
4768 /// Should never be called directly.
4769 /// \return The promoted value which is used instead of Ext.
4770 static Value *promoteOperandForTruncAndAnyExt(
4771 Instruction *Ext, TypePromotionTransaction &TPT,
4772 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4773 SmallVectorImpl<Instruction *> *Exts,
4774 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4775
4776 /// Utility function to promote the operand of \p Ext when this
4777 /// operand is promotable and is not a supported trunc or sext.
4778 /// \p PromotedInsts maps the instructions to their type before promotion.
4779 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4780 /// created to promote the operand of Ext.
4781 /// Newly added extensions are inserted in \p Exts.
4782 /// Newly added truncates are inserted in \p Truncs.
4783 /// Should never be called directly.
4784 /// \return The promoted value which is used instead of Ext.
4785 static Value *promoteOperandForOther(Instruction *Ext,
4786 TypePromotionTransaction &TPT,
4787 InstrToOrigTy &PromotedInsts,
4788 unsigned &CreatedInstsCost,
4789 SmallVectorImpl<Instruction *> *Exts,
4790 SmallVectorImpl<Instruction *> *Truncs,
4791 const TargetLowering &TLI, bool IsSExt);
4792
4793 /// \see promoteOperandForOther.
4794 static Value *signExtendOperandForOther(
4795 Instruction *Ext, TypePromotionTransaction &TPT,
4796 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4797 SmallVectorImpl<Instruction *> *Exts,
4798 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4799 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4800 Exts, Truncs, TLI, true);
4801 }
4802
4803 /// \see promoteOperandForOther.
4804 static Value *zeroExtendOperandForOther(
4805 Instruction *Ext, TypePromotionTransaction &TPT,
4806 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4807 SmallVectorImpl<Instruction *> *Exts,
4808 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4809 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4810 Exts, Truncs, TLI, false);
4811 }
4812
4813public:
4814 /// Type for the utility function that promotes the operand of Ext.
4815 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4816 InstrToOrigTy &PromotedInsts,
4817 unsigned &CreatedInstsCost,
4818 SmallVectorImpl<Instruction *> *Exts,
4819 SmallVectorImpl<Instruction *> *Truncs,
4820 const TargetLowering &TLI);
4821
4822 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4823 /// action to promote the operand of \p Ext instead of using Ext.
4824 /// \return NULL if no promotable action is possible with the current
4825 /// sign extension.
4826 /// \p InsertedInsts keeps track of all the instructions inserted by the
4827 /// other CodeGenPrepare optimizations. This information is important
4828 /// because we do not want to promote these instructions as CodeGenPrepare
4829 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4830 /// \p PromotedInsts maps the instructions to their type before promotion.
4831 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4832 const TargetLowering &TLI,
4833 const InstrToOrigTy &PromotedInsts);
4834};
4835
4836} // end anonymous namespace
4837
4838bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4839 Type *ConsideredExtType,
4840 const InstrToOrigTy &PromotedInsts,
4841 bool IsSExt) {
4842 // The promotion helper does not know how to deal with vector types yet.
4843 // To be able to fix that, we would need to fix the places where we
4844 // statically extend, e.g., constants and such.
4845 if (Inst->getType()->isVectorTy())
4846 return false;
4847
4848 // We can always get through zext.
4849 if (isa<ZExtInst>(Inst))
4850 return true;
4851
4852 // sext(sext) is ok too.
4853 if (IsSExt && isa<SExtInst>(Inst))
4854 return true;
4855
4856  // We can get through a binary operator, if it is legal. In other words, the
4857  // binary operator must have a nuw or nsw flag.
4858 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4859 if (isa<OverflowingBinaryOperator>(BinOp) &&
4860 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4861 (IsSExt && BinOp->hasNoSignedWrap())))
4862 return true;
4863
4864  // ext(and/or(opnd, cst)) --> and/or(ext(opnd), ext(cst))
4865 if ((Inst->getOpcode() == Instruction::And ||
4866 Inst->getOpcode() == Instruction::Or))
4867 return true;
4868
4869 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4870 if (Inst->getOpcode() == Instruction::Xor) {
4871 // Make sure it is not a NOT.
4872 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4873 if (!Cst->getValue().isAllOnes())
4874 return true;
4875 }
4876
4877 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4878 // It may change a poisoned value into a regular value, like
4879 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4880 // poisoned value regular value
4881 // It should be OK since undef covers valid value.
4882 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4883 return true;
4884
4885 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4886 // It may change a poisoned value into a regular value, like
4887 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4888 // poisoned value regular value
4889 // It should be OK since undef covers valid value.
4890 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4891 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4892 if (ExtInst->hasOneUse()) {
4893 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4894 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4895 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4896 if (Cst &&
4897 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4898 return true;
4899 }
4900 }
4901 }
4902
4903 // Check if we can do the following simplification.
4904 // ext(trunc(opnd)) --> ext(opnd)
4905 if (!isa<TruncInst>(Inst))
4906 return false;
4907
4908 Value *OpndVal = Inst->getOperand(0);
4909 // Check if we can use this operand in the extension.
4910 // If the type is larger than the result type of the extension, we cannot.
4911 if (!OpndVal->getType()->isIntegerTy() ||
4912 OpndVal->getType()->getIntegerBitWidth() >
4913 ConsideredExtType->getIntegerBitWidth())
4914 return false;
4915
4916  // If the operand of the truncate is not an instruction, we will not have
4917  // any information on the dropped bits.
4918  // (Actually we could for constants, but it is not worth the extra logic.)
4919 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4920 if (!Opnd)
4921 return false;
4922
4923  // Check if the source type is narrow enough.
4924  // I.e., check that the trunc just drops extended bits of the same kind as
4925  // the extension.
4926 // #1 get the type of the operand and check the kind of the extended bits.
4927 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4928 if (OpndType)
4929 ;
4930 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4931 OpndType = Opnd->getOperand(0)->getType();
4932 else
4933 return false;
4934
4935 // #2 check that the truncate just drops extended bits.
4936 return Inst->getType()->getIntegerBitWidth() >=
4937 OpndType->getIntegerBitWidth();
4938}
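// Illustrative sketch (not part of the upstream source): why the nuw/nsw
// requirement above makes it safe to move an extension through a binary
// operator. Assuming a hypothetical snippet
//   %a   = add nuw i8 %x, %y
//   %ext = zext i8 %a to i32
// promotion rewrites it as
//   %xe  = zext i8 %x to i32
//   %ye  = zext i8 %y to i32
//   %a32 = add nuw i32 %xe, %ye
// which is only equivalent because nuw guarantees the narrow add never wrapped;
// without the flag, 255 + 1 yields 0 in i8 but 256 in i32.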
4939
4940TypePromotionHelper::Action TypePromotionHelper::getAction(
4941 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4942 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4943 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4944 "Unexpected instruction type");
4945 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4946 Type *ExtTy = Ext->getType();
4947 bool IsSExt = isa<SExtInst>(Ext);
4948  // If the operand of the extension is not an instruction, we cannot
4949  // get through.
4950  // If it is, check whether we can get through.
4951 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4952 return nullptr;
4953
4954 // Do not promote if the operand has been added by codegenprepare.
4955 // Otherwise, it means we are undoing an optimization that is likely to be
4956 // redone, thus causing potential infinite loop.
4957 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4958 return nullptr;
4959
4960 // SExt or Trunc instructions.
4961 // Return the related handler.
4962 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4963 isa<ZExtInst>(ExtOpnd))
4964 return promoteOperandForTruncAndAnyExt;
4965
4966 // Regular instruction.
4967 // Abort early if we will have to insert non-free instructions.
4968 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4969 return nullptr;
4970 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4971}
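// Illustrative summary (not part of the upstream source) of the dispatch
// above, on hypothetical inputs:
//   sext (zext i8 %x to i16) to i32    -> promoteOperandForTruncAndAnyExt
//   zext (trunc i16 %x to i8) to i32   -> promoteOperandForTruncAndAnyExt
//   sext (add nsw i16 %a, %b) to i32   -> signExtendOperandForOther
//   zext (add nuw i16 %a, %b) to i32   -> zeroExtendOperandForOther
// A multi-use operand is rejected up front unless truncating the promoted type
// back to the original type is free, since every other use would have to pay
// for that truncate.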
4972
4973Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4974 Instruction *SExt, TypePromotionTransaction &TPT,
4975 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4976 SmallVectorImpl<Instruction *> *Exts,
4977 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4978 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4979 // get through it and this method should not be called.
4980 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4981 Value *ExtVal = SExt;
4982 bool HasMergedNonFreeExt = false;
4983 if (isa<ZExtInst>(SExtOpnd)) {
4984 // Replace s|zext(zext(opnd))
4985 // => zext(opnd).
4986 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4987 Value *ZExt =
4988 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4989 TPT.replaceAllUsesWith(SExt, ZExt);
4990 TPT.eraseInstruction(SExt);
4991 ExtVal = ZExt;
4992 } else {
4993 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4994 // => z|sext(opnd).
4995 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4996 }
4997 CreatedInstsCost = 0;
4998
4999 // Remove dead code.
5000 if (SExtOpnd->use_empty())
5001 TPT.eraseInstruction(SExtOpnd);
5002
5003 // Check if the extension is still needed.
5004 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5005 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5006 if (ExtInst) {
5007 if (Exts)
5008 Exts->push_back(ExtInst);
5009 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5010 }
5011 return ExtVal;
5012 }
5013
5014 // At this point we have: ext ty opnd to ty.
5015 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5016 Value *NextVal = ExtInst->getOperand(0);
5017 TPT.eraseInstruction(ExtInst, NextVal);
5018 return NextVal;
5019}
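// Illustrative sketch (not part of the upstream source) of the two rewrites
// above, on hypothetical IR. Merging through a zext:
//   %z = zext i8 %x to i16
//   %e = sext i16 %z to i32     -->   %e = zext i8 %x to i32
// Looking through a trunc whose dropped bits are known extension bits:
//   %v = zext i8 %x to i16
//   %t = trunc i16 %v to i8
//   %e = zext i8 %t to i32      -->   %e = zext i16 %v to i32
// canGetThrough has already checked that the trunc only drops bits produced by
// an extension of the same kind, so both forms compute the same value.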
5020
5021Value *TypePromotionHelper::promoteOperandForOther(
5022 Instruction *Ext, TypePromotionTransaction &TPT,
5023 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5024 SmallVectorImpl<Instruction *> *Exts,
5025 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5026 bool IsSExt) {
5027 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5028 // get through it and this method should not be called.
5029 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5030 CreatedInstsCost = 0;
5031 if (!ExtOpnd->hasOneUse()) {
5032 // ExtOpnd will be promoted.
5033 // All its uses, but Ext, will need to use a truncated value of the
5034 // promoted version.
5035 // Create the truncate now.
5036 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5037 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5038 // Insert it just after the definition.
5039 ITrunc->moveAfter(ExtOpnd);
5040 if (Truncs)
5041 Truncs->push_back(ITrunc);
5042 }
5043
5044 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5045 // Restore the operand of Ext (which has been replaced by the previous call
5046 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5047 TPT.setOperand(Ext, 0, ExtOpnd);
5048 }
5049
5050 // Get through the Instruction:
5051 // 1. Update its type.
5052 // 2. Replace the uses of Ext by Inst.
5053 // 3. Extend each operand that needs to be extended.
5054
5055 // Remember the original type of the instruction before promotion.
5056 // This is useful to know that the high bits are sign extended bits.
5057 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5058 // Step #1.
5059 TPT.mutateType(ExtOpnd, Ext->getType());
5060 // Step #2.
5061 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5062 // Step #3.
5063 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5064 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5065 ++OpIdx) {
5066 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5067 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5068 !shouldExtOperand(ExtOpnd, OpIdx)) {
5069 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5070 continue;
5071 }
5072 // Check if we can statically extend the operand.
5073 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5074 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5075 LLVM_DEBUG(dbgs() << "Statically extend\n");
5076 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5077 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5078 : Cst->getValue().zext(BitWidth);
5079 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5080 continue;
5081 }
5082    // UndefValues are typed, so we have to statically extend them.
5083 if (isa<UndefValue>(Opnd)) {
5084 LLVM_DEBUG(dbgs() << "Statically extend\n");
5085 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5086 continue;
5087 }
5088
5089    // Otherwise we have to explicitly sign or zero extend the operand.
5090 Value *ValForExtOpnd = IsSExt
5091 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5092 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5093 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5094 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5095 if (!InstForExtOpnd)
5096 continue;
5097
5098 if (Exts)
5099 Exts->push_back(InstForExtOpnd);
5100
5101 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5102 }
5103 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5104 TPT.eraseInstruction(Ext);
5105 return ExtOpnd;
5106}
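// Illustrative sketch (not part of the upstream source): promotion of a
// regular, multi-use operation feeding a zext. Before:
//   %a = add nuw i8 %x, %y
//   %u = udiv i8 %a, %d            ; second use of %a
//   %e = zext i8 %a to i32
// After the steps above (trunc created for the other use, type mutated,
// operands re-extended, the original zext erased):
//   %xe = zext i8 %x to i32
//   %ye = zext i8 %y to i32
//   %a  = add nuw i32 %xe, %ye     ; now stands in for %e
//   %t  = trunc i32 %a to i8
//   %u  = udiv i8 %t, %d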
5107
5108/// Check whether or not promoting an instruction to a wider type is profitable.
5109/// \p NewCost gives the cost of extension instructions created by the
5110/// promotion.
5111/// \p OldCost gives the cost of extension instructions before the promotion
5112/// plus the number of instructions that have been matched in the addressing
5113/// mode thanks to the promotion.
5114/// \p PromotedOperand is the value that has been promoted.
5115/// \return True if the promotion is profitable, false otherwise.
5116bool AddressingModeMatcher::isPromotionProfitable(
5117 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5118 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5119 << '\n');
5120 // The cost of the new extensions is greater than the cost of the
5121 // old extension plus what we folded.
5122 // This is not profitable.
5123 if (NewCost > OldCost)
5124 return false;
5125 if (NewCost < OldCost)
5126 return true;
5127 // The promotion is neutral but it may help folding the sign extension in
5128 // loads for instance.
5129 // Check that we did not create an illegal instruction.
5130 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5131}
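// Illustrative accounting (not part of the upstream source), using the
// caller's bookkeeping in matchOperationAddr: if the original ext is not free
// (cost 1) and the promotion lets one extra instruction be folded into the
// addressing mode (cost 1), OldCost is 2. If the promotion had to materialize
// two non-free extensions, NewCost is also 2: a neutral trade, so we only keep
// it if the promoted instruction is still legal for the target.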
5132
5133/// Given an instruction or constant expr, see if we can fold the operation
5134/// into the addressing mode. If so, update the addressing mode and return
5135/// true, otherwise return false without modifying AddrMode.
5136/// If \p MovedAway is not NULL, it is set to true when \p AddrInst has been
5137/// moved away instead of being folded into the addressing mode on success.
5138/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5139/// mode because it has been moved away.
5140/// Thus AddrInst must not be added to the matched instructions.
5141/// This state can happen when AddrInst is a sext, since it may be moved away.
5142/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5143/// not be referenced anymore.
5144bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5145 unsigned Depth,
5146 bool *MovedAway) {
5147 // Avoid exponential behavior on extremely deep expression trees.
5148 if (Depth >= 5)
5149 return false;
5150
5151 // By default, all matched instructions stay in place.
5152 if (MovedAway)
5153 *MovedAway = false;
5154
5155 switch (Opcode) {
5156 case Instruction::PtrToInt:
5157 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5158 return matchAddr(AddrInst->getOperand(0), Depth);
5159 case Instruction::IntToPtr: {
5160 auto AS = AddrInst->getType()->getPointerAddressSpace();
5161 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5162 // This inttoptr is a no-op if the integer type is pointer sized.
5163 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5164 return matchAddr(AddrInst->getOperand(0), Depth);
5165 return false;
5166 }
5167 case Instruction::BitCast:
5168 // BitCast is always a noop, and we can handle it as long as it is
5169 // int->int or pointer->pointer (we don't want int<->fp or something).
5170 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5171 // Don't touch identity bitcasts. These were probably put here by LSR,
5172 // and we don't want to mess around with them. Assume it knows what it
5173 // is doing.
5174 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5175 return matchAddr(AddrInst->getOperand(0), Depth);
5176 return false;
5177 case Instruction::AddrSpaceCast: {
5178 unsigned SrcAS =
5179 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5180 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5181 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5182 return matchAddr(AddrInst->getOperand(0), Depth);
5183 return false;
5184 }
5185 case Instruction::Add: {
5186 // Check to see if we can merge in one operand, then the other. If so, we
5187 // win.
5188 ExtAddrMode BackupAddrMode = AddrMode;
5189 unsigned OldSize = AddrModeInsts.size();
5190 // Start a transaction at this point.
5191 // The LHS may match but not the RHS.
5192 // Therefore, we need a higher level restoration point to undo partially
5193 // matched operation.
5194 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5195 TPT.getRestorationPoint();
5196
5197    // Try to match an integer constant second to increase its chance of ending
5198    // up in `BaseOffs` and decrease its chance of ending up in `BaseReg`.
5199 int First = 0, Second = 1;
5200 if (isa<ConstantInt>(AddrInst->getOperand(First))
5201 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5202 std::swap(First, Second);
5203 AddrMode.InBounds = false;
5204 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5205 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5206 return true;
5207
5208 // Restore the old addr mode info.
5209 AddrMode = BackupAddrMode;
5210 AddrModeInsts.resize(OldSize);
5211 TPT.rollback(LastKnownGood);
5212
5213 // Otherwise this was over-aggressive. Try merging operands in the opposite
5214 // order.
5215 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5216 matchAddr(AddrInst->getOperand(First), Depth + 1))
5217 return true;
5218
5219 // Otherwise we definitely can't merge the ADD in.
5220 AddrMode = BackupAddrMode;
5221 AddrModeInsts.resize(OldSize);
5222 TPT.rollback(LastKnownGood);
5223 break;
5224 }
5225 // case Instruction::Or:
5226 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5227 // break;
5228 case Instruction::Mul:
5229 case Instruction::Shl: {
5230 // Can only handle X*C and X << C.
5231 AddrMode.InBounds = false;
5232 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5233 if (!RHS || RHS->getBitWidth() > 64)
5234 return false;
5235 int64_t Scale = Opcode == Instruction::Shl
5236 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5237 : RHS->getSExtValue();
5238
5239 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5240 }
5241 case Instruction::GetElementPtr: {
5242    // Scan the GEP. We check whether it contains constant offsets and at most
5243    // one variable offset.
5244 int VariableOperand = -1;
5245 unsigned VariableScale = 0;
5246
5247 int64_t ConstantOffset = 0;
5248 gep_type_iterator GTI = gep_type_begin(AddrInst);
5249 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5250 if (StructType *STy = GTI.getStructTypeOrNull()) {
5251 const StructLayout *SL = DL.getStructLayout(STy);
5252 unsigned Idx =
5253 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5254 ConstantOffset += SL->getElementOffset(Idx);
5255 } else {
5256 TypeSize TS = GTI.getSequentialElementStride(DL);
5257 if (TS.isNonZero()) {
5258 // The optimisations below currently only work for fixed offsets.
5259 if (TS.isScalable())
5260 return false;
5261 int64_t TypeSize = TS.getFixedValue();
5262 if (ConstantInt *CI =
5263 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5264 const APInt &CVal = CI->getValue();
5265 if (CVal.getSignificantBits() <= 64) {
5266 ConstantOffset += CVal.getSExtValue() * TypeSize;
5267 continue;
5268 }
5269 }
5270 // We only allow one variable index at the moment.
5271 if (VariableOperand != -1)
5272 return false;
5273
5274 // Remember the variable index.
5275 VariableOperand = i;
5276 VariableScale = TypeSize;
5277 }
5278 }
5279 }
5280
5281 // A common case is for the GEP to only do a constant offset. In this case,
5282 // just add it to the disp field and check validity.
5283 if (VariableOperand == -1) {
5284 AddrMode.BaseOffs += ConstantOffset;
5285 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5286 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5287 AddrMode.InBounds = false;
5288 return true;
5289 }
5290 AddrMode.BaseOffs -= ConstantOffset;
5291
5292      if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5293          TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5294 ConstantOffset > 0) {
5295 // Record GEPs with non-zero offsets as candidates for splitting in
5296 // the event that the offset cannot fit into the r+i addressing mode.
5297 // Simple and common case that only one GEP is used in calculating the
5298 // address for the memory access.
5299 Value *Base = AddrInst->getOperand(0);
5300 auto *BaseI = dyn_cast<Instruction>(Base);
5301 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5302        if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5303            (BaseI && !isa<CastInst>(BaseI) &&
5304 !isa<GetElementPtrInst>(BaseI))) {
5305 // Make sure the parent block allows inserting non-PHI instructions
5306 // before the terminator.
5307 BasicBlock *Parent = BaseI ? BaseI->getParent()
5308 : &GEP->getFunction()->getEntryBlock();
5309 if (!Parent->getTerminator()->isEHPad())
5310 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5311 }
5312 }
5313
5314 return false;
5315 }
5316
5317 // Save the valid addressing mode in case we can't match.
5318 ExtAddrMode BackupAddrMode = AddrMode;
5319 unsigned OldSize = AddrModeInsts.size();
5320
5321 // See if the scale and offset amount is valid for this target.
5322 AddrMode.BaseOffs += ConstantOffset;
5323 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5324 AddrMode.InBounds = false;
5325
5326 // Match the base operand of the GEP.
5327 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5328 // If it couldn't be matched, just stuff the value in a register.
5329 if (AddrMode.HasBaseReg) {
5330 AddrMode = BackupAddrMode;
5331 AddrModeInsts.resize(OldSize);
5332 return false;
5333 }
5334 AddrMode.HasBaseReg = true;
5335 AddrMode.BaseReg = AddrInst->getOperand(0);
5336 }
5337
5338 // Match the remaining variable portion of the GEP.
5339 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5340 Depth)) {
5341 // If it couldn't be matched, try stuffing the base into a register
5342 // instead of matching it, and retrying the match of the scale.
5343 AddrMode = BackupAddrMode;
5344 AddrModeInsts.resize(OldSize);
5345 if (AddrMode.HasBaseReg)
5346 return false;
5347 AddrMode.HasBaseReg = true;
5348 AddrMode.BaseReg = AddrInst->getOperand(0);
5349 AddrMode.BaseOffs += ConstantOffset;
5350 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5351 VariableScale, Depth)) {
5352 // If even that didn't work, bail.
5353 AddrMode = BackupAddrMode;
5354 AddrModeInsts.resize(OldSize);
5355 return false;
5356 }
5357 }
5358
5359 return true;
5360 }
5361 case Instruction::SExt:
5362 case Instruction::ZExt: {
5363 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5364 if (!Ext)
5365 return false;
5366
5367 // Try to move this ext out of the way of the addressing mode.
5368 // Ask for a method for doing so.
5369 TypePromotionHelper::Action TPH =
5370 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5371 if (!TPH)
5372 return false;
5373
5374 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5375 TPT.getRestorationPoint();
5376 unsigned CreatedInstsCost = 0;
5377 unsigned ExtCost = !TLI.isExtFree(Ext);
5378 Value *PromotedOperand =
5379 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5380 // SExt has been moved away.
5381 // Thus either it will be rematched later in the recursive calls or it is
5382 // gone. Anyway, we must not fold it into the addressing mode at this point.
5383 // E.g.,
5384 // op = add opnd, 1
5385 // idx = ext op
5386 // addr = gep base, idx
5387 // is now:
5388 // promotedOpnd = ext opnd <- no match here
5389 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5390 // addr = gep base, op <- match
5391 if (MovedAway)
5392 *MovedAway = true;
5393
5394 assert(PromotedOperand &&
5395 "TypePromotionHelper should have filtered out those cases");
5396
5397 ExtAddrMode BackupAddrMode = AddrMode;
5398 unsigned OldSize = AddrModeInsts.size();
5399
5400 if (!matchAddr(PromotedOperand, Depth) ||
5401 // The total of the new cost is equal to the cost of the created
5402 // instructions.
5403 // The total of the old cost is equal to the cost of the extension plus
5404 // what we have saved in the addressing mode.
5405 !isPromotionProfitable(CreatedInstsCost,
5406 ExtCost + (AddrModeInsts.size() - OldSize),
5407 PromotedOperand)) {
5408 AddrMode = BackupAddrMode;
5409 AddrModeInsts.resize(OldSize);
5410 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5411 TPT.rollback(LastKnownGood);
5412 return false;
5413 }
5414
5415 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5416 AddrMode.replaceWith(Ext, PromotedOperand);
5417 return true;
5418 }
5419 case Instruction::Call:
5420 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5421 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5422 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5423 if (TLI.addressingModeSupportsTLS(GV))
5424 return matchAddr(AddrInst->getOperand(0), Depth);
5425 }
5426 }
5427 break;
5428 }
5429 return false;
5430}
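// Illustrative sketch (not part of the upstream source): how a GEP is
// decomposed above into ExtAddrMode fields. For a hypothetical
//   %p = getelementptr inbounds {i32, [8 x i32]}, ptr %base, i64 0, i32 1, i64 %i
// the struct field contributes a constant 4 bytes and the i32 array index a
// stride of 4, so on success the matcher ends up with roughly
//   BaseReg = %base, ScaledReg = %i, Scale = 4, BaseOffs = 4
// provided TLI.isLegalAddressingMode accepts that combination for the access.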
5431
5432/// If we can, try to add the value of 'Addr' into the current addressing mode.
5433/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5434/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5435/// for the target.
5436///
5437bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5438 // Start a transaction at this point that we will rollback if the matching
5439 // fails.
5440 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5441 TPT.getRestorationPoint();
5442 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5443 if (CI->getValue().isSignedIntN(64)) {
5444 // Check if the addition would result in a signed overflow.
5445 int64_t Result;
5446 bool Overflow =
5447 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5448 if (!Overflow) {
5449 // Fold in immediates if legal for the target.
5450 AddrMode.BaseOffs = Result;
5451 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5452 return true;
5453 AddrMode.BaseOffs -= CI->getSExtValue();
5454 }
5455 }
5456 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5457 // If this is a global variable, try to fold it into the addressing mode.
5458 if (!AddrMode.BaseGV) {
5459 AddrMode.BaseGV = GV;
5460 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5461 return true;
5462 AddrMode.BaseGV = nullptr;
5463 }
5464 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5465 ExtAddrMode BackupAddrMode = AddrMode;
5466 unsigned OldSize = AddrModeInsts.size();
5467
5468 // Check to see if it is possible to fold this operation.
5469 bool MovedAway = false;
5470 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5471 // This instruction may have been moved away. If so, there is nothing
5472 // to check here.
5473 if (MovedAway)
5474 return true;
5475 // Okay, it's possible to fold this. Check to see if it is actually
5476 // *profitable* to do so. We use a simple cost model to avoid increasing
5477 // register pressure too much.
5478 if (I->hasOneUse() ||
5479 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5480 AddrModeInsts.push_back(I);
5481 return true;
5482 }
5483
5484 // It isn't profitable to do this, roll back.
5485 AddrMode = BackupAddrMode;
5486 AddrModeInsts.resize(OldSize);
5487 TPT.rollback(LastKnownGood);
5488 }
5489 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5490 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5491 return true;
5492 TPT.rollback(LastKnownGood);
5493 } else if (isa<ConstantPointerNull>(Addr)) {
5494 // Null pointer gets folded without affecting the addressing mode.
5495 return true;
5496 }
5497
5498  // Worst case, the target should support [reg] addressing modes. :)
5499 if (!AddrMode.HasBaseReg) {
5500 AddrMode.HasBaseReg = true;
5501 AddrMode.BaseReg = Addr;
5502 // Still check for legality in case the target supports [imm] but not [i+r].
5503 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5504 return true;
5505 AddrMode.HasBaseReg = false;
5506 AddrMode.BaseReg = nullptr;
5507 }
5508
5509 // If the base register is already taken, see if we can do [r+r].
5510 if (AddrMode.Scale == 0) {
5511 AddrMode.Scale = 1;
5512 AddrMode.ScaledReg = Addr;
5513 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5514 return true;
5515 AddrMode.Scale = 0;
5516 AddrMode.ScaledReg = nullptr;
5517 }
5518 // Couldn't match.
5519 TPT.rollback(LastKnownGood);
5520 return false;
5521}
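// Illustrative note (not part of the upstream source): for an address that
// matches none of the special cases, e.g. an opaque pointer argument %p, the
// fallbacks above first try a plain base register ([%p]) and, if the base
// register slot is already taken, a scaled register ([base + 1*%p]), asking
// TLI.isLegalAddressingMode before committing to either form.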
5522
5523/// Check to see if all uses of OpVal by the specified inline asm call are due
5524/// to memory operands. If so, return true, otherwise return false.
5525static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5526                                    const TargetLowering &TLI,
5527 const TargetRegisterInfo &TRI) {
5528 const Function *F = CI->getFunction();
5529 TargetLowering::AsmOperandInfoVector TargetConstraints =
5530 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5531
5532 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5533 // Compute the constraint code and ConstraintType to use.
5534 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5535
5536 // If this asm operand is our Value*, and if it isn't an indirect memory
5537 // operand, we can't fold it! TODO: Also handle C_Address?
5538 if (OpInfo.CallOperandVal == OpVal &&
5539 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5540 !OpInfo.isIndirect))
5541 return false;
5542 }
5543
5544 return true;
5545}
5546
5547/// Recursively walk all the uses of I until we find a memory use.
5548/// If we find an obviously non-foldable instruction, return true.
5549/// Add accessed addresses and types to MemoryUses.
5550static bool FindAllMemoryUses(
5551    Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5552 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5553 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5554 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5555 // If we already considered this instruction, we're done.
5556 if (!ConsideredInsts.insert(I).second)
5557 return false;
5558
5559 // If this is an obviously unfoldable instruction, bail out.
5560 if (!MightBeFoldableInst(I))
5561 return true;
5562
5563 // Loop over all the uses, recursively processing them.
5564 for (Use &U : I->uses()) {
5565 // Conservatively return true if we're seeing a large number or a deep chain
5566 // of users. This avoids excessive compilation times in pathological cases.
5567 if (SeenInsts++ >= MaxAddressUsersToScan)
5568 return true;
5569
5570 Instruction *UserI = cast<Instruction>(U.getUser());
5571 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5572 MemoryUses.push_back({&U, LI->getType()});
5573 continue;
5574 }
5575
5576 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5577 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5578 return true; // Storing addr, not into addr.
5579 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5580 continue;
5581 }
5582
5583 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5584 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5585 return true; // Storing addr, not into addr.
5586 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5587 continue;
5588 }
5589
5591 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5592 return true; // Storing addr, not into addr.
5593 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5594 continue;
5595 }
5596
5597    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
5598      SmallVector<Value *, 2> PtrOps;
5599      Type *AccessTy;
5600 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5601 return true;
5602
5603 if (!find(PtrOps, U.get()))
5604 return true;
5605
5606 MemoryUses.push_back({&U, AccessTy});
5607 continue;
5608 }
5609
5610 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5611 if (CI->hasFnAttr(Attribute::Cold)) {
5612 // If this is a cold call, we can sink the addressing calculation into
5613 // the cold path. See optimizeCallInst
5614 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5615 continue;
5616 }
5617
5618 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5619 if (!IA)
5620 return true;
5621
5622 // If this is a memory operand, we're cool, otherwise bail out.
5623 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5624 return true;
5625 continue;
5626 }
5627
5628 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5629 PSI, BFI, SeenInsts))
5630 return true;
5631 }
5632
5633 return false;
5634}
5635
5636static bool FindAllMemoryUses(
5637    Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5638 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5639    ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5640  unsigned SeenInsts = 0;
5641 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5642 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5643 PSI, BFI, SeenInsts);
5644}
5645
5646
5647/// Return true if Val is already known to be live at the use site that we're
5648/// folding it into. If so, there is no cost to include it in the addressing
5649/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5650/// instruction already.
5651bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5652 Value *KnownLive1,
5653 Value *KnownLive2) {
5654 // If Val is either of the known-live values, we know it is live!
5655 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5656 return true;
5657
5658 // All values other than instructions and arguments (e.g. constants) are live.
5659 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5660 return true;
5661
5662  // If Val is a constant-sized alloca in the entry block, it is live; this is
5663  // true because it is just a reference to the stack/frame pointer, which is
5664  // live for the whole function.
5665 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5666 if (AI->isStaticAlloca())
5667 return true;
5668
5669 // Check to see if this value is already used in the memory instruction's
5670 // block. If so, it's already live into the block at the very least, so we
5671 // can reasonably fold it.
5672 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5673}
5674
5675/// It is possible for the addressing mode of the machine to fold the specified
5676/// instruction into a load or store that ultimately uses it.
5677/// However, the specified instruction has multiple uses.
5678/// Given this, it may actually increase register pressure to fold it
5679/// into the load. For example, consider this code:
5680///
5681/// X = ...
5682/// Y = X+1
5683/// use(Y) -> nonload/store
5684/// Z = Y+1
5685/// load Z
5686///
5687/// In this case, Y has multiple uses, and can be folded into the load of Z
5688/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5689/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5690/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5691/// number of computations either.
5692///
5693/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5694/// X was live across 'load Z' for other reasons, we actually *would* want to
5695/// fold the addressing mode in the Z case. This would make Y die earlier.
5696bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5697 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5698 if (IgnoreProfitability)
5699 return true;
5700
5701 // AMBefore is the addressing mode before this instruction was folded into it,
5702 // and AMAfter is the addressing mode after the instruction was folded. Get
5703 // the set of registers referenced by AMAfter and subtract out those
5704 // referenced by AMBefore: this is the set of values which folding in this
5705 // address extends the lifetime of.
5706 //
5707 // Note that there are only two potential values being referenced here,
5708 // BaseReg and ScaleReg (global addresses are always available, as are any
5709 // folded immediates).
5710 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5711
5712 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5713 // lifetime wasn't extended by adding this instruction.
5714 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5715 BaseReg = nullptr;
5716 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5717 ScaledReg = nullptr;
5718
5719  // If folding this instruction (and its subexprs) didn't extend any live
5720 // ranges, we're ok with it.
5721 if (!BaseReg && !ScaledReg)
5722 return true;
5723
5724 // If all uses of this instruction can have the address mode sunk into them,
5725 // we can remove the addressing mode and effectively trade one live register
5726 // for another (at worst.) In this context, folding an addressing mode into
5727 // the use is just a particularly nice way of sinking it.
5728  SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5729  if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5730 return false; // Has a non-memory, non-foldable use!
5731
5732 // Now that we know that all uses of this instruction are part of a chain of
5733 // computation involving only operations that could theoretically be folded
5734 // into a memory use, loop over each of these memory operation uses and see
5735 // if they could *actually* fold the instruction. The assumption is that
5736 // addressing modes are cheap and that duplicating the computation involved
5737 // many times is worthwhile, even on a fastpath. For sinking candidates
5738 // (i.e. cold call sites), this serves as a way to prevent excessive code
5739  // growth since most architectures have some reasonably small and fast way to
5740  // compute an effective address (e.g., LEA on x86).
5741 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5742 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5743 Value *Address = Pair.first->get();
5744 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5745 Type *AddressAccessTy = Pair.second;
5746 unsigned AS = Address->getType()->getPointerAddressSpace();
5747
5748 // Do a match against the root of this address, ignoring profitability. This
5749 // will tell us if the addressing mode for the memory operation will
5750 // *actually* cover the shared instruction.
5751 ExtAddrMode Result;
5752 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5753 0);
5754 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5755 TPT.getRestorationPoint();
5756 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5757 AddressAccessTy, AS, UserI, Result,
5758 InsertedInsts, PromotedInsts, TPT,
5759 LargeOffsetGEP, OptSize, PSI, BFI);
5760 Matcher.IgnoreProfitability = true;
5761 bool Success = Matcher.matchAddr(Address, 0);
5762 (void)Success;
5763 assert(Success && "Couldn't select *anything*?");
5764
5765 // The match was to check the profitability, the changes made are not
5766 // part of the original matcher. Therefore, they should be dropped
5767 // otherwise the original matcher will not present the right state.
5768 TPT.rollback(LastKnownGood);
5769
5770 // If the match didn't cover I, then it won't be shared by it.
5771 if (!is_contained(MatchedAddrModeInsts, I))
5772 return false;
5773
5774 MatchedAddrModeInsts.clear();
5775 }
5776
5777 return true;
5778}
5779
5780/// Return true if the specified values are defined in a
5781/// different basic block than BB.
5782static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5783  if (Instruction *I = dyn_cast<Instruction>(V))
5784    return I->getParent() != BB;
5785 return false;
5786}
5787
5788// Find an insert position of Addr for MemoryInst. We can't guarantee that
5789// MemoryInst is the first instruction that will use Addr, so we need to find
5790// the first user of Addr in the current BB.
5791static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5792                                          Value *SunkAddr) {
5793 if (Addr->hasOneUse())
5794 return MemoryInst->getIterator();
5795
5796 // We already have a SunkAddr in current BB, but we may need to insert cast
5797 // instruction after it.
5798 if (SunkAddr) {
5799 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5800 return std::next(AddrInst->getIterator());
5801 }
5802
5803 // Find the first user of Addr in current BB.
5804 Instruction *Earliest = MemoryInst;
5805 for (User *U : Addr->users()) {
5806 Instruction *UserInst = dyn_cast<Instruction>(U);
5807 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5808 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5809 continue;
5810 if (UserInst->comesBefore(Earliest))
5811 Earliest = UserInst;
5812 }
5813 }
5814 return Earliest->getIterator();
5815}
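// Illustrative sketch (not part of the upstream source): why the earliest
// in-block user matters. Given
//   %v0 = load i32, ptr %addr
//   ...
//   %v1 = load i32, ptr %addr      ; MemoryInst currently being optimized
// the sunk address computation is emitted before %v0 rather than just before
// %v1, so that when %v0's address is rewritten later and the cached SunkAddr
// is reused, it still dominates every user in the block.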
5816
5817/// Sink addressing mode computation immediately before MemoryInst if doing so
5818/// can be done without increasing register pressure. The need for the
5819/// register pressure constraint means this can end up being an all or nothing
5820/// decision for all uses of the same addressing computation.
5821///
5822/// Load and Store Instructions often have addressing modes that can do
5823/// significant amounts of computation. As such, instruction selection will try
5824/// to get the load or store to do as much computation as possible for the
5825/// program. The problem is that isel can only see within a single block. As
5826/// such, we sink as much legal addressing mode work into the block as possible.
5827///
5828/// This method is used to optimize both load/store and inline asms with memory
5829/// operands. It's also used to sink addressing computations feeding into cold
5830/// call sites into their (cold) basic block.
5831///
5832/// The motivation for handling sinking into cold blocks is that doing so can
5833/// both enable other address mode sinking (by satisfying the register pressure
5834/// constraint above), and reduce register pressure globally (by removing the
5835/// addressing mode computation from the fast path entirely.).
5836bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5837 Type *AccessTy, unsigned AddrSpace) {
5838 Value *Repl = Addr;
5839
5840 // Try to collapse single-value PHI nodes. This is necessary to undo
5841 // unprofitable PRE transformations.
5842 SmallVector<Value *, 8> worklist;
5843 SmallPtrSet<Value *, 16> Visited;
5844 worklist.push_back(Addr);
5845
5846 // Use a worklist to iteratively look through PHI and select nodes, and
5847 // ensure that the addressing mode obtained from the non-PHI/select roots of
5848 // the graph are compatible.
5849 bool PhiOrSelectSeen = false;
5850 SmallVector<Instruction *, 16> AddrModeInsts;
5851 AddressingModeCombiner AddrModes(*DL, Addr);
5852 TypePromotionTransaction TPT(RemovedInsts);
5853 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5854 TPT.getRestorationPoint();
5855 while (!worklist.empty()) {
5856 Value *V = worklist.pop_back_val();
5857
5858 // We allow traversing cyclic Phi nodes.
5859    // In case of success, after this loop we ensure that traversing through
5860    // Phi nodes ends up with all cases computing an address of the form
5861    // BaseGV + Base + Scale * Index + Offset
5862    // where Scale and Offset are constants and BaseGV, Base and Index
5863    // are exactly the same Values in all cases.
5864 // It means that BaseGV, Scale and Offset dominate our memory instruction
5865 // and have the same value as they had in address computation represented
5866 // as Phi. So we can safely sink address computation to memory instruction.
5867 if (!Visited.insert(V).second)
5868 continue;
5869
5870 // For a PHI node, push all of its incoming values.
5871 if (PHINode *P = dyn_cast<PHINode>(V)) {
5872 append_range(worklist, P->incoming_values());
5873 PhiOrSelectSeen = true;
5874 continue;
5875 }
5876 // Similar for select.
5877 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5878 worklist.push_back(SI->getFalseValue());
5879 worklist.push_back(SI->getTrueValue());
5880 PhiOrSelectSeen = true;
5881 continue;
5882 }
5883
5884 // For non-PHIs, determine the addressing mode being computed. Note that
5885 // the result may differ depending on what other uses our candidate
5886 // addressing instructions might have.
5887 AddrModeInsts.clear();
5888 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5889 0);
5890 // Defer the query (and possible computation of) the dom tree to point of
5891 // actual use. It's expected that most address matches don't actually need
5892 // the domtree.
5893 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5894 Function *F = MemoryInst->getParent()->getParent();
5895 return this->getDT(*F);
5896 };
5897 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5898 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5899 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5900 BFI.get());
5901
5902 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5903 if (GEP && !NewGEPBases.count(GEP)) {
5904 // If splitting the underlying data structure can reduce the offset of a
5905 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5906 // previously split data structures.
5907 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5908 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5909 }
5910
5911 NewAddrMode.OriginalValue = V;
5912 if (!AddrModes.addNewAddrMode(NewAddrMode))
5913 break;
5914 }
5915
5916 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5917 // or we have multiple but either couldn't combine them or combining them
5918 // wouldn't do anything useful, bail out now.
5919 if (!AddrModes.combineAddrModes()) {
5920 TPT.rollback(LastKnownGood);
5921 return false;
5922 }
5923 bool Modified = TPT.commit();
5924
5925 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5926 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5927
5928 // If all the instructions matched are already in this BB, don't do anything.
5929  // If we saw a Phi node then it is definitely not local, and if we saw a
5930 // select then we want to push the address calculation past it even if it's
5931 // already in this BB.
5932 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5933 return IsNonLocalValue(V, MemoryInst->getParent());
5934 })) {
5935 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5936 << "\n");
5937 return Modified;
5938 }
5939
5940  // Now that we've determined the addressing expression we want to use and
5941  // know that we have to sink it into this block, check to see if we have
5942  // already done this for some other load/store instr in this block. If so,
5943  // reuse the computation. Before attempting reuse, check if the address is
5944  // valid as it may have been erased.
5945
5946 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5947
5948 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5949 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5950
5951 // The current BB may be optimized multiple times, we can't guarantee the
5952 // reuse of Addr happens later, call findInsertPos to find an appropriate
5953 // insert position.
5954 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5955
5956 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5957 if (!SunkAddr) {
5958 auto &DT = getDT(*MemoryInst->getFunction());
5959 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5960 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5961 return Modified;
5962 }
5963
5964 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5965
5966 if (SunkAddr) {
5967 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5968 << " for " << *MemoryInst << "\n");
5969 if (SunkAddr->getType() != Addr->getType()) {
5970 if (SunkAddr->getType()->getPointerAddressSpace() !=
5971 Addr->getType()->getPointerAddressSpace() &&
5972 !DL->isNonIntegralPointerType(Addr->getType())) {
5973 // There are two reasons the address spaces might not match: a no-op
5974 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5975 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5976 // TODO: allow bitcast between different address space pointers with the
5977 // same size.
5978 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5979 SunkAddr =
5980 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5981 } else
5982 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5983 }
5984  } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5985                                   SubtargetInfo->addrSinkUsingGEPs())) {
5986 // By default, we use the GEP-based method when AA is used later. This
5987 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5988 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5989 << " for " << *MemoryInst << "\n");
5990 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5991
5992 // First, find the pointer.
5993 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5994 ResultPtr = AddrMode.BaseReg;
5995 AddrMode.BaseReg = nullptr;
5996 }
5997
5998 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5999 // We can't add more than one pointer together, nor can we scale a
6000 // pointer (both of which seem meaningless).
6001 if (ResultPtr || AddrMode.Scale != 1)
6002 return Modified;
6003
6004 ResultPtr = AddrMode.ScaledReg;
6005 AddrMode.Scale = 0;
6006 }
6007
6008 // It is only safe to sign extend the BaseReg if we know that the math
6009 // required to create it did not overflow before we extend it. Since
6010 // the original IR value was tossed in favor of a constant back when
6011 // the AddrMode was created we need to bail out gracefully if widths
6012 // do not match instead of extending it.
6013 //
6014 // (See below for code to add the scale.)
6015 if (AddrMode.Scale) {
6016 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6017 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6018 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6019 return Modified;
6020 }
6021
6022 GlobalValue *BaseGV = AddrMode.BaseGV;
6023 if (BaseGV != nullptr) {
6024 if (ResultPtr)
6025 return Modified;
6026
6027 if (BaseGV->isThreadLocal()) {
6028 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6029 } else {
6030 ResultPtr = BaseGV;
6031 }
6032 }
6033
6034 // If the real base value actually came from an inttoptr, then the matcher
6035 // will look through it and provide only the integer value. In that case,
6036 // use it here.
6037 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6038 if (!ResultPtr && AddrMode.BaseReg) {
6039 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6040 "sunkaddr");
6041 AddrMode.BaseReg = nullptr;
6042 } else if (!ResultPtr && AddrMode.Scale == 1) {
6043 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6044 "sunkaddr");
6045 AddrMode.Scale = 0;
6046 }
6047 }
6048
6049 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6050 !AddrMode.BaseOffs) {
6051 SunkAddr = Constant::getNullValue(Addr->getType());
6052 } else if (!ResultPtr) {
6053 return Modified;
6054 } else {
6055 Type *I8PtrTy =
6056 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6057
6058 // Start with the base register. Do this first so that subsequent address
6059 // matching finds it last, which will prevent it from trying to match it
6060 // as the scaled value in case it happens to be a mul. That would be
6061 // problematic if we've sunk a different mul for the scale, because then
6062 // we'd end up sinking both muls.
6063 if (AddrMode.BaseReg) {
6064 Value *V = AddrMode.BaseReg;
6065 if (V->getType() != IntPtrTy)
6066 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6067
6068 ResultIndex = V;
6069 }
6070
6071 // Add the scale value.
6072 if (AddrMode.Scale) {
6073 Value *V = AddrMode.ScaledReg;
6074 if (V->getType() == IntPtrTy) {
6075 // done.
6076 } else {
6077 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6078 cast<IntegerType>(V->getType())->getBitWidth() &&
6079 "We can't transform if ScaledReg is too narrow");
6080 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6081 }
6082
6083 if (AddrMode.Scale != 1)
6084 V = Builder.CreateMul(
6085 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6086 if (ResultIndex)
6087 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6088 else
6089 ResultIndex = V;
6090 }
6091
6092 // Add in the Base Offset if present.
6093 if (AddrMode.BaseOffs) {
6094 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6095 if (ResultIndex) {
6096 // We need to add this separately from the scale above to help with
6097 // SDAG consecutive load/store merging.
6098 if (ResultPtr->getType() != I8PtrTy)
6099 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6100 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6101 AddrMode.InBounds);
6102 }
6103
6104 ResultIndex = V;
6105 }
6106
6107 if (!ResultIndex) {
6108 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6109 // We know that we have a pointer without any offsets. If this pointer
6110 // originates from a different basic block than the current one, we
6111 // must be able to recreate it in the current basic block.
6112 // We do not support the recreation of any instructions yet.
6113 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6114 return Modified;
6115 SunkAddr = ResultPtr;
6116 } else {
6117 if (ResultPtr->getType() != I8PtrTy)
6118 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6119 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6120 AddrMode.InBounds);
6121 }
6122
6123 if (SunkAddr->getType() != Addr->getType()) {
6124 if (SunkAddr->getType()->getPointerAddressSpace() !=
6125 Addr->getType()->getPointerAddressSpace() &&
6126 !DL->isNonIntegralPointerType(Addr->getType())) {
6127 // There are two reasons the address spaces might not match: a no-op
6128 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6129 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6130 // TODO: allow bitcast between different address space pointers with
6131 // the same size.
6132 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6133 SunkAddr =
6134 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6135 } else
6136 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6137 }
6138 }
6139 } else {
6140 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6141 // non-integral pointers, so in that case bail out now.
6142 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6143 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6144 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6145 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6146 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6147 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6148 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6149 (AddrMode.BaseGV &&
6150 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6151 return Modified;
6152
6153 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6154 << " for " << *MemoryInst << "\n");
6155 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6156 Value *Result = nullptr;
6157
6158 // Start with the base register. Do this first so that subsequent address
6159 // matching finds it last, which will prevent it from trying to match it
6160 // as the scaled value in case it happens to be a mul. That would be
6161 // problematic if we've sunk a different mul for the scale, because then
6162 // we'd end up sinking both muls.
6163 if (AddrMode.BaseReg) {
6164 Value *V = AddrMode.BaseReg;
6165 if (V->getType()->isPointerTy())
6166 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6167 if (V->getType() != IntPtrTy)
6168 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6169 Result = V;
6170 }
6171
6172 // Add the scale value.
6173 if (AddrMode.Scale) {
6174 Value *V = AddrMode.ScaledReg;
6175 if (V->getType() == IntPtrTy) {
6176 // done.
6177 } else if (V->getType()->isPointerTy()) {
6178 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6179 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6180 cast<IntegerType>(V->getType())->getBitWidth()) {
6181 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6182 } else {
6183 // It is only safe to sign extend the BaseReg if we know that the math
6184 // required to create it did not overflow before we extend it. Since
6185 // the original IR value was tossed in favor of a constant back when
6186 // the AddrMode was created we need to bail out gracefully if widths
6187 // do not match instead of extending it.
6188 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6189 if (I && (Result != AddrMode.BaseReg))
6190 I->eraseFromParent();
6191 return Modified;
6192 }
6193 if (AddrMode.Scale != 1)
6194 V = Builder.CreateMul(
6195 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6196 if (Result)
6197 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6198 else
6199 Result = V;
6200 }
6201
6202 // Add in the BaseGV if present.
6203 GlobalValue *BaseGV = AddrMode.BaseGV;
6204 if (BaseGV != nullptr) {
6205 Value *BaseGVPtr;
6206 if (BaseGV->isThreadLocal()) {
6207 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6208 } else {
6209 BaseGVPtr = BaseGV;
6210 }
6211 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6212 if (Result)
6213 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6214 else
6215 Result = V;
6216 }
6217
6218 // Add in the Base Offset if present.
6219 if (AddrMode.BaseOffs) {
6220 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6221 if (Result)
6222 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6223 else
6224 Result = V;
6225 }
6226
6227 if (!Result)
6228 SunkAddr = Constant::getNullValue(Addr->getType());
6229 else
6230 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6231 }
6232
6233 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6234 // Store the newly computed address into the cache. In the case we reused a
6235 // value, this should be idempotent.
6236 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6237
6238 // If we have no uses, recursively delete the value and all dead instructions
6239 // using it.
6240 if (Repl->use_empty()) {
6241 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6242 RecursivelyDeleteTriviallyDeadInstructions(
6243 Repl, TLInfo, nullptr,
6244 [&](Value *V) { removeAllAssertingVHReferences(V); });
6245 });
6246 }
6247 ++NumMemoryInsts;
6248 return true;
6249}
6250
6251/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6252/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6253/// only handle a 2 operand GEP in the same basic block or a splat constant
6254/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6255/// index.
6256///
6257/// If the existing GEP has a vector base pointer that is splat, we can look
6258/// through the splat to find the scalar pointer. If we can't find a scalar
6259/// pointer there's nothing we can do.
6260///
6261/// If we have a GEP with more than 2 indices where the middle indices are all
6262/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6263///
6264/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6265/// followed by a GEP with an all zeroes vector index. This will enable
6266/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6267/// zero index.
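///
/// As an illustrative sketch (the value names here are made up, not taken from
/// the code below): a gather whose address is a multi-index GEP such as
///   %ptrs = getelementptr [4 x i32], ptr %base, i64 0, <4 x i64> %idx
/// is rewritten into a scalar GEP feeding a single-index vector GEP:
///   %scalar = getelementptr [4 x i32], ptr %base, i64 0, i64 0
///   %ptrs2 = getelementptr i32, ptr %scalar, <4 x i64> %idx
/// giving SelectionDAGBuilder a uniform scalar base and a single vector index.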
6268bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6269 Value *Ptr) {
6270 Value *NewAddr;
6271
6272 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6273 // Don't optimize GEPs that don't have indices.
6274 if (!GEP->hasIndices())
6275 return false;
6276
6277 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6278 // FIXME: We should support this by sinking the GEP.
6279 if (MemoryInst->getParent() != GEP->getParent())
6280 return false;
6281
6282 SmallVector<Value *, 2> Ops(GEP->operands());
6283
6284 bool RewriteGEP = false;
6285
6286 if (Ops[0]->getType()->isVectorTy()) {
6287 Ops[0] = getSplatValue(Ops[0]);
6288 if (!Ops[0])
6289 return false;
6290 RewriteGEP = true;
6291 }
6292
6293 unsigned FinalIndex = Ops.size() - 1;
6294
6295 // Ensure that all but the last index are 0.
6296 // FIXME: This isn't strictly required. All that's required is that they are
6297 // all scalars or splats.
6298 for (unsigned i = 1; i < FinalIndex; ++i) {
6299 auto *C = dyn_cast<Constant>(Ops[i]);
6300 if (!C)
6301 return false;
6302 if (isa<VectorType>(C->getType()))
6303 C = C->getSplatValue();
6304 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6305 if (!CI || !CI->isZero())
6306 return false;
6307 // Scalarize the index if needed.
6308 Ops[i] = CI;
6309 }
6310
6311 // Try to scalarize the final index.
6312 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6313 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6314 auto *C = dyn_cast<ConstantInt>(V);
6315 // Don't scalarize all zeros vector.
6316 if (!C || !C->isZero()) {
6317 Ops[FinalIndex] = V;
6318 RewriteGEP = true;
6319 }
6320 }
6321 }
6322
6323 // If we made any changes or we have extra operands, we need to generate
6324 // new instructions.
6325 if (!RewriteGEP && Ops.size() == 2)
6326 return false;
6327
6328 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6329
6330 IRBuilder<> Builder(MemoryInst);
6331
6332 Type *SourceTy = GEP->getSourceElementType();
6333 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6334
6335 // If the final index isn't a vector, emit a scalar GEP containing all ops
6336 // and a vector GEP with an all-zeroes final index.
6337 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6338 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6339 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6340 auto *SecondTy = GetElementPtrInst::getIndexedType(
6341 SourceTy, ArrayRef(Ops).drop_front());
6342 NewAddr =
6343 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6344 } else {
6345 Value *Base = Ops[0];
6346 Value *Index = Ops[FinalIndex];
6347
6348 // Create a scalar GEP if there are more than 2 operands.
6349 if (Ops.size() != 2) {
6350 // Replace the last index with 0.
6351 Ops[FinalIndex] =
6352 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6353 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6354 SourceTy = GetElementPtrInst::getIndexedType(
6355 SourceTy, ArrayRef(Ops).drop_front());
6356 }
6357
6358 // Now create the GEP with scalar pointer and vector index.
6359 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6360 }
6361 } else if (!isa<Constant>(Ptr)) {
6362 // Not a GEP; maybe it's a splat and we can create a GEP to enable
6363 // SelectionDAGBuilder to use it as a uniform base.
6364 Value *V = getSplatValue(Ptr);
6365 if (!V)
6366 return false;
6367
6368 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6369
6370 IRBuilder<> Builder(MemoryInst);
6371
6372 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6373 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6374 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6375 Type *ScalarTy;
6376 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6377 Intrinsic::masked_gather) {
6378 ScalarTy = MemoryInst->getType()->getScalarType();
6379 } else {
6380 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6381 Intrinsic::masked_scatter);
6382 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6383 }
6384 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6385 } else {
6386 // Constant; SelectionDAGBuilder knows to check if it's a splat.
6387 return false;
6388 }
6389
6390 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6391
6392 // If we have no uses, recursively delete the value and all dead instructions
6393 // using it.
6394 if (Ptr->use_empty())
6395 RecursivelyDeleteTriviallyDeadInstructions(
6396 Ptr, TLInfo, nullptr,
6397 [&](Value *V) { removeAllAssertingVHReferences(V); });
6398
6399 return true;
6400}
6401
6402// This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
6403 // Check for the pattern we are interested in, where the intrinsic has at most
6404 // 2 uses, which are the extract instructions.
6405 static bool matchOverflowPattern(Instruction *I, ExtractValueInst *&MulExtract,
6406 ExtractValueInst *&OverflowExtract) {
6407 // Bail out if it's more than 2 users:
6408 if (I->hasNUsesOrMore(3))
6409 return false;
6410
6411 for (User *U : I->users()) {
6412 auto *Extract = dyn_cast<ExtractValueInst>(U);
6413 if (!Extract || Extract->getNumIndices() != 1)
6414 return false;
6415
6416 unsigned Index = Extract->getIndices()[0];
6417 if (Index == 0)
6418 MulExtract = Extract;
6419 else if (Index == 1)
6420 OverflowExtract = Extract;
6421 else
6422 return false;
6423 }
6424 return true;
6425}
6426
6427 // Rewrite the mul_with_overflow intrinsic by checking whether both operands'
6428 // value ranges fit within the legal type. If so, we can use a cheaper
6429 // multiplication algorithm. This would naturally be done during type
6430 // legalization, but reconstructing the IR is not possible there, so we do it
6431 // here.
6432// The IR after the optimization will look like:
6433// entry:
6434// if signed:
6435// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
6436// overflow_no
6437// else:
6438// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
6439// overflow_no:
6440// overflow:
6441// overflow.res:
6442// \returns true if optimization was applied
6443// TODO: This optimization can be further improved to optimize branching on
6444// overflow where the 'overflow_no' BB can branch directly to the false
6445// successor of overflow, but that would add additional complexity so we leave
6446// it for future work.
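//
// As a rough illustration (assuming the original type is i64 and i32 is legal
// on the target), 'umul.with.overflow.i64' on %a and %b becomes roughly:
//   entry:
//     check whether the high 32 bits of %a or %b are set
//     br i1 %any.hi, label %overflow, label %overflow.no
//   overflow.no:
//     %mul = mul i64 (zext of the low half of %a), (zext of the low half of %b)
//     br label %overflow.res
//   overflow:
//     keep the original @llvm.umul.with.overflow.i64 call
//     br label %overflow.res
//   overflow.res:
//     phi nodes merge the multiplication result and the overflow flag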
6447bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
6448 ModifyDT &ModifiedDT) {
6449 // Check if target supports this optimization.
6451 I->getContext(),
6452 TLI->getValueType(*DL, I->getType()->getContainedType(0))))
6453 return false;
6454
6455 ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
6456 if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
6457 return false;
6458
6459 // Keep track of the instruction to stop reoptimizing it again.
6460 InsertedInsts.insert(I);
6461
6462 Value *LHS = I->getOperand(0);
6463 Value *RHS = I->getOperand(1);
6464 Type *Ty = LHS->getType();
6465 unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
6466 Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
6467
6468 // New BBs:
6469 BasicBlock *OverflowEntryBB =
6470 I->getParent()->splitBasicBlock(I, "", /*Before*/ true);
6471 OverflowEntryBB->takeName(I->getParent());
6472 // Keep the 'br' instruction that is generated as a result of the split to be
6473 // erased/replaced later.
6474 Instruction *OldTerminator = OverflowEntryBB->getTerminator();
6475 BasicBlock *NoOverflowBB =
6476 BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
6477 NoOverflowBB->moveAfter(OverflowEntryBB);
6478 BasicBlock *OverflowBB =
6479 BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
6480 OverflowBB->moveAfter(NoOverflowBB);
6481
6482 // BB overflow.entry:
6483 IRBuilder<> Builder(OverflowEntryBB);
6484 // Extract low and high halves of LHS:
6485 Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
6486 Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
6487 HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
6488
6489 // Extract low and high halves of RHS:
6490 Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
6491 Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
6492 HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
6493
6494 Value *IsAnyBitTrue;
6495 if (IsSigned) {
6496 Value *SignLoLHS =
6497 Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
6498 Value *SignLoRHS =
6499 Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
6500 Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
6501 Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
6502 Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
6503 IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
6504 ConstantInt::getNullValue(Or->getType()));
6505 } else {
6506 Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
6507 ConstantInt::getNullValue(LegalTy));
6508 Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
6509 ConstantInt::getNullValue(LegalTy));
6510 IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
6511 }
6512 Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
6513
6514 // BB overflow.no:
6515 Builder.SetInsertPoint(NoOverflowBB);
6516 Value *ExtLoLHS, *ExtLoRHS;
6517 if (IsSigned) {
6518 ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
6519 ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
6520 } else {
6521 ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
6522 ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
6523 }
6524
6525 Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
6526
6527 // Create the 'overflow.res' BB to merge the results of
6528 // the two paths:
6529 BasicBlock *OverflowResBB = I->getParent();
6530 OverflowResBB->setName("overflow.res");
6531
6532 // BB overflow.no: jump to overflow.res BB
6533 Builder.CreateBr(OverflowResBB);
6534 // Now we don't need the old terminator in the overflow.entry BB; erase it:
6535 OldTerminator->eraseFromParent();
6536
6537 // BB overflow.res:
6538 Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6539 // Create PHI nodes to merge results from the overflow.no BB and the overflow
6540 // BB to replace the extract instructions.
6541 PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
6542 *OverflowFlagPHI =
6543 Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6544
6545 // Add the incoming values from the overflow.no BB and, later, the overflow BB.
6546 OverflowResPHI->addIncoming(Mul, NoOverflowBB);
6547 OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
6548 NoOverflowBB);
6549
6550 // Replace all users of MulExtract and OverflowExtract to use the PHI nodes.
6551 if (MulExtract) {
6552 MulExtract->replaceAllUsesWith(OverflowResPHI);
6553 MulExtract->eraseFromParent();
6554 }
6555 if (OverflowExtract) {
6556 OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
6557 OverflowExtract->eraseFromParent();
6558 }
6559
6560 // Remove the intrinsic from parent (overflow.res BB) as it will be part of
6561 // overflow BB
6562 I->removeFromParent();
6563 // BB overflow:
6564 I->insertInto(OverflowBB, OverflowBB->end());
6565 Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6566 Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6567 Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
6568 Builder.CreateBr(OverflowResBB);
6569
6570 // Add the extracted values to the PHI nodes in the overflow.res BB.
6571 OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
6572 OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
6573
6574 ModifiedDT = ModifyDT::ModifyBBDT;
6575 return true;
6576}
6577
6578/// If there are any memory operands, use OptimizeMemoryInst to sink their
6579/// address computing into the block when possible / profitable.
6580bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6581 bool MadeChange = false;
6582
6583 const TargetRegisterInfo *TRI =
6584 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6585 TargetLowering::AsmOperandInfoVector TargetConstraints =
6586 TLI->ParseConstraints(*DL, TRI, *CS);
6587 unsigned ArgNo = 0;
6588 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6589 // Compute the constraint code and ConstraintType to use.
6590 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6591
6592 // TODO: Also handle C_Address?
6593 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6594 OpInfo.isIndirect) {
6595 Value *OpVal = CS->getArgOperand(ArgNo++);
6596 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6597 } else if (OpInfo.Type == InlineAsm::isInput)
6598 ArgNo++;
6599 }
6600
6601 return MadeChange;
6602}
6603
6604/// Check if all the uses of \p Val are equivalent (or free) zero or
6605/// sign extensions.
6606static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6607 assert(!Val->use_empty() && "Input must have at least one use");
6608 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6609 bool IsSExt = isa<SExtInst>(FirstUser);
6610 Type *ExtTy = FirstUser->getType();
6611 for (const User *U : Val->users()) {
6612 const Instruction *UI = cast<Instruction>(U);
6613 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6614 return false;
6615 Type *CurTy = UI->getType();
6616 // Same input and output types: Same instruction after CSE.
6617 if (CurTy == ExtTy)
6618 continue;
6619
6620 // If IsSExt is true, we are in this situation:
6621 // a = Val
6622 // b = sext ty1 a to ty2
6623 // c = sext ty1 a to ty3
6624 // Assuming ty2 is shorter than ty3, this could be turned into:
6625 // a = Val
6626 // b = sext ty1 a to ty2
6627 // c = sext ty2 b to ty3
6628 // However, the last sext is not free.
6629 if (IsSExt)
6630 return false;
6631
6632 // This is a ZExt, maybe this is free to extend from one type to another.
6633 // In that case, we would not account for a different use.
6634 Type *NarrowTy;
6635 Type *LargeTy;
6636 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6637 CurTy->getScalarType()->getIntegerBitWidth()) {
6638 NarrowTy = CurTy;
6639 LargeTy = ExtTy;
6640 } else {
6641 NarrowTy = ExtTy;
6642 LargeTy = CurTy;
6643 }
6644
6645 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6646 return false;
6647 }
6648 // All uses are the same or can be derived from one another for free.
6649 return true;
6650}
6651
6652/// Try to speculatively promote extensions in \p Exts and continue
6653/// promoting through newly promoted operands recursively as far as doing so is
6654/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6655/// When some promotion happened, \p TPT contains the proper state to revert
6656/// them.
6657///
6658/// \return true if some promotion happened, false otherwise.
6659bool CodeGenPrepare::tryToPromoteExts(
6660 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6661 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6662 unsigned CreatedInstsCost) {
6663 bool Promoted = false;
6664
6665 // Iterate over all the extensions to try to promote them.
6666 for (auto *I : Exts) {
6667 // Early check if we directly have ext(load).
6668 if (isa<LoadInst>(I->getOperand(0))) {
6669 ProfitablyMovedExts.push_back(I);
6670 continue;
6671 }
6672
6673 // Check whether or not we want to do any promotion. The reason we have
6674 // this check inside the for loop is to catch the case where an extension
6675 // is directly fed by a load, because in that case the extension can be moved
6676 // up without any promotion of its operands.
6677 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6678 return false;
6679
6679
6680 // Get the action to perform the promotion.
6681 TypePromotionHelper::Action TPH =
6682 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6683 // Check if we can promote.
6684 if (!TPH) {
6685 // Save the current extension as we cannot move up through its operand.
6686 ProfitablyMovedExts.push_back(I);
6687 continue;
6688 }
6689
6690 // Save the current state.
6691 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6692 TPT.getRestorationPoint();
6693 SmallVector<Instruction *, 4> NewExts;
6694 unsigned NewCreatedInstsCost = 0;
6695 unsigned ExtCost = !TLI->isExtFree(I);
6696 // Promote.
6697 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6698 &NewExts, nullptr, *TLI);
6699 assert(PromotedVal &&
6700 "TypePromotionHelper should have filtered out those cases");
6701
6702 // We can merge at most one extension into a load.
6703 // Therefore, if we have more than 1 new extension, we heuristically
6704 // cut this search path, because it means we would degrade the code quality.
6705 // With exactly 2, the transformation is neutral, because we will merge
6706 // one extension but leave one. However, we optimistically keep going,
6707 // because the new extension may be removed too. Also avoid replacing a
6708 // single free extension with multiple extensions, as this increases the
6709 // number of IR instructions while not providing any savings.
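    // A rough illustration of the trade-off: promoting 'zext (add x, y)'
    // rewrites it as 'add (zext x), (zext y)', trading one extension for two.
    // If one of the new extensions later folds into an ext(load), the
    // transformation pays for itself; otherwise the rollback below undoes it.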
6710 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6711 // FIXME: It would be possible to propagate a negative value instead of
6712 // conservatively ceiling it to 0.
6713 TotalCreatedInstsCost =
6714 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6715 if (!StressExtLdPromotion &&
6716 (TotalCreatedInstsCost > 1 ||
6717 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6718 (ExtCost == 0 && NewExts.size() > 1))) {
6719 // This promotion is not profitable, rollback to the previous state, and
6720 // save the current extension in ProfitablyMovedExts as the latest
6721 // speculative promotion turned out to be unprofitable.
6722 TPT.rollback(LastKnownGood);
6723 ProfitablyMovedExts.push_back(I);
6724 continue;
6725 }
6726 // Continue promoting NewExts as far as doing so is profitable.
6727 SmallVector<Instruction *, 2> NewlyMovedExts;
6728 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6729 bool NewPromoted = false;
6730 for (auto *ExtInst : NewlyMovedExts) {
6731 Instruction *MovedExt = cast<Instruction>(ExtInst);
6732 Value *ExtOperand = MovedExt->getOperand(0);
6733 // If we have reached a load, we need this extra profitability check
6734 // as it could potentially be merged into an ext(load).
6735 if (isa<LoadInst>(ExtOperand) &&
6736 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6737 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6738 continue;
6739
6740 ProfitablyMovedExts.push_back(MovedExt);
6741 NewPromoted = true;
6742 }
6743
6744 // If none of the speculative promotions for NewExts is profitable, roll back
6745 // and save the current extension (I) as the last profitable extension.
6746 if (!NewPromoted) {
6747 TPT.rollback(LastKnownGood);
6748 ProfitablyMovedExts.push_back(I);
6749 continue;
6750 }
6751 // The promotion is profitable.
6752 Promoted = true;
6753 }
6754 return Promoted;
6755}
6756
6757/// Merge redundant sexts when one dominates the other.
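///
/// An illustrative sketch: if %s1 dominates %s2 and both extend the same value,
///   %s1 = sext i32 %x to i64
///   ...
///   %s2 = sext i32 %x to i64
/// then the uses of %s2 are rewritten to use %s1 and %s2 is removed.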
6758bool CodeGenPrepare::mergeSExts(Function &F) {
6759 bool Changed = false;
6760 for (auto &Entry : ValToSExtendedUses) {
6761 SExts &Insts = Entry.second;
6762 SExts CurPts;
6763 for (Instruction *Inst : Insts) {
6764 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6765 Inst->getOperand(0) != Entry.first)
6766 continue;
6767 bool inserted = false;
6768 for (auto &Pt : CurPts) {
6769 if (getDT(F).dominates(Inst, Pt)) {
6770 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6771 RemovedInsts.insert(Pt);
6772 Pt->removeFromParent();
6773 Pt = Inst;
6774 inserted = true;
6775 Changed = true;
6776 break;
6777 }
6778 if (!getDT(F).dominates(Pt, Inst))
6779 // Give up if we need to merge in a common dominator, as
6780 // experiments show it is not profitable.
6781 continue;
6782 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6783 RemovedInsts.insert(Inst);
6784 Inst->removeFromParent();
6785 inserted = true;
6786 Changed = true;
6787 break;
6788 }
6789 if (!inserted)
6790 CurPts.push_back(Inst);
6791 }
6792 }
6793 return Changed;
6794}
6795
6796 // Split large data structures so that the GEPs accessing them have smaller
6797 // offsets, allowing them to be sunk to the same blocks as their users.
6798// For example, a large struct starting from %base is split into two parts
6799// where the second part starts from %new_base.
6800//
6801// Before:
6802// BB0:
6803// %base =
6804//
6805// BB1:
6806// %gep0 = gep %base, off0
6807// %gep1 = gep %base, off1
6808// %gep2 = gep %base, off2
6809//
6810// BB2:
6811// %load1 = load %gep0
6812// %load2 = load %gep1
6813// %load3 = load %gep2
6814//
6815// After:
6816// BB0:
6817// %base =
6818// %new_base = gep %base, off0
6819//
6820// BB1:
6821// %new_gep0 = %new_base
6822// %new_gep1 = gep %new_base, off1 - off0
6823// %new_gep2 = gep %new_base, off2 - off0
6824//
6825// BB2:
6826// %load1 = load i32, i32* %new_gep0
6827// %load2 = load i32, i32* %new_gep1
6828// %load3 = load i32, i32* %new_gep2
6829//
6830// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6831 // their offsets are small enough to fit into the addressing mode.
6832bool CodeGenPrepare::splitLargeGEPOffsets() {
6833 bool Changed = false;
6834 for (auto &Entry : LargeOffsetGEPMap) {
6835 Value *OldBase = Entry.first;
6836 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6837 &LargeOffsetGEPs = Entry.second;
6838 auto compareGEPOffset =
6839 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6840 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6841 if (LHS.first == RHS.first)
6842 return false;
6843 if (LHS.second != RHS.second)
6844 return LHS.second < RHS.second;
6845 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6846 };
6847 // Sort all the GEPs of the same data structure based on their offsets.
6848 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6849 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6850 // Skip if all the GEPs have the same offsets.
6851 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6852 continue;
6853 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6854 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6855 Value *NewBaseGEP = nullptr;
6856
6857 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6858 GetElementPtrInst *GEP) {
6859 LLVMContext &Ctx = GEP->getContext();
6860 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6861 Type *I8PtrTy =
6862 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6863
6864 BasicBlock::iterator NewBaseInsertPt;
6865 BasicBlock *NewBaseInsertBB;
6866 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6867 // If the base of the struct is an instruction, the new base will be
6868 // inserted close to it.
6869 NewBaseInsertBB = BaseI->getParent();
6870 if (isa<PHINode>(BaseI))
6871 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6872 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6873 NewBaseInsertBB =
6874 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6875 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6876 } else
6877 NewBaseInsertPt = std::next(BaseI->getIterator());
6878 } else {
6879 // If the current base is an argument or global value, the new base
6880 // will be inserted into the entry block.
6881 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6882 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6883 }
6884 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6885 // Create a new base.
6886 // TODO: Avoid implicit trunc?
6887 // See https://github.com/llvm/llvm-project/issues/112510.
6888 Value *BaseIndex =
6889 ConstantInt::getSigned(PtrIdxTy, BaseOffset, /*ImplicitTrunc=*/true);
6890 NewBaseGEP = OldBase;
6891 if (NewBaseGEP->getType() != I8PtrTy)
6892 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6893 NewBaseGEP =
6894 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6895 NewGEPBases.insert(NewBaseGEP);
6896 return;
6897 };
6898
6899 // Check whether all the offsets can be encoded with the preferred common base.
6900 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6901 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6902 BaseOffset = PreferBase;
6903 // Create a new base if the offset of the BaseGEP can be decoded with one
6904 // instruction.
6905 createNewBase(BaseOffset, OldBase, BaseGEP);
6906 }
6907
6908 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6909 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6910 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6911 int64_t Offset = LargeOffsetGEP->second;
6912 if (Offset != BaseOffset) {
6913 TargetLowering::AddrMode AddrMode;
6914 AddrMode.HasBaseReg = true;
6915 AddrMode.BaseOffs = Offset - BaseOffset;
6916 // The result type of the GEP might not be the type of the memory
6917 // access.
6918 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6919 GEP->getResultElementType(),
6920 GEP->getAddressSpace())) {
6921 // We need to create a new base if the offset to the current base is
6922 // too large to fit into the addressing mode. So, a very large struct
6923 // may be split into several parts.
6924 BaseGEP = GEP;
6925 BaseOffset = Offset;
6926 NewBaseGEP = nullptr;
6927 }
6928 }
6929
6930 // Generate a new GEP to replace the current one.
6931 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6932
6933 if (!NewBaseGEP) {
6934 // Create a new base if we don't have one yet. Find the insertion
6935 // point for the new base first.
6936 createNewBase(BaseOffset, OldBase, GEP);
6937 }
6938
6939 IRBuilder<> Builder(GEP);
6940 Value *NewGEP = NewBaseGEP;
6941 if (Offset != BaseOffset) {
6942 // Calculate the new offset for the new GEP.
6943 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6944 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6945 }
6946 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6947 LargeOffsetGEPID.erase(GEP);
6948 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6949 GEP->eraseFromParent();
6950 Changed = true;
6951 }
6952 }
6953 return Changed;
6954}
6955
6956bool CodeGenPrepare::optimizePhiType(
6957 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6958 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6959 // We are looking for a collection of interconnected phi nodes that together
6960 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6961 // are of the same type. Convert the whole set of nodes to the type of the
6962 // bitcast.
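  // An illustrative sketch of the shape being looked for (the types are just an
  // example; the target decides which conversions are worthwhile):
  //   %l = load float, ptr %p
  //   %b = bitcast float %l to i32
  //   %phi = phi i32 [ %b, %bb0 ], [ %phi2, %bb1 ]
  //   %c = bitcast i32 %phi to float
  // Here the phi (and any connected phis) can be converted to float, removing
  // both bitcasts.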
6963 Type *PhiTy = I->getType();
6964 Type *ConvertTy = nullptr;
6965 if (Visited.count(I) ||
6966 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6967 return false;
6968
6969 SmallVector<Instruction *, 4> Worklist;
6970 Worklist.push_back(cast<Instruction>(I));
6971 SmallPtrSet<PHINode *, 4> PhiNodes;
6972 SmallPtrSet<ConstantData *, 4> Constants;
6973 PhiNodes.insert(I);
6974 Visited.insert(I);
6975 SmallPtrSet<Instruction *, 4> Defs;
6976 SmallPtrSet<Instruction *, 4> Uses;
6977 // This works by adding extra bitcasts between load/stores and removing
6978 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
6979 // we can get into the situation where we remove a bitcast in one iteration
6980 // just to add it again in the next. We need to ensure that at least one
6981 // bitcast we remove is anchored to something that will not change back.
6982 bool AnyAnchored = false;
6983
6984 while (!Worklist.empty()) {
6985 Instruction *II = Worklist.pop_back_val();
6986
6987 if (auto *Phi = dyn_cast<PHINode>(II)) {
6988 // Handle Defs, which might also be PHIs.
6989 for (Value *V : Phi->incoming_values()) {
6990 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6991 if (!PhiNodes.count(OpPhi)) {
6992 if (!Visited.insert(OpPhi).second)
6993 return false;
6994 PhiNodes.insert(OpPhi);
6995 Worklist.push_back(OpPhi);
6996 }
6997 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6998 if (!OpLoad->isSimple())
6999 return false;
7000 if (Defs.insert(OpLoad).second)
7001 Worklist.push_back(OpLoad);
7002 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
7003 if (Defs.insert(OpEx).second)
7004 Worklist.push_back(OpEx);
7005 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7006 if (!ConvertTy)
7007 ConvertTy = OpBC->getOperand(0)->getType();
7008 if (OpBC->getOperand(0)->getType() != ConvertTy)
7009 return false;
7010 if (Defs.insert(OpBC).second) {
7011 Worklist.push_back(OpBC);
7012 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
7013 !isa<ExtractElementInst>(OpBC->getOperand(0));
7014 }
7015 } else if (auto *OpC = dyn_cast<ConstantData>(V))
7016 Constants.insert(OpC);
7017 else
7018 return false;
7019 }
7020 }
7021
7022 // Handle uses, which might also be PHIs.
7023 for (User *V : II->users()) {
7024 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7025 if (!PhiNodes.count(OpPhi)) {
7026 if (Visited.count(OpPhi))
7027 return false;
7028 PhiNodes.insert(OpPhi);
7029 Visited.insert(OpPhi);
7030 Worklist.push_back(OpPhi);
7031 }
7032 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
7033 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
7034 return false;
7035 Uses.insert(OpStore);
7036 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7037 if (!ConvertTy)
7038 ConvertTy = OpBC->getType();
7039 if (OpBC->getType() != ConvertTy)
7040 return false;
7041 Uses.insert(OpBC);
7042 AnyAnchored |=
7043 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
7044 } else {
7045 return false;
7046 }
7047 }
7048 }
7049
7050 if (!ConvertTy || !AnyAnchored ||
7051 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
7052 return false;
7053
7054 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
7055 << *ConvertTy << "\n");
7056
7057 // Create all the new phi nodes of the new type, and bitcast any loads to the
7058 // correct type.
7059 ValueToValueMap ValMap;
7060 for (ConstantData *C : Constants)
7061 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
7062 for (Instruction *D : Defs) {
7063 if (isa<BitCastInst>(D)) {
7064 ValMap[D] = D->getOperand(0);
7065 DeletedInstrs.insert(D);
7066 } else {
7067 BasicBlock::iterator insertPt = std::next(D->getIterator());
7068 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
7069 }
7070 }
7071 for (PHINode *Phi : PhiNodes)
7072 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
7073 Phi->getName() + ".tc", Phi->getIterator());
7074 // Pipe together all the PhiNodes.
7075 for (PHINode *Phi : PhiNodes) {
7076 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
7077 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
7078 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
7079 Phi->getIncomingBlock(i));
7080 Visited.insert(NewPhi);
7081 }
7082 // And finally pipe up the stores and bitcasts
7083 for (Instruction *U : Uses) {
7084 if (isa<BitCastInst>(U)) {
7085 DeletedInstrs.insert(U);
7086 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
7087 } else {
7088 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
7089 U->getIterator()));
7090 }
7091 }
7092
7093 // Save the removed phis to be deleted later.
7094 DeletedInstrs.insert_range(PhiNodes);
7095 return true;
7096}
7097
7098bool CodeGenPrepare::optimizePhiTypes(Function &F) {
7099 if (!OptimizePhiTypes)
7100 return false;
7101
7102 bool Changed = false;
7103 SmallPtrSet<PHINode *, 4> Visited;
7104 SmallPtrSet<Instruction *, 4> DeletedInstrs;
7105
7106 // Attempt to optimize all the phis in the function to the correct type.
7107 for (auto &BB : F)
7108 for (auto &Phi : BB.phis())
7109 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
7110
7111 // Remove any old phis that have been converted.
7112 for (auto *I : DeletedInstrs) {
7113 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
7114 I->eraseFromParent();
7115 }
7116
7117 return Changed;
7118}
7119
7120/// Return true if an ext(load) can be formed from an extension in
7121/// \p MovedExts.
7122bool CodeGenPrepare::canFormExtLd(
7123 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
7124 Instruction *&Inst, bool HasPromoted) {
7125 for (auto *MovedExtInst : MovedExts) {
7126 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
7127 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
7128 Inst = MovedExtInst;
7129 break;
7130 }
7131 }
7132 if (!LI)
7133 return false;
7134
7135 // If they're already in the same block, there's nothing to do.
7136 // Make the cheap checks first if we did not promote.
7137 // If we promoted, we need to check if it is indeed profitable.
7138 if (!HasPromoted && LI->getParent() == Inst->getParent())
7139 return false;
7140
7141 return TLI->isExtLoad(LI, Inst, *DL);
7142}
7143
7144/// Move a zext or sext fed by a load into the same basic block as the load,
7145/// unless conditions are unfavorable. This allows SelectionDAG to fold the
7146/// extend into the load.
7147///
7148/// E.g.,
7149/// \code
7150/// %ld = load i32* %addr
7151/// %add = add nuw i32 %ld, 4
7152/// %zext = zext i32 %add to i64
7153/// \endcode
7154/// =>
7155/// \code
7156/// %ld = load i32* %addr
7157/// %zext = zext i32 %ld to i64
7158/// %add = add nuw i64 %zext, 4
7159/// \endcode
7160/// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
7161/// allows us to match zext(load i32*) to i64.
7162///
7163/// Also, try to promote the computations used to obtain a sign extended
7164/// value used into memory accesses.
7165/// E.g.,
7166/// \code
7167/// a = add nsw i32 b, 3
7168/// d = sext i32 a to i64
7169/// e = getelementptr ..., i64 d
7170/// \endcode
7171/// =>
7172/// \code
7173/// f = sext i32 b to i64
7174/// a = add nsw i64 f, 3
7175/// e = getelementptr ..., i64 a
7176/// \endcode
7177///
7178/// \p Inst[in/out] the extension may be modified during the process if some
7179/// promotions apply.
7180bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7181 bool AllowPromotionWithoutCommonHeader = false;
7182 /// See if it is an interesting sext operation for the address type
7183 /// promotion before trying to promote it, e.g., the ones with the right
7184 /// type and used in memory accesses.
7185 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7186 *Inst, AllowPromotionWithoutCommonHeader);
7187 TypePromotionTransaction TPT(RemovedInsts);
7188 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7189 TPT.getRestorationPoint();
7190 SmallVector<Instruction *, 1> Exts;
7191 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7192 Exts.push_back(Inst);
7193
7194 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7195
7196 // Look for a load being extended.
7197 LoadInst *LI = nullptr;
7198 Instruction *ExtFedByLoad;
7199
7200 // Try to promote a chain of computation if it allows forming an extended
7201 // load.
7202 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7203 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7204 TPT.commit();
7205 // Move the extend into the same block as the load.
7206 ExtFedByLoad->moveAfter(LI);
7207 ++NumExtsMoved;
7208 Inst = ExtFedByLoad;
7209 return true;
7210 }
7211
7212 // Continue promoting SExts if the target considers address type promotion worthwhile.
7213 if (ATPConsiderable &&
7214 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7215 HasPromoted, TPT, SpeculativelyMovedExts))
7216 return true;
7217
7218 TPT.rollback(LastKnownGood);
7219 return false;
7220}
7221
7222// Perform address type promotion if doing so is profitable.
7223// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7224// instructions that sign extended the same initial value. However, if
7225 // AllowPromotionWithoutCommonHeader == true, we assume promoting the
7226 // extension is profitable on its own.
7227bool CodeGenPrepare::performAddressTypePromotion(
7228 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7229 bool HasPromoted, TypePromotionTransaction &TPT,
7230 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7231 bool Promoted = false;
7232 SmallPtrSet<Instruction *, 1> UnhandledExts;
7233 bool AllSeenFirst = true;
7234 for (auto *I : SpeculativelyMovedExts) {
7235 Value *HeadOfChain = I->getOperand(0);
7236 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
7237 SeenChainsForSExt.find(HeadOfChain);
7238 // If there is an unhandled SExt which has the same header, try to promote
7239 // it as well.
7240 if (AlreadySeen != SeenChainsForSExt.end()) {
7241 if (AlreadySeen->second != nullptr)
7242 UnhandledExts.insert(AlreadySeen->second);
7243 AllSeenFirst = false;
7244 }
7245 }
7246
7247 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7248 SpeculativelyMovedExts.size() == 1)) {
7249 TPT.commit();
7250 if (HasPromoted)
7251 Promoted = true;
7252 for (auto *I : SpeculativelyMovedExts) {
7253 Value *HeadOfChain = I->getOperand(0);
7254 SeenChainsForSExt[HeadOfChain] = nullptr;
7255 ValToSExtendedUses[HeadOfChain].push_back(I);
7256 }
7257 // Update Inst as promotion happened.
7258 Inst = SpeculativelyMovedExts.pop_back_val();
7259 } else {
7260 // This is the first chain visited from the header; keep the current chain
7261 // as unhandled. Defer promoting it until we encounter another SExt
7262 // chain derived from the same header.
7263 for (auto *I : SpeculativelyMovedExts) {
7264 Value *HeadOfChain = I->getOperand(0);
7265 SeenChainsForSExt[HeadOfChain] = Inst;
7266 }
7267 return false;
7268 }
7269
7270 if (!AllSeenFirst && !UnhandledExts.empty())
7271 for (auto *VisitedSExt : UnhandledExts) {
7272 if (RemovedInsts.count(VisitedSExt))
7273 continue;
7274 TypePromotionTransaction TPT(RemovedInsts);
7275 SmallVector<Instruction *, 1> Exts;
7276 SmallVector<Instruction *, 2> Chains;
7277 Exts.push_back(VisitedSExt);
7278 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7279 TPT.commit();
7280 if (HasPromoted)
7281 Promoted = true;
7282 for (auto *I : Chains) {
7283 Value *HeadOfChain = I->getOperand(0);
7284 // Mark this as handled.
7285 SeenChainsForSExt[HeadOfChain] = nullptr;
7286 ValToSExtendedUses[HeadOfChain].push_back(I);
7287 }
7288 }
7289 return Promoted;
7290}
7291
7292bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7293 BasicBlock *DefBB = I->getParent();
7294
7295 // If the result of a {s|z}ext and its source are both live out, rewrite all
7296 // other uses of the source with the result of the extension.
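  // An illustrative sketch: given
  //   bb0:
  //     %x = ...
  //     %ext = zext i32 %x to i64
  //   bb1:
  //     use(%x)
  // a 'trunc i64 %ext to i32' is inserted into bb1 and the out-of-block use of
  // %x is rewritten to use it, so only %ext needs to be live out of bb0.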
7297 Value *Src = I->getOperand(0);
7298 if (Src->hasOneUse())
7299 return false;
7300
7301 // Only do this xform if truncating is free.
7302 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7303 return false;
7304
7305 // Only safe to perform the optimization if the source is also defined in
7306 // this block.
7307 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7308 return false;
7309
7310 bool DefIsLiveOut = false;
7311 for (User *U : I->users()) {
7312 Instruction *UI = cast<Instruction>(U);
7313
7314 // Figure out which BB this ext is used in.
7315 BasicBlock *UserBB = UI->getParent();
7316 if (UserBB == DefBB)
7317 continue;
7318 DefIsLiveOut = true;
7319 break;
7320 }
7321 if (!DefIsLiveOut)
7322 return false;
7323
7324 // Make sure none of the uses are PHI nodes.
7325 for (User *U : Src->users()) {
7326 Instruction *UI = cast<Instruction>(U);
7327 BasicBlock *UserBB = UI->getParent();
7328 if (UserBB == DefBB)
7329 continue;
7330 // Be conservative. We don't want this xform to end up introducing
7331 // reloads just before load / store instructions.
7332 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7333 return false;
7334 }
7335
7337 // InsertedTruncs - Only insert one trunc per block.
7337 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7338
7339 bool MadeChange = false;
7340 for (Use &U : Src->uses()) {
7341 Instruction *User = cast<Instruction>(U.getUser());
7342
7343 // Figure out which BB this ext is used in.
7344 BasicBlock *UserBB = User->getParent();
7345 if (UserBB == DefBB)
7346 continue;
7347
7348 // Both src and def are live in this block. Rewrite the use.
7349 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7350
7351 if (!InsertedTrunc) {
7352 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7353 assert(InsertPt != UserBB->end());
7354 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7355 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7356 InsertedInsts.insert(InsertedTrunc);
7357 }
7358
7359 // Replace a use of the {s|z}ext source with a use of the result.
7360 U = InsertedTrunc;
7361 ++NumExtUses;
7362 MadeChange = true;
7363 }
7364
7365 return MadeChange;
7366}
7367
7368// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7369// just after the load if the target can fold this into one extload instruction,
7370// with the hope of eliminating some of the other later "and" instructions using
7371// the loaded value. "and"s that are made trivially redundant by the insertion
7372// of the new "and" are removed by this function, while others (e.g. those whose
7373// path from the load goes through a phi) are left for isel to potentially
7374// remove.
7375//
7376// For example:
7377//
7378// b0:
7379// x = load i32
7380// ...
7381// b1:
7382// y = and x, 0xff
7383// z = use y
7384//
7385// becomes:
7386//
7387// b0:
7388// x = load i32
7389// x' = and x, 0xff
7390// ...
7391// b1:
7392// z = use x'
7393//
7394// whereas:
7395//
7396// b0:
7397// x1 = load i32
7398// ...
7399// b1:
7400// x2 = load i32
7401// ...
7402// b2:
7403// x = phi x1, x2
7404// y = and x, 0xff
7405//
7406// becomes (after a call to optimizeLoadExt for each load):
7407//
7408// b0:
7409// x1 = load i32
7410// x1' = and x1, 0xff
7411// ...
7412// b1:
7413// x2 = load i32
7414// x2' = and x2, 0xff
7415// ...
7416// b2:
7417// x = phi x1', x2'
7418// y = and x, 0xff
7419bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7420 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7421 return false;
7422
7423 // Skip loads we've already transformed.
7424 if (Load->hasOneUse() &&
7425 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7426 return false;
7427
7428 // Look at all uses of Load, looking through phis, to determine how many bits
7429 // of the loaded value are needed.
7430 SmallVector<Instruction *, 8> WorkList;
7431 SmallPtrSet<Instruction *, 16> Visited;
7432 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7433 SmallVector<Instruction *, 8> DropFlags;
7434 for (auto *U : Load->users())
7435 WorkList.push_back(cast<Instruction>(U));
7436
7437 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7438 unsigned BitWidth = LoadResultVT.getSizeInBits();
7439 // If the BitWidth is 0, do not try to optimize the type
7440 if (BitWidth == 0)
7441 return false;
7442
7443 APInt DemandBits(BitWidth, 0);
7444 APInt WidestAndBits(BitWidth, 0);
7445
7446 while (!WorkList.empty()) {
7447 Instruction *I = WorkList.pop_back_val();
7448
7449 // Break use-def graph loops.
7450 if (!Visited.insert(I).second)
7451 continue;
7452
7453 // For a PHI node, push all of its users.
7454 if (auto *Phi = dyn_cast<PHINode>(I)) {
7455 for (auto *U : Phi->users())
7456 WorkList.push_back(cast<Instruction>(U));
7457 continue;
7458 }
7459
7460 switch (I->getOpcode()) {
7461 case Instruction::And: {
7462 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7463 if (!AndC)
7464 return false;
7465 APInt AndBits = AndC->getValue();
7466 DemandBits |= AndBits;
7467 // Keep track of the widest and mask we see.
7468 if (AndBits.ugt(WidestAndBits))
7469 WidestAndBits = AndBits;
7470 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7471 AndsToMaybeRemove.push_back(I);
7472 break;
7473 }
7474
7475 case Instruction::Shl: {
7476 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7477 if (!ShlC)
7478 return false;
7479 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7480 DemandBits.setLowBits(BitWidth - ShiftAmt);
7481 DropFlags.push_back(I);
7482 break;
7483 }
7484
7485 case Instruction::Trunc: {
7486 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7487 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7488 DemandBits.setLowBits(TruncBitWidth);
7489 DropFlags.push_back(I);
7490 break;
7491 }
7492
7493 default:
7494 return false;
7495 }
7496 }
7497
7498 uint32_t ActiveBits = DemandBits.getActiveBits();
7499 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7500 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7501 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7502 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7503 // followed by an AND.
7504 // TODO: Look into removing this restriction by fixing backends to either
7505 // return false for isLoadExtLegal for i1 or have them select this pattern to
7506 // a single instruction.
7507 //
7508 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7509 // mask, since these are the only ands that will be removed by isel.
7510 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7511 WidestAndBits != DemandBits)
7512 return false;
7513
7514 LLVMContext &Ctx = Load->getType()->getContext();
7515 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7516 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7517
7518 // Reject cases that won't be matched as extloads.
7519 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7520 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7521 return false;
7522
7523 IRBuilder<> Builder(Load->getNextNode());
7524 auto *NewAnd = cast<Instruction>(
7525 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7526 // Mark this instruction as "inserted by CGP", so that other
7527 // optimizations don't touch it.
7528 InsertedInsts.insert(NewAnd);
7529
7530 // Replace all uses of load with new and (except for the use of load in the
7531 // new and itself).
7532 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7533 NewAnd->setOperand(0, Load);
7534
7535 // Remove any and instructions that are now redundant.
7536 for (auto *And : AndsToMaybeRemove)
7537 // Check that the and mask is the same as the one we decided to put on the
7538 // new and.
7539 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7540 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7541 if (&*CurInstIterator == And)
7542 CurInstIterator = std::next(And->getIterator());
7543 And->eraseFromParent();
7544 ++NumAndUses;
7545 }
7546
7547 // NSW flags may no longer hold.
7548 for (auto *Inst : DropFlags)
7549 Inst->setHasNoSignedWrap(false);
7550
7551 ++NumAndsAdded;
7552 return true;
7553}
7554
7555/// Check if V (an operand of a select instruction) is an expensive instruction
7556/// that is only used once.
7557 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7558 auto *I = dyn_cast<Instruction>(V);
7559 // If it's safe to speculatively execute, then it should not have side
7560 // effects; therefore, it's safe to sink and possibly *not* execute.
7561 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7562 TTI->isExpensiveToSpeculativelyExecute(I);
7563}
7564
7565/// Returns true if a SelectInst should be turned into an explicit branch.
7566 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7567 const TargetLowering *TLI,
7568 SelectInst *SI) {
7569 // If even a predictable select is cheap, then a branch can't be cheaper.
7570 if (!TLI->isPredictableSelectExpensive())
7571 return false;
7572
7573 // FIXME: This should use the same heuristics as IfConversion to determine
7574 // whether a select is better represented as a branch.
7575
7576 // If metadata tells us that the select condition is obviously predictable,
7577 // then we want to replace the select with a branch.
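  // For example (illustrative numbers): branch weights of 1000 and 1 give a
  // probability of 1000/1001, which typically exceeds the predictable-branch
  // threshold, so the select is considered worth turning into a branch.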
7578 uint64_t TrueWeight, FalseWeight;
7579 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7580 uint64_t Max = std::max(TrueWeight, FalseWeight);
7581 uint64_t Sum = TrueWeight + FalseWeight;
7582 if (Sum != 0) {
7583 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7584 if (Probability > TTI->getPredictableBranchThreshold())
7585 return true;
7586 }
7587 }
7588
7589 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7590
7591 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7592 // comparison condition. If the compare has more than one use, there's
7593 // probably another cmov or setcc around, so it's not worth emitting a branch.
7594 if (!Cmp || !Cmp->hasOneUse())
7595 return false;
7596
7597 // If either operand of the select is expensive and only needed on one side
7598 // of the select, we should form a branch.
7599 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7600 sinkSelectOperand(TTI, SI->getFalseValue()))
7601 return true;
7602
7603 return false;
7604}
7605
7606/// If \p isTrue is true, return the true value of \p SI, otherwise return
7607/// false value of \p SI. If the true/false value of \p SI is defined by any
7608/// select instructions in \p Selects, look through the defining select
7609/// instruction until the true/false value is not defined in \p Selects.
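///
/// An illustrative sketch: for
///   %s1 = select i1 %c, i32 %a, i32 %b
///   %s2 = select i1 %c, i32 %s1, i32 %d
/// with \p Selects containing both selects, the true value of %s2 is %a
/// (looking through %s1) and the false value is %d.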
7610static Value *
7611 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7612 const SmallPtrSet<const Instruction *, 2> &Selects) {
7613 Value *V = nullptr;
7614
7615 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7616 DefSI = dyn_cast<SelectInst>(V)) {
7617 assert(DefSI->getCondition() == SI->getCondition() &&
7618 "The condition of DefSI does not match with SI");
7619 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7620 }
7621
7622 assert(V && "Failed to get select true/false value");
7623 return V;
7624}
7625
7626bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7627 assert(Shift->isShift() && "Expected a shift");
7628
7629 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7630 // general vector shifts, and (3) the shift amount is a select-of-splatted
7631 // values, hoist the shifts before the select:
7632 // shift Op0, (select Cond, TVal, FVal) -->
7633 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7634 //
7635 // This is inverting a generic IR transform when we know that the cost of a
7636 // general vector shift is more than the cost of 2 shift-by-scalars.
7637 // We can't do this effectively in SDAG because we may not be able to
7638 // determine if the select operands are splats from within a basic block.
7639 Type *Ty = Shift->getType();
7640 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7641 return false;
7642 Value *Cond, *TVal, *FVal;
7643 if (!match(Shift->getOperand(1),
7644 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7645 return false;
7646 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7647 return false;
7648
7649 IRBuilder<> Builder(Shift);
7650 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7651 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7652 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7653 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7654 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7655 Shift->eraseFromParent();
7656 return true;
7657}
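// Concrete shape of the transform (illustrative values):
//   %amt = select i1 %c, <4 x i32> splat (i32 1), <4 x i32> splat (i32 8)
//   %r   = shl <4 x i32> %x, %amt
// becomes
//   %t = shl <4 x i32> %x, splat (i32 1)
//   %f = shl <4 x i32> %x, splat (i32 8)
//   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f
// so each shift amount is uniform and can use the cheaper shift-by-scalar form.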
7658
7659bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7660 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7661 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7662 "Expected a funnel shift");
7663
7664 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7665 // than general vector shifts, and (3) the shift amount is select-of-splatted
7666 // values, hoist the funnel shifts before the select:
7667 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7668 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7669 //
7670 // This is inverting a generic IR transform when we know that the cost of a
7671 // general vector shift is more than the cost of 2 shift-by-scalars.
7672 // We can't do this effectively in SDAG because we may not be able to
7673 // determine if the select operands are splats from within a basic block.
7674 Type *Ty = Fsh->getType();
7675 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7676 return false;
7677 Value *Cond, *TVal, *FVal;
7678 if (!match(Fsh->getOperand(2),
7679 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7680 return false;
7681 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7682 return false;
7683
7684 IRBuilder<> Builder(Fsh);
7685 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7686 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7687 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7688 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7689 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7690 Fsh->eraseFromParent();
7691 return true;
7692}
7693
7694/// If we have a SelectInst that will likely profit from branch prediction,
7695/// turn it into a branch.
7696bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7697 if (DisableSelectToBranch)
7698 return false;
7699
7700 // If the SelectOptimize pass is enabled, selects have already been optimized.
7702 return false;
7703
7704 // Find all consecutive select instructions that share the same condition.
7705 SmallVector<SelectInst *, 2> ASI;
7706 ASI.push_back(SI);
7707 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7708 It != SI->getParent()->end(); ++It) {
7709 SelectInst *I = dyn_cast<SelectInst>(&*It);
7710 if (I && SI->getCondition() == I->getCondition()) {
7711 ASI.push_back(I);
7712 } else {
7713 break;
7714 }
7715 }
7716
7717 SelectInst *LastSI = ASI.back();
7718 // Increment the current iterator to skip all the remaining select instructions,
7719 // because they will either all be lowered to branches or none of them will.
7720 CurInstIterator = std::next(LastSI->getIterator());
7721 // Examine debug-info attached to the consecutive select instructions. They
7722 // won't be individually optimised by optimizeInst, so we need to perform
7723 // DbgVariableRecord maintenance here instead.
7724 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7725 fixupDbgVariableRecordsOnInst(*SI);
7726
7727 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7728
7729 // Can we convert the 'select' to CF ?
7730 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7731 return false;
7732
7733 TargetLowering::SelectSupportKind SelectKind;
7734 if (SI->getType()->isVectorTy())
7735 SelectKind = TargetLowering::ScalarCondVectorVal;
7736 else
7737 SelectKind = TargetLowering::ScalarValSelect;
7738
7739 if (TLI->isSelectSupported(SelectKind) &&
7740 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
7741 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7742 return false;
7743
7744 // The DominatorTree needs to be rebuilt by any consumers after this
7745 // transformation. We simply reset here rather than setting the ModifiedDT
7746 // flag to avoid restarting the function walk in runOnFunction for each
7747 // select optimized.
7748 DT.reset();
7749
7750 // Transform a sequence like this:
7751 // start:
7752 // %cmp = cmp uge i32 %a, %b
7753 // %sel = select i1 %cmp, i32 %c, i32 %d
7754 //
7755 // Into:
7756 // start:
7757 // %cmp = cmp uge i32 %a, %b
7758 // %cmp.frozen = freeze %cmp
7759 // br i1 %cmp.frozen, label %select.true, label %select.false
7760 // select.true:
7761 // br label %select.end
7762 // select.false:
7763 // br label %select.end
7764 // select.end:
7765 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7766 //
7767 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7768 // In addition, we may sink instructions that produce %c or %d from
7769 // the entry block into the destination(s) of the new branch.
7770 // If the true or false blocks do not contain a sunken instruction, that
7771 // block and its branch may be optimized away. In that case, one side of the
7772 // first branch will point directly to select.end, and the corresponding PHI
7773 // predecessor block will be the start block.
7774
7775 // Collect values that go on the true side and the values that go on the false
7776 // side.
7777 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7778 for (SelectInst *SI : ASI) {
7779 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7780 TrueInstrs.push_back(cast<Instruction>(V));
7781 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7782 FalseInstrs.push_back(cast<Instruction>(V));
7783 }
7784
7785 // Split the select block, according to how many (if any) values go on each
7786 // side.
7787 BasicBlock *StartBlock = SI->getParent();
7788 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7789 // We should split before any debug-info.
7790 SplitPt.setHeadBit(true);
7791
7792 IRBuilder<> IB(SI);
7793 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7794
7795 BasicBlock *TrueBlock = nullptr;
7796 BasicBlock *FalseBlock = nullptr;
7797 BasicBlock *EndBlock = nullptr;
7798 BranchInst *TrueBranch = nullptr;
7799 BranchInst *FalseBranch = nullptr;
7800 if (TrueInstrs.size() == 0) {
7801 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7802 CondFr, SplitPt, false, nullptr, nullptr, LI));
7803 FalseBlock = FalseBranch->getParent();
7804 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7805 } else if (FalseInstrs.size() == 0) {
7806 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7807 CondFr, SplitPt, false, nullptr, nullptr, LI));
7808 TrueBlock = TrueBranch->getParent();
7809 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7810 } else {
7811 Instruction *ThenTerm = nullptr;
7812 Instruction *ElseTerm = nullptr;
7813 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7814 nullptr, nullptr, LI);
7815 TrueBranch = cast<BranchInst>(ThenTerm);
7816 FalseBranch = cast<BranchInst>(ElseTerm);
7817 TrueBlock = TrueBranch->getParent();
7818 FalseBlock = FalseBranch->getParent();
7819 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7820 }
7821
7822 EndBlock->setName("select.end");
7823 if (TrueBlock)
7824 TrueBlock->setName("select.true.sink");
7825 if (FalseBlock)
7826 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7827 : "select.false.sink");
7828
7829 if (IsHugeFunc) {
7830 if (TrueBlock)
7831 FreshBBs.insert(TrueBlock);
7832 if (FalseBlock)
7833 FreshBBs.insert(FalseBlock);
7834 FreshBBs.insert(EndBlock);
7835 }
7836
7837 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7838
7839 static const unsigned MD[] = {
7840 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7841 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7842 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7843
7844 // Sink expensive instructions into the conditional blocks to avoid executing
7845 // them speculatively.
7846 for (Instruction *I : TrueInstrs)
7847 I->moveBefore(TrueBranch->getIterator());
7848 for (Instruction *I : FalseInstrs)
7849 I->moveBefore(FalseBranch->getIterator());
7850
7851 // If we did not create a new block for one of the 'true' or 'false' paths
7852 // of the condition, it means that side of the branch goes to the end block
7853 // directly and the path originates from the start block from the point of
7854 // view of the new PHI.
7855 if (TrueBlock == nullptr)
7856 TrueBlock = StartBlock;
7857 else if (FalseBlock == nullptr)
7858 FalseBlock = StartBlock;
7859
7860 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7861 // Use reverse iterator because later select may use the value of the
7862 // earlier select, and we need to propagate value through earlier select
7863 // to get the PHI operand.
7864 for (SelectInst *SI : llvm::reverse(ASI)) {
7865 // The select itself is replaced with a PHI Node.
7866 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7867 PN->insertBefore(EndBlock->begin());
7868 PN->takeName(SI);
7869 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7870 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7871 PN->setDebugLoc(SI->getDebugLoc());
7872
7873 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7874 SI->eraseFromParent();
7875 INS.erase(SI);
7876 ++NumSelectsExpanded;
7877 }
7878
7879 // Instruct OptimizeBlock to skip to the next block.
7880 CurInstIterator = StartBlock->end();
7881 return true;
7882}
7883
7884/// Some targets only accept certain types for splat inputs. For example a VDUP
7885/// in MVE takes a GPR (integer) register, and the instructions that incorporate
7886/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7887bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7888 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7889 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7890 m_Undef(), m_ZeroMask())))
7891 return false;
7892 Type *NewType = TLI->shouldConvertSplatType(SVI);
7893 if (!NewType)
7894 return false;
7895
7896 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7897 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7898 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7899 "Expected a type of the same size!");
7900 auto *NewVecType =
7901 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7902
7903 // Create a bitcast (shuffle (insert (bitcast(..))))
7904 IRBuilder<> Builder(SVI->getContext());
7905 Builder.SetInsertPoint(SVI);
7906 Value *BC1 = Builder.CreateBitCast(
7907 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7908 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7909 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7910
7911 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7912 RecursivelyDeleteTriviallyDeadInstructions(
7913 SVI, TLInfo, nullptr,
7914 [&](Value *V) { removeAllAssertingVHReferences(V); });
7915
7916 // Also hoist the bitcast up to its operand if they are not in the same
7917 // block.
7918 if (auto *BCI = dyn_cast<Instruction>(BC1))
7919 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7920 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7921 !Op->isTerminator() && !Op->isEHPad())
7922 BCI->moveAfter(Op);
7923
7924 return true;
7925}
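// Shape of the rewrite (illustrative, assuming shouldConvertSplatType picks
// i32 for a <4 x float> splat):
//   %ins = insertelement <4 x float> poison, float %f, i64 0
//   %spl = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
// becomes
//   %bc    = bitcast float %f to i32
//   %ins.i = insertelement <4 x i32> poison, i32 %bc, i64 0
//   %spl.i = shufflevector <4 x i32> %ins.i, <4 x i32> poison, <4 x i32> zeroinitializer
//   %spl   = bitcast <4 x i32> %spl.i to <4 x float>
// keeping the splatted value in an integer register, as MVE's VDUP expects.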
7926
7927bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7928 // If the operands of I can be folded into a target instruction together with
7929 // I, duplicate and sink them.
7930 SmallVector<Use *, 4> OpsToSink;
7931 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7932 return false;
7933
7934 // OpsToSink can contain multiple uses in a use chain (e.g.
7935 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7936 // uses must come first, so we process the ops in reverse order so as to not
7937 // create invalid IR.
7938 BasicBlock *TargetBB = I->getParent();
7939 bool Changed = false;
7940 SmallVector<Use *, 4> ToReplace;
7941 Instruction *InsertPoint = I;
7942 DenseMap<const Instruction *, unsigned long> InstOrdering;
7943 unsigned long InstNumber = 0;
7944 for (const auto &I : *TargetBB)
7945 InstOrdering[&I] = InstNumber++;
7946
7947 for (Use *U : reverse(OpsToSink)) {
7948 auto *UI = cast<Instruction>(U->get());
7949 if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
7950 continue;
7951 if (UI->getParent() == TargetBB) {
7952 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7953 InsertPoint = UI;
7954 continue;
7955 }
7956 ToReplace.push_back(U);
7957 }
7958
7959 SetVector<Instruction *> MaybeDead;
7960 DenseMap<Instruction *, Instruction *> NewInstructions;
7961 for (Use *U : ToReplace) {
7962 auto *UI = cast<Instruction>(U->get());
7963 Instruction *NI = UI->clone();
7964
7965 if (IsHugeFunc) {
7966 // Now that we have cloned an instruction, its operands' defs may sink into
7967 // this BB as well, so put the operands' defining BBs into FreshBBs to optimize them.
7968 for (Value *Op : NI->operands())
7969 if (auto *OpDef = dyn_cast<Instruction>(Op))
7970 FreshBBs.insert(OpDef->getParent());
7971 }
7972
7973 NewInstructions[UI] = NI;
7974 MaybeDead.insert(UI);
7975 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7976 NI->insertBefore(InsertPoint->getIterator());
7977 InsertPoint = NI;
7978 InsertedInsts.insert(NI);
7979
7980 // Update the use for the new instruction, making sure that we update the
7981 // sunk instruction uses, if it is part of a chain that has already been
7982 // sunk.
7983 Instruction *OldI = cast<Instruction>(U->getUser());
7984 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7985 It->second->setOperand(U->getOperandNo(), NI);
7986 else
7987 U->set(NI);
7988 Changed = true;
7989 }
7990
7991 // Remove instructions that are dead after sinking.
7992 for (auto *I : MaybeDead) {
7993 if (!I->hasNUsesOrMore(1)) {
7994 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7995 I->eraseFromParent();
7996 }
7997 }
7998
7999 return Changed;
8000}
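// Typical chain this catches (illustrative, AArch64-flavored): a shuffle and
// zext defined in a predecessor are duplicated next to their multiply so the
// backend can fold the whole pattern into a single widening instruction:
//   bb1:
//     %s = shufflevector <8 x i16> %v, <8 x i16> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
//     %z = zext <4 x i16> %s to <4 x i32>
//     br label %bb2
//   bb2:
//     %m = mul <4 x i32> %z, %w
// Clones of %s and %z are inserted in bb2 ahead of %m (defs before uses), and
// the originals are erased once they have no remaining uses.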
8001
8002bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
8003 Value *Cond = SI->getCondition();
8004 Type *OldType = Cond->getType();
8005 LLVMContext &Context = Cond->getContext();
8006 EVT OldVT = TLI->getValueType(*DL, OldType);
8007 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
8008 unsigned RegWidth = RegType.getSizeInBits();
8009
8010 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
8011 return false;
8012
8013 // If the register width is greater than the type width, expand the condition
8014 // of the switch instruction and each case constant to the width of the
8015 // register. By widening the type of the switch condition, subsequent
8016 // comparisons (for case comparisons) will not need to be extended to the
8017 // preferred register width, so we will potentially eliminate N-1 extends,
8018 // where N is the number of cases in the switch.
8019 auto *NewType = Type::getIntNTy(Context, RegWidth);
8020
8021 // Extend the switch condition and case constants using the target preferred
8022 // extend unless the switch condition is a function argument with an extend
8023 // attribute. In that case, we can avoid an unnecessary mask/extension by
8024 // matching the argument extension instead.
8025 Instruction::CastOps ExtType = Instruction::ZExt;
8026 // Some targets prefer SExt over ZExt.
8027 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
8028 ExtType = Instruction::SExt;
8029
8030 if (auto *Arg = dyn_cast<Argument>(Cond)) {
8031 if (Arg->hasSExtAttr())
8032 ExtType = Instruction::SExt;
8033 if (Arg->hasZExtAttr())
8034 ExtType = Instruction::ZExt;
8035 }
8036
8037 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
8038 ExtInst->insertBefore(SI->getIterator());
8039 ExtInst->setDebugLoc(SI->getDebugLoc());
8040 SI->setCondition(ExtInst);
8041 for (auto Case : SI->cases()) {
8042 const APInt &NarrowConst = Case.getCaseValue()->getValue();
8043 APInt WideConst = (ExtType == Instruction::ZExt)
8044 ? NarrowConst.zext(RegWidth)
8045 : NarrowConst.sext(RegWidth);
8046 Case.setValue(ConstantInt::get(Context, WideConst));
8047 }
8048
8049 return true;
8050}
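// Example of the widening (illustrative, assuming the target prefers a 32-bit
// switch condition and zero-extension):
//   switch i8 %x, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// becomes
//   %x.ext = zext i8 %x to i32
//   switch i32 %x.ext, label %def [ i32 1, label %a
//                                   i32 2, label %b ]
// so each per-case comparison no longer needs its own extend of the condition.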
8051
8052bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
8053 // The SCCP optimization tends to produce code like this:
8054 // switch(x) { case 42: phi(42, ...) }
8055 // Materializing the constant for the phi-argument needs instructions; so we
8056 // change the code to:
8057 // switch(x) { case 42: phi(x, ...) }
8058
8059 Value *Condition = SI->getCondition();
8060 // Avoid endless loop in degenerate case.
8061 if (isa<ConstantInt>(*Condition))
8062 return false;
8063
8064 bool Changed = false;
8065 BasicBlock *SwitchBB = SI->getParent();
8066 Type *ConditionType = Condition->getType();
8067
8068 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
8069 ConstantInt *CaseValue = Case.getCaseValue();
8070 BasicBlock *CaseBB = Case.getCaseSuccessor();
8071 // Set to true if we previously checked that `CaseBB` is only reached by
8072 // a single case from this switch.
8073 bool CheckedForSinglePred = false;
8074 for (PHINode &PHI : CaseBB->phis()) {
8075 Type *PHIType = PHI.getType();
8076 // If ZExt is free then we can also catch patterns like this:
8077 // switch((i32)x) { case 42: phi((i64)42, ...); }
8078 // and replace `(i64)42` with `zext i32 %x to i64`.
8079 bool TryZExt =
8080 PHIType->isIntegerTy() &&
8081 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
8082 TLI->isZExtFree(ConditionType, PHIType);
8083 if (PHIType == ConditionType || TryZExt) {
8084 // Set to true to skip this case because of multiple preds.
8085 bool SkipCase = false;
8086 Value *Replacement = nullptr;
8087 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
8088 Value *PHIValue = PHI.getIncomingValue(I);
8089 if (PHIValue != CaseValue) {
8090 if (!TryZExt)
8091 continue;
8092 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
8093 if (!PHIValueInt ||
8094 PHIValueInt->getValue() !=
8095 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
8096 continue;
8097 }
8098 if (PHI.getIncomingBlock(I) != SwitchBB)
8099 continue;
8100 // We cannot optimize if there are multiple case labels jumping to
8101 // this block. This check may get expensive when there are many
8102 // case labels so we test for it last.
8103 if (!CheckedForSinglePred) {
8104 CheckedForSinglePred = true;
8105 if (SI->findCaseDest(CaseBB) == nullptr) {
8106 SkipCase = true;
8107 break;
8108 }
8109 }
8110
8111 if (Replacement == nullptr) {
8112 if (PHIValue == CaseValue) {
8113 Replacement = Condition;
8114 } else {
8115 IRBuilder<> Builder(SI);
8116 Replacement = Builder.CreateZExt(Condition, PHIType);
8117 }
8118 }
8119 PHI.setIncomingValue(I, Replacement);
8120 Changed = true;
8121 }
8122 if (SkipCase)
8123 break;
8124 }
8125 }
8126 }
8127 return Changed;
8128}
8129
8130bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
8131 bool Changed = optimizeSwitchType(SI);
8132 Changed |= optimizeSwitchPhiConstants(SI);
8133 return Changed;
8134}
8135
8136namespace {
8137
8138/// Helper class to promote a scalar operation to a vector one.
8139/// This class is used to move an extractelement transition downward.
8140/// E.g.,
8141/// a = vector_op <2 x i32>
8142/// b = extractelement <2 x i32> a, i32 0
8143/// c = scalar_op b
8144/// store c
8145///
8146/// =>
8147/// a = vector_op <2 x i32>
8148/// c = vector_op a (equivalent to scalar_op on the related lane)
8149/// * d = extractelement <2 x i32> c, i32 0
8150/// * store d
8151/// Assuming both extractelement and store can be combined, we get rid of the
8152/// transition.
8153class VectorPromoteHelper {
8154 /// DataLayout associated with the current module.
8155 const DataLayout &DL;
8156
8157 /// Used to perform some checks on the legality of vector operations.
8158 const TargetLowering &TLI;
8159
8160 /// Used to estimate the cost of the promoted chain.
8161 const TargetTransformInfo &TTI;
8162
8163 /// The transition being moved downwards.
8164 Instruction *Transition;
8165
8166 /// The sequence of instructions to be promoted.
8167 SmallVector<Instruction *, 4> InstsToBePromoted;
8168
8169 /// Cost of combining a store and an extract.
8170 unsigned StoreExtractCombineCost;
8171
8172 /// Instruction that will be combined with the transition.
8173 Instruction *CombineInst = nullptr;
8174
8175 /// The instruction that represents the current end of the transition.
8176 /// Since we are faking the promotion until we reach the end of the chain
8177 /// of computation, we need a way to get the current end of the transition.
8178 Instruction *getEndOfTransition() const {
8179 if (InstsToBePromoted.empty())
8180 return Transition;
8181 return InstsToBePromoted.back();
8182 }
8183
8184 /// Return the index of the original value in the transition.
8185 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8186 /// c, is at index 0.
8187 unsigned getTransitionOriginalValueIdx() const {
8188 assert(isa<ExtractElementInst>(Transition) &&
8189 "Other kind of transitions are not supported yet");
8190 return 0;
8191 }
8192
8193 /// Return the index of the index in the transition.
8194 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8195 /// is at index 1.
8196 unsigned getTransitionIdx() const {
8197 assert(isa<ExtractElementInst>(Transition) &&
8198 "Other kind of transitions are not supported yet");
8199 return 1;
8200 }
8201
8202 /// Get the type of the transition.
8203 /// This is the type of the original value.
8204 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8205 /// transition is <2 x i32>.
8206 Type *getTransitionType() const {
8207 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8208 }
8209
8210 /// Promote \p ToBePromoted by moving \p Def downward through it.
8211 /// I.e., we have the following sequence:
8212 /// Def = Transition <ty1> a to <ty2>
8213 /// b = ToBePromoted <ty2> Def, ...
8214 /// =>
8215 /// b = ToBePromoted <ty1> a, ...
8216 /// Def = Transition <ty1> ToBePromoted to <ty2>
8217 void promoteImpl(Instruction *ToBePromoted);
8218
8219 /// Check whether or not it is profitable to promote all the
8220 /// instructions enqueued to be promoted.
8221 bool isProfitableToPromote() {
8222 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8223 unsigned Index = isa<ConstantInt>(ValIdx)
8224 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8225 : -1;
8226 Type *PromotedType = getTransitionType();
8227
8228 StoreInst *ST = cast<StoreInst>(CombineInst);
8229 unsigned AS = ST->getPointerAddressSpace();
8230 // Check if this store is supported.
8231 if (!TLI.allowsMisalignedMemoryAccesses(
8232 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8233 ST->getAlign())) {
8234 // If this is not supported, there is no way we can combine
8235 // the extract with the store.
8236 return false;
8237 }
8238
8239 // The scalar chain of computation has to pay for the scalar-to-vector
8240 // transition.
8241 // The vector chain has to account for the combining cost.
8244 InstructionCost ScalarCost =
8245 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8246 InstructionCost VectorCost = StoreExtractCombineCost;
8247 for (const auto &Inst : InstsToBePromoted) {
8248 // Compute the cost.
8249 // By construction, all instructions being promoted are arithmetic ones.
8250 // Moreover, one argument is a constant that can be viewed as a splat
8251 // constant.
8252 Value *Arg0 = Inst->getOperand(0);
8253 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8254 isa<ConstantFP>(Arg0);
8255 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8256 if (IsArg0Constant)
8258 else
8260
8261 ScalarCost += TTI.getArithmeticInstrCost(
8262 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8263 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8264 CostKind, Arg0Info, Arg1Info);
8265 }
8266 LLVM_DEBUG(
8267 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8268 << ScalarCost << "\nVector: " << VectorCost << '\n');
8269 return ScalarCost > VectorCost;
8270 }
8271
8272 /// Generate a constant vector with \p Val with the same
8273 /// number of elements as the transition.
8274 /// \p UseSplat defines whether or not \p Val should be replicated
8275 /// across the whole vector.
8276 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8277 /// otherwise we generate a vector with as many poison as possible:
8278 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8279 /// used at the index of the extract.
8280 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8281 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8282 if (!UseSplat) {
8283 // If we cannot determine where the constant must be, we have to
8284 // use a splat constant.
8285 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8286 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8287 ExtractIdx = CstVal->getSExtValue();
8288 else
8289 UseSplat = true;
8290 }
8291
8292 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8293 if (UseSplat)
8294 return ConstantVector::getSplat(EC, Val);
8295
8296 if (!EC.isScalable()) {
8297 SmallVector<Constant *, 4> ConstVec;
8298 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8299 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8300 if (Idx == ExtractIdx)
8301 ConstVec.push_back(Val);
8302 else
8303 ConstVec.push_back(PoisonVal);
8304 }
8305 return ConstantVector::get(ConstVec);
8306 } else
8307 llvm_unreachable(
8308 "Generate scalable vector for non-splat is unimplemented");
8309 }
8310
8311 /// Check if promoting the operand at \p OperandIdx in \p Use to a vector
8312 /// type can trigger undefined behavior.
8313 static bool canCauseUndefinedBehavior(const Instruction *Use,
8314 unsigned OperandIdx) {
8315 // It is not safe to introduce undef when the operand is on
8316 // the right hand side of a division-like instruction.
8317 if (OperandIdx != 1)
8318 return false;
8319 switch (Use->getOpcode()) {
8320 default:
8321 return false;
8322 case Instruction::SDiv:
8323 case Instruction::UDiv:
8324 case Instruction::SRem:
8325 case Instruction::URem:
8326 return true;
8327 case Instruction::FDiv:
8328 case Instruction::FRem:
8329 return !Use->hasNoNaNs();
8330 }
8331 llvm_unreachable(nullptr);
8332 }
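// Example of the hazard (illustrative): when promoting
//   %e = extractelement <2 x i32> %v, i32 0
//   %r = udiv i32 %e, 3
// the constant 3 is the right-hand side of a division, so it must be expanded
// as the full splat <i32 3, i32 3> rather than <i32 3, i32 poison>; a poison
// lane in the divisor could act as a division by zero in the other lanes.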
8333
8334public:
8335 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8336 const TargetTransformInfo &TTI, Instruction *Transition,
8337 unsigned CombineCost)
8338 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8339 StoreExtractCombineCost(CombineCost) {
8340 assert(Transition && "Do not know how to promote null");
8341 }
8342
8343 /// Check if we can promote \p ToBePromoted to \p Type.
8344 bool canPromote(const Instruction *ToBePromoted) const {
8345 // We could support CastInst too.
8346 return isa<BinaryOperator>(ToBePromoted);
8347 }
8348
8349 /// Check if it is profitable to promote \p ToBePromoted
8350 /// by moving the transition downward through it.
8351 bool shouldPromote(const Instruction *ToBePromoted) const {
8352 // Promote only if all the operands can be statically expanded.
8353 // Indeed, we do not want to introduce any new kind of transitions.
8354 for (const Use &U : ToBePromoted->operands()) {
8355 const Value *Val = U.get();
8356 if (Val == getEndOfTransition()) {
8357 // If the use is a division and the transition is on the rhs,
8358 // we cannot promote the operation, otherwise we may create a
8359 // division by zero.
8360 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8361 return false;
8362 continue;
8363 }
8364 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8365 !isa<ConstantFP>(Val))
8366 return false;
8367 }
8368 // Check that the resulting operation is legal.
8369 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8370 if (!ISDOpcode)
8371 return false;
8372 return StressStoreExtract ||
8373 TLI.isOperationLegalOrCustom(
8374 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8375 }
8376
8377 /// Check whether or not \p Use can be combined
8378 /// with the transition.
8379 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8380 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8381
8382 /// Record \p ToBePromoted as part of the chain to be promoted.
8383 void enqueueForPromotion(Instruction *ToBePromoted) {
8384 InstsToBePromoted.push_back(ToBePromoted);
8385 }
8386
8387 /// Set the instruction that will be combined with the transition.
8388 void recordCombineInstruction(Instruction *ToBeCombined) {
8389 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8390 CombineInst = ToBeCombined;
8391 }
8392
8393 /// Promote all the instructions enqueued for promotion if it is
8394 /// profitable.
8395 /// \return True if the promotion happened, false otherwise.
8396 bool promote() {
8397 // Check if there is something to promote.
8398 // Right now, if we do not have anything to combine with,
8399 // we assume the promotion is not profitable.
8400 if (InstsToBePromoted.empty() || !CombineInst)
8401 return false;
8402
8403 // Check cost.
8404 if (!StressStoreExtract && !isProfitableToPromote())
8405 return false;
8406
8407 // Promote.
8408 for (auto &ToBePromoted : InstsToBePromoted)
8409 promoteImpl(ToBePromoted);
8410 InstsToBePromoted.clear();
8411 return true;
8412 }
8413};
8414
8415} // end anonymous namespace
8416
8417void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8418 // At this point, we know that all the operands of ToBePromoted but Def
8419 // can be statically promoted.
8420 // For Def, we need to use its parameter in ToBePromoted:
8421 // b = ToBePromoted ty1 a
8422 // Def = Transition ty1 b to ty2
8423 // Move the transition down.
8424 // 1. Replace all uses of the promoted operation by the transition.
8425 // = ... b => = ... Def.
8426 assert(ToBePromoted->getType() == Transition->getType() &&
8427 "The type of the result of the transition does not match "
8428 "the final type");
8429 ToBePromoted->replaceAllUsesWith(Transition);
8430 // 2. Update the type of the uses.
8431 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8432 Type *TransitionTy = getTransitionType();
8433 ToBePromoted->mutateType(TransitionTy);
8434 // 3. Update all the operands of the promoted operation with promoted
8435 // operands.
8436 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8437 for (Use &U : ToBePromoted->operands()) {
8438 Value *Val = U.get();
8439 Value *NewVal = nullptr;
8440 if (Val == Transition)
8441 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8442 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8443 isa<ConstantFP>(Val)) {
8444 // Use a splat constant if it is not safe to use undef.
8445 NewVal = getConstantVector(
8446 cast<Constant>(Val),
8447 isa<UndefValue>(Val) ||
8448 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8449 } else
8450 llvm_unreachable("Did you modify shouldPromote and forget to update "
8451 "this?");
8452 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8453 }
8454 Transition->moveAfter(ToBePromoted);
8455 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8456}
8457
8458/// Some targets can do store(extractelement) with one instruction.
8459/// Try to push the extractelement towards the stores when the target
8460/// has this feature and this is profitable.
8461bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8462 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8463 if (DisableStoreExtract ||
8464 (!StressStoreExtract &&
8465 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8466 Inst->getOperand(1), CombineCost)))
8467 return false;
8468
8469 // At this point we know that Inst is a vector to scalar transition.
8470 // Try to move it down the def-use chain, until:
8471 // - We can combine the transition with its single use
8472 // => we got rid of the transition.
8473 // - We escape the current basic block
8474 // => we would need to check that we are moving it at a cheaper place and
8475 // we do not do that for now.
8476 BasicBlock *Parent = Inst->getParent();
8477 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8478 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8479 // If the transition has more than one use, assume this is not going to be
8480 // beneficial.
8481 while (Inst->hasOneUse()) {
8482 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8483 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8484
8485 if (ToBePromoted->getParent() != Parent) {
8486 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8487 << ToBePromoted->getParent()->getName()
8488 << ") than the transition (" << Parent->getName()
8489 << ").\n");
8490 return false;
8491 }
8492
8493 if (VPH.canCombine(ToBePromoted)) {
8494 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8495 << "will be combined with: " << *ToBePromoted << '\n');
8496 VPH.recordCombineInstruction(ToBePromoted);
8497 bool Changed = VPH.promote();
8498 NumStoreExtractExposed += Changed;
8499 return Changed;
8500 }
8501
8502 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8503 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8504 return false;
8505
8506 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8507
8508 VPH.enqueueForPromotion(ToBePromoted);
8509 Inst = ToBePromoted;
8510 }
8511 return false;
8512}
8513
8514/// For the instruction sequence of store below, F and I values
8515/// are bundled together as an i64 value before being stored into memory.
8516/// Sometimes it is more efficient to generate separate stores for F and I,
8517/// which can remove the bitwise instructions or sink them to colder places.
8518///
8519/// (store (or (zext (bitcast F to i32) to i64),
8520/// (shl (zext I to i64), 32)), addr) -->
8521/// (store F, addr) and (store I, addr+4)
8522///
8523/// Similarly, splitting for other merged store can also be beneficial, like:
8524/// For pair of {i32, i32}, i64 store --> two i32 stores.
8525/// For pair of {i32, i16}, i64 store --> two i32 stores.
8526/// For pair of {i16, i16}, i32 store --> two i16 stores.
8527/// For pair of {i16, i8}, i32 store --> two i16 stores.
8528/// For pair of {i8, i8}, i16 store --> two i8 stores.
8529///
8530/// We allow each target to determine specifically which kind of splitting is
8531/// supported.
8532///
8533/// The store patterns are commonly seen from the simple code snippet below
8534/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8535/// void goo(const std::pair<int, float> &);
8536/// hoo() {
8537/// ...
8538/// goo(std::make_pair(tmp, ftmp));
8539/// ...
8540/// }
8541///
8542/// Although we already have similar splitting in DAG Combine, we duplicate
8543/// it in CodeGenPrepare to catch the case in which the pattern is spread across
8544/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8545/// during code expansion.
8546static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8547 const TargetLowering &TLI) {
8548 // Handle simple but common cases only.
8549 Type *StoreType = SI.getValueOperand()->getType();
8550
8551 // The code below assumes shifting a value by <number of bits>,
8552 // whereas scalable vectors would have to be shifted by
8553 // <2log(vscale) + number of bits> in order to store the
8554 // low/high parts. Bailing out for now.
8555 if (StoreType->isScalableTy())
8556 return false;
8557
8558 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8559 DL.getTypeSizeInBits(StoreType) == 0)
8560 return false;
8561
8562 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8563 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8564 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8565 return false;
8566
8567 // Don't split the store if it is volatile.
8568 if (SI.isVolatile())
8569 return false;
8570
8571 // Match the following patterns:
8572 // (store (or (zext LValue to i64),
8573 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8574 // or
8575 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8576 // (zext LValue to i64),
8577 // Expect both operands of OR and the first operand of SHL have only
8578 // one use.
8579 Value *LValue, *HValue;
8580 if (!match(SI.getValueOperand(),
8583 m_SpecificInt(HalfValBitSize))))))
8584 return false;
8585
8586 // Check that LValue and HValue are integers with size less than or equal to HalfValBitSize.
8587 if (!LValue->getType()->isIntegerTy() ||
8588 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8589 !HValue->getType()->isIntegerTy() ||
8590 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8591 return false;
8592
8593 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8594 // as the input of target query.
8595 auto *LBC = dyn_cast<BitCastInst>(LValue);
8596 auto *HBC = dyn_cast<BitCastInst>(HValue);
8597 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8598 : EVT::getEVT(LValue->getType());
8599 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8600 : EVT::getEVT(HValue->getType());
8601 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8602 return false;
8603
8604 // Start to split store.
8605 IRBuilder<> Builder(SI.getContext());
8606 Builder.SetInsertPoint(&SI);
8607
8608 // If LValue/HValue is a bitcast in another BB, create a new one in current
8609 // BB so it may be merged with the split stores by the DAG combiner.
8610 if (LBC && LBC->getParent() != SI.getParent())
8611 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8612 if (HBC && HBC->getParent() != SI.getParent())
8613 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8614
8615 bool IsLE = SI.getDataLayout().isLittleEndian();
8616 auto CreateSplitStore = [&](Value *V, bool Upper) {
8617 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8618 Value *Addr = SI.getPointerOperand();
8619 Align Alignment = SI.getAlign();
8620 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8621 if (IsOffsetStore) {
8622 Addr = Builder.CreateGEP(
8623 SplitStoreType, Addr,
8624 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8625
8626 // When splitting the store in half, naturally one half will retain the
8627 // alignment of the original wider store, regardless of whether it was
8628 // over-aligned or not, while the other will require adjustment.
8629 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8630 }
8631 Builder.CreateAlignedStore(V, Addr, Alignment);
8632 };
8633
8634 CreateSplitStore(LValue, false);
8635 CreateSplitStore(HValue, true);
8636
8637 // Delete the old store.
8638 SI.eraseFromParent();
8639 return true;
8640}
8641
8642// Return true if the GEP has two operands, the first of which is of a sequential
8643// type, and the second of which is a constant.
8644static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8645 gep_type_iterator I = gep_type_begin(*GEP);
8646 return GEP->getNumOperands() == 2 && I.isSequential() &&
8647 isa<ConstantInt>(GEP->getOperand(1));
8648}
8649
8650// Try unmerging GEPs to reduce liveness interference (register pressure) across
8651// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8652// reducing liveness interference across those edges benefits global register
8653// allocation. Currently handles only certain cases.
8654//
8655// For example, unmerge %GEPI and %UGEPI as below.
8656//
8657// ---------- BEFORE ----------
8658// SrcBlock:
8659// ...
8660// %GEPIOp = ...
8661// ...
8662// %GEPI = gep %GEPIOp, Idx
8663// ...
8664// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8665// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8666// (* %GEPIOp is alive on the indirectbr edges only because it is used by
8667// %UGEPI)
8668//
8669// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8670// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8671// ...
8672//
8673// DstBi:
8674// ...
8675// %UGEPI = gep %GEPIOp, UIdx
8676// ...
8677// ---------------------------
8678//
8679// ---------- AFTER ----------
8680// SrcBlock:
8681// ... (same as above)
8682// (* %GEPI is still alive on the indirectbr edges)
8683// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8684// unmerging)
8685// ...
8686//
8687// DstBi:
8688// ...
8689// %UGEPI = gep %GEPI, (UIdx-Idx)
8690// ...
8691// ---------------------------
8692//
8693// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8694// no longer alive on them.
8695//
8696// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8697// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8698// not to disable further simplifications and optimizations as a result of GEP
8699// merging.
8700//
8701// Note this unmerging may increase the length of the data flow critical path
8702// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8703// between the register pressure and the length of data-flow critical
8704// path. Restricting this to the uncommon IndirectBr case would minimize the
8705// impact of potentially longer critical path, if any, and the impact on compile
8706// time.
8707static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8708 const TargetTransformInfo *TTI) {
8709 BasicBlock *SrcBlock = GEPI->getParent();
8710 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8711 // (non-IndirectBr) cases exit early here.
8712 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8713 return false;
8714 // Check that GEPI is a simple gep with a single constant index.
8715 if (!GEPSequentialConstIndexed(GEPI))
8716 return false;
8717 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8718 // Check that GEPI is a cheap one.
8719 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8720 TargetTransformInfo::TCK_SizeAndLatency) >
8721 TargetTransformInfo::TCC_Basic)
8722 return false;
8723 Value *GEPIOp = GEPI->getOperand(0);
8724 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8725 if (!isa<Instruction>(GEPIOp))
8726 return false;
8727 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8728 if (GEPIOpI->getParent() != SrcBlock)
8729 return false;
8730 // Check that GEPI is used outside the block, meaning it's alive on the
8731 // IndirectBr edge(s).
8732 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8733 if (auto *I = dyn_cast<Instruction>(Usr)) {
8734 if (I->getParent() != SrcBlock) {
8735 return true;
8736 }
8737 }
8738 return false;
8739 }))
8740 return false;
8741 // The second elements of the GEP chains to be unmerged.
8742 std::vector<GetElementPtrInst *> UGEPIs;
8743 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8744 // on IndirectBr edges.
8745 for (User *Usr : GEPIOp->users()) {
8746 if (Usr == GEPI)
8747 continue;
8748 // Check if Usr is an Instruction. If not, give up.
8749 if (!isa<Instruction>(Usr))
8750 return false;
8751 auto *UI = cast<Instruction>(Usr);
8752 // If Usr is in the same block as GEPIOp, that is fine; skip it.
8753 if (UI->getParent() == SrcBlock)
8754 continue;
8755 // Check if Usr is a GEP. If not, give up.
8756 if (!isa<GetElementPtrInst>(Usr))
8757 return false;
8758 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8759 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8760 // the pointer operand to it. If so, record it in the vector. If not, give
8761 // up.
8762 if (!GEPSequentialConstIndexed(UGEPI))
8763 return false;
8764 if (UGEPI->getOperand(0) != GEPIOp)
8765 return false;
8766 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8767 return false;
8768 if (GEPIIdx->getType() !=
8769 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8770 return false;
8771 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8772 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8773 TargetTransformInfo::TCK_SizeAndLatency) >
8774 TargetTransformInfo::TCC_Basic)
8775 return false;
8776 UGEPIs.push_back(UGEPI);
8777 }
8778 if (UGEPIs.size() == 0)
8779 return false;
8780 // Check the materializing cost of (Uidx-Idx).
8781 for (GetElementPtrInst *UGEPI : UGEPIs) {
8782 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8783 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8784 InstructionCost ImmCost = TTI->getIntImmCost(
8785 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8786 if (ImmCost > TargetTransformInfo::TCC_Basic)
8787 return false;
8788 }
8789 // Now unmerge between GEPI and UGEPIs.
8790 for (GetElementPtrInst *UGEPI : UGEPIs) {
8791 UGEPI->setOperand(0, GEPI);
8792 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8793 Constant *NewUGEPIIdx = ConstantInt::get(
8794 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8795 UGEPI->setOperand(1, NewUGEPIIdx);
8796 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8797 // inbounds to avoid UB.
8798 if (!GEPI->isInBounds()) {
8799 UGEPI->setIsInBounds(false);
8800 }
8801 }
8802 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8803 // alive on IndirectBr edges).
8804 assert(llvm::none_of(GEPIOp->users(),
8805 [&](User *Usr) {
8806 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8807 }) &&
8808 "GEPIOp is used outside SrcBlock");
8809 return true;
8810}
8811
8812static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8814 bool IsHugeFunc) {
8815 // Try and convert
8816 // %c = icmp ult %x, 8
8817 // br %c, bla, blb
8818 // %tc = lshr %x, 3
8819 // to
8820 // %tc = lshr %x, 3
8821 // %c = icmp eq %tc, 0
8822 // br %c, bla, blb
8823 // Creating the cmp to zero can be better for the backend, especially if the
8824 // lshr produces flags that can be used automatically.
8825 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8826 return false;
8827
8828 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8829 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8830 return false;
8831
8832 Value *X = Cmp->getOperand(0);
8833 if (!X->hasUseList())
8834 return false;
8835
8836 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8837
8838 for (auto *U : X->users()) {
8840 // A quick dominance check
8841 if (!UI ||
8842 (UI->getParent() != Branch->getParent() &&
8843 UI->getParent() != Branch->getSuccessor(0) &&
8844 UI->getParent() != Branch->getSuccessor(1)) ||
8845 (UI->getParent() != Branch->getParent() &&
8846 !UI->getParent()->getSinglePredecessor()))
8847 continue;
8848
8849 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8850 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8851 IRBuilder<> Builder(Branch);
8852 if (UI->getParent() != Branch->getParent())
8853 UI->moveBefore(Branch->getIterator());
8855 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8856 ConstantInt::get(UI->getType(), 0));
8857 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8858 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8859 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8860 return true;
8861 }
8862 if (Cmp->isEquality() &&
8863 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8864 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8865 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8866 IRBuilder<> Builder(Branch);
8867 if (UI->getParent() != Branch->getParent())
8868 UI->moveBefore(Branch->getIterator());
8870 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8871 ConstantInt::get(UI->getType(), 0));
8872 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8873 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8874 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8875 return true;
8876 }
8877 }
8878 return false;
8879}
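// The equality form handled above looks like this (illustrative):
//   %c = icmp eq i32 %x, 7
//   br i1 %c, label %bla, label %blb
// with a later %t = xor i32 %x, 7 in the block or a successor; it is rewritten to
//   %t = xor i32 %x, 7
//   %c = icmp eq i32 %t, 0
//   br i1 %c, label %bla, label %blb
// so the branch can reuse the flags produced by the xor.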
8880
8881bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8882 bool AnyChange = false;
8883 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8884
8885 // Bail out if we inserted the instruction to prevent optimizations from
8886 // stepping on each other's toes.
8887 if (InsertedInsts.count(I))
8888 return AnyChange;
8889
8890 // TODO: Move into the switch on opcode below here.
8891 if (PHINode *P = dyn_cast<PHINode>(I)) {
8892 // It is possible for very late stage optimizations (such as SimplifyCFG)
8893 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8894 // trivial PHI, go ahead and zap it here.
8895 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8896 LargeOffsetGEPMap.erase(P);
8897 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8898 P->eraseFromParent();
8899 ++NumPHIsElim;
8900 return true;
8901 }
8902 return AnyChange;
8903 }
8904
8905 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8906 // If the source of the cast is a constant, then this should have
8907 // already been constant folded. The only reason NOT to constant fold
8908 // it is if something (e.g. LSR) was careful to place the constant
8909 // evaluation in a block other than the one that uses it (e.g. to hoist
8910 // the address of globals out of a loop). If this is the case, we don't
8911 // want to forward-subst the cast.
8912 if (isa<Constant>(CI->getOperand(0)))
8913 return AnyChange;
8914
8915 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8916 return true;
8917
8919 isa<TruncInst>(I)) &&
8921 I, LI->getLoopFor(I->getParent()), *TTI))
8922 return true;
8923
8924 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8925 /// Sink a zext or sext into its user blocks if the target type doesn't
8926 /// fit in one register
8927 if (TLI->getTypeAction(CI->getContext(),
8928 TLI->getValueType(*DL, CI->getType())) ==
8929 TargetLowering::TypeExpandInteger) {
8930 return SinkCast(CI);
8931 } else {
8932 if (TLI->optimizeExtendOrTruncateConversion(
8933 I, LI->getLoopFor(I->getParent()), *TTI))
8934 return true;
8935
8936 bool MadeChange = optimizeExt(I);
8937 return MadeChange | optimizeExtUses(I);
8938 }
8939 }
8940 return AnyChange;
8941 }
8942
8943 if (auto *Cmp = dyn_cast<CmpInst>(I))
8944 if (optimizeCmp(Cmp, ModifiedDT))
8945 return true;
8946
8947 if (match(I, m_URem(m_Value(), m_Value())))
8948 if (optimizeURem(I))
8949 return true;
8950
8951 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8952 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8953 bool Modified = optimizeLoadExt(LI);
8954 unsigned AS = LI->getPointerAddressSpace();
8955 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8956 return Modified;
8957 }
8958
8959 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8960 if (splitMergedValStore(*SI, *DL, *TLI))
8961 return true;
8962 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8963 unsigned AS = SI->getPointerAddressSpace();
8964 return optimizeMemoryInst(I, SI->getOperand(1),
8965 SI->getOperand(0)->getType(), AS);
8966 }
8967
8968 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8969 unsigned AS = RMW->getPointerAddressSpace();
8970 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8971 }
8972
8973 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8974 unsigned AS = CmpX->getPointerAddressSpace();
8975 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8976 CmpX->getCompareOperand()->getType(), AS);
8977 }
8978
8979 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8980
8981 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8982 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8983 return true;
8984
8985 // TODO: Move this into the switch on opcode - it handles shifts already.
8986 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8987 BinOp->getOpcode() == Instruction::LShr)) {
8988 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8989 if (CI && TLI->hasExtractBitsInsn())
8990 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8991 return true;
8992 }
8993
8994 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8995 if (GEPI->hasAllZeroIndices()) {
8996 /// The GEP operand must be a pointer, so must its result -> BitCast
8997 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8998 GEPI->getName(), GEPI->getIterator());
8999 NC->setDebugLoc(GEPI->getDebugLoc());
9000 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
9001 RecursivelyDeleteTriviallyDeadInstructions(
9002 GEPI, TLInfo, nullptr,
9003 [&](Value *V) { removeAllAssertingVHReferences(V); });
9004 ++NumGEPsElim;
9005 optimizeInst(NC, ModifiedDT);
9006 return true;
9007 }
9008 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
9009 return true;
9010 }
9011 }
9012
9013 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
9014 // freeze(icmp a, const)) -> icmp (freeze a), const
9015 // This helps generate efficient conditional jumps.
9016 Instruction *CmpI = nullptr;
9017 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
9018 CmpI = II;
9019 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
9020 CmpI = F->getFastMathFlags().none() ? F : nullptr;
9021
9022 if (CmpI && CmpI->hasOneUse()) {
9023 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
9024 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
9025 isa<ConstantPointerNull>(Op0);
9026 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
9027 isa<ConstantPointerNull>(Op1);
9028 if (Const0 || Const1) {
9029 if (!Const0 || !Const1) {
9030 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
9031 F->takeName(FI);
9032 CmpI->setOperand(Const0 ? 1 : 0, F);
9033 }
9034 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
9035 FI->eraseFromParent();
9036 return true;
9037 }
9038 }
9039 return AnyChange;
9040 }
9041
9042 if (tryToSinkFreeOperands(I))
9043 return true;
9044
9045 switch (I->getOpcode()) {
9046 case Instruction::Shl:
9047 case Instruction::LShr:
9048 case Instruction::AShr:
9049 return optimizeShiftInst(cast<BinaryOperator>(I));
9050 case Instruction::Call:
9051 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
9052 case Instruction::Select:
9053 return optimizeSelectInst(cast<SelectInst>(I));
9054 case Instruction::ShuffleVector:
9055 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
9056 case Instruction::Switch:
9057 return optimizeSwitchInst(cast<SwitchInst>(I));
9058 case Instruction::ExtractElement:
9059 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
9060 case Instruction::Br:
9061 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
9062 }
9063
9064 return AnyChange;
9065}
9066
9067/// Given an OR instruction, check to see if this is a bitreverse
9068/// idiom. If so, insert the new intrinsic and return true.
9069bool CodeGenPrepare::makeBitReverse(Instruction &I) {
9070 if (!I.getType()->isIntegerTy() ||
9071 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
9072 TLI->getValueType(*DL, I.getType(), true)))
9073 return false;
9074
9075 SmallVector<Instruction *, 4> Insts;
9076 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
9077 return false;
9078 Instruction *LastInst = Insts.back();
9079 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
9080 RecursivelyDeleteTriviallyDeadInstructions(
9081 &I, TLInfo, nullptr,
9082 [&](Value *V) { removeAllAssertingVHReferences(V); });
9083 return true;
9084}
9085
9086// In this pass we look for GEP and cast instructions that are used
9087// across basic blocks and rewrite them to improve basic-block-at-a-time
9088// selection.
9089bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
9090 SunkAddrs.clear();
9091 bool MadeChange = false;
9092
9093 do {
9094 CurInstIterator = BB.begin();
9095 ModifiedDT = ModifyDT::NotModifyDT;
9096 while (CurInstIterator != BB.end()) {
9097 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
9098 if (ModifiedDT != ModifyDT::NotModifyDT) {
9099 // For huge functions we tend to quickly go through the inner optimization
9100 // opportunities in the BB. So we go back to the BB head to re-optimize
9101 // each instruction instead of going back to the function head.
9102 if (IsHugeFunc) {
9103 DT.reset();
9104 getDT(*BB.getParent());
9105 break;
9106 } else {
9107 return true;
9108 }
9109 }
9110 }
9111 } while (ModifiedDT == ModifyDT::ModifyInstDT);
9112
9113 bool MadeBitReverse = true;
9114 while (MadeBitReverse) {
9115 MadeBitReverse = false;
9116 for (auto &I : reverse(BB)) {
9117 if (makeBitReverse(I)) {
9118 MadeBitReverse = MadeChange = true;
9119 break;
9120 }
9121 }
9122 }
9123 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
9124
9125 return MadeChange;
9126}
9127
9128bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
9129 bool AnyChange = false;
9130 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
9131 AnyChange |= fixupDbgVariableRecord(DVR);
9132 return AnyChange;
9133}
9134
9135// FIXME: should updating debug-info really cause the "changed" flag to fire,
9136// which can cause a function to be reprocessed?
9137bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
9138 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
9139 DVR.Type != DbgVariableRecord::LocationType::Assign)
9140 return false;
9141
9142 // Does this DbgVariableRecord refer to a sunk address calculation?
9143 bool AnyChange = false;
9144 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
9145 DVR.location_ops().end());
9146 for (Value *Location : LocationOps) {
9147 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
9148 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
9149 if (SunkAddr) {
9150 // Point dbg.value at locally computed address, which should give the best
9151 // opportunity to be accurately lowered. This update may change the type
9152 // of pointer being referred to; however this makes no difference to
9153 // debugging information, and we can't generate bitcasts that may affect
9154 // codegen.
9155 DVR.replaceVariableLocationOp(Location, SunkAddr);
9156 AnyChange = true;
9157 }
9158 }
9159 return AnyChange;
9160}
9161
9162 static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
9163 DVR->removeFromParent();
9164 BasicBlock *VIBB = VI->getParent();
9165 if (isa<PHINode>(VI))
9166 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
9167 else
9168 VIBB->insertDbgRecordAfter(DVR, &*VI);
9169}
9170
9171// A llvm.dbg.value may be using a value before its definition, due to
9172// optimizations in this pass and others. Scan for such dbg.values, and rescue
9173// them by moving the dbg.value to immediately after the value definition.
9174// FIXME: Ideally this should never be necessary, and this has the potential
9175// to re-order dbg.value intrinsics.
9176bool CodeGenPrepare::placeDbgValues(Function &F) {
9177 bool MadeChange = false;
9178 DominatorTree DT(F);
9179
9180 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
9181 SmallVector<Instruction *, 4> VIs;
9182 for (Value *V : DbgItem->location_ops())
9183 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9184 VIs.push_back(VI);
9185
9186 // This item may depend on multiple instructions, complicating any
9187 // potential sink. This block takes the defensive approach, opting to
9188 // "undef" the item if it has more than one instruction and any of them do
9189 // not dominate it.
9190 for (Instruction *VI : VIs) {
9191 if (VI->isTerminator())
9192 continue;
9193
9194 // If VI is a phi in a block with an EHPad terminator, we can't insert
9195 // after it.
9196 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9197 continue;
9198
9199 // If the defining instruction dominates the dbg.value, we do not need
9200 // to move the dbg.value.
9201 if (DT.dominates(VI, Position))
9202 continue;
9203
9204 // If we depend on multiple instructions and any of them doesn't
9205 // dominate this DVI, we probably can't salvage it: moving it to
9206 // after any of the instructions could cause us to lose the others.
9207 if (VIs.size() > 1) {
9208 LLVM_DEBUG(
9209 dbgs()
9210 << "Unable to find valid location for Debug Value, undefing:\n"
9211 << *DbgItem);
9212 DbgItem->setKillLocation();
9213 break;
9214 }
9215
9216 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9217 << *DbgItem << ' ' << *VI);
9218 DbgInserterHelper(DbgItem, VI->getIterator());
9219 MadeChange = true;
9220 ++NumDbgValueMoved;
9221 }
9222 };
9223
9224 for (BasicBlock &BB : F) {
9225 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9226 // Process any DbgVariableRecord records attached to this
9227 // instruction.
9228 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9229 filterDbgVars(Insn.getDbgRecordRange()))) {
9230 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9231 continue;
9232 DbgProcessor(&DVR, &Insn);
9233 }
9234 }
9235 }
9236
9237 return MadeChange;
9238}
9239
9240// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9241// probes can be chained dependencies of other regular DAG nodes and block DAG
9242// combine optimizations.
9243bool CodeGenPrepare::placePseudoProbes(Function &F) {
9244 bool MadeChange = false;
9245 for (auto &Block : F) {
9246 // Move the remaining probes to the beginning of the block.
9247 auto FirstInst = Block.getFirstInsertionPt();
9248 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9249 ++FirstInst;
9250 BasicBlock::iterator I(FirstInst);
9251 I++;
9252 while (I != Block.end()) {
9253 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9254 II->moveBefore(FirstInst);
9255 MadeChange = true;
9256 }
9257 }
9258 }
9259 return MadeChange;
9260}
9261
9262/// Scale down both weights to fit into uint32_t.
9263static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9264 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9265 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9266 NewTrue = NewTrue / Scale;
9267 NewFalse = NewFalse / Scale;
9268}
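
For concreteness, here is a small self-contained sketch (with made-up weights) of the arithmetic scaleWeights performs: both counters are divided by the same factor so that the larger one fits into uint32_t while their ratio is approximately preserved.

#include <cstdint>
#include <limits>

int main() {
  // Hypothetical oversized weights.
  uint64_t NewTrue = 1ULL << 33;  // 8589934592 does not fit in uint32_t.
  uint64_t NewFalse = 1ULL << 31; // 2147483648 does.
  uint64_t NewMax = NewTrue > NewFalse ? NewTrue : NewFalse;
  uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
  // Scale == 3 here: 8589934592 / 4294967295 == 2, plus 1.
  NewTrue /= Scale;  // 2863311530, now fits in uint32_t.
  NewFalse /= Scale; // 715827882; the ~4:1 ratio is preserved.
  return 0;
}
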
9269
9270/// Some targets prefer to split a conditional branch like:
9271/// \code
9272/// %0 = icmp ne i32 %a, 0
9273/// %1 = icmp ne i32 %b, 0
9274/// %or.cond = or i1 %0, %1
9275/// br i1 %or.cond, label %TrueBB, label %FalseBB
9276/// \endcode
9277/// into multiple branch instructions like:
9278/// \code
9279/// bb1:
9280/// %0 = icmp ne i32 %a, 0
9281/// br i1 %0, label %TrueBB, label %bb2
9282/// bb2:
9283/// %1 = icmp ne i32 %b, 0
9284/// br i1 %1, label %TrueBB, label %FalseBB
9285/// \endcode
9286/// This usually allows instruction selection to do even further optimizations
9287/// and combine the compare with the branch instruction. Currently this is
9288/// applied for targets which have "cheap" jump instructions.
9289///
9290/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9291///
9292bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9293 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9294 return false;
9295
9296 bool MadeChange = false;
9297 for (auto &BB : F) {
9298 // Does this BB end with the following?
9299 // %cond1 = icmp|fcmp|binary instruction ...
9300 // %cond2 = icmp|fcmp|binary instruction ...
9301 // %cond.or = or|and i1 %cond1, %cond2
9302 // br i1 %cond.or, label %dest1, label %dest2
9303 Instruction *LogicOp;
9304 BasicBlock *TBB, *FBB;
9305 if (!match(BB.getTerminator(),
9306 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9307 continue;
9308
9309 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9310 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9311 continue;
9312
9313 // The merging of mostly empty BB can cause a degenerate branch.
9314 if (TBB == FBB)
9315 continue;
9316
9317 unsigned Opc;
9318 Value *Cond1, *Cond2;
9319 if (match(LogicOp,
9320 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9321 Opc = Instruction::And;
9322 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9323 m_OneUse(m_Value(Cond2)))))
9324 Opc = Instruction::Or;
9325 else
9326 continue;
9327
9328 auto IsGoodCond = [](Value *Cond) {
9329 return match(
9330 Cond,
9331 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9332 m_LogicalOr(m_Value(), m_Value()))));
9333 };
9334 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9335 continue;
9336
9337 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9338
9339 // Create a new BB.
9340 auto *TmpBB =
9341 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9342 BB.getParent(), BB.getNextNode());
9343 if (IsHugeFunc)
9344 FreshBBs.insert(TmpBB);
9345
9346 // Update the original basic block to use the first condition directly in
9347 // the branch instruction and remove the no longer needed and/or instruction.
9348 Br1->setCondition(Cond1);
9349 LogicOp->eraseFromParent();
9350
9351 // Depending on the condition we have to either replace the true or the
9352 // false successor of the original branch instruction.
9353 if (Opc == Instruction::And)
9354 Br1->setSuccessor(0, TmpBB);
9355 else
9356 Br1->setSuccessor(1, TmpBB);
9357
9358 // Fill in the new basic block.
9359 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9360 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9361 I->removeFromParent();
9362 I->insertBefore(Br2->getIterator());
9363 }
9364
9365 // Update PHI nodes in both successors. The original BB needs to be
9366 // replaced in one successor's PHI nodes, because the branch now comes from
9367 // the newly generated BB (TmpBB). In the other successor we need to add one
9368 // incoming edge to the PHI nodes, because both branch instructions now
9369 // target the same successor. Depending on the original branch condition
9370 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9371 // we perform the correct update for the PHI nodes.
9372 // This doesn't change the successor order of the just created branch
9373 // instruction (or any other instruction).
9374 if (Opc == Instruction::Or)
9375 std::swap(TBB, FBB);
9376
9377 // Replace the old BB with the new BB.
9378 TBB->replacePhiUsesWith(&BB, TmpBB);
9379
9380 // Add another incoming edge from the new BB.
9381 for (PHINode &PN : FBB->phis()) {
9382 auto *Val = PN.getIncomingValueForBlock(&BB);
9383 PN.addIncoming(Val, TmpBB);
9384 }
9385
9386 // Update the branch weights (from SelectionDAGBuilder::
9387 // FindMergedConditions).
9388 if (Opc == Instruction::Or) {
9389 // Codegen X | Y as:
9390 // BB1:
9391 // jmp_if_X TBB
9392 // jmp TmpBB
9393 // TmpBB:
9394 // jmp_if_Y TBB
9395 // jmp FBB
9396 //
9397
9398 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9399 // The requirement is that
9400 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9401 // = TrueProb for original BB.
9402 // Assuming the original weights are A and B, one choice is to set BB1's
9403 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9404 // assumes that
9405 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9406 // Another choice is to assume TrueProb for BB1 equals TrueProb for
9407 // TmpBB, but the math is more complicated.
9408 uint64_t TrueWeight, FalseWeight;
9409 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9410 uint64_t NewTrueWeight = TrueWeight;
9411 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9412 scaleWeights(NewTrueWeight, NewFalseWeight);
9413 Br1->setMetadata(LLVMContext::MD_prof,
9414 MDBuilder(Br1->getContext())
9415 .createBranchWeights(TrueWeight, FalseWeight,
9416 hasBranchWeightOrigin(*Br1)));
9417
9418 NewTrueWeight = TrueWeight;
9419 NewFalseWeight = 2 * FalseWeight;
9420 scaleWeights(NewTrueWeight, NewFalseWeight);
9421 Br2->setMetadata(LLVMContext::MD_prof,
9422 MDBuilder(Br2->getContext())
9423 .createBranchWeights(TrueWeight, FalseWeight));
9424 }
9425 } else {
9426 // Codegen X & Y as:
9427 // BB1:
9428 // jmp_if_X TmpBB
9429 // jmp FBB
9430 // TmpBB:
9431 // jmp_if_Y TBB
9432 // jmp FBB
9433 //
9434 // This requires creation of TmpBB after CurBB.
9435
9436 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9437 // The requirement is that
9438 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9439 // = FalseProb for original BB.
9440 // Assuming the original weights are A and B, one choice is to set BB1's
9441 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9442 // assumes that
9443 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9444 uint64_t TrueWeight, FalseWeight;
9445 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9446 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9447 uint64_t NewFalseWeight = FalseWeight;
9448 scaleWeights(NewTrueWeight, NewFalseWeight);
9449 Br1->setMetadata(LLVMContext::MD_prof,
9450 MDBuilder(Br1->getContext())
9451 .createBranchWeights(TrueWeight, FalseWeight));
9452
9453 NewTrueWeight = 2 * TrueWeight;
9454 NewFalseWeight = FalseWeight;
9455 scaleWeights(NewTrueWeight, NewFalseWeight);
9456 Br2->setMetadata(LLVMContext::MD_prof,
9457 MDBuilder(Br2->getContext())
9458 .createBranchWeights(TrueWeight, FalseWeight));
9459 }
9460 }
9461
9462 ModifiedDT = ModifyDT::ModifyBBDT;
9463 MadeChange = true;
9464
9465 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9466 TmpBB->dump());
9467 }
9468 return MadeChange;
9469}
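
To make the branch-weight bookkeeping above concrete, the snippet below (a standalone sketch with hypothetical profile weights A and B, not part of this pass) applies the formulas quoted in the comments: for the or case BB1 receives (A, A + 2B) and TmpBB receives (A, 2B); for the and case BB1 receives (2A + B, B) and TmpBB receives (2A, B). Under either choice the probability of reaching the original taken and not-taken successors stays at A/(A+B) and B/(A+B), which is the requirement spelled out in the comments.

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical original branch weights: A for the taken edge, B for the
  // not-taken edge of the unsplit conditional branch.
  uint64_t A = 30, B = 10;

  // "or" case (X | Y): BB1 tests X, TmpBB tests Y.
  uint64_t OrBB1True = A, OrBB1False = A + 2 * B; // 30, 50
  uint64_t OrTmpTrue = A, OrTmpFalse = 2 * B;     // 30, 20

  // "and" case (X & Y): same split with the roles of the successors flipped.
  uint64_t AndBB1True = 2 * A + B, AndBB1False = B; // 70, 10
  uint64_t AndTmpTrue = 2 * A, AndTmpFalse = B;     // 60, 10

  // Sanity check for the "or" case:
  //   P(TrueBB) = 30/80 + (50/80) * (30/50) = 0.75 = A / (A + B).
  std::printf("or:  BB1=(%llu,%llu)  TmpBB=(%llu,%llu)\n",
              (unsigned long long)OrBB1True, (unsigned long long)OrBB1False,
              (unsigned long long)OrTmpTrue, (unsigned long long)OrTmpFalse);
  std::printf("and: BB1=(%llu,%llu)  TmpBB=(%llu,%llu)\n",
              (unsigned long long)AndBB1True, (unsigned long long)AndBB1False,
              (unsigned long long)AndTmpTrue, (unsigned long long)AndTmpFalse);
  return 0;
}
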
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, ExtractValueInst *&OverflowExtract)
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1450
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1183
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1540
unsigned logBase2() const
Definition APInt.h:1770
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1571
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:483
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:539
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:701
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Conditional or Unconditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:57
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:807
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:342
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Type * getValueType() const
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2762
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:194
iterator find(const KeyT &Key)
Definition MapVector.h:154
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:754
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
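Several of the TargetLowering hooks listed above answer the same kind of question: would the target accept this form? The following is a minimal sketch of how an IR-level pass might phrase such queries; the helper name queryTargetHooks and the concrete base/scale/offset values are illustrative only, and TLI, DL and Ctx are assumed to be supplied by the caller.

#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Ask whether "base + 4*index + 16" is a legal addressing mode for an i32
// access in address space 0, and whether an i32 ADD is legal or custom.
static bool queryTargetHooks(const TargetLowering &TLI, const DataLayout &DL,
                             LLVMContext &Ctx) {
  TargetLowering::AddrMode AM;
  AM.HasBaseReg = true; // a base register ...
  AM.Scale = 4;         // ... plus a scaled index register ...
  AM.BaseOffs = 16;     // ... plus a constant displacement
  Type *I32 = Type::getInt32Ty(Ctx);
  bool AddrOK = TLI.isLegalAddressingMode(DL, AM, I32, /*AddrSpace=*/0);

  EVT VT = TLI.getValueType(DL, I32);
  bool AddOK = TLI.isOperationLegalOrCustom(ISD::ADD, VT);
  return AddrOK && AddOK;
}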
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
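The TargetTransformInfo entries above are the cost oracle that transforms like this one consult before rewriting IR. Below is a small sketch of a throughput-based query; mulIsCheapEnough is a hypothetical helper and the two-basic-instructions budget is only an example threshold.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Compare the reciprocal-throughput cost of a multiply on Ty against a
// budget of two "basic" (add-like) instructions.
static bool mulIsCheapEnough(const TargetTransformInfo &TTI, Type *Ty) {
  InstructionCost Cost = TTI.getArithmeticInstrCost(
      Instruction::Mul, Ty, TargetTransformInfo::TCK_RecipThroughput);
  return Cost.isValid() &&
         Cost <= InstructionCost(2 * TargetTransformInfo::TCC_Basic);
}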
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
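The Type queries above are typically combined when a transform needs "the same shape, but a different lane width". A short sketch under that assumption; widenLanes is a hypothetical helper and only integer lanes are handled.

#include "llvm/IR/Type.h"

using namespace llvm;

// Double the lane width of an integer (or vector-of-integer) type while
// keeping the element count, e.g. i16 -> i32 and <4 x i8> -> <4 x i16>.
static Type *widenLanes(Type *Ty) {
  if (!Ty->getScalarType()->isIntegerTy())
    return nullptr; // only integer lanes handled in this sketch
  unsigned Bits = Ty->getScalarSizeInBits();
  return Ty->getWithNewBitWidth(Bits * 2); // preserves the element count
}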
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:962
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:242
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:708
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
iterator_range< use_iterator > uses()
Definition Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:838
user_iterator_impl< User > user_iterator
Definition Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
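The Value interface above (users(), hasOneUse(), replaceAllUsesWith(), takeName()) is how a rewrite hands its result over to a replacement instruction. A minimal sketch of the usual "replace and inherit the name" step; replaceAndErase is a hypothetical helper, and Old and New are assumed to be produced elsewhere.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Redirect every use of Old to New, let New keep Old's name, and erase Old
// once nothing refers to it any more.
static void replaceAndErase(Instruction *Old, Instruction *New) {
  New->takeName(Old);           // preserve the IR-level name for readability
  Old->replaceAllUsesWith(New); // rewrite all users in one step
  if (Old->use_empty())
    Old->eraseFromParent();     // safe: Old has no remaining uses
}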
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
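The PatternMatch helpers above compose into declarative IR queries. As an illustration only (not a pattern this pass necessarily looks for), the sketch below matches "add (shl X, C), Y" with a single-use shift; matchShlAdd is a hypothetical helper.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Match V against "add (shl X, C), Y" where the shift has exactly one use,
// binding X, Y and the shift amount C on success.
static bool matchShlAdd(Value *V, Value *&X, Value *&Y, const APInt *&C) {
  return match(V, m_Add(m_OneUse(m_Shl(m_Value(X), m_APInt(C))), m_Value(Y)));
}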
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
std::enable_if_t< std::is_signed_v< T >, T > MulOverflow(T X, T Y, T &Result)
Multiply two signed integers, computing the two's complement truncated result, returning true if an o...
Definition MathExtras.h:753
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
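The two Local.h utilities above usually run as the clean-up tail of a rewrite: once a value has been folded to a constant, anything that fed it may become dead and a conditional branch on it may become unconditional. A small sketch under those assumptions; foldToConstant is a hypothetical helper.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Replace every use of Inst with the constant C, delete Inst (and anything
// that only it kept alive), then try to fold the block's terminator in case
// C reached a conditional branch.
static bool foldToConstant(Instruction *Inst, Constant *C) {
  BasicBlock *BB = Inst->getParent();
  Inst->replaceAllUsesWith(C);
  bool Changed = RecursivelyDeleteTriviallyDeadInstructions(Inst);
  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true);
  return Changed;
}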
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2244
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1731
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2122
constexpr from_range_t from_range
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
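make_early_inc_range is the idiom to reach for whenever the loop body may erase the element it is visiting. A typical sketch over the instructions of a block; dropDeadInstrs is a hypothetical helper, and isInstructionTriviallyDead is the companion predicate from Transforms/Utils/Local.h.

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Erase trivially dead instructions from BB. make_early_inc_range advances
// the iterator before the body runs, so erasing the current instruction is
// safe.
static bool dropDeadInstrs(BasicBlock &BB) {
  bool Changed = false;
  for (Instruction &I : make_early_inc_range(BB)) {
    if (isInstructionTriviallyDead(&I)) {
      I.eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}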
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2124
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2163
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2190
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3761
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition Analysis.cpp:203
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test whether, given that the input instruction is in the tail call position, there is an attribute misma...
Definition Analysis.cpp:588
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
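Taken together with getBypassSlowDivWidths() listed earlier, this is how slow-division bypassing is wired up: the target publishes a width map, and bypassSlowDivision() rewrites qualifying div/rem instructions block by block. A minimal sketch of the call pattern; bypassSlowDivInFunction is a hypothetical helper, and F and TLI are assumed to be supplied by the caller.

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

using namespace llvm;

// Let bypassSlowDivision rewrite any div/rem whose width appears in the
// target's bypass map. Blocks are collected up front because the transform
// may insert new blocks into F.
static bool bypassSlowDivInFunction(Function &F, const TargetLowering &TLI) {
  const DenseMap<unsigned, unsigned> &Widths = TLI.getBypassSlowDivWidths();
  if (Widths.empty())
    return false;
  SmallVector<BasicBlock *, 16> Blocks;
  for (BasicBlock &BB : F)
    Blocks.push_back(&BB);
  bool Changed = false;
  for (BasicBlock *BB : Blocks)
    Changed |= bypassSlowDivision(BB, Widths);
  return Changed;
}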
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2182
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow ...
Definition MathExtras.h:701
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
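SplitBlockAndInsertIfThen is the basic building block for "guard the expensive path" rewrites. Below is a minimal sketch that splits before an instruction and creates a then-block entered only when Cond is true; emitGuardedBlock is a hypothetical helper, and branch weights and the dominator/loop updaters are left at their defaults.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Split the block right before SplitPt and return the terminator of the new
// "then" block; code the caller builds in front of it runs only when Cond
// is true.
static Instruction *emitGuardedBlock(Value *Cond, Instruction *SplitPt) {
  Instruction *ThenTerm = SplitBlockAndInsertIfThen(
      Cond, SplitPt->getIterator(), /*Unreachable=*/false);
  IRBuilder<> Builder(ThenTerm); // insert the guarded (slow-path) code here
  (void)Builder;
  return ThenTerm;
}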
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition ValueTypes.h:300
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
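EVT is the bridge between IR types and the legalizer's view of them; the comparisons above (bitsGT/bitsLT, isInteger, isRound) are how promotion questions get phrased. A small sketch under that reading; needsIntegerWidening is a hypothetical helper, and LoadTy/UseTy are assumed to be IR types supplied by the caller.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Decide whether a value of type LoadTy would have to be widened to be used
// at type UseTy: both must map to integer EVTs and UseTy must be strictly
// wider.
static bool needsIntegerWidening(Type *LoadTy, Type *UseTy) {
  EVT LoadVT = EVT::getEVT(LoadTy);
  EVT UseVT = EVT::getEVT(UseTy);
  return LoadVT.isInteger() && UseVT.isInteger() && UseVT.bitsGT(LoadVT);
}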
This contains information for each constraint that we are lowering.