1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
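//
// For illustration (a simplified sketch, not quoted from this file): a typical
// CodeGenPrepare transformation is to sink a free address computation into the
// block that uses it, so that SelectionDAG, which only sees one basic block at
// a time, can fold it into the load's addressing mode. For example, a
// getelementptr defined in bb0 whose only use is a load in bb1 is duplicated
// into bb1 next to the load.
//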
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
45#include "llvm/Config/llvm-config.h"
46#include "llvm/IR/Argument.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
50#include "llvm/IR/Constants.h"
51#include "llvm/IR/DataLayout.h"
52#include "llvm/IR/DebugInfo.h"
54#include "llvm/IR/Dominators.h"
55#include "llvm/IR/Function.h"
57#include "llvm/IR/GlobalValue.h"
59#include "llvm/IR/IRBuilder.h"
60#include "llvm/IR/InlineAsm.h"
61#include "llvm/IR/InstrTypes.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/IntrinsicsAArch64.h"
67#include "llvm/IR/LLVMContext.h"
68#include "llvm/IR/MDBuilder.h"
69#include "llvm/IR/Module.h"
70#include "llvm/IR/Operator.h"
73#include "llvm/IR/Statepoint.h"
74#include "llvm/IR/Type.h"
75#include "llvm/IR/Use.h"
76#include "llvm/IR/User.h"
77#include "llvm/IR/Value.h"
78#include "llvm/IR/ValueHandle.h"
79#include "llvm/IR/ValueMap.h"
81#include "llvm/Pass.h"
87#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of select created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinking and/cmp into branches."));
157
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
187 "profile, we cannot tell the function is cold for sure because "
188 "it may be a function newly added without ever being sampled. "
189 "With the flag enabled, compiler can put such profile unknown "
190 "functions into a special section, so runtime system can choose "
191 "to handle it in a different way than .text section, to save "
192 "RAM for example. "));
193
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290 NotModifyDT, // Not Modify any DT.
291 ModifyBBDT, // Modify the Basic Block Dominator Tree.
292 ModifyInstDT // Modify the Instruction Dominator in a Basic Block,
293 // This usually means we move/delete/insert instruction
294 // in a Basic Block. So we should re-iterate instructions
295 // in such Basic Block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using SExts = SmallVector<Instruction *, 16>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
329 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the types of the related instructions before their
335 /// promotion for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
347 MapVector<AssertingVH<Value>,
348 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
349 LargeOffsetGEPMap;
350
351 /// Keep track of the new GEP bases after splitting GEPs that have large offsets.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map large offset GEPs to their serial numbers.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of promoted SExt instructions.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare() = default;
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
373 /// If we encounter a huge function, we need to limit the build time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs is like a worklist: it collects the updated BBs which need
377 /// to be optimized again.
378 /// Note: to keep the build time of this pass in check, when a BB is
379 /// updated in a huge function we need to insert it into FreshBBs.
380 SmallPtrSet<BasicBlock *, 32> FreshBBs;
381
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
391 bool run(Function &F, FunctionAnalysisManager &AM);
392
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
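
  // Typical usage (a sketch; the real call sites appear later in this file):
  //
  //   resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
  //     RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
  //   });
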
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeMulWithOverflow(Instruction *I, bool IsSigned,
435 ModifyDT &ModifiedDT);
436 bool optimizeInlineAsmInst(CallInst *CS);
437 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
438 bool optimizeExt(Instruction *&I);
439 bool optimizeExtUses(Instruction *I);
440 bool optimizeLoadExt(LoadInst *Load);
441 bool optimizeShiftInst(BinaryOperator *BO);
442 bool optimizeFunnelShift(IntrinsicInst *Fsh);
443 bool optimizeSelectInst(SelectInst *SI);
444 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
445 bool optimizeSwitchType(SwitchInst *SI);
446 bool optimizeSwitchPhiConstants(SwitchInst *SI);
447 bool optimizeSwitchInst(SwitchInst *SI);
448 bool optimizeExtractElementInst(Instruction *Inst);
449 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
450 bool fixupDbgVariableRecord(DbgVariableRecord &I);
451 bool fixupDbgVariableRecordsOnInst(Instruction &I);
452 bool placeDbgValues(Function &F);
453 bool placePseudoProbes(Function &F);
454 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
455 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
456 bool tryToPromoteExts(TypePromotionTransaction &TPT,
457 const SmallVectorImpl<Instruction *> &Exts,
458 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
459 unsigned CreatedInstsCost = 0);
460 bool mergeSExts(Function &F);
461 bool splitLargeGEPOffsets();
462 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
463 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
464 bool optimizePhiTypes(Function &F);
465 bool performAddressTypePromotion(
466 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
467 bool HasPromoted, TypePromotionTransaction &TPT,
468 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
469 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
470 bool simplifyOffsetableRelocate(GCStatepointInst &I);
471
472 bool tryToSinkFreeOperands(Instruction *I);
473 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
474 CmpInst *Cmp, Intrinsic::ID IID);
475 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 bool optimizeURem(Instruction *Rem);
477 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
478 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
479 bool unfoldPowerOf2Test(CmpInst *Cmp);
480 void verifyBFIUpdates(Function &F);
481 bool _run(Function &F);
482};
483
484class CodeGenPrepareLegacyPass : public FunctionPass {
485public:
486 static char ID; // Pass identification, replacement for typeid
487
488 CodeGenPrepareLegacyPass() : FunctionPass(ID) {}
489
490 bool runOnFunction(Function &F) override;
491
492 StringRef getPassName() const override { return "CodeGen Prepare"; }
493
494 void getAnalysisUsage(AnalysisUsage &AU) const override {
495 // FIXME: When we can selectively preserve passes, preserve the domtree.
496 AU.addRequired<ProfileSummaryInfoWrapperPass>();
497 AU.addRequired<TargetLibraryInfoWrapperPass>();
498 AU.addRequired<TargetPassConfig>();
499 AU.addRequired<TargetTransformInfoWrapperPass>();
500 AU.addRequired<LoopInfoWrapperPass>();
501 AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
522 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540 "Optimize for code generation", false, false)
541
542FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
543 return new CodeGenPrepareLegacyPass();
544}
545
546PreservedAnalyses CodeGenPreparePass::run(Function &F,
547 FunctionAnalysisManager &AM) {
548 CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
557 return PA;
558}
559
560bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
561 DL = &F.getDataLayout();
562 SubtargetInfo = TM->getSubtargetImpl(F);
563 TLI = SubtargetInfo->getTargetLowering();
564 TRI = SubtargetInfo->getRegisterInfo();
565 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
566 TTI = &AM.getResult<TargetIRAnalysis>(F);
567 LI = &AM.getResult<LoopAnalysis>(F);
568 BPI.reset(new BranchProbabilityInfo(F, *LI));
569 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
570 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
571 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
572 BBSectionsProfileReader =
573 AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F);
574 return _run(F);
575}
576
577bool CodeGenPrepare::_run(Function &F) {
578 bool EverMadeChange = false;
579
580 OptSize = F.hasOptSize();
581 // Use the basic-block-sections profile to promote hot functions to .text.hot
582 // if requested.
583 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
584 BBSectionsProfileReader->isFunctionHot(F.getName())) {
585 (void)F.setSectionPrefix("hot");
586 } else if (ProfileGuidedSectionPrefix) {
587 // The hot attribute overrides profile-count-based hotness, while
588 // profile-count-based hotness overrides the cold attribute.
589 // This is a conservative behavior.
590 if (F.hasFnAttribute(Attribute::Hot) ||
591 PSI->isFunctionHotInCallGraph(&F, *BFI))
592 (void)F.setSectionPrefix("hot");
593 // If PSI shows this function is not hot, we place the function into the
594 // unlikely section if (1) PSI shows this is a cold function, or
595 // (2) the function has the cold attribute.
596 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
597 F.hasFnAttribute(Attribute::Cold))
598 (void)F.setSectionPrefix("unlikely");
601 (void)F.setSectionPrefix("unknown");
602 }
603
604 /// This optimization identifies DIV instructions that can be
605 /// profitably bypassed and carried out with a shorter, faster divide.
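  // Illustrative example (a sketch; the exact guards are target-dependent):
  // on a target where 64-bit division is slow, "udiv i64 %a, %b" can be
  // bypassed by checking at run time whether both operands fit in 32 bits and,
  // if so, doing a 32-bit udiv and zero-extending the result; otherwise the
  // original 64-bit udiv is executed.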
606 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
607 const DenseMap<unsigned int, unsigned int> &BypassWidths =
608 TLI->getBypassSlowDivWidths();
609 BasicBlock *BB = &*F.begin();
610 while (BB != nullptr) {
611 // bypassSlowDivision may create new BBs, but we don't want to reapply the
612 // optimization to those blocks.
613 BasicBlock *Next = BB->getNextNode();
614 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
615 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620 // Get rid of @llvm.assume builtins before attempting to eliminate empty
621 // blocks, since there might be blocks that only contain @llvm.assume calls
622 // (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625 // Eliminate blocks that contain only PHI nodes and an
626 // unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
630 if (!DisableBranchOpts)
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633 // Split some critical edges where one of the sources is an indirect branch,
634 // to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
637
638 // If we are optimizing a huge function, we need to consider the build
639 // time, because the basic algorithm's complexity is near O(N!).
640 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
641
642 // Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
647 bool MadeChange = true;
648 bool FuncIterated = false;
649 while (MadeChange) {
650 MadeChange = false;
651
652 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
653 if (FuncIterated && !FreshBBs.contains(&BB))
654 continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
658
659 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663 if (IsHugeFunc) {
664 // If the BB is updated, it may still have a chance to be optimized.
665 // This usually happens during sink optimization.
666 // For example:
667 //
668 // bb0:
669 // %and = and i32 %a, 4
670 // %cmp = icmp eq i32 %and, 0
671 //
672 // If the %cmp is sunk to another BB, the %and will have a chance to sink too.
673 if (Changed)
674 FreshBBs.insert(&BB);
675 else if (FuncIterated)
676 FreshBBs.erase(&BB);
677 } else {
678 // For small/normal functions, we restart BB iteration if the dominator
679 // tree of the Function was changed.
680 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681 break;
682 }
683 }
684 // We have iterated over all the BBs in the function (this only matters for huge functions).
685 FuncIterated = IsHugeFunc;
686
687 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689 if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693 if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697 if (MadeChange && VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701 // Really free removed instructions during promotion.
702 for (Instruction *I : RemovedInsts)
703 I->deleteValue();
704
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716 if (!DisableBranchOpts) {
717 MadeChange = false;
718 // Use a set vector to get deterministic iteration order. The order the
719 // blocks are removed may affect whether or not PHI nodes in successors
720 // are removed.
721 SmallSetVector<BasicBlock *, 8> WorkList;
722 for (BasicBlock &BB : F) {
723 SmallVector<BasicBlock *, 2> Successors(successors(&BB));
724 MadeChange |= ConstantFoldTerminator(&BB, true);
725 if (!MadeChange)
726 continue;
727
728 for (BasicBlock *Succ : Successors)
729 if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733 // Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735 while (!WorkList.empty()) {
736 BasicBlock *BB = WorkList.pop_back_val();
737 SmallVector<BasicBlock *, 2> Successors(successors(BB));
738
739 DeleteDeadBlock(BB);
740
741 for (BasicBlock *Succ : Successors)
742 if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746 // Merge pairs of basic blocks with unconditional branches, connected by
747 // a single edge.
748 if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754 if (!DisableGCOpts) {
755 SmallVector<GCStatepointInst *, 8> Statepoints;
756 for (BasicBlock &BB : F)
757 for (Instruction &I : BB)
758 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760 for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764 // Do this last to clean up use-before-def scenarios introduced by other
765 // preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
770 if (VerifyBFIUpdates)
771 verifyBFIUpdates(F);
772#endif
773
774 return EverMadeChange;
775}
776
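
// Illustrative effect of eliminateAssumptions (a sketch, not from a test):
//
//   %c = icmp ugt i32 %x, 7
//   call void @llvm.assume(i1 %c)
//
// The @llvm.assume call is erased, and %c is then deleted as well if it has
// no remaining uses.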
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778 bool MadeChange = false;
779 for (BasicBlock &BB : F) {
780 CurInstIterator = BB.begin();
781 while (CurInstIterator != BB.end()) {
782 Instruction *I = &*(CurInstIterator++);
783 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange = true;
785 Value *Operand = Assume->getOperand(0);
786 Assume->eraseFromParent();
787
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
790 });
791 }
792 }
793 }
794 return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data structures.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
803 auto *GEP = dyn_cast<GetElementPtrInst>(V);
804 if (!GEP)
805 return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810 if (VecI == LargeOffsetGEPMap.end())
811 return;
812
813 auto &GEPVector = VecI->second;
814 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
815
816 if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
821[[maybe_unused]] void CodeGenPrepare::verifyBFIUpdates(Function &F) {
822 DominatorTree NewDT(F);
823 LoopInfo NewLI(NewDT);
824 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
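// Illustrative example (a sketch): if bb0 ends in an unconditional
// "br label %bb1" and bb1 has no other predecessors (and its address is not
// taken), bb1 is merged into bb0 and deleted.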
832bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
833 bool Changed = false;
834 // Scan all of the blocks in the function, except for the entry block.
835 // Use a temporary array to avoid iterator being invalidated when
836 // deleting blocks.
839
840 SmallSet<WeakTrackingVH, 16> Preds;
841 for (auto &Block : Blocks) {
842 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
843 if (!BB)
844 continue;
845 // If the destination block has a single pred, then this is a trivial
846 // edge, just collapse it.
847 BasicBlock *SinglePred = BB->getSinglePredecessor();
848
849 // Don't merge if BB's address is taken.
850 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
851 continue;
852
853 // Make an effort to skip unreachable blocks.
854 if (DT && !DT->isReachableFromEntry(BB))
855 continue;
856
857 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
858 if (Term && !Term->isConditional()) {
859 Changed = true;
860 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
861
862 // Merge BB into SinglePred and delete it.
863 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
864 /* MemDep */ nullptr,
865 /* PredecessorWithTwoSuccessors */ false, DT);
866 Preds.insert(SinglePred);
867
868 if (IsHugeFunc) {
869 // Update FreshBBs to optimize the merged BB.
870 FreshBBs.insert(SinglePred);
871 FreshBBs.erase(BB);
872 }
873 }
874 }
875
876 // (Repeatedly) merging blocks into their predecessors can create redundant
877 // debug intrinsics.
878 for (const auto &Pred : Preds)
879 if (auto *BB = cast_or_null<BasicBlock>(Pred))
880 RemoveRedundantDbgInstrs(BB);
881
882 return Changed;
883}
884
885/// Find a destination block from BB if BB is mergeable empty block.
886BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
887 // If this block doesn't end with an uncond branch, ignore it.
888 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
889 if (!BI || !BI->isUnconditional())
890 return nullptr;
891
892 // If the instruction before the branch (skipping debug info) isn't a phi
893 // node, then other stuff is happening here.
894 BasicBlock::iterator BBI = BI->getIterator();
895 if (BBI != BB->begin()) {
896 --BBI;
897 if (!isa<PHINode>(BBI))
898 return nullptr;
899 }
900
901 // Do not break infinite loops.
902 BasicBlock *DestBB = BI->getSuccessor(0);
903 if (DestBB == BB)
904 return nullptr;
905
906 if (!canMergeBlocks(BB, DestBB))
907 DestBB = nullptr;
908
909 return DestBB;
910}
911
912/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
913/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
914/// edges in ways that are non-optimal for isel. Start by eliminating these
915/// blocks so we can split them the way we want them.
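// Illustrative shape of such a "mostly empty" block (a sketch):
//
//   bb:                                        ; preds = %a, %b
//     %p = phi i32 [ 0, %a ], [ 1, %b ]
//     br label %dest
//
// When canMergeBlocks() and the profitability check below allow it, bb is
// folded into %dest.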
916bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
917 SmallPtrSet<BasicBlock *, 16> Preheaders;
918 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
919 while (!LoopList.empty()) {
920 Loop *L = LoopList.pop_back_val();
921 llvm::append_range(LoopList, *L);
922 if (BasicBlock *Preheader = L->getLoopPreheader())
923 Preheaders.insert(Preheader);
924 }
925
926 bool MadeChange = false;
927 // Copy blocks into a temporary array to avoid iterator invalidation issues
928 // as we remove them.
929 // Note that this intentionally skips the entry block.
930 SmallVector<WeakTrackingVH, 16> Blocks;
931 for (auto &Block : llvm::drop_begin(F)) {
932 // Delete phi nodes that could block deleting other empty blocks.
933 if (!DisableDeletePHIs)
934 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
935 Blocks.push_back(&Block);
936 }
937
938 for (auto &Block : Blocks) {
939 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
940 if (!BB)
941 continue;
942 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
943 if (!DestBB ||
944 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
945 continue;
946
947 eliminateMostlyEmptyBlock(BB);
948 MadeChange = true;
949 }
950 return MadeChange;
951}
952
953bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
954 BasicBlock *DestBB,
955 bool isPreheader) {
956 // Do not delete loop preheaders if doing so would create a critical edge.
957 // Loop preheaders can be good locations to spill registers. If the
958 // preheader is deleted and we create a critical edge, registers may be
959 // spilled in the loop body instead.
960 if (!DisablePreheaderProtect && isPreheader &&
961 !(BB->getSinglePredecessor() &&
962 BB->getSinglePredecessor()->getSingleSuccessor()))
963 return false;
964
965 // Skip merging if the block's successor is also a successor to any callbr
966 // that leads to this block.
967 // FIXME: Is this really needed? Is this a correctness issue?
968 for (BasicBlock *Pred : predecessors(BB)) {
969 if (isa<CallBrInst>(Pred->getTerminator()) &&
970 llvm::is_contained(successors(Pred), DestBB))
971 return false;
972 }
973
974 // Try to skip merging if the unique predecessor of BB is terminated by a
975 // switch or indirect branch instruction, and BB is used as an incoming block
976 // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
977 // add COPY instructions in the predecessor of BB instead of BB (if it is not
978 // merged). Note that the critical edge created by merging such blocks won't be
979 // split in MachineSink because the jump table is not analyzable. By keeping
980 // such an empty block (BB), ISel will place COPY instructions in BB, not in the
981 // predecessor of BB.
982 BasicBlock *Pred = BB->getUniquePredecessor();
983 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
984 isa<IndirectBrInst>(Pred->getTerminator())))
985 return true;
986
987 if (BB->getTerminator() != &*BB->getFirstNonPHIOrDbg())
988 return true;
989
990 // We use a simple cost heuristic which determines that skipping the merge
991 // is profitable if the cost of skipping merging is less than the cost of
992 // merging: Cost(skipping merging) < Cost(merging BB), where the
993 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
994 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
995 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
996 // Freq(Pred) / Freq(BB) > 2.
997 // Note that if there are multiple empty blocks sharing the same incoming
998 // value for the PHIs in the DestBB, we consider them together. In such
999 // case, Cost(merging BB) will be the sum of their frequencies.
1000
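  // Worked example with made-up frequencies: if Freq(Pred) = 1000 and
  // Freq(BB) = 400, then Freq(Pred) > 2 * Freq(BB) (the default ratio), so the
  // final check below returns false and the merge is skipped.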
1001 if (!isa<PHINode>(DestBB->begin()))
1002 return true;
1003
1004 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1005
1006 // Find all other incoming blocks from which incoming values of all PHIs in
1007 // DestBB are the same as the ones from BB.
1008 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1009 if (DestBBPred == BB)
1010 continue;
1011
1012 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1013 return DestPN.getIncomingValueForBlock(BB) ==
1014 DestPN.getIncomingValueForBlock(DestBBPred);
1015 }))
1016 SameIncomingValueBBs.insert(DestBBPred);
1017 }
1018
1019 // See if all of BB's incoming values are the same as the value from Pred. In
1020 // this case, there is no reason to skip merging because COPYs are expected to
1021 // be placed in Pred already.
1022 if (SameIncomingValueBBs.count(Pred))
1023 return true;
1024
1025 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1026 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1027
1028 for (auto *SameValueBB : SameIncomingValueBBs)
1029 if (SameValueBB->getUniquePredecessor() == Pred &&
1030 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1031 BBFreq += BFI->getBlockFreq(SameValueBB);
1032
1033 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1034 return !Limit || PredFreq <= *Limit;
1035}
1036
1037/// Return true if we can merge BB into DestBB if there is a single
1038/// unconditional branch between them, and BB contains no other non-phi
1039/// instructions.
1040bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1041 const BasicBlock *DestBB) const {
1042 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1043 // the successor. If there are more complex conditions (e.g. preheaders),
1044 // don't mess around with them.
1045 for (const PHINode &PN : BB->phis()) {
1046 for (const User *U : PN.users()) {
1047 const Instruction *UI = cast<Instruction>(U);
1048 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1049 return false;
1050 // If User is inside DestBB block and it is a PHINode then check
1051 // incoming value. If incoming value is not from BB then this is
1052 // a complex condition (e.g. preheaders) we want to avoid here.
1053 if (UI->getParent() == DestBB) {
1054 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1055 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1056 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1057 if (Insn && Insn->getParent() == BB &&
1058 Insn->getParent() != UPN->getIncomingBlock(I))
1059 return false;
1060 }
1061 }
1062 }
1063 }
1064
1065 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1066 // and DestBB may have conflicting incoming values for the block. If so, we
1067 // can't merge the block.
1068 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1069 if (!DestBBPN)
1070 return true; // no conflict.
1071
1072 // Collect the preds of BB.
1073 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1074 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1075 // It is faster to get preds from a PHI than with pred_iterator.
1076 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1077 BBPreds.insert(BBPN->getIncomingBlock(i));
1078 } else {
1079 BBPreds.insert_range(predecessors(BB));
1080 }
1081
1082 // Walk the preds of DestBB.
1083 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1084 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1085 if (BBPreds.count(Pred)) { // Common predecessor?
1086 for (const PHINode &PN : DestBB->phis()) {
1087 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1088 const Value *V2 = PN.getIncomingValueForBlock(BB);
1089
1090 // If V2 is a phi node in BB, look up what the mapped value will be.
1091 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1092 if (V2PN->getParent() == BB)
1093 V2 = V2PN->getIncomingValueForBlock(Pred);
1094
1095 // If there is a conflict, bail out.
1096 if (V1 != V2)
1097 return false;
1098 }
1099 }
1100 }
1101
1102 return true;
1103}
1104
1105/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1106static void replaceAllUsesWith(Value *Old, Value *New,
1107 SmallPtrSet<BasicBlock *, 32> &FreshBBs,
1108 bool IsHuge) {
1109 auto *OldI = dyn_cast<Instruction>(Old);
1110 if (OldI) {
1111 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1112 UI != E; ++UI) {
1113 Instruction *User = cast<Instruction>(*UI);
1114 if (IsHuge)
1115 FreshBBs.insert(User->getParent());
1116 }
1117 }
1118 Old->replaceAllUsesWith(New);
1119}
1120
1121/// Eliminate a basic block that has only phi's and an unconditional branch in
1122/// it.
1123void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1124 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1125 BasicBlock *DestBB = BI->getSuccessor(0);
1126
1127 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1128 << *BB << *DestBB);
1129
1130 // If the destination block has a single pred, then this is a trivial edge,
1131 // just collapse it.
1132 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1133 if (SinglePred != DestBB) {
1134 assert(SinglePred == BB &&
1135 "Single predecessor not the same as predecessor");
1136 // Merge DestBB into SinglePred/BB and delete it.
1138 // Note: BB(=SinglePred) will not be deleted on this path.
1139 // DestBB(=its single successor) is the one that was deleted.
1140 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1141
1142 if (IsHugeFunc) {
1143 // Update FreshBBs to optimize the merged BB.
1144 FreshBBs.insert(SinglePred);
1145 FreshBBs.erase(DestBB);
1146 }
1147 return;
1148 }
1149 }
1150
1151 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1152 // to handle the new incoming edges it is about to have.
1153 for (PHINode &PN : DestBB->phis()) {
1154 // Remove the incoming value for BB, and remember it.
1155 Value *InVal = PN.removeIncomingValue(BB, false);
1156
1157 // Two options: either the InVal is a phi node defined in BB or it is some
1158 // value that dominates BB.
1159 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1160 if (InValPhi && InValPhi->getParent() == BB) {
1161 // Add all of the input values of the input PHI as inputs of this phi.
1162 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1163 PN.addIncoming(InValPhi->getIncomingValue(i),
1164 InValPhi->getIncomingBlock(i));
1165 } else {
1166 // Otherwise, add one instance of the dominating value for each edge that
1167 // we will be adding.
1168 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1169 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1170 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1171 } else {
1172 for (BasicBlock *Pred : predecessors(BB))
1173 PN.addIncoming(InVal, Pred);
1174 }
1175 }
1176 }
1177
1178 // Preserve loop Metadata.
1179 if (BI->hasMetadata(LLVMContext::MD_loop)) {
1180 for (auto *Pred : predecessors(BB))
1181 Pred->getTerminator()->copyMetadata(*BI, LLVMContext::MD_loop);
1182 }
1183
1184 // The PHIs are now updated, change everything that refers to BB to use
1185 // DestBB and remove BB.
1186 BB->replaceAllUsesWith(DestBB);
1187 BB->eraseFromParent();
1188 ++NumBlocksElim;
1189
1190 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1191}
1192
1193// Computes a map of base pointer relocation instructions to corresponding
1194// derived pointer relocation instructions given a vector of all relocate calls
1195static void computeBaseDerivedRelocateMap(
1196 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1197 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>>
1198 &RelocateInstMap) {
1199 // Collect information in two maps: one primarily for locating the base object
1200 // while filling the second map; the second map is the final structure holding
1201 // a mapping between Base and corresponding Derived relocate calls
1202 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
1203 for (auto *ThisRelocate : AllRelocateCalls) {
1204 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1205 ThisRelocate->getDerivedPtrIndex());
1206 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1207 }
1208 for (auto &Item : RelocateIdxMap) {
1209 std::pair<unsigned, unsigned> Key = Item.first;
1210 if (Key.first == Key.second)
1211 // Base relocation: nothing to insert
1212 continue;
1213
1214 GCRelocateInst *I = Item.second;
1215 auto BaseKey = std::make_pair(Key.first, Key.first);
1216
1217 // We're iterating over RelocateIdxMap so we cannot modify it.
1218 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1219 if (MaybeBase == RelocateIdxMap.end())
1220 // TODO: We might want to insert a new base object relocate and gep off
1221 // that, if there are enough derived object relocates.
1222 continue;
1223
1224 RelocateInstMap[MaybeBase->second].push_back(I);
1225 }
1226}
1227
1228// Accepts a GEP and extracts the operands into a vector provided they're all
1229// small integer constants
1230static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1231 SmallVectorImpl<Value *> &OffsetV) {
1232 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1233 // Only accept small constant integer operands
1234 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1235 if (!Op || Op->getZExtValue() > 20)
1236 return false;
1237 }
1238
1239 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1240 OffsetV.push_back(GEP->getOperand(i));
1241 return true;
1242}
1243
1244// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1245// replace, computes a replacement, and applies it.
1246static bool
1247simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1248 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1249 bool MadeChange = false;
1250 // We must ensure the relocation of the derived pointer is defined after the
1251 // relocation of the base pointer. If we find a relocation that uses the base
1252 // and is defined earlier than the relocation of the base, we move the base's
1253 // relocation right before that relocation. We only consider relocations in
1254 // the same basic block as the relocation of the base; relocations from other
1255 // basic blocks are skipped by this optimization and we do not care about them.
1256 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1257 &*R != RelocatedBase; ++R)
1258 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1259 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1260 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1261 RelocatedBase->moveBefore(RI->getIterator());
1262 MadeChange = true;
1263 break;
1264 }
1265
1266 for (GCRelocateInst *ToReplace : Targets) {
1267 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1268 "Not relocating a derived object of the original base object");
1269 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1270 // A duplicate relocate call. TODO: coalesce duplicates.
1271 continue;
1272 }
1273
1274 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1275 // Base and derived relocates are in different basic blocks.
1276 // In this case transform is only valid when base dominates derived
1277 // relocate. However it would be too expensive to check dominance
1278 // for each such relocate, so we skip the whole transformation.
1279 continue;
1280 }
1281
1282 Value *Base = ToReplace->getBasePtr();
1283 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1284 if (!Derived || Derived->getPointerOperand() != Base)
1285 continue;
1286
1287 SmallVector<Value *, 2> OffsetV;
1288 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1289 continue;
1290
1291 // Create a Builder and replace the target callsite with a gep
1292 assert(RelocatedBase->getNextNode() &&
1293 "Should always have one since it's not a terminator");
1294
1295 // Insert after RelocatedBase
1296 IRBuilder<> Builder(RelocatedBase->getNextNode());
1297 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1298
1299 // If gc_relocate does not match the actual type, cast it to the right type.
1300 // In theory, there must be a bitcast after gc_relocate if the type does not
1301 // match, and we should reuse it to get the derived pointer. But there could
1302 // be cases like this:
1303 // bb1:
1304 // ...
1305 // %g1 = call coldcc i8 addrspace(1)*
1306 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1307 //
1308 // bb2:
1309 // ...
1310 // %g2 = call coldcc i8 addrspace(1)*
1311 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1312 //
1313 // merge:
1314 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1315 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1316 //
1317 // In this case, we cannot find the bitcast anymore, so we insert a new
1318 // bitcast whether there is already one or not. In this way, we can handle
1319 // all cases, and the extra bitcast should be optimized away in later
1320 // passes.
1321 Value *ActualRelocatedBase = RelocatedBase;
1322 if (RelocatedBase->getType() != Base->getType()) {
1323 ActualRelocatedBase =
1324 Builder.CreateBitCast(RelocatedBase, Base->getType());
1325 }
1326 Value *Replacement =
1327 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1328 ArrayRef(OffsetV));
1329 Replacement->takeName(ToReplace);
1330 // If the newly generated derived pointer's type does not match the original
1331 // derived pointer's type, cast the new derived pointer to match it. Same
1332 // reasoning as above.
1333 Value *ActualReplacement = Replacement;
1334 if (Replacement->getType() != ToReplace->getType()) {
1335 ActualReplacement =
1336 Builder.CreateBitCast(Replacement, ToReplace->getType());
1337 }
1338 ToReplace->replaceAllUsesWith(ActualReplacement);
1339 ToReplace->eraseFromParent();
1340
1341 MadeChange = true;
1342 }
1343 return MadeChange;
1344}
1345
1346// Turns this:
1347//
1348// %base = ...
1349// %ptr = gep %base + 15
1350// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1351// %base' = relocate(%tok, i32 4, i32 4)
1352// %ptr' = relocate(%tok, i32 4, i32 5)
1353// %val = load %ptr'
1354//
1355// into this:
1356//
1357// %base = ...
1358// %ptr = gep %base + 15
1359// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1360// %base' = gc.relocate(%tok, i32 4, i32 4)
1361// %ptr' = gep %base' + 15
1362// %val = load %ptr'
1363bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1364 bool MadeChange = false;
1365 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1366 for (auto *U : I.users())
1367 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1368 // Collect all the relocate calls associated with a statepoint
1369 AllRelocateCalls.push_back(Relocate);
1370
1371 // We need at least one base pointer relocation + one derived pointer
1372 // relocation to mangle
1373 if (AllRelocateCalls.size() < 2)
1374 return false;
1375
1376 // RelocateInstMap is a mapping from the base relocate instruction to the
1377 // corresponding derived relocate instructions
1378 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1379 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1380 if (RelocateInstMap.empty())
1381 return false;
1382
1383 for (auto &Item : RelocateInstMap)
1384 // Item.first is the RelocatedBase to offset against
1385 // Item.second is the vector of Targets to replace
1386 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1387 return MadeChange;
1388}
1389
1390/// Sink the specified cast instruction into its user blocks.
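// Illustrative example (a sketch): given
//
//   entry:
//     %z = zext i8 %b to i32
//     br i1 %c, label %use, label %exit
//   use:
//     %r = add i32 %z, 1
//
// a clone of the zext is inserted at the start of %use and %r is rewritten to
// use it; if the original zext then has no uses left, it is erased.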
1391static bool SinkCast(CastInst *CI) {
1392 BasicBlock *DefBB = CI->getParent();
1393
1394 /// InsertedCasts - Only insert a cast in each block once.
1395 DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1396
1397 bool MadeChange = false;
1398 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1399 UI != E;) {
1400 Use &TheUse = UI.getUse();
1401 Instruction *User = cast<Instruction>(*UI);
1402
1403 // Figure out which BB this cast is used in. For PHI's this is the
1404 // appropriate predecessor block.
1405 BasicBlock *UserBB = User->getParent();
1406 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1407 UserBB = PN->getIncomingBlock(TheUse);
1408 }
1409
1410 // Preincrement use iterator so we don't invalidate it.
1411 ++UI;
1412
1413 // The first insertion point of a block containing an EH pad is after the
1414 // pad. If the pad is the user, we cannot sink the cast past the pad.
1415 if (User->isEHPad())
1416 continue;
1417
1418 // If the block selected to receive the cast is an EH pad that does not
1419 // allow non-PHI instructions before the terminator, we can't sink the
1420 // cast.
1421 if (UserBB->getTerminator()->isEHPad())
1422 continue;
1423
1424 // If this user is in the same block as the cast, don't change the cast.
1425 if (UserBB == DefBB)
1426 continue;
1427
1428 // If we have already inserted a cast into this block, use it.
1429 CastInst *&InsertedCast = InsertedCasts[UserBB];
1430
1431 if (!InsertedCast) {
1432 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1433 assert(InsertPt != UserBB->end());
1434 InsertedCast = cast<CastInst>(CI->clone());
1435 InsertedCast->insertBefore(*UserBB, InsertPt);
1436 }
1437
1438 // Replace a use of the cast with a use of the new cast.
1439 TheUse = InsertedCast;
1440 MadeChange = true;
1441 ++NumCastUses;
1442 }
1443
1444 // If we removed all uses, nuke the cast.
1445 if (CI->use_empty()) {
1446 salvageDebugInfo(*CI);
1447 CI->eraseFromParent();
1448 MadeChange = true;
1449 }
1450
1451 return MadeChange;
1452}
1453
1454/// If the specified cast instruction is a noop copy (e.g. it's casting from
1455/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1456/// reduce the number of virtual registers that must be created and coalesced.
1457///
1458/// Return true if any changes are made.
1459static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1460 const DataLayout &DL) {
1461 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1462 // than sinking only nop casts, but is helpful on some platforms.
1463 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1464 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1465 ASC->getDestAddressSpace()))
1466 return false;
1467 }
1468
1469 // If this is a noop copy,
1470 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1471 EVT DstVT = TLI.getValueType(DL, CI->getType());
1472
1473 // This is an fp<->int conversion?
1474 if (SrcVT.isInteger() != DstVT.isInteger())
1475 return false;
1476
1477 // If this is an extension, it will be a zero or sign extension, which
1478 // isn't a noop.
1479 if (SrcVT.bitsLT(DstVT))
1480 return false;
1481
1482 // If these values will be promoted, find out what they will be promoted
1483 // to. This helps us consider truncates on PPC as noop copies when they
1484 // are.
1485 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1486 TargetLowering::TypePromoteInteger)
1487 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1488 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1489 TargetLowering::TypePromoteInteger)
1490 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1491
1492 // If, after promotion, these are the same types, this is a noop copy.
1493 if (SrcVT != DstVT)
1494 return false;
1495
1496 return SinkCast(CI);
1497}
1498
1499// Match a simple increment by constant operation. Note that if a sub is
1500// matched, the step is negated (as if the step had been canonicalized to
1501// an add, even though we leave the instruction alone.)
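// Illustrative matches (a sketch):
//   %iv.next = add i32 %iv, 1    --> LHS = %iv, Step = 1
//   %iv.next = sub i32 %iv, 4    --> LHS = %iv, Step = -4 (negated)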
1502static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1503 Constant *&Step) {
1504 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1505 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1506 m_Instruction(LHS), m_Constant(Step)))))
1507 return true;
1508 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1509 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1510 m_Instruction(LHS), m_Constant(Step))))) {
1511 Step = ConstantExpr::getNeg(Step);
1512 return true;
1513 }
1514 return false;
1515}
1516
1517/// If given \p PN is an inductive variable with value IVInc coming from the
1518/// backedge, and on each iteration it gets increased by Step, return pair
1519/// <IVInc, Step>. Otherwise, return std::nullopt.
1520static std::optional<std::pair<Instruction *, Constant *>>
1521getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1522 const Loop *L = LI->getLoopFor(PN->getParent());
1523 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1524 return std::nullopt;
1525 auto *IVInc =
1526 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1527 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1528 return std::nullopt;
1529 Instruction *LHS = nullptr;
1530 Constant *Step = nullptr;
1531 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1532 return std::make_pair(IVInc, Step);
1533 return std::nullopt;
1534}
1535
1536static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1537 auto *I = dyn_cast<Instruction>(V);
1538 if (!I)
1539 return false;
1540 Instruction *LHS = nullptr;
1541 Constant *Step = nullptr;
1542 if (!matchIncrement(I, LHS, Step))
1543 return false;
1544 if (auto *PN = dyn_cast<PHINode>(LHS))
1545 if (auto IVInc = getIVIncrement(PN, LI))
1546 return IVInc->first == I;
1547 return false;
1548}
1549
1550bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1551 Value *Arg0, Value *Arg1,
1552 CmpInst *Cmp,
1553 Intrinsic::ID IID) {
1554 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1555 if (!isIVIncrement(BO, LI))
1556 return false;
1557 const Loop *L = LI->getLoopFor(BO->getParent());
1558 assert(L && "L should not be null after isIVIncrement()");
1559 // Do not risk moving the increment into a child loop.
1560 if (LI->getLoopFor(Cmp->getParent()) != L)
1561 return false;
1562
1563 // Finally, we need to ensure that the insert point will dominate all
1564 // existing uses of the increment.
1565
1566 auto &DT = getDT(*BO->getParent()->getParent());
1567 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1568 // If we're moving up the dom tree, all uses are trivially dominated.
1569 // (This is the common case for code produced by LSR.)
1570 return true;
1571
1572 // Otherwise, special case the single use in the phi recurrence.
1573 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1574 };
1575 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1576 // We used to use a dominator tree here to allow multi-block optimization.
1577 // But that was problematic because:
1578 // 1. It could cause a perf regression by hoisting the math op into the
1579 // critical path.
1580 // 2. It could cause a perf regression by creating a value that was live
1581 // across multiple blocks and increasing register pressure.
1582 // 3. Use of a dominator tree could cause large compile-time regression.
1583 // This is because we recompute the DT on every change in the main CGP
1584 // run-loop. The recomputing is probably unnecessary in many cases, so if
1585 // that was fixed, using a DT here would be ok.
1586 //
1587 // There is one important particular case we still want to handle: if BO is
1588 // the IV increment. Important properties that make it profitable:
1589 // - We can speculate IV increment anywhere in the loop (as long as the
1590 // indvar Phi is its only user);
1591 // - Upon computing Cmp, we effectively compute something equivalent to the
1592 // IV increment (even though it may look different in the IR). So moving it up
1593 // to the cmp point does not really increase register pressure.
1594 return false;
1595 }
1596
1597 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1598 if (BO->getOpcode() == Instruction::Add &&
1599 IID == Intrinsic::usub_with_overflow) {
1600 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1601 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1602 }
1603
1604 // Insert at the first instruction of the pair.
1605 Instruction *InsertPt = nullptr;
1606 for (Instruction &Iter : *Cmp->getParent()) {
1607 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1608 // the overflow intrinsic are defined.
1609 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1610 InsertPt = &Iter;
1611 break;
1612 }
1613 }
1614 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1615
1616 IRBuilder<> Builder(InsertPt);
1617 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1618 if (BO->getOpcode() != Instruction::Xor) {
1619 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1620 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1621 } else
1622 assert(BO->hasOneUse() &&
1623 "Patterns with XOr should use the BO only in the compare");
1624 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1625 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1626 Cmp->eraseFromParent();
1627 BO->eraseFromParent();
1628 return true;
1629}
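// Illustrative sketch (hypothetical IR, assumed only for exposition): for
// IID == Intrinsic::uadd_with_overflow, a pair such as
//   %add = add i32 %a, %b
//   %cmp = icmp ult i32 %add, %a
// is rewritten into roughly
//   %mathov = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %math   = extractvalue { i32, i1 } %mathov, 0
//   %ov     = extractvalue { i32, i1 } %mathov, 1
// with uses of %add replaced by %math and uses of %cmp replaced by %ov.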
1630
1631/// Match special-case patterns that check for unsigned add overflow.
1632static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1633 BinaryOperator *&Add) {
1634 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1635 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1636 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1637
1638 // We are not expecting non-canonical/degenerate code. Just bail out.
1639 if (isa<Constant>(A))
1640 return false;
1641
1642 ICmpInst::Predicate Pred = Cmp->getPredicate();
1643 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1644 B = ConstantInt::get(B->getType(), 1);
1645 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1646 B = Constant::getAllOnesValue(B->getType());
1647 else
1648 return false;
1649
1650 // Check the users of the variable operand of the compare looking for an add
1651 // with the adjusted constant.
1652 for (User *U : A->users()) {
1653 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1654 Add = cast<BinaryOperator>(U);
1655 return true;
1656 }
1657 }
1658 return false;
1659}
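// Illustrative sketch (hypothetical IR, assumed only for exposition): given
//   %cmp = icmp eq i32 %a, -1
//   %add = add i32 %a, 1
// the adjusted constant 1 is used to find the matching add, Add is set to it,
// and the pair can later be turned into @llvm.uadd.with.overflow(%a, 1).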
1660
1661/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1662/// intrinsic. Return true if any changes were made.
1663bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1664 ModifyDT &ModifiedDT) {
1665 bool EdgeCase = false;
1666 Value *A, *B;
1667 BinaryOperator *Add;
1668 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1669 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1670 return false;
1671 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1672 A = Add->getOperand(0);
1673 B = Add->getOperand(1);
1674 EdgeCase = true;
1675 }
1676
1677 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1678 TLI->getValueType(*DL, Add->getType()),
1679 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1680 return false;
1681
1682 // We don't want to move around uses of condition values this late, so we
1683 // check if it is legal to create the call to the intrinsic in the basic
1684 // block containing the icmp.
1685 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1686 return false;
1687
1688 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1689 Intrinsic::uadd_with_overflow))
1690 return false;
1691
1692 // Reset callers - do not crash by iterating over a dead instruction.
1693 ModifiedDT = ModifyDT::ModifyInstDT;
1694 return true;
1695}
1696
1697bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1698 ModifyDT &ModifiedDT) {
1699 // We are not expecting non-canonical/degenerate code. Just bail out.
1700 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1701 if (isa<Constant>(A) && isa<Constant>(B))
1702 return false;
1703
1704 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1705 ICmpInst::Predicate Pred = Cmp->getPredicate();
1706 if (Pred == ICmpInst::ICMP_UGT) {
1707 std::swap(A, B);
1708 Pred = ICmpInst::ICMP_ULT;
1709 }
1710 // Convert special-case: (A == 0) is the same as (A u< 1).
1711 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1712 B = ConstantInt::get(B->getType(), 1);
1713 Pred = ICmpInst::ICMP_ULT;
1714 }
1715 // Convert special-case: (A != 0) is the same as (0 u< A).
1716 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1717 std::swap(A, B);
1718 Pred = ICmpInst::ICMP_ULT;
1719 }
1720 if (Pred != ICmpInst::ICMP_ULT)
1721 return false;
1722
1723 // Walk the users of a variable operand of a compare looking for a subtract or
1724 // add with that same operand. Also match the 2nd operand of the compare to
1725 // the add/sub, but that may be a negated constant operand of an add.
1726 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1727 BinaryOperator *Sub = nullptr;
1728 for (User *U : CmpVariableOperand->users()) {
1729 // A - B, A u< B --> usubo(A, B)
1730 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1731 Sub = cast<BinaryOperator>(U);
1732 break;
1733 }
1734
1735 // A + (-C), A u< C (canonicalized form of (sub A, C))
1736 const APInt *CmpC, *AddC;
1737 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1738 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1739 Sub = cast<BinaryOperator>(U);
1740 break;
1741 }
1742 }
1743 if (!Sub)
1744 return false;
1745
1746 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1747 TLI->getValueType(*DL, Sub->getType()),
1748 Sub->hasNUsesOrMore(1)))
1749 return false;
1750
1751 // We don't want to move around uses of condition values this late, so we
1752 // check if it is legal to create the call to the intrinsic in the basic
1753 // block containing the icmp.
1754 if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
1755 return false;
1756
1757 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1758 Cmp, Intrinsic::usub_with_overflow))
1759 return false;
1760
1761 // Reset callers - do not crash by iterating over a dead instruction.
1762 ModifiedDT = ModifyDT::ModifyInstDT;
1763 return true;
1764}
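// Illustrative sketch (hypothetical IR, assumed only for exposition): a typical
// match is
//   %sub = sub i32 %a, %b
//   %cmp = icmp ult i32 %a, %b         ; the borrow/overflow test
// which becomes @llvm.usub.with.overflow.i32(i32 %a, i32 %b), with the math and
// overflow results extracted as in replaceMathCmpWithIntrinsic above. The
// canonicalized "add %a, -C" paired with "icmp ult %a, C" is handled by the
// A + (-C) case in the loop.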
1765
1766// Decanonicalizes icmp+ctpop power-of-two test if ctpop is slow.
1767// The same transformation exists in DAG combiner, but we repeat it here because
1768// DAG builder can break the pattern by moving icmp into a successor block.
1769bool CodeGenPrepare::unfoldPowerOf2Test(CmpInst *Cmp) {
1770 CmpPredicate Pred;
1771 Value *X;
1772 const APInt *C;
1773
1774 // (icmp (ctpop x), c)
1775 if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(m_Value(X)),
1776 m_APInt(C))))
1777 return false;
1778
1779 // We're only interested in "is power of 2 [or zero]" patterns.
1780 bool IsStrictlyPowerOf2Test = ICmpInst::isEquality(Pred) && *C == 1;
1781 bool IsPowerOf2OrZeroTest = (Pred == CmpInst::ICMP_ULT && *C == 2) ||
1782 (Pred == CmpInst::ICMP_UGT && *C == 1);
1783 if (!IsStrictlyPowerOf2Test && !IsPowerOf2OrZeroTest)
1784 return false;
1785
1786 // Some targets have better codegen for `ctpop(x) u</u>= 2/1` than for
1787 // `ctpop(x) ==/!= 1`. If ctpop is fast, only try changing the comparison,
1788 // and otherwise expand ctpop into a few simple instructions.
1789 Type *OpTy = X->getType();
1790 if (TLI->isCtpopFast(TLI->getValueType(*DL, OpTy))) {
1791 // Look for `ctpop(x) ==/!= 1`, where `ctpop(x)` is known to be non-zero.
1792 if (!IsStrictlyPowerOf2Test || !isKnownNonZero(Cmp->getOperand(0), *DL))
1793 return false;
1794
1795 // ctpop(x) == 1 -> ctpop(x) u< 2
1796 // ctpop(x) != 1 -> ctpop(x) u> 1
1797 if (Pred == ICmpInst::ICMP_EQ) {
1798 Cmp->setOperand(1, ConstantInt::get(OpTy, 2));
1799 Cmp->setPredicate(ICmpInst::ICMP_ULT);
1800 } else {
1801 Cmp->setPredicate(ICmpInst::ICMP_UGT);
1802 }
1803 return true;
1804 }
1805
1806 Value *NewCmp;
1807 if (IsPowerOf2OrZeroTest ||
1808 (IsStrictlyPowerOf2Test && isKnownNonZero(Cmp->getOperand(0), *DL))) {
1809 // ctpop(x) u< 2 -> (x & (x - 1)) == 0
1810 // ctpop(x) u> 1 -> (x & (x - 1)) != 0
1811 IRBuilder<> Builder(Cmp);
1812 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1813 Value *And = Builder.CreateAnd(X, Sub);
1814 CmpInst::Predicate NewPred =
1815 (Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_EQ)
1816 ? CmpInst::ICMP_EQ
1817 : CmpInst::ICMP_NE;
1818 NewCmp = Builder.CreateICmp(NewPred, And, ConstantInt::getNullValue(OpTy));
1819 } else {
1820 // ctpop(x) == 1 -> (x ^ (x - 1)) u> (x - 1)
1821 // ctpop(x) != 1 -> (x ^ (x - 1)) u<= (x - 1)
1822 IRBuilder<> Builder(Cmp);
1823 Value *Sub = Builder.CreateAdd(X, Constant::getAllOnesValue(OpTy));
1824 Value *Xor = Builder.CreateXor(X, Sub);
1825 CmpInst::Predicate NewPred =
1826 Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT : CmpInst::ICMP_ULE;
1827 NewCmp = Builder.CreateICmp(NewPred, Xor, Sub);
1828 }
1829
1830 Cmp->replaceAllUsesWith(NewCmp);
1831 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1832 return true;
1833}
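// Illustrative sketch (hypothetical IR, assumed only for exposition): with a
// slow ctpop,
//   %pop = call i32 @llvm.ctpop.i32(i32 %x)
//   %cmp = icmp ult i32 %pop, 2        ; "x is a power of 2 or zero"
// is expanded to
//   %xm1 = add i32 %x, -1
//   %and = and i32 %x, %xm1
//   %cmp = icmp eq i32 %and, 0
// matching the rewrites described in the comments above.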
1834
1835/// Sink the given CmpInst into user blocks to reduce the number of virtual
1836/// registers that must be created and coalesced. This is a clear win except on
1837/// targets with multiple condition code registers (PowerPC), where it might
1838/// lose; some adjustment may be wanted there.
1839///
1840/// Return true if any changes are made.
1841static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI,
1842 const DataLayout &DL) {
1843 if (TLI.hasMultipleConditionRegisters(EVT::getEVT(Cmp->getType())))
1844 return false;
1845
1846 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1847 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1848 return false;
1849
1850 bool UsedInPhiOrCurrentBlock = any_of(Cmp->users(), [Cmp](User *U) {
1851 return isa<PHINode>(U) ||
1852 cast<Instruction>(U)->getParent() == Cmp->getParent();
1853 });
1854
1855 // Avoid sinking larger-than-legal integer comparisons unless it's ONLY used in
1856 // another BB.
1857 if (UsedInPhiOrCurrentBlock && Cmp->getOperand(0)->getType()->isIntegerTy() &&
1858 Cmp->getOperand(0)->getType()->getScalarSizeInBits() >
1859 DL.getLargestLegalIntTypeSizeInBits())
1860 return false;
1861
1862 // Only insert a cmp in each block once.
1863 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1864
1865 bool MadeChange = false;
1866 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1867 UI != E;) {
1868 Use &TheUse = UI.getUse();
1869 Instruction *User = cast<Instruction>(*UI);
1870
1871 // Preincrement use iterator so we don't invalidate it.
1872 ++UI;
1873
1874 // Don't bother for PHI nodes.
1875 if (isa<PHINode>(User))
1876 continue;
1877
1878 // Figure out which BB this cmp is used in.
1879 BasicBlock *UserBB = User->getParent();
1880 BasicBlock *DefBB = Cmp->getParent();
1881
1882 // If this user is in the same block as the cmp, don't change the cmp.
1883 if (UserBB == DefBB)
1884 continue;
1885
1886 // If we have already inserted a cmp into this block, use it.
1887 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1888
1889 if (!InsertedCmp) {
1890 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1891 assert(InsertPt != UserBB->end());
1892 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1893 Cmp->getOperand(0), Cmp->getOperand(1), "");
1894 InsertedCmp->insertBefore(*UserBB, InsertPt);
1895 // Propagate the debug info.
1896 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1897 }
1898
1899 // Replace a use of the cmp with a use of the new cmp.
1900 TheUse = InsertedCmp;
1901 MadeChange = true;
1902 ++NumCmpUses;
1903 }
1904
1905 // If we removed all uses, nuke the cmp.
1906 if (Cmp->use_empty()) {
1907 Cmp->eraseFromParent();
1908 MadeChange = true;
1909 }
1910
1911 return MadeChange;
1912}
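// Illustrative sketch (hypothetical IR, assumed only for exposition): if
//   bb0:  %cmp = icmp eq i32 %a, %b
//   bb1:  br i1 %cmp, label %t, label %f
// then a clone of the compare is inserted at the start of bb1 and the branch is
// rewired to it, so isel can fold the compare into the branch; the original
// %cmp is erased once it has no remaining uses.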
1913
1914/// For pattern like:
1915///
1916/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1917/// ...
1918/// DomBB:
1919/// ...
1920/// br DomCond, TrueBB, CmpBB
1921/// CmpBB: (with DomBB being the single predecessor)
1922/// ...
1923/// Cmp = icmp eq CmpOp0, CmpOp1
1924/// ...
1925///
1926/// On targets where the lowering of icmp sgt/slt differs from the lowering of
1927/// icmp eq (PowerPC), this would require two comparisons. This function tries
1928/// to convert 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1929/// After that, DomCond and Cmp can use the same comparison, saving one
1930/// comparison.
1931///
1932/// Return true if any changes are made.
1933static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1934 const TargetLowering &TLI) {
1935 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1936 return false;
1937
1938 ICmpInst::Predicate Pred = Cmp->getPredicate();
1939 if (Pred != ICmpInst::ICMP_EQ)
1940 return false;
1941
1942 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1943 // icmp slt/sgt would introduce more redundant LLVM IR.
1944 for (User *U : Cmp->users()) {
1945 if (isa<BranchInst>(U))
1946 continue;
1947 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1948 continue;
1949 return false;
1950 }
1951
1952 // This is a cheap/incomplete check for dominance - just match a single
1953 // predecessor with a conditional branch.
1954 BasicBlock *CmpBB = Cmp->getParent();
1955 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1956 if (!DomBB)
1957 return false;
1958
1959 // We want to ensure that the only way control gets to the comparison of
1960 // interest is that a less/greater than comparison on the same operands is
1961 // false.
1962 Value *DomCond;
1963 BasicBlock *TrueBB, *FalseBB;
1964 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1965 return false;
1966 if (CmpBB != FalseBB)
1967 return false;
1968
1969 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1970 CmpPredicate DomPred;
1971 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1972 return false;
1973 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1974 return false;
1975
1976 // Convert the equality comparison to the opposite of the dominating
1977 // comparison and swap the direction for all branch/select users.
1978 // We have conceptually converted:
1979 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1980 // to
1981 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1982 // And similarly for branches.
1983 for (User *U : Cmp->users()) {
1984 if (auto *BI = dyn_cast<BranchInst>(U)) {
1985 assert(BI->isConditional() && "Must be conditional");
1986 BI->swapSuccessors();
1987 continue;
1988 }
1989 if (auto *SI = dyn_cast<SelectInst>(U)) {
1990 // Swap operands
1991 SI->swapValues();
1992 SI->swapProfMetadata();
1993 continue;
1994 }
1995 llvm_unreachable("Must be a branch or a select");
1996 }
1997 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1998 return true;
1999}
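// Illustrative sketch (hypothetical IR, assumed only for exposition): given
//   %domcond = icmp slt i32 %a, %b
//   br i1 %domcond, label %TrueBB, label %CmpBB
// CmpBB:
//   %cmp = icmp eq i32 %a, %b
//   br i1 %cmp, label %EqBB, label %GtBB
// the equality compare becomes "icmp sgt i32 %a, %b" and its branch successors
// are swapped, so both compares use the same sgt/slt lowering.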
2000
2001/// Many architectures use the same instruction for both subtract and cmp. Try
2002/// to swap cmp operands to match subtract operations to allow for CSE.
2003static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
2004 Value *Op0 = Cmp->getOperand(0);
2005 Value *Op1 = Cmp->getOperand(1);
2006 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
2007 isa<Constant>(Op1) || Op0 == Op1)
2008 return false;
2009
2010 // If a subtract already has the same operands as a compare, swapping would be
2011 // bad. If a subtract has the same operands as a compare but in reverse order,
2012 // then swapping is good.
2013 int GoodToSwap = 0;
2014 unsigned NumInspected = 0;
2015 for (const User *U : Op0->users()) {
2016 // Avoid walking many users.
2017 if (++NumInspected > 128)
2018 return false;
2019 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
2020 GoodToSwap++;
2021 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
2022 GoodToSwap--;
2023 }
2024
2025 if (GoodToSwap > 0) {
2026 Cmp->swapOperands();
2027 return true;
2028 }
2029 return false;
2030}
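// Illustrative sketch (hypothetical IR, assumed only for exposition): with
//   %d   = sub i32 %b, %a
//   %cmp = icmp ult i32 %a, %b
// the compare is swapped to "icmp ugt i32 %b, %a" so that it has the same
// operand order as the subtract, letting targets whose subtract also sets flags
// CSE the two.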
2031
2032static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
2033 const DataLayout &DL) {
2034 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
2035 if (!FCmp)
2036 return false;
2037
2038 // Don't fold if the target offers free fabs and the predicate is legal.
2039 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
2040 if (TLI.isFAbsFree(VT) &&
2041 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
2042 VT.getSimpleVT()))
2043 return false;
2044
2045 // Reverse the canonicalization if it is a FP class test
2046 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
2047 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
2048 };
2049 auto [ClassVal, ClassTest] =
2050 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
2051 FCmp->getOperand(0), FCmp->getOperand(1));
2052 if (!ClassVal)
2053 return false;
2054
2055 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
2056 return false;
2057
2058 IRBuilder<> Builder(Cmp);
2059 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
2060 Cmp->replaceAllUsesWith(IsFPClass);
2061 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
2062 return true;
2063}
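// Illustrative sketch (hypothetical IR, assumed only for exposition): a compare
// such as
//   %fabs = call double @llvm.fabs.f64(double %x)
//   %cmp  = fcmp oeq double %fabs, 0x7FF0000000000000   ; +infinity
// can be turned back into
//   %cmp  = call i1 @llvm.is.fpclass.f64(double %x, i32 516)   ; fcInf
// when fabs is not free or the fcmp predicate is not legal for the type.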
2064
2065static bool isRemOfLoopIncrementWithLoopInvariant(
2066 Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut,
2067 Value *&AddOffsetOut, PHINode *&LoopIncrPNOut) {
2068 Value *Incr, *RemAmt;
2069 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
2070 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
2071 return false;
2072
2073 Value *AddInst, *AddOffset;
2074 // Find out loop increment PHI.
2075 auto *PN = dyn_cast<PHINode>(Incr);
2076 if (PN != nullptr) {
2077 AddInst = nullptr;
2078 AddOffset = nullptr;
2079 } else {
2080 // Search through a NUW add on top of the loop increment.
2081 Value *V0, *V1;
2082 if (!match(Incr, m_NUWAdd(m_Value(V0), m_Value(V1))))
2083 return false;
2084
2085 AddInst = Incr;
2086 PN = dyn_cast<PHINode>(V0);
2087 if (PN != nullptr) {
2088 AddOffset = V1;
2089 } else {
2090 PN = dyn_cast<PHINode>(V1);
2091 AddOffset = V0;
2092 }
2093 }
2094
2095 if (!PN)
2096 return false;
2097
2098 // This isn't strictly necessary; what we really need is one increment and any
2099 // number of initial values, all being the same.
2100 if (PN->getNumIncomingValues() != 2)
2101 return false;
2102
2103 // Only trivially analyzable loops.
2104 Loop *L = LI->getLoopFor(PN->getParent());
2105 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2106 return false;
2107
2108 // Require that the remainder is in the loop.
2109 if (!L->contains(Rem))
2110 return false;
2111
2112 // Only works if the remainder amount is a loop invariant.
2113 if (!L->isLoopInvariant(RemAmt))
2114 return false;
2115
2116 // Only works if the AddOffset is a loop invariant.
2117 if (AddOffset && !L->isLoopInvariant(AddOffset))
2118 return false;
2119
2120 // Is the PHI a loop increment?
2121 auto LoopIncrInfo = getIVIncrement(PN, LI);
2122 if (!LoopIncrInfo)
2123 return false;
2124
2125 // We need remainder_amount % increment_amount to be zero. Increment of one
2126 // satisfies that without any special logic and is overwhelmingly the common
2127 // case.
2128 if (!match(LoopIncrInfo->second, m_One()))
2129 return false;
2130
2131 // Need the increment to not overflow.
2132 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2133 return false;
2134
2135 // Set output variables.
2136 RemAmtOut = RemAmt;
2137 LoopIncrPNOut = PN;
2138 AddInstOut = AddInst;
2139 AddOffsetOut = AddOffset;
2140
2141 return true;
2142}
2143
2144// Try to transform:
2145//
2146// for(i = Start; i < End; ++i)
2147// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2148//
2149// ->
2150//
2151// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2152// for(i = Start; i < End; ++i, ++rem)
2153// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2154static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2155 const LoopInfo *LI,
2156 SmallSet<BasicBlock *, 32> &FreshBBs,
2157 bool IsHuge) {
2158 Value *AddOffset, *RemAmt, *AddInst;
2159 PHINode *LoopIncrPN;
2160 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, AddInst,
2161 AddOffset, LoopIncrPN))
2162 return false;
2163
2164 // Only handle a non-constant remainder amount, as the extra IV is probably
2165 // not profitable in the constant case.
2166 //
2167 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2168 // we can rule out register pressure and ensure this `urem` is executed each
2169 // iteration, it's probably profitable to handle the const case as well.
2170 //
2171 // Potential TODO(2): Should we have a check for how "nested" this remainder
2172 // operation is? The new code runs every iteration so if the remainder is
2173 // guarded behind unlikely conditions this might not be worth it.
2174 if (match(RemAmt, m_ImmConstant()))
2175 return false;
2176
2177 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2178 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2179 // If we have an add, create the initial value for the remainder.
2180 // The logic here is:
2181 // (urem (add nuw Start, IncrLoopInvariant), RemAmtLoopInvariant)
2182 //
2183 // Only proceed if the expression simplifies (otherwise we can't fully
2184 // optimize out the urem).
2185 if (AddInst) {
2186 assert(AddOffset && "We found an add but missing values");
2187 // Without dom-condition/assumption cache we aren't likely to get much out
2188 // of a context instruction.
2189 Start = simplifyAddInst(Start, AddOffset,
2190 match(AddInst, m_NSWAdd(m_Value(), m_Value())),
2191 /*IsNUW=*/true, *DL);
2192 if (!Start)
2193 return false;
2194 }
2195
2196 // If we can't fully optimize out the `rem`, skip this transform.
2197 Start = simplifyURemInst(Start, RemAmt, *DL);
2198 if (!Start)
2199 return false;
2200
2201 // Create new remainder with induction variable.
2202 Type *Ty = Rem->getType();
2203 IRBuilder<> Builder(Rem->getContext());
2204
2205 Builder.SetInsertPoint(LoopIncrPN);
2206 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2207
2208 Builder.SetInsertPoint(cast<Instruction>(
2209 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2210 // `(add (urem x, y), 1)` is always nuw.
2211 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2212 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2213 Value *RemSel =
2214 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2215
2216 NewRem->addIncoming(Start, L->getLoopPreheader());
2217 NewRem->addIncoming(RemSel, L->getLoopLatch());
2218
2219 // Insert all touched BBs.
2220 FreshBBs.insert(LoopIncrPN->getParent());
2221 FreshBBs.insert(L->getLoopLatch());
2222 FreshBBs.insert(Rem->getParent());
2223 if (AddInst)
2224 FreshBBs.insert(cast<Instruction>(AddInst)->getParent());
2225 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2226 Rem->eraseFromParent();
2227 if (AddInst && AddInst->use_empty())
2228 cast<Instruction>(AddInst)->eraseFromParent();
2229 return true;
2230}
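// Illustrative sketch (hypothetical IR, assumed only for exposition): for a
// loop with
//   %i      = phi i64 [ 0, %preheader ], [ %i.next, %latch ]
//   %i.next = add nuw i64 %i, 1
//   %rem    = urem i64 %i, %n          ; %n loop-invariant and non-constant
// the urem is replaced by a second recurrence
//   %rem.iv   = phi i64 [ 0, %preheader ], [ %rem.sel, %latch ]
//   %rem.next = add nuw i64 %rem.iv, 1
//   %rem.cmp  = icmp eq i64 %rem.next, %n
//   %rem.sel  = select i1 %rem.cmp, i64 0, i64 %rem.next
// so no integer division executes inside the loop.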
2231
2232bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2233 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2234 return true;
2235 return false;
2236}
2237
2238bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2239 if (sinkCmpExpression(Cmp, *TLI, *DL))
2240 return true;
2241
2242 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2243 return true;
2244
2245 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2246 return true;
2247
2248 if (unfoldPowerOf2Test(Cmp))
2249 return true;
2250
2251 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2252 return true;
2253
2254 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2255 return true;
2256
2257 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2258 return true;
2259
2260 return false;
2261}
2262
2263/// Duplicate and sink the given 'and' instruction into user blocks where it is
2264/// used in a compare to allow isel to generate better code for targets where
2265/// this operation can be combined.
2266///
2267/// Return true if any changes are made.
2268static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2269 SetOfInstrs &InsertedInsts) {
2270 // Double-check that we're not trying to optimize an instruction that was
2271 // already optimized by some other part of this pass.
2272 assert(!InsertedInsts.count(AndI) &&
2273 "Attempting to optimize already optimized and instruction");
2274 (void)InsertedInsts;
2275
2276 // Nothing to do for single use in same basic block.
2277 if (AndI->hasOneUse() &&
2278 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2279 return false;
2280
2281 // Try to avoid cases where sinking/duplicating is likely to increase register
2282 // pressure.
2283 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2284 !isa<ConstantInt>(AndI->getOperand(1)) &&
2285 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2286 return false;
2287
2288 for (auto *U : AndI->users()) {
2289 Instruction *User = cast<Instruction>(U);
2290
2291 // Only sink 'and' feeding icmp with 0.
2292 if (!isa<ICmpInst>(User))
2293 return false;
2294
2295 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2296 if (!CmpC || !CmpC->isZero())
2297 return false;
2298 }
2299
2300 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2301 return false;
2302
2303 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2304 LLVM_DEBUG(AndI->getParent()->dump());
2305
2306 // Push the 'and' into the same block as the icmp 0. There should only be
2307 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2308 // others, so we don't need to keep track of which BBs we insert into.
2309 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2310 UI != E;) {
2311 Use &TheUse = UI.getUse();
2312 Instruction *User = cast<Instruction>(*UI);
2313
2314 // Preincrement use iterator so we don't invalidate it.
2315 ++UI;
2316
2317 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2318
2319 // Keep the 'and' in the same place if the use is already in the same block.
2320 Instruction *InsertPt =
2321 User->getParent() == AndI->getParent() ? AndI : User;
2322 Instruction *InsertedAnd = BinaryOperator::Create(
2323 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2324 InsertPt->getIterator());
2325 // Propagate the debug info.
2326 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2327
2328 // Replace a use of the 'and' with a use of the new 'and'.
2329 TheUse = InsertedAnd;
2330 ++NumAndUses;
2331 LLVM_DEBUG(User->getParent()->dump());
2332 }
2333
2334 // We removed all uses, nuke the and.
2335 AndI->eraseFromParent();
2336 return true;
2337}
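// Illustrative sketch (hypothetical IR, assumed only for exposition): if
//   bb0:  %and = and i32 %x, 256
//   bb1:  %c   = icmp eq i32 %and, 0
// then a copy of the 'and' is placed immediately before the icmp in bb1, so
// targets with a combined mask-and-compare-with-zero instruction can match both
// in one step.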
2338
2339/// Check if the candidates could be combined with a shift instruction, which
2340/// includes:
2341/// 1. Truncate instruction
2342/// 2. 'And' instruction where the immediate is a mask of the low bits:
2343/// imm & (imm+1) == 0
2344static bool isExtractBitsCandidateUse(Instruction *User) {
2345 if (!isa<TruncInst>(User)) {
2346 if (User->getOpcode() != Instruction::And ||
2347 !isa<ConstantInt>(User->getOperand(1)))
2348 return false;
2349
2350 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2351
2352 if ((Cimm & (Cimm + 1)).getBoolValue())
2353 return false;
2354 }
2355 return true;
2356}
2357
2358/// Sink both shift and truncate instruction to the use of truncate's BB.
2359static bool
2360SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2361 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2362 const TargetLowering &TLI, const DataLayout &DL) {
2363 BasicBlock *UserBB = User->getParent();
2364 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2365 auto *TruncI = cast<TruncInst>(User);
2366 bool MadeChange = false;
2367
2368 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2369 TruncE = TruncI->user_end();
2370 TruncUI != TruncE;) {
2371
2372 Use &TruncTheUse = TruncUI.getUse();
2373 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2374 // Preincrement use iterator so we don't invalidate it.
2375
2376 ++TruncUI;
2377
2378 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2379 if (!ISDOpcode)
2380 continue;
2381
2382 // If the use is actually a legal node, there will not be an
2383 // implicit truncate.
2384 // FIXME: always querying the result type is just an
2385 // approximation; some nodes' legality is determined by the
2386 // operand or other means. There's no good way to find out though.
2387 if (TLI.isOperationLegalOrCustom(
2388 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2389 continue;
2390
2391 // Don't bother for PHI nodes.
2392 if (isa<PHINode>(TruncUser))
2393 continue;
2394
2395 BasicBlock *TruncUserBB = TruncUser->getParent();
2396
2397 if (UserBB == TruncUserBB)
2398 continue;
2399
2400 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2401 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2402
2403 if (!InsertedShift && !InsertedTrunc) {
2404 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2405 assert(InsertPt != TruncUserBB->end());
2406 // Sink the shift
2407 if (ShiftI->getOpcode() == Instruction::AShr)
2408 InsertedShift =
2409 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2410 else
2411 InsertedShift =
2412 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2413 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2414 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2415
2416 // Sink the trunc
2417 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2418 TruncInsertPt++;
2419 // It will go ahead of any debug-info.
2420 TruncInsertPt.setHeadBit(true);
2421 assert(TruncInsertPt != TruncUserBB->end());
2422
2423 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2424 TruncI->getType(), "");
2425 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2426 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2427
2428 MadeChange = true;
2429
2430 TruncTheUse = InsertedTrunc;
2431 }
2432 }
2433 return MadeChange;
2434}
2435
2436/// Sink the shift *right* instruction into user blocks if the uses could
2437/// potentially be combined with this shift instruction and generate BitExtract
2438/// instruction. It will only be applied if the architecture supports BitExtract
2439/// instruction. Here is an example:
2440/// BB1:
2441/// %x.extract.shift = lshr i64 %arg1, 32
2442/// BB2:
2443/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2444/// ==>
2445///
2446/// BB2:
2447/// %x.extract.shift.1 = lshr i64 %arg1, 32
2448/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2449///
2450/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2451/// instruction.
2452/// Return true if any changes are made.
2453static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2454 const TargetLowering &TLI,
2455 const DataLayout &DL) {
2456 BasicBlock *DefBB = ShiftI->getParent();
2457
2458 /// Only insert instructions in each block once.
2459 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2460
2461 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2462
2463 bool MadeChange = false;
2464 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2465 UI != E;) {
2466 Use &TheUse = UI.getUse();
2467 Instruction *User = cast<Instruction>(*UI);
2468 // Preincrement use iterator so we don't invalidate it.
2469 ++UI;
2470
2471 // Don't bother for PHI nodes.
2472 if (isa<PHINode>(User))
2473 continue;
2474
2475 if (!isExtractBitsCandidateUse(User))
2476 continue;
2477
2478 BasicBlock *UserBB = User->getParent();
2479
2480 if (UserBB == DefBB) {
2481 // If the shift and truncate instructions are in the same BB, the use of
2482 // the truncate (TruncUse) may still introduce another truncate if it is
2483 // not legal. In this case, we would like to sink both the shift and the
2484 // truncate to the BB of TruncUse.
2485 // for example:
2486 // BB1:
2487 // i64 shift.result = lshr i64 opnd, imm
2488 // trunc.result = trunc shift.result to i16
2489 //
2490 // BB2:
2491 // ----> We will have an implicit truncate here if the architecture does
2492 // not have i16 compare.
2493 // cmp i16 trunc.result, opnd2
2494 //
2495 if (isa<TruncInst>(User) &&
2496 shiftIsLegal
2497 // If the type of the truncate is legal, no truncate will be
2498 // introduced in other basic blocks.
2499 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2500 MadeChange =
2501 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2502
2503 continue;
2504 }
2505 // If we have already inserted a shift into this block, use it.
2506 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2507
2508 if (!InsertedShift) {
2509 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2510 assert(InsertPt != UserBB->end());
2511
2512 if (ShiftI->getOpcode() == Instruction::AShr)
2513 InsertedShift =
2514 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2515 else
2516 InsertedShift =
2517 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2518 InsertedShift->insertBefore(*UserBB, InsertPt);
2519 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2520
2521 MadeChange = true;
2522 }
2523
2524 // Replace a use of the shift with a use of the new shift.
2525 TheUse = InsertedShift;
2526 }
2527
2528 // If we removed all uses, or there are none, nuke the shift.
2529 if (ShiftI->use_empty()) {
2530 salvageDebugInfo(*ShiftI);
2531 ShiftI->eraseFromParent();
2532 MadeChange = true;
2533 }
2534
2535 return MadeChange;
2536}
2537
2538/// If counting leading or trailing zeros is an expensive operation and a zero
2539/// input is defined, add a check for zero to avoid calling the intrinsic.
2540///
2541/// We want to transform:
2542/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2543///
2544/// into:
2545/// entry:
2546/// %cmpz = icmp eq i64 %A, 0
2547/// br i1 %cmpz, label %cond.end, label %cond.false
2548/// cond.false:
2549/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2550/// br label %cond.end
2551/// cond.end:
2552/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2553///
2554/// If the transform is performed, return true and set ModifiedDT to true.
2555static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI,
2556 const TargetLowering *TLI,
2557 const DataLayout *DL, ModifyDT &ModifiedDT,
2558 SmallSet<BasicBlock *, 32> &FreshBBs,
2559 bool IsHugeFunc) {
2560 // If a zero input is undefined, it doesn't make sense to despeculate that.
2561 if (match(CountZeros->getOperand(1), m_One()))
2562 return false;
2563
2564 // If it's cheap to speculate, there's nothing to do.
2565 Type *Ty = CountZeros->getType();
2566 auto IntrinsicID = CountZeros->getIntrinsicID();
2567 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2568 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2569 return false;
2570
2571 // Only handle scalar cases. Anything else requires too much work.
2572 unsigned SizeInBits = Ty->getScalarSizeInBits();
2573 if (Ty->isVectorTy())
2574 return false;
2575
2576 // Bail if the value is never zero.
2577 Use &Op = CountZeros->getOperandUse(0);
2578 if (isKnownNonZero(Op, *DL))
2579 return false;
2580
2581 // The intrinsic will be sunk behind a compare against zero and branch.
2582 BasicBlock *StartBlock = CountZeros->getParent();
2583 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2584 if (IsHugeFunc)
2585 FreshBBs.insert(CallBlock);
2586
2587 // Create another block after the count zero intrinsic. A PHI will be added
2588 // in this block to select the result of the intrinsic or the bit-width
2589 // constant if the input to the intrinsic is zero.
2590 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2591 // Any debug-info after CountZeros should not be included.
2592 SplitPt.setHeadBit(true);
2593 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2594 if (IsHugeFunc)
2595 FreshBBs.insert(EndBlock);
2596
2597 // Update the LoopInfo. The new blocks are in the same loop as the start
2598 // block.
2599 if (Loop *L = LI.getLoopFor(StartBlock)) {
2600 L->addBasicBlockToLoop(CallBlock, LI);
2601 L->addBasicBlockToLoop(EndBlock, LI);
2602 }
2603
2604 // Set up a builder to create a compare, conditional branch, and PHI.
2605 IRBuilder<> Builder(CountZeros->getContext());
2606 Builder.SetInsertPoint(StartBlock->getTerminator());
2607 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2608
2609 // Replace the unconditional branch that was created by the first split with
2610 // a compare against zero and a conditional branch.
2611 Value *Zero = Constant::getNullValue(Ty);
2612 // Avoid introducing branch on poison. This also replaces the ctz operand.
2613 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2614 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2615 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2616 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2617 StartBlock->getTerminator()->eraseFromParent();
2618
2619 // Create a PHI in the end block to select either the output of the intrinsic
2620 // or the bit width of the operand.
2621 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2622 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2623 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2624 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2625 PN->addIncoming(BitWidth, StartBlock);
2626 PN->addIncoming(CountZeros, CallBlock);
2627
2628 // We are explicitly handling the zero case, so we can set the intrinsic's
2629 // undefined zero argument to 'true'. This will also prevent reprocessing the
2630 // intrinsic; we only despeculate when a zero input is defined.
2631 CountZeros->setArgOperand(1, Builder.getTrue());
2632 ModifiedDT = ModifyDT::ModifyBBDT;
2633 return true;
2634}
2635
2636bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2637 BasicBlock *BB = CI->getParent();
2638
2639 // Sink address computing for memory operands into the block.
2640 if (CI->isInlineAsm() && optimizeInlineAsmInst(CI))
2641 return true;
2642
2643 // Align the pointer arguments to this call if the target thinks it's a good
2644 // idea
2645 unsigned MinSize;
2646 Align PrefAlign;
2647 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2648 for (auto &Arg : CI->args()) {
2649 // We want to align both objects whose address is used directly and
2650 // objects whose address is used in casts and GEPs, though it only makes
2651 // sense for GEPs if the offset is a multiple of the desired alignment and
2652 // if size - offset meets the size threshold.
2653 if (!Arg->getType()->isPointerTy())
2654 continue;
2655 APInt Offset(DL->getIndexSizeInBits(
2656 cast<PointerType>(Arg->getType())->getAddressSpace()),
2657 0);
2658 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2659 uint64_t Offset2 = Offset.getLimitedValue();
2660 if (!isAligned(PrefAlign, Offset2))
2661 continue;
2662 AllocaInst *AI;
2663 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign) {
2664 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(*DL);
2665 if (AllocaSize && AllocaSize->getKnownMinValue() >= MinSize + Offset2)
2666 AI->setAlignment(PrefAlign);
2667 }
2668 // Global variables can only be aligned if they are defined in this
2669 // object (i.e. they are uniquely initialized in this object), and
2670 // over-aligning global variables that have an explicit section is
2671 // forbidden.
2672 GlobalVariable *GV;
2673 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2674 GV->getPointerAlignment(*DL) < PrefAlign &&
2675 GV->getGlobalSize(*DL) >= MinSize + Offset2)
2676 GV->setAlignment(PrefAlign);
2677 }
2678 }
2679 // If this is a memcpy (or similar) then we may be able to improve the
2680 // alignment.
2681 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2682 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2683 MaybeAlign MIDestAlign = MI->getDestAlign();
2684 if (!MIDestAlign || DestAlign > *MIDestAlign)
2685 MI->setDestAlignment(DestAlign);
2686 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2687 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2688 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2689 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2690 MTI->setSourceAlignment(SrcAlign);
2691 }
2692 }
2693
2694 // If we have a cold call site, try to sink addressing computation into the
2695 // cold block. This interacts with our handling for loads and stores to
2696 // ensure that we can fold all uses of a potential addressing computation
2697 // into their uses. TODO: generalize this to work over profiling data
2698 if (CI->hasFnAttr(Attribute::Cold) &&
2699 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2700 for (auto &Arg : CI->args()) {
2701 if (!Arg->getType()->isPointerTy())
2702 continue;
2703 unsigned AS = Arg->getType()->getPointerAddressSpace();
2704 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2705 return true;
2706 }
2707
2708 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2709 if (II) {
2710 switch (II->getIntrinsicID()) {
2711 default:
2712 break;
2713 case Intrinsic::assume:
2714 llvm_unreachable("llvm.assume should have been removed already");
2715 case Intrinsic::allow_runtime_check:
2716 case Intrinsic::allow_ubsan_check:
2717 case Intrinsic::experimental_widenable_condition: {
2718 // Give up on future widening opportunities so that we can fold away dead
2719 // paths and merge blocks before going into block-local instruction
2720 // selection.
2721 if (II->use_empty()) {
2722 II->eraseFromParent();
2723 return true;
2724 }
2725 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2726 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2727 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2728 });
2729 return true;
2730 }
2731 case Intrinsic::objectsize:
2732 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2733 case Intrinsic::is_constant:
2734 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2735 case Intrinsic::aarch64_stlxr:
2736 case Intrinsic::aarch64_stxr: {
2737 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2738 if (!ExtVal || !ExtVal->hasOneUse() ||
2739 ExtVal->getParent() == CI->getParent())
2740 return false;
2741 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2742 ExtVal->moveBefore(CI->getIterator());
2743 // Mark this instruction as "inserted by CGP", so that other
2744 // optimizations don't touch it.
2745 InsertedInsts.insert(ExtVal);
2746 return true;
2747 }
2748
2749 case Intrinsic::launder_invariant_group:
2750 case Intrinsic::strip_invariant_group: {
2751 Value *ArgVal = II->getArgOperand(0);
2752 auto it = LargeOffsetGEPMap.find(II);
2753 if (it != LargeOffsetGEPMap.end()) {
2754 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2755 // Make sure not to have to deal with iterator invalidation
2756 // after possibly adding ArgVal to LargeOffsetGEPMap.
2757 auto GEPs = std::move(it->second);
2758 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2759 LargeOffsetGEPMap.erase(II);
2760 }
2761
2762 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2763 II->eraseFromParent();
2764 return true;
2765 }
2766 case Intrinsic::cttz:
2767 case Intrinsic::ctlz:
2768 // If counting zeros is expensive, try to avoid it.
2769 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2770 IsHugeFunc);
2771 case Intrinsic::fshl:
2772 case Intrinsic::fshr:
2773 return optimizeFunnelShift(II);
2774 case Intrinsic::masked_gather:
2775 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2776 case Intrinsic::masked_scatter:
2777 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2778 case Intrinsic::masked_load:
2779 // Treat v1X masked load as load X type.
2780 if (auto *VT = dyn_cast<FixedVectorType>(II->getType())) {
2781 if (VT->getNumElements() == 1) {
2782 Value *PtrVal = II->getArgOperand(0);
2783 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2784 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2785 return true;
2786 }
2787 }
2788 return false;
2789 case Intrinsic::masked_store:
2790 // Treat v1X masked store as store X type.
2791 if (auto *VT =
2792 dyn_cast<FixedVectorType>(II->getArgOperand(0)->getType())) {
2793 if (VT->getNumElements() == 1) {
2794 Value *PtrVal = II->getArgOperand(1);
2795 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2796 if (optimizeMemoryInst(II, PtrVal, VT->getElementType(), AS))
2797 return true;
2798 }
2799 }
2800 return false;
2801 case Intrinsic::umul_with_overflow:
2802 return optimizeMulWithOverflow(II, /*IsSigned=*/false, ModifiedDT);
2803 case Intrinsic::smul_with_overflow:
2804 return optimizeMulWithOverflow(II, /*IsSigned=*/true, ModifiedDT);
2805 }
2806
2807 SmallVector<Value *, 2> PtrOps;
2808 Type *AccessTy;
2809 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2810 while (!PtrOps.empty()) {
2811 Value *PtrVal = PtrOps.pop_back_val();
2812 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2813 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2814 return true;
2815 }
2816 }
2817
2818 // From here on out we're working with named functions.
2819 auto *Callee = CI->getCalledFunction();
2820 if (!Callee)
2821 return false;
2822
2823 // Lower all default uses of _chk calls. This is very similar
2824 // to what InstCombineCalls does, but here we are only lowering calls
2825 // to fortified library functions (e.g. __memcpy_chk) that have the default
2826 // "don't know" as the objectsize. Anything else should be left alone.
2827 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2828 IRBuilder<> Builder(CI);
2829 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2830 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2831 CI->eraseFromParent();
2832 return true;
2833 }
2834
2835 // SCCP may have propagated, among other things, C++ static variables across
2836 // calls. If this happens to be the case, we may want to undo it in order to
2837 // avoid redundant pointer computation of the constant, as the function
2838 // returning the constant needs to be executed anyway.
2839 auto GetUniformReturnValue = [](const Function *F) -> GlobalVariable * {
2840 if (!F->getReturnType()->isPointerTy())
2841 return nullptr;
2842
2843 GlobalVariable *UniformValue = nullptr;
2844 for (auto &BB : *F) {
2845 if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
2846 if (auto *V = dyn_cast<GlobalVariable>(RI->getReturnValue())) {
2847 if (!UniformValue)
2848 UniformValue = V;
2849 else if (V != UniformValue)
2850 return nullptr;
2851 } else {
2852 return nullptr;
2853 }
2854 }
2855 }
2856
2857 return UniformValue;
2858 };
2859
2860 if (Callee->hasExactDefinition()) {
2861 if (GlobalVariable *RV = GetUniformReturnValue(Callee)) {
2862 bool MadeChange = false;
2863 for (Use &U : make_early_inc_range(RV->uses())) {
2864 auto *I = dyn_cast<Instruction>(U.getUser());
2865 if (!I || I->getParent() != CI->getParent()) {
2866 // Limit to the same basic block to avoid extending the call-site live
2867 // range, which otherwise could increase register pressure.
2868 continue;
2869 }
2870 if (CI->comesBefore(I)) {
2871 U.set(CI);
2872 MadeChange = true;
2873 }
2874 }
2875
2876 return MadeChange;
2877 }
2878 }
2879
2880 return false;
2881}
2882
2883static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2884 const CallInst *CI) {
2885 assert(CI && CI->use_empty());
2886
2887 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2888 switch (II->getIntrinsicID()) {
2889 case Intrinsic::memset:
2890 case Intrinsic::memcpy:
2891 case Intrinsic::memmove:
2892 return true;
2893 default:
2894 return false;
2895 }
2896
2897 LibFunc LF;
2898 Function *Callee = CI->getCalledFunction();
2899 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2900 switch (LF) {
2901 case LibFunc_strcpy:
2902 case LibFunc_strncpy:
2903 case LibFunc_strcat:
2904 case LibFunc_strncat:
2905 return true;
2906 default:
2907 return false;
2908 }
2909
2910 return false;
2911}
2912
2913/// Look for opportunities to duplicate return instructions to the predecessor
2914/// to enable tail call optimizations. The case it is currently looking for is
2915/// the following one. Known intrinsics or library function that may be tail
2916/// called are taken into account as well.
2917/// @code
2918/// bb0:
2919/// %tmp0 = tail call i32 @f0()
2920/// br label %return
2921/// bb1:
2922/// %tmp1 = tail call i32 @f1()
2923/// br label %return
2924/// bb2:
2925/// %tmp2 = tail call i32 @f2()
2926/// br label %return
2927/// return:
2928/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2929/// ret i32 %retval
2930/// @endcode
2931///
2932/// =>
2933///
2934/// @code
2935/// bb0:
2936/// %tmp0 = tail call i32 @f0()
2937/// ret i32 %tmp0
2938/// bb1:
2939/// %tmp1 = tail call i32 @f1()
2940/// ret i32 %tmp1
2941/// bb2:
2942/// %tmp2 = tail call i32 @f2()
2943/// ret i32 %tmp2
2944/// @endcode
2945bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2946 ModifyDT &ModifiedDT) {
2947 if (!BB->getTerminator())
2948 return false;
2949
2950 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2951 if (!RetI)
2952 return false;
2953
2954 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2955
2956 PHINode *PN = nullptr;
2957 ExtractValueInst *EVI = nullptr;
2958 BitCastInst *BCI = nullptr;
2959 Value *V = RetI->getReturnValue();
2960 if (V) {
2961 BCI = dyn_cast<BitCastInst>(V);
2962 if (BCI)
2963 V = BCI->getOperand(0);
2964
2965 EVI = dyn_cast<ExtractValueInst>(V);
2966 if (EVI) {
2967 V = EVI->getOperand(0);
2968 if (!llvm::all_of(EVI->indices(), equal_to(0)))
2969 return false;
2970 }
2971
2972 PN = dyn_cast<PHINode>(V);
2973 }
2974
2975 if (PN && PN->getParent() != BB)
2976 return false;
2977
2978 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2979 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2980 if (BC && BC->hasOneUse())
2981 Inst = BC->user_back();
2982
2983 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2984 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2985 return false;
2986 };
2987
2988 SmallVector<const IntrinsicInst *, 4> FakeUses;
2989
2990 auto isFakeUse = [&FakeUses](const Instruction *Inst) {
2991 if (auto *II = dyn_cast<IntrinsicInst>(Inst);
2992 II && II->getIntrinsicID() == Intrinsic::fake_use) {
2993 // Record the instruction so it can be preserved when the exit block is
2994 // removed. Do not preserve the fake use that uses the result of the
2995 // PHI instruction.
2996 // Do not copy fake uses that use the result of a PHI node.
2997 // FIXME: If we do want to copy the fake use into the return blocks, we
2998 // have to figure out which of the PHI node operands to use for each
2999 // copy.
3000 if (!isa<PHINode>(II->getOperand(0))) {
3001 FakeUses.push_back(II);
3002 }
3003 return true;
3004 }
3005
3006 return false;
3007 };
3008
3009 // Make sure there are no instructions between the first instruction
3010 // and return.
3011 BasicBlock::const_iterator BI = BB->getFirstNonPHIIt();
3012 // Skip over pseudo-probes and the bitcast.
3013 while (&*BI == BCI || &*BI == EVI || isa<PseudoProbeInst>(BI) ||
3014 isLifetimeEndOrBitCastFor(&*BI) || isFakeUse(&*BI))
3015 BI = std::next(BI);
3016 if (&*BI != RetI)
3017 return false;
3018
3019 // Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
3020 // call.
3021 auto MayBePermittedAsTailCall = [&](const auto *CI) {
3022 return TLI->mayBeEmittedAsTailCall(CI) &&
3023 attributesPermitTailCall(BB->getParent(), CI, RetI, *TLI);
3024 };
3025
3026 SmallVector<BasicBlock *, 4> TailCallBBs;
3027 // Record the call instructions so we can insert any fake uses
3028 // that need to be preserved before them.
3029 SmallVector<CallInst *, 4> CallInsts;
3030 if (PN) {
3031 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
3032 // Look through bitcasts.
3033 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
3034 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
3035 BasicBlock *PredBB = PN->getIncomingBlock(I);
3036 // Make sure the phi value is indeed produced by the tail call.
3037 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
3038 MayBePermittedAsTailCall(CI)) {
3039 TailCallBBs.push_back(PredBB);
3040 CallInsts.push_back(CI);
3041 } else {
3042 // Consider the cases in which the phi value is indirectly produced by
3043 // the tail call, for example when encountering memset(), memmove(),
3044 // strcpy(), whose return value may have been optimized out. In such
3045 // cases, the value needs to be the first function argument.
3046 //
3047 // bb0:
3048 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
3049 // br label %return
3050 // return:
3051 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
3052 if (PredBB && PredBB->getSingleSuccessor() == BB)
3053 CI = dyn_cast_or_null<CallInst>(
3054 PredBB->getTerminator()->getPrevNode());
3055
3056 if (CI && CI->use_empty() &&
3057 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3058 IncomingVal == CI->getArgOperand(0) &&
3059 MayBePermittedAsTailCall(CI)) {
3060 TailCallBBs.push_back(PredBB);
3061 CallInsts.push_back(CI);
3062 }
3063 }
3064 }
3065 } else {
3066 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
3067 for (BasicBlock *Pred : predecessors(BB)) {
3068 if (!VisitedBBs.insert(Pred).second)
3069 continue;
3070 if (Instruction *I = Pred->rbegin()->getPrevNode()) {
3071 CallInst *CI = dyn_cast<CallInst>(I);
3072 if (CI && CI->use_empty() && MayBePermittedAsTailCall(CI)) {
3073 // Either we return void or the return value must be the first
3074 // argument of a known intrinsic or library function.
3075 if (!V || isa<UndefValue>(V) ||
3076 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
3077 V == CI->getArgOperand(0))) {
3078 TailCallBBs.push_back(Pred);
3079 CallInsts.push_back(CI);
3080 }
3081 }
3082 }
3083 }
3084 }
3085
3086 bool Changed = false;
3087 for (auto const &TailCallBB : TailCallBBs) {
3088 // Make sure the call instruction is followed by an unconditional branch to
3089 // the return block.
3090 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
3091 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
3092 continue;
3093
3094 // Duplicate the return into TailCallBB.
3095 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
3096 assert(!VerifyBFIUpdates ||
3097 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
3098 BFI->setBlockFreq(BB,
3099 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
3100 ModifiedDT = ModifyDT::ModifyBBDT;
3101 Changed = true;
3102 ++NumRetsDup;
3103 }
3104
3105 // If we eliminated all predecessors of the block, delete the block now.
3106 if (Changed && !BB->hasAddressTaken() && pred_empty(BB)) {
3107 // Copy the fake uses found in the original return block to all blocks
3108 // that contain tail calls.
3109 for (auto *CI : CallInsts) {
3110 for (auto const *FakeUse : FakeUses) {
3111 auto *ClonedInst = FakeUse->clone();
3112 ClonedInst->insertBefore(CI->getIterator());
3113 }
3114 }
3115 BB->eraseFromParent();
3116 }
3117
3118 return Changed;
3119}
3120
3121//===----------------------------------------------------------------------===//
3122// Memory Optimization
3123//===----------------------------------------------------------------------===//
3124
3125namespace {
3126
3127/// This is an extended version of TargetLowering::AddrMode
3128/// which holds actual Value*'s for register values.
3129struct ExtAddrMode : public TargetLowering::AddrMode {
3130 Value *BaseReg = nullptr;
3131 Value *ScaledReg = nullptr;
3132 Value *OriginalValue = nullptr;
3133 bool InBounds = true;
3134
3135 enum FieldName {
3136 NoField = 0x00,
3137 BaseRegField = 0x01,
3138 BaseGVField = 0x02,
3139 BaseOffsField = 0x04,
3140 ScaledRegField = 0x08,
3141 ScaleField = 0x10,
3142 MultipleFields = 0xff
3143 };
3144
3145 ExtAddrMode() = default;
3146
3147 void print(raw_ostream &OS) const;
3148 void dump() const;
3149
3150 // Replace From in ExtAddrMode with To.
3151 // E.g., SExt insts may be promoted and deleted. We should replace them with
3152 // the promoted values.
3153 void replaceWith(Value *From, Value *To) {
3154 if (ScaledReg == From)
3155 ScaledReg = To;
3156 }
3157
3158 FieldName compare(const ExtAddrMode &other) {
3159 // First check that the types are the same on each field, as differing types
3160 // are something we can't cope with later on.
3161 if (BaseReg && other.BaseReg &&
3162 BaseReg->getType() != other.BaseReg->getType())
3163 return MultipleFields;
3164 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
3165 return MultipleFields;
3166 if (ScaledReg && other.ScaledReg &&
3167 ScaledReg->getType() != other.ScaledReg->getType())
3168 return MultipleFields;
3169
3170 // Conservatively reject 'inbounds' mismatches.
3171 if (InBounds != other.InBounds)
3172 return MultipleFields;
3173
3174 // Check each field to see if it differs.
3175 unsigned Result = NoField;
3176 if (BaseReg != other.BaseReg)
3177 Result |= BaseRegField;
3178 if (BaseGV != other.BaseGV)
3179 Result |= BaseGVField;
3180 if (BaseOffs != other.BaseOffs)
3181 Result |= BaseOffsField;
3182 if (ScaledReg != other.ScaledReg)
3183 Result |= ScaledRegField;
3184 // Don't count 0 as being a different scale, because that actually means
3185 // unscaled (which will already be counted by having no ScaledReg).
3186 if (Scale && other.Scale && Scale != other.Scale)
3187 Result |= ScaleField;
3188
3189 if (llvm::popcount(Result) > 1)
3190 return MultipleFields;
3191 else
3192 return static_cast<FieldName>(Result);
3193 }
3194
3195 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
3196 // with no offset.
3197 bool isTrivial() {
3198 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
3199 // trivial if at most one of these terms is nonzero, except that BaseGV and
3200 // BaseReg both being zero actually means a null pointer value, which we
3201 // consider to be 'non-zero' here.
3202 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
3203 }
3204
3205 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
3206 switch (Field) {
3207 default:
3208 return nullptr;
3209 case BaseRegField:
3210 return BaseReg;
3211 case BaseGVField:
3212 return BaseGV;
3213 case ScaledRegField:
3214 return ScaledReg;
3215 case BaseOffsField:
3216 return ConstantInt::getSigned(IntPtrTy, BaseOffs);
3217 }
3218 }
3219
3220 void SetCombinedField(FieldName Field, Value *V,
3221 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
3222 switch (Field) {
3223 default:
3224 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
3225 break;
3226 case ExtAddrMode::BaseRegField:
3227 BaseReg = V;
3228 break;
3229 case ExtAddrMode::BaseGVField:
3230 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
3231 // in the BaseReg field.
3232 assert(BaseReg == nullptr);
3233 BaseReg = V;
3234 BaseGV = nullptr;
3235 break;
3236 case ExtAddrMode::ScaledRegField:
3237 ScaledReg = V;
3238 // If we have a mix of scaled and unscaled addrmodes then we want scale
3239 // to be the scale and not zero.
3240 if (!Scale)
3241 for (const ExtAddrMode &AM : AddrModes)
3242 if (AM.Scale) {
3243 Scale = AM.Scale;
3244 break;
3245 }
3246 break;
3247 case ExtAddrMode::BaseOffsField:
3248 // The offset is no longer a constant, so it goes in ScaledReg with a
3249 // scale of 1.
3250 assert(ScaledReg == nullptr);
3251 ScaledReg = V;
3252 Scale = 1;
3253 BaseOffs = 0;
3254 break;
3255 }
3256 }
3257};
3258
3259#ifndef NDEBUG
3260static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3261 AM.print(OS);
3262 return OS;
3263}
3264#endif
3265
3266#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3267void ExtAddrMode::print(raw_ostream &OS) const {
3268 bool NeedPlus = false;
3269 OS << "[";
3270 if (InBounds)
3271 OS << "inbounds ";
3272 if (BaseGV) {
3273 OS << "GV:";
3274 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3275 NeedPlus = true;
3276 }
3277
3278 if (BaseOffs) {
3279 OS << (NeedPlus ? " + " : "") << BaseOffs;
3280 NeedPlus = true;
3281 }
3282
3283 if (BaseReg) {
3284 OS << (NeedPlus ? " + " : "") << "Base:";
3285 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3286 NeedPlus = true;
3287 }
3288 if (Scale) {
3289 OS << (NeedPlus ? " + " : "") << Scale << "*";
3290 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3291 }
3292
3293 OS << ']';
3294}
3295
3296LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3297 print(dbgs());
3298 dbgs() << '\n';
3299}
3300#endif
3301
3302} // end anonymous namespace
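// A minimal sketch (illustrative, not part of the pass): how an address
// decomposes into the ExtAddrMode fields above. The names %p and %i are
// hypothetical placeholders, not values taken from real IR.
//
//   Address = BaseGV + BaseReg + BaseOffs + ScaledReg * Scale
//   e.g. for a hypothetical i32 access p[i]:
//     BaseReg = %p, ScaledReg = %i, Scale = 4, BaseOffs = 0, BaseGV = null
//
// The matching code below tries to fold as much of such a computation into the
// addressing mode as the target's legal modes allow.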
3303
3304namespace {
3305
3306/// This class provides transaction based operation on the IR.
3307/// Every change made through this class is recorded in the internal state and
3308/// can be undone (rollback) until commit is called.
3309/// CGP does not check if instructions could be speculatively executed when
3310/// moved. Preserving the original location would pessimize the debugging
3311/// experience, as well as negatively impact the quality of sample PGO.
3312class TypePromotionTransaction {
3313 /// This represents the common interface of the individual transaction.
3314 /// Each class implements the logic for doing one specific modification on
3315 /// the IR via the TypePromotionTransaction.
3316 class TypePromotionAction {
3317 protected:
3318 /// The Instruction modified.
3319 Instruction *Inst;
3320
3321 public:
3322 /// Constructor of the action.
3323 /// The constructor performs the related action on the IR.
3324 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3325
3326 virtual ~TypePromotionAction() = default;
3327
3328 /// Undo the modification done by this action.
3329    /// After this method returns, the IR is in the same state as it was
3330    /// before this action was applied.
3331 /// \pre Undoing the action works if and only if the IR is in the exact same
3332 /// state as it was directly after this action was applied.
3333 virtual void undo() = 0;
3334
3335    /// Commit every change made by this action.
3336 /// When the results on the IR of the action are to be kept, it is important
3337 /// to call this function, otherwise hidden information may be kept forever.
3338 virtual void commit() {
3339 // Nothing to be done, this action is not doing anything.
3340 }
3341 };
3342
3343 /// Utility to remember the position of an instruction.
3344 class InsertionHandler {
3345 /// Position of an instruction.
3346 /// Either an instruction:
3347 /// - Is the first in a basic block: BB is used.
3348 /// - Has a previous instruction: PrevInst is used.
3349 struct {
3350 BasicBlock::iterator PrevInst;
3351 BasicBlock *BB;
3352 } Point;
3353 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3354
3355 /// Remember whether or not the instruction had a previous instruction.
3356 bool HasPrevInstruction;
3357
3358 public:
3359 /// Record the position of \p Inst.
3360 InsertionHandler(Instruction *Inst) {
3361 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3362 BasicBlock *BB = Inst->getParent();
3363
3364 // Record where we would have to re-insert the instruction in the sequence
3365 // of DbgRecords, if we ended up reinserting.
3366 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3367
3368 if (HasPrevInstruction) {
3369 Point.PrevInst = std::prev(Inst->getIterator());
3370 } else {
3371 Point.BB = BB;
3372 }
3373 }
3374
3375 /// Insert \p Inst at the recorded position.
3376 void insert(Instruction *Inst) {
3377 if (HasPrevInstruction) {
3378 if (Inst->getParent())
3379 Inst->removeFromParent();
3380 Inst->insertAfter(Point.PrevInst);
3381 } else {
3382 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3383 if (Inst->getParent())
3384 Inst->moveBefore(*Point.BB, Position);
3385 else
3386 Inst->insertBefore(*Point.BB, Position);
3387 }
3388
3389 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3390 }
3391 };
3392
3393 /// Move an instruction before another.
3394 class InstructionMoveBefore : public TypePromotionAction {
3395 /// Original position of the instruction.
3396 InsertionHandler Position;
3397
3398 public:
3399 /// Move \p Inst before \p Before.
3400 InstructionMoveBefore(Instruction *Inst, BasicBlock::iterator Before)
3401 : TypePromotionAction(Inst), Position(Inst) {
3402 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3403 << "\n");
3404 Inst->moveBefore(Before);
3405 }
3406
3407 /// Move the instruction back to its original position.
3408 void undo() override {
3409 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3410 Position.insert(Inst);
3411 }
3412 };
3413
3414 /// Set the operand of an instruction with a new value.
3415 class OperandSetter : public TypePromotionAction {
3416 /// Original operand of the instruction.
3417 Value *Origin;
3418
3419    /// Index of the modified operand.
3420 unsigned Idx;
3421
3422 public:
3423 /// Set \p Idx operand of \p Inst with \p NewVal.
3424 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3425 : TypePromotionAction(Inst), Idx(Idx) {
3426 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3427 << "for:" << *Inst << "\n"
3428 << "with:" << *NewVal << "\n");
3429 Origin = Inst->getOperand(Idx);
3430 Inst->setOperand(Idx, NewVal);
3431 }
3432
3433 /// Restore the original value of the instruction.
3434 void undo() override {
3435 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3436 << "for: " << *Inst << "\n"
3437 << "with: " << *Origin << "\n");
3438 Inst->setOperand(Idx, Origin);
3439 }
3440 };
3441
3442 /// Hide the operands of an instruction.
3443 /// Do as if this instruction was not using any of its operands.
3444 class OperandsHider : public TypePromotionAction {
3445 /// The list of original operands.
3446 SmallVector<Value *, 4> OriginalValues;
3447
3448 public:
3449 /// Remove \p Inst from the uses of the operands of \p Inst.
3450 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3451 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3452 unsigned NumOpnds = Inst->getNumOperands();
3453 OriginalValues.reserve(NumOpnds);
3454 for (unsigned It = 0; It < NumOpnds; ++It) {
3455 // Save the current operand.
3456 Value *Val = Inst->getOperand(It);
3457 OriginalValues.push_back(Val);
3458 // Set a dummy one.
3459 // We could use OperandSetter here, but that would imply an overhead
3460 // that we are not willing to pay.
3461 Inst->setOperand(It, PoisonValue::get(Val->getType()));
3462 }
3463 }
3464
3465 /// Restore the original list of uses.
3466 void undo() override {
3467 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3468 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3469 Inst->setOperand(It, OriginalValues[It]);
3470 }
3471 };
3472
3473 /// Build a truncate instruction.
3474 class TruncBuilder : public TypePromotionAction {
3475 Value *Val;
3476
3477 public:
3478 /// Build a truncate instruction of \p Opnd producing a \p Ty
3479 /// result.
3480 /// trunc Opnd to Ty.
3481 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3482 IRBuilder<> Builder(Opnd);
3483 Builder.SetCurrentDebugLocation(DebugLoc());
3484 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3485 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3486 }
3487
3488 /// Get the built value.
3489 Value *getBuiltValue() { return Val; }
3490
3491 /// Remove the built instruction.
3492 void undo() override {
3493 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3494 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3495 IVal->eraseFromParent();
3496 }
3497 };
3498
3499 /// Build a sign extension instruction.
3500 class SExtBuilder : public TypePromotionAction {
3501 Value *Val;
3502
3503 public:
3504 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3505 /// result.
3506 /// sext Opnd to Ty.
3507 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3508 : TypePromotionAction(InsertPt) {
3509 IRBuilder<> Builder(InsertPt);
3510 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3511 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3512 }
3513
3514 /// Get the built value.
3515 Value *getBuiltValue() { return Val; }
3516
3517 /// Remove the built instruction.
3518 void undo() override {
3519 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3520 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3521 IVal->eraseFromParent();
3522 }
3523 };
3524
3525 /// Build a zero extension instruction.
3526 class ZExtBuilder : public TypePromotionAction {
3527 Value *Val;
3528
3529 public:
3530 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3531 /// result.
3532 /// zext Opnd to Ty.
3533 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3534 : TypePromotionAction(InsertPt) {
3535 IRBuilder<> Builder(InsertPt);
3536 Builder.SetCurrentDebugLocation(DebugLoc());
3537 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3538 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3539 }
3540
3541 /// Get the built value.
3542 Value *getBuiltValue() { return Val; }
3543
3544 /// Remove the built instruction.
3545 void undo() override {
3546 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3547 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3548 IVal->eraseFromParent();
3549 }
3550 };
3551
3552 /// Mutate an instruction to another type.
3553 class TypeMutator : public TypePromotionAction {
3554 /// Record the original type.
3555 Type *OrigTy;
3556
3557 public:
3558 /// Mutate the type of \p Inst into \p NewTy.
3559 TypeMutator(Instruction *Inst, Type *NewTy)
3560 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3561 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3562 << "\n");
3563 Inst->mutateType(NewTy);
3564 }
3565
3566 /// Mutate the instruction back to its original type.
3567 void undo() override {
3568 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3569 << "\n");
3570 Inst->mutateType(OrigTy);
3571 }
3572 };
3573
3574 /// Replace the uses of an instruction by another instruction.
3575 class UsesReplacer : public TypePromotionAction {
3576 /// Helper structure to keep track of the replaced uses.
3577 struct InstructionAndIdx {
3578 /// The instruction using the instruction.
3579 Instruction *Inst;
3580
3581      /// The operand index at which the replaced instruction is used by Inst.
3582 unsigned Idx;
3583
3584 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3585 : Inst(Inst), Idx(Idx) {}
3586 };
3587
3588    /// Keep track of the original uses (pair Instruction, Index).
3589    SmallVector<InstructionAndIdx, 4> OriginalUses;
3590 /// Keep track of the debug users.
3591 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3592
3593 /// Keep track of the new value so that we can undo it by replacing
3594 /// instances of the new value with the original value.
3595 Value *New;
3596
3598
3599 public:
3600    /// Replace all the uses of \p Inst with \p New.
3601 UsesReplacer(Instruction *Inst, Value *New)
3602 : TypePromotionAction(Inst), New(New) {
3603 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3604 << "\n");
3605 // Record the original uses.
3606 for (Use &U : Inst->uses()) {
3607 Instruction *UserI = cast<Instruction>(U.getUser());
3608 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3609 }
3610 // Record the debug uses separately. They are not in the instruction's
3611 // use list, but they are replaced by RAUW.
3612 findDbgValues(Inst, DbgVariableRecords);
3613
3614 // Now, we can replace the uses.
3615 Inst->replaceAllUsesWith(New);
3616 }
3617
3618 /// Reassign the original uses of Inst to Inst.
3619 void undo() override {
3620 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3621 for (InstructionAndIdx &Use : OriginalUses)
3622 Use.Inst->setOperand(Use.Idx, Inst);
3623 // RAUW has replaced all original uses with references to the new value,
3624 // including the debug uses. Since we are undoing the replacements,
3625 // the original debug uses must also be reinstated to maintain the
3626 // correctness and utility of debug value records.
3627 for (DbgVariableRecord *DVR : DbgVariableRecords)
3628 DVR->replaceVariableLocationOp(New, Inst);
3629 }
3630 };
3631
3632 /// Remove an instruction from the IR.
3633 class InstructionRemover : public TypePromotionAction {
3634 /// Original position of the instruction.
3635 InsertionHandler Inserter;
3636
3637    /// Helper structure to hide all the links to the instruction. In other
3638    /// words, this helps to act as if the instruction were removed.
3639 OperandsHider Hider;
3640
3641 /// Keep track of the uses replaced, if any.
3642 UsesReplacer *Replacer = nullptr;
3643
3644 /// Keep track of instructions removed.
3645 SetOfInstrs &RemovedInsts;
3646
3647 public:
3648    /// Remove all references to \p Inst and optionally replace all its
3649    /// uses with \p New.
3650 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3651 /// \pre If !Inst->use_empty(), then New != nullptr
3652 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3653 Value *New = nullptr)
3654 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3655 RemovedInsts(RemovedInsts) {
3656 if (New)
3657 Replacer = new UsesReplacer(Inst, New);
3658 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3659 RemovedInsts.insert(Inst);
3660 /// The instructions removed here will be freed after completing
3661 /// optimizeBlock() for all blocks as we need to keep track of the
3662 /// removed instructions during promotion.
3663 Inst->removeFromParent();
3664 }
3665
3666 ~InstructionRemover() override { delete Replacer; }
3667
3668 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3669 InstructionRemover(const InstructionRemover &other) = delete;
3670
3671    /// Resurrect the instruction and reassign it to the proper uses if a
3672    /// new value was provided when building this action.
3673 void undo() override {
3674 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3675 Inserter.insert(Inst);
3676 if (Replacer)
3677 Replacer->undo();
3678 Hider.undo();
3679 RemovedInsts.erase(Inst);
3680 }
3681 };
3682
3683public:
3684 /// Restoration point.
3685 /// The restoration point is a pointer to an action instead of an iterator
3686 /// because the iterator may be invalidated but not the pointer.
3687 using ConstRestorationPt = const TypePromotionAction *;
3688
3689 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3690 : RemovedInsts(RemovedInsts) {}
3691
3692  /// Commit every change made in this transaction. Return true if any change
3693  /// happened.
3694 bool commit();
3695
3696 /// Undo all the changes made after the given point.
3697 void rollback(ConstRestorationPt Point);
3698
3699 /// Get the current restoration point.
3700 ConstRestorationPt getRestorationPoint() const;
3701
3702 /// \name API for IR modification with state keeping to support rollback.
3703 /// @{
3704 /// Same as Instruction::setOperand.
3705 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3706
3707 /// Same as Instruction::eraseFromParent.
3708 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3709
3710 /// Same as Value::replaceAllUsesWith.
3711 void replaceAllUsesWith(Instruction *Inst, Value *New);
3712
3713 /// Same as Value::mutateType.
3714 void mutateType(Instruction *Inst, Type *NewTy);
3715
3716 /// Same as IRBuilder::createTrunc.
3717 Value *createTrunc(Instruction *Opnd, Type *Ty);
3718
3719 /// Same as IRBuilder::createSExt.
3720 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3721
3722 /// Same as IRBuilder::createZExt.
3723 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3724
3725private:
3726 /// The ordered list of actions made so far.
3727  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3728
3729 using CommitPt =
3730 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3731
3732 SetOfInstrs &RemovedInsts;
3733};
3734
3735} // end anonymous namespace
3736
3737void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3738 Value *NewVal) {
3739 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3740 Inst, Idx, NewVal));
3741}
3742
3743void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3744 Value *NewVal) {
3745 Actions.push_back(
3746 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3747 Inst, RemovedInsts, NewVal));
3748}
3749
3750void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3751 Value *New) {
3752 Actions.push_back(
3753 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3754}
3755
3756void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3757 Actions.push_back(
3758 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3759}
3760
3761Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3762 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3763 Value *Val = Ptr->getBuiltValue();
3764 Actions.push_back(std::move(Ptr));
3765 return Val;
3766}
3767
3768Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3769 Type *Ty) {
3770 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3771 Value *Val = Ptr->getBuiltValue();
3772 Actions.push_back(std::move(Ptr));
3773 return Val;
3774}
3775
3776Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3777 Type *Ty) {
3778 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3779 Value *Val = Ptr->getBuiltValue();
3780 Actions.push_back(std::move(Ptr));
3781 return Val;
3782}
3783
3784TypePromotionTransaction::ConstRestorationPt
3785TypePromotionTransaction::getRestorationPoint() const {
3786 return !Actions.empty() ? Actions.back().get() : nullptr;
3787}
3788
3789bool TypePromotionTransaction::commit() {
3790 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3791 Action->commit();
3792 bool Modified = !Actions.empty();
3793 Actions.clear();
3794 return Modified;
3795}
3796
3797void TypePromotionTransaction::rollback(
3798 TypePromotionTransaction::ConstRestorationPt Point) {
3799 while (!Actions.empty() && Point != Actions.back().get()) {
3800 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3801 Curr->undo();
3802 }
3803}
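// A minimal sketch (illustrative, not part of the pass): the save/try/rollback
// pattern in which the transaction above is used. `Inst`, `Promoted`, `WideTy`
// and `Profitable` are placeholders for whatever the caller is working with.
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt Save =
//       TPT.getRestorationPoint();
//   TPT.setOperand(Inst, 0, Promoted); // recorded, reversible
//   TPT.mutateType(Inst, WideTy);      // recorded, reversible
//   if (!Profitable)
//     TPT.rollback(Save);              // undo every action made after Save
//   // The changes only become permanent once TPT.commit() is called.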
3804
3805namespace {
3806
3807/// A helper class for matching addressing modes.
3808///
3809/// This encapsulates the logic for matching the target-legal addressing modes.
3810class AddressingModeMatcher {
3811 SmallVectorImpl<Instruction *> &AddrModeInsts;
3812 const TargetLowering &TLI;
3813 const TargetRegisterInfo &TRI;
3814 const DataLayout &DL;
3815 const LoopInfo &LI;
3816 const std::function<const DominatorTree &()> getDTFn;
3817
3818 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3819 /// the memory instruction that we're computing this address for.
3820 Type *AccessTy;
3821 unsigned AddrSpace;
3822 Instruction *MemoryInst;
3823
3824 /// This is the addressing mode that we're building up. This is
3825 /// part of the return value of this addressing mode matching stuff.
3826 ExtAddrMode &AddrMode;
3827
3828 /// The instructions inserted by other CodeGenPrepare optimizations.
3829 const SetOfInstrs &InsertedInsts;
3830
3831 /// A map from the instructions to their type before promotion.
3832 InstrToOrigTy &PromotedInsts;
3833
3834 /// The ongoing transaction where every action should be registered.
3835 TypePromotionTransaction &TPT;
3836
3837 // A GEP which has too large offset to be folded into the addressing mode.
3838 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3839
3840 /// This is set to true when we should not do profitability checks.
3841 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3842 bool IgnoreProfitability;
3843
3844 /// True if we are optimizing for size.
3845 bool OptSize = false;
3846
3847 ProfileSummaryInfo *PSI;
3848 BlockFrequencyInfo *BFI;
3849
3850 AddressingModeMatcher(
3851 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3852 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3853 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3854 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3855 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3856 TypePromotionTransaction &TPT,
3857 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3858 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3859 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3860 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3861 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3862 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3863 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3864 IgnoreProfitability = false;
3865 }
3866
3867public:
3868 /// Find the maximal addressing mode that a load/store of V can fold,
3869  /// given an access type of AccessTy. This returns a list of involved
3870 /// instructions in AddrModeInsts.
3871 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3872 /// optimizations.
3873 /// \p PromotedInsts maps the instructions to their type before promotion.
3874  /// \p TPT The ongoing transaction where every action should be registered.
3875 static ExtAddrMode
3876 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3877 SmallVectorImpl<Instruction *> &AddrModeInsts,
3878 const TargetLowering &TLI, const LoopInfo &LI,
3879 const std::function<const DominatorTree &()> getDTFn,
3880 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3881 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3882 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3883 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3884 ExtAddrMode Result;
3885
3886 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3887 AccessTy, AS, MemoryInst, Result,
3888 InsertedInsts, PromotedInsts, TPT,
3889 LargeOffsetGEP, OptSize, PSI, BFI)
3890 .matchAddr(V, 0);
3891 (void)Success;
3892 assert(Success && "Couldn't select *anything*?");
3893 return Result;
3894 }
3895
3896private:
3897 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3898 bool matchAddr(Value *Addr, unsigned Depth);
3899 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3900 bool *MovedAway = nullptr);
3901 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3902 ExtAddrMode &AMBefore,
3903 ExtAddrMode &AMAfter);
3904 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3905 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3906 Value *PromotedOperand) const;
3907};
3908
3909class PhiNodeSet;
3910
3911/// An iterator for PhiNodeSet.
3912class PhiNodeSetIterator {
3913 PhiNodeSet *const Set;
3914 size_t CurrentIndex = 0;
3915
3916public:
3917 /// The constructor. Start should point to either a valid element, or be equal
3918 /// to the size of the underlying SmallVector of the PhiNodeSet.
3919 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3920 PHINode *operator*() const;
3921 PhiNodeSetIterator &operator++();
3922 bool operator==(const PhiNodeSetIterator &RHS) const;
3923 bool operator!=(const PhiNodeSetIterator &RHS) const;
3924};
3925
3926/// Keeps a set of PHINodes.
3927///
3928/// This is a minimal set implementation for a specific use case:
3929/// It is very fast when there are very few elements, but also provides good
3930/// performance when there are many. It is similar to SmallPtrSet, but also
3931/// provides iteration by insertion order, which is deterministic and stable
3932/// across runs. It is also similar to SmallSetVector, but provides removing
3933/// elements in O(1) time. This is achieved by not actually removing the element
3934/// from the underlying vector, so it comes at the cost of using more memory, but
3935/// that is fine, since PhiNodeSets are used as short-lived objects.
3936class PhiNodeSet {
3937 friend class PhiNodeSetIterator;
3938
3939 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3940 using iterator = PhiNodeSetIterator;
3941
3942 /// Keeps the elements in the order of their insertion in the underlying
3943 /// vector. To achieve constant time removal, it never deletes any element.
3944  SmallVector<PHINode *, 32> NodeList;
3945
3946 /// Keeps the elements in the underlying set implementation. This (and not the
3947 /// NodeList defined above) is the source of truth on whether an element
3948 /// is actually in the collection.
3949 MapType NodeMap;
3950
3951 /// Points to the first valid (not deleted) element when the set is not empty
3952  /// and the value is not zero. Equals the size of the underlying vector
3953 /// when the set is empty. When the value is 0, as in the beginning, the
3954 /// first element may or may not be valid.
3955 size_t FirstValidElement = 0;
3956
3957public:
3958 /// Inserts a new element to the collection.
3959 /// \returns true if the element is actually added, i.e. was not in the
3960 /// collection before the operation.
3961 bool insert(PHINode *Ptr) {
3962 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3963 NodeList.push_back(Ptr);
3964 return true;
3965 }
3966 return false;
3967 }
3968
3969 /// Removes the element from the collection.
3970 /// \returns whether the element is actually removed, i.e. was in the
3971 /// collection before the operation.
3972 bool erase(PHINode *Ptr) {
3973 if (NodeMap.erase(Ptr)) {
3974 SkipRemovedElements(FirstValidElement);
3975 return true;
3976 }
3977 return false;
3978 }
3979
3980 /// Removes all elements and clears the collection.
3981 void clear() {
3982 NodeMap.clear();
3983 NodeList.clear();
3984 FirstValidElement = 0;
3985 }
3986
3987 /// \returns an iterator that will iterate the elements in the order of
3988 /// insertion.
3989 iterator begin() {
3990 if (FirstValidElement == 0)
3991 SkipRemovedElements(FirstValidElement);
3992 return PhiNodeSetIterator(this, FirstValidElement);
3993 }
3994
3995 /// \returns an iterator that points to the end of the collection.
3996 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3997
3998 /// Returns the number of elements in the collection.
3999 size_t size() const { return NodeMap.size(); }
4000
4001  /// \returns 1 if the given element is in the collection, and 0 otherwise.
4002 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
4003
4004private:
4005 /// Updates the CurrentIndex so that it will point to a valid element.
4006 ///
4007 /// If the element of NodeList at CurrentIndex is valid, it does not
4008 /// change it. If there are no more valid elements, it updates CurrentIndex
4009 /// to point to the end of the NodeList.
4010 void SkipRemovedElements(size_t &CurrentIndex) {
4011 while (CurrentIndex < NodeList.size()) {
4012 auto it = NodeMap.find(NodeList[CurrentIndex]);
4013 // If the element has been deleted and added again later, NodeMap will
4014 // point to a different index, so CurrentIndex will still be invalid.
4015 if (it != NodeMap.end() && it->second == CurrentIndex)
4016 break;
4017 ++CurrentIndex;
4018 }
4019 }
4020};
4021
4022PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
4023 : Set(Set), CurrentIndex(Start) {}
4024
4025PHINode *PhiNodeSetIterator::operator*() const {
4026 assert(CurrentIndex < Set->NodeList.size() &&
4027 "PhiNodeSet access out of range");
4028 return Set->NodeList[CurrentIndex];
4029}
4030
4031PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
4032 assert(CurrentIndex < Set->NodeList.size() &&
4033 "PhiNodeSet access out of range");
4034 ++CurrentIndex;
4035 Set->SkipRemovedElements(CurrentIndex);
4036 return *this;
4037}
4038
4039bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
4040 return CurrentIndex == RHS.CurrentIndex;
4041}
4042
4043bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
4044 return !((*this) == RHS);
4045}
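// A minimal sketch (illustrative only): why PhiNodeSet::erase is O(1). Erasing
// only removes the entry from NodeMap; the pointer stays in NodeList as a
// tombstone and is skipped during iteration. A, B and C stand for arbitrary
// PHINode pointers.
//
//   PhiNodeSet Set;
//   Set.insert(A); Set.insert(B); Set.insert(C); // NodeList = [A, B, C]
//   Set.erase(B);                                // NodeList is left unchanged
//   for (PHINode *P : Set)                       // visits A, then C
//     ...;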
4046
4047/// Keep track of simplification of Phi nodes.
4048/// Accepts the set of all phi nodes and erases a phi node from this set
4049/// if it is simplified.
4050class SimplificationTracker {
4051 DenseMap<Value *, Value *> Storage;
4052 // Tracks newly created Phi nodes. The elements are iterated by insertion
4053 // order.
4054 PhiNodeSet AllPhiNodes;
4055 // Tracks newly created Select nodes.
4056 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
4057
4058public:
4059 Value *Get(Value *V) {
4060 do {
4061 auto SV = Storage.find(V);
4062 if (SV == Storage.end())
4063 return V;
4064 V = SV->second;
4065 } while (true);
4066 }
4067
4068 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
4069
4070 void ReplacePhi(PHINode *From, PHINode *To) {
4071 Value *OldReplacement = Get(From);
4072 while (OldReplacement != From) {
4073 From = To;
4074 To = dyn_cast<PHINode>(OldReplacement);
4075 OldReplacement = Get(From);
4076 }
4077 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
4078 Put(From, To);
4079 From->replaceAllUsesWith(To);
4080 AllPhiNodes.erase(From);
4081 From->eraseFromParent();
4082 }
4083
4084 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
4085
4086 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
4087
4088 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
4089
4090 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
4091
4092 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
4093
4094 void destroyNewNodes(Type *CommonType) {
4095 // For safe erasing, replace the uses with dummy value first.
4096 auto *Dummy = PoisonValue::get(CommonType);
4097 for (auto *I : AllPhiNodes) {
4098 I->replaceAllUsesWith(Dummy);
4099 I->eraseFromParent();
4100 }
4101 AllPhiNodes.clear();
4102 for (auto *I : AllSelectNodes) {
4103 I->replaceAllUsesWith(Dummy);
4104 I->eraseFromParent();
4105 }
4106 AllSelectNodes.clear();
4107 }
4108};
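// A minimal sketch (illustrative only): Get() follows replacement chains, so a
// phi that was simplified in several steps resolves to the final surviving
// value. P1, P2 and V stand for arbitrary values.
//
//   SimplificationTracker ST;
//   ST.Put(P1, P2);          // P1 was simplified to P2
//   ST.Put(P2, V);           // later, P2 itself was simplified to V
//   assert(ST.Get(P1) == V); // the chain P1 -> P2 -> V is followed to its end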
4109
4110/// A helper class for combining addressing modes.
4111class AddressingModeCombiner {
4112 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
4113 typedef std::pair<PHINode *, PHINode *> PHIPair;
4114
4115private:
4116 /// The addressing modes we've collected.
4117  SmallVector<ExtAddrMode, 16> AddrModes;
4118
4119 /// The field in which the AddrModes differ, when we have more than one.
4120 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
4121
4122 /// Are the AddrModes that we have all just equal to their original values?
4123 bool AllAddrModesTrivial = true;
4124
4125 /// Common Type for all different fields in addressing modes.
4126 Type *CommonType = nullptr;
4127
4128 const DataLayout &DL;
4129
4130 /// Original Address.
4131 Value *Original;
4132
4133 /// Common value among addresses
4134 Value *CommonValue = nullptr;
4135
4136public:
4137 AddressingModeCombiner(const DataLayout &DL, Value *OriginalValue)
4138 : DL(DL), Original(OriginalValue) {}
4139
4140 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
4141
4142 /// Get the combined AddrMode
4143 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
4144
4145 /// Add a new AddrMode if it's compatible with the AddrModes we already
4146 /// have.
4147 /// \return True iff we succeeded in doing so.
4148 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
4149    // Take note of whether we have any non-trivial AddrModes, as we need to
4150    // detect when all AddrModes are trivial, since then we would introduce a
4151    // phi or select which just duplicates what's already there.
4152 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
4153
4154 // If this is the first addrmode then everything is fine.
4155 if (AddrModes.empty()) {
4156 AddrModes.emplace_back(NewAddrMode);
4157 return true;
4158 }
4159
4160 // Figure out how different this is from the other address modes, which we
4161 // can do just by comparing against the first one given that we only care
4162 // about the cumulative difference.
4163 ExtAddrMode::FieldName ThisDifferentField =
4164 AddrModes[0].compare(NewAddrMode);
4165 if (DifferentField == ExtAddrMode::NoField)
4166 DifferentField = ThisDifferentField;
4167 else if (DifferentField != ThisDifferentField)
4168 DifferentField = ExtAddrMode::MultipleFields;
4169
4170 // If NewAddrMode differs in more than one dimension we cannot handle it.
4171 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
4172
4173 // If Scale Field is different then we reject.
4174 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
4175
4176    // We must also reject the case where the base offset differs and the
4177    // scaled register is not null: we cannot handle it because the merged
4178    // offset values would have to be used as the ScaledReg.
4179 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
4180 !NewAddrMode.ScaledReg);
4181
4182    // We must also reject the case where the GV differs and a BaseReg is already
4183    // installed: the merged GV value would have to go in the base register.
4184 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
4185 !NewAddrMode.HasBaseReg);
4186
4187    // Even if NewAddrMode is the same, we still need to collect it because the
4188    // original value is different. Later we will need all the original values
4189    // as anchors when finding the common Phi node.
4190 if (CanHandle)
4191 AddrModes.emplace_back(NewAddrMode);
4192 else
4193 AddrModes.clear();
4194
4195 return CanHandle;
4196 }
4197
4198 /// Combine the addressing modes we've collected into a single
4199 /// addressing mode.
4200 /// \return True iff we successfully combined them or we only had one so
4201 /// didn't need to combine them anyway.
4202 bool combineAddrModes() {
4203 // If we have no AddrModes then they can't be combined.
4204 if (AddrModes.size() == 0)
4205 return false;
4206
4207 // A single AddrMode can trivially be combined.
4208 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4209 return true;
4210
4211 // If the AddrModes we collected are all just equal to the value they are
4212 // derived from then combining them wouldn't do anything useful.
4213 if (AllAddrModesTrivial)
4214 return false;
4215
4216 if (!addrModeCombiningAllowed())
4217 return false;
4218
4219    // Build a map from <original value, basic block where we saw it> to the
4220    // value of the base register.
4221    // Bail out if there is no common type.
4222 FoldAddrToValueMapping Map;
4223 if (!initializeMap(Map))
4224 return false;
4225
4226 CommonValue = findCommon(Map);
4227 if (CommonValue)
4228 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4229 return CommonValue != nullptr;
4230 }
4231
4232private:
4233 /// `CommonValue` may be a placeholder inserted by us.
4234 /// If the placeholder is not used, we should remove this dead instruction.
4235 void eraseCommonValueIfDead() {
4236 if (CommonValue && CommonValue->use_empty())
4237 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4238 CommonInst->eraseFromParent();
4239 }
4240
4241  /// Initialize Map with anchor values. For each address seen, we record the
4242  /// value of the differing field in that address.
4243  /// At the same time, we find a common type for the differing fields, which
4244  /// we will use to create the new Phi/Select nodes. Keep it in the CommonType
4245  /// field. Return false if no common type is found.
4246 bool initializeMap(FoldAddrToValueMapping &Map) {
4247 // Keep track of keys where the value is null. We will need to replace it
4248 // with constant null when we know the common type.
4249 SmallVector<Value *, 2> NullValue;
4250 Type *IntPtrTy = DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4251 for (auto &AM : AddrModes) {
4252 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4253 if (DV) {
4254 auto *Type = DV->getType();
4255 if (CommonType && CommonType != Type)
4256 return false;
4257 CommonType = Type;
4258 Map[AM.OriginalValue] = DV;
4259 } else {
4260 NullValue.push_back(AM.OriginalValue);
4261 }
4262 }
4263 assert(CommonType && "At least one non-null value must be!");
4264 for (auto *V : NullValue)
4265 Map[V] = Constant::getNullValue(CommonType);
4266 return true;
4267 }
4268
4269  /// We have a mapping between a value A and another value B, where B was a
4270  /// field in the addressing mode represented by A. We also have an original
4271  /// value C representing the address we start with. Traversing from C through
4272  /// phis and selects, we ended up with the A's in the map. This utility tries
4273  /// to find a value V which is a field in addressing mode C such that, by
4274  /// traversing through phi nodes and selects, we end up at the corresponding
4275  /// B values in the map. The utility creates new Phis/Selects if needed.
4276 // The simple example looks as follows:
4277 // BB1:
4278 // p1 = b1 + 40
4279 // br cond BB2, BB3
4280 // BB2:
4281 // p2 = b2 + 40
4282 // br BB3
4283 // BB3:
4284 // p = phi [p1, BB1], [p2, BB2]
4285 // v = load p
4286 // Map is
4287 // p1 -> b1
4288 // p2 -> b2
4289 // Request is
4290 // p -> ?
4291 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4292 Value *findCommon(FoldAddrToValueMapping &Map) {
4293    // Tracks the simplification of newly created phi nodes. The reason we use
4294    // this mapping is that we will add newly created Phi nodes to AddrToBase.
4295    // Simplification of Phi nodes is recursive, so some Phi node may be
4296    // simplified after we added it to AddrToBase. In reality this
4297    // simplification is possible only if the original phis/selects were not
4298    // simplified yet.
4299 // Using this mapping we can find the current value in AddrToBase.
4300 SimplificationTracker ST;
4301
4302 // First step, DFS to create PHI nodes for all intermediate blocks.
4303 // Also fill traverse order for the second step.
4304 SmallVector<Value *, 32> TraverseOrder;
4305 InsertPlaceholders(Map, TraverseOrder, ST);
4306
4307 // Second Step, fill new nodes by merged values and simplify if possible.
4308 FillPlaceholders(Map, TraverseOrder, ST);
4309
4310 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4311 ST.destroyNewNodes(CommonType);
4312 return nullptr;
4313 }
4314
4315    // Now we'd like to match the new Phi nodes to existing ones.
4316 unsigned PhiNotMatchedCount = 0;
4317 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4318 ST.destroyNewNodes(CommonType);
4319 return nullptr;
4320 }
4321
4322 auto *Result = ST.Get(Map.find(Original)->second);
4323 if (Result) {
4324 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4325 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4326 }
4327 return Result;
4328 }
4329
4330 /// Try to match PHI node to Candidate.
4331 /// Matcher tracks the matched Phi nodes.
4332 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4333 SmallSetVector<PHIPair, 8> &Matcher,
4334 PhiNodeSet &PhiNodesToMatch) {
4335 SmallVector<PHIPair, 8> WorkList;
4336 Matcher.insert({PHI, Candidate});
4337 SmallPtrSet<PHINode *, 8> MatchedPHIs;
4338 MatchedPHIs.insert(PHI);
4339 WorkList.push_back({PHI, Candidate});
4340 SmallSet<PHIPair, 8> Visited;
4341 while (!WorkList.empty()) {
4342 auto Item = WorkList.pop_back_val();
4343 if (!Visited.insert(Item).second)
4344 continue;
4345      // We iterate over all incoming values of the Phi to compare them.
4346      // If the values are different, both of them are Phis, the first one is a
4347      // Phi we added (subject to matching), and both are in the same basic
4348      // block, then we can match our pair if their values match. So we state
4349      // that these values match and add the pair to the worklist to verify it.
4350 for (auto *B : Item.first->blocks()) {
4351 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4352 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4353 if (FirstValue == SecondValue)
4354 continue;
4355
4356 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4357 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4358
4359        // If one of them is not a Phi, or
4360        // the first one is not a Phi node from the set we'd like to match, or
4361        // the Phi nodes are in different basic blocks, then
4362        // we will not be able to match.
4363 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4364 FirstPhi->getParent() != SecondPhi->getParent())
4365 return false;
4366
4367 // If we already matched them then continue.
4368 if (Matcher.count({FirstPhi, SecondPhi}))
4369 continue;
4370        // So the values are different and do not match. So we need them to
4371 // match. (But we register no more than one match per PHI node, so that
4372 // we won't later try to replace them twice.)
4373 if (MatchedPHIs.insert(FirstPhi).second)
4374 Matcher.insert({FirstPhi, SecondPhi});
4375        // But we must check it.
4376 WorkList.push_back({FirstPhi, SecondPhi});
4377 }
4378 }
4379 return true;
4380 }
4381
4382 /// For the given set of PHI nodes (in the SimplificationTracker) try
4383 /// to find their equivalents.
4384 /// Returns false if this matching fails and creation of new Phi is disabled.
4385 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4386 unsigned &PhiNotMatchedCount) {
4387 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4388 // order, so the replacements (ReplacePhi) are also done in a deterministic
4389 // order.
4390 SmallSetVector<PHIPair, 8> Matched;
4391 SmallPtrSet<PHINode *, 8> WillNotMatch;
4392 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4393 while (PhiNodesToMatch.size()) {
4394 PHINode *PHI = *PhiNodesToMatch.begin();
4395
4396      // Add ourselves: if we find no match among the Phi nodes in the basic
4397      // block, we do not match.
4397 WillNotMatch.clear();
4398 WillNotMatch.insert(PHI);
4399
4400      // Traverse all Phis until we find an equivalent one or fail to do so.
4401 bool IsMatched = false;
4402 for (auto &P : PHI->getParent()->phis()) {
4403 // Skip new Phi nodes.
4404 if (PhiNodesToMatch.count(&P))
4405 continue;
4406 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4407 break;
4408        // If it does not match, collect all Phi nodes from the matcher;
4409        // if we end up with no match, then all these Phi nodes will not match
4410        // later either.
4411 WillNotMatch.insert_range(llvm::make_first_range(Matched));
4412 Matched.clear();
4413 }
4414 if (IsMatched) {
4415 // Replace all matched values and erase them.
4416 for (auto MV : Matched)
4417 ST.ReplacePhi(MV.first, MV.second);
4418 Matched.clear();
4419 continue;
4420 }
4421 // If we are not allowed to create new nodes then bail out.
4422 if (!AllowNewPhiNodes)
4423 return false;
4424 // Just remove all seen values in matcher. They will not match anything.
4425 PhiNotMatchedCount += WillNotMatch.size();
4426 for (auto *P : WillNotMatch)
4427 PhiNodesToMatch.erase(P);
4428 }
4429 return true;
4430 }
4431 /// Fill the placeholders with values from predecessors and simplify them.
4432 void FillPlaceholders(FoldAddrToValueMapping &Map,
4433 SmallVectorImpl<Value *> &TraverseOrder,
4434 SimplificationTracker &ST) {
4435 while (!TraverseOrder.empty()) {
4436 Value *Current = TraverseOrder.pop_back_val();
4437 assert(Map.contains(Current) && "No node to fill!!!");
4438 Value *V = Map[Current];
4439
4440 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4441 // CurrentValue also must be Select.
4442 auto *CurrentSelect = cast<SelectInst>(Current);
4443 auto *TrueValue = CurrentSelect->getTrueValue();
4444 assert(Map.contains(TrueValue) && "No True Value!");
4445 Select->setTrueValue(ST.Get(Map[TrueValue]));
4446 auto *FalseValue = CurrentSelect->getFalseValue();
4447 assert(Map.contains(FalseValue) && "No False Value!");
4448 Select->setFalseValue(ST.Get(Map[FalseValue]));
4449 } else {
4450 // Must be a Phi node then.
4451 auto *PHI = cast<PHINode>(V);
4452 // Fill the Phi node with values from predecessors.
4453 for (auto *B : predecessors(PHI->getParent())) {
4454 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4455 assert(Map.contains(PV) && "No predecessor Value!");
4456 PHI->addIncoming(ST.Get(Map[PV]), B);
4457 }
4458 }
4459 }
4460 }
4461
4462  /// Starting from the original value, recursively iterates over the def-use
4463  /// chain up to known ending values represented in the map. For each traversed
4464  /// phi/select, inserts a placeholder Phi or Select.
4465  /// Reports all newly created Phi/Select nodes by adding them to the set.
4466  /// Also reports the order in which the values have been traversed.
4467 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4468 SmallVectorImpl<Value *> &TraverseOrder,
4469 SimplificationTracker &ST) {
4470 SmallVector<Value *, 32> Worklist;
4471 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4472 "Address must be a Phi or Select node");
4473 auto *Dummy = PoisonValue::get(CommonType);
4474 Worklist.push_back(Original);
4475 while (!Worklist.empty()) {
4476 Value *Current = Worklist.pop_back_val();
4477 // if it is already visited or it is an ending value then skip it.
4478 if (Map.contains(Current))
4479 continue;
4480 TraverseOrder.push_back(Current);
4481
4482 // CurrentValue must be a Phi node or select. All others must be covered
4483 // by anchors.
4484 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4485 // Is it OK to get metadata from OrigSelect?!
4486 // Create a Select placeholder with dummy value.
4487 SelectInst *Select =
4488 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4489 CurrentSelect->getName(),
4490 CurrentSelect->getIterator(), CurrentSelect);
4491 Map[Current] = Select;
4492 ST.insertNewSelect(Select);
4493 // We are interested in True and False values.
4494 Worklist.push_back(CurrentSelect->getTrueValue());
4495 Worklist.push_back(CurrentSelect->getFalseValue());
4496 } else {
4497 // It must be a Phi node then.
4498 PHINode *CurrentPhi = cast<PHINode>(Current);
4499 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4500 PHINode *PHI =
4501 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4502 Map[Current] = PHI;
4503 ST.insertNewPhi(PHI);
4504 append_range(Worklist, CurrentPhi->incoming_values());
4505 }
4506 }
4507 }
4508
4509  bool addrModeCombiningAllowed() {
4510    if (DisableComplexAddrModes)
4511      return false;
4512    switch (DifferentField) {
4513    default:
4514      return false;
4515    case ExtAddrMode::BaseRegField:
4516      return AddrSinkCombineBaseReg;
4517    case ExtAddrMode::BaseGVField:
4518      return AddrSinkCombineBaseGV;
4519    case ExtAddrMode::BaseOffsField:
4520      return AddrSinkCombineBaseOffs;
4521    case ExtAddrMode::ScaledRegField:
4522      return AddrSinkCombineScaledReg;
4523    }
4524 }
4525};
4526} // end anonymous namespace
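// A minimal sketch (illustrative only): how the combiner above is typically
// driven. AM1 and AM2 stand for addressing modes matched for the same address
// reached along different control-flow paths, OriginalAddr for that address,
// and DL for the module DataLayout.
//
//   AddressingModeCombiner Combiner(DL, OriginalAddr);
//   bool Ok = Combiner.addNewAddrMode(AM1) && Combiner.addNewAddrMode(AM2);
//   if (Ok && Combiner.combineAddrModes()) {
//     const ExtAddrMode &AM = Combiner.getAddrMode(); // merged via Phi/Select
//     // ... use AM to sink the address computation ...
//   }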
4527
4528/// Try adding ScaleReg*Scale to the current addressing mode.
4529/// Return true and update AddrMode if this addr mode is legal for the target,
4530/// false if not.
4531bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4532 unsigned Depth) {
4533 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4534 // mode. Just process that directly.
4535 if (Scale == 1)
4536 return matchAddr(ScaleReg, Depth);
4537
4538 // If the scale is 0, it takes nothing to add this.
4539 if (Scale == 0)
4540 return true;
4541
4542 // If we already have a scale of this value, we can add to it, otherwise, we
4543 // need an available scale field.
4544 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4545 return false;
4546
4547 ExtAddrMode TestAddrMode = AddrMode;
4548
4549 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4550 // [A+B + A*7] -> [B+A*8].
4551 TestAddrMode.Scale += Scale;
4552 TestAddrMode.ScaledReg = ScaleReg;
4553
4554 // If the new address isn't legal, bail out.
4555 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4556 return false;
4557
4558 // It was legal, so commit it.
4559 AddrMode = TestAddrMode;
4560
4561 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4562 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4563  // X*Scale + C*Scale to the addr mode. If we found an available IV increment,
4564  // do not go any further: we can reuse it and cannot eliminate it.
4565 ConstantInt *CI = nullptr;
4566 Value *AddLHS = nullptr;
4567 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4568 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4569 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4570 TestAddrMode.InBounds = false;
4571 TestAddrMode.ScaledReg = AddLHS;
4572 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4573
4574 // If this addressing mode is legal, commit it and remember that we folded
4575 // this instruction.
4576 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4577 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4578 AddrMode = TestAddrMode;
4579 return true;
4580 }
4581 // Restore status quo.
4582 TestAddrMode = AddrMode;
4583 }
4584
4585 // If this is an add recurrence with a constant step, return the increment
4586 // instruction and the canonicalized step.
4587 auto GetConstantStep =
4588 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4589 auto *PN = dyn_cast<PHINode>(V);
4590 if (!PN)
4591 return std::nullopt;
4592 auto IVInc = getIVIncrement(PN, &LI);
4593 if (!IVInc)
4594 return std::nullopt;
4595    // TODO: The result of the intrinsics above is two's complement. However, when
4596    // the IV inc is expressed as add or sub, iv.next is potentially a poison value.
4597    // If it has nuw or nsw flags, we need to make sure that these flags are
4598    // inferable at the point of the memory instruction. Otherwise we are replacing
4599    // a well-defined two's complement computation with poison. Currently, to avoid
4600    // the potentially complex analysis needed to prove this, we reject such cases.
4601 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4602 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4603 return std::nullopt;
4604 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4605 return std::make_pair(IVInc->first, ConstantStep->getValue());
4606 return std::nullopt;
4607 };
4608
4609 // Try to account for the following special case:
4610 // 1. ScaleReg is an inductive variable;
4611 // 2. We use it with non-zero offset;
4612 // 3. IV's increment is available at the point of memory instruction.
4613 //
4614 // In this case, we may reuse the IV increment instead of the IV Phi to
4615 // achieve the following advantages:
4616 // 1. If IV step matches the offset, we will have no need in the offset;
4617 // 2. Even if they don't match, we will reduce the overlap of living IV
4618 // and IV increment, that will potentially lead to better register
4619 // assignment.
4620 if (AddrMode.BaseOffs) {
4621 if (auto IVStep = GetConstantStep(ScaleReg)) {
4622 Instruction *IVInc = IVStep->first;
4623 // The following assert is important to ensure a lack of infinite loops.
4624      // This transform is (intentionally) the inverse of the one just above.
4625 // If they don't agree on the definition of an increment, we'd alternate
4626 // back and forth indefinitely.
4627 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4628 APInt Step = IVStep->second;
4629 APInt Offset = Step * AddrMode.Scale;
4630 if (Offset.isSignedIntN(64)) {
4631 TestAddrMode.InBounds = false;
4632 TestAddrMode.ScaledReg = IVInc;
4633 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4634        // If this addressing mode is legal, commit it.
4635 // (Note that we defer the (expensive) domtree base legality check
4636 // to the very last possible point.)
4637 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4638 getDTFn().dominates(IVInc, MemoryInst)) {
4639 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4640 AddrMode = TestAddrMode;
4641 return true;
4642 }
4643 // Restore status quo.
4644 TestAddrMode = AddrMode;
4645 }
4646 }
4647 }
4648
4649 // Otherwise, just return what we have.
4650 return true;
4651}
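// A minimal sketch (illustrative pseudo-IR, not taken from a real test): the
// ScaleReg = X + C rewrite performed above folds the constant into the
// displacement when the resulting mode is still legal for the target.
//
//   ; before: addr = Base + 4 * %t      where %t = add i64 %x, 3
//   ; after:  addr = Base + 4 * %x + 12 (BaseOffs += 3 * Scale)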
4652
4653/// This is a little filter, which returns true if an addressing computation
4654/// involving I might be folded into a load/store accessing it.
4655/// This doesn't need to be perfect, but needs to accept at least
4656/// the set of instructions that MatchOperationAddr can.
4657static bool MightBeFoldableInst(Instruction *I) {
4658  switch (I->getOpcode()) {
4659 case Instruction::BitCast:
4660 case Instruction::AddrSpaceCast:
4661 // Don't touch identity bitcasts.
4662 if (I->getType() == I->getOperand(0)->getType())
4663 return false;
4664 return I->getType()->isIntOrPtrTy();
4665 case Instruction::PtrToInt:
4666 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4667 return true;
4668 case Instruction::IntToPtr:
4669 // We know the input is intptr_t, so this is foldable.
4670 return true;
4671 case Instruction::Add:
4672 return true;
4673 case Instruction::Mul:
4674 case Instruction::Shl:
4675 // Can only handle X*C and X << C.
4676 return isa<ConstantInt>(I->getOperand(1));
4677 case Instruction::GetElementPtr:
4678 return true;
4679 default:
4680 return false;
4681 }
4682}
4683
4684/// Check whether or not \p Val is a legal instruction for \p TLI.
4685/// \note \p Val is assumed to be the product of some type promotion.
4686/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4687/// to be legal, as the non-promoted value would have had the same state.
4688static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4689                                       const DataLayout &DL, Value *Val) {
4690 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4691 if (!PromotedInst)
4692 return false;
4693 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4694 // If the ISDOpcode is undefined, it was undefined before the promotion.
4695 if (!ISDOpcode)
4696 return true;
4697 // Otherwise, check if the promoted instruction is legal or not.
4698 return TLI.isOperationLegalOrCustom(
4699 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4700}
4701
4702namespace {
4703
4704/// Helper class to perform type promotion.
4705class TypePromotionHelper {
4706 /// Utility function to add a promoted instruction \p ExtOpnd to
4707 /// \p PromotedInsts and record the type of extension we have seen.
4708 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4709 Instruction *ExtOpnd, bool IsSExt) {
4710 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4711 auto [It, Inserted] = PromotedInsts.try_emplace(ExtOpnd);
4712 if (!Inserted) {
4713 // If the new extension is same as original, the information in
4714 // PromotedInsts[ExtOpnd] is still correct.
4715 if (It->second.getInt() == ExtTy)
4716 return;
4717
4718 // Now the new extension is different from old extension, we make
4719 // the type information invalid by setting extension type to
4720 // BothExtension.
4721 ExtTy = BothExtension;
4722 }
4723 It->second = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4724 }
4725
4726 /// Utility function to query the original type of instruction \p Opnd
4727 /// with a matched extension type. If the extension doesn't match, we
4728 /// cannot use the information we had on the original type.
4729 /// BothExtension doesn't match any extension type.
4730 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4731 Instruction *Opnd, bool IsSExt) {
4732 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4733 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4734 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4735 return It->second.getPointer();
4736 return nullptr;
4737 }
4738
4739 /// Utility function to check whether or not a sign or zero extension
4740 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4741 /// either using the operands of \p Inst or promoting \p Inst.
4742 /// The type of the extension is defined by \p IsSExt.
4743 /// In other words, check if:
4744 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4745 /// #1 Promotion applies:
4746 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4747 /// #2 Operand reuses:
4748 /// ext opnd1 to ConsideredExtType.
4749 /// \p PromotedInsts maps the instructions to their type before promotion.
4750 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4751 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4752
4753 /// Utility function to determine if \p OpIdx should be promoted when
4754 /// promoting \p Inst.
4755 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4756 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4757 }
4758
4759 /// Utility function to promote the operand of \p Ext when this
4760 /// operand is a promotable trunc or sext or zext.
4761 /// \p PromotedInsts maps the instructions to their type before promotion.
4762 /// \p CreatedInstsCost[out] contains the cost of all instructions
4763 /// created to promote the operand of Ext.
4764 /// Newly added extensions are inserted in \p Exts.
4765 /// Newly added truncates are inserted in \p Truncs.
4766 /// Should never be called directly.
4767 /// \return The promoted value which is used instead of Ext.
4768 static Value *promoteOperandForTruncAndAnyExt(
4769 Instruction *Ext, TypePromotionTransaction &TPT,
4770 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4771 SmallVectorImpl<Instruction *> *Exts,
4772 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4773
4774 /// Utility function to promote the operand of \p Ext when this
4775 /// operand is promotable and is not a supported trunc or sext.
4776 /// \p PromotedInsts maps the instructions to their type before promotion.
4777 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4778 /// created to promote the operand of Ext.
4779 /// Newly added extensions are inserted in \p Exts.
4780 /// Newly added truncates are inserted in \p Truncs.
4781 /// Should never be called directly.
4782 /// \return The promoted value which is used instead of Ext.
4783 static Value *promoteOperandForOther(Instruction *Ext,
4784 TypePromotionTransaction &TPT,
4785 InstrToOrigTy &PromotedInsts,
4786 unsigned &CreatedInstsCost,
4787 SmallVectorImpl<Instruction *> *Exts,
4788 SmallVectorImpl<Instruction *> *Truncs,
4789 const TargetLowering &TLI, bool IsSExt);
4790
4791 /// \see promoteOperandForOther.
4792 static Value *signExtendOperandForOther(
4793 Instruction *Ext, TypePromotionTransaction &TPT,
4794 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4795 SmallVectorImpl<Instruction *> *Exts,
4796 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4797 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4798 Exts, Truncs, TLI, true);
4799 }
4800
4801 /// \see promoteOperandForOther.
4802 static Value *zeroExtendOperandForOther(
4803 Instruction *Ext, TypePromotionTransaction &TPT,
4804 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4805 SmallVectorImpl<Instruction *> *Exts,
4806 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4807 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4808 Exts, Truncs, TLI, false);
4809 }
4810
4811public:
4812 /// Type for the utility function that promotes the operand of Ext.
4813 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4814 InstrToOrigTy &PromotedInsts,
4815 unsigned &CreatedInstsCost,
4816 SmallVectorImpl<Instruction *> *Exts,
4817 SmallVectorImpl<Instruction *> *Truncs,
4818 const TargetLowering &TLI);
4819
4820 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4821 /// action to promote the operand of \p Ext instead of using Ext.
4822 /// \return NULL if no promotable action is possible with the current
4823 /// sign extension.
4824 /// \p InsertedInsts keeps track of all the instructions inserted by the
4825 /// other CodeGenPrepare optimizations. This information is important
4826 /// because we do not want to promote these instructions as CodeGenPrepare
4827 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4828 /// \p PromotedInsts maps the instructions to their type before promotion.
4829 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4830 const TargetLowering &TLI,
4831 const InstrToOrigTy &PromotedInsts);
4832};
4833
4834} // end anonymous namespace
4835
4836bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4837 Type *ConsideredExtType,
4838 const InstrToOrigTy &PromotedInsts,
4839 bool IsSExt) {
4840 // The promotion helper does not know how to deal with vector types yet.
4841 // To be able to fix that, we would need to fix the places where we
4842 // statically extend, e.g., constants and such.
4843 if (Inst->getType()->isVectorTy())
4844 return false;
4845
4846 // We can always get through zext.
4847 if (isa<ZExtInst>(Inst))
4848 return true;
4849
4850 // sext(sext) is ok too.
4851 if (IsSExt && isa<SExtInst>(Inst))
4852 return true;
4853
4854 // We can get through a binary operator if it is legal. In other words, the
4855 // binary operator must have a nuw or nsw flag.
4856 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4857 if (isa<OverflowingBinaryOperator>(BinOp) &&
4858 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4859 (IsSExt && BinOp->hasNoSignedWrap())))
4860 return true;
4861
4862 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4863 if ((Inst->getOpcode() == Instruction::And ||
4864 Inst->getOpcode() == Instruction::Or))
4865 return true;
4866
4867 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4868 if (Inst->getOpcode() == Instruction::Xor) {
4869 // Make sure it is not a NOT.
4870 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4871 if (!Cst->getValue().isAllOnes())
4872 return true;
4873 }
4874
4875 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4876 // It may change a poisoned value into a regular value, like
4877 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4878 //        (poisoned value)                 (regular value)
4879 // It should be OK since undef covers valid value.
4880 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4881 return true;
4882
4883 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4884 // It may change a poisoned value into a regular value, like
4885 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4886 //        (poisoned value)                 (regular value)
4887 // It should be OK since undef covers valid value.
4888 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4889 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4890 if (ExtInst->hasOneUse()) {
4891 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4892 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4893 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4894 if (Cst &&
4895 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4896 return true;
4897 }
4898 }
4899 }
4900
4901 // Check if we can do the following simplification.
4902 // ext(trunc(opnd)) --> ext(opnd)
4903 if (!isa<TruncInst>(Inst))
4904 return false;
4905
4906 Value *OpndVal = Inst->getOperand(0);
4907 // Check if we can use this operand in the extension.
4908 // If the type is larger than the result type of the extension, we cannot.
4909 if (!OpndVal->getType()->isIntegerTy() ||
4910 OpndVal->getType()->getIntegerBitWidth() >
4911 ConsideredExtType->getIntegerBitWidth())
4912 return false;
4913
4914 // If the operand of the truncate is not an instruction, we will not have
4915 // any information on the dropped bits.
4916 // (Actually we could for constants, but it is not worth the extra logic).
4917 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4918 if (!Opnd)
4919 return false;
4920
4921 // Check if the source of the type is narrow enough.
4922 // I.e., check that the trunc just drops extended bits of the same kind as
4923 // the extension.
4924 // #1 get the type of the operand and check the kind of the extended bits.
4925 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4926 if (OpndType)
4927 ;
4928 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4929 OpndType = Opnd->getOperand(0)->getType();
4930 else
4931 return false;
4932
4933 // #2 check that the truncate just drops extended bits.
4934 return Inst->getType()->getIntegerBitWidth() >=
4935 OpndType->getIntegerBitWidth();
4936}
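// Illustrative sketch (assumed example; value names are invented): for
//   %w = sext i16 %x to i32
//   %t = trunc i32 %w to i24
//   %e = sext i24 %t to i64
// the truncate only drops bits that were produced by a sign extension of the
// same kind, so canGetThrough(%t, i64, ..., /*IsSExt=*/true) returns true and
// the final sext is allowed to look through %t.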
4937
4938TypePromotionHelper::Action TypePromotionHelper::getAction(
4939 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4940 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4941 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4942 "Unexpected instruction type");
4943 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4944 Type *ExtTy = Ext->getType();
4945 bool IsSExt = isa<SExtInst>(Ext);
4946 // If the operand of the extension is not an instruction, we cannot
4947 // get through.
4948 // If it is, check whether we can get through.
4949 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4950 return nullptr;
4951
4952 // Do not promote if the operand has been added by codegenprepare.
4953 // Otherwise, it means we are undoing an optimization that is likely to be
4954 // redone, thus causing potential infinite loop.
4955 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4956 return nullptr;
4957
4958 // SExt or Trunc instructions.
4959 // Return the related handler.
4960 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4961 isa<ZExtInst>(ExtOpnd))
4962 return promoteOperandForTruncAndAnyExt;
4963
4964 // Regular instruction.
4965 // Abort early if we will have to insert non-free instructions.
4966 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4967 return nullptr;
4968 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4969}
4970
4971Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4972 Instruction *SExt, TypePromotionTransaction &TPT,
4973 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4974 SmallVectorImpl<Instruction *> *Exts,
4975 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4976 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4977 // get through it and this method should not be called.
4978 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4979 Value *ExtVal = SExt;
4980 bool HasMergedNonFreeExt = false;
4981 if (isa<ZExtInst>(SExtOpnd)) {
4982 // Replace s|zext(zext(opnd))
4983 // => zext(opnd).
4984 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4985 Value *ZExt =
4986 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4987 TPT.replaceAllUsesWith(SExt, ZExt);
4988 TPT.eraseInstruction(SExt);
4989 ExtVal = ZExt;
4990 } else {
4991 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4992 // => z|sext(opnd).
4993 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4994 }
4995 CreatedInstsCost = 0;
4996
4997 // Remove dead code.
4998 if (SExtOpnd->use_empty())
4999 TPT.eraseInstruction(SExtOpnd);
5000
5001 // Check if the extension is still needed.
5002 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
5003 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
5004 if (ExtInst) {
5005 if (Exts)
5006 Exts->push_back(ExtInst);
5007 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
5008 }
5009 return ExtVal;
5010 }
5011
5012 // At this point we have: ext ty opnd to ty.
5013 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
5014 Value *NextVal = ExtInst->getOperand(0);
5015 TPT.eraseInstruction(ExtInst, NextVal);
5016 return NextVal;
5017}
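// Illustrative before/after (assumed example; value names are invented):
//   before: %z = zext i8 %v to i16
//           %s = sext i16 %z to i64
//   after : %z2 = zext i8 %v to i64   ; s|zext(zext(opnd)) folded to one zext
// The reported CreatedInstsCost stays 0 when the surviving extension is free
// for the target or when a non-free extension was merged away in the process.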
5018
5019Value *TypePromotionHelper::promoteOperandForOther(
5020 Instruction *Ext, TypePromotionTransaction &TPT,
5021 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
5022 SmallVectorImpl<Instruction *> *Exts,
5023 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
5024 bool IsSExt) {
5025 // By construction, the operand of Ext is an instruction. Otherwise we cannot
5026 // get through it and this method should not be called.
5027 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
5028 CreatedInstsCost = 0;
5029 if (!ExtOpnd->hasOneUse()) {
5030 // ExtOpnd will be promoted.
5031 // All its uses, but Ext, will need to use a truncated value of the
5032 // promoted version.
5033 // Create the truncate now.
5034 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
5035 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
5036 // Insert it just after the definition.
5037 ITrunc->moveAfter(ExtOpnd);
5038 if (Truncs)
5039 Truncs->push_back(ITrunc);
5040 }
5041
5042 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
5043 // Restore the operand of Ext (which has been replaced by the previous call
5044 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
5045 TPT.setOperand(Ext, 0, ExtOpnd);
5046 }
5047
5048 // Get through the Instruction:
5049 // 1. Update its type.
5050 // 2. Replace the uses of Ext by Inst.
5051 // 3. Extend each operand that needs to be extended.
5052
5053 // Remember the original type of the instruction before promotion.
5054 // This is useful to know that the high bits are sign extended bits.
5055 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
5056 // Step #1.
5057 TPT.mutateType(ExtOpnd, Ext->getType());
5058 // Step #2.
5059 TPT.replaceAllUsesWith(Ext, ExtOpnd);
5060 // Step #3.
5061 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
5062 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
5063 ++OpIdx) {
5064 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
5065 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
5066 !shouldExtOperand(ExtOpnd, OpIdx)) {
5067 LLVM_DEBUG(dbgs() << "No need to propagate\n");
5068 continue;
5069 }
5070 // Check if we can statically extend the operand.
5071 Value *Opnd = ExtOpnd->getOperand(OpIdx);
5072 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
5073 LLVM_DEBUG(dbgs() << "Statically extend\n");
5074 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
5075 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
5076 : Cst->getValue().zext(BitWidth);
5077 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
5078 continue;
5079 }
5080 // UndefValues are typed, so we have to statically extend them.
5081 if (isa<UndefValue>(Opnd)) {
5082 LLVM_DEBUG(dbgs() << "Statically extend\n");
5083 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
5084 continue;
5085 }
5086
5087 // Otherwise we have to explicitly sign extend the operand.
5088 Value *ValForExtOpnd = IsSExt
5089 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
5090 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
5091 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
5092 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
5093 if (!InstForExtOpnd)
5094 continue;
5095
5096 if (Exts)
5097 Exts->push_back(InstForExtOpnd);
5098
5099 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
5100 }
5101 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
5102 TPT.eraseInstruction(Ext);
5103 return ExtOpnd;
5104}
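// Illustrative before/after (assumed example; value names are invented):
//   before: %a = add nsw i32 %x, 1
//           %e = sext i32 %a to i64        ; Ext
//           %u = mul i32 %a, %y            ; a second use of %a
//   after : %x2 = sext i32 %x to i64       ; operand explicitly extended
//           %a2 = add nsw i64 %x2, 1       ; ExtOpnd mutated to the wide type
//           %t  = trunc i64 %a2 to i32     ; inserted for the other use
//           %u  = mul i32 %t, %y
// The original sext %e is erased and its uses now flow from %a2; the constant
// operand 1 is extended statically rather than with a new instruction.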
5105
5106/// Check whether or not promoting an instruction to a wider type is profitable.
5107/// \p NewCost gives the cost of extension instructions created by the
5108/// promotion.
5109/// \p OldCost gives the cost of extension instructions before the promotion
5110/// plus the number of instructions that have been
5111 /// matched in the addressing mode thanks to the promotion.
5112/// \p PromotedOperand is the value that has been promoted.
5113/// \return True if the promotion is profitable, false otherwise.
5114bool AddressingModeMatcher::isPromotionProfitable(
5115 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
5116 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
5117 << '\n');
5118 // The cost of the new extensions is greater than the cost of the
5119 // old extension plus what we folded.
5120 // This is not profitable.
5121 if (NewCost > OldCost)
5122 return false;
5123 if (NewCost < OldCost)
5124 return true;
5125 // The promotion is neutral but it may help folding the sign extension in
5126 // loads for instance.
5127 // Check that we did not create an illegal instruction.
5128 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
5129}
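// Worked example (assumed numbers): if promoting a sext past an add creates
// one new, non-free extension (NewCost = 1) while the original sext also cost
// 1 and the promoted add is now folded into the addressing mode
// (OldCost = 1 + 1 = 2), then NewCost < OldCost and the promotion is kept
// without even consulting isPromotedInstructionLegal.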
5130
5131/// Given an instruction or constant expr, see if we can fold the operation
5132/// into the addressing mode. If so, update the addressing mode and return
5133/// true, otherwise return false without modifying AddrMode.
5134/// If \p MovedAway is not NULL, it contains the information of whether or
5135/// not AddrInst has to be folded into the addressing mode on success.
5136/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
5137 /// mode because it has been moved away.
5138/// Thus AddrInst must not be added in the matched instructions.
5139/// This state can happen when AddrInst is a sext, since it may be moved away.
5140/// Therefore, AddrInst may not be valid when MovedAway is true and it must
5141/// not be referenced anymore.
5142bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
5143 unsigned Depth,
5144 bool *MovedAway) {
5145 // Avoid exponential behavior on extremely deep expression trees.
5146 if (Depth >= 5)
5147 return false;
5148
5149 // By default, all matched instructions stay in place.
5150 if (MovedAway)
5151 *MovedAway = false;
5152
5153 switch (Opcode) {
5154 case Instruction::PtrToInt:
5155 // PtrToInt is always a noop, as we know that the int type is pointer sized.
5156 return matchAddr(AddrInst->getOperand(0), Depth);
5157 case Instruction::IntToPtr: {
5158 auto AS = AddrInst->getType()->getPointerAddressSpace();
5159 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
5160 // This inttoptr is a no-op if the integer type is pointer sized.
5161 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
5162 return matchAddr(AddrInst->getOperand(0), Depth);
5163 return false;
5164 }
5165 case Instruction::BitCast:
5166 // BitCast is always a noop, and we can handle it as long as it is
5167 // int->int or pointer->pointer (we don't want int<->fp or something).
5168 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
5169 // Don't touch identity bitcasts. These were probably put here by LSR,
5170 // and we don't want to mess around with them. Assume it knows what it
5171 // is doing.
5172 AddrInst->getOperand(0)->getType() != AddrInst->getType())
5173 return matchAddr(AddrInst->getOperand(0), Depth);
5174 return false;
5175 case Instruction::AddrSpaceCast: {
5176 unsigned SrcAS =
5177 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
5178 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
5179 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
5180 return matchAddr(AddrInst->getOperand(0), Depth);
5181 return false;
5182 }
5183 case Instruction::Add: {
5184 // Check to see if we can merge in one operand, then the other. If so, we
5185 // win.
5186 ExtAddrMode BackupAddrMode = AddrMode;
5187 unsigned OldSize = AddrModeInsts.size();
5188 // Start a transaction at this point.
5189 // The LHS may match but not the RHS.
5190 // Therefore, we need a higher-level restoration point to undo a partially
5191 // matched operation.
5192 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5193 TPT.getRestorationPoint();
5194
5195 // Try to match an integer constant second to increase its chance of ending
5196 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
5197 int First = 0, Second = 1;
5198 if (isa<ConstantInt>(AddrInst->getOperand(First))
5199 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5200 std::swap(First, Second);
5201 AddrMode.InBounds = false;
5202 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5203 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5204 return true;
5205
5206 // Restore the old addr mode info.
5207 AddrMode = BackupAddrMode;
5208 AddrModeInsts.resize(OldSize);
5209 TPT.rollback(LastKnownGood);
5210
5211 // Otherwise this was over-aggressive. Try merging operands in the opposite
5212 // order.
5213 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5214 matchAddr(AddrInst->getOperand(First), Depth + 1))
5215 return true;
5216
5217 // Otherwise we definitely can't merge the ADD in.
5218 AddrMode = BackupAddrMode;
5219 AddrModeInsts.resize(OldSize);
5220 TPT.rollback(LastKnownGood);
5221 break;
5222 }
5223 // case Instruction::Or:
5224 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5225 // break;
5226 case Instruction::Mul:
5227 case Instruction::Shl: {
5228 // Can only handle X*C and X << C.
5229 AddrMode.InBounds = false;
5230 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5231 if (!RHS || RHS->getBitWidth() > 64)
5232 return false;
5233 int64_t Scale = Opcode == Instruction::Shl
5234 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5235 : RHS->getSExtValue();
5236
5237 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5238 }
5239 case Instruction::GetElementPtr: {
5240 // Scan the GEP. We can handle it if it contains constant offsets and at most
5241 // one variable offset.
5242 int VariableOperand = -1;
5243 unsigned VariableScale = 0;
5244
5245 int64_t ConstantOffset = 0;
5246 gep_type_iterator GTI = gep_type_begin(AddrInst);
5247 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5248 if (StructType *STy = GTI.getStructTypeOrNull()) {
5249 const StructLayout *SL = DL.getStructLayout(STy);
5250 unsigned Idx =
5251 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5252 ConstantOffset += SL->getElementOffset(Idx);
5253 } else {
5254 TypeSize TS = GTI.getSequentialElementStride(DL);
5255 if (TS.isNonZero()) {
5256 // The optimisations below currently only work for fixed offsets.
5257 if (TS.isScalable())
5258 return false;
5259 int64_t TypeSize = TS.getFixedValue();
5260 if (ConstantInt *CI =
5261 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5262 const APInt &CVal = CI->getValue();
5263 if (CVal.getSignificantBits() <= 64) {
5264 ConstantOffset += CVal.getSExtValue() * TypeSize;
5265 continue;
5266 }
5267 }
5268 // We only allow one variable index at the moment.
5269 if (VariableOperand != -1)
5270 return false;
5271
5272 // Remember the variable index.
5273 VariableOperand = i;
5274 VariableScale = TypeSize;
5275 }
5276 }
5277 }
5278
5279 // A common case is for the GEP to only do a constant offset. In this case,
5280 // just add it to the disp field and check validity.
5281 if (VariableOperand == -1) {
5282 AddrMode.BaseOffs += ConstantOffset;
5283 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5284 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5285 AddrMode.InBounds = false;
5286 return true;
5287 }
5288 AddrMode.BaseOffs -= ConstantOffset;
5289
5290 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5291 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5292 ConstantOffset > 0) {
5293 // Record GEPs with non-zero offsets as candidates for splitting in
5294 // the event that the offset cannot fit into the r+i addressing mode.
5295 // Simple and common case that only one GEP is used in calculating the
5296 // address for the memory access.
5297 Value *Base = AddrInst->getOperand(0);
5298 auto *BaseI = dyn_cast<Instruction>(Base);
5299 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5300 if (isa<AllocaInst>(Base) || isa<GlobalVariable>(Base) ||
5301 (BaseI && !isa<CastInst>(BaseI) &&
5302 !isa<GetElementPtrInst>(BaseI))) {
5303 // Make sure the parent block allows inserting non-PHI instructions
5304 // before the terminator.
5305 BasicBlock *Parent = BaseI ? BaseI->getParent()
5306 : &GEP->getFunction()->getEntryBlock();
5307 if (!Parent->getTerminator()->isEHPad())
5308 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5309 }
5310 }
5311
5312 return false;
5313 }
5314
5315 // Save the valid addressing mode in case we can't match.
5316 ExtAddrMode BackupAddrMode = AddrMode;
5317 unsigned OldSize = AddrModeInsts.size();
5318
5319 // See if the scale and offset amount is valid for this target.
5320 AddrMode.BaseOffs += ConstantOffset;
5321 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5322 AddrMode.InBounds = false;
5323
5324 // Match the base operand of the GEP.
5325 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5326 // If it couldn't be matched, just stuff the value in a register.
5327 if (AddrMode.HasBaseReg) {
5328 AddrMode = BackupAddrMode;
5329 AddrModeInsts.resize(OldSize);
5330 return false;
5331 }
5332 AddrMode.HasBaseReg = true;
5333 AddrMode.BaseReg = AddrInst->getOperand(0);
5334 }
5335
5336 // Match the remaining variable portion of the GEP.
5337 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5338 Depth)) {
5339 // If it couldn't be matched, try stuffing the base into a register
5340 // instead of matching it, and retrying the match of the scale.
5341 AddrMode = BackupAddrMode;
5342 AddrModeInsts.resize(OldSize);
5343 if (AddrMode.HasBaseReg)
5344 return false;
5345 AddrMode.HasBaseReg = true;
5346 AddrMode.BaseReg = AddrInst->getOperand(0);
5347 AddrMode.BaseOffs += ConstantOffset;
5348 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5349 VariableScale, Depth)) {
5350 // If even that didn't work, bail.
5351 AddrMode = BackupAddrMode;
5352 AddrModeInsts.resize(OldSize);
5353 return false;
5354 }
5355 }
5356
5357 return true;
5358 }
5359 case Instruction::SExt:
5360 case Instruction::ZExt: {
5361 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5362 if (!Ext)
5363 return false;
5364
5365 // Try to move this ext out of the way of the addressing mode.
5366 // Ask for a method for doing so.
5367 TypePromotionHelper::Action TPH =
5368 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5369 if (!TPH)
5370 return false;
5371
5372 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5373 TPT.getRestorationPoint();
5374 unsigned CreatedInstsCost = 0;
5375 unsigned ExtCost = !TLI.isExtFree(Ext);
5376 Value *PromotedOperand =
5377 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5378 // SExt has been moved away.
5379 // Thus either it will be rematched later in the recursive calls or it is
5380 // gone. Anyway, we must not fold it into the addressing mode at this point.
5381 // E.g.,
5382 // op = add opnd, 1
5383 // idx = ext op
5384 // addr = gep base, idx
5385 // is now:
5386 // promotedOpnd = ext opnd <- no match here
5387 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5388 // addr = gep base, op <- match
5389 if (MovedAway)
5390 *MovedAway = true;
5391
5392 assert(PromotedOperand &&
5393 "TypePromotionHelper should have filtered out those cases");
5394
5395 ExtAddrMode BackupAddrMode = AddrMode;
5396 unsigned OldSize = AddrModeInsts.size();
5397
5398 if (!matchAddr(PromotedOperand, Depth) ||
5399 // The total of the new cost is equal to the cost of the created
5400 // instructions.
5401 // The total of the old cost is equal to the cost of the extension plus
5402 // what we have saved in the addressing mode.
5403 !isPromotionProfitable(CreatedInstsCost,
5404 ExtCost + (AddrModeInsts.size() - OldSize),
5405 PromotedOperand)) {
5406 AddrMode = BackupAddrMode;
5407 AddrModeInsts.resize(OldSize);
5408 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5409 TPT.rollback(LastKnownGood);
5410 return false;
5411 }
5412
5413 // SExt has been deleted. Make sure it is not referenced by the AddrMode.
5414 AddrMode.replaceWith(Ext, PromotedOperand);
5415 return true;
5416 }
5417 case Instruction::Call:
5418 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5419 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5420 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5421 if (TLI.addressingModeSupportsTLS(GV))
5422 return matchAddr(AddrInst->getOperand(0), Depth);
5423 }
5424 }
5425 break;
5426 }
5427 return false;
5428}
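// Illustrative sketch (assumed example; names and target behaviour invented):
// for
//   %p = getelementptr inbounds %struct.S, ptr %base, i64 %i, i32 1
//   %v = load i32, ptr %p
// the struct field index contributes a constant offset, %i is the single
// variable index, and on a target where [BaseReg + Scale*ScaledReg + BaseOffs]
// is legal the matcher ends up with BaseReg = %base, ScaledReg = %i,
// Scale = size of %struct.S and BaseOffs = the byte offset of field 1.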
5429
5430/// If we can, try to add the value of 'Addr' into the current addressing mode.
5431/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5432/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5433/// for the target.
5434///
5435bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5436 // Start a transaction at this point that we will rollback if the matching
5437 // fails.
5438 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5439 TPT.getRestorationPoint();
5440 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5441 if (CI->getValue().isSignedIntN(64)) {
5442 // Check if the addition would result in a signed overflow.
5443 int64_t Result;
5444 bool Overflow =
5445 AddOverflow(AddrMode.BaseOffs, CI->getSExtValue(), Result);
5446 if (!Overflow) {
5447 // Fold in immediates if legal for the target.
5448 AddrMode.BaseOffs = Result;
5449 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5450 return true;
5451 AddrMode.BaseOffs -= CI->getSExtValue();
5452 }
5453 }
5454 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5455 // If this is a global variable, try to fold it into the addressing mode.
5456 if (!AddrMode.BaseGV) {
5457 AddrMode.BaseGV = GV;
5458 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5459 return true;
5460 AddrMode.BaseGV = nullptr;
5461 }
5462 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5463 ExtAddrMode BackupAddrMode = AddrMode;
5464 unsigned OldSize = AddrModeInsts.size();
5465
5466 // Check to see if it is possible to fold this operation.
5467 bool MovedAway = false;
5468 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5469 // This instruction may have been moved away. If so, there is nothing
5470 // to check here.
5471 if (MovedAway)
5472 return true;
5473 // Okay, it's possible to fold this. Check to see if it is actually
5474 // *profitable* to do so. We use a simple cost model to avoid increasing
5475 // register pressure too much.
5476 if (I->hasOneUse() ||
5477 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5478 AddrModeInsts.push_back(I);
5479 return true;
5480 }
5481
5482 // It isn't profitable to do this, roll back.
5483 AddrMode = BackupAddrMode;
5484 AddrModeInsts.resize(OldSize);
5485 TPT.rollback(LastKnownGood);
5486 }
5487 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5488 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5489 return true;
5490 TPT.rollback(LastKnownGood);
5491 } else if (isa<ConstantPointerNull>(Addr)) {
5492 // Null pointer gets folded without affecting the addressing mode.
5493 return true;
5494 }
5495
5496 // Worst case, the target should support [reg] addressing modes. :)
5497 if (!AddrMode.HasBaseReg) {
5498 AddrMode.HasBaseReg = true;
5499 AddrMode.BaseReg = Addr;
5500 // Still check for legality in case the target supports [imm] but not [i+r].
5501 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5502 return true;
5503 AddrMode.HasBaseReg = false;
5504 AddrMode.BaseReg = nullptr;
5505 }
5506
5507 // If the base register is already taken, see if we can do [r+r].
5508 if (AddrMode.Scale == 0) {
5509 AddrMode.Scale = 1;
5510 AddrMode.ScaledReg = Addr;
5511 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5512 return true;
5513 AddrMode.Scale = 0;
5514 AddrMode.ScaledReg = nullptr;
5515 }
5516 // Couldn't match.
5517 TPT.rollback(LastKnownGood);
5518 return false;
5519}
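// Fallback behaviour in one example (assumed opaque address %addr): when the
// address is neither a constant, a global nor a matchable instruction, the
// matcher first tries plain [%addr] as the base register and, if a base
// register is already taken, [BaseReg + 1*%addr] as the scaled register; if
// neither form is legal for the target, everything is rolled back and the
// match fails.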
5520
5521/// Check to see if all uses of OpVal by the specified inline asm call are due
5522/// to memory operands. If so, return true, otherwise return false.
5523static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5524 const TargetLowering &TLI,
5525 const TargetRegisterInfo &TRI) {
5526 const Function *F = CI->getFunction();
5527 TargetLowering::AsmOperandInfoVector TargetConstraints =
5528 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5529
5530 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5531 // Compute the constraint code and ConstraintType to use.
5532 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5533
5534 // If this asm operand is our Value*, and if it isn't an indirect memory
5535 // operand, we can't fold it! TODO: Also handle C_Address?
5536 if (OpInfo.CallOperandVal == OpVal &&
5537 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5538 !OpInfo.isIndirect))
5539 return false;
5540 }
5541
5542 return true;
5543}
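// Illustrative sketch (assumed constraint string): for
//   call void asm sideeffect "prefetcht0 $0", "*m"(ptr elementtype(i8) %p)
// the only use of %p is an indirect memory ("m") operand, so this returns true
// and the computation feeding %p remains a sinking candidate; a plain register
// ("r") constraint on %p would make it return false.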
5544
5545/// Recursively walk all the uses of I until we find a memory use.
5546/// If we find an obviously non-foldable instruction, return true.
5547/// Add accessed addresses and types to MemoryUses.
5548static bool FindAllMemoryUses(
5549 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5550 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5551 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5552 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5553 // If we already considered this instruction, we're done.
5554 if (!ConsideredInsts.insert(I).second)
5555 return false;
5556
5557 // If this is an obviously unfoldable instruction, bail out.
5558 if (!MightBeFoldableInst(I))
5559 return true;
5560
5561 // Loop over all the uses, recursively processing them.
5562 for (Use &U : I->uses()) {
5563 // Conservatively return true if we're seeing a large number or a deep chain
5564 // of users. This avoids excessive compilation times in pathological cases.
5565 if (SeenInsts++ >= MaxAddressUsersToScan)
5566 return true;
5567
5568 Instruction *UserI = cast<Instruction>(U.getUser());
5569 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5570 MemoryUses.push_back({&U, LI->getType()});
5571 continue;
5572 }
5573
5574 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5575 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5576 return true; // Storing addr, not into addr.
5577 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5578 continue;
5579 }
5580
5581 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5582 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5583 return true; // Storing addr, not into addr.
5584 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5585 continue;
5586 }
5587
5588 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5589 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5590 return true; // Storing addr, not into addr.
5591 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5592 continue;
5593 }
5594
5595 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) {
5597 Type *AccessTy;
5598 if (!TLI.getAddrModeArguments(II, PtrOps, AccessTy))
5599 return true;
5600
5601 if (!find(PtrOps, U.get()))
5602 return true;
5603
5604 MemoryUses.push_back({&U, AccessTy});
5605 continue;
5606 }
5607
5608 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5609 if (CI->hasFnAttr(Attribute::Cold)) {
5610 // If this is a cold call, we can sink the addressing calculation into
5611 // the cold path. See optimizeCallInst
5612 if (!llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI))
5613 continue;
5614 }
5615
5616 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5617 if (!IA)
5618 return true;
5619
5620 // If this is a memory operand, we're cool, otherwise bail out.
5621 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5622 return true;
5623 continue;
5624 }
5625
5626 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5627 PSI, BFI, SeenInsts))
5628 return true;
5629 }
5630
5631 return false;
5632}
5633
5634static bool FindAllMemoryUses(
5635 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5636 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5637 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5638 unsigned SeenInsts = 0;
5639 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5640 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5641 PSI, BFI, SeenInsts);
5642}
5643
5644
5645/// Return true if Val is already known to be live at the use site that we're
5646/// folding it into. If so, there is no cost to include it in the addressing
5647/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5648/// instruction already.
5649bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5650 Value *KnownLive1,
5651 Value *KnownLive2) {
5652 // If Val is either of the known-live values, we know it is live!
5653 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5654 return true;
5655
5656 // All values other than instructions and arguments (e.g. constants) are live.
5657 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5658 return true;
5659
5660 // If Val is a constant-sized alloca in the entry block, it is live; this is
5661 // because it is just a reference to the stack/frame pointer, which is
5662 // live for the whole function.
5663 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5664 if (AI->isStaticAlloca())
5665 return true;
5666
5667 // Check to see if this value is already used in the memory instruction's
5668 // block. If so, it's already live into the block at the very least, so we
5669 // can reasonably fold it.
5670 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5671}
5672
5673/// It is possible for the addressing mode of the machine to fold the specified
5674/// instruction into a load or store that ultimately uses it.
5675/// However, the specified instruction has multiple uses.
5676/// Given this, it may actually increase register pressure to fold it
5677/// into the load. For example, consider this code:
5678///
5679/// X = ...
5680/// Y = X+1
5681/// use(Y) -> nonload/store
5682/// Z = Y+1
5683/// load Z
5684///
5685/// In this case, Y has multiple uses, and can be folded into the load of Z
5686/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5687/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5688/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5689/// number of computations either.
5690///
5691/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5692/// X was live across 'load Z' for other reasons, we actually *would* want to
5693/// fold the addressing mode in the Z case. This would make Y die earlier.
5694bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5695 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5696 if (IgnoreProfitability)
5697 return true;
5698
5699 // AMBefore is the addressing mode before this instruction was folded into it,
5700 // and AMAfter is the addressing mode after the instruction was folded. Get
5701 // the set of registers referenced by AMAfter and subtract out those
5702 // referenced by AMBefore: this is the set of values which folding in this
5703 // address extends the lifetime of.
5704 //
5705 // Note that there are only two potential values being referenced here,
5706 // BaseReg and ScaleReg (global addresses are always available, as are any
5707 // folded immediates).
5708 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5709
5710 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5711 // lifetime wasn't extended by adding this instruction.
5712 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5713 BaseReg = nullptr;
5714 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5715 ScaledReg = nullptr;
5716
5717 // If folding this instruction (and its subexprs) didn't extend any live
5718 // ranges, we're ok with it.
5719 if (!BaseReg && !ScaledReg)
5720 return true;
5721
5722 // If all uses of this instruction can have the address mode sunk into them,
5723 // we can remove the addressing mode and effectively trade one live register
5724 // for another (at worst.) In this context, folding an addressing mode into
5725 // the use is just a particularly nice way of sinking it.
5727 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5728 return false; // Has a non-memory, non-foldable use!
5729
5730 // Now that we know that all uses of this instruction are part of a chain of
5731 // computation involving only operations that could theoretically be folded
5732 // into a memory use, loop over each of these memory operation uses and see
5733 // if they could *actually* fold the instruction. The assumption is that
5734 // addressing modes are cheap and that duplicating the computation involved
5735 // many times is worthwhile, even on a fastpath. For sinking candidates
5736 // (i.e. cold call sites), this serves as a way to prevent excessive code
5737 // growth since most architectures have some reasonable small and fast way to
5738 // compute an effective address (e.g., LEA on x86).
5739 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5740 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5741 Value *Address = Pair.first->get();
5742 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5743 Type *AddressAccessTy = Pair.second;
5744 unsigned AS = Address->getType()->getPointerAddressSpace();
5745
5746 // Do a match against the root of this address, ignoring profitability. This
5747 // will tell us if the addressing mode for the memory operation will
5748 // *actually* cover the shared instruction.
5749 ExtAddrMode Result;
5750 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5751 0);
5752 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5753 TPT.getRestorationPoint();
5754 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5755 AddressAccessTy, AS, UserI, Result,
5756 InsertedInsts, PromotedInsts, TPT,
5757 LargeOffsetGEP, OptSize, PSI, BFI);
5758 Matcher.IgnoreProfitability = true;
5759 bool Success = Matcher.matchAddr(Address, 0);
5760 (void)Success;
5761 assert(Success && "Couldn't select *anything*?");
5762
5763 // The match was to check the profitability, the changes made are not
5764 // part of the original matcher. Therefore, they should be dropped;
5765 // otherwise the original matcher will not present the right state.
5766 TPT.rollback(LastKnownGood);
5767
5768 // If the match didn't cover I, then it won't be shared by it.
5769 if (!is_contained(MatchedAddrModeInsts, I))
5770 return false;
5771
5772 MatchedAddrModeInsts.clear();
5773 }
5774
5775 return true;
5776}
5777
5778/// Return true if the specified values are defined in a
5779/// different basic block than BB.
5780static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5781 if (Instruction *I = dyn_cast<Instruction>(V))
5782 return I->getParent() != BB;
5783 return false;
5784}
5785
5786// Find an insert position of Addr for MemoryInst. We can't guarantee MemoryInst
5787// is the first instruction that will use Addr. So we need to find the first
5788// user of Addr in the current BB.
5789static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst,
5790 Value *SunkAddr) {
5791 if (Addr->hasOneUse())
5792 return MemoryInst->getIterator();
5793
5794 // We already have a SunkAddr in the current BB, but we may need to insert a cast
5795 // instruction after it.
5796 if (SunkAddr) {
5797 if (Instruction *AddrInst = dyn_cast<Instruction>(SunkAddr))
5798 return std::next(AddrInst->getIterator());
5799 }
5800
5801 // Find the first user of Addr in current BB.
5802 Instruction *Earliest = MemoryInst;
5803 for (User *U : Addr->users()) {
5804 Instruction *UserInst = dyn_cast<Instruction>(U);
5805 if (UserInst && UserInst->getParent() == MemoryInst->getParent()) {
5806 if (isa<PHINode>(UserInst) || UserInst->isDebugOrPseudoInst())
5807 continue;
5808 if (UserInst->comesBefore(Earliest))
5809 Earliest = UserInst;
5810 }
5811 }
5812 return Earliest->getIterator();
5813}
5814
5815/// Sink the addressing mode computation immediately before MemoryInst if doing so
5816/// can be done without increasing register pressure. The need for the
5817/// register pressure constraint means this can end up being an all or nothing
5818/// decision for all uses of the same addressing computation.
5819///
5820/// Load and Store Instructions often have addressing modes that can do
5821/// significant amounts of computation. As such, instruction selection will try
5822/// to get the load or store to do as much computation as possible for the
5823/// program. The problem is that isel can only see within a single block. As
5824/// such, we sink as much legal addressing mode work into the block as possible.
5825///
5826/// This method is used to optimize both load/store and inline asms with memory
5827/// operands. It's also used to sink addressing computations feeding into cold
5828/// call sites into their (cold) basic block.
5829///
5830/// The motivation for handling sinking into cold blocks is that doing so can
5831/// both enable other address mode sinking (by satisfying the register pressure
5832/// constraint above), and reduce register pressure globally (by removing the
5833/// addressing mode computation from the fast path entirely).
5834bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5835 Type *AccessTy, unsigned AddrSpace) {
5836 Value *Repl = Addr;
5837
5838 // Try to collapse single-value PHI nodes. This is necessary to undo
5839 // unprofitable PRE transformations.
5840 SmallVector<Value *, 8> worklist;
5841 SmallPtrSet<Value *, 16> Visited;
5842 worklist.push_back(Addr);
5843
5844 // Use a worklist to iteratively look through PHI and select nodes, and
5845 // ensure that the addressing modes obtained from the non-PHI/select roots of
5846 // the graph are compatible.
5847 bool PhiOrSelectSeen = false;
5848 SmallVector<Instruction *, 16> AddrModeInsts;
5849 AddressingModeCombiner AddrModes(*DL, Addr);
5850 TypePromotionTransaction TPT(RemovedInsts);
5851 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5852 TPT.getRestorationPoint();
5853 while (!worklist.empty()) {
5854 Value *V = worklist.pop_back_val();
5855
5856 // We allow traversing cyclic Phi nodes.
5857 // In case of success after this loop we ensure that traversing through
5858 // Phi nodes ends up with all cases to compute address of the form
5859 // BaseGV + Base + Scale * Index + Offset
5860 // where Scale and Offset are constants and BaseGV, Base and Index
5861 // are exactly the same Values in all cases.
5862 // It means that BaseGV, Scale and Offset dominate our memory instruction
5863 // and have the same value as they had in address computation represented
5864 // as Phi. So we can safely sink address computation to memory instruction.
5865 if (!Visited.insert(V).second)
5866 continue;
5867
5868 // For a PHI node, push all of its incoming values.
5869 if (PHINode *P = dyn_cast<PHINode>(V)) {
5870 append_range(worklist, P->incoming_values());
5871 PhiOrSelectSeen = true;
5872 continue;
5873 }
5874 // Similar for select.
5875 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5876 worklist.push_back(SI->getFalseValue());
5877 worklist.push_back(SI->getTrueValue());
5878 PhiOrSelectSeen = true;
5879 continue;
5880 }
5881
5882 // For non-PHIs, determine the addressing mode being computed. Note that
5883 // the result may differ depending on what other uses our candidate
5884 // addressing instructions might have.
5885 AddrModeInsts.clear();
5886 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5887 0);
5888 // Defer the query (and possible computation of) the dom tree to point of
5889 // actual use. It's expected that most address matches don't actually need
5890 // the domtree.
5891 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5892 Function *F = MemoryInst->getParent()->getParent();
5893 return this->getDT(*F);
5894 };
5895 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5896 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5897 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5898 BFI.get());
5899
5900 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5901 if (GEP && !NewGEPBases.count(GEP)) {
5902 // If splitting the underlying data structure can reduce the offset of a
5903 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5904 // previously split data structures.
5905 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5906 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5907 }
5908
5909 NewAddrMode.OriginalValue = V;
5910 if (!AddrModes.addNewAddrMode(NewAddrMode))
5911 break;
5912 }
5913
5914 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5915 // or we have multiple but either couldn't combine them or combining them
5916 // wouldn't do anything useful, bail out now.
5917 if (!AddrModes.combineAddrModes()) {
5918 TPT.rollback(LastKnownGood);
5919 return false;
5920 }
5921 bool Modified = TPT.commit();
5922
5923 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5924 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5925
5926 // If all the instructions matched are already in this BB, don't do anything.
5927 // If we saw a Phi node then it is definitely not local, and if we saw a
5928 // select then we want to push the address calculation past it even if it's
5929 // already in this BB.
5930 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5931 return IsNonLocalValue(V, MemoryInst->getParent());
5932 })) {
5933 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5934 << "\n");
5935 return Modified;
5936 }
5937
5938 // Now that we have determined the addressing expression we want to use and
5939 // know that we have to sink it into this block, check to see if we have already
5940 // done this for some other load/store instr in this block. If so, reuse
5941 // the computation. Before attempting reuse, check if the address is valid
5942 // as it may have been erased.
5943
5944 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5945
5946 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5947 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5948
5949 // The current BB may be optimized multiple times and we can't guarantee that
5950 // the reuse of Addr happens later, so call findInsertPos to find an appropriate
5951 // insert position.
5952 auto InsertPos = findInsertPos(Addr, MemoryInst, SunkAddr);
5953
5954 // TODO: Adjust insert point considering (Base|Scaled)Reg if possible.
5955 if (!SunkAddr) {
5956 auto &DT = getDT(*MemoryInst->getFunction());
5957 if ((AddrMode.BaseReg && !DT.dominates(AddrMode.BaseReg, &*InsertPos)) ||
5958 (AddrMode.ScaledReg && !DT.dominates(AddrMode.ScaledReg, &*InsertPos)))
5959 return Modified;
5960 }
5961
5962 IRBuilder<> Builder(MemoryInst->getParent(), InsertPos);
5963
5964 if (SunkAddr) {
5965 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5966 << " for " << *MemoryInst << "\n");
5967 if (SunkAddr->getType() != Addr->getType()) {
5968 if (SunkAddr->getType()->getPointerAddressSpace() !=
5969 Addr->getType()->getPointerAddressSpace() &&
5970 !DL->isNonIntegralPointerType(Addr->getType())) {
5971 // There are two reasons the address spaces might not match: a no-op
5972 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5973 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5974 // TODO: allow bitcast between different address space pointers with the
5975 // same size.
5976 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5977 SunkAddr =
5978 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5979 } else
5980 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5981 }
5982 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5983 SubtargetInfo->addrSinkUsingGEPs())) {
5984 // By default, we use the GEP-based method when AA is used later. This
5985 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5986 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5987 << " for " << *MemoryInst << "\n");
5988 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5989
5990 // First, find the pointer.
5991 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5992 ResultPtr = AddrMode.BaseReg;
5993 AddrMode.BaseReg = nullptr;
5994 }
5995
5996 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5997 // We can't add more than one pointer together, nor can we scale a
5998 // pointer (both of which seem meaningless).
5999 if (ResultPtr || AddrMode.Scale != 1)
6000 return Modified;
6001
6002 ResultPtr = AddrMode.ScaledReg;
6003 AddrMode.Scale = 0;
6004 }
6005
6006 // It is only safe to sign extend the BaseReg if we know that the math
6007 // required to create it did not overflow before we extend it. Since
6008 // the original IR value was tossed in favor of a constant back when
6009 // the AddrMode was created we need to bail out gracefully if widths
6010 // do not match instead of extending it.
6011 //
6012 // (See below for code to add the scale.)
6013 if (AddrMode.Scale) {
6014 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
6015 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
6016 cast<IntegerType>(ScaledRegTy)->getBitWidth())
6017 return Modified;
6018 }
6019
6020 GlobalValue *BaseGV = AddrMode.BaseGV;
6021 if (BaseGV != nullptr) {
6022 if (ResultPtr)
6023 return Modified;
6024
6025 if (BaseGV->isThreadLocal()) {
6026 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
6027 } else {
6028 ResultPtr = BaseGV;
6029 }
6030 }
6031
6032 // If the real base value actually came from an inttoptr, then the matcher
6033 // will look through it and provide only the integer value. In that case,
6034 // use it here.
6035 if (!DL->isNonIntegralPointerType(Addr->getType())) {
6036 if (!ResultPtr && AddrMode.BaseReg) {
6037 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
6038 "sunkaddr");
6039 AddrMode.BaseReg = nullptr;
6040 } else if (!ResultPtr && AddrMode.Scale == 1) {
6041 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
6042 "sunkaddr");
6043 AddrMode.Scale = 0;
6044 }
6045 }
6046
6047 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
6048 !AddrMode.BaseOffs) {
6049 SunkAddr = Constant::getNullValue(Addr->getType());
6050 } else if (!ResultPtr) {
6051 return Modified;
6052 } else {
6053 Type *I8PtrTy =
6054 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
6055
6056 // Start with the base register. Do this first so that subsequent address
6057 // matching finds it last, which will prevent it from trying to match it
6058 // as the scaled value in case it happens to be a mul. That would be
6059 // problematic if we've sunk a different mul for the scale, because then
6060 // we'd end up sinking both muls.
6061 if (AddrMode.BaseReg) {
6062 Value *V = AddrMode.BaseReg;
6063 if (V->getType() != IntPtrTy)
6064 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6065
6066 ResultIndex = V;
6067 }
6068
6069 // Add the scale value.
6070 if (AddrMode.Scale) {
6071 Value *V = AddrMode.ScaledReg;
6072 if (V->getType() == IntPtrTy) {
6073 // done.
6074 } else {
6075 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
6076 cast<IntegerType>(V->getType())->getBitWidth() &&
6077 "We can't transform if ScaledReg is too narrow");
6078 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6079 }
6080
6081 if (AddrMode.Scale != 1)
6082 V = Builder.CreateMul(
6083 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6084 if (ResultIndex)
6085 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
6086 else
6087 ResultIndex = V;
6088 }
6089
6090 // Add in the Base Offset if present.
6091 if (AddrMode.BaseOffs) {
6092 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6093 if (ResultIndex) {
6094 // We need to add this separately from the scale above to help with
6095 // SDAG consecutive load/store merging.
6096 if (ResultPtr->getType() != I8PtrTy)
6097 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6098 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6099 AddrMode.InBounds);
6100 }
6101
6102 ResultIndex = V;
6103 }
6104
6105 if (!ResultIndex) {
6106 auto PtrInst = dyn_cast<Instruction>(ResultPtr);
6107 // We know that we have a pointer without any offsets. If this pointer
6108 // originates from a different basic block than the current one, we
6109 // must be able to recreate it in the current basic block.
6110 // We do not support the recreation of any instructions yet.
6111 if (PtrInst && PtrInst->getParent() != MemoryInst->getParent())
6112 return Modified;
6113 SunkAddr = ResultPtr;
6114 } else {
6115 if (ResultPtr->getType() != I8PtrTy)
6116 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
6117 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
6118 AddrMode.InBounds);
6119 }
6120
6121 if (SunkAddr->getType() != Addr->getType()) {
6122 if (SunkAddr->getType()->getPointerAddressSpace() !=
6123 Addr->getType()->getPointerAddressSpace() &&
6124 !DL->isNonIntegralPointerType(Addr->getType())) {
6125 // There are two reasons the address spaces might not match: a no-op
6126 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
6127 // ptrtoint/inttoptr pair to ensure we match the original semantics.
6128 // TODO: allow bitcast between different address space pointers with
6129 // the same size.
6130 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
6131 SunkAddr =
6132 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
6133 } else
6134 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
6135 }
6136 }
6137 } else {
6138 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
6139 // non-integral pointers, so in that case bail out now.
6140 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
6141 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
6142 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
6143 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
6144 if (DL->isNonIntegralPointerType(Addr->getType()) ||
6145 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
6146 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
6147 (AddrMode.BaseGV &&
6148 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
6149 return Modified;
6150
6151 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
6152 << " for " << *MemoryInst << "\n");
6153 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
6154 Value *Result = nullptr;
6155
6156 // Start with the base register. Do this first so that subsequent address
6157 // matching finds it last, which will prevent it from trying to match it
6158 // as the scaled value in case it happens to be a mul. That would be
6159 // problematic if we've sunk a different mul for the scale, because then
6160 // we'd end up sinking both muls.
6161 if (AddrMode.BaseReg) {
6162 Value *V = AddrMode.BaseReg;
6163 if (V->getType()->isPointerTy())
6164 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6165 if (V->getType() != IntPtrTy)
6166 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
6167 Result = V;
6168 }
6169
6170 // Add the scale value.
6171 if (AddrMode.Scale) {
6172 Value *V = AddrMode.ScaledReg;
6173 if (V->getType() == IntPtrTy) {
6174 // done.
6175 } else if (V->getType()->isPointerTy()) {
6176 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
6177 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
6178 cast<IntegerType>(V->getType())->getBitWidth()) {
6179 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
6180 } else {
6181 // It is only safe to sign extend the BaseReg if we know that the math
6182 // required to create it did not overflow before we extend it. Since
6183 // the original IR value was tossed in favor of a constant back when
6184 // the AddrMode was created we need to bail out gracefully if widths
6185 // do not match instead of extending it.
6186 Instruction *I = dyn_cast_or_null<Instruction>(Result);
6187 if (I && (Result != AddrMode.BaseReg))
6188 I->eraseFromParent();
6189 return Modified;
6190 }
6191 if (AddrMode.Scale != 1)
6192 V = Builder.CreateMul(
6193 V, ConstantInt::getSigned(IntPtrTy, AddrMode.Scale), "sunkaddr");
6194 if (Result)
6195 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6196 else
6197 Result = V;
6198 }
6199
6200 // Add in the BaseGV if present.
6201 GlobalValue *BaseGV = AddrMode.BaseGV;
6202 if (BaseGV != nullptr) {
6203 Value *BaseGVPtr;
6204 if (BaseGV->isThreadLocal()) {
6205 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
6206 } else {
6207 BaseGVPtr = BaseGV;
6208 }
6209 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
6210 if (Result)
6211 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6212 else
6213 Result = V;
6214 }
6215
6216 // Add in the Base Offset if present.
6217 if (AddrMode.BaseOffs) {
6218 Value *V = ConstantInt::getSigned(IntPtrTy, AddrMode.BaseOffs);
6219 if (Result)
6220 Result = Builder.CreateAdd(Result, V, "sunkaddr");
6221 else
6222 Result = V;
6223 }
6224
6225 if (!Result)
6226 SunkAddr = Constant::getNullValue(Addr->getType());
6227 else
6228 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
6229 }
6230
6231 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
6232 // Store the newly computed address into the cache. In the case we reused a
6233 // value, this should be idempotent.
6234 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
6235
6236 // If we have no uses, recursively delete the value and all dead instructions
6237 // using it.
6238 if (Repl->use_empty()) {
6239 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
6240 RecursivelyDeleteTriviallyDeadInstructions(
6241 Repl, TLInfo, nullptr,
6242 [&](Value *V) { removeAllAssertingVHReferences(V); });
6243 });
6244 }
6245 ++NumMemoryInsts;
6246 return true;
6247}
6248
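// Illustrative sketch (not taken from a test case): after this sinking, an
// address that was computed in another block, e.g.
//   bb1:  %off  = mul i64 %i, 4
//         %addr = add i64 %base.int, %off
//   bb2:  %p    = inttoptr i64 %addr to ptr
//         %v    = load i32, ptr %p
// is rebuilt as a "sunkaddr" chain right next to the memory instruction in
// bb2, so SelectionDAG can fold the whole computation into the load's
// addressing mode.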
6249/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
6250/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
6251/// only handle a 2 operand GEP in the same basic block or a splat constant
6252/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
6253/// index.
6254///
6255/// If the existing GEP has a vector base pointer that is splat, we can look
6256/// through the splat to find the scalar pointer. If we can't find a scalar
6257/// pointer there's nothing we can do.
6258///
6259/// If we have a GEP with more than 2 indices where the middle indices are all
6260/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
6261///
6262/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
6263/// followed by a GEP with an all zeroes vector index. This will enable
6264/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
6265/// zero index.
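///
/// For example (an illustrative sketch, not a verbatim test case): a gather
/// whose pointer operand is
/// \code
///   %ptrs = getelementptr i32, <4 x ptr> %splat.of.base, <4 x i64> %idx
/// \endcode
/// can be rewritten, once the splatted scalar base is recovered, as
/// \code
///   %ptrs = getelementptr i32, ptr %base, <4 x i64> %idx
/// \endcode
/// which SelectionDAGBuilder can recognize as a uniform base plus vector index.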
6266bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6267 Value *Ptr) {
6268 Value *NewAddr;
6269
6270 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6271 // Don't optimize GEPs that don't have indices.
6272 if (!GEP->hasIndices())
6273 return false;
6274
6275 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6276 // FIXME: We should support this by sinking the GEP.
6277 if (MemoryInst->getParent() != GEP->getParent())
6278 return false;
6279
6280 SmallVector<Value *, 2> Ops(GEP->operands());
6281
6282 bool RewriteGEP = false;
6283
6284 if (Ops[0]->getType()->isVectorTy()) {
6285 Ops[0] = getSplatValue(Ops[0]);
6286 if (!Ops[0])
6287 return false;
6288 RewriteGEP = true;
6289 }
6290
6291 unsigned FinalIndex = Ops.size() - 1;
6292
6293 // Ensure all indices except the last are 0.
6294 // FIXME: This isn't strictly required. All that's required is that they are
6295 // all scalars or splats.
6296 for (unsigned i = 1; i < FinalIndex; ++i) {
6297 auto *C = dyn_cast<Constant>(Ops[i]);
6298 if (!C)
6299 return false;
6300 if (isa<VectorType>(C->getType()))
6301 C = C->getSplatValue();
6302 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6303 if (!CI || !CI->isZero())
6304 return false;
6305 // Scalarize the index if needed.
6306 Ops[i] = CI;
6307 }
6308
6309 // Try to scalarize the final index.
6310 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6311 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6312 auto *C = dyn_cast<ConstantInt>(V);
6313 // Don't scalarize all zeros vector.
6314 if (!C || !C->isZero()) {
6315 Ops[FinalIndex] = V;
6316 RewriteGEP = true;
6317 }
6318 }
6319 }
6320
6321 // If we made any changes or we have extra operands, we need to
6322 // generate new instructions.
6323 if (!RewriteGEP && Ops.size() == 2)
6324 return false;
6325
6326 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6327
6328 IRBuilder<> Builder(MemoryInst);
6329
6330 Type *SourceTy = GEP->getSourceElementType();
6331 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6332
6333 // If the final index isn't a vector, emit a scalar GEP containing all ops
6334 // and a vector GEP with all zeroes final index.
6335 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6336 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6337 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6338 auto *SecondTy = GetElementPtrInst::getIndexedType(
6339 SourceTy, ArrayRef(Ops).drop_front());
6340 NewAddr =
6341 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6342 } else {
6343 Value *Base = Ops[0];
6344 Value *Index = Ops[FinalIndex];
6345
6346 // Create a scalar GEP if there are more than 2 operands.
6347 if (Ops.size() != 2) {
6348 // Replace the last index with 0.
6349 Ops[FinalIndex] =
6350 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6351 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6352 SourceTy = GetElementPtrInst::getIndexedType(
6353 SourceTy, ArrayRef(Ops).drop_front());
6354 }
6355
6356 // Now create the GEP with scalar pointer and vector index.
6357 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6358 }
6359 } else if (!isa<Constant>(Ptr)) {
6360 // Not a GEP, maybe it's a splat and we can create a GEP to enable
6361 // SelectionDAGBuilder to use it as a uniform base.
6362 Value *V = getSplatValue(Ptr);
6363 if (!V)
6364 return false;
6365
6366 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6367
6368 IRBuilder<> Builder(MemoryInst);
6369
6370 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6371 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6372 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6373 Type *ScalarTy;
6374 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6375 Intrinsic::masked_gather) {
6376 ScalarTy = MemoryInst->getType()->getScalarType();
6377 } else {
6378 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6379 Intrinsic::masked_scatter);
6380 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6381 }
6382 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6383 } else {
6384 // Constant, SelectionDAGBuilder knows to check if it's a splat.
6385 return false;
6386 }
6387
6388 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6389
6390 // If we have no uses, recursively delete the value and all dead instructions
6391 // using it.
6392 if (Ptr->use_empty())
6393 RecursivelyDeleteTriviallyDeadInstructions(
6394 Ptr, TLInfo, nullptr,
6395 [&](Value *V) { removeAllAssertingVHReferences(V); });
6396
6397 return true;
6398}
6399
6400 // This is a helper for CodeGenPrepare::optimizeMulWithOverflow.
6401 // Check for the pattern we are interested in, where the intrinsic has at
6402 // most two uses, both of which are extractvalue instructions.
6403 static bool matchOverflowPattern(Instruction *I, ExtractValueInst *&MulExtract,
6404 ExtractValueInst *&OverflowExtract) {
6405 // Bail out if it's more than 2 users:
6406 if (I->hasNUsesOrMore(3))
6407 return false;
6408
6409 for (User *U : I->users()) {
6410 auto *Extract = dyn_cast<ExtractValueInst>(U);
6411 if (!Extract || Extract->getNumIndices() != 1)
6412 return false;
6413
6414 unsigned Index = Extract->getIndices()[0];
6415 if (Index == 0)
6416 MulExtract = Extract;
6417 else if (Index == 1)
6418 OverflowExtract = Extract;
6419 else
6420 return false;
6421 }
6422 return true;
6423}
6424
6425 // Rewrite the mul_with_overflow intrinsic by checking whether both operands'
6426 // value ranges fit within the legal type. If so, we can use a cheaper
6427 // multiplication sequence. This would ideally be done during type
6428 // legalization, but reconstructing IR is not possible there, so we do it here.
6430// The IR after the optimization will look like:
6431// entry:
6432// if signed:
6433// ( (lhs_lo>>BW-1) ^ lhs_hi) || ( (rhs_lo>>BW-1) ^ rhs_hi) ? overflow,
6434// overflow_no
6435// else:
6436// (lhs_hi != 0) || (rhs_hi != 0) ? overflow, overflow_no
6437// overflow_no:
6438// overflow:
6439// overflow.res:
6440// \returns true if optimization was applied
6441// TODO: This optimization can be further improved to optimize branching on
6442// overflow where the 'overflow_no' BB can branch directly to the false
6443// successor of overflow, but that would add additional complexity so we leave
6444// it for future work.
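// Illustrative sketch (assuming an i64 umul.with.overflow on a target whose
// legal type is i32; names are invented for exposition):
//   entry:
//     %hi.lhs = trunc (lshr %lhs, 32), %hi.rhs = trunc (lshr %rhs, 32)
//     %any.hi = or (icmp ne %hi.lhs, 0), (icmp ne %hi.rhs, 0)
//     br i1 %any.hi, label %overflow, label %overflow.no
//   overflow.no:                ; both operands fit in 32 bits
//     %mul = mul i64 (zext %lo.lhs), (zext %lo.rhs)   ; cannot overflow
//   overflow:                   ; keep the original intrinsic here
//     %res = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 %lhs, i64 %rhs)
//   overflow.res:
//     ; phis select between %mul/false and the extracted intrinsic results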
6445bool CodeGenPrepare::optimizeMulWithOverflow(Instruction *I, bool IsSigned,
6446 ModifyDT &ModifiedDT) {
6447 // Check if target supports this optimization.
6449 I->getContext(),
6450 TLI->getValueType(*DL, I->getType()->getContainedType(0))))
6451 return false;
6452
6453 ExtractValueInst *MulExtract = nullptr, *OverflowExtract = nullptr;
6454 if (!matchOverflowPattern(I, MulExtract, OverflowExtract))
6455 return false;
6456
6457 // Keep track of the instruction so we do not optimize it again.
6458 InsertedInsts.insert(I);
6459
6460 Value *LHS = I->getOperand(0);
6461 Value *RHS = I->getOperand(1);
6462 Type *Ty = LHS->getType();
6463 unsigned VTHalfBitWidth = Ty->getScalarSizeInBits() / 2;
6464 Type *LegalTy = Ty->getWithNewBitWidth(VTHalfBitWidth);
6465
6466 // New BBs:
6467 BasicBlock *OverflowEntryBB =
6468 I->getParent()->splitBasicBlock(I, "", /*Before*/ true);
6469 OverflowEntryBB->takeName(I->getParent());
6470 // Keep the 'br' instruction that is generated as a result of the split to be
6471 // erased/replaced later.
6472 Instruction *OldTerminator = OverflowEntryBB->getTerminator();
6473 BasicBlock *NoOverflowBB =
6474 BasicBlock::Create(I->getContext(), "overflow.no", I->getFunction());
6475 NoOverflowBB->moveAfter(OverflowEntryBB);
6476 BasicBlock *OverflowBB =
6477 BasicBlock::Create(I->getContext(), "overflow", I->getFunction());
6478 OverflowBB->moveAfter(NoOverflowBB);
6479
6480 // BB overflow.entry:
6481 IRBuilder<> Builder(OverflowEntryBB);
6482 // Extract low and high halves of LHS:
6483 Value *LoLHS = Builder.CreateTrunc(LHS, LegalTy, "lo.lhs");
6484 Value *HiLHS = Builder.CreateLShr(LHS, VTHalfBitWidth, "lhs.lsr");
6485 HiLHS = Builder.CreateTrunc(HiLHS, LegalTy, "hi.lhs");
6486
6487 // Extract low and high halves of RHS:
6488 Value *LoRHS = Builder.CreateTrunc(RHS, LegalTy, "lo.rhs");
6489 Value *HiRHS = Builder.CreateLShr(RHS, VTHalfBitWidth, "rhs.lsr");
6490 HiRHS = Builder.CreateTrunc(HiRHS, LegalTy, "hi.rhs");
6491
6492 Value *IsAnyBitTrue;
6493 if (IsSigned) {
6494 Value *SignLoLHS =
6495 Builder.CreateAShr(LoLHS, VTHalfBitWidth - 1, "sign.lo.lhs");
6496 Value *SignLoRHS =
6497 Builder.CreateAShr(LoRHS, VTHalfBitWidth - 1, "sign.lo.rhs");
6498 Value *XorLHS = Builder.CreateXor(HiLHS, SignLoLHS);
6499 Value *XorRHS = Builder.CreateXor(HiRHS, SignLoRHS);
6500 Value *Or = Builder.CreateOr(XorLHS, XorRHS, "or.lhs.rhs");
6501 IsAnyBitTrue = Builder.CreateCmp(ICmpInst::ICMP_NE, Or,
6502 ConstantInt::getNullValue(Or->getType()));
6503 } else {
6504 Value *CmpLHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiLHS,
6505 ConstantInt::getNullValue(LegalTy));
6506 Value *CmpRHS = Builder.CreateCmp(ICmpInst::ICMP_NE, HiRHS,
6507 ConstantInt::getNullValue(LegalTy));
6508 IsAnyBitTrue = Builder.CreateOr(CmpLHS, CmpRHS, "or.lhs.rhs");
6509 }
6510 Builder.CreateCondBr(IsAnyBitTrue, OverflowBB, NoOverflowBB);
6511
6512 // BB overflow.no:
6513 Builder.SetInsertPoint(NoOverflowBB);
6514 Value *ExtLoLHS, *ExtLoRHS;
6515 if (IsSigned) {
6516 ExtLoLHS = Builder.CreateSExt(LoLHS, Ty, "lo.lhs.ext");
6517 ExtLoRHS = Builder.CreateSExt(LoRHS, Ty, "lo.rhs.ext");
6518 } else {
6519 ExtLoLHS = Builder.CreateZExt(LoLHS, Ty, "lo.lhs.ext");
6520 ExtLoRHS = Builder.CreateZExt(LoRHS, Ty, "lo.rhs.ext");
6521 }
6522
6523 Value *Mul = Builder.CreateMul(ExtLoLHS, ExtLoRHS, "mul.overflow.no");
6524
6525 // Create the 'overflow.res' BB to merge the results of
6526 // the two paths:
6527 BasicBlock *OverflowResBB = I->getParent();
6528 OverflowResBB->setName("overflow.res");
6529
6530 // BB overflow.no: jump to overflow.res BB
6531 Builder.CreateBr(OverflowResBB);
6532 // Now we no longer need the old terminator in the overflow.entry BB, erase it:
6533 OldTerminator->eraseFromParent();
6534
6535 // BB overflow.res:
6536 Builder.SetInsertPoint(OverflowResBB, OverflowResBB->getFirstInsertionPt());
6537 // Create PHI nodes to merge results from the overflow.no BB and overflow BB
6538 // to replace the extract instructions.
6539 PHINode *OverflowResPHI = Builder.CreatePHI(Ty, 2),
6540 *OverflowFlagPHI =
6541 Builder.CreatePHI(IntegerType::getInt1Ty(I->getContext()), 2);
6542
6543 // Add the incoming values from the overflow.no BB and later from the overflow BB.
6544 OverflowResPHI->addIncoming(Mul, NoOverflowBB);
6545 OverflowFlagPHI->addIncoming(ConstantInt::getFalse(I->getContext()),
6546 NoOverflowBB);
6547
6548 // Replace all users of MulExtract and OverflowExtract to use the PHI nodes.
6549 if (MulExtract) {
6550 MulExtract->replaceAllUsesWith(OverflowResPHI);
6551 MulExtract->eraseFromParent();
6552 }
6553 if (OverflowExtract) {
6554 OverflowExtract->replaceAllUsesWith(OverflowFlagPHI);
6555 OverflowExtract->eraseFromParent();
6556 }
6557
6558 // Remove the intrinsic from parent (overflow.res BB) as it will be part of
6559 // overflow BB
6560 I->removeFromParent();
6561 // BB overflow:
6562 I->insertInto(OverflowBB, OverflowBB->end());
6563 Builder.SetInsertPoint(OverflowBB, OverflowBB->end());
6564 Value *MulOverflow = Builder.CreateExtractValue(I, {0}, "mul.overflow");
6565 Value *OverflowFlag = Builder.CreateExtractValue(I, {1}, "overflow.flag");
6566 Builder.CreateBr(OverflowResBB);
6567
6568 // Add The Extracted values to the PHINodes in the overflow.res BB.
6569 OverflowResPHI->addIncoming(MulOverflow, OverflowBB);
6570 OverflowFlagPHI->addIncoming(OverflowFlag, OverflowBB);
6571
6572 ModifiedDT = ModifyDT::ModifyBBDT;
6573 return true;
6574}
6575
6576/// If there are any memory operands, use OptimizeMemoryInst to sink their
6577/// address computing into the block when possible / profitable.
6578bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6579 bool MadeChange = false;
6580
6581 const TargetRegisterInfo *TRI =
6582 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6583 TargetLowering::AsmOperandInfoVector TargetConstraints =
6584 TLI->ParseConstraints(*DL, TRI, *CS);
6585 unsigned ArgNo = 0;
6586 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6587 // Compute the constraint code and ConstraintType to use.
6588 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6589
6590 // TODO: Also handle C_Address?
6591 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6592 OpInfo.isIndirect) {
6593 Value *OpVal = CS->getArgOperand(ArgNo++);
6594 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6595 } else if (OpInfo.Type == InlineAsm::isInput)
6596 ArgNo++;
6597 }
6598
6599 return MadeChange;
6600}
6601
6602/// Check if all the uses of \p Val are equivalent (or free) zero or
6603/// sign extensions.
6604static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6605 assert(!Val->use_empty() && "Input must have at least one use");
6606 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6607 bool IsSExt = isa<SExtInst>(FirstUser);
6608 Type *ExtTy = FirstUser->getType();
6609 for (const User *U : Val->users()) {
6610 const Instruction *UI = cast<Instruction>(U);
6611 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6612 return false;
6613 Type *CurTy = UI->getType();
6614 // Same input and output types: Same instruction after CSE.
6615 if (CurTy == ExtTy)
6616 continue;
6617
6618 // If IsSExt is true, we are in this situation:
6619 // a = Val
6620 // b = sext ty1 a to ty2
6621 // c = sext ty1 a to ty3
6622 // Assuming ty2 is shorter than ty3, this could be turned into:
6623 // a = Val
6624 // b = sext ty1 a to ty2
6625 // c = sext ty2 b to ty3
6626 // However, the last sext is not free.
6627 if (IsSExt)
6628 return false;
6629
6630 // This is a ZExt, maybe this is free to extend from one type to another.
6631 // In that case, we would not account for a different use.
6632 Type *NarrowTy;
6633 Type *LargeTy;
6634 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6635 CurTy->getScalarType()->getIntegerBitWidth()) {
6636 NarrowTy = CurTy;
6637 LargeTy = ExtTy;
6638 } else {
6639 NarrowTy = ExtTy;
6640 LargeTy = CurTy;
6641 }
6642
6643 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6644 return false;
6645 }
6646 // All uses are the same or can be derived from one another for free.
6647 return true;
6648}
6649
6650/// Try to speculatively promote extensions in \p Exts and continue
6651/// promoting through newly promoted operands recursively as far as doing so is
6652/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6653/// When some promotion happened, \p TPT contains the proper state to revert
6654/// them.
6655///
6656/// \return true if some promotion happened, false otherwise.
6657bool CodeGenPrepare::tryToPromoteExts(
6658 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6659 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6660 unsigned CreatedInstsCost) {
6661 bool Promoted = false;
6662
6663 // Iterate over all the extensions to try to promote them.
6664 for (auto *I : Exts) {
6665 // Early check if we directly have ext(load).
6666 if (isa<LoadInst>(I->getOperand(0))) {
6667 ProfitablyMovedExts.push_back(I);
6668 continue;
6669 }
6670
6671 // Check whether or not we want to do any promotion. The reason we have
6672 // this check inside the for loop is to catch the case where an extension
6673 // is directly fed by a load, because in such a case the extension can be
6674 // moved up without any promotion on its operands.
6675 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6676 return false;
6677
6678 // Get the action to perform the promotion.
6679 TypePromotionHelper::Action TPH =
6680 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6681 // Check if we can promote.
6682 if (!TPH) {
6683 // Save the current extension as we cannot move up through its operand.
6684 ProfitablyMovedExts.push_back(I);
6685 continue;
6686 }
6687
6688 // Save the current state.
6689 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6690 TPT.getRestorationPoint();
6691 SmallVector<Instruction *, 4> NewExts;
6692 unsigned NewCreatedInstsCost = 0;
6693 unsigned ExtCost = !TLI->isExtFree(I);
6694 // Promote.
6695 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6696 &NewExts, nullptr, *TLI);
6697 assert(PromotedVal &&
6698 "TypePromotionHelper should have filtered out those cases");
6699
6700 // We would be able to merge only one extension into a load.
6701 // Therefore, if we have more than 1 new extension we heuristically
6702 // cut this search path, because it means we degrade the code quality.
6703 // With exactly 2, the transformation is neutral, because we will merge
6704 // one extension but leave one. However, we optimistically keep going,
6705 // because the new extension may be removed too. Also avoid replacing a
6706 // single free extension with multiple extensions, as this increases the
6707 // number of IR instructions while not providing any savings.
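// For example (illustrative): promoting zext(add(load %p, 1)) creates one
// new zext of the load (NewCreatedInstsCost == 1) while the original zext
// (ExtCost == 1) is expected to disappear, so TotalCreatedInstsCost stays
// at 0 and the promotion is kept; anything that would leave more than one
// extra instruction is rolled back below.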
6708 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6709 // FIXME: It would be possible to propagate a negative value instead of
6710 // conservatively ceiling it to 0.
6711 TotalCreatedInstsCost =
6712 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6713 if (!StressExtLdPromotion &&
6714 (TotalCreatedInstsCost > 1 ||
6715 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6716 (ExtCost == 0 && NewExts.size() > 1))) {
6717 // This promotion is not profitable, rollback to the previous state, and
6718 // save the current extension in ProfitablyMovedExts as the latest
6719 // speculative promotion turned out to be unprofitable.
6720 TPT.rollback(LastKnownGood);
6721 ProfitablyMovedExts.push_back(I);
6722 continue;
6723 }
6724 // Continue promoting NewExts as far as doing so is profitable.
6725 SmallVector<Instruction *, 2> NewlyMovedExts;
6726 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6727 bool NewPromoted = false;
6728 for (auto *ExtInst : NewlyMovedExts) {
6729 Instruction *MovedExt = cast<Instruction>(ExtInst);
6730 Value *ExtOperand = MovedExt->getOperand(0);
6731 // If we have reached a load, we need this extra profitability check
6732 // as it could potentially be merged into an ext(load).
6733 if (isa<LoadInst>(ExtOperand) &&
6734 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6735 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6736 continue;
6737
6738 ProfitablyMovedExts.push_back(MovedExt);
6739 NewPromoted = true;
6740 }
6741
6742 // If none of speculative promotions for NewExts is profitable, rollback
6743 // and save the current extension (I) as the last profitable extension.
6744 if (!NewPromoted) {
6745 TPT.rollback(LastKnownGood);
6746 ProfitablyMovedExts.push_back(I);
6747 continue;
6748 }
6749 // The promotion is profitable.
6750 Promoted = true;
6751 }
6752 return Promoted;
6753}
6754
6755/// Merging redundant sexts when one is dominating the other.
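/// For example (illustrative): given
/// \code
///   %s1 = sext i32 %a to i64   ; dominates %s2
///   ...
///   %s2 = sext i32 %a to i64
/// \endcode
/// all uses of the dominated %s2 are rewritten to use %s1 and %s2 is removed.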
6756bool CodeGenPrepare::mergeSExts(Function &F) {
6757 bool Changed = false;
6758 for (auto &Entry : ValToSExtendedUses) {
6759 SExts &Insts = Entry.second;
6760 SExts CurPts;
6761 for (Instruction *Inst : Insts) {
6762 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6763 Inst->getOperand(0) != Entry.first)
6764 continue;
6765 bool inserted = false;
6766 for (auto &Pt : CurPts) {
6767 if (getDT(F).dominates(Inst, Pt)) {
6768 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6769 RemovedInsts.insert(Pt);
6770 Pt->removeFromParent();
6771 Pt = Inst;
6772 inserted = true;
6773 Changed = true;
6774 break;
6775 }
6776 if (!getDT(F).dominates(Pt, Inst))
6777 // Give up if we need to merge in a common dominator as the
6778 // experiments show it is not profitable.
6779 continue;
6780 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6781 RemovedInsts.insert(Inst);
6782 Inst->removeFromParent();
6783 inserted = true;
6784 Changed = true;
6785 break;
6786 }
6787 if (!inserted)
6788 CurPts.push_back(Inst);
6789 }
6790 }
6791 return Changed;
6792}
6793
6794 // Split large data structures so that the GEPs accessing them have smaller
6795 // offsets and can therefore be sunk to the same blocks as their users.
6796// For example, a large struct starting from %base is split into two parts
6797// where the second part starts from %new_base.
6798//
6799// Before:
6800// BB0:
6801// %base =
6802//
6803// BB1:
6804// %gep0 = gep %base, off0
6805// %gep1 = gep %base, off1
6806// %gep2 = gep %base, off2
6807//
6808// BB2:
6809// %load1 = load %gep0
6810// %load2 = load %gep1
6811// %load3 = load %gep2
6812//
6813// After:
6814// BB0:
6815// %base =
6816// %new_base = gep %base, off0
6817//
6818// BB1:
6819// %new_gep0 = %new_base
6820// %new_gep1 = gep %new_base, off1 - off0
6821// %new_gep2 = gep %new_base, off2 - off0
6822//
6823// BB2:
6824// %load1 = load i32, i32* %new_gep0
6825// %load2 = load i32, i32* %new_gep1
6826// %load3 = load i32, i32* %new_gep2
6827//
6828 // After the splitting, %new_gep1 and %new_gep2 can be sunk to BB2 because
6829 // their offsets are small enough to fit into the addressing mode.
6830bool CodeGenPrepare::splitLargeGEPOffsets() {
6831 bool Changed = false;
6832 for (auto &Entry : LargeOffsetGEPMap) {
6833 Value *OldBase = Entry.first;
6834 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6835 &LargeOffsetGEPs = Entry.second;
6836 auto compareGEPOffset =
6837 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6838 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6839 if (LHS.first == RHS.first)
6840 return false;
6841 if (LHS.second != RHS.second)
6842 return LHS.second < RHS.second;
6843 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6844 };
6845 // Sorting all the GEPs of the same data structures based on the offsets.
6846 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6847 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6848 // Skip if all the GEPs have the same offsets.
6849 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6850 continue;
6851 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6852 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6853 Value *NewBaseGEP = nullptr;
6854
6855 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6856 GetElementPtrInst *GEP) {
6857 LLVMContext &Ctx = GEP->getContext();
6858 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6859 Type *I8PtrTy =
6860 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6861
6862 BasicBlock::iterator NewBaseInsertPt;
6863 BasicBlock *NewBaseInsertBB;
6864 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6865 // If the base of the struct is an instruction, the new base will be
6866 // inserted close to it.
6867 NewBaseInsertBB = BaseI->getParent();
6868 if (isa<PHINode>(BaseI))
6869 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6870 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6871 NewBaseInsertBB =
6872 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6873 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6874 } else
6875 NewBaseInsertPt = std::next(BaseI->getIterator());
6876 } else {
6877 // If the current base is an argument or global value, the new base
6878 // will be inserted to the entry block.
6879 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6880 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6881 }
6882 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6883 // Create a new base.
6884 // TODO: Avoid implicit trunc?
6885 // See https://github.com/llvm/llvm-project/issues/112510.
6886 Value *BaseIndex =
6887 ConstantInt::getSigned(PtrIdxTy, BaseOffset, /*ImplicitTrunc=*/true);
6888 NewBaseGEP = OldBase;
6889 if (NewBaseGEP->getType() != I8PtrTy)
6890 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6891 NewBaseGEP =
6892 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6893 NewGEPBases.insert(NewBaseGEP);
6894 return;
6895 };
6896
6897 // Check whether all the offsets can be encoded with the preferred common base.
6898 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6899 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6900 BaseOffset = PreferBase;
6901 // Create a new base if the offset of the BaseGEP can be decoded with one
6902 // instruction.
6903 createNewBase(BaseOffset, OldBase, BaseGEP);
6904 }
6905
6906 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6907 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6908 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6909 int64_t Offset = LargeOffsetGEP->second;
6910 if (Offset != BaseOffset) {
6911 TargetLowering::AddrMode AddrMode;
6912 AddrMode.HasBaseReg = true;
6913 AddrMode.BaseOffs = Offset - BaseOffset;
6914 // The result type of the GEP might not be the type of the memory
6915 // access.
6916 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6917 GEP->getResultElementType(),
6918 GEP->getAddressSpace())) {
6919 // We need to create a new base if the offset to the current base is
6920 // too large to fit into the addressing mode. So, a very large struct
6921 // may be split into several parts.
6922 BaseGEP = GEP;
6923 BaseOffset = Offset;
6924 NewBaseGEP = nullptr;
6925 }
6926 }
6927
6928 // Generate a new GEP to replace the current one.
6929 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6930
6931 if (!NewBaseGEP) {
6932 // Create a new base if we don't have one yet. Find the insertion
6933 // pointer for the new base first.
6934 createNewBase(BaseOffset, OldBase, GEP);
6935 }
6936
6937 IRBuilder<> Builder(GEP);
6938 Value *NewGEP = NewBaseGEP;
6939 if (Offset != BaseOffset) {
6940 // Calculate the new offset for the new GEP.
6941 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6942 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6943 }
6944 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6945 LargeOffsetGEPID.erase(GEP);
6946 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6947 GEP->eraseFromParent();
6948 Changed = true;
6949 }
6950 }
6951 return Changed;
6952}
6953
6954bool CodeGenPrepare::optimizePhiType(
6955 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6956 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6957 // We are looking for a collection of interconnected phi nodes that together
6958 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6959 // are of the same type. Convert the whole set of nodes to the type of the
6960 // bitcast.
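// For example (an illustrative sketch): in
//   %iv  = load i32, ptr %p
//   %fv  = bitcast i32 %iv to float
//   %phi = phi float [ %fv, %entry ], [ %other, %loop ]
//   %bc  = bitcast float %phi to i32
//   %use = add i32 %bc, 1
// the phi web can be kept in i32 and both bitcasts dropped, provided the
// target's shouldConvertPhiType hook agrees and the anchoring condition
// described below holds.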
6961 Type *PhiTy = I->getType();
6962 Type *ConvertTy = nullptr;
6963 if (Visited.count(I) ||
6964 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6965 return false;
6966
6967 SmallVector<Instruction *, 4> Worklist;
6968 Worklist.push_back(cast<Instruction>(I));
6969 SmallPtrSet<PHINode *, 4> PhiNodes;
6970 SmallPtrSet<ConstantData *, 4> Constants;
6971 PhiNodes.insert(I);
6972 Visited.insert(I);
6973 SmallPtrSet<Instruction *, 4> Defs;
6974 SmallPtrSet<Instruction *, 4> Uses;
6975 // This works by adding extra bitcasts between loads/stores and removing
6976 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi)),
6977 // we can get into a situation where we remove a bitcast in one iteration
6978 // just to add it again in the next. We need to ensure that at least one
6979 // bitcast we remove is anchored to something that will not change back.
6980 bool AnyAnchored = false;
6981
6982 while (!Worklist.empty()) {
6983 Instruction *II = Worklist.pop_back_val();
6984
6985 if (auto *Phi = dyn_cast<PHINode>(II)) {
6986 // Handle Defs, which might also be PHI's
6987 for (Value *V : Phi->incoming_values()) {
6988 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6989 if (!PhiNodes.count(OpPhi)) {
6990 if (!Visited.insert(OpPhi).second)
6991 return false;
6992 PhiNodes.insert(OpPhi);
6993 Worklist.push_back(OpPhi);
6994 }
6995 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6996 if (!OpLoad->isSimple())
6997 return false;
6998 if (Defs.insert(OpLoad).second)
6999 Worklist.push_back(OpLoad);
7000 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
7001 if (Defs.insert(OpEx).second)
7002 Worklist.push_back(OpEx);
7003 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7004 if (!ConvertTy)
7005 ConvertTy = OpBC->getOperand(0)->getType();
7006 if (OpBC->getOperand(0)->getType() != ConvertTy)
7007 return false;
7008 if (Defs.insert(OpBC).second) {
7009 Worklist.push_back(OpBC);
7010 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
7011 !isa<ExtractElementInst>(OpBC->getOperand(0));
7012 }
7013 } else if (auto *OpC = dyn_cast<ConstantData>(V))
7014 Constants.insert(OpC);
7015 else
7016 return false;
7017 }
7018 }
7019
7020 // Handle uses which might also be phi's
7021 for (User *V : II->users()) {
7022 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
7023 if (!PhiNodes.count(OpPhi)) {
7024 if (Visited.count(OpPhi))
7025 return false;
7026 PhiNodes.insert(OpPhi);
7027 Visited.insert(OpPhi);
7028 Worklist.push_back(OpPhi);
7029 }
7030 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
7031 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
7032 return false;
7033 Uses.insert(OpStore);
7034 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
7035 if (!ConvertTy)
7036 ConvertTy = OpBC->getType();
7037 if (OpBC->getType() != ConvertTy)
7038 return false;
7039 Uses.insert(OpBC);
7040 AnyAnchored |=
7041 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
7042 } else {
7043 return false;
7044 }
7045 }
7046 }
7047
7048 if (!ConvertTy || !AnyAnchored || PhiTy == ConvertTy ||
7049 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
7050 return false;
7051
7052 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
7053 << *ConvertTy << "\n");
7054
7055 // Create all the new phi nodes of the new type, and bitcast any loads to the
7056 // correct type.
7057 ValueToValueMap ValMap;
7058 for (ConstantData *C : Constants)
7059 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
7060 for (Instruction *D : Defs) {
7061 if (isa<BitCastInst>(D)) {
7062 ValMap[D] = D->getOperand(0);
7063 DeletedInstrs.insert(D);
7064 } else {
7065 BasicBlock::iterator insertPt = std::next(D->getIterator());
7066 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
7067 }
7068 }
7069 for (PHINode *Phi : PhiNodes)
7070 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
7071 Phi->getName() + ".tc", Phi->getIterator());
7072 // Pipe together all the PhiNodes.
7073 for (PHINode *Phi : PhiNodes) {
7074 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
7075 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
7076 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
7077 Phi->getIncomingBlock(i));
7078 Visited.insert(NewPhi);
7079 }
7080 // And finally pipe up the stores and bitcasts
7081 for (Instruction *U : Uses) {
7082 if (isa<BitCastInst>(U)) {
7083 DeletedInstrs.insert(U);
7084 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
7085 } else {
7086 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
7087 U->getIterator()));
7088 }
7089 }
7090
7091 // Save the removed phis to be deleted later.
7092 DeletedInstrs.insert_range(PhiNodes);
7093 return true;
7094}
7095
7096bool CodeGenPrepare::optimizePhiTypes(Function &F) {
7097 if (!OptimizePhiTypes)
7098 return false;
7099
7100 bool Changed = false;
7101 SmallPtrSet<PHINode *, 4> Visited;
7102 SmallPtrSet<Instruction *, 4> DeletedInstrs;
7103
7104 // Attempt to optimize all the phis in the functions to the correct type.
7105 for (auto &BB : F)
7106 for (auto &Phi : BB.phis())
7107 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
7108
7109 // Remove any old phi's that have been converted.
7110 for (auto *I : DeletedInstrs) {
7111 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
7112 I->eraseFromParent();
7113 }
7114
7115 return Changed;
7116}
7117
7118/// Return true, if an ext(load) can be formed from an extension in
7119/// \p MovedExts.
7120bool CodeGenPrepare::canFormExtLd(
7121 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
7122 Instruction *&Inst, bool HasPromoted) {
7123 for (auto *MovedExtInst : MovedExts) {
7124 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
7125 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
7126 Inst = MovedExtInst;
7127 break;
7128 }
7129 }
7130 if (!LI)
7131 return false;
7132
7133 // If they're already in the same block, there's nothing to do.
7134 // Make the cheap checks first if we did not promote.
7135 // If we promoted, we need to check if it is indeed profitable.
7136 if (!HasPromoted && LI->getParent() == Inst->getParent())
7137 return false;
7138
7139 return TLI->isExtLoad(LI, Inst, *DL);
7140}
7141
7142/// Move a zext or sext fed by a load into the same basic block as the load,
7143/// unless conditions are unfavorable. This allows SelectionDAG to fold the
7144/// extend into the load.
7145///
7146/// E.g.,
7147/// \code
7148/// %ld = load i32* %addr
7149/// %add = add nuw i32 %ld, 4
7150/// %zext = zext i32 %add to i64
7151 /// \endcode
7152/// =>
7153/// \code
7154/// %ld = load i32* %addr
7155/// %zext = zext i32 %ld to i64
7156/// %add = add nuw i64 %zext, 4
7157 /// \endcode
7158 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
7159 /// allows us to match zext(load i32*) to i64.
7160///
7161 /// Also, try to promote the computations used to obtain a sign-extended
7162 /// value used in memory accesses.
7163/// E.g.,
7164/// \code
7165/// a = add nsw i32 b, 3
7166/// d = sext i32 a to i64
7167/// e = getelementptr ..., i64 d
7168/// \endcode
7169/// =>
7170/// \code
7171/// f = sext i32 b to i64
7172/// a = add nsw i64 f, 3
7173/// e = getelementptr ..., i64 a
7174/// \endcode
7175///
7176/// \p Inst[in/out] the extension may be modified during the process if some
7177/// promotions apply.
7178bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
7179 bool AllowPromotionWithoutCommonHeader = false;
7180 /// See if it is an interesting sext operations for the address type
7181 /// promotion before trying to promote it, e.g., the ones with the right
7182 /// type and used in memory accesses.
7183 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
7184 *Inst, AllowPromotionWithoutCommonHeader);
7185 TypePromotionTransaction TPT(RemovedInsts);
7186 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
7187 TPT.getRestorationPoint();
7188 SmallVector<Instruction *, 1> Exts;
7189 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
7190 Exts.push_back(Inst);
7191
7192 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
7193
7194 // Look for a load being extended.
7195 LoadInst *LI = nullptr;
7196 Instruction *ExtFedByLoad;
7197
7198 // Try to promote a chain of computation if it allows to form an extended
7199 // load.
7200 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
7201 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
7202 TPT.commit();
7203 // Move the extend into the same block as the load.
7204 ExtFedByLoad->moveAfter(LI);
7205 ++NumExtsMoved;
7206 Inst = ExtFedByLoad;
7207 return true;
7208 }
7209
7210 // Continue promoting SExts if known as considerable depending on targets.
7211 if (ATPConsiderable &&
7212 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
7213 HasPromoted, TPT, SpeculativelyMovedExts))
7214 return true;
7215
7216 TPT.rollback(LastKnownGood);
7217 return false;
7218}
7219
7220// Perform address type promotion if doing so is profitable.
7221// If AllowPromotionWithoutCommonHeader == false, we should find other sext
7222// instructions that sign extended the same initial value. However, if
7223 // AllowPromotionWithoutCommonHeader == true, we expect promoting the
7224 // extension to be profitable on its own.
7225bool CodeGenPrepare::performAddressTypePromotion(
7226 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
7227 bool HasPromoted, TypePromotionTransaction &TPT,
7228 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
7229 bool Promoted = false;
7230 SmallPtrSet<Instruction *, 1> UnhandledExts;
7231 bool AllSeenFirst = true;
7232 for (auto *I : SpeculativelyMovedExts) {
7233 Value *HeadOfChain = I->getOperand(0);
7234 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
7235 SeenChainsForSExt.find(HeadOfChain);
7236 // If there is an unhandled SExt which has the same header, try to promote
7237 // it as well.
7238 if (AlreadySeen != SeenChainsForSExt.end()) {
7239 if (AlreadySeen->second != nullptr)
7240 UnhandledExts.insert(AlreadySeen->second);
7241 AllSeenFirst = false;
7242 }
7243 }
7244
7245 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
7246 SpeculativelyMovedExts.size() == 1)) {
7247 TPT.commit();
7248 if (HasPromoted)
7249 Promoted = true;
7250 for (auto *I : SpeculativelyMovedExts) {
7251 Value *HeadOfChain = I->getOperand(0);
7252 SeenChainsForSExt[HeadOfChain] = nullptr;
7253 ValToSExtendedUses[HeadOfChain].push_back(I);
7254 }
7255 // Update Inst as promotion happened.
7256 Inst = SpeculativelyMovedExts.pop_back_val();
7257 } else {
7258 // This is the first chain visited from the header, keep the current chain
7259 // as unhandled. Defer promoting it until we encounter another SExt
7260 // chain derived from the same header.
7261 for (auto *I : SpeculativelyMovedExts) {
7262 Value *HeadOfChain = I->getOperand(0);
7263 SeenChainsForSExt[HeadOfChain] = Inst;
7264 }
7265 return false;
7266 }
7267
7268 if (!AllSeenFirst && !UnhandledExts.empty())
7269 for (auto *VisitedSExt : UnhandledExts) {
7270 if (RemovedInsts.count(VisitedSExt))
7271 continue;
7272 TypePromotionTransaction TPT(RemovedInsts);
7273 SmallVector<Instruction *, 1> Exts;
7274 SmallVector<Instruction *, 2> Chains;
7275 Exts.push_back(VisitedSExt);
7276 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
7277 TPT.commit();
7278 if (HasPromoted)
7279 Promoted = true;
7280 for (auto *I : Chains) {
7281 Value *HeadOfChain = I->getOperand(0);
7282 // Mark this as handled.
7283 SeenChainsForSExt[HeadOfChain] = nullptr;
7284 ValToSExtendedUses[HeadOfChain].push_back(I);
7285 }
7286 }
7287 return Promoted;
7288}
7289
7290bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
7291 BasicBlock *DefBB = I->getParent();
7292
7293 // If the result of a {s|z}ext and its source are both live out, rewrite all
7294 // other uses of the source with the result of the extension.
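// For example (illustrative): if both %x and %ext = zext i32 %x to i64 are
// live out of this block, a use of %x in another block is rewritten to use
//   %t = trunc i64 %ext to i32
// so that only the extended value stays live across the block boundary.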
7295 Value *Src = I->getOperand(0);
7296 if (Src->hasOneUse())
7297 return false;
7298
7299 // Only do this xform if truncating is free.
7300 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
7301 return false;
7302
7303 // Only safe to perform the optimization if the source is also defined in
7304 // this block.
7305 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
7306 return false;
7307
7308 bool DefIsLiveOut = false;
7309 for (User *U : I->users()) {
7310 Instruction *UI = cast<Instruction>(U);
7311
7312 // Figure out which BB this ext is used in.
7313 BasicBlock *UserBB = UI->getParent();
7314 if (UserBB == DefBB)
7315 continue;
7316 DefIsLiveOut = true;
7317 break;
7318 }
7319 if (!DefIsLiveOut)
7320 return false;
7321
7322 // Make sure none of the uses are PHI nodes.
7323 for (User *U : Src->users()) {
7324 Instruction *UI = cast<Instruction>(U);
7325 BasicBlock *UserBB = UI->getParent();
7326 if (UserBB == DefBB)
7327 continue;
7328 // Be conservative. We don't want this xform to end up introducing
7329 // reloads just before load / store instructions.
7330 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
7331 return false;
7332 }
7333
7334 // InsertedTruncs - Only insert one trunc in each block.
7335 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
7336
7337 bool MadeChange = false;
7338 for (Use &U : Src->uses()) {
7339 Instruction *User = cast<Instruction>(U.getUser());
7340
7341 // Figure out which BB this ext is used in.
7342 BasicBlock *UserBB = User->getParent();
7343 if (UserBB == DefBB)
7344 continue;
7345
7346 // Both src and def are live in this block. Rewrite the use.
7347 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
7348
7349 if (!InsertedTrunc) {
7350 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
7351 assert(InsertPt != UserBB->end());
7352 InsertedTrunc = new TruncInst(I, Src->getType(), "");
7353 InsertedTrunc->insertBefore(*UserBB, InsertPt);
7354 InsertedInsts.insert(InsertedTrunc);
7355 }
7356
7357 // Replace a use of the {s|z}ext source with a use of the result.
7358 U = InsertedTrunc;
7359 ++NumExtUses;
7360 MadeChange = true;
7361 }
7362
7363 return MadeChange;
7364}
7365
7366// Find loads whose uses only use some of the loaded value's bits. Add an "and"
7367// just after the load if the target can fold this into one extload instruction,
7368// with the hope of eliminating some of the other later "and" instructions using
7369// the loaded value. "and"s that are made trivially redundant by the insertion
7370// of the new "and" are removed by this function, while others (e.g. those whose
7371// path from the load goes through a phi) are left for isel to potentially
7372// remove.
7373//
7374// For example:
7375//
7376// b0:
7377// x = load i32
7378// ...
7379// b1:
7380// y = and x, 0xff
7381// z = use y
7382//
7383// becomes:
7384//
7385// b0:
7386// x = load i32
7387// x' = and x, 0xff
7388// ...
7389// b1:
7390// z = use x'
7391//
7392// whereas:
7393//
7394// b0:
7395// x1 = load i32
7396// ...
7397// b1:
7398// x2 = load i32
7399// ...
7400// b2:
7401// x = phi x1, x2
7402// y = and x, 0xff
7403//
7404// becomes (after a call to optimizeLoadExt for each load):
7405//
7406// b0:
7407// x1 = load i32
7408// x1' = and x1, 0xff
7409// ...
7410// b1:
7411// x2 = load i32
7412// x2' = and x2, 0xff
7413// ...
7414// b2:
7415// x = phi x1', x2'
7416// y = and x, 0xff
7417bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
7418 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
7419 return false;
7420
7421 // Skip loads we've already transformed.
7422 if (Load->hasOneUse() &&
7423 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
7424 return false;
7425
7426 // Look at all uses of Load, looking through phis, to determine how many bits
7427 // of the loaded value are needed.
7428 SmallVector<Instruction *, 8> WorkList;
7429 SmallPtrSet<Instruction *, 16> Visited;
7430 SmallVector<Instruction *, 8> AndsToMaybeRemove;
7431 SmallVector<Instruction *, 8> DropFlags;
7432 for (auto *U : Load->users())
7433 WorkList.push_back(cast<Instruction>(U));
7434
7435 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
7436 unsigned BitWidth = LoadResultVT.getSizeInBits();
7437 // If the BitWidth is 0, do not try to optimize the type
7438 if (BitWidth == 0)
7439 return false;
7440
7441 APInt DemandBits(BitWidth, 0);
7442 APInt WidestAndBits(BitWidth, 0);
7443
7444 while (!WorkList.empty()) {
7445 Instruction *I = WorkList.pop_back_val();
7446
7447 // Break use-def graph loops.
7448 if (!Visited.insert(I).second)
7449 continue;
7450
7451 // For a PHI node, push all of its users.
7452 if (auto *Phi = dyn_cast<PHINode>(I)) {
7453 for (auto *U : Phi->users())
7454 WorkList.push_back(cast<Instruction>(U));
7455 continue;
7456 }
7457
7458 switch (I->getOpcode()) {
7459 case Instruction::And: {
7460 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7461 if (!AndC)
7462 return false;
7463 APInt AndBits = AndC->getValue();
7464 DemandBits |= AndBits;
7465 // Keep track of the widest and mask we see.
7466 if (AndBits.ugt(WidestAndBits))
7467 WidestAndBits = AndBits;
7468 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7469 AndsToMaybeRemove.push_back(I);
7470 break;
7471 }
7472
7473 case Instruction::Shl: {
7474 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7475 if (!ShlC)
7476 return false;
7477 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7478 DemandBits.setLowBits(BitWidth - ShiftAmt);
7479 DropFlags.push_back(I);
7480 break;
7481 }
7482
7483 case Instruction::Trunc: {
7484 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7485 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7486 DemandBits.setLowBits(TruncBitWidth);
7487 DropFlags.push_back(I);
7488 break;
7489 }
7490
7491 default:
7492 return false;
7493 }
7494 }
7495
7496 uint32_t ActiveBits = DemandBits.getActiveBits();
7497 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7498 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7499 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7500 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7501 // followed by an AND.
7502 // TODO: Look into removing this restriction by fixing backends to either
7503 // return false for isLoadExtLegal for i1 or have them select this pattern to
7504 // a single instruction.
7505 //
7506 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7507 // mask, since these are the only ands that will be removed by isel.
7508 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7509 WidestAndBits != DemandBits)
7510 return false;
7511
7512 LLVMContext &Ctx = Load->getType()->getContext();
7513 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7514 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7515
7516 // Reject cases that won't be matched as extloads.
7517 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7518 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7519 return false;
7520
7521 IRBuilder<> Builder(Load->getNextNode());
7522 auto *NewAnd = cast<Instruction>(
7523 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7524 // Mark this instruction as "inserted by CGP", so that other
7525 // optimizations don't touch it.
7526 InsertedInsts.insert(NewAnd);
7527
7528 // Replace all uses of load with new and (except for the use of load in the
7529 // new and itself).
7530 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7531 NewAnd->setOperand(0, Load);
7532
7533 // Remove any and instructions that are now redundant.
7534 for (auto *And : AndsToMaybeRemove)
7535 // Check that the and mask is the same as the one we decided to put on the
7536 // new and.
7537 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7538 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7539 if (&*CurInstIterator == And)
7540 CurInstIterator = std::next(And->getIterator());
7541 And->eraseFromParent();
7542 ++NumAndUses;
7543 }
7544
7545 // NSW flags may no longer hold.
7546 for (auto *Inst : DropFlags)
7547 Inst->setHasNoSignedWrap(false);
7548
7549 ++NumAndsAdded;
7550 return true;
7551}
7552
7553/// Check if V (an operand of a select instruction) is an expensive instruction
7554/// that is only used once.
7555 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7556 auto *I = dyn_cast<Instruction>(V);
7557 // If it's safe to speculatively execute, then it should not have side
7558 // effects; therefore, it's safe to sink and possibly *not* execute.
7559 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7560 TTI->isExpensiveToSpeculativelyExecute(I);
7561}
7562
7563/// Returns true if a SelectInst should be turned into an explicit branch.
7564 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7565 const TargetLowering *TLI,
7566 SelectInst *SI) {
7567 // If even a predictable select is cheap, then a branch can't be cheaper.
7568 if (!TLI->isPredictableSelectExpensive())
7569 return false;
7570
7571 // FIXME: This should use the same heuristics as IfConversion to determine
7572 // whether a select is better represented as a branch.
7573
7574 // If metadata tells us that the select condition is obviously predictable,
7575 // then we want to replace the select with a branch.
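// For example (illustrative): with branch weights of 1000 and 1, Max/Sum is
// roughly 99.9%, which exceeds a typical predictable-branch threshold, so
// the select below is worth turning into a branch.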
7576 uint64_t TrueWeight, FalseWeight;
7577 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7578 uint64_t Max = std::max(TrueWeight, FalseWeight);
7579 uint64_t Sum = TrueWeight + FalseWeight;
7580 if (Sum != 0) {
7581 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7582 if (Probability > TTI->getPredictableBranchThreshold())
7583 return true;
7584 }
7585 }
7586
7587 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7588
7589 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7590 // comparison condition. If the compare has more than one use, there's
7591 // probably another cmov or setcc around, so it's not worth emitting a branch.
7592 if (!Cmp || !Cmp->hasOneUse())
7593 return false;
7594
7595 // If either operand of the select is expensive and only needed on one side
7596 // of the select, we should form a branch.
7597 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7598 sinkSelectOperand(TTI, SI->getFalseValue()))
7599 return true;
7600
7601 return false;
7602}
7603
7604/// If \p isTrue is true, return the true value of \p SI, otherwise return
7605 /// the false value of \p SI. If the true/false value of \p SI is defined by any
7606/// select instructions in \p Selects, look through the defining select
7607/// instruction until the true/false value is not defined in \p Selects.
7608static Value *
7609 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7610 const SmallPtrSet<const Instruction *, 2> &Selects) {
7611 Value *V = nullptr;
7612
7613 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7614 DefSI = dyn_cast<SelectInst>(V)) {
7615 assert(DefSI->getCondition() == SI->getCondition() &&
7616 "The condition of DefSI does not match with SI");
7617 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7618 }
7619
7620 assert(V && "Failed to get select true/false value");
7621 return V;
7622}
7623
7624bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7625 assert(Shift->isShift() && "Expected a shift");
7626
7627 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7628 // general vector shifts, and (3) the shift amount is a select-of-splatted
7629 // values, hoist the shifts before the select:
7630 // shift Op0, (select Cond, TVal, FVal) -->
7631 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7632 //
7633 // This is inverting a generic IR transform when we know that the cost of a
7634 // general vector shift is more than the cost of 2 shift-by-scalars.
7635 // We can't do this effectively in SDAG because we may not be able to
7636 // determine if the select operands are splats from within a basic block.
7637 Type *Ty = Shift->getType();
7638 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7639 return false;
7640 Value *Cond, *TVal, *FVal;
7641 if (!match(Shift->getOperand(1),
7642 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7643 return false;
7644 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7645 return false;
7646
7647 IRBuilder<> Builder(Shift);
7648 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7649 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7650 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7651 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7652 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7653 Shift->eraseFromParent();
7654 return true;
7655}
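// Illustrative example, assuming the target reports vector shift-by-scalar as
// cheap:
//   %amt = select i1 %c, <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
//                        <4 x i32> <i32 3, i32 3, i32 3, i32 3>
//   %r   = shl <4 x i32> %x, %amt
// becomes
//   %t = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
//   %f = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
//   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f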
7656
7657bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7658 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7659 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7660 "Expected a funnel shift");
7661
7662 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7663 // than general vector shifts, and (3) the shift amount is select-of-splatted
7664 // values, hoist the funnel shifts before the select:
7665 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7666 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7667 //
7668 // This is inverting a generic IR transform when we know that the cost of a
7669 // general vector shift is more than the cost of 2 shift-by-scalars.
7670 // We can't do this effectively in SDAG because we may not be able to
7671 // determine if the select operands are splats from within a basic block.
7672 Type *Ty = Fsh->getType();
7673 if (!Ty->isVectorTy() || !TTI->isVectorShiftByScalarCheap(Ty))
7674 return false;
7675 Value *Cond, *TVal, *FVal;
7676 if (!match(Fsh->getOperand(2),
7677 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7678 return false;
7679 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7680 return false;
7681
7682 IRBuilder<> Builder(Fsh);
7683 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7684 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7685 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7686 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7687 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7688 Fsh->eraseFromParent();
7689 return true;
7690}
7691
7692/// If we have a SelectInst that will likely profit from branch prediction,
7693/// turn it into a branch.
7694bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7696 return false;
7697
7698 // If the SelectOptimize pass is enabled, selects have already been optimized.
7700 return false;
7701
7702 // Find all consecutive select instructions that share the same condition.
7704 ASI.push_back(SI);
7706 It != SI->getParent()->end(); ++It) {
7707 SelectInst *I = dyn_cast<SelectInst>(&*It);
7708 if (I && SI->getCondition() == I->getCondition()) {
7709 ASI.push_back(I);
7710 } else {
7711 break;
7712 }
7713 }
7714
7715 SelectInst *LastSI = ASI.back();
7716 // Increment the current iterator to skip the rest of the select instructions,
7717 // because either all of them will be lowered to branches or none will be.
7718 CurInstIterator = std::next(LastSI->getIterator());
7719 // Examine debug-info attached to the consecutive select instructions. They
7720 // won't be individually optimised by optimizeInst, so we need to perform
7721 // DbgVariableRecord maintenance here instead.
7722 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7723 fixupDbgVariableRecordsOnInst(*SI);
7724
7725 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7726
7727 // Can we convert the 'select' to control flow?
7728 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7729 return false;
7730
7731 TargetLowering::SelectSupportKind SelectKind;
7732 if (SI->getType()->isVectorTy())
7733 SelectKind = TargetLowering::ScalarCondVectorVal;
7734 else
7735 SelectKind = TargetLowering::ScalarValSelect;
7736
7737 if (TLI->isSelectSupported(SelectKind) &&
7739 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7740 return false;
7741
7742 // The DominatorTree needs to be rebuilt by any consumers after this
7743 // transformation. We simply reset here rather than setting the ModifiedDT
7744 // flag to avoid restarting the function walk in runOnFunction for each
7745 // select optimized.
7746 DT.reset();
7747
7748 // Transform a sequence like this:
7749 // start:
7750 // %cmp = cmp uge i32 %a, %b
7751 // %sel = select i1 %cmp, i32 %c, i32 %d
7752 //
7753 // Into:
7754 // start:
7755 // %cmp = cmp uge i32 %a, %b
7756 // %cmp.frozen = freeze %cmp
7757 // br i1 %cmp.frozen, label %select.true, label %select.false
7758 // select.true:
7759 // br label %select.end
7760 // select.false:
7761 // br label %select.end
7762 // select.end:
7763 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7764 //
7765 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7766 // In addition, we may sink instructions that produce %c or %d from
7767 // the entry block into the destination(s) of the new branch.
7768 // If the true or false blocks do not contain a sunken instruction, that
7769 // block and its branch may be optimized away. In that case, one side of the
7770 // first branch will point directly to select.end, and the corresponding PHI
7771 // predecessor block will be the start block.
7772
7773 // Collect values that go on the true side and the values that go on the false
7774 // side.
7775 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7776 for (SelectInst *SI : ASI) {
7777 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7778 TrueInstrs.push_back(cast<Instruction>(V));
7779 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7780 FalseInstrs.push_back(cast<Instruction>(V));
7781 }
7782
7783 // Split the select block, according to how many (if any) values go on each
7784 // side.
7785 BasicBlock *StartBlock = SI->getParent();
7786 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7787 // We should split before any debug-info.
7788 SplitPt.setHeadBit(true);
7789
7790 IRBuilder<> IB(SI);
7791 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7792
7793 BasicBlock *TrueBlock = nullptr;
7794 BasicBlock *FalseBlock = nullptr;
7795 BasicBlock *EndBlock = nullptr;
7796 BranchInst *TrueBranch = nullptr;
7797 BranchInst *FalseBranch = nullptr;
7798 if (TrueInstrs.size() == 0) {
7800 CondFr, SplitPt, false, nullptr, nullptr, LI));
7801 FalseBlock = FalseBranch->getParent();
7802 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7803 } else if (FalseInstrs.size() == 0) {
7805 CondFr, SplitPt, false, nullptr, nullptr, LI));
7806 TrueBlock = TrueBranch->getParent();
7807 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7808 } else {
7809 Instruction *ThenTerm = nullptr;
7810 Instruction *ElseTerm = nullptr;
7811 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7812 nullptr, nullptr, LI);
7813 TrueBranch = cast<BranchInst>(ThenTerm);
7814 FalseBranch = cast<BranchInst>(ElseTerm);
7815 TrueBlock = TrueBranch->getParent();
7816 FalseBlock = FalseBranch->getParent();
7817 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7818 }
7819
7820 EndBlock->setName("select.end");
7821 if (TrueBlock)
7822 TrueBlock->setName("select.true.sink");
7823 if (FalseBlock)
7824 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7825 : "select.false.sink");
7826
7827 if (IsHugeFunc) {
7828 if (TrueBlock)
7829 FreshBBs.insert(TrueBlock);
7830 if (FalseBlock)
7831 FreshBBs.insert(FalseBlock);
7832 FreshBBs.insert(EndBlock);
7833 }
7834
7835 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7836
7837 static const unsigned MD[] = {
7838 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7839 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7840 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7841
7842 // Sink expensive instructions into the conditional blocks to avoid executing
7843 // them speculatively.
7844 for (Instruction *I : TrueInstrs)
7845 I->moveBefore(TrueBranch->getIterator());
7846 for (Instruction *I : FalseInstrs)
7847 I->moveBefore(FalseBranch->getIterator());
7848
7849 // If we did not create a new block for one of the 'true' or 'false' paths
7850 // of the condition, it means that side of the branch goes to the end block
7851 // directly and the path originates from the start block from the point of
7852 // view of the new PHI.
7853 if (TrueBlock == nullptr)
7854 TrueBlock = StartBlock;
7855 else if (FalseBlock == nullptr)
7856 FalseBlock = StartBlock;
7857
7858 SmallPtrSet<const Instruction *, 2> INS(llvm::from_range, ASI);
7859 // Iterate in reverse order because a later select may use the value of an
7860 // earlier select, and we need to propagate the value through that earlier
7861 // select to get the correct PHI operand.
7862 for (SelectInst *SI : llvm::reverse(ASI)) {
7863 // The select itself is replaced with a PHI Node.
7864 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7865 PN->insertBefore(EndBlock->begin());
7866 PN->takeName(SI);
7867 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7868 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7869 PN->setDebugLoc(SI->getDebugLoc());
7870
7871 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7872 SI->eraseFromParent();
7873 INS.erase(SI);
7874 ++NumSelectsExpanded;
7875 }
7876
7877 // Instruct OptimizeBlock to skip to the next block.
7878 CurInstIterator = StartBlock->end();
7879 return true;
7880}
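// Illustrative example of the operand-sinking case: if one arm is a
// single-use instruction that the target reports as expensive to speculate,
// it is moved into the corresponding conditional block, e.g.
//   %div = fdiv double %x, %y              ; only used by the select
//   %sel = select i1 %cmp, double %div, double %z
// becomes, conceptually,
//   start:
//     %cmp.frozen = freeze i1 %cmp
//     br i1 %cmp.frozen, label %select.true.sink, label %select.end
//   select.true.sink:
//     %div = fdiv double %x, %y
//     br label %select.end
//   select.end:
//     %sel = phi double [ %div, %select.true.sink ], [ %z, %start ]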
7881
7882/// Some targets only accept certain types for splat inputs. For example, a VDUP
7883/// in MVE takes a GPR (integer) register, and instructions that incorporate a
7884/// VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7885bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7886 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7888 m_Undef(), m_ZeroMask())))
7889 return false;
7890 Type *NewType = TLI->shouldConvertSplatType(SVI);
7891 if (!NewType)
7892 return false;
7893
7894 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7895 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7896 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7897 "Expected a type of the same size!");
7898 auto *NewVecType =
7899 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7900
7901 // Create a bitcast (shuffle (insert (bitcast(..))))
7902 IRBuilder<> Builder(SVI->getContext());
7903 Builder.SetInsertPoint(SVI);
7904 Value *BC1 = Builder.CreateBitCast(
7905 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7906 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7907 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7908
7909 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7911 SVI, TLInfo, nullptr,
7912 [&](Value *V) { removeAllAssertingVHReferences(V); });
7913
7914 // Also hoist the bitcast up to its operand if they are not in the same
7915 // block.
7916 if (auto *BCI = dyn_cast<Instruction>(BC1))
7917 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7918 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7919 !Op->isTerminator() && !Op->isEHPad())
7920 BCI->moveAfter(Op);
7921
7922 return true;
7923}
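// Illustrative example, assuming shouldConvertSplatType asks for an i32 splat
// type (as an MVE-like target might for a float splat):
//   %ins   = insertelement <4 x float> poison, float %f, i64 0
//   %splat = shufflevector <4 x float> %ins, <4 x float> poison,
//                          <4 x i32> zeroinitializer
// is rewritten, roughly, as
//   %bc1    = bitcast float %f to i32
//   %splat2 = <splat of i32 %bc1 built with insertelement + shufflevector>
//   %res    = bitcast <4 x i32> %splat2 to <4 x float>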
7924
7925bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7926 // If the operands of I can be folded into a target instruction together with
7927 // I, duplicate and sink them.
7928 SmallVector<Use *, 4> OpsToSink;
7929 if (!TTI->isProfitableToSinkOperands(I, OpsToSink))
7930 return false;
7931
7932 // OpsToSink can contain multiple uses in a use chain (e.g.
7933 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7934 // uses must come first, so we process the ops in reverse order so as to not
7935 // create invalid IR.
7936 BasicBlock *TargetBB = I->getParent();
7937 bool Changed = false;
7938 SmallVector<Use *, 4> ToReplace;
7939 Instruction *InsertPoint = I;
7940 DenseMap<const Instruction *, unsigned long> InstOrdering;
7941 unsigned long InstNumber = 0;
7942 for (const auto &I : *TargetBB)
7943 InstOrdering[&I] = InstNumber++;
7944
7945 for (Use *U : reverse(OpsToSink)) {
7946 auto *UI = cast<Instruction>(U->get());
7947 if (isa<PHINode>(UI) || UI->mayHaveSideEffects() || UI->mayReadFromMemory())
7948 continue;
7949 if (UI->getParent() == TargetBB) {
7950 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7951 InsertPoint = UI;
7952 continue;
7953 }
7954 ToReplace.push_back(U);
7955 }
7956
7957 SetVector<Instruction *> MaybeDead;
7958 DenseMap<Instruction *, Instruction *> NewInstructions;
7959 for (Use *U : ToReplace) {
7960 auto *UI = cast<Instruction>(U->get());
7961 Instruction *NI = UI->clone();
7962
7963 if (IsHugeFunc) {
7964 // Now that we have cloned an instruction, its operands' defs may sink into
7965 // this BB, so add their defining blocks to FreshBBs for further optimization.
7966 for (Value *Op : NI->operands())
7967 if (auto *OpDef = dyn_cast<Instruction>(Op))
7968 FreshBBs.insert(OpDef->getParent());
7969 }
7970
7971 NewInstructions[UI] = NI;
7972 MaybeDead.insert(UI);
7973 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7974 NI->insertBefore(InsertPoint->getIterator());
7975 InsertPoint = NI;
7976 InsertedInsts.insert(NI);
7977
7978 // Update the use for the new instruction, making sure that we update the
7979 // sunk instruction uses, if it is part of a chain that has already been
7980 // sunk.
7981 Instruction *OldI = cast<Instruction>(U->getUser());
7982 if (auto It = NewInstructions.find(OldI); It != NewInstructions.end())
7983 It->second->setOperand(U->getOperandNo(), NI);
7984 else
7985 U->set(NI);
7986 Changed = true;
7987 }
7988
7989 // Remove instructions that are dead after sinking.
7990 for (auto *I : MaybeDead) {
7991 if (!I->hasNUsesOrMore(1)) {
7992 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7993 I->eraseFromParent();
7994 }
7995 }
7996
7997 return Changed;
7998}
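// Illustrative example: on a target whose isProfitableToSinkOperands reports
// that a splat-plus-extend operand chain can be folded into its user (for
// instance into a lane-indexed widening multiply), IR such as
//   bb1:
//     %s = shufflevector <4 x i16> %v, <4 x i16> poison, <4 x i32> zeroinitializer
//     %z = zext <4 x i16> %s to <4 x i32>
//   bb2:
//     %m = mul <4 x i32> %a, %z
// gets clones of %s and %z inserted into bb2 right before %m, with %m rewired
// to use the clones; the originals are erased once they become dead.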
7999
8000bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
8001 Value *Cond = SI->getCondition();
8002 Type *OldType = Cond->getType();
8003 LLVMContext &Context = Cond->getContext();
8004 EVT OldVT = TLI->getValueType(*DL, OldType);
8005 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
8006 unsigned RegWidth = RegType.getSizeInBits();
8007
8008 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
8009 return false;
8010
8011 // If the register width is greater than the type width, expand the condition
8012 // of the switch instruction and each case constant to the width of the
8013 // register. By widening the type of the switch condition, subsequent
8014 // comparisons (for case comparisons) will not need to be extended to the
8015 // preferred register width, so we will potentially eliminate N-1 extends,
8016 // where N is the number of cases in the switch.
8017 auto *NewType = Type::getIntNTy(Context, RegWidth);
8018
8019 // Extend the switch condition and case constants using the target preferred
8020 // extend unless the switch condition is a function argument with an extend
8021 // attribute. In that case, we can avoid an unnecessary mask/extension by
8022 // matching the argument extension instead.
8023 Instruction::CastOps ExtType = Instruction::ZExt;
8024 // Some targets prefer SExt over ZExt.
8025 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
8026 ExtType = Instruction::SExt;
8027
8028 if (auto *Arg = dyn_cast<Argument>(Cond)) {
8029 if (Arg->hasSExtAttr())
8030 ExtType = Instruction::SExt;
8031 if (Arg->hasZExtAttr())
8032 ExtType = Instruction::ZExt;
8033 }
8034
8035 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
8036 ExtInst->insertBefore(SI->getIterator());
8037 ExtInst->setDebugLoc(SI->getDebugLoc());
8038 SI->setCondition(ExtInst);
8039 for (auto Case : SI->cases()) {
8040 const APInt &NarrowConst = Case.getCaseValue()->getValue();
8041 APInt WideConst = (ExtType == Instruction::ZExt)
8042 ? NarrowConst.zext(RegWidth)
8043 : NarrowConst.sext(RegWidth);
8044 Case.setValue(ConstantInt::get(Context, WideConst));
8045 }
8046
8047 return true;
8048}
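// Illustrative example, for a target whose preferred switch-condition type is
// i32 and which does not prefer sext over zext here:
//   switch i8 %c, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// becomes
//   %c.ext = zext i8 %c to i32
//   switch i32 %c.ext, label %def [ i32 1, label %a
//                                   i32 2, label %b ]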
8049
8050bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
8051 // The SCCP optimization tends to produce code like this:
8052 // switch(x) { case 42: phi(42, ...) }
8053 // Materializing the constant for the phi argument needs instructions; so we
8054 // change the code to:
8055 // switch(x) { case 42: phi(x, ...) }
8056
8057 Value *Condition = SI->getCondition();
8058 // Avoid endless loop in degenerate case.
8059 if (isa<ConstantInt>(*Condition))
8060 return false;
8061
8062 bool Changed = false;
8063 BasicBlock *SwitchBB = SI->getParent();
8064 Type *ConditionType = Condition->getType();
8065
8066 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
8067 ConstantInt *CaseValue = Case.getCaseValue();
8068 BasicBlock *CaseBB = Case.getCaseSuccessor();
8069 // Set to true if we previously checked that `CaseBB` is only reached by
8070 // a single case from this switch.
8071 bool CheckedForSinglePred = false;
8072 for (PHINode &PHI : CaseBB->phis()) {
8073 Type *PHIType = PHI.getType();
8074 // If ZExt is free then we can also catch patterns like this:
8075 // switch((i32)x) { case 42: phi((i64)42, ...); }
8076 // and replace `(i64)42` with `zext i32 %x to i64`.
8077 bool TryZExt =
8078 PHIType->isIntegerTy() &&
8079 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
8080 TLI->isZExtFree(ConditionType, PHIType);
8081 if (PHIType == ConditionType || TryZExt) {
8082 // Set to true to skip this case because of multiple preds.
8083 bool SkipCase = false;
8084 Value *Replacement = nullptr;
8085 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
8086 Value *PHIValue = PHI.getIncomingValue(I);
8087 if (PHIValue != CaseValue) {
8088 if (!TryZExt)
8089 continue;
8090 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
8091 if (!PHIValueInt ||
8092 PHIValueInt->getValue() !=
8093 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
8094 continue;
8095 }
8096 if (PHI.getIncomingBlock(I) != SwitchBB)
8097 continue;
8098 // We cannot optimize if there are multiple case labels jumping to
8099 // this block. This check may get expensive when there are many
8100 // case labels so we test for it last.
8101 if (!CheckedForSinglePred) {
8102 CheckedForSinglePred = true;
8103 if (SI->findCaseDest(CaseBB) == nullptr) {
8104 SkipCase = true;
8105 break;
8106 }
8107 }
8108
8109 if (Replacement == nullptr) {
8110 if (PHIValue == CaseValue) {
8111 Replacement = Condition;
8112 } else {
8113 IRBuilder<> Builder(SI);
8114 Replacement = Builder.CreateZExt(Condition, PHIType);
8115 }
8116 }
8117 PHI.setIncomingValue(I, Replacement);
8118 Changed = true;
8119 }
8120 if (SkipCase)
8121 break;
8122 }
8123 }
8124 }
8125 return Changed;
8126}
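// Illustrative example:
//   switchbb:
//     switch i32 %x, label %def [ i32 42, label %case42 ]
//   case42:
//     %p = phi i32 [ 42, %switchbb ], [ %other, %pred ]
// becomes
//     %p = phi i32 [ %x, %switchbb ], [ %other, %pred ]
// because %x is known to equal 42 on that edge, so no constant needs to be
// materialized. When zext from the condition type is free, a wider phi such
// as "phi i64 [ 42, %switchbb ], ..." can likewise take "zext i32 %x to i64".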
8127
8128bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
8129 bool Changed = optimizeSwitchType(SI);
8130 Changed |= optimizeSwitchPhiConstants(SI);
8131 return Changed;
8132}
8133
8134namespace {
8135
8136/// Helper class to promote a scalar operation to a vector one.
8137/// This class is used to move an extractelement transition downward.
8138/// E.g.,
8139/// a = vector_op <2 x i32>
8140/// b = extractelement <2 x i32> a, i32 0
8141/// c = scalar_op b
8142/// store c
8143///
8144/// =>
8145/// a = vector_op <2 x i32>
8146/// c = vector_op a (equivalent to scalar_op on the related lane)
8147/// * d = extractelement <2 x i32> c, i32 0
8148/// * store d
8149/// Assuming both the extractelement and the store can be combined, we get rid
8150/// of the transition.
8151class VectorPromoteHelper {
8152 /// DataLayout associated with the current module.
8153 const DataLayout &DL;
8154
8155 /// Used to perform some checks on the legality of vector operations.
8156 const TargetLowering &TLI;
8157
8158 /// Used to estimate the cost of the promoted chain.
8159 const TargetTransformInfo &TTI;
8160
8161 /// The transition being moved downwards.
8162 Instruction *Transition;
8163
8164 /// The sequence of instructions to be promoted.
8165 SmallVector<Instruction *, 4> InstsToBePromoted;
8166
8167 /// Cost of combining a store and an extract.
8168 unsigned StoreExtractCombineCost;
8169
8170 /// Instruction that will be combined with the transition.
8171 Instruction *CombineInst = nullptr;
8172
8173 /// The instruction that represents the current end of the transition.
8174 /// Since we are faking the promotion until we reach the end of the chain
8175 /// of computation, we need a way to get the current end of the transition.
8176 Instruction *getEndOfTransition() const {
8177 if (InstsToBePromoted.empty())
8178 return Transition;
8179 return InstsToBePromoted.back();
8180 }
8181
8182 /// Return the index of the original value in the transition.
8183 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
8184 /// c, is at index 0.
8185 unsigned getTransitionOriginalValueIdx() const {
8186 assert(isa<ExtractElementInst>(Transition) &&
8187 "Other kind of transitions are not supported yet");
8188 return 0;
8189 }
8190
8191 /// Return the index of the index in the transition.
8192 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
8193 /// is at index 1.
8194 unsigned getTransitionIdx() const {
8195 assert(isa<ExtractElementInst>(Transition) &&
8196 "Other kind of transitions are not supported yet");
8197 return 1;
8198 }
8199
8200 /// Get the type of the transition.
8201 /// This is the type of the original value.
8202 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
8203 /// transition is <2 x i32>.
8204 Type *getTransitionType() const {
8205 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
8206 }
8207
8208 /// Promote \p ToBePromoted by moving \p Def downward past it.
8209 /// I.e., we have the following sequence:
8210 /// Def = Transition <ty1> a to <ty2>
8211 /// b = ToBePromoted <ty2> Def, ...
8212 /// =>
8213 /// b = ToBePromoted <ty1> a, ...
8214 /// Def = Transition <ty1> ToBePromoted to <ty2>
8215 void promoteImpl(Instruction *ToBePromoted);
8216
8217 /// Check whether or not it is profitable to promote all the
8218 /// instructions enqueued to be promoted.
8219 bool isProfitableToPromote() {
8220 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
8221 unsigned Index = isa<ConstantInt>(ValIdx)
8222 ? cast<ConstantInt>(ValIdx)->getZExtValue()
8223 : -1;
8224 Type *PromotedType = getTransitionType();
8225
8226 StoreInst *ST = cast<StoreInst>(CombineInst);
8227 unsigned AS = ST->getPointerAddressSpace();
8228 // Check if this store is supported.
8230 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
8231 ST->getAlign())) {
8232 // If this is not supported, there is no way we can combine
8233 // the extract with the store.
8234 return false;
8235 }
8236
8237 // The scalar chain of computation has to pay for the scalar-to-vector
8238 // transition.
8239 // The vector chain has to account for the combining cost.
8242 InstructionCost ScalarCost =
8243 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
8244 InstructionCost VectorCost = StoreExtractCombineCost;
8245 for (const auto &Inst : InstsToBePromoted) {
8246 // Compute the cost.
8247 // By construction, all instructions being promoted are arithmetic ones.
8248 // Moreover, one argument is a constant that can be viewed as a splat
8249 // constant.
8250 Value *Arg0 = Inst->getOperand(0);
8251 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
8252 isa<ConstantFP>(Arg0);
8253 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
8254 if (IsArg0Constant)
8256 else
8258
8259 ScalarCost += TTI.getArithmeticInstrCost(
8260 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
8261 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
8262 CostKind, Arg0Info, Arg1Info);
8263 }
8264 LLVM_DEBUG(
8265 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
8266 << ScalarCost << "\nVector: " << VectorCost << '\n');
8267 return ScalarCost > VectorCost;
8268 }
8269
8270 /// Generate a constant vector with \p Val with the same
8271 /// number of elements as the transition.
8272 /// \p UseSplat defines whether or not \p Val should be replicated
8273 /// across the whole vector.
8274 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
8275 /// otherwise we generate a vector with as many poison elements as possible:
8276 /// <poison, ..., poison, Val, poison, ..., poison> where \p Val is only
8277 /// used at the index of the extract.
8278 Value *getConstantVector(Constant *Val, bool UseSplat) const {
8279 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
8280 if (!UseSplat) {
8281 // If we cannot determine where the constant must be, we have to
8282 // use a splat constant.
8283 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
8284 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
8285 ExtractIdx = CstVal->getSExtValue();
8286 else
8287 UseSplat = true;
8288 }
8289
8290 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
8291 if (UseSplat)
8292 return ConstantVector::getSplat(EC, Val);
8293
8294 if (!EC.isScalable()) {
8295 SmallVector<Constant *, 4> ConstVec;
8296 PoisonValue *PoisonVal = PoisonValue::get(Val->getType());
8297 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
8298 if (Idx == ExtractIdx)
8299 ConstVec.push_back(Val);
8300 else
8301 ConstVec.push_back(PoisonVal);
8302 }
8303 return ConstantVector::get(ConstVec);
8304 } else
8306 "Generate scalable vector for non-splat is unimplemented");
8307 }
8308
8309 /// Check if promoting the operand at \p OperandIdx in \p Use to a vector
8310 /// type can trigger undefined behavior.
8311 static bool canCauseUndefinedBehavior(const Instruction *Use,
8312 unsigned OperandIdx) {
8313 // It is not safe to introduce undef when the operand is on
8314 // the right-hand side of a division-like instruction.
8315 if (OperandIdx != 1)
8316 return false;
8317 switch (Use->getOpcode()) {
8318 default:
8319 return false;
8320 case Instruction::SDiv:
8321 case Instruction::UDiv:
8322 case Instruction::SRem:
8323 case Instruction::URem:
8324 return true;
8325 case Instruction::FDiv:
8326 case Instruction::FRem:
8327 return !Use->hasNoNaNs();
8328 }
8329 llvm_unreachable(nullptr);
8330 }
8331
8332public:
8333 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
8334 const TargetTransformInfo &TTI, Instruction *Transition,
8335 unsigned CombineCost)
8336 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
8337 StoreExtractCombineCost(CombineCost) {
8338 assert(Transition && "Do not know how to promote null");
8339 }
8340
8341 /// Check if we can promote \p ToBePromoted to \p Type.
8342 bool canPromote(const Instruction *ToBePromoted) const {
8343 // We could support CastInst too.
8344 return isa<BinaryOperator>(ToBePromoted);
8345 }
8346
8347 /// Check if it is profitable to promote \p ToBePromoted
8348 /// by moving the transition downward past it.
8349 bool shouldPromote(const Instruction *ToBePromoted) const {
8350 // Promote only if all the operands can be statically expanded.
8351 // Indeed, we do not want to introduce any new kind of transitions.
8352 for (const Use &U : ToBePromoted->operands()) {
8353 const Value *Val = U.get();
8354 if (Val == getEndOfTransition()) {
8355 // If the use is a division and the transition is on the rhs,
8356 // we cannot promote the operation, otherwise we may create a
8357 // division by zero.
8358 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
8359 return false;
8360 continue;
8361 }
8362 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
8363 !isa<ConstantFP>(Val))
8364 return false;
8365 }
8366 // Check that the resulting operation is legal.
8367 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
8368 if (!ISDOpcode)
8369 return false;
8370 return StressStoreExtract ||
8372 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
8373 }
8374
8375 /// Check whether or not \p Use can be combined
8376 /// with the transition.
8377 /// I.e., is it possible to do Use(Transition) => AnotherUse?
8378 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
8379
8380 /// Record \p ToBePromoted as part of the chain to be promoted.
8381 void enqueueForPromotion(Instruction *ToBePromoted) {
8382 InstsToBePromoted.push_back(ToBePromoted);
8383 }
8384
8385 /// Set the instruction that will be combined with the transition.
8386 void recordCombineInstruction(Instruction *ToBeCombined) {
8387 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
8388 CombineInst = ToBeCombined;
8389 }
8390
8391 /// Promote all the instructions enqueued for promotion if it is
8392 /// profitable.
8393 /// \return True if the promotion happened, false otherwise.
8394 bool promote() {
8395 // Check if there is something to promote.
8396 // Right now, if we do not have anything to combine with,
8397 // we assume the promotion is not profitable.
8398 if (InstsToBePromoted.empty() || !CombineInst)
8399 return false;
8400
8401 // Check cost.
8402 if (!StressStoreExtract && !isProfitableToPromote())
8403 return false;
8404
8405 // Promote.
8406 for (auto &ToBePromoted : InstsToBePromoted)
8407 promoteImpl(ToBePromoted);
8408 InstsToBePromoted.clear();
8409 return true;
8410 }
8411};
8412
8413} // end anonymous namespace
8414
8415void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
8416 // At this point, we know that all the operands of ToBePromoted but Def
8417 // can be statically promoted.
8418 // For Def, we need to use its parameter in ToBePromoted:
8419 // b = ToBePromoted ty1 a
8420 // Def = Transition ty1 b to ty2
8421 // Move the transition down.
8422 // 1. Replace all uses of the promoted operation by the transition.
8423 // = ... b => = ... Def.
8424 assert(ToBePromoted->getType() == Transition->getType() &&
8425 "The type of the result of the transition does not match "
8426 "the final type");
8427 ToBePromoted->replaceAllUsesWith(Transition);
8428 // 2. Update the type of the uses.
8429 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
8430 Type *TransitionTy = getTransitionType();
8431 ToBePromoted->mutateType(TransitionTy);
8432 // 3. Update all the operands of the promoted operation with promoted
8433 // operands.
8434 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
8435 for (Use &U : ToBePromoted->operands()) {
8436 Value *Val = U.get();
8437 Value *NewVal = nullptr;
8438 if (Val == Transition)
8439 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
8440 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
8441 isa<ConstantFP>(Val)) {
8442 // Use a splat constant if it is not safe to use undef.
8443 NewVal = getConstantVector(
8444 cast<Constant>(Val),
8445 isa<UndefValue>(Val) ||
8446 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
8447 } else
8448 llvm_unreachable("Did you modified shouldPromote and forgot to update "
8449 "this?");
8450 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8451 }
8452 Transition->moveAfter(ToBePromoted);
8453 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8454}
8455
8456/// Some targets can do store(extractelement) with one instruction.
8457/// Try to push the extractelement towards the stores when the target
8458/// has this feature and this is profitable.
8459bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8460 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8461 if (DisableStoreExtract ||
8464 Inst->getOperand(1), CombineCost)))
8465 return false;
8466
8467 // At this point we know that Inst is a vector to scalar transition.
8468 // Try to move it down the def-use chain, until:
8469 // - We can combine the transition with its single use
8470 // => we got rid of the transition.
8471 // - We escape the current basic block
8472 // => we would need to check that we are moving it to a cheaper place and
8473 // we do not do that for now.
8474 BasicBlock *Parent = Inst->getParent();
8475 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8476 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8477 // If the transition has more than one use, assume this is not going to be
8478 // beneficial.
8479 while (Inst->hasOneUse()) {
8480 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8481 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8482
8483 if (ToBePromoted->getParent() != Parent) {
8484 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8485 << ToBePromoted->getParent()->getName()
8486 << ") than the transition (" << Parent->getName()
8487 << ").\n");
8488 return false;
8489 }
8490
8491 if (VPH.canCombine(ToBePromoted)) {
8492 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8493 << "will be combined with: " << *ToBePromoted << '\n');
8494 VPH.recordCombineInstruction(ToBePromoted);
8495 bool Changed = VPH.promote();
8496 NumStoreExtractExposed += Changed;
8497 return Changed;
8498 }
8499
8500 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8501 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8502 return false;
8503
8504 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8505
8506 VPH.enqueueForPromotion(ToBePromoted);
8507 Inst = ToBePromoted;
8508 }
8509 return false;
8510}
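// Illustrative example, for a target where store(extractelement) is a single
// instruction and the cost model favors the promotion:
//   %e = extractelement <2 x i32> %v, i32 1
//   %r = or i32 %e, 7
//   store i32 %r, ptr %p
// becomes, conceptually,
//   %v.or = or <2 x i32> %v, <i32 poison, i32 7>
//   %e2   = extractelement <2 x i32> %v.or, i32 1
//   store i32 %e2, ptr %p
// so the extract can be folded into the store by instruction selection.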
8511
8512/// For the instruction sequence of store below, F and I values
8513/// are bundled together as an i64 value before being stored into memory.
8514/// Sometimes it is more efficient to generate separate stores for F and I,
8515/// which can remove the bitwise instructions or sink them to colder places.
8516///
8517/// (store (or (zext (bitcast F to i32) to i64),
8518/// (shl (zext I to i64), 32)), addr) -->
8519/// (store F, addr) and (store I, addr+4)
8520///
8521/// Similarly, splitting for other merged store can also be beneficial, like:
8522/// For pair of {i32, i32}, i64 store --> two i32 stores.
8523/// For pair of {i32, i16}, i64 store --> two i32 stores.
8524/// For pair of {i16, i16}, i32 store --> two i16 stores.
8525/// For pair of {i16, i8}, i32 store --> two i16 stores.
8526/// For pair of {i8, i8}, i16 store --> two i8 stores.
8527///
8528/// We allow each target to determine specifically which kind of splitting is
8529/// supported.
8530///
8531/// The store patterns are commonly seen in the simple code snippet below
8532/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8533/// void goo(const std::pair<int, float> &);
8534/// hoo() {
8535/// ...
8536/// goo(std::make_pair(tmp, ftmp));
8537/// ...
8538/// }
8539///
8540/// Although we already have similar splitting in DAG Combine, we duplicate
8541/// it in CodeGenPrepare to catch the case in which the pattern spans
8542/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8543/// during code expansion.
8545 const TargetLowering &TLI) {
8546 // Handle simple but common cases only.
8547 Type *StoreType = SI.getValueOperand()->getType();
8548
8549 // The code below assumes shifting a value by <number of bits>,
8550 // whereas scalable vectors would have to be shifted by
8551 // <2log(vscale) + number of bits> in order to store the
8552 // low/high parts. Bailing out for now.
8553 if (StoreType->isScalableTy())
8554 return false;
8555
8556 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8557 DL.getTypeSizeInBits(StoreType) == 0)
8558 return false;
8559
8560 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8561 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8562 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8563 return false;
8564
8565 // Don't split the store if it is volatile.
8566 if (SI.isVolatile())
8567 return false;
8568
8569 // Match the following patterns:
8570 // (store (or (zext LValue to i64),
8571 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8572 // or
8573 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8574 // (zext LValue to i64),
8575 // Expect both operands of the OR and the first operand of the SHL to have
8576 // only one use.
8577 Value *LValue, *HValue;
8578 if (!match(SI.getValueOperand(),
8581 m_SpecificInt(HalfValBitSize))))))
8582 return false;
8583
8584 // Check that LValue and HValue are integers no wider than HalfValBitSize.
8585 if (!LValue->getType()->isIntegerTy() ||
8586 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8587 !HValue->getType()->isIntegerTy() ||
8588 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8589 return false;
8590
8591 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8592 // as the input of target query.
8593 auto *LBC = dyn_cast<BitCastInst>(LValue);
8594 auto *HBC = dyn_cast<BitCastInst>(HValue);
8595 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8596 : EVT::getEVT(LValue->getType());
8597 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8598 : EVT::getEVT(HValue->getType());
8599 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8600 return false;
8601
8602 // Start to split store.
8603 IRBuilder<> Builder(SI.getContext());
8604 Builder.SetInsertPoint(&SI);
8605
8606 // If LValue/HValue is a bitcast in another BB, create a new one in the
8607 // current BB so it may be merged with the split stores by the DAG combiner.
8608 if (LBC && LBC->getParent() != SI.getParent())
8609 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8610 if (HBC && HBC->getParent() != SI.getParent())
8611 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8612
8613 bool IsLE = SI.getDataLayout().isLittleEndian();
8614 auto CreateSplitStore = [&](Value *V, bool Upper) {
8615 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8616 Value *Addr = SI.getPointerOperand();
8617 Align Alignment = SI.getAlign();
8618 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8619 if (IsOffsetStore) {
8620 Addr = Builder.CreateGEP(
8621 SplitStoreType, Addr,
8622 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8623
8624 // When splitting the store in half, naturally one half will retain the
8625 // alignment of the original wider store, regardless of whether it was
8626 // over-aligned or not, while the other will require adjustment.
8627 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8628 }
8629 Builder.CreateAlignedStore(V, Addr, Alignment);
8630 };
8631
8632 CreateSplitStore(LValue, false);
8633 CreateSplitStore(HValue, true);
8634
8635 // Delete the old store.
8636 SI.eraseFromParent();
8637 return true;
8638}
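// Illustrative example on a little-endian target that reports
// isMultiStoresCheaperThanBitsMerge as true:
//   %l  = zext i32 %lo to i64
//   %h  = zext i32 %hi to i64
//   %hs = shl i64 %h, 32
//   %v  = or i64 %l, %hs
//   store i64 %v, ptr %p
// becomes
//   store i32 %lo, ptr %p
//   %p4 = getelementptr i32, ptr %p, i32 1
//   store i32 %hi, ptr %p4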
8639
8640// Return true if the GEP has two operands, the first operand is of a sequential
8641// type, and the second operand is a constant.
8644 return GEP->getNumOperands() == 2 && I.isSequential() &&
8645 isa<ConstantInt>(GEP->getOperand(1));
8646}
8647
8648// Try unmerging GEPs to reduce liveness interference (register pressure) across
8649// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8650// reducing liveness interference across those edges benefits global register
8651// allocation. Currently handles only certain cases.
8652//
8653// For example, unmerge %GEPI and %UGEPI as below.
8654//
8655// ---------- BEFORE ----------
8656// SrcBlock:
8657// ...
8658// %GEPIOp = ...
8659// ...
8660// %GEPI = gep %GEPIOp, Idx
8661// ...
8662// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8663// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8664// (* %GEPIOp is alive on the indirectbr edges only because it's used by
8665// %UGEPI)
8666//
8667// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8668// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8669// ...
8670//
8671// DstBi:
8672// ...
8673// %UGEPI = gep %GEPIOp, UIdx
8674// ...
8675// ---------------------------
8676//
8677// ---------- AFTER ----------
8678// SrcBlock:
8679// ... (same as above)
8680// (* %GEPI is still alive on the indirectbr edges)
8681// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8682// unmerging)
8683// ...
8684//
8685// DstBi:
8686// ...
8687// %UGEPI = gep %GEPI, (UIdx-Idx)
8688// ...
8689// ---------------------------
8690//
8691// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8692// no longer alive on them.
8693//
8694// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8695// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8696// not to disable further simplifications and optimizations as a result of GEP
8697// merging.
8698//
8699// Note this unmerging may increase the length of the data flow critical path
8700// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8701// between the register pressure and the length of data-flow critical
8702// path. Restricting this to the uncommon IndirectBr case would minimize the
8703// impact of potentially longer critical path, if any, and the impact on compile
8704// time.
8706 const TargetTransformInfo *TTI) {
8707 BasicBlock *SrcBlock = GEPI->getParent();
8708 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8709 // (non-IndirectBr) cases exit early here.
8710 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8711 return false;
8712 // Check that GEPI is a simple gep with a single constant index.
8713 if (!GEPSequentialConstIndexed(GEPI))
8714 return false;
8715 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8716 // Check that GEPI is a cheap one.
8717 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8720 return false;
8721 Value *GEPIOp = GEPI->getOperand(0);
8722 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8723 if (!isa<Instruction>(GEPIOp))
8724 return false;
8725 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8726 if (GEPIOpI->getParent() != SrcBlock)
8727 return false;
8728 // Check that GEP is used outside the block, meaning it's alive on the
8729 // IndirectBr edge(s).
8730 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8731 if (auto *I = dyn_cast<Instruction>(Usr)) {
8732 if (I->getParent() != SrcBlock) {
8733 return true;
8734 }
8735 }
8736 return false;
8737 }))
8738 return false;
8739 // The second elements of the GEP chains to be unmerged.
8740 std::vector<GetElementPtrInst *> UGEPIs;
8741 // Check each user of GEPIOp to see whether unmerging would make GEPIOp no
8742 // longer alive on the IndirectBr edges.
8743 for (User *Usr : GEPIOp->users()) {
8744 if (Usr == GEPI)
8745 continue;
8746 // Check if Usr is an Instruction. If not, give up.
8747 if (!isa<Instruction>(Usr))
8748 return false;
8749 auto *UI = cast<Instruction>(Usr);
8750 // If Usr is in the same block as GEPIOp, that is fine; skip it.
8751 if (UI->getParent() == SrcBlock)
8752 continue;
8753 // Check if Usr is a GEP. If not, give up.
8754 if (!isa<GetElementPtrInst>(Usr))
8755 return false;
8756 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8757 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8758 // the pointer operand to it. If so, record it in the vector. If not, give
8759 // up.
8760 if (!GEPSequentialConstIndexed(UGEPI))
8761 return false;
8762 if (UGEPI->getOperand(0) != GEPIOp)
8763 return false;
8764 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8765 return false;
8766 if (GEPIIdx->getType() !=
8767 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8768 return false;
8769 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8770 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8773 return false;
8774 UGEPIs.push_back(UGEPI);
8775 }
8776 if (UGEPIs.size() == 0)
8777 return false;
8778 // Check the materializing cost of (Uidx-Idx).
8779 for (GetElementPtrInst *UGEPI : UGEPIs) {
8780 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8781 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8783 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8784 if (ImmCost > TargetTransformInfo::TCC_Basic)
8785 return false;
8786 }
8787 // Now unmerge between GEPI and UGEPIs.
8788 for (GetElementPtrInst *UGEPI : UGEPIs) {
8789 UGEPI->setOperand(0, GEPI);
8790 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8791 Constant *NewUGEPIIdx = ConstantInt::get(
8792 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8793 UGEPI->setOperand(1, NewUGEPIIdx);
8794 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8795 // inbounds to avoid UB.
8796 if (!GEPI->isInBounds()) {
8797 UGEPI->setIsInBounds(false);
8798 }
8799 }
8800 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8801 // alive on IndirectBr edges).
8802 assert(llvm::none_of(GEPIOp->users(),
8803 [&](User *Usr) {
8804 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8805 }) &&
8806 "GEPIOp is used outside SrcBlock");
8807 return true;
8808}
8809
8810static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8812 bool IsHugeFunc) {
8813 // Try and convert
8814 // %c = icmp ult %x, 8
8815 // br %c, bla, blb
8816 // %tc = lshr %x, 3
8817 // to
8818 // %tc = lshr %x, 3
8819 // %c = icmp eq %tc, 0
8820 // br %c, bla, blb
8821 // Creating the cmp to zero can be better for the backend, especially if the
8822 // lshr produces flags that can be used automatically.
8823 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8824 return false;
8825
8826 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8827 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8828 return false;
8829
8830 Value *X = Cmp->getOperand(0);
8831 if (!X->hasUseList())
8832 return false;
8833
8834 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8835
8836 for (auto *U : X->users()) {
8838 // A quick dominance check
8839 if (!UI ||
8840 (UI->getParent() != Branch->getParent() &&
8841 UI->getParent() != Branch->getSuccessor(0) &&
8842 UI->getParent() != Branch->getSuccessor(1)) ||
8843 (UI->getParent() != Branch->getParent() &&
8844 !UI->getParent()->getSinglePredecessor()))
8845 continue;
8846
8847 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8848 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8849 IRBuilder<> Builder(Branch);
8850 if (UI->getParent() != Branch->getParent())
8851 UI->moveBefore(Branch->getIterator());
8853 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8854 ConstantInt::get(UI->getType(), 0));
8855 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8856 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8857 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8858 return true;
8859 }
8860 if (Cmp->isEquality() &&
8861 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8862 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))) ||
8863 match(UI, m_Xor(m_Specific(X), m_SpecificInt(CmpC))))) {
8864 IRBuilder<> Builder(Branch);
8865 if (UI->getParent() != Branch->getParent())
8866 UI->moveBefore(Branch->getIterator());
8868 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8869 ConstantInt::get(UI->getType(), 0));
8870 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8871 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8872 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8873 return true;
8874 }
8875 }
8876 return false;
8877}
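// Illustrative example of the equality form handled above:
//   %c = icmp eq i32 %x, 8
//   %s = sub i32 %x, 8
//   br i1 %c, label %bla, label %blb
// becomes
//   %s = sub i32 %x, 8
//   %c = icmp eq i32 %s, 0
//   br i1 %c, label %bla, label %blb
// so a target that prefers compares against zero can reuse the flags produced
// by the subtraction.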
8878
8879bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8880 bool AnyChange = false;
8881 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8882
8883 // Bail out if we inserted the instruction to prevent optimizations from
8884 // stepping on each other's toes.
8885 if (InsertedInsts.count(I))
8886 return AnyChange;
8887
8888 // TODO: Move into the switch on opcode below here.
8889 if (PHINode *P = dyn_cast<PHINode>(I)) {
8890 // It is possible for very late stage optimizations (such as SimplifyCFG)
8891 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8892 // trivial PHI, go ahead and zap it here.
8893 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8894 LargeOffsetGEPMap.erase(P);
8895 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8896 P->eraseFromParent();
8897 ++NumPHIsElim;
8898 return true;
8899 }
8900 return AnyChange;
8901 }
8902
8903 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8904 // If the source of the cast is a constant, then this should have
8905 // already been constant folded. The only reason NOT to constant fold
8906 // it is if something (e.g. LSR) was careful to place the constant
8907 // evaluation in a block other than the one that uses it (e.g. to hoist
8908 // the address of globals out of a loop). If this is the case, we don't
8909 // want to forward-subst the cast.
8910 if (isa<Constant>(CI->getOperand(0)))
8911 return AnyChange;
8912
8913 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8914 return true;
8915
8917 isa<TruncInst>(I)) &&
8919 I, LI->getLoopFor(I->getParent()), *TTI))
8920 return true;
8921
8922 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8923 /// Sink a zext or sext into its user blocks if the target type doesn't
8924 /// fit in one register
8925 if (TLI->getTypeAction(CI->getContext(),
8926 TLI->getValueType(*DL, CI->getType())) ==
8927 TargetLowering::TypeExpandInteger) {
8928 return SinkCast(CI);
8929 } else {
8931 I, LI->getLoopFor(I->getParent()), *TTI))
8932 return true;
8933
8934 bool MadeChange = optimizeExt(I);
8935 return MadeChange | optimizeExtUses(I);
8936 }
8937 }
8938 return AnyChange;
8939 }
8940
8941 if (auto *Cmp = dyn_cast<CmpInst>(I))
8942 if (optimizeCmp(Cmp, ModifiedDT))
8943 return true;
8944
8945 if (match(I, m_URem(m_Value(), m_Value())))
8946 if (optimizeURem(I))
8947 return true;
8948
8949 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8950 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8951 bool Modified = optimizeLoadExt(LI);
8952 unsigned AS = LI->getPointerAddressSpace();
8953 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8954 return Modified;
8955 }
8956
8957 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8958 if (splitMergedValStore(*SI, *DL, *TLI))
8959 return true;
8960 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8961 unsigned AS = SI->getPointerAddressSpace();
8962 return optimizeMemoryInst(I, SI->getOperand(1),
8963 SI->getOperand(0)->getType(), AS);
8964 }
8965
8966 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8967 unsigned AS = RMW->getPointerAddressSpace();
8968 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8969 }
8970
8971 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8972 unsigned AS = CmpX->getPointerAddressSpace();
8973 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8974 CmpX->getCompareOperand()->getType(), AS);
8975 }
8976
8977 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8978
8979 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8980 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8981 return true;
8982
8983 // TODO: Move this into the switch on opcode - it handles shifts already.
8984 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8985 BinOp->getOpcode() == Instruction::LShr)) {
8986 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8987 if (CI && TLI->hasExtractBitsInsn())
8988 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8989 return true;
8990 }
8991
8992 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8993 if (GEPI->hasAllZeroIndices()) {
8994 /// The GEP operand must be a pointer, so must its result -> BitCast
8995 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8996 GEPI->getName(), GEPI->getIterator());
8997 NC->setDebugLoc(GEPI->getDebugLoc());
8998 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
9000 GEPI, TLInfo, nullptr,
9001 [&](Value *V) { removeAllAssertingVHReferences(V); });
9002 ++NumGEPsElim;
9003 optimizeInst(NC, ModifiedDT);
9004 return true;
9005 }
9007 return true;
9008 }
9009 }
9010
9011 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
9012 // freeze(icmp a, const)) -> icmp (freeze a), const
9013 // This helps generate efficient conditional jumps.
9014 Instruction *CmpI = nullptr;
9015 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
9016 CmpI = II;
9017 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
9018 CmpI = F->getFastMathFlags().none() ? F : nullptr;
9019
9020 if (CmpI && CmpI->hasOneUse()) {
9021 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
9022 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
9024 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
9026 if (Const0 || Const1) {
9027 if (!Const0 || !Const1) {
9028 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
9029 F->takeName(FI);
9030 CmpI->setOperand(Const0 ? 1 : 0, F);
9031 }
9032 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
9033 FI->eraseFromParent();
9034 return true;
9035 }
9036 }
9037 return AnyChange;
9038 }
9039
9040 if (tryToSinkFreeOperands(I))
9041 return true;
9042
9043 switch (I->getOpcode()) {
9044 case Instruction::Shl:
9045 case Instruction::LShr:
9046 case Instruction::AShr:
9047 return optimizeShiftInst(cast<BinaryOperator>(I));
9048 case Instruction::Call:
9049 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
9050 case Instruction::Select:
9051 return optimizeSelectInst(cast<SelectInst>(I));
9052 case Instruction::ShuffleVector:
9053 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
9054 case Instruction::Switch:
9055 return optimizeSwitchInst(cast<SwitchInst>(I));
9056 case Instruction::ExtractElement:
9057 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
9058 case Instruction::Br:
9059 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
9060 }
9061
9062 return AnyChange;
9063}
9064
9065/// Given an OR instruction, check to see if this is a bitreverse
9066/// idiom. If so, insert the new intrinsic and return true.
9067bool CodeGenPrepare::makeBitReverse(Instruction &I) {
9068 if (!I.getType()->isIntegerTy() ||
9070 TLI->getValueType(*DL, I.getType(), true)))
9071 return false;
9072
9073 SmallVector<Instruction *, 4> Insts;
9074 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
9075 return false;
9076 Instruction *LastInst = Insts.back();
9077 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
9079 &I, TLInfo, nullptr,
9080 [&](Value *V) { removeAllAssertingVHReferences(V); });
9081 return true;
9082}
9083
9084// In this pass we look for GEP and cast instructions that are used
9085// across basic blocks and rewrite them to improve basic-block-at-a-time
9086// selection.
9087bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
9088 SunkAddrs.clear();
9089 bool MadeChange = false;
9090
9091 do {
9092 CurInstIterator = BB.begin();
9093 ModifiedDT = ModifyDT::NotModifyDT;
9094 while (CurInstIterator != BB.end()) {
9095 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
9096 if (ModifiedDT != ModifyDT::NotModifyDT) {
9097 // For huge functions we tend to quickly go through the inner optimization
9098 // opportunities in the BB, so we go back to the head of the BB to
9099 // re-optimize each instruction instead of restarting from the function head.
9100 if (IsHugeFunc) {
9101 DT.reset();
9102 getDT(*BB.getParent());
9103 break;
9104 } else {
9105 return true;
9106 }
9107 }
9108 }
9109 } while (ModifiedDT == ModifyDT::ModifyInstDT);
9110
9111 bool MadeBitReverse = true;
9112 while (MadeBitReverse) {
9113 MadeBitReverse = false;
9114 for (auto &I : reverse(BB)) {
9115 if (makeBitReverse(I)) {
9116 MadeBitReverse = MadeChange = true;
9117 break;
9118 }
9119 }
9120 }
9121 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
9122
9123 return MadeChange;
9124}
9125
9126bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
9127 bool AnyChange = false;
9128 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
9129 AnyChange |= fixupDbgVariableRecord(DVR);
9130 return AnyChange;
9131}
9132
9133// FIXME: should updating debug-info really cause the "changed" flag to fire,
9134// which can cause a function to be reprocessed?
9135bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
9136 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
9137 DVR.Type != DbgVariableRecord::LocationType::Assign)
9138 return false;
9139
9140 // Does this DbgVariableRecord refer to a sunk address calculation?
9141 bool AnyChange = false;
9142 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
9143 DVR.location_ops().end());
9144 for (Value *Location : LocationOps) {
9145 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
9146 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
9147 if (SunkAddr) {
9148 // Point dbg.value at locally computed address, which should give the best
9149 // opportunity to be accurately lowered. This update may change the type
9150 // of pointer being referred to; however this makes no difference to
9151 // debugging information, and we can't generate bitcasts that may affect
9152 // codegen.
9153 DVR.replaceVariableLocationOp(Location, SunkAddr);
9154 AnyChange = true;
9155 }
9156 }
9157 return AnyChange;
9158}
9159
9160 static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI) {
9161 DVR->removeFromParent();
9162 BasicBlock *VIBB = VI->getParent();
9163 if (isa<PHINode>(VI))
9164 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
9165 else
9166 VIBB->insertDbgRecordAfter(DVR, &*VI);
9167}
9168
9169 // An llvm.dbg.value may be using a value before its definition, due to
9170// optimizations in this pass and others. Scan for such dbg.values, and rescue
9171// them by moving the dbg.value to immediately after the value definition.
9172// FIXME: Ideally this should never be necessary, and this has the potential
9173// to re-order dbg.value intrinsics.
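// As a schematic example (illustrative only, not from this file): if an
// earlier transform left a record such as
//   #dbg_value(%y, ...)
//   %y = add i32 %a, %b
// where the record refers to %y before its definition, the record is moved to
//   %y = add i32 %a, %b
//   #dbg_value(%y, ...)
// so that the definition of %y comes before the record that uses it.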
9174bool CodeGenPrepare::placeDbgValues(Function &F) {
9175 bool MadeChange = false;
9176 DominatorTree DT(F);
9177
9178 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
9179 SmallVector<Instruction *, 4> VIs;
9180 for (Value *V : DbgItem->location_ops())
9181 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
9182 VIs.push_back(VI);
9183
9184 // This item may depend on multiple instructions, complicating any
9185 // potential sink. This block takes the defensive approach, opting to
9186 // "undef" the item if it depends on more than one instruction and any of
9187 // them does not dominate it.
9188 for (Instruction *VI : VIs) {
9189 if (VI->isTerminator())
9190 continue;
9191
9192 // If VI is a phi in a block with an EHPad terminator, we can't insert
9193 // after it.
9194 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
9195 continue;
9196
9197 // If the defining instruction dominates the dbg.value, we do not need
9198 // to move the dbg.value.
9199 if (DT.dominates(VI, Position))
9200 continue;
9201
9202 // If we depend on multiple instructions and any of them doesn't
9203 // dominate this DVI, we probably can't salvage it: moving it to
9204 // after any of the instructions could cause us to lose the others.
9205 if (VIs.size() > 1) {
9206 LLVM_DEBUG(
9207 dbgs()
9208 << "Unable to find valid location for Debug Value, undefing:\n"
9209 << *DbgItem);
9210 DbgItem->setKillLocation();
9211 break;
9212 }
9213
9214 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
9215 << *DbgItem << ' ' << *VI);
9216 DbgInserterHelper(DbgItem, VI->getIterator());
9217 MadeChange = true;
9218 ++NumDbgValueMoved;
9219 }
9220 };
9221
9222 for (BasicBlock &BB : F) {
9223 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
9224 // Process any DbgVariableRecord records attached to this
9225 // instruction.
9226 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
9227 filterDbgVars(Insn.getDbgRecordRange()))) {
9228 if (DVR.Type != DbgVariableRecord::LocationType::Value)
9229 continue;
9230 DbgProcessor(&DVR, &Insn);
9231 }
9232 }
9233 }
9234
9235 return MadeChange;
9236}
9237
9238// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
9239// probes can be chained dependencies of other regular DAG nodes and block DAG
9240// combine optimizations.
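// As an illustrative sketch (not from this file), a block such as
//   %a = add i32 %x, 1
//   call void @llvm.pseudoprobe(...)
//   %b = mul i32 %a, %a
//   call void @llvm.pseudoprobe(...)
// is rewritten so that both probe calls sit together at the top of the block
// (after any leading debug/pseudo instructions), leaving the %a/%b chain free
// of interleaved probe nodes during DAG construction.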
9241bool CodeGenPrepare::placePseudoProbes(Function &F) {
9242 bool MadeChange = false;
9243 for (auto &Block : F) {
9244 // Move the remaining probes to the beginning of the block.
9245 auto FirstInst = Block.getFirstInsertionPt();
9246 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
9247 ++FirstInst;
9248 BasicBlock::iterator I(FirstInst);
9249 I++;
9250 while (I != Block.end()) {
9251 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
9252 II->moveBefore(FirstInst);
9253 MadeChange = true;
9254 }
9255 }
9256 }
9257 return MadeChange;
9258}
9259
9260/// Scale down both weights to fit into uint32_t.
9261static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
9262 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
9263 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
9264 NewTrue = NewTrue / Scale;
9265 NewFalse = NewFalse / Scale;
9266}
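// Worked example (illustrative): with NewTrue = 6,000,000,000 and
// NewFalse = 1,000,000,000, NewMax = 6,000,000,000 and
// Scale = 6,000,000,000 / 4,294,967,295 + 1 = 2, so the weights become
// 3,000,000,000 and 500,000,000, both of which fit in uint32_t while
// preserving the original 6:1 ratio.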
9267
9268/// Some targets prefer to split a conditional branch like:
9269/// \code
9270/// %0 = icmp ne i32 %a, 0
9271/// %1 = icmp ne i32 %b, 0
9272/// %or.cond = or i1 %0, %1
9273/// br i1 %or.cond, label %TrueBB, label %FalseBB
9274/// \endcode
9275/// into multiple branch instructions like:
9276/// \code
9277/// bb1:
9278/// %0 = icmp ne i32 %a, 0
9279/// br i1 %0, label %TrueBB, label %bb2
9280/// bb2:
9281/// %1 = icmp ne i32 %b, 0
9282/// br i1 %1, label %TrueBB, label %FalseBB
9283/// \endcode
9284/// This usually allows instruction selection to do even further optimizations
9285/// and combine the compare with the branch instruction. Currently this is
9286/// applied for targets which have "cheap" jump instructions.
9287///
9288/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
9289///
9290bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
9291 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
9292 return false;
9293
9294 bool MadeChange = false;
9295 for (auto &BB : F) {
9296 // Does this BB end with the following?
9297 // %cond1 = icmp|fcmp|binary instruction ...
9298 // %cond2 = icmp|fcmp|binary instruction ...
9299 // %cond.or = or|and i1 %cond1, %cond2
9300 // br i1 %cond.or, label %dest1, label %dest2
9301 Instruction *LogicOp;
9302 BasicBlock *TBB, *FBB;
9303 if (!match(BB.getTerminator(),
9304 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
9305 continue;
9306
9307 auto *Br1 = cast<BranchInst>(BB.getTerminator());
9308 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
9309 continue;
9310
9311 // The merging of mostly empty BBs can cause a degenerate branch.
9312 if (TBB == FBB)
9313 continue;
9314
9315 unsigned Opc;
9316 Value *Cond1, *Cond2;
9317 if (match(LogicOp,
9318 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
9319 Opc = Instruction::And;
9320 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
9321 m_OneUse(m_Value(Cond2)))))
9322 Opc = Instruction::Or;
9323 else
9324 continue;
9325
9326 auto IsGoodCond = [](Value *Cond) {
9327 return match(
9328 Cond,
9329 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
9330 m_LogicalOr(m_Value(), m_Value()))));
9331 };
9332 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
9333 continue;
9334
9335 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
9336
9337 // Create a new BB.
9338 auto *TmpBB =
9339 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
9340 BB.getParent(), BB.getNextNode());
9341 if (IsHugeFunc)
9342 FreshBBs.insert(TmpBB);
9343
9344 // Update the original basic block by using the first condition directly in
9345 // the branch instruction and removing the no-longer-needed and/or instruction.
9346 Br1->setCondition(Cond1);
9347 LogicOp->eraseFromParent();
9348
9349 // Depending on the condition we have to either replace the true or the
9350 // false successor of the original branch instruction.
9351 if (Opc == Instruction::And)
9352 Br1->setSuccessor(0, TmpBB);
9353 else
9354 Br1->setSuccessor(1, TmpBB);
9355
9356 // Fill in the new basic block.
9357 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
9358 if (auto *I = dyn_cast<Instruction>(Cond2)) {
9359 I->removeFromParent();
9360 I->insertBefore(Br2->getIterator());
9361 }
9362
9363 // Update PHI nodes in both successors. The original BB needs to be
9364 // replaced in one successor's PHI nodes, because the branch now comes from
9365 // the newly generated BB (TmpBB). In the other successor we need to add one
9366 // incoming edge to the PHI nodes, because both branch instructions now
9367 // target the same successor. Depending on the original branch condition
9368 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
9369 // we perform the correct update for the PHI nodes.
9370 // This doesn't change the successor order of the just created branch
9371 // instruction (or any other instruction).
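// For example (illustrative), in the "or" case with one phi per successor:
//   TrueDest:  %p = phi i32 [ %v, %BB ], ...
//     becomes  %p = phi i32 [ %v, %BB ], [ %v, %TmpBB ], ...
//   FalseDest: %q = phi i32 [ %w, %BB ], ...
//     becomes  %q = phi i32 [ %w, %TmpBB ], ...
// since TrueDest is now reachable from both BB and TmpBB, while FalseDest is
// reached only through TmpBB.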
9372 if (Opc == Instruction::Or)
9373 std::swap(TBB, FBB);
9374
9375 // Replace the old BB with the new BB.
9376 TBB->replacePhiUsesWith(&BB, TmpBB);
9377
9378 // Add another incoming edge from the new BB.
9379 for (PHINode &PN : FBB->phis()) {
9380 auto *Val = PN.getIncomingValueForBlock(&BB);
9381 PN.addIncoming(Val, TmpBB);
9382 }
9383
9384 // Update the branch weights (from SelectionDAGBuilder::
9385 // FindMergedConditions).
9386 if (Opc == Instruction::Or) {
9387 // Codegen X | Y as:
9388 // BB1:
9389 // jmp_if_X TBB
9390 // jmp TmpBB
9391 // TmpBB:
9392 // jmp_if_Y TBB
9393 // jmp FBB
9394 //
9395
9396 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9397 // The requirement is that
9398 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
9399 // = TrueProb for original BB.
9400 // Assuming the original weights are A and B, one choice is to set BB1's
9401 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
9402 // assumes that
9403 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
9404 // Another choice is to assume TrueProb for BB1 equals TrueProb for
9405 // TmpBB, but the math is more complicated.
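// As a quick check of the first choice above (illustrative), with original
// weights A and B:
//   TrueProb(BB1) = A / (2A + 2B), FalseProb(BB1) = (A + 2B) / (2A + 2B),
//   TrueProb(TmpBB) = A / (A + 2B), so
//   TrueProb(BB1) + FalseProb(BB1) * TrueProb(TmpBB)
//     = A/(2A+2B) + A/(2A+2B) = A/(A+B),
// which is exactly TrueProb for the original BB.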
9406 uint64_t TrueWeight, FalseWeight;
9407 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9408 uint64_t NewTrueWeight = TrueWeight;
9409 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
9410 scaleWeights(NewTrueWeight, NewFalseWeight);
9411 Br1->setMetadata(LLVMContext::MD_prof,
9412 MDBuilder(Br1->getContext())
9413 .createBranchWeights(NewTrueWeight, NewFalseWeight,
9414 hasBranchWeightOrigin(*Br1)));
9415
9416 NewTrueWeight = TrueWeight;
9417 NewFalseWeight = 2 * FalseWeight;
9418 scaleWeights(NewTrueWeight, NewFalseWeight);
9419 Br2->setMetadata(LLVMContext::MD_prof,
9420 MDBuilder(Br2->getContext())
9421 .createBranchWeights(NewTrueWeight, NewFalseWeight));
9422 }
9423 } else {
9424 // Codegen X & Y as:
9425 // BB1:
9426 // jmp_if_X TmpBB
9427 // jmp FBB
9428 // TmpBB:
9429 // jmp_if_Y TBB
9430 // jmp FBB
9431 //
9432 // This requires creation of TmpBB after CurBB.
9433
9434 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9435 // The requirement is that
9436 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9437 // = FalseProb for original BB.
9438 // Assuming the original weights are A and B, one choice is to set BB1's
9439 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9440 // assumes that
9441 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
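// A parallel check (illustrative), with original weights A and B:
//   FalseProb(BB1) = B / (2A + 2B), TrueProb(BB1) = (2A + B) / (2A + 2B),
//   FalseProb(TmpBB) = B / (2A + B), so
//   FalseProb(BB1) + TrueProb(BB1) * FalseProb(TmpBB)
//     = B/(2A+2B) + B/(2A+2B) = B/(A+B),
// matching FalseProb for the original BB.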
9442 uint64_t TrueWeight, FalseWeight;
9443 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9444 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9445 uint64_t NewFalseWeight = FalseWeight;
9446 scaleWeights(NewTrueWeight, NewFalseWeight);
9447 Br1->setMetadata(LLVMContext::MD_prof,
9448 MDBuilder(Br1->getContext())
9449 .createBranchWeights(NewTrueWeight, NewFalseWeight));
9450
9451 NewTrueWeight = 2 * TrueWeight;
9452 NewFalseWeight = FalseWeight;
9453 scaleWeights(NewTrueWeight, NewFalseWeight);
9454 Br2->setMetadata(LLVMContext::MD_prof,
9455 MDBuilder(Br2->getContext())
9456 .createBranchWeights(NewTrueWeight, NewFalseWeight));
9457 }
9458 }
9459
9460 ModifiedDT = ModifyDT::ModifyBBDT;
9461 MadeChange = true;
9462
9463 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9464 TmpBB->dump());
9465 }
9466 return MadeChange;
9467}
#define Success
return SDValue()
static unsigned getIntrinsicID(const SDNode *N)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI, otherwise return false value of SI.
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static void replaceAllUsesWith(Value *Old, Value *New, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static bool matchOverflowPattern(Instruction *&I, ExtractValueInst *&MulExtract, ExtractValueInst *&OverflowExtract)
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, Value *&AddInstOut, Value *&AddOffsetOut, PHINode *&LoopIncrPNOut)
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static void DbgInserterHelper(DbgVariableRecord *DVR, BasicBlock::iterator VI)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static BasicBlock::iterator findInsertPos(Value *Addr, Instruction *MemoryInst, Value *SunkAddr)
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallPtrSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static cl::opt< OutputCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(OutputCostKind::RecipThroughput), cl::values(clEnumValN(OutputCostKind::RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(OutputCostKind::Latency, "latency", "Instruction latency"), clEnumValN(OutputCostKind::CodeSize, "code-size", "Code size"), clEnumValN(OutputCostKind::SizeAndLatency, "size-latency", "Code size and latency"), clEnumValN(OutputCostKind::All, "all", "Print all cost kinds")))
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
static Value * getCondition(Instruction *I)
Hexagon Common GEP
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
This defines the Use class.
iv users
Definition IVUsers.cpp:48
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition LICM.cpp:1450
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
MachineInstr unsigned OpIdx
uint64_t IntrinsicInst * II
OptimizedStructLayoutField Field
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
Remove Loads Into Fake Uses
This file contains some templates that are useful if you are working with the STL at all.
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, bool HasBranchDivergence, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
BinaryOperator * Mul
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI APInt zext(unsigned width) const
Zero extend to a new width.
Definition APInt.cpp:1023
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition APInt.h:1189
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition APInt.h:381
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition APInt.h:436
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition APInt.h:1546
unsigned logBase2() const
Definition APInt.h:1776
LLVM_ABI APInt sext(unsigned width) const
Sign extend to a new width.
Definition APInt.cpp:996
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition APInt.h:441
int64_t getSExtValue() const
Get sign extended value.
Definition APInt.h:1577
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
An instruction that atomically checks whether a specified value is in a memory location,...
static unsigned getPointerOperandIndex()
an instruction that atomically reads a memory location, combines it with another value,...
static unsigned getPointerOperandIndex()
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator end()
Definition BasicBlock.h:483
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:539
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition BasicBlock.h:701
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
InstListType::const_iterator const_iterator
Definition BasicBlock.h:171
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI void moveAfter(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it right after MovePos in the function M...
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
LLVM_ABI void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
BinaryOps getOpcode() const
Definition InstrTypes.h:374
static LLVM_ABI BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
LLVM_ABI void setBlockFreq(const BasicBlock *BB, BlockFrequency Freq)
LLVM_ABI BlockFrequency getBlockFreq(const BasicBlock *BB) const
getblockFreq - Return block frequency.
Conditional or Unconditional Branch instruction.
LLVM_ABI void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static LLVM_ABI BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition InstrTypes.h:448
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition InstrTypes.h:664
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition InstrTypes.h:827
static LLVM_ABI CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition InstrTypes.h:765
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
static LLVM_ABI Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getNeg(Constant *C, bool HasNSW=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition Constants.h:174
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition Constants.h:159
static LLVM_ABI Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
static LLVM_ABI Constant * get(ArrayRef< Constant * > V)
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI void removeFromParent()
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
LLVM_ABI iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool erase(const KeyT &Val)
Definition DenseMap.h:330
unsigned size() const
Definition DenseMap.h:110
iterator end()
Definition DenseMap.h:81
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:241
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
bool none() const
Definition FMF.h:57
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const BasicBlock & getEntryBlock() const
Definition Function.h:813
LLVM_ABI const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associate statepoint's argument list which contains the base pointer of the pointe...
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
static LLVM_ABI Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
LLVM_ABI bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition Globals.cpp:343
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
LLVM_ABI uint64_t getGlobalSize(const DataLayout &DL) const
Get the size of this global variable in bytes.
Definition Globals.cpp:561
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalVariable.
This instruction compares its operands according to the predicate given to the constructor.
bool isEquality() const
Return true if this predicate is either EQ or NE.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2775
LLVM_ABI Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI bool isDebugOrPseudoInst() const LLVM_READONLY
Return true if the instruction is a DbgInfoIntrinsic or PseudoProbeInst.
LLVM_ABI void setHasNoSignedWrap(bool b=true)
Set or clear the nsw flag on this instruction, which must be an operator which supports this flag.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI bool mayReadFromMemory() const LLVM_READONLY
Return true if this instruction may read memory.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
bool isShift() const
LLVM_ABI void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
LLVM_ABI std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
A wrapper class for inspecting calls to intrinsic functions.
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Analysis pass that exposes the LoopInfo for a function.
Definition LoopInfo.h:569
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition LoopInfo.h:596
Represents a single loop in the control flow graph.
Definition LoopInfo.h:40
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
LLVM_ABI void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
iterator end()
Definition MapVector.h:67
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition MapVector.h:194
iterator find(const KeyT &Key)
Definition MapVector.h:154
bool empty() const
Definition MapVector.h:77
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition MapVector.h:124
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
bool isFunctionColdInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains only cold code.
LLVM_ABI bool isFunctionHotnessUnknown(const Function &F) const
Returns true if the hotness of F is unknown.
bool isFunctionHotInCallGraph(const FuncT *F, BFIT &BFI) const
Returns true if F contains hot code.
LLVM_ABI bool hasPartialSampleProfile() const
Returns true if module M has partial-profile sample profile.
LLVM_ABI bool hasHugeWorkingSetSize() const
Returns true if the working set size of the code is considered huge.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, const Instruction *MDFrom=nullptr)
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
void clear()
Completely clear the SetVector.
Definition SetVector.h:267
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
value_type pop_back_val()
Definition SetVector.h:279
VectorType * getType() const
Overload to return most specific vector type.
size_type size() const
Definition SmallPtrSet.h:99
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:175
bool erase(const T &V)
Definition SmallSet.h:199
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:183
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::iterator iterator
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
static unsigned getPointerOperandIndex()
TypeSize getElementOffset(unsigned Idx) const
Definition DataLayout.h:754
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
virtual bool shouldOptimizeMulOverflowWithZeroHighBits(LLVMContext &Context, EVT VT) const
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isCtpopFast(EVT VT) const
Return true if ctpop instruction is fast.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual bool hasMultipleConditionRegisters(EVT VT) const
Does the target have multiple (allocatable) condition registers that can be used to store the results...
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy,Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
virtual bool getAddrModeArguments(const IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the prefered common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set in interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
TargetOptions Options
unsigned EnableFastISel
EnableFastISel - This flag enables fast-path instruction selection which trades away generated code q...
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
LLVM_ABI InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, const Value *Op0=nullptr, const Value *Op1=nullptr, TTI::VectorInstrContext VIC=TTI::VectorInstrContext::None) const
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
LLVM_ABI InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args={}, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
LLVM_ABI InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing the given integer immediate of the specified type.
LLVM_ABI bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
@ TCC_Basic
The cost of a typical 'add' instruction.
LLVM_ABI bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
LLVM_ABI bool isProfitableToSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
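As a rough illustration of the cost hooks above, here is a sketch, not code from this pass; the comparison against TCC_Basic as a "cheap" threshold is an assumption.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical query: is an integer multiply on Ty roughly as cheap as a
// basic add, measured by reciprocal throughput?
static bool mulIsCheap(const TargetTransformInfo &TTI, Type *Ty) {
  InstructionCost Cost = TTI.getArithmeticInstrCost(
      Instruction::Mul, Ty, TargetTransformInfo::TCK_RecipThroughput);
  return Cost.isValid() && Cost <= TargetTransformInfo::TCC_Basic;
}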
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
LLVM_ABI bool isScalableTy(SmallPtrSetImpl< const Type * > &Visited) const
Return true if this is a type whose size is a known multiple of vscale.
Definition Type.cpp:61
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition Type.h:255
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
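A small sketch combining the Type queries listed above; the helper and the 32-bit floor are hypothetical, not taken from this pass.

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical helper: widen an integer (or vector-of-integer) type to at
// least 32 bits per lane, leaving all other types untouched.
static Type *widenToAtLeastI32(Type *Ty) {
  if (!Ty->getScalarType()->isIntegerTy())
    return Ty;
  if (Ty->getScalarSizeInBits() >= 32)
    return Ty;
  // getWithNewBitWidth changes the lane bit width but keeps the lane count,
  // so this handles both scalar integers and integer vectors.
  return Ty->getWithNewBitWidth(32);
}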
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
op_range operands()
Definition User.h:267
const Use & getOperandUse(unsigned i) const
Definition User.h:220
void setOperand(unsigned i, Value *Val)
Definition User.h:212
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:25
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:439
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:962
LLVM_ABI bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition Value.cpp:242
LLVM_ABI void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:708
bool use_empty() const
Definition Value.h:346
user_iterator user_end()
Definition Value.h:410
iterator_range< use_iterator > uses()
Definition Value.h:380
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition Value.h:838
user_iterator_impl< User > user_iterator
Definition Value.h:391
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
LLVM_ABI void dump() const
Support for debugging, callable in GDB: V->dump()
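For context, a minimal sketch (hypothetical helper, not from this file) of the usual replace-and-erase sequence built from the Value APIs above:

#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical helper: redirect all uses of Old to New, keep the readable
// name on the replacement, then erase the now-dead instruction.
static void replaceAndErase(Instruction *Old, Value *New) {
  New->takeName(Old);           // transfer Old's name to New
  Old->replaceAllUsesWith(New); // rewrite every use edge to point at New
  Old->eraseFromParent();       // Old has no uses left and can be removed
}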
bool pointsToAliveValue() const
int getNumOccurrences() const
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
constexpr bool isNonZero() const
Definition TypeSize.h:155
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition ilist_node.h:348
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
@ Entry
Definition COFF.h:862
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWAdd(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::Xor > m_Xor(const LHS &L, const RHS &R)
ap_match< APInt > m_APIntAllowPoison(const APInt *&Res)
Match APInt while allowing poison in splat vector constants.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
IntrinsicID_match m_Intrinsic()
Match intrinsic calls like this: m_Intrinsic<Intrinsic::fabs>(m_Value(X))
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
ExtractValue_match< Ind, Val_t > m_ExtractValue(const Val_t &V)
Match a single index ExtractValue instruction.
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
match_immconstant_ty m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoSignedWrap > m_NSWAdd(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
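A minimal sketch of how the PatternMatch combinators above compose; the predicate and the matched shape are illustrative assumptions, not code from this pass.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"

using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical predicate: does V compute (X << C) where the shift has exactly
// one use? On success, X and the shift amount are bound for the caller.
static bool matchOneUseShlByConst(Value *V, Value *&X, const APInt *&ShAmt) {
  return match(V, m_OneUse(m_Shl(m_Value(X), m_APInt(ShAmt))));
}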
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
@ CE
Windows NT (Windows on ARM)
Definition MCAsmInfo.h:48
initializer< Ty > init(const Ty &Val)
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
@ Assume
Do not drop type tests (default).
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
SmallVector< Node, 4 > NodeList
Definition RDFGraph.h:550
iterator end() const
Definition BasicBlock.h:89
friend class Instruction
Iterator for Instructions in a BasicBlock.
Definition BasicBlock.h:73
LLVM_ABI iterator begin() const
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
std::enable_if_t< std::is_signed_v< T >, T > MulOverflow(T X, T Y, T &Result)
Multiply two signed integers, computing the two's complement truncated result, returning true if an o...
Definition MathExtras.h:753
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1763
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1737
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1667
LLVM_ABI bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition Local.cpp:533
LLVM_ABI bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition Local.cpp:134
LLVM_ABI void findDbgValues(Value *V, SmallVectorImpl< DbgVariableRecord * > &DbgVariableRecords)
Finds the dbg.values describing a value.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2250
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition Alignment.h:134
LLVM_ABI void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition Utils.cpp:1731
auto successors(const MachineBasicBlock *BB)
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition APInt.h:2128
constexpr from_range_t from_range
LLVM_ABI Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2198
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
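As an illustration (a hypothetical cleanup loop, not code from this pass), make_early_inc_range is what makes erase-while-iterating patterns safe:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;

// Hypothetical helper: the iterator is advanced before the loop body runs,
// so erasing the current instruction does not invalidate the traversal.
static void eraseTriviallyDead(BasicBlock &BB) {
  for (Instruction &I : make_early_inc_range(BB))
    if (isInstructionTriviallyDead(&I))
      I.eraseFromParent();
}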
auto cast_or_null(const Y &Val)
Definition Casting.h:714
LLVM_ABI void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
LLVM_ABI bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true, bool IgnoreUBImplyingAttrs=true)
Return true if the instruction does not have any effects besides calculating the result and does not ...
auto unique(Range &&R, Predicate P)
Definition STLExtras.h:2124
LLVM_ABI Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
LLVM_ABI bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
constexpr auto equal_to(T &&Arg)
Functor variant of std::equal_to that can be used as a UnaryPredicate in functional algorithms like a...
Definition STLExtras.h:2163
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:154
LLVM_ABI bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI Value * simplifyAddInst(Value *LHS, Value *RHS, bool IsNSW, bool IsNUW, const SimplifyQuery &Q)
Given operands for an Add, fold the result or return null.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition Local.h:252
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2190
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1744
LLVM_ABI bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
LLVM_ABI bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
LLVM_ABI bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition Local.cpp:3786
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1634
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
generic_gep_type_iterator<> gep_type_iterator
LLVM_ABI FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition Analysis.cpp:203
LLVM_ABI bool VerifyLoopInfo
Enable verification of loop info.
Definition LoopInfo.cpp:51
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition Analysis.cpp:588
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Or
Bitwise or logical OR of integers.
@ Xor
Bitwise or logical XOR of integers.
@ And
Bitwise or logical AND of integers.
@ Sub
Subtraction of integers.
@ Add
Sum of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition STLExtras.h:2002
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
LLVM_ABI bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
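For orientation, a sketch of how this entry and getBypassSlowDivWidths (listed earlier) fit together; the per-block driver is a hypothetical helper, and it assumes TLI is the current function's TargetLowering.

#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"

using namespace llvm;

// Hypothetical per-block driver: ask the target which division widths are
// slow, then let bypassSlowDivision insert the runtime width check and the
// fast-path division.
static bool bypassSlowDivIn(BasicBlock &BB, const TargetLowering &TLI) {
  const DenseMap<unsigned, unsigned> &Widths = TLI.getBypassSlowDivWidths();
  if (Widths.empty())
    return false;
  return bypassSlowDivision(&BB, Widths);
}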
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2182
auto predecessors(const MachineBasicBlock *BB)
iterator_range< pointer_iterator< WrappedIteratorT > > make_pointer_range(RangeT &&Range)
Definition iterator.h:368
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1945
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
bool pred_empty(const BasicBlock *BB)
Definition CFG.h:119
std::enable_if_t< std::is_signed_v< T >, T > AddOverflow(T X, T Y, T &Result)
Add two signed integers, computing the two's complement truncated result, returning true if overflow ...
Definition MathExtras.h:701
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
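A minimal sketch of the split-and-branch helper above; the wrapper function is hypothetical and not taken from this file.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"

using namespace llvm;

// Hypothetical helper: split the block before InsertPt and branch on Cond.
// The returned instruction is the terminator of the new "then" block, so
// conditionally executed code can be emitted in front of it.
static void splitForGuardedPath(Value *Cond, Instruction *InsertPt) {
  Instruction *ThenTerm = SplitBlockAndInsertIfThen(
      Cond, InsertPt->getIterator(), /*Unreachable=*/false);
  IRBuilder<> Builder(ThenTerm);
  // ... emit the instructions that should only run when Cond is true ...
  (void)Builder;
}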
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
std::pair< Value *, FPClassTest > fcmpToClassTest(FCmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
DenseMap< const Value *, Value * > ValueToValueMap
LLVM_ABI CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define NC
Definition regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:284
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
static LLVM_ABI EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition ValueTypes.h:248
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:152
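A small sketch of the EVT queries listed above; the predicate and the 64-bit bound are hypothetical choices for illustration.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;

// Hypothetical predicate: is Ty a scalar integer whose EVT occupies a whole
// power-of-two number of bytes and is no wider than 64 bits?
static bool isRoundIntNoWiderThan64(Type *Ty) {
  if (!Ty->isIntegerTy())
    return false;
  EVT VT = EVT::getEVT(Ty);
  return VT.isRound() && !VT.bitsGT(EVT(MVT::i64));
}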
This contains information for each constraint that we are lowering.