1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
43#include "llvm/Config/llvm-config.h"
44#include "llvm/IR/Argument.h"
45#include "llvm/IR/Attributes.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/Constant.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DebugInfo.h"
52#include "llvm/IR/Dominators.h"
53#include "llvm/IR/Function.h"
55#include "llvm/IR/GlobalValue.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/InlineAsm.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
63#include "llvm/IR/Intrinsics.h"
64#include "llvm/IR/IntrinsicsAArch64.h"
65#include "llvm/IR/LLVMContext.h"
66#include "llvm/IR/MDBuilder.h"
67#include "llvm/IR/Module.h"
68#include "llvm/IR/Operator.h"
71#include "llvm/IR/Statepoint.h"
72#include "llvm/IR/Type.h"
73#include "llvm/IR/Use.h"
74#include "llvm/IR/User.h"
75#include "llvm/IR/Value.h"
76#include "llvm/IR/ValueHandle.h"
77#include "llvm/IR/ValueMap.h"
79#include "llvm/Pass.h"
85#include "llvm/Support/Debug.h"
96#include <algorithm>
97#include <cassert>
98#include <cstdint>
99#include <iterator>
100#include <limits>
101#include <memory>
102#include <optional>
103#include <utility>
104#include <vector>
105
106using namespace llvm;
107using namespace llvm::PatternMatch;
108
109#define DEBUG_TYPE "codegenprepare"
110
111STATISTIC(NumBlocksElim, "Number of blocks eliminated");
112STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
113STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
114STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
115 "sunken Cmps");
116STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
117 "of sunken Casts");
118STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
119 "computations were sunk");
120STATISTIC(NumMemoryInstsPhiCreated,
121 "Number of phis created when address "
122 "computations were sunk to memory instructions");
123STATISTIC(NumMemoryInstsSelectCreated,
124          "Number of selects created when address "
125 "computations were sunk to memory instructions");
126STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
127STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
128STATISTIC(NumAndsAdded,
129 "Number of and mask instructions added to form ext loads");
130STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
131STATISTIC(NumRetsDup, "Number of return instructions duplicated");
132STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
133STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
134STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
135
136static cl::opt<bool> DisableBranchOpts(
137    "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
138 cl::desc("Disable branch optimizations in CodeGenPrepare"));
139
140static cl::opt<bool>
141 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
142 cl::desc("Disable GC optimizations in CodeGenPrepare"));
143
144static cl::opt<bool>
145 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
146 cl::init(false),
147 cl::desc("Disable select to branch conversion."));
148
149static cl::opt<bool>
150 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
151 cl::desc("Address sinking in CGP using GEPs."));
152
153static cl::opt<bool>
154 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
155                        cl::desc("Enable sinking and/cmp into branches."));
156
158 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
159 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
160
162 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
163 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
164
166 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
167 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
168 "CodeGenPrepare"));
169
171 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
172 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
173 "optimization in CodeGenPrepare"));
174
175static cl::opt<bool> DisablePreheaderProtect(
176    "disable-preheader-prot", cl::Hidden, cl::init(false),
177 cl::desc("Disable protection against removing loop preheaders"));
178
179static cl::opt<bool> ProfileGuidedSectionPrefix(
180    "profile-guided-section-prefix", cl::Hidden, cl::init(true),
181 cl::desc("Use profile info to add section prefix for hot/cold functions"));
182
183static cl::opt<bool> ProfileUnknownInSpecialSection(
184    "profile-unknown-in-special-section", cl::Hidden,
185    cl::desc("In a profiling mode like SampleFDO, if a function doesn't have a "
186             "profile, we cannot tell for sure that the function is cold, because "
187             "it may be a newly added function that has never been sampled. "
188             "With this flag enabled, the compiler can put such profile-unknown "
189             "functions into a special section, so the runtime system can choose "
190             "to handle them differently than the .text section, to save "
191             "RAM for example."));
192
193static cl::opt<bool> BBSectionsGuidedSectionPrefix(
194    "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
195 cl::desc("Use the basic-block-sections profile to determine the text "
196 "section prefix for hot functions. Functions with "
197 "basic-block-sections profile will be placed in `.text.hot` "
198 "regardless of their FDO profile info. Other functions won't be "
199 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
200 "profiles."));
201
202static cl::opt<unsigned> FreqRatioToSkipMerge(
203    "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
204 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
205 "(frequency of destination block) is greater than this ratio"));
206
208 "force-split-store", cl::Hidden, cl::init(false),
209 cl::desc("Force store splitting no matter what the target query says."));
210
211static cl::opt<bool> EnableTypePromotionMerge(
212    "cgp-type-promotion-merge", cl::Hidden,
213 cl::desc("Enable merging of redundant sexts when one is dominating"
214 " the other."),
215 cl::init(true));
216
218 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
219 cl::desc("Disables combining addressing modes with different parts "
220 "in optimizeMemoryInst."));
221
222static cl::opt<bool>
223 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
224 cl::desc("Allow creation of Phis in Address sinking."));
225
227 "addr-sink-new-select", cl::Hidden, cl::init(true),
228 cl::desc("Allow creation of selects in Address sinking."));
229
231 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
232 cl::desc("Allow combining of BaseReg field in Address sinking."));
233
235 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
236 cl::desc("Allow combining of BaseGV field in Address sinking."));
237
239 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
240 cl::desc("Allow combining of BaseOffs field in Address sinking."));
241
243 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
244 cl::desc("Allow combining of ScaledReg field in Address sinking."));
245
246static cl::opt<bool>
247 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
248 cl::init(true),
249 cl::desc("Enable splitting large offset of GEP."));
250
252 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
253 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
254
255static cl::opt<bool>
256 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
257 cl::desc("Enable BFI update verification for "
258 "CodeGenPrepare."));
259
260static cl::opt<bool>
261 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
262 cl::desc("Enable converting phi types in CodeGenPrepare"));
263
265 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
266                            cl::desc("Least number of basic blocks for a function to be considered huge."));
267
269 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::desc("Max number of address users to look at"));
272
273static cl::opt<bool>
274 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
275 cl::desc("Disable elimination of dead PHI nodes."));
276
277namespace {
278
279enum ExtType {
280 ZeroExtension, // Zero extension has been seen.
281 SignExtension, // Sign extension has been seen.
282 BothExtension // This extension type is used if we saw sext after
283 // ZeroExtension had been set, or if we saw zext after
284 // SignExtension had been set. It makes the type
285 // information of a promoted instruction invalid.
286};
287
288enum ModifyDT {
289  NotModifyDT,  // Do not modify any dominator tree.
290  ModifyBBDT,   // Modify the basic-block dominator tree.
291  ModifyInstDT  // Modify the instruction dominance within a basic block.
292                // This usually means we moved/deleted/inserted an
293                // instruction in a basic block, so we should re-iterate the
294                // instructions in such a basic block.
295};
296
297using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
298using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
299using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using ValueToSExts = MapVector<Value *, SExts>;
302
303class TypePromotionTransaction;
304
305class CodeGenPrepare {
306 friend class CodeGenPrepareLegacyPass;
307 const TargetMachine *TM = nullptr;
308 const TargetSubtargetInfo *SubtargetInfo = nullptr;
309 const TargetLowering *TLI = nullptr;
310 const TargetRegisterInfo *TRI = nullptr;
311 const TargetTransformInfo *TTI = nullptr;
312 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
313 const TargetLibraryInfo *TLInfo = nullptr;
314 LoopInfo *LI = nullptr;
315 std::unique_ptr<BlockFrequencyInfo> BFI;
316 std::unique_ptr<BranchProbabilityInfo> BPI;
317 ProfileSummaryInfo *PSI = nullptr;
318
319 /// As we scan instructions optimizing them, this is the next instruction
320 /// to optimize. Transforms that can invalidate this should update it.
321 BasicBlock::iterator CurInstIterator;
322
323 /// Keeps track of non-local addresses that have been sunk into a block.
324 /// This allows us to avoid inserting duplicate code for blocks with
325 /// multiple load/stores of the same address. The usage of WeakTrackingVH
326 /// enables SunkAddrs to be treated as a cache whose entries can be
327 /// invalidated if a sunken address computation has been erased.
329
330 /// Keeps track of all instructions inserted for the current function.
331 SetOfInstrs InsertedInsts;
332
333  /// Keeps track of the original type of each promoted instruction before
334  /// its promotion, for the current function.
335 InstrToOrigTy PromotedInsts;
336
337 /// Keep track of instructions removed during promotion.
338 SetOfInstrs RemovedInsts;
339
340 /// Keep track of sext chains based on their initial value.
341 DenseMap<Value *, Instruction *> SeenChainsForSExt;
342
343 /// Keep track of GEPs accessing the same data structures such as structs or
344 /// arrays that are candidates to be split later because of their large
345 /// size.
348 LargeOffsetGEPMap;
349
350 /// Keep track of new GEP base after splitting the GEPs having large offset.
351 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
352
353 /// Map serial numbers to Large offset GEPs.
354 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
355
356 /// Keep track of SExt promoted.
357 ValueToSExts ValToSExtendedUses;
358
359 /// True if the function has the OptSize attribute.
360 bool OptSize;
361
362 /// DataLayout for the Function being processed.
363 const DataLayout *DL = nullptr;
364
365 /// Building the dominator tree can be expensive, so we only build it
366 /// lazily and update it when required.
367 std::unique_ptr<DominatorTree> DT;
368
369public:
370 CodeGenPrepare(){};
371 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
372  /// If we encounter a huge function, we need to limit the build time.
373 bool IsHugeFunc = false;
374
375  /// FreshBBs is like a worklist: it collects the updated BBs that need
376  /// to be optimized again.
377  /// Note: to keep the build time of this pass in check, when a BB is updated
378  /// we need to insert it into FreshBBs for a huge function.
380
381 void releaseMemory() {
382 // Clear per function information.
383 InsertedInsts.clear();
384 PromotedInsts.clear();
385 FreshBBs.clear();
386 BPI.reset();
387 BFI.reset();
388 }
389
390  bool run(Function &F, FunctionAnalysisManager &AM);
391
392private:
393 template <typename F>
394 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
395 // Substituting can cause recursive simplifications, which can invalidate
396 // our iterator. Use a WeakTrackingVH to hold onto it in case this
397 // happens.
398 Value *CurValue = &*CurInstIterator;
399 WeakTrackingVH IterHandle(CurValue);
400
401 f();
402
403 // If the iterator instruction was recursively deleted, start over at the
404 // start of the block.
405 if (IterHandle != CurValue) {
406 CurInstIterator = BB->begin();
407 SunkAddrs.clear();
408 }
409 }
410
411 // Get the DominatorTree, building if necessary.
412 DominatorTree &getDT(Function &F) {
413 if (!DT)
414 DT = std::make_unique<DominatorTree>(F);
415 return *DT;
416 }
417
418 void removeAllAssertingVHReferences(Value *V);
419 bool eliminateAssumptions(Function &F);
420 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
421 bool eliminateMostlyEmptyBlocks(Function &F);
422 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
423 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
424 void eliminateMostlyEmptyBlock(BasicBlock *BB);
425 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
426 bool isPreheader);
427 bool makeBitReverse(Instruction &I);
428 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
429 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
430 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
431 unsigned AddrSpace);
432 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
433 bool optimizeInlineAsmInst(CallInst *CS);
434 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
435 bool optimizeExt(Instruction *&I);
436 bool optimizeExtUses(Instruction *I);
437 bool optimizeLoadExt(LoadInst *Load);
438 bool optimizeShiftInst(BinaryOperator *BO);
439 bool optimizeFunnelShift(IntrinsicInst *Fsh);
440 bool optimizeSelectInst(SelectInst *SI);
441 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
442 bool optimizeSwitchType(SwitchInst *SI);
443 bool optimizeSwitchPhiConstants(SwitchInst *SI);
444 bool optimizeSwitchInst(SwitchInst *SI);
445 bool optimizeExtractElementInst(Instruction *Inst);
446 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
447 bool fixupDbgValue(Instruction *I);
448 bool fixupDbgVariableRecord(DbgVariableRecord &I);
449 bool fixupDbgVariableRecordsOnInst(Instruction &I);
450 bool placeDbgValues(Function &F);
451 bool placePseudoProbes(Function &F);
452 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
453 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
454 bool tryToPromoteExts(TypePromotionTransaction &TPT,
456 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
457 unsigned CreatedInstsCost = 0);
458 bool mergeSExts(Function &F);
459 bool splitLargeGEPOffsets();
460 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
461 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
462 bool optimizePhiTypes(Function &F);
463 bool performAddressTypePromotion(
464 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
465 bool HasPromoted, TypePromotionTransaction &TPT,
466 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
467 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
468 bool simplifyOffsetableRelocate(GCStatepointInst &I);
469
470 bool tryToSinkFreeOperands(Instruction *I);
471 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
472 CmpInst *Cmp, Intrinsic::ID IID);
473 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
474 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
475 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
476 void verifyBFIUpdates(Function &F);
477 bool _run(Function &F);
478};
479
480class CodeGenPrepareLegacyPass : public FunctionPass {
481public:
482 static char ID; // Pass identification, replacement for typeid
483
484 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
486 }
487
488 bool runOnFunction(Function &F) override;
489
490 StringRef getPassName() const override { return "CodeGen Prepare"; }
491
492 void getAnalysisUsage(AnalysisUsage &AU) const override {
493 // FIXME: When we can selectively preserve passes, preserve the domtree.
500 }
501};
502
503} // end anonymous namespace
504
505char CodeGenPrepareLegacyPass::ID = 0;
506
507bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
508 if (skipFunction(F))
509 return false;
510 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
511 CodeGenPrepare CGP(TM);
512 CGP.DL = &F.getParent()->getDataLayout();
513 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
514 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
515 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
516 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
517 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
518 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
519 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
520 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
521 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
522 auto BBSPRWP =
523 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
524 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
525
526 return CGP._run(F);
527}
528
529INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
530 "Optimize for code generation", false, false)
537INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
538 "Optimize for code generation", false, false)
539
541 return new CodeGenPrepareLegacyPass();
542}
543
546 CodeGenPrepare CGP(TM);
547
548 bool Changed = CGP.run(F, AM);
549 if (!Changed)
550 return PreservedAnalyses::all();
551
556 return PA;
557}
558
559bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
560 DL = &F.getParent()->getDataLayout();
561 SubtargetInfo = TM->getSubtargetImpl(F);
562 TLI = SubtargetInfo->getTargetLowering();
563 TRI = SubtargetInfo->getRegisterInfo();
564 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
566 LI = &AM.getResult<LoopAnalysis>(F);
567 BPI.reset(new BranchProbabilityInfo(F, *LI));
568 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
569 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
570 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
571 BBSectionsProfileReader =
573 return _run(F);
574}
575
576bool CodeGenPrepare::_run(Function &F) {
577 bool EverMadeChange = false;
578
579 OptSize = F.hasOptSize();
580 // Use the basic-block-sections profile to promote hot functions to .text.hot
581 // if requested.
582 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
583 BBSectionsProfileReader->isFunctionHot(F.getName())) {
584 F.setSectionPrefix("hot");
585 } else if (ProfileGuidedSectionPrefix) {
586    // The hot attribute overrides profile-count-based hotness, while
587    // profile-count-based hotness overrides the cold attribute.
588    // This is a conservative behavior.
589 if (F.hasFnAttribute(Attribute::Hot) ||
590 PSI->isFunctionHotInCallGraph(&F, *BFI))
591 F.setSectionPrefix("hot");
592    // If PSI shows this function is not hot, we place the function into the
593    // unlikely section if (1) PSI shows this is a cold function, or
594    // (2) the function has the cold attribute.
595 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
596 F.hasFnAttribute(Attribute::Cold))
597 F.setSectionPrefix("unlikely");
598 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
599 PSI->isFunctionHotnessUnknown(F))
600 F.setSectionPrefix("unknown");
601 }
602
603 /// This optimization identifies DIV instructions that can be
604 /// profitably bypassed and carried out with a shorter, faster divide.
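  // As an illustration (not taken from this file), on a target that reports a
  // 32-bit bypass width for 64-bit division, a 64-bit udiv may be rewritten to
  // dispatch on whether both operands fit in 32 bits at run time, roughly:
  //
  //   %ops  = or i64 %a, %b
  //   %fits = icmp ult i64 %ops, 4294967296     ; both operands < 2^32 ?
  //   br i1 %fits, label %fast, label %slow
  //  fast:                                      ; 32-bit divide, then zext
  //  slow:                                      ; original 64-bit udiv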
605 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
606 const DenseMap<unsigned int, unsigned int> &BypassWidths =
608 BasicBlock *BB = &*F.begin();
609 while (BB != nullptr) {
610 // bypassSlowDivision may create new BBs, but we don't want to reapply the
611 // optimization to those blocks.
612 BasicBlock *Next = BB->getNextNode();
613 // F.hasOptSize is already checked in the outer if statement.
614 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
615 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
616 BB = Next;
617 }
618 }
619
620 // Get rid of @llvm.assume builtins before attempting to eliminate empty
621 // blocks, since there might be blocks that only contain @llvm.assume calls
622 // (plus arguments that we can get rid of).
623 EverMadeChange |= eliminateAssumptions(F);
624
625 // Eliminate blocks that contain only PHI nodes and an
626 // unconditional branch.
627 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
628
629 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
631 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
632
633 // Split some critical edges where one of the sources is an indirect branch,
634 // to help generate sane code for PHIs involving such edges.
635 EverMadeChange |=
636 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
637
638  // If we are optimizing a huge function, we need to consider the build time
639  // because the basic algorithm's complexity is near O(N!).
640 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
641
642 // Transformations above may invalidate dominator tree and/or loop info.
643 DT.reset();
644 LI->releaseMemory();
645 LI->analyze(getDT(F));
646
647 bool MadeChange = true;
648 bool FuncIterated = false;
649 while (MadeChange) {
650 MadeChange = false;
651
653 if (FuncIterated && !FreshBBs.contains(&BB))
654 continue;
655
656 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
657 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
658
659 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
660 DT.reset();
661
662 MadeChange |= Changed;
663 if (IsHugeFunc) {
664      // If the BB is updated, it may still have a chance to be optimized.
665      // This usually happens during sink optimization.
666 // For example:
667 //
668 // bb0:
669 // %and = and i32 %a, 4
670 // %cmp = icmp eq i32 %and, 0
671 //
672      // If the %cmp sinks to another BB, the %and will have a chance to sink too.
673 if (Changed)
674 FreshBBs.insert(&BB);
675 else if (FuncIterated)
676 FreshBBs.erase(&BB);
677 } else {
678 // For small/normal functions, we restart BB iteration if the dominator
679 // tree of the Function was changed.
680 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
681 break;
682 }
683 }
684    // We have iterated over all the BBs in the function (only relevant for huge functions).
685 FuncIterated = IsHugeFunc;
686
687 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
688 MadeChange |= mergeSExts(F);
689 if (!LargeOffsetGEPMap.empty())
690 MadeChange |= splitLargeGEPOffsets();
691 MadeChange |= optimizePhiTypes(F);
692
693 if (MadeChange)
694 eliminateFallThrough(F, DT.get());
695
696#ifndef NDEBUG
697 if (MadeChange && VerifyLoopInfo)
698 LI->verify(getDT(F));
699#endif
700
701 // Really free removed instructions during promotion.
702 for (Instruction *I : RemovedInsts)
703 I->deleteValue();
704
705 EverMadeChange |= MadeChange;
706 SeenChainsForSExt.clear();
707 ValToSExtendedUses.clear();
708 RemovedInsts.clear();
709 LargeOffsetGEPMap.clear();
710 LargeOffsetGEPID.clear();
711 }
712
713 NewGEPBases.clear();
714 SunkAddrs.clear();
715
716 if (!DisableBranchOpts) {
717 MadeChange = false;
718 // Use a set vector to get deterministic iteration order. The order the
719 // blocks are removed may affect whether or not PHI nodes in successors
720 // are removed.
722 for (BasicBlock &BB : F) {
724 MadeChange |= ConstantFoldTerminator(&BB, true);
725 if (!MadeChange)
726 continue;
727
728 for (BasicBlock *Succ : Successors)
729 if (pred_empty(Succ))
730 WorkList.insert(Succ);
731 }
732
733 // Delete the dead blocks and any of their dead successors.
734 MadeChange |= !WorkList.empty();
735 while (!WorkList.empty()) {
736 BasicBlock *BB = WorkList.pop_back_val();
738
739 DeleteDeadBlock(BB);
740
741 for (BasicBlock *Succ : Successors)
742 if (pred_empty(Succ))
743 WorkList.insert(Succ);
744 }
745
746 // Merge pairs of basic blocks with unconditional branches, connected by
747 // a single edge.
748 if (EverMadeChange || MadeChange)
749 MadeChange |= eliminateFallThrough(F);
750
751 EverMadeChange |= MadeChange;
752 }
753
754 if (!DisableGCOpts) {
756 for (BasicBlock &BB : F)
757 for (Instruction &I : BB)
758 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
759 Statepoints.push_back(SP);
760 for (auto &I : Statepoints)
761 EverMadeChange |= simplifyOffsetableRelocate(*I);
762 }
763
764 // Do this last to clean up use-before-def scenarios introduced by other
765 // preparatory transforms.
766 EverMadeChange |= placeDbgValues(F);
767 EverMadeChange |= placePseudoProbes(F);
768
769#ifndef NDEBUG
771 verifyBFIUpdates(F);
772#endif
773
774 return EverMadeChange;
775}
776
777bool CodeGenPrepare::eliminateAssumptions(Function &F) {
778 bool MadeChange = false;
779 for (BasicBlock &BB : F) {
780 CurInstIterator = BB.begin();
781 while (CurInstIterator != BB.end()) {
782 Instruction *I = &*(CurInstIterator++);
783 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
784 MadeChange = true;
785 Value *Operand = Assume->getOperand(0);
786 Assume->eraseFromParent();
787
788 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
789 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
790 });
791 }
792 }
793 }
794 return MadeChange;
795}
796
797/// An instruction is about to be deleted, so remove all references to it in our
798/// GEP-tracking data structures.
799void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
800 LargeOffsetGEPMap.erase(V);
801 NewGEPBases.erase(V);
802
803 auto GEP = dyn_cast<GetElementPtrInst>(V);
804 if (!GEP)
805 return;
806
807 LargeOffsetGEPID.erase(GEP);
808
809 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
810 if (VecI == LargeOffsetGEPMap.end())
811 return;
812
813 auto &GEPVector = VecI->second;
814 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
815
816 if (GEPVector.empty())
817 LargeOffsetGEPMap.erase(VecI);
818}
819
820// Verify BFI has been updated correctly by recomputing BFI and comparing them.
821void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
822 DominatorTree NewDT(F);
823 LoopInfo NewLI(NewDT);
824 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
825 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
826 NewBFI.verifyMatch(*BFI);
827}
828
829/// Merge basic blocks which are connected by a single edge, where one of the
830/// basic blocks has a single successor pointing to the other basic block,
831/// which has a single predecessor.
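/// For example (illustrative only), a CFG fragment like
///
///   bb1:
///     ...
///     br label %bb2
///   bb2:                                 ; preds = %bb1 (single predecessor)
///     ...
///
/// is collapsed into a single basic block containing both instruction lists.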
832bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
833 bool Changed = false;
834 // Scan all of the blocks in the function, except for the entry block.
835 // Use a temporary array to avoid iterator being invalidated when
836 // deleting blocks.
838 for (auto &Block : llvm::drop_begin(F))
839 Blocks.push_back(&Block);
840
842 for (auto &Block : Blocks) {
843 auto *BB = cast_or_null<BasicBlock>(Block);
844 if (!BB)
845 continue;
846 // If the destination block has a single pred, then this is a trivial
847 // edge, just collapse it.
848 BasicBlock *SinglePred = BB->getSinglePredecessor();
849
850 // Don't merge if BB's address is taken.
851 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
852 continue;
853
854 // Make an effort to skip unreachable blocks.
855 if (DT && !DT->isReachableFromEntry(BB))
856 continue;
857
858 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
859 if (Term && !Term->isConditional()) {
860 Changed = true;
861 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
862
863 // Merge BB into SinglePred and delete it.
864 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
865 /* MemDep */ nullptr,
866 /* PredecessorWithTwoSuccessors */ false, DT);
867 Preds.insert(SinglePred);
868
869 if (IsHugeFunc) {
870 // Update FreshBBs to optimize the merged BB.
871 FreshBBs.insert(SinglePred);
872 FreshBBs.erase(BB);
873 }
874 }
875 }
876
877 // (Repeatedly) merging blocks into their predecessors can create redundant
878 // debug intrinsics.
879 for (const auto &Pred : Preds)
880 if (auto *BB = cast_or_null<BasicBlock>(Pred))
882
883 return Changed;
884}
885
886/// Find a destination block from BB if BB is mergeable empty block.
887BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
888 // If this block doesn't end with an uncond branch, ignore it.
889 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
890 if (!BI || !BI->isUnconditional())
891 return nullptr;
892
893 // If the instruction before the branch (skipping debug info) isn't a phi
894 // node, then other stuff is happening here.
896 if (BBI != BB->begin()) {
897 --BBI;
898 while (isa<DbgInfoIntrinsic>(BBI)) {
899 if (BBI == BB->begin())
900 break;
901 --BBI;
902 }
903 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
904 return nullptr;
905 }
906
907 // Do not break infinite loops.
908 BasicBlock *DestBB = BI->getSuccessor(0);
909 if (DestBB == BB)
910 return nullptr;
911
912 if (!canMergeBlocks(BB, DestBB))
913 DestBB = nullptr;
914
915 return DestBB;
916}
917
918/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
919/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
920/// edges in ways that are non-optimal for isel. Start by eliminating these
921/// blocks so we can split them the way we want them.
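/// For example (illustrative only), a "mostly empty" block looks like:
///
///   bb:                                  ; preds = %a, %b
///     %p = phi i32 [ 0, %a ], [ 1, %b ]
///     br label %dest
///
/// and can be removed by rewiring %a and %b directly to %dest and updating the
/// PHIs in %dest.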
922bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
924 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
925 while (!LoopList.empty()) {
926 Loop *L = LoopList.pop_back_val();
927 llvm::append_range(LoopList, *L);
928 if (BasicBlock *Preheader = L->getLoopPreheader())
929 Preheaders.insert(Preheader);
930 }
931
932 bool MadeChange = false;
933 // Copy blocks into a temporary array to avoid iterator invalidation issues
934 // as we remove them.
935 // Note that this intentionally skips the entry block.
937 for (auto &Block : llvm::drop_begin(F)) {
938 // Delete phi nodes that could block deleting other empty blocks.
940 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
941 Blocks.push_back(&Block);
942 }
943
944 for (auto &Block : Blocks) {
945 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
946 if (!BB)
947 continue;
948 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
949 if (!DestBB ||
950 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
951 continue;
952
953 eliminateMostlyEmptyBlock(BB);
954 MadeChange = true;
955 }
956 return MadeChange;
957}
958
959bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
960 BasicBlock *DestBB,
961 bool isPreheader) {
962 // Do not delete loop preheaders if doing so would create a critical edge.
963 // Loop preheaders can be good locations to spill registers. If the
964 // preheader is deleted and we create a critical edge, registers may be
965 // spilled in the loop body instead.
966 if (!DisablePreheaderProtect && isPreheader &&
967 !(BB->getSinglePredecessor() &&
969 return false;
970
971 // Skip merging if the block's successor is also a successor to any callbr
972 // that leads to this block.
973 // FIXME: Is this really needed? Is this a correctness issue?
974 for (BasicBlock *Pred : predecessors(BB)) {
975 if (isa<CallBrInst>(Pred->getTerminator()) &&
976 llvm::is_contained(successors(Pred), DestBB))
977 return false;
978 }
979
980 // Try to skip merging if the unique predecessor of BB is terminated by a
981 // switch or indirect branch instruction, and BB is used as an incoming block
982  // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel to
983  // add COPY instructions in the predecessor of BB instead of BB (if it is not
984  // merged). Note that the critical edge created by merging such blocks won't be
985  // split in MachineSink because the jump table is not analyzable. By keeping
986  // such an empty block (BB), ISel will place COPY instructions in BB, not in the
987 // predecessor of BB.
988 BasicBlock *Pred = BB->getUniquePredecessor();
989 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
990 isa<IndirectBrInst>(Pred->getTerminator())))
991 return true;
992
993 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
994 return true;
995
996  // We use a simple cost heuristic: skipping the merge is profitable if the
997  // cost of skipping merging is less than the cost of merging, i.e.
998  // Cost(skipping merging) < Cost(merging BB), where
999  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1000  // Cost(merging BB) is Freq(Pred) * Cost(Copy).
1001  // Assuming Cost(Copy) == Cost(Branch), we can simplify this to:
1002  //   Freq(Pred) / Freq(BB) > 2.
1003  // Note that if there are multiple empty blocks sharing the same incoming
1004  // value for the PHIs in DestBB, we consider them together. In such a
1005  // case, Cost(merging BB) is the sum of their frequencies.
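  // For instance (illustrative numbers only), with the default
  // FreqRatioToSkipMerge of 2, if Freq(Pred) == 600 and Freq(BB) == 200 then
  // Freq(Pred) / Freq(BB) == 3 > 2, so skipping the merge is considered
  // profitable and BB is kept.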
1006
1007 if (!isa<PHINode>(DestBB->begin()))
1008 return true;
1009
1010 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1011
1012 // Find all other incoming blocks from which incoming values of all PHIs in
1013 // DestBB are the same as the ones from BB.
1014 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1015 if (DestBBPred == BB)
1016 continue;
1017
1018 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1019 return DestPN.getIncomingValueForBlock(BB) ==
1020 DestPN.getIncomingValueForBlock(DestBBPred);
1021 }))
1022 SameIncomingValueBBs.insert(DestBBPred);
1023 }
1024
1025  // See if all of BB's incoming values are the same as the value from Pred. In
1026  // this case, there is no reason to skip merging because COPYs are expected to
1027  // be placed in Pred already.
1028 if (SameIncomingValueBBs.count(Pred))
1029 return true;
1030
1031 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1032 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1033
1034 for (auto *SameValueBB : SameIncomingValueBBs)
1035 if (SameValueBB->getUniquePredecessor() == Pred &&
1036 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1037 BBFreq += BFI->getBlockFreq(SameValueBB);
1038
1039 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1040 return !Limit || PredFreq <= *Limit;
1041}
1042
1043/// Return true if we can merge BB into DestBB if there is a single
1044/// unconditional branch between them, and BB contains no other non-phi
1045/// instructions.
1046bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1047 const BasicBlock *DestBB) const {
1048 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1049 // the successor. If there are more complex condition (e.g. preheaders),
1050 // don't mess around with them.
1051 for (const PHINode &PN : BB->phis()) {
1052 for (const User *U : PN.users()) {
1053 const Instruction *UI = cast<Instruction>(U);
1054 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1055 return false;
1056      // If the user is inside the DestBB block and it is a PHINode, then check
1057      // the incoming value. If the incoming value is not from BB, then this is
1058      // a complex condition (e.g. preheaders) that we want to avoid here.
1059 if (UI->getParent() == DestBB) {
1060 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1061 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1062 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1063 if (Insn && Insn->getParent() == BB &&
1064 Insn->getParent() != UPN->getIncomingBlock(I))
1065 return false;
1066 }
1067 }
1068 }
1069 }
1070
1071 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1072 // and DestBB may have conflicting incoming values for the block. If so, we
1073 // can't merge the block.
1074 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1075 if (!DestBBPN)
1076 return true; // no conflict.
1077
1078 // Collect the preds of BB.
1080 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1081 // It is faster to get preds from a PHI than with pred_iterator.
1082 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1083 BBPreds.insert(BBPN->getIncomingBlock(i));
1084 } else {
1085 BBPreds.insert(pred_begin(BB), pred_end(BB));
1086 }
1087
1088 // Walk the preds of DestBB.
1089 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1090 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1091 if (BBPreds.count(Pred)) { // Common predecessor?
1092 for (const PHINode &PN : DestBB->phis()) {
1093 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1094 const Value *V2 = PN.getIncomingValueForBlock(BB);
1095
1096 // If V2 is a phi node in BB, look up what the mapped value will be.
1097 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1098 if (V2PN->getParent() == BB)
1099 V2 = V2PN->getIncomingValueForBlock(Pred);
1100
1101 // If there is a conflict, bail out.
1102 if (V1 != V2)
1103 return false;
1104 }
1105 }
1106 }
1107
1108 return true;
1109}
1110
1111/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1112static void replaceAllUsesWith(Value *Old, Value *New,
1114 bool IsHuge) {
1115 auto *OldI = dyn_cast<Instruction>(Old);
1116 if (OldI) {
1117 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1118 UI != E; ++UI) {
1119 Instruction *User = cast<Instruction>(*UI);
1120 if (IsHuge)
1121 FreshBBs.insert(User->getParent());
1122 }
1123 }
1124 Old->replaceAllUsesWith(New);
1125}
1126
1127/// Eliminate a basic block that has only phi's and an unconditional branch in
1128/// it.
1129void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1130 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1131 BasicBlock *DestBB = BI->getSuccessor(0);
1132
1133 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1134 << *BB << *DestBB);
1135
1136 // If the destination block has a single pred, then this is a trivial edge,
1137 // just collapse it.
1138 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1139 if (SinglePred != DestBB) {
1140 assert(SinglePred == BB &&
1141 "Single predecessor not the same as predecessor");
1142 // Merge DestBB into SinglePred/BB and delete it.
1144 // Note: BB(=SinglePred) will not be deleted on this path.
1145 // DestBB(=its single successor) is the one that was deleted.
1146 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1147
1148 if (IsHugeFunc) {
1149 // Update FreshBBs to optimize the merged BB.
1150 FreshBBs.insert(SinglePred);
1151 FreshBBs.erase(DestBB);
1152 }
1153 return;
1154 }
1155 }
1156
1157 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1158 // to handle the new incoming edges it is about to have.
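  // For example (illustrative only): if BB has predecessors %p1 and %p2 and
  // contains
  //   %v = phi i32 [ %x, %p1 ], [ %y, %p2 ]
  // and a PHI in DestBB has the incoming value %v for BB, then after removing
  // that incoming value the DestBB PHI instead receives the incoming pairs
  // [ %x, %p1 ] and [ %y, %p2 ].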
1159 for (PHINode &PN : DestBB->phis()) {
1160 // Remove the incoming value for BB, and remember it.
1161 Value *InVal = PN.removeIncomingValue(BB, false);
1162
1163 // Two options: either the InVal is a phi node defined in BB or it is some
1164 // value that dominates BB.
1165 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1166 if (InValPhi && InValPhi->getParent() == BB) {
1167 // Add all of the input values of the input PHI as inputs of this phi.
1168 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1169 PN.addIncoming(InValPhi->getIncomingValue(i),
1170 InValPhi->getIncomingBlock(i));
1171 } else {
1172 // Otherwise, add one instance of the dominating value for each edge that
1173 // we will be adding.
1174 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1175 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1176 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1177 } else {
1178 for (BasicBlock *Pred : predecessors(BB))
1179 PN.addIncoming(InVal, Pred);
1180 }
1181 }
1182 }
1183
1184 // The PHIs are now updated, change everything that refers to BB to use
1185 // DestBB and remove BB.
1186 BB->replaceAllUsesWith(DestBB);
1187 BB->eraseFromParent();
1188 ++NumBlocksElim;
1189
1190 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1191}
1192
1193// Computes a map of base pointer relocation instructions to corresponding
1194// derived pointer relocation instructions given a vector of all relocate calls
1196 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1198 &RelocateInstMap) {
1199 // Collect information in two maps: one primarily for locating the base object
1200 // while filling the second map; the second map is the final structure holding
1201 // a mapping between Base and corresponding Derived relocate calls
1203 for (auto *ThisRelocate : AllRelocateCalls) {
1204 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1205 ThisRelocate->getDerivedPtrIndex());
1206 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1207 }
1208 for (auto &Item : RelocateIdxMap) {
1209 std::pair<unsigned, unsigned> Key = Item.first;
1210 if (Key.first == Key.second)
1211 // Base relocation: nothing to insert
1212 continue;
1213
1214 GCRelocateInst *I = Item.second;
1215 auto BaseKey = std::make_pair(Key.first, Key.first);
1216
1217 // We're iterating over RelocateIdxMap so we cannot modify it.
1218 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1219 if (MaybeBase == RelocateIdxMap.end())
1220 // TODO: We might want to insert a new base object relocate and gep off
1221 // that, if there are enough derived object relocates.
1222 continue;
1223
1224 RelocateInstMap[MaybeBase->second].push_back(I);
1225 }
1226}
1227
1228// Accepts a GEP and extracts the operands into a vector provided they're all
1229// small integer constants
1230static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
1231                                          SmallVectorImpl<Value *> &OffsetV) {
1232 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1233 // Only accept small constant integer operands
1234 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1235 if (!Op || Op->getZExtValue() > 20)
1236 return false;
1237 }
1238
1239 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1240 OffsetV.push_back(GEP->getOperand(i));
1241 return true;
1242}
1243
1244// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1245// replace, computes a replacement, and applies it.
1246static bool
1248 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1249 bool MadeChange = false;
1250  // We must ensure that the relocation of the derived pointer is defined after
1251  // the relocation of the base pointer. If we find a relocation corresponding to
1252  // this base that is defined earlier than the relocation of the base, we move
1253  // the relocation of the base right before the found relocation. We only
1254  // consider relocations in the same basic block as the relocation of the base;
1255  // relocations from other basic blocks are skipped by this optimization.
1256 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1257 &*R != RelocatedBase; ++R)
1258 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1259 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1260 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1261 RelocatedBase->moveBefore(RI);
1262 MadeChange = true;
1263 break;
1264 }
1265
1266 for (GCRelocateInst *ToReplace : Targets) {
1267 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1268 "Not relocating a derived object of the original base object");
1269 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1270 // A duplicate relocate call. TODO: coalesce duplicates.
1271 continue;
1272 }
1273
1274 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1275 // Base and derived relocates are in different basic blocks.
1276      // In this case the transform is only valid when the base dominates the
1277      // derived relocate. However, it would be too expensive to check dominance
1278 // for each such relocate, so we skip the whole transformation.
1279 continue;
1280 }
1281
1282 Value *Base = ToReplace->getBasePtr();
1283 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1284 if (!Derived || Derived->getPointerOperand() != Base)
1285 continue;
1286
1288 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1289 continue;
1290
1291 // Create a Builder and replace the target callsite with a gep
1292 assert(RelocatedBase->getNextNode() &&
1293 "Should always have one since it's not a terminator");
1294
1295 // Insert after RelocatedBase
1296 IRBuilder<> Builder(RelocatedBase->getNextNode());
1297 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1298
1299 // If gc_relocate does not match the actual type, cast it to the right type.
1300 // In theory, there must be a bitcast after gc_relocate if the type does not
1301 // match, and we should reuse it to get the derived pointer. But it could be
1302 // cases like this:
1303 // bb1:
1304 // ...
1305 // %g1 = call coldcc i8 addrspace(1)*
1306 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1307 //
1308 // bb2:
1309 // ...
1310 // %g2 = call coldcc i8 addrspace(1)*
1311 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1312 //
1313 // merge:
1314 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1315 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
1316 //
1317    //   In this case, we cannot find the bitcast any more. So we insert a new
1318    //   bitcast whether there is already one or not. In this way, we can handle
1319 // all cases, and the extra bitcast should be optimized away in later
1320 // passes.
1321 Value *ActualRelocatedBase = RelocatedBase;
1322 if (RelocatedBase->getType() != Base->getType()) {
1323 ActualRelocatedBase =
1324 Builder.CreateBitCast(RelocatedBase, Base->getType());
1325 }
1326 Value *Replacement =
1327 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1328 ArrayRef(OffsetV));
1329 Replacement->takeName(ToReplace);
1330 // If the newly generated derived pointer's type does not match the original
1331 // derived pointer's type, cast the new derived pointer to match it. Same
1332 // reasoning as above.
1333 Value *ActualReplacement = Replacement;
1334 if (Replacement->getType() != ToReplace->getType()) {
1335 ActualReplacement =
1336 Builder.CreateBitCast(Replacement, ToReplace->getType());
1337 }
1338 ToReplace->replaceAllUsesWith(ActualReplacement);
1339 ToReplace->eraseFromParent();
1340
1341 MadeChange = true;
1342 }
1343 return MadeChange;
1344}
1345
1346// Turns this:
1347//
1348// %base = ...
1349// %ptr = gep %base + 15
1350// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1351// %base' = relocate(%tok, i32 4, i32 4)
1352// %ptr' = relocate(%tok, i32 4, i32 5)
1353// %val = load %ptr'
1354//
1355// into this:
1356//
1357// %base = ...
1358// %ptr = gep %base + 15
1359// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1360// %base' = gc.relocate(%tok, i32 4, i32 4)
1361// %ptr' = gep %base' + 15
1362// %val = load %ptr'
1363bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1364 bool MadeChange = false;
1365 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1366 for (auto *U : I.users())
1367 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1368 // Collect all the relocate calls associated with a statepoint
1369 AllRelocateCalls.push_back(Relocate);
1370
1371 // We need at least one base pointer relocation + one derived pointer
1372 // relocation to mangle
1373 if (AllRelocateCalls.size() < 2)
1374 return false;
1375
1376 // RelocateInstMap is a mapping from the base relocate instruction to the
1377 // corresponding derived relocate instructions
1379 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1380 if (RelocateInstMap.empty())
1381 return false;
1382
1383 for (auto &Item : RelocateInstMap)
1384 // Item.first is the RelocatedBase to offset against
1385 // Item.second is the vector of Targets to replace
1386 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1387 return MadeChange;
1388}
1389
1390/// Sink the specified cast instruction into its user blocks.
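/// For example (illustrative only): if %c = zext i32 %v to i64 is defined in
/// block %def but only used in blocks %use1 and %use2, a copy of the zext is
/// created at the top of each user block and the original cast is erased, so
/// that the one-block-at-a-time instruction selector can fold each copy into
/// its local users.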
1391static bool SinkCast(CastInst *CI) {
1392 BasicBlock *DefBB = CI->getParent();
1393
1394 /// InsertedCasts - Only insert a cast in each block once.
1395  DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1396
1397 bool MadeChange = false;
1398 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1399 UI != E;) {
1400 Use &TheUse = UI.getUse();
1401 Instruction *User = cast<Instruction>(*UI);
1402
1403 // Figure out which BB this cast is used in. For PHI's this is the
1404 // appropriate predecessor block.
1405 BasicBlock *UserBB = User->getParent();
1406 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1407 UserBB = PN->getIncomingBlock(TheUse);
1408 }
1409
1410 // Preincrement use iterator so we don't invalidate it.
1411 ++UI;
1412
1413 // The first insertion point of a block containing an EH pad is after the
1414 // pad. If the pad is the user, we cannot sink the cast past the pad.
1415 if (User->isEHPad())
1416 continue;
1417
1418 // If the block selected to receive the cast is an EH pad that does not
1419 // allow non-PHI instructions before the terminator, we can't sink the
1420 // cast.
1421 if (UserBB->getTerminator()->isEHPad())
1422 continue;
1423
1424 // If this user is in the same block as the cast, don't change the cast.
1425 if (UserBB == DefBB)
1426 continue;
1427
1428 // If we have already inserted a cast into this block, use it.
1429 CastInst *&InsertedCast = InsertedCasts[UserBB];
1430
1431 if (!InsertedCast) {
1432 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1433 assert(InsertPt != UserBB->end());
1434 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1435 CI->getType(), "");
1436 InsertedCast->insertBefore(*UserBB, InsertPt);
1437 InsertedCast->setDebugLoc(CI->getDebugLoc());
1438 }
1439
1440 // Replace a use of the cast with a use of the new cast.
1441 TheUse = InsertedCast;
1442 MadeChange = true;
1443 ++NumCastUses;
1444 }
1445
1446 // If we removed all uses, nuke the cast.
1447 if (CI->use_empty()) {
1448 salvageDebugInfo(*CI);
1449 CI->eraseFromParent();
1450 MadeChange = true;
1451 }
1452
1453 return MadeChange;
1454}
1455
1456/// If the specified cast instruction is a noop copy (e.g. it's casting from
1457/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1458/// reduce the number of virtual registers that must be created and coalesced.
1459///
1460/// Return true if any changes are made.
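/// For example (illustrative only): on a target where i32 is promoted to i64,
/// a trunc i64 %x to i32 becomes a no-op once both types are legalized to i64,
/// so the truncate is sunk into its user blocks like any other free cast.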
1461static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1462                                       const DataLayout &DL) {
1463 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1464 // than sinking only nop casts, but is helpful on some platforms.
1465 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1466 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1467 ASC->getDestAddressSpace()))
1468 return false;
1469 }
1470
1471 // If this is a noop copy,
1472 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1473 EVT DstVT = TLI.getValueType(DL, CI->getType());
1474
1475  // Is this an fp<->int conversion?
1476 if (SrcVT.isInteger() != DstVT.isInteger())
1477 return false;
1478
1479 // If this is an extension, it will be a zero or sign extension, which
1480 // isn't a noop.
1481 if (SrcVT.bitsLT(DstVT))
1482 return false;
1483
1484 // If these values will be promoted, find out what they will be promoted
1485 // to. This helps us consider truncates on PPC as noop copies when they
1486 // are.
1487 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1489 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1490 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1492 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1493
1494 // If, after promotion, these are the same types, this is a noop copy.
1495 if (SrcVT != DstVT)
1496 return false;
1497
1498 return SinkCast(CI);
1499}
1500
1501// Match a simple increment by constant operation. Note that if a sub is
1502// matched, the step is negated (as if the step had been canonicalized to
1503// an add, even though we leave the instruction alone.)
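// For example (illustrative only), both of the following match with LHS == %iv:
//   %iv.next = add i64 %iv, 4        ; Step == 4
//   %iv.next = sub i64 %iv, 4        ; Step == -4 (negated, as described above)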
1504bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1505 Constant *&Step) {
1506 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1507 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1508 m_Instruction(LHS), m_Constant(Step)))))
1509 return true;
1510 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1511 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1512 m_Instruction(LHS), m_Constant(Step))))) {
1513 Step = ConstantExpr::getNeg(Step);
1514 return true;
1515 }
1516 return false;
1517}
1518
1519/// If the given \p PN is an induction variable whose value IVInc comes from the
1520/// backedge, and on each iteration it gets increased by Step, return the pair
1521/// <IVInc, Step>. Otherwise, return std::nullopt.
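/// For example (illustrative only), calling this on %iv in the canonical loop
/// below returns the pair <%iv.next, 1>:
///
///   loop:
///     %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
///     ...
///     %iv.next = add i64 %iv, 1
///     br i1 %done, label %exit, label %loop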
1522static std::optional<std::pair<Instruction *, Constant *>>
1523getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1524 const Loop *L = LI->getLoopFor(PN->getParent());
1525 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1526 return std::nullopt;
1527 auto *IVInc =
1528 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1529 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1530 return std::nullopt;
1531 Instruction *LHS = nullptr;
1532 Constant *Step = nullptr;
1533 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1534 return std::make_pair(IVInc, Step);
1535 return std::nullopt;
1536}
1537
1538static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1539 auto *I = dyn_cast<Instruction>(V);
1540 if (!I)
1541 return false;
1542 Instruction *LHS = nullptr;
1543 Constant *Step = nullptr;
1544 if (!matchIncrement(I, LHS, Step))
1545 return false;
1546 if (auto *PN = dyn_cast<PHINode>(LHS))
1547 if (auto IVInc = getIVIncrement(PN, LI))
1548 return IVInc->first == I;
1549 return false;
1550}
1551
1552bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1553 Value *Arg0, Value *Arg1,
1554 CmpInst *Cmp,
1555 Intrinsic::ID IID) {
1556 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1557 if (!isIVIncrement(BO, LI))
1558 return false;
1559 const Loop *L = LI->getLoopFor(BO->getParent());
1560 assert(L && "L should not be null after isIVIncrement()");
1561    // Do not risk moving the increment into a child loop.
1562 if (LI->getLoopFor(Cmp->getParent()) != L)
1563 return false;
1564
1565 // Finally, we need to ensure that the insert point will dominate all
1566 // existing uses of the increment.
1567
1568 auto &DT = getDT(*BO->getParent()->getParent());
1569 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1570 // If we're moving up the dom tree, all uses are trivially dominated.
1571 // (This is the common case for code produced by LSR.)
1572 return true;
1573
1574 // Otherwise, special case the single use in the phi recurrence.
1575 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1576 };
1577 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1578 // We used to use a dominator tree here to allow multi-block optimization.
1579 // But that was problematic because:
1580 // 1. It could cause a perf regression by hoisting the math op into the
1581 // critical path.
1582 // 2. It could cause a perf regression by creating a value that was live
1583 // across multiple blocks and increasing register pressure.
1584 // 3. Use of a dominator tree could cause large compile-time regression.
1585 // This is because we recompute the DT on every change in the main CGP
1586 // run-loop. The recomputing is probably unnecessary in many cases, so if
1587 // that was fixed, using a DT here would be ok.
1588 //
1589 // There is one important particular case we still want to handle: if BO is
1590 // the IV increment. Important properties that make it profitable:
1591 // - We can speculate IV increment anywhere in the loop (as long as the
1592 // indvar Phi is its only user);
1593 // - Upon computing Cmp, we effectively compute something equivalent to the
1594 // IV increment (even though it looks different in the IR). So moving it up
1595 // to the cmp point does not really increase register pressure.
1596 return false;
1597 }
1598
1599 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1600 if (BO->getOpcode() == Instruction::Add &&
1601 IID == Intrinsic::usub_with_overflow) {
1602 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1603 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1604 }
1605
1606 // Insert at the first instruction of the pair.
1607 Instruction *InsertPt = nullptr;
1608 for (Instruction &Iter : *Cmp->getParent()) {
1609 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1610 // the overflow intrinsic are defined.
1611 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1612 InsertPt = &Iter;
1613 break;
1614 }
1615 }
1616 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1617
1618 IRBuilder<> Builder(InsertPt);
1619 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1620 if (BO->getOpcode() != Instruction::Xor) {
1621 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1622 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1623 } else
1624 assert(BO->hasOneUse() &&
1625 "Patterns with XOr should use the BO only in the compare");
1626 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1627 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1628 Cmp->eraseFromParent();
1629 BO->eraseFromParent();
1630 return true;
1631}
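// For illustration, the rewrite replaceMathCmpWithIntrinsic() performs for a
// uadd.with.overflow match (value names are hypothetical):
//   %add = add i64 %a, %b
//   %cmp = icmp ult i64 %add, %a            ; unsigned-overflow check
// becomes
//   %m    = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
//   %math = extractvalue { i64, i1 } %m, 0
//   %ov   = extractvalue { i64, i1 } %m, 1
// with uses of %add replaced by %math and uses of %cmp replaced by %ov.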
1632
1633/// Match special-case patterns that check for unsigned add overflow.
1634static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1635 BinaryOperator *&Add) {
1636 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1637 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1638 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1639
1640 // We are not expecting non-canonical/degenerate code. Just bail out.
1641 if (isa<Constant>(A))
1642 return false;
1643
1644 ICmpInst::Predicate Pred = Cmp->getPredicate();
1645 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1646 B = ConstantInt::get(B->getType(), 1);
1647 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1648 B = ConstantInt::get(B->getType(), -1);
1649 else
1650 return false;
1651
1652 // Check the users of the variable operand of the compare looking for an add
1653 // with the adjusted constant.
1654 for (User *U : A->users()) {
1655 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1656 Add = cast<BinaryOperator>(U);
1657 return true;
1658 }
1659 }
1660 return false;
1661}
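// For illustration (hypothetical values): for the first edge case above,
// "%add = add i64 %a, 1" together with "%cmp = icmp eq i64 %a, -1" is handled
// as an unsigned add overflow of %a and 1, because the add wraps exactly when
// %a is the maximum unsigned value.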
1662
1663/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1664/// intrinsic. Return true if any changes were made.
1665bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1666 ModifyDT &ModifiedDT) {
1667 bool EdgeCase = false;
1668 Value *A, *B;
1669 BinaryOperator *Add;
1670 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1671 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1672 return false;
1673 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1674 A = Add->getOperand(0);
1675 B = Add->getOperand(1);
1676 EdgeCase = true;
1677 }
1678
1679 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1680 TLI->getValueType(*DL, Add->getType()),
1681 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1682 return false;
1683
1684 // We don't want to move around uses of condition values this late, so we
1685 // check if it is legal to create the call to the intrinsic in the basic
1686 // block containing the icmp.
1687 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1688 return false;
1689
1690 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1691 Intrinsic::uadd_with_overflow))
1692 return false;
1693
1694 // Reset callers - do not crash by iterating over a dead instruction.
1695 ModifiedDT = ModifyDT::ModifyInstDT;
1696 return true;
1697}
1698
1699bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1700 ModifyDT &ModifiedDT) {
1701 // We are not expecting non-canonical/degenerate code. Just bail out.
1702 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1703 if (isa<Constant>(A) && isa<Constant>(B))
1704 return false;
1705
1706 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1707 ICmpInst::Predicate Pred = Cmp->getPredicate();
1708 if (Pred == ICmpInst::ICMP_UGT) {
1709 std::swap(A, B);
1710 Pred = ICmpInst::ICMP_ULT;
1711 }
1712 // Convert special-case: (A == 0) is the same as (A u< 1).
1713 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1714 B = ConstantInt::get(B->getType(), 1);
1715 Pred = ICmpInst::ICMP_ULT;
1716 }
1717 // Convert special-case: (A != 0) is the same as (0 u< A).
1718 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1719 std::swap(A, B);
1720 Pred = ICmpInst::ICMP_ULT;
1721 }
1722 if (Pred != ICmpInst::ICMP_ULT)
1723 return false;
1724
1725 // Walk the users of a variable operand of a compare looking for a subtract or
1726 // add with that same operand. Also match the 2nd operand of the compare to
1727 // the add/sub, but that may be a negated constant operand of an add.
1728 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1729 BinaryOperator *Sub = nullptr;
1730 for (User *U : CmpVariableOperand->users()) {
1731 // A - B, A u< B --> usubo(A, B)
1732 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1733 Sub = cast<BinaryOperator>(U);
1734 break;
1735 }
1736
1737 // A + (-C), A u< C (canonicalized form of (sub A, C))
1738 const APInt *CmpC, *AddC;
1739 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1740 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1741 Sub = cast<BinaryOperator>(U);
1742 break;
1743 }
1744 }
1745 if (!Sub)
1746 return false;
1747
1748 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1749 TLI->getValueType(*DL, Sub->getType()),
1750 Sub->hasNUsesOrMore(1)))
1751 return false;
1752
1753 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1754 Cmp, Intrinsic::usub_with_overflow))
1755 return false;
1756
1757 // Reset callers - do not crash by iterating over a dead instruction.
1758 ModifiedDT = ModifyDT::ModifyInstDT;
1759 return true;
1760}
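// For illustration, a shape combineToUSubWithOverflow() handles
// (value names are hypothetical):
//   %sub = sub i32 %a, %b
//   %cmp = icmp ult i32 %a, %b              ; borrow check
// becomes
//   %m    = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %m, 0
//   %ov   = extractvalue { i32, i1 } %m, 1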
1761
1762/// Sink the given CmpInst into user blocks to reduce the number of virtual
1763/// registers that must be created and coalesced. This is a clear win except on
1764/// targets with multiple condition code registers (PowerPC), where it might
1765/// lose; some adjustment may be wanted there.
1766///
1767/// Return true if any changes are made.
1768static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1769 if (TLI.hasMultipleConditionRegisters())
1770 return false;
1771
1772 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1773 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1774 return false;
1775
1776 // Only insert a cmp in each block once.
1777 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1778
1779 bool MadeChange = false;
1780 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1781 UI != E;) {
1782 Use &TheUse = UI.getUse();
1783 Instruction *User = cast<Instruction>(*UI);
1784
1785 // Preincrement use iterator so we don't invalidate it.
1786 ++UI;
1787
1788 // Don't bother for PHI nodes.
1789 if (isa<PHINode>(User))
1790 continue;
1791
1792 // Figure out which BB this cmp is used in.
1793 BasicBlock *UserBB = User->getParent();
1794 BasicBlock *DefBB = Cmp->getParent();
1795
1796 // If this user is in the same block as the cmp, don't change the cmp.
1797 if (UserBB == DefBB)
1798 continue;
1799
1800 // If we have already inserted a cmp into this block, use it.
1801 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1802
1803 if (!InsertedCmp) {
1804 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1805 assert(InsertPt != UserBB->end());
1806 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1807 Cmp->getOperand(0), Cmp->getOperand(1), "");
1808 InsertedCmp->insertBefore(*UserBB, InsertPt);
1809 // Propagate the debug info.
1810 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1811 }
1812
1813 // Replace a use of the cmp with a use of the new cmp.
1814 TheUse = InsertedCmp;
1815 MadeChange = true;
1816 ++NumCmpUses;
1817 }
1818
1819 // If we removed all uses, nuke the cmp.
1820 if (Cmp->use_empty()) {
1821 Cmp->eraseFromParent();
1822 MadeChange = true;
1823 }
1824
1825 return MadeChange;
1826}
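// For illustration (hypothetical blocks): a cmp defined in %entry whose only
// users are branches in %bb1 and %bb2 gets one clone per user block, so each
// block recomputes the flags right next to its branch instead of carrying an
// i1 value across block boundaries in a virtual register.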
1827
1828/// For pattern like:
1829///
1830/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1831/// ...
1832/// DomBB:
1833/// ...
1834/// br DomCond, TrueBB, CmpBB
1835/// CmpBB: (with DomBB being the single predecessor)
1836/// ...
1837/// Cmp = icmp eq CmpOp0, CmpOp1
1838/// ...
1839///
1840/// This would use two comparisons on targets where the lowering of icmp sgt/slt
1841/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1842/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1843/// After that, DomCond and Cmp can share the same comparison, saving one
1844/// comparison.
1845///
1846/// Return true if any changes are made.
1847static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1848 const TargetLowering &TLI) {
1849 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1850 return false;
1851
1852 ICmpInst::Predicate Pred = Cmp->getPredicate();
1853 if (Pred != ICmpInst::ICMP_EQ)
1854 return false;
1855
1856 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1857 // icmp slt/sgt would introduce more redundant LLVM IR.
1858 for (User *U : Cmp->users()) {
1859 if (isa<BranchInst>(U))
1860 continue;
1861 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1862 continue;
1863 return false;
1864 }
1865
1866 // This is a cheap/incomplete check for dominance - just match a single
1867 // predecessor with a conditional branch.
1868 BasicBlock *CmpBB = Cmp->getParent();
1869 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1870 if (!DomBB)
1871 return false;
1872
1873 // We want to ensure that the only way control gets to the comparison of
1874 // interest is that a less/greater than comparison on the same operands is
1875 // false.
1876 Value *DomCond;
1877 BasicBlock *TrueBB, *FalseBB;
1878 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1879 return false;
1880 if (CmpBB != FalseBB)
1881 return false;
1882
1883 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1884 ICmpInst::Predicate DomPred;
1885 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1886 return false;
1887 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1888 return false;
1889
1890 // Convert the equality comparison to the opposite of the dominating
1891 // comparison and swap the direction for all branch/select users.
1892 // We have conceptually converted:
1893 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1894 // to
1895 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1896 // And similarly for branches.
1897 for (User *U : Cmp->users()) {
1898 if (auto *BI = dyn_cast<BranchInst>(U)) {
1899 assert(BI->isConditional() && "Must be conditional");
1900 BI->swapSuccessors();
1901 continue;
1902 }
1903 if (auto *SI = dyn_cast<SelectInst>(U)) {
1904 // Swap operands
1905 SI->swapValues();
1906 SI->swapProfMetadata();
1907 continue;
1908 }
1909 llvm_unreachable("Must be a branch or a select");
1910 }
1911 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1912 return true;
1913}
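// For illustration (hypothetical values): with "%dom = icmp slt i32 %x, %y"
// branching to CmpBB on false, "%eq = icmp eq i32 %x, %y" in CmpBB is
// rewritten to "icmp sgt i32 %x, %y" and the successors/values of its branch
// and select users are swapped, so both blocks can reuse a single signed
// comparison.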
1914
1915/// Many architectures use the same instruction for both subtract and cmp. Try
1916/// to swap cmp operands to match subtract operations to allow for CSE.
1917static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1918 Value *Op0 = Cmp->getOperand(0);
1919 Value *Op1 = Cmp->getOperand(1);
1920 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1921 isa<Constant>(Op1) || Op0 == Op1)
1922 return false;
1923
1924 // If a subtract already has the same operands as a compare, swapping would be
1925 // bad. If a subtract has the same operands as a compare but in reverse order,
1926 // then swapping is good.
1927 int GoodToSwap = 0;
1928 unsigned NumInspected = 0;
1929 for (const User *U : Op0->users()) {
1930 // Avoid walking many users.
1931 if (++NumInspected > 128)
1932 return false;
1933 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
1934 GoodToSwap++;
1935 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
1936 GoodToSwap--;
1937 }
1938
1939 if (GoodToSwap > 0) {
1940 Cmp->swapOperands();
1941 return true;
1942 }
1943 return false;
1944}
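// For illustration (hypothetical values): given "%d = sub i32 %b, %a" and
// "%c = icmp ugt i32 %a, %b", swapping the compare yields
// "icmp ult i32 %b, %a", so the backend can reuse one subtract-and-set-flags
// instruction for both the subtraction and the comparison.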
1945
1946static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1947 const DataLayout &DL) {
1948 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1949 if (!FCmp)
1950 return false;
1951
1952 // Don't fold if the target offers free fabs and the predicate is legal.
1953 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
1954 if (TLI.isFAbsFree(VT) &&
1955 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
1956 VT.getSimpleVT()))
1957 return false;
1958
1959 // Reverse the canonicalization if it is a FP class test
1960 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
1961 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
1962 };
1963 auto [ClassVal, ClassTest] =
1964 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
1965 FCmp->getOperand(0), FCmp->getOperand(1));
1966 if (!ClassVal)
1967 return false;
1968
1969 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1970 return false;
1971
1972 IRBuilder<> Builder(Cmp);
1973 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
1974 Cmp->replaceAllUsesWith(IsFPClass);
1975 Cmp->eraseFromParent();
1976 return true;
1977}
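// For illustration (hypothetical values; the mask below assumes fcInf == 0x204):
//   %fabs = call double @llvm.fabs.f64(double %x)
//   %cmp  = fcmp oeq double %fabs, 0x7FF0000000000000
// can be turned back into a class test when fabs is not free or the fcmp is
// not legal for this type:
//   %cmp = call i1 @llvm.is.fpclass.f64(double %x, i32 516)   ; fcInf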
1978
1979bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
1980 if (sinkCmpExpression(Cmp, *TLI))
1981 return true;
1982
1983 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1984 return true;
1985
1986 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1987 return true;
1988
1989 if (foldICmpWithDominatingICmp(Cmp, *TLI))
1990 return true;
1991
1992 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
1993 return true;
1994
1995 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
1996 return true;
1997
1998 return false;
1999}
2000
2001/// Duplicate and sink the given 'and' instruction into user blocks where it is
2002/// used in a compare to allow isel to generate better code for targets where
2003/// this operation can be combined.
2004///
2005/// Return true if any changes are made.
2006static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2007 SetOfInstrs &InsertedInsts) {
2008 // Double-check that we're not trying to optimize an instruction that was
2009 // already optimized by some other part of this pass.
2010 assert(!InsertedInsts.count(AndI) &&
2011 "Attempting to optimize already optimized and instruction");
2012 (void)InsertedInsts;
2013
2014 // Nothing to do for single use in same basic block.
2015 if (AndI->hasOneUse() &&
2016 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2017 return false;
2018
2019 // Try to avoid cases where sinking/duplicating is likely to increase register
2020 // pressure.
2021 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2022 !isa<ConstantInt>(AndI->getOperand(1)) &&
2023 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2024 return false;
2025
2026 for (auto *U : AndI->users()) {
2027 Instruction *User = cast<Instruction>(U);
2028
2029 // Only sink 'and' feeding icmp with 0.
2030 if (!isa<ICmpInst>(User))
2031 return false;
2032
2033 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2034 if (!CmpC || !CmpC->isZero())
2035 return false;
2036 }
2037
2038 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2039 return false;
2040
2041 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2042 LLVM_DEBUG(AndI->getParent()->dump());
2043
2044 // Push the 'and' into the same block as the icmp 0. There should only be
2045 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2046 // others, so we don't need to keep track of which BBs we insert into.
2047 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2048 UI != E;) {
2049 Use &TheUse = UI.getUse();
2050 Instruction *User = cast<Instruction>(*UI);
2051
2052 // Preincrement use iterator so we don't invalidate it.
2053 ++UI;
2054
2055 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2056
2057 // Keep the 'and' in the same place if the use is already in the same block.
2058 Instruction *InsertPt =
2059 User->getParent() == AndI->getParent() ? AndI : User;
2060 Instruction *InsertedAnd = BinaryOperator::Create(
2061 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2062 InsertPt->getIterator());
2063 // Propagate the debug info.
2064 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2065
2066 // Replace a use of the 'and' with a use of the new 'and'.
2067 TheUse = InsertedAnd;
2068 ++NumAndUses;
2069 LLVM_DEBUG(User->getParent()->dump());
2070 }
2071
2072 // We removed all uses, nuke the and.
2073 AndI->eraseFromParent();
2074 return true;
2075}
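// For illustration (hypothetical names): if "%m = and i64 %x, 255" defined in
// %entry feeds only "icmp eq i64 %m, 0" users in other blocks, the 'and' is
// re-created next to each compare and the original is erased, so targets with
// a combined test-under-mask instruction can fold each and/icmp pair.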
2076
2077/// Check if the candidates could be combined with a shift instruction, which
2078/// includes:
2079/// 1. Truncate instruction
2080/// 2. An 'and' instruction whose immediate is a mask of the low bits:
2081/// imm & (imm+1) == 0
2082static bool isExtractBitsCandidateUse(Instruction *User) {
2083 if (!isa<TruncInst>(User)) {
2084 if (User->getOpcode() != Instruction::And ||
2085 !isa<ConstantInt>(User->getOperand(1)))
2086 return false;
2087
2088 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2089
2090 if ((Cimm & (Cimm + 1)).getBoolValue())
2091 return false;
2092 }
2093 return true;
2094}
2095
2096/// Sink both the shift and the truncate instruction into the BB of the truncate's user.
2097static bool
2098SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2099 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2100 const TargetLowering &TLI, const DataLayout &DL) {
2101 BasicBlock *UserBB = User->getParent();
2102 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2103 auto *TruncI = cast<TruncInst>(User);
2104 bool MadeChange = false;
2105
2106 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2107 TruncE = TruncI->user_end();
2108 TruncUI != TruncE;) {
2109
2110 Use &TruncTheUse = TruncUI.getUse();
2111 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2112 // Preincrement use iterator so we don't invalidate it.
2113
2114 ++TruncUI;
2115
2116 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2117 if (!ISDOpcode)
2118 continue;
2119
2120 // If the use is actually a legal node, there will not be an
2121 // implicit truncate.
2122 // FIXME: always querying the result type is just an
2123 // approximation; some nodes' legality is determined by the
2124 // operand or other means. There's no good way to find out though.
2125 if (TLI.isOperationLegalOrCustom(
2126 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2127 continue;
2128
2129 // Don't bother for PHI nodes.
2130 if (isa<PHINode>(TruncUser))
2131 continue;
2132
2133 BasicBlock *TruncUserBB = TruncUser->getParent();
2134
2135 if (UserBB == TruncUserBB)
2136 continue;
2137
2138 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2139 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2140
2141 if (!InsertedShift && !InsertedTrunc) {
2142 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2143 assert(InsertPt != TruncUserBB->end());
2144 // Sink the shift
2145 if (ShiftI->getOpcode() == Instruction::AShr)
2146 InsertedShift =
2147 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2148 else
2149 InsertedShift =
2150 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2151 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2152 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2153
2154 // Sink the trunc
2155 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2156 TruncInsertPt++;
2157 // It will go ahead of any debug-info.
2158 TruncInsertPt.setHeadBit(true);
2159 assert(TruncInsertPt != TruncUserBB->end());
2160
2161 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2162 TruncI->getType(), "");
2163 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2164 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2165
2166 MadeChange = true;
2167
2168 TruncTheUse = InsertedTrunc;
2169 }
2170 }
2171 return MadeChange;
2172}
2173
2174/// Sink the shift *right* instruction into user blocks if the uses could
2175/// potentially be combined with this shift instruction to generate a BitExtract
2176/// instruction. It is only applied if the architecture supports a BitExtract
2177/// instruction. Here is an example:
2178/// BB1:
2179/// %x.extract.shift = lshr i64 %arg1, 32
2180/// BB2:
2181/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2182/// ==>
2183///
2184/// BB2:
2185/// %x.extract.shift.1 = lshr i64 %arg1, 32
2186/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2187///
2188/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2189/// instruction.
2190/// Return true if any changes are made.
2191static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2192 const TargetLowering &TLI,
2193 const DataLayout &DL) {
2194 BasicBlock *DefBB = ShiftI->getParent();
2195
2196 /// Only insert instructions in each block once.
2197 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2198
2199 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2200
2201 bool MadeChange = false;
2202 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2203 UI != E;) {
2204 Use &TheUse = UI.getUse();
2205 Instruction *User = cast<Instruction>(*UI);
2206 // Preincrement use iterator so we don't invalidate it.
2207 ++UI;
2208
2209 // Don't bother for PHI nodes.
2210 if (isa<PHINode>(User))
2211 continue;
2212
2214 continue;
2215
2216 BasicBlock *UserBB = User->getParent();
2217
2218 if (UserBB == DefBB) {
2219 // If the shift and truncate instructions are in the same BB, the use of
2220 // the truncate (TruncUse) may still introduce another truncate if its type
2221 // is not legal. In this case, we would like to sink both the shift and the
2222 // truncate instruction to the BB of TruncUse.
2223 // for example:
2224 // BB1:
2225 // i64 shift.result = lshr i64 opnd, imm
2226 // trunc.result = trunc shift.result to i16
2227 //
2228 // BB2:
2229 // ----> We will have an implicit truncate here if the architecture does
2230 // not have i16 compare.
2231 // cmp i16 trunc.result, opnd2
2232 //
2233 if (isa<TruncInst>(User) &&
2234 shiftIsLegal
2235 // If the type of the truncate is legal, no truncate will be
2236 // introduced in other basic blocks.
2237 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2238 MadeChange =
2239 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2240
2241 continue;
2242 }
2243 // If we have already inserted a shift into this block, use it.
2244 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2245
2246 if (!InsertedShift) {
2247 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2248 assert(InsertPt != UserBB->end());
2249
2250 if (ShiftI->getOpcode() == Instruction::AShr)
2251 InsertedShift =
2252 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2253 else
2254 InsertedShift =
2255 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2256 InsertedShift->insertBefore(*UserBB, InsertPt);
2257 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2258
2259 MadeChange = true;
2260 }
2261
2262 // Replace a use of the shift with a use of the new shift.
2263 TheUse = InsertedShift;
2264 }
2265
2266 // If we removed all uses, or there are none, nuke the shift.
2267 if (ShiftI->use_empty()) {
2268 salvageDebugInfo(*ShiftI);
2269 ShiftI->eraseFromParent();
2270 MadeChange = true;
2271 }
2272
2273 return MadeChange;
2274}
2275
2276/// If counting leading or trailing zeros is an expensive operation and a zero
2277/// input is defined, add a check for zero to avoid calling the intrinsic.
2278///
2279/// We want to transform:
2280/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2281///
2282/// into:
2283/// entry:
2284/// %cmpz = icmp eq i64 %A, 0
2285/// br i1 %cmpz, label %cond.end, label %cond.false
2286/// cond.false:
2287/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2288/// br label %cond.end
2289/// cond.end:
2290/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2291///
2292/// If the transform is performed, return true and set ModifiedDT to true.
2293static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2294 LoopInfo &LI,
2295 const TargetLowering *TLI,
2296 const DataLayout *DL, ModifyDT &ModifiedDT,
2297 SmallSet<BasicBlock *, 32> &FreshBBs,
2298 bool IsHugeFunc) {
2299 // If a zero input is undefined, it doesn't make sense to despeculate that.
2300 if (match(CountZeros->getOperand(1), m_One()))
2301 return false;
2302
2303 // If it's cheap to speculate, there's nothing to do.
2304 Type *Ty = CountZeros->getType();
2305 auto IntrinsicID = CountZeros->getIntrinsicID();
2306 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2307 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2308 return false;
2309
2310 // Only handle legal scalar cases. Anything else requires too much work.
2311 unsigned SizeInBits = Ty->getScalarSizeInBits();
2312 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2313 return false;
2314
2315 // Bail if the value is never zero.
2316 Use &Op = CountZeros->getOperandUse(0);
2317 if (isKnownNonZero(Op, *DL))
2318 return false;
2319
2320 // The intrinsic will be sunk behind a compare against zero and branch.
2321 BasicBlock *StartBlock = CountZeros->getParent();
2322 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2323 if (IsHugeFunc)
2324 FreshBBs.insert(CallBlock);
2325
2326 // Create another block after the count zero intrinsic. A PHI will be added
2327 // in this block to select the result of the intrinsic or the bit-width
2328 // constant if the input to the intrinsic is zero.
2329 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2330 // Any debug-info after CountZeros should not be included.
2331 SplitPt.setHeadBit(true);
2332 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2333 if (IsHugeFunc)
2334 FreshBBs.insert(EndBlock);
2335
2336 // Update the LoopInfo. The new blocks are in the same loop as the start
2337 // block.
2338 if (Loop *L = LI.getLoopFor(StartBlock)) {
2339 L->addBasicBlockToLoop(CallBlock, LI);
2340 L->addBasicBlockToLoop(EndBlock, LI);
2341 }
2342
2343 // Set up a builder to create a compare, conditional branch, and PHI.
2344 IRBuilder<> Builder(CountZeros->getContext());
2345 Builder.SetInsertPoint(StartBlock->getTerminator());
2346 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2347
2348 // Replace the unconditional branch that was created by the first split with
2349 // a compare against zero and a conditional branch.
2350 Value *Zero = Constant::getNullValue(Ty);
2351 // Avoid introducing branch on poison. This also replaces the ctz operand.
2352 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2353 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2354 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2355 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2356 StartBlock->getTerminator()->eraseFromParent();
2357
2358 // Create a PHI in the end block to select either the output of the intrinsic
2359 // or the bit width of the operand.
2360 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2361 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2362 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2363 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2364 PN->addIncoming(BitWidth, StartBlock);
2365 PN->addIncoming(CountZeros, CallBlock);
2366
2367 // We are explicitly handling the zero case, so we can set the intrinsic's
2368 // undefined zero argument to 'true'. This will also prevent reprocessing the
2369 // intrinsic; we only despeculate when a zero input is defined.
2370 CountZeros->setArgOperand(1, Builder.getTrue());
2371 ModifiedDT = ModifyDT::ModifyBBDT;
2372 return true;
2373}
2374
2375bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2376 BasicBlock *BB = CI->getParent();
2377
2378 // Lower inline assembly if we can.
2379 // If we found an inline asm expression, and if the target knows how to
2380 // lower it to normal LLVM code, do so now.
2381 if (CI->isInlineAsm()) {
2382 if (TLI->ExpandInlineAsm(CI)) {
2383 // Avoid invalidating the iterator.
2384 CurInstIterator = BB->begin();
2385 // Avoid processing instructions out of order, which could cause
2386 // reuse before a value is defined.
2387 SunkAddrs.clear();
2388 return true;
2389 }
2390 // Sink address computing for memory operands into the block.
2391 if (optimizeInlineAsmInst(CI))
2392 return true;
2393 }
2394
2395 // Align the pointer arguments to this call if the target thinks it's a good
2396 // idea
2397 unsigned MinSize;
2398 Align PrefAlign;
2399 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2400 for (auto &Arg : CI->args()) {
2401 // We want to align both objects whose address is used directly and
2402 // objects whose address is used in casts and GEPs, though it only makes
2403 // sense for GEPs if the offset is a multiple of the desired alignment and
2404 // if size - offset meets the size threshold.
2405 if (!Arg->getType()->isPointerTy())
2406 continue;
2407 APInt Offset(DL->getIndexSizeInBits(
2408 cast<PointerType>(Arg->getType())->getAddressSpace()),
2409 0);
2410 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2411 uint64_t Offset2 = Offset.getLimitedValue();
2412 if (!isAligned(PrefAlign, Offset2))
2413 continue;
2414 AllocaInst *AI;
2415 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2416 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2417 AI->setAlignment(PrefAlign);
2418 // Global variables can only be aligned if they are defined in this
2419 // object (i.e. they are uniquely initialized in this object), and
2420 // over-aligning global variables that have an explicit section is
2421 // forbidden.
2422 GlobalVariable *GV;
2423 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2424 GV->getPointerAlignment(*DL) < PrefAlign &&
2425 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2426 GV->setAlignment(PrefAlign);
2427 }
2428 }
2429 // If this is a memcpy (or similar) then we may be able to improve the
2430 // alignment.
2431 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2432 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2433 MaybeAlign MIDestAlign = MI->getDestAlign();
2434 if (!MIDestAlign || DestAlign > *MIDestAlign)
2435 MI->setDestAlignment(DestAlign);
2436 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2437 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2438 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2439 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2440 MTI->setSourceAlignment(SrcAlign);
2441 }
2442 }
2443
2444 // If we have a cold call site, try to sink addressing computation into the
2445 // cold block. This interacts with our handling for loads and stores to
2446 // ensure that we can fold all uses of a potential addressing computation
2447 // into their uses. TODO: generalize this to work over profiling data
2448 if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
2449 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2450 for (auto &Arg : CI->args()) {
2451 if (!Arg->getType()->isPointerTy())
2452 continue;
2453 unsigned AS = Arg->getType()->getPointerAddressSpace();
2454 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2455 return true;
2456 }
2457
2458 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2459 if (II) {
2460 switch (II->getIntrinsicID()) {
2461 default:
2462 break;
2463 case Intrinsic::assume:
2464 llvm_unreachable("llvm.assume should have been removed already");
2465 case Intrinsic::allow_runtime_check:
2466 case Intrinsic::allow_ubsan_check:
2467 case Intrinsic::experimental_widenable_condition: {
2468 // Give up on future widening opportunities so that we can fold away dead
2469 // paths and merge blocks before going into block-local instruction
2470 // selection.
2471 if (II->use_empty()) {
2472 II->eraseFromParent();
2473 return true;
2474 }
2475 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2476 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2477 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2478 });
2479 return true;
2480 }
2481 case Intrinsic::objectsize:
2482 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2483 case Intrinsic::is_constant:
2484 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2485 case Intrinsic::aarch64_stlxr:
2486 case Intrinsic::aarch64_stxr: {
2487 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2488 if (!ExtVal || !ExtVal->hasOneUse() ||
2489 ExtVal->getParent() == CI->getParent())
2490 return false;
2491 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2492 ExtVal->moveBefore(CI);
2493 // Mark this instruction as "inserted by CGP", so that other
2494 // optimizations don't touch it.
2495 InsertedInsts.insert(ExtVal);
2496 return true;
2497 }
2498
2499 case Intrinsic::launder_invariant_group:
2500 case Intrinsic::strip_invariant_group: {
2501 Value *ArgVal = II->getArgOperand(0);
2502 auto it = LargeOffsetGEPMap.find(II);
2503 if (it != LargeOffsetGEPMap.end()) {
2504 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2505 // Make sure not to have to deal with iterator invalidation
2506 // after possibly adding ArgVal to LargeOffsetGEPMap.
2507 auto GEPs = std::move(it->second);
2508 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2509 LargeOffsetGEPMap.erase(II);
2510 }
2511
2512 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2513 II->eraseFromParent();
2514 return true;
2515 }
2516 case Intrinsic::cttz:
2517 case Intrinsic::ctlz:
2518 // If counting zeros is expensive, try to avoid it.
2519 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2520 IsHugeFunc);
2521 case Intrinsic::fshl:
2522 case Intrinsic::fshr:
2523 return optimizeFunnelShift(II);
2524 case Intrinsic::dbg_assign:
2525 case Intrinsic::dbg_value:
2526 return fixupDbgValue(II);
2527 case Intrinsic::masked_gather:
2528 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2529 case Intrinsic::masked_scatter:
2530 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2531 }
2532
2533 SmallVector<Value *, 2> PtrOps;
2534 Type *AccessTy;
2535 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2536 while (!PtrOps.empty()) {
2537 Value *PtrVal = PtrOps.pop_back_val();
2538 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2539 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2540 return true;
2541 }
2542 }
2543
2544 // From here on out we're working with named functions.
2545 if (!CI->getCalledFunction())
2546 return false;
2547
2548 // Lower all default uses of _chk calls. This is very similar
2549 // to what InstCombineCalls does, but here we are only lowering calls
2550 // to fortified library functions (e.g. __memcpy_chk) that have the default
2551 // "don't know" as the objectsize. Anything else should be left alone.
2552 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2553 IRBuilder<> Builder(CI);
2554 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2555 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2556 CI->eraseFromParent();
2557 return true;
2558 }
2559
2560 return false;
2561}
2562
2563static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2564 const CallInst *CI) {
2565 assert(CI && CI->use_empty());
2566
2567 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2568 switch (II->getIntrinsicID()) {
2569 case Intrinsic::memset:
2570 case Intrinsic::memcpy:
2571 case Intrinsic::memmove:
2572 return true;
2573 default:
2574 return false;
2575 }
2576
2577 LibFunc LF;
2578 Function *Callee = CI->getCalledFunction();
2579 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2580 switch (LF) {
2581 case LibFunc_strcpy:
2582 case LibFunc_strncpy:
2583 case LibFunc_strcat:
2584 case LibFunc_strncat:
2585 return true;
2586 default:
2587 return false;
2588 }
2589
2590 return false;
2591}
2592
2593/// Look for opportunities to duplicate return instructions to the predecessor
2594/// to enable tail call optimizations. The case it is currently looking for is
2595/// the following one. Known intrinsics or library functions that may be tail
2596/// called are taken into account as well.
2597/// @code
2598/// bb0:
2599/// %tmp0 = tail call i32 @f0()
2600/// br label %return
2601/// bb1:
2602/// %tmp1 = tail call i32 @f1()
2603/// br label %return
2604/// bb2:
2605/// %tmp2 = tail call i32 @f2()
2606/// br label %return
2607/// return:
2608/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2609/// ret i32 %retval
2610/// @endcode
2611///
2612/// =>
2613///
2614/// @code
2615/// bb0:
2616/// %tmp0 = tail call i32 @f0()
2617/// ret i32 %tmp0
2618/// bb1:
2619/// %tmp1 = tail call i32 @f1()
2620/// ret i32 %tmp1
2621/// bb2:
2622/// %tmp2 = tail call i32 @f2()
2623/// ret i32 %tmp2
2624/// @endcode
2625bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2626 ModifyDT &ModifiedDT) {
2627 if (!BB->getTerminator())
2628 return false;
2629
2630 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2631 if (!RetI)
2632 return false;
2633
2634 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2635
2636 PHINode *PN = nullptr;
2637 ExtractValueInst *EVI = nullptr;
2638 BitCastInst *BCI = nullptr;
2639 Value *V = RetI->getReturnValue();
2640 if (V) {
2641 BCI = dyn_cast<BitCastInst>(V);
2642 if (BCI)
2643 V = BCI->getOperand(0);
2644
2645 EVI = dyn_cast<ExtractValueInst>(V);
2646 if (EVI) {
2647 V = EVI->getOperand(0);
2648 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2649 return false;
2650 }
2651
2652 PN = dyn_cast<PHINode>(V);
2653 }
2654
2655 if (PN && PN->getParent() != BB)
2656 return false;
2657
2658 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2659 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2660 if (BC && BC->hasOneUse())
2661 Inst = BC->user_back();
2662
2663 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2664 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2665 return false;
2666 };
2667
2668 // Make sure there are no instructions between the first instruction
2669 // and return.
2670 const Instruction *BI = BB->getFirstNonPHI();
2671 // Skip over debug and the bitcast.
2672 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2673 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
2674 BI = BI->getNextNode();
2675 if (BI != RetI)
2676 return false;
2677
2678 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2679 /// call.
2680 const Function *F = BB->getParent();
2681 SmallVector<BasicBlock *, 4> TailCallBBs;
2682 if (PN) {
2683 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2684 // Look through bitcasts.
2685 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2686 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2687 BasicBlock *PredBB = PN->getIncomingBlock(I);
2688 // Make sure the phi value is indeed produced by the tail call.
2689 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2690 TLI->mayBeEmittedAsTailCall(CI) &&
2691 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2692 TailCallBBs.push_back(PredBB);
2693 } else {
2694 // Consider the cases in which the phi value is indirectly produced by
2695 // the tail call, for example when encountering memset(), memmove(),
2696 // strcpy(), whose return value may have been optimized out. In such
2697 // cases, the value needs to be the first function argument.
2698 //
2699 // bb0:
2700 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2701 // br label %return
2702 // return:
2703 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2704 if (PredBB && PredBB->getSingleSuccessor() == BB)
2705 CI = dyn_cast_or_null<CallInst>(
2706 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2707
2708 if (CI && CI->use_empty() &&
2709 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2710 IncomingVal == CI->getArgOperand(0) &&
2711 TLI->mayBeEmittedAsTailCall(CI) &&
2712 attributesPermitTailCall(F, CI, RetI, *TLI))
2713 TailCallBBs.push_back(PredBB);
2714 }
2715 }
2716 } else {
2717 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
2718 for (BasicBlock *Pred : predecessors(BB)) {
2719 if (!VisitedBBs.insert(Pred).second)
2720 continue;
2721 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2722 CallInst *CI = dyn_cast<CallInst>(I);
2723 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2724 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2725 // Either we return void or the return value must be the first
2726 // argument of a known intrinsic or library function.
2727 if (!V || isa<UndefValue>(V) ||
2728 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2729 V == CI->getArgOperand(0))) {
2730 TailCallBBs.push_back(Pred);
2731 }
2732 }
2733 }
2734 }
2735 }
2736
2737 bool Changed = false;
2738 for (auto const &TailCallBB : TailCallBBs) {
2739 // Make sure the call instruction is followed by an unconditional branch to
2740 // the return block.
2741 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2742 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2743 continue;
2744
2745 // Duplicate the return into TailCallBB.
2746 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2747 assert(!VerifyBFIUpdates ||
2748 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
2749 BFI->setBlockFreq(BB,
2750 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
2751 ModifiedDT = ModifyDT::ModifyBBDT;
2752 Changed = true;
2753 ++NumRetsDup;
2754 }
2755
2756 // If we eliminated all predecessors of the block, delete the block now.
2757 if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
2758 BB->eraseFromParent();
2759
2760 return Changed;
2761}
2762
2763//===----------------------------------------------------------------------===//
2764// Memory Optimization
2765//===----------------------------------------------------------------------===//
2766
2767namespace {
2768
2769/// This is an extended version of TargetLowering::AddrMode
2770/// which holds actual Value*'s for register values.
2771struct ExtAddrMode : public TargetLowering::AddrMode {
2772 Value *BaseReg = nullptr;
2773 Value *ScaledReg = nullptr;
2774 Value *OriginalValue = nullptr;
2775 bool InBounds = true;
2776
2777 enum FieldName {
2778 NoField = 0x00,
2779 BaseRegField = 0x01,
2780 BaseGVField = 0x02,
2781 BaseOffsField = 0x04,
2782 ScaledRegField = 0x08,
2783 ScaleField = 0x10,
2784 MultipleFields = 0xff
2785 };
2786
2787 ExtAddrMode() = default;
2788
2789 void print(raw_ostream &OS) const;
2790 void dump() const;
2791
2792 FieldName compare(const ExtAddrMode &other) {
2793 // First check that the types are the same for each field, as differing
2794 // types are something we can't cope with later on.
2795 if (BaseReg && other.BaseReg &&
2796 BaseReg->getType() != other.BaseReg->getType())
2797 return MultipleFields;
2798 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
2799 return MultipleFields;
2800 if (ScaledReg && other.ScaledReg &&
2801 ScaledReg->getType() != other.ScaledReg->getType())
2802 return MultipleFields;
2803
2804 // Conservatively reject 'inbounds' mismatches.
2805 if (InBounds != other.InBounds)
2806 return MultipleFields;
2807
2808 // Check each field to see if it differs.
2809 unsigned Result = NoField;
2810 if (BaseReg != other.BaseReg)
2811 Result |= BaseRegField;
2812 if (BaseGV != other.BaseGV)
2813 Result |= BaseGVField;
2814 if (BaseOffs != other.BaseOffs)
2815 Result |= BaseOffsField;
2816 if (ScaledReg != other.ScaledReg)
2817 Result |= ScaledRegField;
2818 // Don't count 0 as being a different scale, because that actually means
2819 // unscaled (which will already be counted by having no ScaledReg).
2820 if (Scale && other.Scale && Scale != other.Scale)
2821 Result |= ScaleField;
2822
2823 if (llvm::popcount(Result) > 1)
2824 return MultipleFields;
2825 else
2826 return static_cast<FieldName>(Result);
2827 }
2828
2829 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2830 // with no offset.
2831 bool isTrivial() {
2832 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2833 // trivial if at most one of these terms is nonzero, except that BaseGV and
2834 // BaseReg both being zero actually means a null pointer value, which we
2835 // consider to be 'non-zero' here.
2836 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2837 }
2838
2839 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2840 switch (Field) {
2841 default:
2842 return nullptr;
2843 case BaseRegField:
2844 return BaseReg;
2845 case BaseGVField:
2846 return BaseGV;
2847 case ScaledRegField:
2848 return ScaledReg;
2849 case BaseOffsField:
2850 return ConstantInt::get(IntPtrTy, BaseOffs);
2851 }
2852 }
2853
2854 void SetCombinedField(FieldName Field, Value *V,
2855 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2856 switch (Field) {
2857 default:
2858 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2859 break;
2860 case ExtAddrMode::BaseRegField:
2861 BaseReg = V;
2862 break;
2863 case ExtAddrMode::BaseGVField:
2864 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2865 // in the BaseReg field.
2866 assert(BaseReg == nullptr);
2867 BaseReg = V;
2868 BaseGV = nullptr;
2869 break;
2870 case ExtAddrMode::ScaledRegField:
2871 ScaledReg = V;
2872 // If we have a mix of scaled and unscaled addrmodes then we want scale
2873 // to be the scale and not zero.
2874 if (!Scale)
2875 for (const ExtAddrMode &AM : AddrModes)
2876 if (AM.Scale) {
2877 Scale = AM.Scale;
2878 break;
2879 }
2880 break;
2881 case ExtAddrMode::BaseOffsField:
2882 // The offset is no longer a constant, so it goes in ScaledReg with a
2883 // scale of 1.
2884 assert(ScaledReg == nullptr);
2885 ScaledReg = V;
2886 Scale = 1;
2887 BaseOffs = 0;
2888 break;
2889 }
2890 }
2891};
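// For illustration (hypothetical values): an address computed as
// "%base + 8*%idx + 16" would be described here as BaseReg = %base,
// ScaledReg = %idx, Scale = 8, BaseOffs = 16 (and no BaseGV), i.e. the
// [Base + Scale*Index + Offset] form that many targets can fold directly into
// a load or store addressing mode.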
2892
2893#ifndef NDEBUG
2894static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2895 AM.print(OS);
2896 return OS;
2897}
2898#endif
2899
2900#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2901void ExtAddrMode::print(raw_ostream &OS) const {
2902 bool NeedPlus = false;
2903 OS << "[";
2904 if (InBounds)
2905 OS << "inbounds ";
2906 if (BaseGV) {
2907 OS << "GV:";
2908 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2909 NeedPlus = true;
2910 }
2911
2912 if (BaseOffs) {
2913 OS << (NeedPlus ? " + " : "") << BaseOffs;
2914 NeedPlus = true;
2915 }
2916
2917 if (BaseReg) {
2918 OS << (NeedPlus ? " + " : "") << "Base:";
2919 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2920 NeedPlus = true;
2921 }
2922 if (Scale) {
2923 OS << (NeedPlus ? " + " : "") << Scale << "*";
2924 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2925 }
2926
2927 OS << ']';
2928}
2929
2930LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2931 print(dbgs());
2932 dbgs() << '\n';
2933}
2934#endif
2935
2936} // end anonymous namespace
2937
2938namespace {
2939
2940/// This class provides transaction-based operations on the IR.
2941/// Every change made through this class is recorded in the internal state and
2942/// can be undone (rollback) until commit is called.
2943/// CGP does not check if instructions could be speculatively executed when
2944/// moved. Preserving the original location would pessimize the debugging
2945/// experience, as well as negatively impact the quality of sample PGO.
2946class TypePromotionTransaction {
2947 /// This represents the common interface of the individual transaction.
2948 /// Each class implements the logic for doing one specific modification on
2949 /// the IR via the TypePromotionTransaction.
2950 class TypePromotionAction {
2951 protected:
2952 /// The Instruction modified.
2953 Instruction *Inst;
2954
2955 public:
2956 /// Constructor of the action.
2957 /// The constructor performs the related action on the IR.
2958 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2959
2960 virtual ~TypePromotionAction() = default;
2961
2962 /// Undo the modification done by this action.
2963 /// When this method is called, the IR must be in the same state as it was
2964 /// before this action was applied.
2965 /// \pre Undoing the action works if and only if the IR is in the exact same
2966 /// state as it was directly after this action was applied.
2967 virtual void undo() = 0;
2968
2969 /// Commit every change made by this action.
2970 /// When the action's effects on the IR are to be kept, it is important
2971 /// to call this function; otherwise hidden state may be retained forever.
2972 virtual void commit() {
2973 // Nothing to be done, this action is not doing anything.
2974 }
2975 };
2976
2977 /// Utility to remember the position of an instruction.
2978 class InsertionHandler {
2979 /// Position of an instruction.
2980 /// Either an instruction:
2981 /// - Is the first in a basic block: BB is used.
2982 /// - Has a previous instruction: PrevInst is used.
2983 union {
2984 Instruction *PrevInst;
2985 BasicBlock *BB;
2986 } Point;
2987 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
2988
2989 /// Remember whether or not the instruction had a previous instruction.
2990 bool HasPrevInstruction;
2991
2992 public:
2993 /// Record the position of \p Inst.
2994 InsertionHandler(Instruction *Inst) {
2995 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
2996 BasicBlock *BB = Inst->getParent();
2997
2998 // Record where we would have to re-insert the instruction in the sequence
2999 // of DbgRecords, if we ended up reinserting.
3000 if (BB->IsNewDbgInfoFormat)
3001 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3002
3003 if (HasPrevInstruction) {
3004 Point.PrevInst = &*std::prev(Inst->getIterator());
3005 } else {
3006 Point.BB = BB;
3007 }
3008 }
3009
3010 /// Insert \p Inst at the recorded position.
3011 void insert(Instruction *Inst) {
3012 if (HasPrevInstruction) {
3013 if (Inst->getParent())
3014 Inst->removeFromParent();
3015 Inst->insertAfter(&*Point.PrevInst);
3016 } else {
3017 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3018 if (Inst->getParent())
3019 Inst->moveBefore(*Point.BB, Position);
3020 else
3021 Inst->insertBefore(*Point.BB, Position);
3022 }
3023
3024 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3025 }
3026 };
3027
3028 /// Move an instruction before another.
3029 class InstructionMoveBefore : public TypePromotionAction {
3030 /// Original position of the instruction.
3031 InsertionHandler Position;
3032
3033 public:
3034 /// Move \p Inst before \p Before.
3035 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
3036 : TypePromotionAction(Inst), Position(Inst) {
3037 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3038 << "\n");
3039 Inst->moveBefore(Before);
3040 }
3041
3042 /// Move the instruction back to its original position.
3043 void undo() override {
3044 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3045 Position.insert(Inst);
3046 }
3047 };
3048
3049 /// Set the operand of an instruction with a new value.
3050 class OperandSetter : public TypePromotionAction {
3051 /// Original operand of the instruction.
3052 Value *Origin;
3053
3054 /// Index of the modified operand.
3055 unsigned Idx;
3056
3057 public:
3058 /// Set \p Idx operand of \p Inst with \p NewVal.
3059 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3060 : TypePromotionAction(Inst), Idx(Idx) {
3061 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3062 << "for:" << *Inst << "\n"
3063 << "with:" << *NewVal << "\n");
3064 Origin = Inst->getOperand(Idx);
3065 Inst->setOperand(Idx, NewVal);
3066 }
3067
3068 /// Restore the original value of the instruction.
3069 void undo() override {
3070 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3071 << "for: " << *Inst << "\n"
3072 << "with: " << *Origin << "\n");
3073 Inst->setOperand(Idx, Origin);
3074 }
3075 };
3076
3077 /// Hide the operands of an instruction.
3078 /// Behave as if this instruction were not using any of its operands.
3079 class OperandsHider : public TypePromotionAction {
3080 /// The list of original operands.
3081 SmallVector<Value *, 4> OriginalValues;
3082
3083 public:
3084 /// Remove \p Inst from the uses of the operands of \p Inst.
3085 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3086 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3087 unsigned NumOpnds = Inst->getNumOperands();
3088 OriginalValues.reserve(NumOpnds);
3089 for (unsigned It = 0; It < NumOpnds; ++It) {
3090 // Save the current operand.
3091 Value *Val = Inst->getOperand(It);
3092 OriginalValues.push_back(Val);
3093 // Set a dummy one.
3094 // We could use OperandSetter here, but that would imply an overhead
3095 // that we are not willing to pay.
3096 Inst->setOperand(It, UndefValue::get(Val->getType()));
3097 }
3098 }
3099
3100 /// Restore the original list of uses.
3101 void undo() override {
3102 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3103 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3104 Inst->setOperand(It, OriginalValues[It]);
3105 }
3106 };
3107
3108 /// Build a truncate instruction.
3109 class TruncBuilder : public TypePromotionAction {
3110 Value *Val;
3111
3112 public:
3113 /// Build a truncate instruction of \p Opnd producing a \p Ty
3114 /// result.
3115 /// trunc Opnd to Ty.
3116 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3117 IRBuilder<> Builder(Opnd);
3118 Builder.SetCurrentDebugLocation(DebugLoc());
3119 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3120 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3121 }
3122
3123 /// Get the built value.
3124 Value *getBuiltValue() { return Val; }
3125
3126 /// Remove the built instruction.
3127 void undo() override {
3128 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3129 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3130 IVal->eraseFromParent();
3131 }
3132 };
3133
3134 /// Build a sign extension instruction.
3135 class SExtBuilder : public TypePromotionAction {
3136 Value *Val;
3137
3138 public:
3139 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3140 /// result.
3141 /// sext Opnd to Ty.
3142 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3143 : TypePromotionAction(InsertPt) {
3144 IRBuilder<> Builder(InsertPt);
3145 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3146 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3147 }
3148
3149 /// Get the built value.
3150 Value *getBuiltValue() { return Val; }
3151
3152 /// Remove the built instruction.
3153 void undo() override {
3154 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3155 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3156 IVal->eraseFromParent();
3157 }
3158 };
3159
3160 /// Build a zero extension instruction.
3161 class ZExtBuilder : public TypePromotionAction {
3162 Value *Val;
3163
3164 public:
3165 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3166 /// result.
3167 /// zext Opnd to Ty.
3168 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3169 : TypePromotionAction(InsertPt) {
3170 IRBuilder<> Builder(InsertPt);
3171 Builder.SetCurrentDebugLocation(DebugLoc());
3172 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3173 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3174 }
3175
3176 /// Get the built value.
3177 Value *getBuiltValue() { return Val; }
3178
3179 /// Remove the built instruction.
3180 void undo() override {
3181 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3182 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3183 IVal->eraseFromParent();
3184 }
3185 };
3186
3187 /// Mutate an instruction to another type.
3188 class TypeMutator : public TypePromotionAction {
3189 /// Record the original type.
3190 Type *OrigTy;
3191
3192 public:
3193 /// Mutate the type of \p Inst into \p NewTy.
3194 TypeMutator(Instruction *Inst, Type *NewTy)
3195 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3196 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3197 << "\n");
3198 Inst->mutateType(NewTy);
3199 }
3200
3201 /// Mutate the instruction back to its original type.
3202 void undo() override {
3203 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3204 << "\n");
3205 Inst->mutateType(OrigTy);
3206 }
3207 };
3208
3209 /// Replace the uses of an instruction by another instruction.
3210 class UsesReplacer : public TypePromotionAction {
3211 /// Helper structure to keep track of the replaced uses.
3212 struct InstructionAndIdx {
3213 /// The instruction that uses the replaced instruction.
3214 Instruction *Inst;
3215
3216 /// The operand index of Inst at which the replaced value is used.
3217 unsigned Idx;
3218
3219 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3220 : Inst(Inst), Idx(Idx) {}
3221 };
3222
3223 /// Keep track of the original uses (pair Instruction, Index).
3224 SmallVector<InstructionAndIdx, 4> OriginalUses;
3225 /// Keep track of the debug users.
3226 SmallVector<DbgValueInst *, 1> DbgValues;
3227 /// And non-instruction debug-users too.
3228 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3229
3230 /// Keep track of the new value so that we can undo it by replacing
3231 /// instances of the new value with the original value.
3232 Value *New;
3233
3235
3236 public:
3237 /// Replace all the use of \p Inst by \p New.
3238 UsesReplacer(Instruction *Inst, Value *New)
3239 : TypePromotionAction(Inst), New(New) {
3240 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3241 << "\n");
3242 // Record the original uses.
3243 for (Use &U : Inst->uses()) {
3244 Instruction *UserI = cast<Instruction>(U.getUser());
3245 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3246 }
3247 // Record the debug uses separately. They are not in the instruction's
3248 // use list, but they are replaced by RAUW.
3249 findDbgValues(DbgValues, Inst, &DbgVariableRecords);
3250
3251 // Now, we can replace the uses.
3252 Inst->replaceAllUsesWith(New);
3253 }
3254
3255 /// Reassign the original uses of Inst to Inst.
3256 void undo() override {
3257 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3258 for (InstructionAndIdx &Use : OriginalUses)
3259 Use.Inst->setOperand(Use.Idx, Inst);
3260 // RAUW has replaced all original uses with references to the new value,
3261 // including the debug uses. Since we are undoing the replacements,
3262 // the original debug uses must also be reinstated to maintain the
3263 // correctness and utility of debug value instructions.
3264 for (auto *DVI : DbgValues)
3265 DVI->replaceVariableLocationOp(New, Inst);
3266 // Similar story with DbgVariableRecords, the non-instruction
3267 // representation of dbg.values.
3268 for (DbgVariableRecord *DVR : DbgVariableRecords)
3269 DVR->replaceVariableLocationOp(New, Inst);
3270 }
3271 };
3272
3273 /// Remove an instruction from the IR.
3274 class InstructionRemover : public TypePromotionAction {
3275 /// Original position of the instruction.
3276 InsertionHandler Inserter;
3277
3278 /// Helper structure to hide all the links to the instruction. In other
3279 /// words, this helps to pretend that the instruction was removed.
3280 OperandsHider Hider;
3281
3282 /// Keep track of the uses replaced, if any.
3283 UsesReplacer *Replacer = nullptr;
3284
3285 /// Keep track of instructions removed.
3286 SetOfInstrs &RemovedInsts;
3287
3288 public:
3289 /// Remove all references to \p Inst and optionally replace all its
3290 /// uses with New.
3291 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3292 /// \pre If !Inst->use_empty(), then New != nullptr
3293 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3294 Value *New = nullptr)
3295 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3296 RemovedInsts(RemovedInsts) {
3297 if (New)
3298 Replacer = new UsesReplacer(Inst, New);
3299 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3300 RemovedInsts.insert(Inst);
3301 /// The instructions removed here will be freed after completing
3302 /// optimizeBlock() for all blocks as we need to keep track of the
3303 /// removed instructions during promotion.
3304 Inst->removeFromParent();
3305 }
3306
3307 ~InstructionRemover() override { delete Replacer; }
3308
3309 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3310 InstructionRemover(const InstructionRemover &other) = delete;
3311
3312 /// Resurrect the instruction and reassign it to the proper uses if
3313 /// a new value was provided when building this action.
3314 void undo() override {
3315 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3316 Inserter.insert(Inst);
3317 if (Replacer)
3318 Replacer->undo();
3319 Hider.undo();
3320 RemovedInsts.erase(Inst);
3321 }
3322 };
3323
3324public:
3325 /// Restoration point.
3326 /// The restoration point is a pointer to an action instead of an iterator
3327 /// because the iterator may be invalidated but not the pointer.
3328 using ConstRestorationPt = const TypePromotionAction *;
3329
3330 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3331 : RemovedInsts(RemovedInsts) {}
3332
3333 /// Commit all the changes made in this transaction. Return true if any
3334 /// change happened.
3335 bool commit();
3336
3337 /// Undo all the changes made after the given point.
3338 void rollback(ConstRestorationPt Point);
3339
3340 /// Get the current restoration point.
3341 ConstRestorationPt getRestorationPoint() const;
3342
3343 /// \name API for IR modification with state keeping to support rollback.
3344 /// @{
3345 /// Same as Instruction::setOperand.
3346 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3347
3348 /// Same as Instruction::eraseFromParent.
3349 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3350
3351 /// Same as Value::replaceAllUsesWith.
3352 void replaceAllUsesWith(Instruction *Inst, Value *New);
3353
3354 /// Same as Value::mutateType.
3355 void mutateType(Instruction *Inst, Type *NewTy);
3356
3357 /// Same as IRBuilder::createTrunc.
3358 Value *createTrunc(Instruction *Opnd, Type *Ty);
3359
3360 /// Same as IRBuilder::createSExt.
3361 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3362
3363 /// Same as IRBuilder::createZExt.
3364 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3365
3366private:
3367 /// The ordered list of actions made so far.
3368 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
3369
3370 using CommitPt =
3371 SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
3372
3373 SetOfInstrs &RemovedInsts;
3374};
3375
3376} // end anonymous namespace
3377
3378void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3379 Value *NewVal) {
3380 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3381 Inst, Idx, NewVal));
3382}
3383
3384void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3385 Value *NewVal) {
3386 Actions.push_back(
3387 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3388 Inst, RemovedInsts, NewVal));
3389}
3390
3391void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3392 Value *New) {
3393 Actions.push_back(
3394 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3395}
3396
3397void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3398 Actions.push_back(
3399 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3400}
3401
3402Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3403 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3404 Value *Val = Ptr->getBuiltValue();
3405 Actions.push_back(std::move(Ptr));
3406 return Val;
3407}
3408
3409Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3410 Type *Ty) {
3411 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3412 Value *Val = Ptr->getBuiltValue();
3413 Actions.push_back(std::move(Ptr));
3414 return Val;
3415}
3416
3417Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3418 Type *Ty) {
3419 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3420 Value *Val = Ptr->getBuiltValue();
3421 Actions.push_back(std::move(Ptr));
3422 return Val;
3423}
3424
3425TypePromotionTransaction::ConstRestorationPt
3426TypePromotionTransaction::getRestorationPoint() const {
3427 return !Actions.empty() ? Actions.back().get() : nullptr;
3428}
3429
3430bool TypePromotionTransaction::commit() {
3431 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3432 Action->commit();
3433 bool Modified = !Actions.empty();
3434 Actions.clear();
3435 return Modified;
3436}
3437
3438void TypePromotionTransaction::rollback(
3439 TypePromotionTransaction::ConstRestorationPt Point) {
3440 while (!Actions.empty() && Point != Actions.back().get()) {
3441 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3442 Curr->undo();
3443 }
3444}
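// Illustrative usage sketch of the transaction API above (the identifiers
// LastKnownGood, SomeInst, NewTy, NewVal and StillProfitable are placeholders,
// not names from this file):
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   TPT.mutateType(SomeInst, NewTy);          // speculative rewrites are
//   TPT.replaceAllUsesWith(SomeInst, NewVal); // recorded as undoable actions
//   if (!StillProfitable)
//     TPT.rollback(LastKnownGood); // undo the recorded actions in reverse order
//   else
//     TPT.commit();                // keep them and clear the action list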
3445
3446namespace {
3447
3448/// A helper class for matching addressing modes.
3449///
3450/// This encapsulates the logic for matching the target-legal addressing modes.
3451class AddressingModeMatcher {
3452 SmallVectorImpl<Instruction *> &AddrModeInsts;
3453 const TargetLowering &TLI;
3454 const TargetRegisterInfo &TRI;
3455 const DataLayout &DL;
3456 const LoopInfo &LI;
3457 const std::function<const DominatorTree &()> getDTFn;
3458
3459 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3460 /// the memory instruction that we're computing this address for.
3461 Type *AccessTy;
3462 unsigned AddrSpace;
3463 Instruction *MemoryInst;
3464
3465 /// This is the addressing mode that we're building up. This is
3466 /// part of the return value of this addressing mode matching stuff.
3467 ExtAddrMode &AddrMode;
3468
3469 /// The instructions inserted by other CodeGenPrepare optimizations.
3470 const SetOfInstrs &InsertedInsts;
3471
3472 /// A map from the instructions to their type before promotion.
3473 InstrToOrigTy &PromotedInsts;
3474
3475 /// The ongoing transaction where every action should be registered.
3476 TypePromotionTransaction &TPT;
3477
3478 // A GEP whose offset is too large to be folded into the addressing mode.
3479 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3480
3481 /// This is set to true when we should not do profitability checks.
3482 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3483 bool IgnoreProfitability;
3484
3485 /// True if we are optimizing for size.
3486 bool OptSize = false;
3487
3488 ProfileSummaryInfo *PSI;
3489 BlockFrequencyInfo *BFI;
3490
3491 AddressingModeMatcher(
3492 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3493 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3494 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3495 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3496 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3497 TypePromotionTransaction &TPT,
3498 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3499 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3500 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3501 DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
3502 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3503 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3504 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3505 IgnoreProfitability = false;
3506 }
3507
3508public:
3509 /// Find the maximal addressing mode that a load/store of V can fold,
3510 /// given an access type of AccessTy. This returns a list of involved
3511 /// instructions in AddrModeInsts.
3512 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3513 /// optimizations.
3514 /// \p PromotedInsts maps the instructions to their type before promotion.
3515 /// \p TPT The ongoing transaction where every action should be registered.
3516 static ExtAddrMode
3517 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3518 SmallVectorImpl<Instruction *> &AddrModeInsts,
3519 const TargetLowering &TLI, const LoopInfo &LI,
3520 const std::function<const DominatorTree &()> getDTFn,
3521 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3522 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3523 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3524 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3525 ExtAddrMode Result;
3526
3527 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3528 AccessTy, AS, MemoryInst, Result,
3529 InsertedInsts, PromotedInsts, TPT,
3530 LargeOffsetGEP, OptSize, PSI, BFI)
3531 .matchAddr(V, 0);
3532 (void)Success;
3533 assert(Success && "Couldn't select *anything*?");
3534 return Result;
3535 }
3536
3537private:
3538 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3539 bool matchAddr(Value *Addr, unsigned Depth);
3540 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3541 bool *MovedAway = nullptr);
3542 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3543 ExtAddrMode &AMBefore,
3544 ExtAddrMode &AMAfter);
3545 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3546 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3547 Value *PromotedOperand) const;
3548};
3549
3550class PhiNodeSet;
3551
3552/// An iterator for PhiNodeSet.
3553class PhiNodeSetIterator {
3554 PhiNodeSet *const Set;
3555 size_t CurrentIndex = 0;
3556
3557public:
3558 /// The constructor. Start should point to either a valid element, or be equal
3559 /// to the size of the underlying SmallVector of the PhiNodeSet.
3560 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3561 PHINode *operator*() const;
3562 PhiNodeSetIterator &operator++();
3563 bool operator==(const PhiNodeSetIterator &RHS) const;
3564 bool operator!=(const PhiNodeSetIterator &RHS) const;
3565};
3566
3567/// Keeps a set of PHINodes.
3568///
3569/// This is a minimal set implementation for a specific use case:
3570/// It is very fast when there are very few elements, but also provides good
3571/// performance when there are many. It is similar to SmallPtrSet, but also
3572/// provides iteration by insertion order, which is deterministic and stable
3573 /// across runs. It is also similar to SmallSetVector, but provides removal of
3574 /// elements in O(1) time. This is achieved by not actually removing the element
3575 /// from the underlying vector, so it comes at the cost of using more memory, but
3576 /// that is fine, since PhiNodeSets are used as short-lived objects.
3577class PhiNodeSet {
3578 friend class PhiNodeSetIterator;
3579
3580 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3581 using iterator = PhiNodeSetIterator;
3582
3583 /// Keeps the elements in the order of their insertion in the underlying
3584 /// vector. To achieve constant time removal, it never deletes any element.
3585 SmallVector<PHINode *, 32> NodeList;
3586
3587 /// Keeps the elements in the underlying set implementation. This (and not the
3588 /// NodeList defined above) is the source of truth on whether an element
3589 /// is actually in the collection.
3590 MapType NodeMap;
3591
3592 /// Points to the first valid (not deleted) element when the set is not empty
3593 /// and the value is not zero. Equals the size of the underlying vector
3594 /// when the set is empty. When the value is 0, as in the beginning, the
3595 /// first element may or may not be valid.
3596 size_t FirstValidElement = 0;
3597
3598public:
3599 /// Inserts a new element to the collection.
3600 /// \returns true if the element is actually added, i.e. was not in the
3601 /// collection before the operation.
3602 bool insert(PHINode *Ptr) {
3603 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3604 NodeList.push_back(Ptr);
3605 return true;
3606 }
3607 return false;
3608 }
3609
3610 /// Removes the element from the collection.
3611 /// \returns whether the element is actually removed, i.e. was in the
3612 /// collection before the operation.
3613 bool erase(PHINode *Ptr) {
3614 if (NodeMap.erase(Ptr)) {
3615 SkipRemovedElements(FirstValidElement);
3616 return true;
3617 }
3618 return false;
3619 }
3620
3621 /// Removes all elements and clears the collection.
3622 void clear() {
3623 NodeMap.clear();
3624 NodeList.clear();
3625 FirstValidElement = 0;
3626 }
3627
3628 /// \returns an iterator that will iterate the elements in the order of
3629 /// insertion.
3630 iterator begin() {
3631 if (FirstValidElement == 0)
3632 SkipRemovedElements(FirstValidElement);
3633 return PhiNodeSetIterator(this, FirstValidElement);
3634 }
3635
3636 /// \returns an iterator that points to the end of the collection.
3637 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3638
3639 /// Returns the number of elements in the collection.
3640 size_t size() const { return NodeMap.size(); }
3641
3642 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
3643 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3644
3645private:
3646 /// Updates the CurrentIndex so that it will point to a valid element.
3647 ///
3648 /// If the element of NodeList at CurrentIndex is valid, it does not
3649 /// change it. If there are no more valid elements, it updates CurrentIndex
3650 /// to point to the end of the NodeList.
3651 void SkipRemovedElements(size_t &CurrentIndex) {
3652 while (CurrentIndex < NodeList.size()) {
3653 auto it = NodeMap.find(NodeList[CurrentIndex]);
3654 // If the element has been deleted and added again later, NodeMap will
3655 // point to a different index, so CurrentIndex will still be invalid.
3656 if (it != NodeMap.end() && it->second == CurrentIndex)
3657 break;
3658 ++CurrentIndex;
3659 }
3660 }
3661};
3662
3663PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3664 : Set(Set), CurrentIndex(Start) {}
3665
3666PHINode *PhiNodeSetIterator::operator*() const {
3667 assert(CurrentIndex < Set->NodeList.size() &&
3668 "PhiNodeSet access out of range");
3669 return Set->NodeList[CurrentIndex];
3670}
3671
3672PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3673 assert(CurrentIndex < Set->NodeList.size() &&
3674 "PhiNodeSet access out of range");
3675 ++CurrentIndex;
3676 Set->SkipRemovedElements(CurrentIndex);
3677 return *this;
3678}
3679
3680bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3681 return CurrentIndex == RHS.CurrentIndex;
3682}
3683
3684bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3685 return !((*this) == RHS);
3686}
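// A small illustration of the PhiNodeSet behaviour defined above (PN1 and PN2
// stand for arbitrary PHINode pointers, not values from this file):
//
//   PhiNodeSet Set;
//   Set.insert(PN1);        // true: newly added
//   Set.insert(PN1);        // false: already present
//   Set.insert(PN2);
//   Set.erase(PN1);         // O(1): only NodeMap forgets PN1, NodeList keeps it
//   for (PHINode *P : Set)  // iterates the remaining elements (here just PN2)
//     (void)P;              // ... in insertion order, skipping erased slots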
3687
3688/// Keep track of simplification of Phi nodes.
3689 /// Accepts the set of all phi nodes and erases a phi node from this set
3690 /// if it is simplified.
3691class SimplificationTracker {
3692 DenseMap<Value *, Value *> Storage;
3693 const SimplifyQuery &SQ;
3694 // Tracks newly created Phi nodes. The elements are iterated by insertion
3695 // order.
3696 PhiNodeSet AllPhiNodes;
3697 // Tracks newly created Select nodes.
3698 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3699
3700public:
3701 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
3702
3703 Value *Get(Value *V) {
3704 do {
3705 auto SV = Storage.find(V);
3706 if (SV == Storage.end())
3707 return V;
3708 V = SV->second;
3709 } while (true);
3710 }
3711
3712 Value *Simplify(Value *Val) {
3713 SmallVector<Value *, 32> WorkList;
3714 SmallPtrSet<Value *, 32> Visited;
3715 WorkList.push_back(Val);
3716 while (!WorkList.empty()) {
3717 auto *P = WorkList.pop_back_val();
3718 if (!Visited.insert(P).second)
3719 continue;
3720 if (auto *PI = dyn_cast<Instruction>(P))
3721 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
3722 for (auto *U : PI->users())
3723 WorkList.push_back(cast<Value>(U));
3724 Put(PI, V);
3725 PI->replaceAllUsesWith(V);
3726 if (auto *PHI = dyn_cast<PHINode>(PI))
3727 AllPhiNodes.erase(PHI);
3728 if (auto *Select = dyn_cast<SelectInst>(PI))
3729 AllSelectNodes.erase(Select);
3730 PI->eraseFromParent();
3731 }
3732 }
3733 return Get(Val);
3734 }
3735
3736 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
3737
3738 void ReplacePhi(PHINode *From, PHINode *To) {
3739 Value *OldReplacement = Get(From);
3740 while (OldReplacement != From) {
3741 From = To;
3742 To = dyn_cast<PHINode>(OldReplacement);
3743 OldReplacement = Get(From);
3744 }
3745 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
3746 Put(From, To);
3747 From->replaceAllUsesWith(To);
3748 AllPhiNodes.erase(From);
3749 From->eraseFromParent();
3750 }
3751
3752 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
3753
3754 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3755
3756 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3757
3758 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3759
3760 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3761
3762 void destroyNewNodes(Type *CommonType) {
3763 // For safe erasing, replace the uses with dummy value first.
3764 auto *Dummy = PoisonValue::get(CommonType);
3765 for (auto *I : AllPhiNodes) {
3766 I->replaceAllUsesWith(Dummy);
3767 I->eraseFromParent();
3768 }
3769 AllPhiNodes.clear();
3770 for (auto *I : AllSelectNodes) {
3771 I->replaceAllUsesWith(Dummy);
3772 I->eraseFromParent();
3773 }
3774 AllSelectNodes.clear();
3775 }
3776};
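// Sketch of how the Storage chain above behaves (A, B and C are placeholder
// values): after ST.Put(A, B) and ST.Put(B, C), ST.Get(A) follows the chain
// A -> B -> C and returns C, the most recent replacement. Simplify() relies on
// this so that it reports the up-to-date value even when a replacement has
// itself been simplified later.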
3777
3778/// A helper class for combining addressing modes.
3779class AddressingModeCombiner {
3780 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3781 typedef std::pair<PHINode *, PHINode *> PHIPair;
3782
3783private:
3784 /// The addressing modes we've collected.
3785 SmallVector<ExtAddrMode, 16> AddrModes;
3786
3787 /// The field in which the AddrModes differ, when we have more than one.
3788 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3789
3790 /// Are the AddrModes that we have all just equal to their original values?
3791 bool AllAddrModesTrivial = true;
3792
3793 /// Common Type for all different fields in addressing modes.
3794 Type *CommonType = nullptr;
3795
3796 /// SimplifyQuery for simplifyInstruction utility.
3797 const SimplifyQuery &SQ;
3798
3799 /// Original Address.
3800 Value *Original;
3801
3802 /// Common value among addresses
3803 Value *CommonValue = nullptr;
3804
3805public:
3806 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3807 : SQ(_SQ), Original(OriginalValue) {}
3808
3809 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
3810
3811 /// Get the combined AddrMode
3812 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
3813
3814 /// Add a new AddrMode if it's compatible with the AddrModes we already
3815 /// have.
3816 /// \return True iff we succeeded in doing so.
3817 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3818 // Take note of whether we have any non-trivial AddrModes: we need to detect
3819 // when all AddrModes are trivial, because then we would introduce a phi or
3820 // select that just duplicates what's already there.
3821 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3822
3823 // If this is the first addrmode then everything is fine.
3824 if (AddrModes.empty()) {
3825 AddrModes.emplace_back(NewAddrMode);
3826 return true;
3827 }
3828
3829 // Figure out how different this is from the other address modes, which we
3830 // can do just by comparing against the first one given that we only care
3831 // about the cumulative difference.
3832 ExtAddrMode::FieldName ThisDifferentField =
3833 AddrModes[0].compare(NewAddrMode);
3834 if (DifferentField == ExtAddrMode::NoField)
3835 DifferentField = ThisDifferentField;
3836 else if (DifferentField != ThisDifferentField)
3837 DifferentField = ExtAddrMode::MultipleFields;
3838
3839 // If NewAddrMode differs in more than one dimension we cannot handle it.
3840 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3841
3842 // If Scale Field is different then we reject.
3843 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3844
3845 // We must also reject the case where the base offsets differ and the
3846 // scaled register is not null: we cannot handle this case because the
3847 // merge of the different offsets would have to serve as the ScaleReg.
3848 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3849 !NewAddrMode.ScaledReg);
3850
3851 // We must also reject the case where the GVs differ and a BaseReg is
3852 // installed, because we want to use the base reg as the merge of the GV values.
3853 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3854 !NewAddrMode.HasBaseReg);
3855
3856 // Even if NewAddrMode is the same, we still need to collect it, because the
3857 // original value is different. Later we will need all original values
3858 // as anchors when finding the common Phi node.
3859 if (CanHandle)
3860 AddrModes.emplace_back(NewAddrMode);
3861 else
3862 AddrModes.clear();
3863
3864 return CanHandle;
3865 }
3866
3867 /// Combine the addressing modes we've collected into a single
3868 /// addressing mode.
3869 /// \return True iff we successfully combined them or we only had one so
3870 /// didn't need to combine them anyway.
3871 bool combineAddrModes() {
3872 // If we have no AddrModes then they can't be combined.
3873 if (AddrModes.size() == 0)
3874 return false;
3875
3876 // A single AddrMode can trivially be combined.
3877 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
3878 return true;
3879
3880 // If the AddrModes we collected are all just equal to the value they are
3881 // derived from then combining them wouldn't do anything useful.
3882 if (AllAddrModesTrivial)
3883 return false;
3884
3885 if (!addrModeCombiningAllowed())
3886 return false;
3887
3888 // Build a map from <original value, basic block where we saw it> to the
3889 // value of the base register.
3890 // Bail out if there is no common type.
3891 FoldAddrToValueMapping Map;
3892 if (!initializeMap(Map))
3893 return false;
3894
3895 CommonValue = findCommon(Map);
3896 if (CommonValue)
3897 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3898 return CommonValue != nullptr;
3899 }
3900
3901private:
3902 /// `CommonValue` may be a placeholder inserted by us.
3903 /// If the placeholder is not used, we should remove this dead instruction.
3904 void eraseCommonValueIfDead() {
3905 if (CommonValue && CommonValue->getNumUses() == 0)
3906 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
3907 CommonInst->eraseFromParent();
3908 }
3909
3910 /// Initialize Map with anchor values. For each address seen,
3911 /// we set the value of the differing field in this address.
3912 /// At the same time we find a common type for the differing fields, which we
3913 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
3914 /// Return false if no common type is found.
3915 bool initializeMap(FoldAddrToValueMapping &Map) {
3916 // Keep track of keys where the value is null. We will need to replace it
3917 // with constant null when we know the common type.
3918 SmallVector<Value *, 2> NullValue;
3919 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3920 for (auto &AM : AddrModes) {
3921 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3922 if (DV) {
3923 auto *Type = DV->getType();
3924 if (CommonType && CommonType != Type)
3925 return false;
3926 CommonType = Type;
3927 Map[AM.OriginalValue] = DV;
3928 } else {
3929 NullValue.push_back(AM.OriginalValue);
3930 }
3931 }
3932 assert(CommonType && "At least one non-null value must be!");
3933 for (auto *V : NullValue)
3934 Map[V] = Constant::getNullValue(CommonType);
3935 return true;
3936 }
3937
3938 /// We have a mapping between a value A and another value B, where B was a
3939 /// field in the addressing mode represented by A. We also have an original
3940 /// value C representing the address we start with. Traversing from C through
3941 /// phis and selects, we ended up with the A's in the map. This utility function
3942 /// tries to find a value V which is a field in addressing mode C such that,
3943 /// traversing through phi nodes and selects, we end up at the corresponding
3944 /// values B in the map. The utility creates new Phis/Selects if needed.
3945 // The simple example looks as follows:
3946 // BB1:
3947 // p1 = b1 + 40
3948 // br cond BB2, BB3
3949 // BB2:
3950 // p2 = b2 + 40
3951 // br BB3
3952 // BB3:
3953 // p = phi [p1, BB1], [p2, BB2]
3954 // v = load p
3955 // Map is
3956 // p1 -> b1
3957 // p2 -> b2
3958 // Request is
3959 // p -> ?
3960 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3961 Value *findCommon(FoldAddrToValueMapping &Map) {
3962 // Tracks the simplification of newly created phi nodes. The reason we use
3963 // this mapping is that we will add newly created Phi nodes to AddrToBase.
3964 // Simplification of Phi nodes is recursive, so some Phi node may
3965 // be simplified after we added it to AddrToBase. In reality this
3966 // simplification is possible only if the original phis/selects were not
3967 // simplified yet.
3968 // Using this mapping we can find the current value in AddrToBase.
3969 SimplificationTracker ST(SQ);
3970
3971 // First step, DFS to create PHI nodes for all intermediate blocks.
3972 // Also fill traverse order for the second step.
3973 SmallVector<Value *, 32> TraverseOrder;
3974 InsertPlaceholders(Map, TraverseOrder, ST);
3975
3976 // Second Step, fill new nodes by merged values and simplify if possible.
3977 FillPlaceholders(Map, TraverseOrder, ST);
3978
3979 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
3980 ST.destroyNewNodes(CommonType);
3981 return nullptr;
3982 }
3983
3984 // Now we'd like to match the new Phi nodes to existing ones.
3985 unsigned PhiNotMatchedCount = 0;
3986 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
3987 ST.destroyNewNodes(CommonType);
3988 return nullptr;
3989 }
3990
3991 auto *Result = ST.Get(Map.find(Original)->second);
3992 if (Result) {
3993 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3994 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3995 }
3996 return Result;
3997 }
3998
3999 /// Try to match PHI node to Candidate.
4000 /// Matcher tracks the matched Phi nodes.
4001 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4002 SmallSetVector<PHIPair, 8> &Matcher,
4003 PhiNodeSet &PhiNodesToMatch) {
4004 SmallVector<PHIPair, 8> WorkList;
4005 Matcher.insert({PHI, Candidate});
4006 SmallSet<PHINode *, 8> MatchedPHIs;
4007 MatchedPHIs.insert(PHI);
4008 WorkList.push_back({PHI, Candidate});
4009 SmallSet<PHIPair, 8> Visited;
4010 while (!WorkList.empty()) {
4011 auto Item = WorkList.pop_back_val();
4012 if (!Visited.insert(Item).second)
4013 continue;
4014 // We iterate over all incoming values of the Phi to compare them.
4015 // If the values are different, both of them are Phis, the first one is a
4016 // Phi we added (subject to matching), and both are in the same basic
4017 // block, then we can match our pair if the values match. So we state that
4018 // these values match and add the pair to the work list to verify that.
4019 for (auto *B : Item.first->blocks()) {
4020 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4021 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4022 if (FirstValue == SecondValue)
4023 continue;
4024
4025 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4026 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4027
4028 // If one of them is not a Phi, or
4029 // the first one is not a Phi node from the set we'd like to match, or
4030 // the Phi nodes are from different basic blocks, then
4031 // we will not be able to match.
4032 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4033 FirstPhi->getParent() != SecondPhi->getParent())
4034 return false;
4035
4036 // If we already matched them then continue.
4037 if (Matcher.count({FirstPhi, SecondPhi}))
4038 continue;
4039 // So the values are different and do not match yet. We need them to
4040 // match. (But we register no more than one match per PHI node, so that
4041 // we won't later try to replace them twice.)
4042 if (MatchedPHIs.insert(FirstPhi).second)
4043 Matcher.insert({FirstPhi, SecondPhi});
4044 // But we must check it.
4045 WorkList.push_back({FirstPhi, SecondPhi});
4046 }
4047 }
4048 return true;
4049 }
4050
4051 /// For the given set of PHI nodes (in the SimplificationTracker) try
4052 /// to find their equivalents.
4053 /// Returns false if this matching fails and creation of new Phi is disabled.
4054 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4055 unsigned &PhiNotMatchedCount) {
4056 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4057 // order, so the replacements (ReplacePhi) are also done in a deterministic
4058 // order.
4059 SmallSetVector<PHIPair, 8> Matched;
4060 SmallPtrSet<PHINode *, 8> WillNotMatch;
4061 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4062 while (PhiNodesToMatch.size()) {
4063 PHINode *PHI = *PhiNodesToMatch.begin();
4064
4065 // Add ourselves: if no Phi node in the basic block matches, we do not match.
4066 WillNotMatch.clear();
4067 WillNotMatch.insert(PHI);
4068
4069 // Traverse all Phis until we find an equivalent one or fail to do so.
4070 bool IsMatched = false;
4071 for (auto &P : PHI->getParent()->phis()) {
4072 // Skip new Phi nodes.
4073 if (PhiNodesToMatch.count(&P))
4074 continue;
4075 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4076 break;
4077 // If it does not match, collect all Phi nodes from the matcher;
4078 // if we end up with no match, then all these Phi nodes will not match
4079 // later either.
4080 for (auto M : Matched)
4081 WillNotMatch.insert(M.first);
4082 Matched.clear();
4083 }
4084 if (IsMatched) {
4085 // Replace all matched values and erase them.
4086 for (auto MV : Matched)
4087 ST.ReplacePhi(MV.first, MV.second);
4088 Matched.clear();
4089 continue;
4090 }
4091 // If we are not allowed to create new nodes then bail out.
4092 if (!AllowNewPhiNodes)
4093 return false;
4094 // Just remove all seen values in matcher. They will not match anything.
4095 PhiNotMatchedCount += WillNotMatch.size();
4096 for (auto *P : WillNotMatch)
4097 PhiNodesToMatch.erase(P);
4098 }
4099 return true;
4100 }
4101 /// Fill the placeholders with values from predecessors and simplify them.
4102 void FillPlaceholders(FoldAddrToValueMapping &Map,
4103 SmallVectorImpl<Value *> &TraverseOrder,
4104 SimplificationTracker &ST) {
4105 while (!TraverseOrder.empty()) {
4106 Value *Current = TraverseOrder.pop_back_val();
4107 assert(Map.contains(Current) && "No node to fill!!!");
4108 Value *V = Map[Current];
4109
4110 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4111 // CurrentValue also must be Select.
4112 auto *CurrentSelect = cast<SelectInst>(Current);
4113 auto *TrueValue = CurrentSelect->getTrueValue();
4114 assert(Map.contains(TrueValue) && "No True Value!");
4115 Select->setTrueValue(ST.Get(Map[TrueValue]));
4116 auto *FalseValue = CurrentSelect->getFalseValue();
4117 assert(Map.contains(FalseValue) && "No False Value!");
4118 Select->setFalseValue(ST.Get(Map[FalseValue]));
4119 } else {
4120 // Must be a Phi node then.
4121 auto *PHI = cast<PHINode>(V);
4122 // Fill the Phi node with values from predecessors.
4123 for (auto *B : predecessors(PHI->getParent())) {
4124 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4125 assert(Map.contains(PV) && "No predecessor Value!");
4126 PHI->addIncoming(ST.Get(Map[PV]), B);
4127 }
4128 }
4129 Map[Current] = ST.Simplify(V);
4130 }
4131 }
4132
4133 /// Starting from the original value, recursively iterates over the def-use
4134 /// chain up to the known ending values represented in the map. For each
4135 /// traversed phi/select, inserts a placeholder Phi or Select.
4136 /// Reports all newly created Phi/Select nodes by adding them to the set.
4137 /// Also reports the order in which the values have been traversed.
4138 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4139 SmallVectorImpl<Value *> &TraverseOrder,
4140 SimplificationTracker &ST) {
4141 SmallVector<Value *, 32> Worklist;
4142 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4143 "Address must be a Phi or Select node");
4144 auto *Dummy = PoisonValue::get(CommonType);
4145 Worklist.push_back(Original);
4146 while (!Worklist.empty()) {
4147 Value *Current = Worklist.pop_back_val();
4148 // If it is already visited or is an ending value, then skip it.
4149 if (Map.contains(Current))
4150 continue;
4151 TraverseOrder.push_back(Current);
4152
4153 // CurrentValue must be a Phi node or select. All others must be covered
4154 // by anchors.
4155 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4156 // Is it OK to get metadata from OrigSelect?!
4157 // Create a Select placeholder with dummy value.
4158 SelectInst *Select =
4159 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4160 CurrentSelect->getName(),
4161 CurrentSelect->getIterator(), CurrentSelect);
4162 Map[Current] = Select;
4163 ST.insertNewSelect(Select);
4164 // We are interested in True and False values.
4165 Worklist.push_back(CurrentSelect->getTrueValue());
4166 Worklist.push_back(CurrentSelect->getFalseValue());
4167 } else {
4168 // It must be a Phi node then.
4169 PHINode *CurrentPhi = cast<PHINode>(Current);
4170 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4171 PHINode *PHI =
4172 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4173 Map[Current] = PHI;
4174 ST.insertNewPhi(PHI);
4175 append_range(Worklist, CurrentPhi->incoming_values());
4176 }
4177 }
4178 }
4179
4180 bool addrModeCombiningAllowed() {
4181 if (DisableComplexAddrModes)
4182 return false;
4183 switch (DifferentField) {
4184 default:
4185 return false;
4186 case ExtAddrMode::BaseRegField:
4187 return AddrSinkCombineBaseReg;
4188 case ExtAddrMode::BaseGVField:
4189 return AddrSinkCombineBaseGV;
4190 case ExtAddrMode::BaseOffsField:
4191 return AddrSinkCombineBaseOffs;
4192 case ExtAddrMode::ScaledRegField:
4193 return AddrSinkCombineScaledReg;
4194 }
4195 }
4196};
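// Rough usage sketch of the combiner above (AM1 and AM2 denote address modes
// previously produced by AddressingModeMatcher::Match; the names are
// placeholders):
//
//   AddressingModeCombiner Combiner(SQ, OriginalAddr);
//   bool Compatible = Combiner.addNewAddrMode(AM1) &&
//                     Combiner.addNewAddrMode(AM2);
//   if (Compatible && Combiner.combineAddrModes()) {
//     const ExtAddrMode &AM = Combiner.getAddrMode(); // merged addressing mode
//     // ... fold AM into the memory instruction ...
//   }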
4197} // end anonymous namespace
4198
4199/// Try adding ScaleReg*Scale to the current addressing mode.
4200/// Return true and update AddrMode if this addr mode is legal for the target,
4201/// false if not.
4202bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4203 unsigned Depth) {
4204 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4205 // mode. Just process that directly.
4206 if (Scale == 1)
4207 return matchAddr(ScaleReg, Depth);
4208
4209 // If the scale is 0, it takes nothing to add this.
4210 if (Scale == 0)
4211 return true;
4212
4213 // If we already have a scale of this value, we can add to it, otherwise, we
4214 // need an available scale field.
4215 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4216 return false;
4217
4218 ExtAddrMode TestAddrMode = AddrMode;
4219
4220 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4221 // [A+B + A*7] -> [B+A*8].
4222 TestAddrMode.Scale += Scale;
4223 TestAddrMode.ScaledReg = ScaleReg;
4224
4225 // If the new address isn't legal, bail out.
4226 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4227 return false;
4228
4229 // It was legal, so commit it.
4230 AddrMode = TestAddrMode;
4231
4232 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4233 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4234 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4235 // go any further: we can reuse it and cannot eliminate it.
4236 ConstantInt *CI = nullptr;
4237 Value *AddLHS = nullptr;
4238 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4239 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4240 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4241 TestAddrMode.InBounds = false;
4242 TestAddrMode.ScaledReg = AddLHS;
4243 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4244
4245 // If this addressing mode is legal, commit it and remember that we folded
4246 // this instruction.
4247 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4248 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4249 AddrMode = TestAddrMode;
4250 return true;
4251 }
4252 // Restore status quo.
4253 TestAddrMode = AddrMode;
4254 }
4255
4256 // If this is an add recurrence with a constant step, return the increment
4257 // instruction and the canonicalized step.
4258 auto GetConstantStep =
4259 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4260 auto *PN = dyn_cast<PHINode>(V);
4261 if (!PN)
4262 return std::nullopt;
4263 auto IVInc = getIVIncrement(PN, &LI);
4264 if (!IVInc)
4265 return std::nullopt;
4266 // TODO: The result of the intrinsics above is two's complement. However, when
4267 // the IV inc is expressed as add or sub, iv.next is potentially a poison value.
4268 // If it has nuw or nsw flags, we need to make sure that these flags are
4269 // inferrable at the point of the memory instruction. Otherwise we are replacing
4270 // a well-defined two's-complement computation with poison. Currently, to avoid
4271 // the potentially complex analysis needed to prove this, we reject such cases.
4272 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4273 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4274 return std::nullopt;
4275 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4276 return std::make_pair(IVInc->first, ConstantStep->getValue());
4277 return std::nullopt;
4278 };
4279
4280 // Try to account for the following special case:
4281 // 1. ScaleReg is an inductive variable;
4282 // 2. We use it with non-zero offset;
4283 // 3. IV's increment is available at the point of memory instruction.
4284 //
4285 // In this case, we may reuse the IV increment instead of the IV Phi to
4286 // achieve the following advantages:
4287 // 1. If IV step matches the offset, we will have no need in the offset;
4288 // 2. Even if they don't match, we will reduce the overlap of living IV
4289 // and IV increment, that will potentially lead to better register
4290 // assignment.
4291 if (AddrMode.BaseOffs) {
4292 if (auto IVStep = GetConstantStep(ScaleReg)) {
4293 Instruction *IVInc = IVStep->first;
4294 // The following assert is important to ensure a lack of infinite loops.
4295 // This transform is (intentionally) the inverse of the one just above.
4296 // If they don't agree on the definition of an increment, we'd alternate
4297 // back and forth indefinitely.
4298 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4299 APInt Step = IVStep->second;
4300 APInt Offset = Step * AddrMode.Scale;
4301 if (Offset.isSignedIntN(64)) {
4302 TestAddrMode.InBounds = false;
4303 TestAddrMode.ScaledReg = IVInc;
4304 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4305 // If this addressing mode is legal, commit it.
4306 // (Note that we defer the (expensive) domtree base legality check
4307 // to the very last possible point.)
4308 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4309 getDTFn().dominates(IVInc, MemoryInst)) {
4310 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4311 AddrMode = TestAddrMode;
4312 return true;
4313 }
4314 // Restore status quo.
4315 TestAddrMode = AddrMode;
4316 }
4317 }
4318 }
4319
4320 // Otherwise, just return what we have.
4321 return true;
4322}
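// Illustration of the IV-increment special case above (the IR names are
// hypothetical):
//   %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
//   %iv.next = add i64 %iv, 1
// With AddrMode { ScaledReg = %iv, Scale = 4, BaseOffs = 4 }, the matcher may
// switch to ScaledReg = %iv.next with BaseOffs reduced by Scale * Step (here
// 4, giving BaseOffs = 0), provided the resulting mode is legal and %iv.next
// dominates the memory instruction. This reduces the overlap of the live
// ranges of %iv and %iv.next.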
4323
4324/// This is a little filter, which returns true if an addressing computation
4325/// involving I might be folded into a load/store accessing it.
4326/// This doesn't need to be perfect, but needs to accept at least
4327/// the set of instructions that MatchOperationAddr can.
4328 static bool MightBeFoldableInst(Instruction *I) {
4329 switch (I->getOpcode()) {
4330 case Instruction::BitCast:
4331 case Instruction::AddrSpaceCast:
4332 // Don't touch identity bitcasts.
4333 if (I->getType() == I->getOperand(0)->getType())
4334 return false;
4335 return I->getType()->isIntOrPtrTy();
4336 case Instruction::PtrToInt:
4337 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4338 return true;
4339 case Instruction::IntToPtr:
4340 // We know the input is intptr_t, so this is foldable.
4341 return true;
4342 case Instruction::Add:
4343 return true;
4344 case Instruction::Mul:
4345 case Instruction::Shl:
4346 // Can only handle X*C and X << C.
4347 return isa<ConstantInt>(I->getOperand(1));
4348 case Instruction::GetElementPtr:
4349 return true;
4350 default:
4351 return false;
4352 }
4353}
4354
4355/// Check whether or not \p Val is a legal instruction for \p TLI.
4356/// \note \p Val is assumed to be the product of some type promotion.
4357/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4358/// to be legal, as the non-promoted value would have had the same state.
4359 static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4360 const DataLayout &DL, Value *Val) {
4361 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4362 if (!PromotedInst)
4363 return false;
4364 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4365 // If the ISDOpcode is undefined, it was undefined before the promotion.
4366 if (!ISDOpcode)
4367 return true;
4368 // Otherwise, check if the promoted instruction is legal or not.
4369 return TLI.isOperationLegalOrCustom(
4370 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4371}
4372
4373namespace {
4374
4375 /// Helper class to perform type promotion.
4376class TypePromotionHelper {
4377 /// Utility function to add a promoted instruction \p ExtOpnd to
4378 /// \p PromotedInsts and record the type of extension we have seen.
4379 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4380 Instruction *ExtOpnd, bool IsSExt) {
4381 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4382 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4383 if (It != PromotedInsts.end()) {
4384 // If the new extension is same as original, the information in
4385 // PromotedInsts[ExtOpnd] is still correct.
4386 if (It->second.getInt() == ExtTy)
4387 return;
4388
4389 // Now the new extension is different from old extension, we make
4390 // the type information invalid by setting extension type to
4391 // BothExtension.
4392 ExtTy = BothExtension;
4393 }
4394 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4395 }
4396
4397 /// Utility function to query the original type of instruction \p Opnd
4398 /// with a matched extension type. If the extension doesn't match, we
4399 /// cannot use the information we had on the original type.
4400 /// BothExtension doesn't match any extension type.
4401 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4402 Instruction *Opnd, bool IsSExt) {
4403 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4404 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4405 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4406 return It->second.getPointer();
4407 return nullptr;
4408 }
4409
4410 /// Utility function to check whether or not a sign or zero extension
4411 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4412 /// either using the operands of \p Inst or promoting \p Inst.
4413 /// The type of the extension is defined by \p IsSExt.
4414 /// In other words, check if:
4415 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4416 /// #1 Promotion applies:
4417 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4418 /// #2 Operand reuses:
4419 /// ext opnd1 to ConsideredExtType.
4420 /// \p PromotedInsts maps the instructions to their type before promotion.
4421 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4422 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4423
4424 /// Utility function to determine if \p OpIdx should be promoted when
4425 /// promoting \p Inst.
4426 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4427 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4428 }
4429
4430 /// Utility function to promote the operand of \p Ext when this
4431 /// operand is a promotable trunc or sext or zext.
4432 /// \p PromotedInsts maps the instructions to their type before promotion.
4433 /// \p CreatedInstsCost[out] contains the cost of all instructions
4434 /// created to promote the operand of Ext.
4435 /// Newly added extensions are inserted in \p Exts.
4436 /// Newly added truncates are inserted in \p Truncs.
4437 /// Should never be called directly.
4438 /// \return The promoted value which is used instead of Ext.
4439 static Value *promoteOperandForTruncAndAnyExt(
4440 Instruction *Ext, TypePromotionTransaction &TPT,
4441 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4442 SmallVectorImpl<Instruction *> *Exts,
4443 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4444
4445 /// Utility function to promote the operand of \p Ext when this
4446 /// operand is promotable and is not a supported trunc or sext.
4447 /// \p PromotedInsts maps the instructions to their type before promotion.
4448 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4449 /// created to promote the operand of Ext.
4450 /// Newly added extensions are inserted in \p Exts.
4451 /// Newly added truncates are inserted in \p Truncs.
4452 /// Should never be called directly.
4453 /// \return The promoted value which is used instead of Ext.
4454 static Value *promoteOperandForOther(Instruction *Ext,
4455 TypePromotionTransaction &TPT,
4456 InstrToOrigTy &PromotedInsts,
4457 unsigned &CreatedInstsCost,
4458 SmallVectorImpl<Instruction *> *Exts,
4459 SmallVectorImpl<Instruction *> *Truncs,
4460 const TargetLowering &TLI, bool IsSExt);
4461
4462 /// \see promoteOperandForOther.
4463 static Value *signExtendOperandForOther(
4464 Instruction *Ext, TypePromotionTransaction &TPT,
4465 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4466 SmallVectorImpl<Instruction *> *Exts,
4467 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4468 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4469 Exts, Truncs, TLI, true);
4470 }
4471
4472 /// \see promoteOperandForOther.
4473 static Value *zeroExtendOperandForOther(
4474 Instruction *Ext, TypePromotionTransaction &TPT,
4475 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4476 SmallVectorImpl<Instruction *> *Exts,
4477 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4478 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4479 Exts, Truncs, TLI, false);
4480 }
4481
4482public:
4483 /// Type for the utility function that promotes the operand of Ext.
4484 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4485 InstrToOrigTy &PromotedInsts,
4486 unsigned &CreatedInstsCost,
4487 SmallVectorImpl<Instruction *> *Exts,
4488 SmallVectorImpl<Instruction *> *Truncs,
4489 const TargetLowering &TLI);
4490
4491 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4492 /// action to promote the operand of \p Ext instead of using Ext.
4493 /// \return NULL if no promotable action is possible with the current
4494 /// sign extension.
4495 /// \p InsertedInsts keeps track of all the instructions inserted by the
4496 /// other CodeGenPrepare optimizations. This information is important
4497 /// because we do not want to promote these instructions as CodeGenPrepare
4498 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4499 /// \p PromotedInsts maps the instructions to their type before promotion.
4500 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4501 const TargetLowering &TLI,
4502 const InstrToOrigTy &PromotedInsts);
4503};
4504
4505} // end anonymous namespace
4506
4507bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4508 Type *ConsideredExtType,
4509 const InstrToOrigTy &PromotedInsts,
4510 bool IsSExt) {
4511 // The promotion helper does not know how to deal with vector types yet.
4512 // To be able to fix that, we would need to fix the places where we
4513 // statically extend, e.g., constants and such.
4514 if (Inst->getType()->isVectorTy())
4515 return false;
4516
4517 // We can always get through zext.
4518 if (isa<ZExtInst>(Inst))
4519 return true;
4520
4521 // sext(sext) is ok too.
4522 if (IsSExt && isa<SExtInst>(Inst))
4523 return true;
4524
4525 // We can get through binary operator, if it is legal. In other words, the
4526 // binary operator must have a nuw or nsw flag.
4527 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4528 if (isa<OverflowingBinaryOperator>(BinOp) &&
4529 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4530 (IsSExt && BinOp->hasNoSignedWrap())))
4531 return true;
4532
4533 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4534 if ((Inst->getOpcode() == Instruction::And ||
4535 Inst->getOpcode() == Instruction::Or))
4536 return true;
4537
4538 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4539 if (Inst->getOpcode() == Instruction::Xor) {
4540 // Make sure it is not a NOT.
4541 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4542 if (!Cst->getValue().isAllOnes())
4543 return true;
4544 }
4545
4546 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4547 // It may change a poisoned value into a regular value, like
4548 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4549 // poisoned value regular value
4550 // It should be OK since undef covers valid value.
4551 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4552 return true;
4553
4554 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4555 // It may change a poisoned value into a regular value, like
4556 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4557 // poisoned value regular value
4558 // It should be OK since undef covers valid value.
4559 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4560 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4561 if (ExtInst->hasOneUse()) {
4562 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4563 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4564 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4565 if (Cst &&
4566 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4567 return true;
4568 }
4569 }
4570 }
4571
4572 // Check if we can do the following simplification.
4573 // ext(trunc(opnd)) --> ext(opnd)
4574 if (!isa<TruncInst>(Inst))
4575 return false;
4576
4577 Value *OpndVal = Inst->getOperand(0);
4578 // Check if we can use this operand in the extension.
4579 // If the type is larger than the result type of the extension, we cannot.
4580 if (!OpndVal->getType()->isIntegerTy() ||
4581 OpndVal->getType()->getIntegerBitWidth() >
4582 ConsideredExtType->getIntegerBitWidth())
4583 return false;
4584
4585 // If the operand of the truncate is not an instruction, we will not have
4586 // any information on the dropped bits.
4587 // (Actually we could for constant but it is not worth the extra logic).
4588 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4589 if (!Opnd)
4590 return false;
4591
4592 // Check if the source of the type is narrow enough.
4593 // I.e., check that trunc just drops extended bits of the same kind of
4594 // the extension.
4595 // #1 get the type of the operand and check the kind of the extended bits.
4596 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4597 if (OpndType)
4598 ;
4599 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4600 OpndType = Opnd->getOperand(0)->getType();
4601 else
4602 return false;
4603
4604 // #2 check that the truncate just drops extended bits.
4605 return Inst->getType()->getIntegerBitWidth() >=
4606 OpndType->getIntegerBitWidth();
4607}
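// Example (hypothetical IR) of the ext(trunc(opnd)) case checked above:
//   %w = sext i8 %x to i32
//   %t = trunc i32 %w to i16
//   %e = sext i16 %t to i32
// The truncate only drops bits produced by the earlier sign extension (i16
// still holds all 8 original bits), so the outer sext may look through it.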
4608
4609TypePromotionHelper::Action TypePromotionHelper::getAction(
4610 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4611 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4612 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4613 "Unexpected instruction type");
4614 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4615 Type *ExtTy = Ext->getType();
4616 bool IsSExt = isa<SExtInst>(Ext);
4617 // If the operand of the extension is not an instruction, we cannot
4618 // get through.
4619 // If it is, check whether we can get through.
4620 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4621 return nullptr;
4622
4623 // Do not promote if the operand has been added by codegenprepare.
4624 // Otherwise, it means we are undoing an optimization that is likely to be
4625 // redone, thus causing potential infinite loop.
4626 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4627 return nullptr;
4628
4629 // SExt or Trunc instructions.
4630 // Return the related handler.
4631 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4632 isa<ZExtInst>(ExtOpnd))
4633 return promoteOperandForTruncAndAnyExt;
4634
4635 // Regular instruction.
4636 // Abort early if we will have to insert non-free instructions.
4637 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4638 return nullptr;
4639 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4640}
4641
4642Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4643 Instruction *SExt, TypePromotionTransaction &TPT,
4644 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4645 SmallVectorImpl<Instruction *> *Exts,
4646 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4647 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4648 // get through it and this method should not be called.
4649 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4650 Value *ExtVal = SExt;
4651 bool HasMergedNonFreeExt = false;
4652 if (isa<ZExtInst>(SExtOpnd)) {
4653 // Replace s|zext(zext(opnd))
4654 // => zext(opnd).
4655 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4656 Value *ZExt =
4657 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4658 TPT.replaceAllUsesWith(SExt, ZExt);
4659 TPT.eraseInstruction(SExt);
4660 ExtVal = ZExt;
4661 } else {
4662 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4663 // => z|sext(opnd).
4664 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4665 }
4666 CreatedInstsCost = 0;
4667
4668 // Remove dead code.
4669 if (SExtOpnd->use_empty())
4670 TPT.eraseInstruction(SExtOpnd);
4671
4672 // Check if the extension is still needed.
4673 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4674 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4675 if (ExtInst) {
4676 if (Exts)
4677 Exts->push_back(ExtInst);
4678 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4679 }
4680 return ExtVal;
4681 }
4682
4683 // At this point we have: ext ty opnd to ty.
4684 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4685 Value *NextVal = ExtInst->getOperand(0);
4686 TPT.eraseInstruction(ExtInst, NextVal);
4687 return NextVal;
4688}
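Illustrative only: the s|zext(zext(opnd)) => zext(opnd) rewrite above is sound because the inner zero extension clears the new sign bit, so a later sign extension behaves exactly like a zero extension. A sketch with plain integers:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t X = 0x90;                           // i8 value with its top bit set
    uint16_t ZExt16 = X;                        // zext i8 -> i16 (upper byte is 0)
    int32_t SExtOfZExt = (int16_t)ZExt16;       // sext i16 -> i32
    uint32_t DirectZExt = X;                    // zext i8 -> i32
    assert((uint32_t)SExtOfZExt == DirectZExt); // both give 0x00000090
    return 0;
  }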
4689
4690Value *TypePromotionHelper::promoteOperandForOther(
4691 Instruction *Ext, TypePromotionTransaction &TPT,
4692 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4693 SmallVectorImpl<Instruction *> *Exts,
4694 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4695 bool IsSExt) {
4696 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4697 // get through it and this method should not be called.
4698 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4699 CreatedInstsCost = 0;
4700 if (!ExtOpnd->hasOneUse()) {
4701 // ExtOpnd will be promoted.
4702 // All its uses, except Ext, will need to use a truncated value of the
4703 // promoted version.
4704 // Create the truncate now.
4705 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4706 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4707 // Insert it just after the definition.
4708 ITrunc->moveAfter(ExtOpnd);
4709 if (Truncs)
4710 Truncs->push_back(ITrunc);
4711 }
4712
4713 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4714 // Restore the operand of Ext (which has been replaced by the previous call
4715 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4716 TPT.setOperand(Ext, 0, ExtOpnd);
4717 }
4718
4719 // Get through the Instruction:
4720 // 1. Update its type.
4721 // 2. Replace the uses of Ext by Inst.
4722 // 3. Extend each operand that needs to be extended.
4723
4724 // Remember the original type of the instruction before promotion.
4725 // This is useful to know that the high bits are sign extended bits.
4726 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4727 // Step #1.
4728 TPT.mutateType(ExtOpnd, Ext->getType());
4729 // Step #2.
4730 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4731 // Step #3.
4732 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4733 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4734 ++OpIdx) {
4735 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4736 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4737 !shouldExtOperand(ExtOpnd, OpIdx)) {
4738 LLVM_DEBUG(dbgs() << "No need to propagate\n");
4739 continue;
4740 }
4741 // Check if we can statically extend the operand.
4742 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4743 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4744 LLVM_DEBUG(dbgs() << "Statically extend\n");
4745 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4746 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4747 : Cst->getValue().zext(BitWidth);
4748 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4749 continue;
4750 }
4751 // UndefValues are typed, so we have to statically extend them.
4752 if (isa<UndefValue>(Opnd)) {
4753 LLVM_DEBUG(dbgs() << "Statically extend\n");
4754 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4755 continue;
4756 }
4757
4758 // Otherwise we have to explicitly sign or zero extend the operand.
4759 Value *ValForExtOpnd = IsSExt
4760 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
4761 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
4762 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4763 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
4764 if (!InstForExtOpnd)
4765 continue;
4766
4767 if (Exts)
4768 Exts->push_back(InstForExtOpnd);
4769
4770 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
4771 }
4772 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4773 TPT.eraseInstruction(Ext);
4774 return ExtOpnd;
4775}
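Illustrative only: the "statically extend" branch above rewrites constant operands by value, which is where sign and zero extension genuinely differ. A sketch with plain integers:

  #include <cassert>
  #include <cstdint>

  int main() {
    int8_t C = -1;                      // i8 constant 0xFF
    int32_t SExt = (int32_t)C;          // sext: 0xFFFFFFFF, i.e. -1
    uint32_t ZExt = (uint8_t)C;         // zext: 0x000000FF, i.e. 255
    assert(SExt == -1 && ZExt == 255u); // IsSExt selects which value is used
    return 0;
  }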
4776
4777/// Check whether or not promoting an instruction to a wider type is profitable.
4778/// \p NewCost gives the cost of extension instructions created by the
4779/// promotion.
4780/// \p OldCost gives the cost of extension instructions before the promotion
4781/// plus the number of instructions that have been
4782 /// matched in the addressing mode by the promotion.
4783/// \p PromotedOperand is the value that has been promoted.
4784/// \return True if the promotion is profitable, false otherwise.
4785bool AddressingModeMatcher::isPromotionProfitable(
4786 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4787 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4788 << '\n');
4789 // The cost of the new extensions is greater than the cost of the
4790 // old extension plus what we folded.
4791 // This is not profitable.
4792 if (NewCost > OldCost)
4793 return false;
4794 if (NewCost < OldCost)
4795 return true;
4796 // The promotion is neutral but it may help folding the sign extension in
4797 // loads for instance.
4798 // Check that we did not create an illegal instruction.
4799 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4800}
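A minimal sketch of the comparison above; PromotedIsLegal is a hypothetical stand-in for the isPromotedInstructionLegal() query.

  #include <cassert>

  // Not the real implementation; just the shape of the cost comparison.
  static bool isPromotionProfitableSketch(unsigned NewCost, unsigned OldCost,
                                          bool PromotedIsLegal) {
    if (NewCost > OldCost)
      return false;         // promotion creates more work than it folds away
    if (NewCost < OldCost)
      return true;          // strictly cheaper
    return PromotedIsLegal; // neutral: accept only if nothing illegal was created
  }

  int main() {
    assert(!isPromotionProfitableSketch(2, 1, true));
    assert(isPromotionProfitableSketch(1, 2, false));
    assert(isPromotionProfitableSketch(1, 1, true));
    return 0;
  }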
4801
4802/// Given an instruction or constant expr, see if we can fold the operation
4803/// into the addressing mode. If so, update the addressing mode and return
4804/// true, otherwise return false without modifying AddrMode.
4805/// If \p MovedAway is not NULL, it contains the information of whether or
4806/// not AddrInst has to be folded into the addressing mode on success.
4807/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
4808 /// mode because it has been moved away.
4809/// Thus AddrInst must not be added in the matched instructions.
4810/// This state can happen when AddrInst is a sext, since it may be moved away.
4811/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4812/// not be referenced anymore.
4813bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4814 unsigned Depth,
4815 bool *MovedAway) {
4816 // Avoid exponential behavior on extremely deep expression trees.
4817 if (Depth >= 5)
4818 return false;
4819
4820 // By default, all matched instructions stay in place.
4821 if (MovedAway)
4822 *MovedAway = false;
4823
4824 switch (Opcode) {
4825 case Instruction::PtrToInt:
4826 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4827 return matchAddr(AddrInst->getOperand(0), Depth);
4828 case Instruction::IntToPtr: {
4829 auto AS = AddrInst->getType()->getPointerAddressSpace();
4830 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4831 // This inttoptr is a no-op if the integer type is pointer sized.
4832 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4833 return matchAddr(AddrInst->getOperand(0), Depth);
4834 return false;
4835 }
4836 case Instruction::BitCast:
4837 // BitCast is always a noop, and we can handle it as long as it is
4838 // int->int or pointer->pointer (we don't want int<->fp or something).
4839 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4840 // Don't touch identity bitcasts. These were probably put here by LSR,
4841 // and we don't want to mess around with them. Assume it knows what it
4842 // is doing.
4843 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4844 return matchAddr(AddrInst->getOperand(0), Depth);
4845 return false;
4846 case Instruction::AddrSpaceCast: {
4847 unsigned SrcAS =
4848 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4849 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4850 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
4851 return matchAddr(AddrInst->getOperand(0), Depth);
4852 return false;
4853 }
4854 case Instruction::Add: {
4855 // Check to see if we can merge in one operand, then the other. If so, we
4856 // win.
4857 ExtAddrMode BackupAddrMode = AddrMode;
4858 unsigned OldSize = AddrModeInsts.size();
4859 // Start a transaction at this point.
4860 // The LHS may match but not the RHS.
4861 // Therefore, we need a higher level restoration point to undo partially
4862 // matched operation.
4863 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4864 TPT.getRestorationPoint();
4865
4866 // Try to match an integer constant second to increase its chance of ending
4867 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
4868 int First = 0, Second = 1;
4869 if (isa<ConstantInt>(AddrInst->getOperand(First))
4870 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
4871 std::swap(First, Second);
4872 AddrMode.InBounds = false;
4873 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
4874 matchAddr(AddrInst->getOperand(Second), Depth + 1))
4875 return true;
4876
4877 // Restore the old addr mode info.
4878 AddrMode = BackupAddrMode;
4879 AddrModeInsts.resize(OldSize);
4880 TPT.rollback(LastKnownGood);
4881
4882 // Otherwise this was over-aggressive. Try merging operands in the opposite
4883 // order.
4884 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
4885 matchAddr(AddrInst->getOperand(First), Depth + 1))
4886 return true;
4887
4888 // Otherwise we definitely can't merge the ADD in.
4889 AddrMode = BackupAddrMode;
4890 AddrModeInsts.resize(OldSize);
4891 TPT.rollback(LastKnownGood);
4892 break;
4893 }
4894 // case Instruction::Or:
4895 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4896 // break;
4897 case Instruction::Mul:
4898 case Instruction::Shl: {
4899 // Can only handle X*C and X << C.
4900 AddrMode.InBounds = false;
4901 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4902 if (!RHS || RHS->getBitWidth() > 64)
4903 return false;
4904 int64_t Scale = Opcode == Instruction::Shl
4905 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
4906 : RHS->getSExtValue();
4907
4908 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4909 }
4910 case Instruction::GetElementPtr: {
4911 // Scan the GEP. We check whether it contains constant offsets and at most
4912 // one variable offset.
4913 int VariableOperand = -1;
4914 unsigned VariableScale = 0;
4915
4916 int64_t ConstantOffset = 0;
4917 gep_type_iterator GTI = gep_type_begin(AddrInst);
4918 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4919 if (StructType *STy = GTI.getStructTypeOrNull()) {
4920 const StructLayout *SL = DL.getStructLayout(STy);
4921 unsigned Idx =
4922 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4923 ConstantOffset += SL->getElementOffset(Idx);
4924 } else {
4925 TypeSize TS = GTI.getSequentialElementStride(DL);
4926 if (TS.isNonZero()) {
4927 // The optimisations below currently only work for fixed offsets.
4928 if (TS.isScalable())
4929 return false;
4930 int64_t TypeSize = TS.getFixedValue();
4931 if (ConstantInt *CI =
4932 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4933 const APInt &CVal = CI->getValue();
4934 if (CVal.getSignificantBits() <= 64) {
4935 ConstantOffset += CVal.getSExtValue() * TypeSize;
4936 continue;
4937 }
4938 }
4939 // We only allow one variable index at the moment.
4940 if (VariableOperand != -1)
4941 return false;
4942
4943 // Remember the variable index.
4944 VariableOperand = i;
4945 VariableScale = TypeSize;
4946 }
4947 }
4948 }
4949
4950 // A common case is for the GEP to only do a constant offset. In this case,
4951 // just add it to the disp field and check validity.
4952 if (VariableOperand == -1) {
4953 AddrMode.BaseOffs += ConstantOffset;
4954 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
4955 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4956 AddrMode.InBounds = false;
4957 return true;
4958 }
4959 AddrMode.BaseOffs -= ConstantOffset;
4960
4961 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4962 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4963 ConstantOffset > 0) {
4964 // Record GEPs with non-zero offsets as candidates for splitting in
4965 // the event that the offset cannot fit into the r+i addressing mode.
4966 // Simple and common case that only one GEP is used in calculating the
4967 // address for the memory access.
4968 Value *Base = AddrInst->getOperand(0);
4969 auto *BaseI = dyn_cast<Instruction>(Base);
4970 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4971 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4972 (BaseI && !isa<CastInst>(BaseI) &&
4973 !isa<GetElementPtrInst>(BaseI))) {
4974 // Make sure the parent block allows inserting non-PHI instructions
4975 // before the terminator.
4976 BasicBlock *Parent = BaseI ? BaseI->getParent()
4977 : &GEP->getFunction()->getEntryBlock();
4978 if (!Parent->getTerminator()->isEHPad())
4979 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4980 }
4981 }
4982
4983 return false;
4984 }
4985
4986 // Save the valid addressing mode in case we can't match.
4987 ExtAddrMode BackupAddrMode = AddrMode;
4988 unsigned OldSize = AddrModeInsts.size();
4989
4990 // See if the scale and offset amount is valid for this target.
4991 AddrMode.BaseOffs += ConstantOffset;
4992 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4993 AddrMode.InBounds = false;
4994
4995 // Match the base operand of the GEP.
4996 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
4997 // If it couldn't be matched, just stuff the value in a register.
4998 if (AddrMode.HasBaseReg) {
4999 AddrMode = BackupAddrMode;
5000 AddrModeInsts.resize(OldSize);
5001 return false;
5002 }
5003 AddrMode.HasBaseReg = true;
5004 AddrMode.BaseReg = AddrInst->getOperand(0);
5005 }
5006
5007 // Match the remaining variable portion of the GEP.
5008 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5009 Depth)) {
5010 // If it couldn't be matched, try stuffing the base into a register
5011 // instead of matching it, and retrying the match of the scale.
5012 AddrMode = BackupAddrMode;
5013 AddrModeInsts.resize(OldSize);
5014 if (AddrMode.HasBaseReg)
5015 return false;
5016 AddrMode.HasBaseReg = true;
5017 AddrMode.BaseReg = AddrInst->getOperand(0);
5018 AddrMode.BaseOffs += ConstantOffset;
5019 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5020 VariableScale, Depth)) {
5021 // If even that didn't work, bail.
5022 AddrMode = BackupAddrMode;
5023 AddrModeInsts.resize(OldSize);
5024 return false;
5025 }
5026 }
5027
5028 return true;
5029 }
5030 case Instruction::SExt:
5031 case Instruction::ZExt: {
5032 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5033 if (!Ext)
5034 return false;
5035
5036 // Try to move this ext out of the way of the addressing mode.
5037 // Ask for a method for doing so.
5038 TypePromotionHelper::Action TPH =
5039 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5040 if (!TPH)
5041 return false;
5042
5043 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5044 TPT.getRestorationPoint();
5045 unsigned CreatedInstsCost = 0;
5046 unsigned ExtCost = !TLI.isExtFree(Ext);
5047 Value *PromotedOperand =
5048 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5049 // SExt has been moved away.
5050 // Thus either it will be rematched later in the recursive calls or it is
5051 // gone. Anyway, we must not fold it into the addressing mode at this point.
5052 // E.g.,
5053 // op = add opnd, 1
5054 // idx = ext op
5055 // addr = gep base, idx
5056 // is now:
5057 // promotedOpnd = ext opnd <- no match here
5058 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5059 // addr = gep base, op <- match
5060 if (MovedAway)
5061 *MovedAway = true;
5062
5063 assert(PromotedOperand &&
5064 "TypePromotionHelper should have filtered out those cases");
5065
5066 ExtAddrMode BackupAddrMode = AddrMode;
5067 unsigned OldSize = AddrModeInsts.size();
5068
5069 if (!matchAddr(PromotedOperand, Depth) ||
5070 // The total of the new cost is equal to the cost of the created
5071 // instructions.
5072 // The total of the old cost is equal to the cost of the extension plus
5073 // what we have saved in the addressing mode.
5074 !isPromotionProfitable(CreatedInstsCost,
5075 ExtCost + (AddrModeInsts.size() - OldSize),
5076 PromotedOperand)) {
5077 AddrMode = BackupAddrMode;
5078 AddrModeInsts.resize(OldSize);
5079 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5080 TPT.rollback(LastKnownGood);
5081 return false;
5082 }
5083 return true;
5084 }
5085 case Instruction::Call:
5086 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5087 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5088 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5089 if (TLI.addressingModeSupportsTLS(GV))
5090 return matchAddr(AddrInst->getOperand(0), Depth);
5091 }
5092 }
5093 break;
5094 }
5095 return false;
5096}
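Illustrative sketch of how the GetElementPtr case above folds purely constant indices into a single byte offset. The real code queries DataLayout and StructLayout; here the strides are hard-coded assumptions.

  #include <cassert>
  #include <cstdint>

  int main() {
    // Conceptually: gep [10 x i32], ptr %p, i64 0, i64 3
    // Index 0 steps over whole [10 x i32] objects (assumed 40 bytes each);
    // index 1 steps over i32 elements (assumed 4 bytes each). Both indices are
    // constants, so the GEP folds entirely into AddrMode.BaseOffs.
    int64_t BaseOffs = 0;
    const int64_t Strides[] = {40, 4}; // assumed alloc sizes from DataLayout
    const int64_t Indices[] = {0, 3};
    for (int I = 0; I < 2; ++I)
      BaseOffs += Indices[I] * Strides[I];
    assert(BaseOffs == 12);
    return 0;
  }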
5097
5098/// If we can, try to add the value of 'Addr' into the current addressing mode.
5099/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5100/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5101/// for the target.
5102///
5103bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5104 // Start a transaction at this point that we will rollback if the matching
5105 // fails.
5106 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5107 TPT.getRestorationPoint();
5108 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5109 if (CI->getValue().isSignedIntN(64)) {
5110 // Fold in immediates if legal for the target.
5111 AddrMode.BaseOffs += CI->getSExtValue();
5112 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5113 return true;
5114 AddrMode.BaseOffs -= CI->getSExtValue();
5115 }
5116 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5117 // If this is a global variable, try to fold it into the addressing mode.
5118 if (!AddrMode.BaseGV) {
5119 AddrMode.BaseGV = GV;
5120 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5121 return true;
5122 AddrMode.BaseGV = nullptr;
5123 }
5124 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5125 ExtAddrMode BackupAddrMode = AddrMode;
5126 unsigned OldSize = AddrModeInsts.size();
5127
5128 // Check to see if it is possible to fold this operation.
5129 bool MovedAway = false;
5130 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5131 // This instruction may have been moved away. If so, there is nothing
5132 // to check here.
5133 if (MovedAway)
5134 return true;
5135 // Okay, it's possible to fold this. Check to see if it is actually
5136 // *profitable* to do so. We use a simple cost model to avoid increasing
5137 // register pressure too much.
5138 if (I->hasOneUse() ||
5139 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5140 AddrModeInsts.push_back(I);
5141 return true;
5142 }
5143
5144 // It isn't profitable to do this, roll back.
5145 AddrMode = BackupAddrMode;
5146 AddrModeInsts.resize(OldSize);
5147 TPT.rollback(LastKnownGood);
5148 }
5149 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5150 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5151 return true;
5152 TPT.rollback(LastKnownGood);
5153 } else if (isa<ConstantPointerNull>(Addr)) {
5154 // Null pointer gets folded without affecting the addressing mode.
5155 return true;
5156 }
5157
5158 // Worse case, the target should support [reg] addressing modes. :)
5159 if (!AddrMode.HasBaseReg) {
5160 AddrMode.HasBaseReg = true;
5161 AddrMode.BaseReg = Addr;
5162 // Still check for legality in case the target supports [imm] but not [i+r].
5163 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5164 return true;
5165 AddrMode.HasBaseReg = false;
5166 AddrMode.BaseReg = nullptr;
5167 }
5168
5169 // If the base register is already taken, see if we can do [r+r].
5170 if (AddrMode.Scale == 0) {
5171 AddrMode.Scale = 1;
5172 AddrMode.ScaledReg = Addr;
5173 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5174 return true;
5175 AddrMode.Scale = 0;
5176 AddrMode.ScaledReg = nullptr;
5177 }
5178 // Couldn't match.
5179 TPT.rollback(LastKnownGood);
5180 return false;
5181}
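A sketch of the fallback ladder at the end of matchAddr: try to take the value as the base register, then as a scaled register, undoing each attempt if the target rejects the resulting mode. AddrModeSketch and isLegal are hypothetical stand-ins for ExtAddrMode and TLI.isLegalAddressingMode().

  #include <cassert>
  #include <cstdint>

  struct AddrModeSketch {
    const void *BaseReg = nullptr;
    const void *ScaledReg = nullptr;
    int64_t Scale = 0;
    bool HasBaseReg = false;
  };

  // Hypothetical target hook: here, accept at most one register operand.
  static bool isLegal(const AddrModeSketch &AM) {
    return !(AM.HasBaseReg && AM.Scale != 0);
  }

  static bool matchAddrFallback(AddrModeSketch &AM, const void *Addr) {
    if (!AM.HasBaseReg) {
      AM.HasBaseReg = true;
      AM.BaseReg = Addr;
      if (isLegal(AM))
        return true;
      AM.HasBaseReg = false;    // undo the attempt
      AM.BaseReg = nullptr;
    }
    if (AM.Scale == 0) {
      AM.Scale = 1;
      AM.ScaledReg = Addr;
      if (isLegal(AM))
        return true;
      AM.Scale = 0;             // undo the attempt
      AM.ScaledReg = nullptr;
    }
    return false;
  }

  int main() {
    AddrModeSketch AM;
    int Dummy;
    assert(matchAddrFallback(AM, &Dummy) && AM.HasBaseReg); // lands in [reg]
    return 0;
  }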
5182
5183/// Check to see if all uses of OpVal by the specified inline asm call are due
5184/// to memory operands. If so, return true, otherwise return false.
5185 static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5186 const TargetLowering &TLI,
5187 const TargetRegisterInfo &TRI) {
5188 const Function *F = CI->getFunction();
5189 TargetLowering::AsmOperandInfoVector TargetConstraints =
5190 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, *CI);
5191
5192 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5193 // Compute the constraint code and ConstraintType to use.
5194 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5195
5196 // If this asm operand is our Value*, and if it isn't an indirect memory
5197 // operand, we can't fold it! TODO: Also handle C_Address?
5198 if (OpInfo.CallOperandVal == OpVal &&
5199 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5200 !OpInfo.isIndirect))
5201 return false;
5202 }
5203
5204 return true;
5205}
5206
5207/// Recursively walk all the uses of I until we find a memory use.
5208/// If we find an obviously non-foldable instruction, return true.
5209/// Add accessed addresses and types to MemoryUses.
5210 static bool FindAllMemoryUses(
5211 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5212 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5213 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5214 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5215 // If we already considered this instruction, we're done.
5216 if (!ConsideredInsts.insert(I).second)
5217 return false;
5218
5219 // If this is an obviously unfoldable instruction, bail out.
5220 if (!MightBeFoldableInst(I))
5221 return true;
5222
5223 // Loop over all the uses, recursively processing them.
5224 for (Use &U : I->uses()) {
5225 // Conservatively return true if we're seeing a large number or a deep chain
5226 // of users. This avoids excessive compilation times in pathological cases.
5227 if (SeenInsts++ >= MaxAddressUsersToScan)
5228 return true;
5229
5230 Instruction *UserI = cast<Instruction>(U.getUser());
5231 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5232 MemoryUses.push_back({&U, LI->getType()});
5233 continue;
5234 }
5235
5236 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5237 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5238 return true; // Storing addr, not into addr.
5239 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5240 continue;
5241 }
5242
5243 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5244 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5245 return true; // Storing addr, not into addr.
5246 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5247 continue;
5248 }
5249
5250 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5251 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5252 return true; // Storing addr, not into addr.
5253 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5254 continue;
5255 }
5256
5257 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5258 if (CI->hasFnAttr(Attribute::Cold)) {
5259 // If this is a cold call, we can sink the addressing calculation into
5260 // the cold path. See optimizeCallInst
5261 bool OptForSize =
5262 OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
5263 if (!OptForSize)
5264 continue;
5265 }
5266
5267 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5268 if (!IA)
5269 return true;
5270
5271 // If this is a memory operand, we're cool, otherwise bail out.
5272 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5273 return true;
5274 continue;
5275 }
5276
5277 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5278 PSI, BFI, SeenInsts))
5279 return true;
5280 }
5281
5282 return false;
5283}
5284
5285 static bool FindAllMemoryUses(
5286 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5287 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5288 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5289 unsigned SeenInsts = 0;
5290 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5291 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5292 PSI, BFI, SeenInsts);
5293}
5294
5295
5296/// Return true if Val is already known to be live at the use site that we're
5297/// folding it into. If so, there is no cost to include it in the addressing
5298/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5299/// instruction already.
5300bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5301 Value *KnownLive1,
5302 Value *KnownLive2) {
5303 // If Val is either of the known-live values, we know it is live!
5304 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5305 return true;
5306
5307 // All values other than instructions and arguments (e.g. constants) are live.
5308 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5309 return true;
5310
5311 // If Val is a constant-sized alloca in the entry block, it is live; this is
5312 // true because it is just a reference to the stack/frame pointer, which is
5313 // live for the whole function.
5314 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5315 if (AI->isStaticAlloca())
5316 return true;
5317
5318 // Check to see if this value is already used in the memory instruction's
5319 // block. If so, it's already live into the block at the very least, so we
5320 // can reasonably fold it.
5321 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5322}
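A condensed sketch of the liveness shortcut above, with hypothetical boolean parameters standing in for the isa<> and isUsedInBasicBlock() checks: values that are already known live, or that are not instructions or arguments at all, add no cost when folded.

  #include <cassert>

  static bool alreadyLiveSketch(const void *Val, const void *KnownLive1,
                                const void *KnownLive2, bool IsInstOrArg,
                                bool IsStaticAlloca, bool UsedInSameBlock) {
    if (!Val || Val == KnownLive1 || Val == KnownLive2)
      return true;            // trivially free
    if (!IsInstOrArg)
      return true;            // constants and globals are always available
    if (IsStaticAlloca)
      return true;            // just an offset from the stack/frame pointer
    return UsedInSameBlock;   // otherwise it must already reach this block
  }

  int main() {
    int A, B;
    assert(alreadyLiveSketch(&A, &A, &B, true, false, false));
    assert(!alreadyLiveSketch(&A, &B, nullptr, true, false, false));
    return 0;
  }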
5323
5324/// It is possible for the addressing mode of the machine to fold the specified
5325/// instruction into a load or store that ultimately uses it.
5326/// However, the specified instruction has multiple uses.
5327/// Given this, it may actually increase register pressure to fold it
5328/// into the load. For example, consider this code:
5329///
5330/// X = ...
5331/// Y = X+1
5332/// use(Y) -> nonload/store
5333/// Z = Y+1
5334/// load Z
5335///
5336/// In this case, Y has multiple uses, and can be folded into the load of Z
5337/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5338/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5339/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5340/// number of computations either.
5341///
5342/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5343/// X was live across 'load Z' for other reasons, we actually *would* want to
5344/// fold the addressing mode in the Z case. This would make Y die earlier.
5345bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5346 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5347 if (IgnoreProfitability)
5348 return true;
5349
5350 // AMBefore is the addressing mode before this instruction was folded into it,
5351 // and AMAfter is the addressing mode after the instruction was folded. Get
5352 // the set of registers referenced by AMAfter and subtract out those
5353 // referenced by AMBefore: this is the set of values which folding in this
5354 // address extends the lifetime of.
5355 //
5356 // Note that there are only two potential values being referenced here,
5357 // BaseReg and ScaleReg (global addresses are always available, as are any
5358 // folded immediates).
5359 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5360
5361 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5362 // lifetime wasn't extended by adding this instruction.
5363 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5364 BaseReg = nullptr;
5365 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5366 ScaledReg = nullptr;
5367
5368 // If folding this instruction (and its subexprs) didn't extend any live
5369 // ranges, we're ok with it.
5370 if (!BaseReg && !ScaledReg)
5371 return true;
5372
5373 // If all uses of this instruction can have the address mode sunk into them,
5374 // we can remove the addressing mode and effectively trade one live register
5375 // for another (at worst.) In this context, folding an addressing mode into
5376 // the use is just a particularly nice way of sinking it.
5377 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5378 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5379 return false; // Has a non-memory, non-foldable use!
5380
5381 // Now that we know that all uses of this instruction are part of a chain of
5382 // computation involving only operations that could theoretically be folded
5383 // into a memory use, loop over each of these memory operation uses and see
5384 // if they could *actually* fold the instruction. The assumption is that
5385 // addressing modes are cheap and that duplicating the computation involved
5386 // many times is worthwhile, even on a fastpath. For sinking candidates
5387 // (i.e. cold call sites), this serves as a way to prevent excessive code
5388 // growth, since most architectures have some reasonably small and fast way to
5389 // compute an effective address (e.g., LEA on x86).
5390 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5391 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5392 Value *Address = Pair.first->get();
5393 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5394 Type *AddressAccessTy = Pair.second;
5395 unsigned AS = Address->getType()->getPointerAddressSpace();
5396
5397 // Do a match against the root of this address, ignoring profitability. This
5398 // will tell us if the addressing mode for the memory operation will
5399 // *actually* cover the shared instruction.
5400 ExtAddrMode Result;
5401 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5402 0);
5403 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5404 TPT.getRestorationPoint();
5405 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5406 AddressAccessTy, AS, UserI, Result,
5407 InsertedInsts, PromotedInsts, TPT,
5408 LargeOffsetGEP, OptSize, PSI, BFI);
5409 Matcher.IgnoreProfitability = true;
5410 bool Success = Matcher.matchAddr(Address, 0);
5411 (void)Success;
5412 assert(Success && "Couldn't select *anything*?");
5413
5414 // The match was only to check profitability; the changes made are not
5415 // part of the original matcher. Therefore, they should be dropped;
5416 // otherwise the original matcher will not present the right state.
5417 TPT.rollback(LastKnownGood);
5418
5419 // If the match didn't cover I, then it won't be shared by it.
5420 if (!is_contained(MatchedAddrModeInsts, I))
5421 return false;
5422
5423 MatchedAddrModeInsts.clear();
5424 }
5425
5426 return true;
5427}
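Condensed sketch of the overall decision in isProfitableToFoldIntoAddressingMode: folding a multi-use computation is allowed when it does not extend any live range, or when every memory use can fold the same computation itself. The boolean inputs are hypothetical summaries of the checks performed above.

  #include <cassert>
  #include <vector>

  static bool profitableToFoldSketch(bool ExtendsAnyLiveRange,
                                     bool HasNonMemoryUse,
                                     const std::vector<bool> &UseCoversSharedInst) {
    if (!ExtendsAnyLiveRange)
      return true;            // free: BaseReg/ScaledReg were live anyway
    if (HasNonMemoryUse)
      return false;           // cannot sink into a non-foldable user
    for (bool Covered : UseCoversSharedInst)
      if (!Covered)
        return false;         // some memory use would not share the addressing
    return true;
  }

  int main() {
    assert(profitableToFoldSketch(true, false, {true, true}));
    assert(!profitableToFoldSketch(true, false, {true, false}));
    return 0;
  }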
5428
5429/// Return true if the specified values are defined in a
5430/// different basic block than BB.
5431static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5432 if (Instruction *I = dyn_cast<Instruction>(V))
5433 return I->getParent() != BB;
5434 return false;
5435}
5436
5437 /// Sink addressing mode computation immediately before MemoryInst if doing so
5438/// can be done without increasing register pressure. The need for the
5439/// register pressure constraint means this can end up being an all or nothing
5440/// decision for all uses of the same addressing computation.
5441///
5442/// Load and Store Instructions often have addressing modes that can do
5443/// significant amounts of computation. As such, instruction selection will try
5444/// to get the load or store to do as much computation as possible for the
5445/// program. The problem is that isel can only see within a single block. As
5446/// such, we sink as much legal addressing mode work into the block as possible.
5447///
5448/// This method is used to optimize both load/store and inline asms with memory
5449/// operands. It's also used to sink addressing computations feeding into cold
5450/// call sites into their (cold) basic block.
5451///
5452/// The motivation for handling sinking into cold blocks is that doing so can
5453/// both enable other address mode sinking (by satisfying the register pressure
5454/// constraint above), and reduce register pressure globally (by removing the
5455/// addressing mode computation from the fast path entirely.).
5456bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5457 Type *AccessTy, unsigned AddrSpace) {
5458 Value *Repl = Addr;
5459
5460 // Try to collapse single-value PHI nodes. This is necessary to undo
5461 // unprofitable PRE transformations.
5462 SmallVector<Value *, 8> worklist;
5463 SmallPtrSet<Value *, 16> Visited;
5464 worklist.push_back(Addr);
5465
5466 // Use a worklist to iteratively look through PHI and select nodes, and
5467 // ensure that the addressing mode obtained from the non-PHI/select roots of
5468 // the graph are compatible.
5469 bool PhiOrSelectSeen = false;
5470 SmallVector<Instruction *, 16> AddrModeInsts;
5471 const SimplifyQuery SQ(*DL, TLInfo);
5472 AddressingModeCombiner AddrModes(SQ, Addr);
5473 TypePromotionTransaction TPT(RemovedInsts);
5474 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5475 TPT.getRestorationPoint();
5476 while (!worklist.empty()) {
5477 Value *V = worklist.pop_back_val();
5478
5479 // We allow traversing cyclic Phi nodes.
5480 // In case of success after this loop we ensure that traversing through
5481 // Phi nodes ends up with all cases to compute address of the form
5482 // BaseGV + Base + Scale * Index + Offset
5483 // where Scale and Offset are constants and BaseGV, Base and Index
5484 // are exactly the same Values in all cases.
5485 // It means that BaseGV, Scale and Offset dominate our memory instruction
5486 // and have the same value as they had in address computation represented
5487 // as Phi. So we can safely sink address computation to memory instruction.
5488 if (!Visited.insert(V).second)
5489 continue;
5490
5491 // For a PHI node, push all of its incoming values.
5492 if (PHINode *P = dyn_cast<PHINode>(V)) {
5493 append_range(worklist, P->incoming_values());
5494 PhiOrSelectSeen = true;
5495 continue;
5496 }
5497 // Similar for select.
5498 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5499 worklist.push_back(SI->getFalseValue());
5500 worklist.push_back(SI->getTrueValue());
5501 PhiOrSelectSeen = true;
5502 continue;
5503 }
5504
5505 // For non-PHIs, determine the addressing mode being computed. Note that
5506 // the result may differ depending on what other uses our candidate
5507 // addressing instructions might have.
5508 AddrModeInsts.clear();
5509 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5510 0);
5511 // Defer the query (and possible computation of) the dom tree to point of
5512 // actual use. It's expected that most address matches don't actually need
5513 // the domtree.
5514 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5515 Function *F = MemoryInst->getParent()->getParent();
5516 return this->getDT(*F);
5517 };
5518 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5519 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5520 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5521 BFI.get());
5522
5523 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5524 if (GEP && !NewGEPBases.count(GEP)) {
5525 // If splitting the underlying data structure can reduce the offset of a
5526 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5527 // previously split data structures.
5528 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5529 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5530 }
5531
5532 NewAddrMode.OriginalValue = V;
5533 if (!AddrModes.addNewAddrMode(NewAddrMode))
5534 break;
5535 }
5536
5537 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5538 // or we have multiple but either couldn't combine them or combining them
5539 // wouldn't do anything useful, bail out now.
5540 if (!AddrModes.combineAddrModes()) {
5541 TPT.rollback(LastKnownGood);
5542 return false;
5543 }
5544 bool Modified = TPT.commit();
5545
5546 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5547 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5548
5549 // If all the instructions matched are already in this BB, don't do anything.
5550 // If we saw a Phi node then it is definitely not local, and if we saw a
5551 // select then we want to push the address calculation past it even if it's
5552 // already in this BB.
5553 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5554 return IsNonLocalValue(V, MemoryInst->getParent());
5555 })) {
5556 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5557 << "\n");
5558 return Modified;
5559 }
5560
5561 // Insert this computation right after this user. Since our caller is
5562 // scanning from the top of the BB to the bottom, reuse of the expr is
5563 // guaranteed to happen later.
5564 IRBuilder<> Builder(MemoryInst);
5565
5566 // Now that we have determined the addressing expression we want to use and
5567 // know that we have to sink it into this block, check to see if we have already
5568 // done this for some other load/store instr in this block. If so, reuse
5569 // the computation. Before attempting reuse, check if the address is valid
5570 // as it may have been erased.
5571
5572 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5573
5574 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5575 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5576 if (SunkAddr) {
5577 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5578 << " for " << *MemoryInst << "\n");
5579 if (SunkAddr->getType() != Addr->getType()) {
5580 if (SunkAddr->getType()->getPointerAddressSpace() !=
5581 Addr->getType()->getPointerAddressSpace() &&
5582 !DL->isNonIntegralPointerType(Addr->getType())) {
5583 // There are two reasons the address spaces might not match: a no-op
5584 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5585 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5586 // TODO: allow bitcast between different address space pointers with the
5587 // same size.
5588 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5589 SunkAddr =
5590 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5591 } else
5592 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5593 }
5594 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5595 SubtargetInfo->addrSinkUsingGEPs())) {
5596 // By default, we use the GEP-based method when AA is used later. This
5597 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5598 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5599 << " for " << *MemoryInst << "\n");
5600 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5601
5602 // First, find the pointer.
5603 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5604 ResultPtr = AddrMode.BaseReg;
5605 AddrMode.BaseReg = nullptr;
5606 }
5607
5608 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5609 // We can't add more than one pointer together, nor can we scale a
5610 // pointer (both of which seem meaningless).
5611 if (ResultPtr || AddrMode.Scale != 1)
5612 return Modified;
5613
5614 ResultPtr = AddrMode.ScaledReg;
5615 AddrMode.Scale = 0;
5616 }
5617
5618 // It is only safe to sign extend the BaseReg if we know that the math
5619 // required to create it did not overflow before we extend it. Since
5620 // the original IR value was tossed in favor of a constant back when
5621 // the AddrMode was created we need to bail out gracefully if widths
5622 // do not match instead of extending it.
5623 //
5624 // (See below for code to add the scale.)
5625 if (AddrMode.Scale) {
5626 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
5627 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5628 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5629 return Modified;
5630 }
5631
5632 GlobalValue *BaseGV = AddrMode.BaseGV;
5633 if (BaseGV != nullptr) {
5634 if (ResultPtr)
5635 return Modified;
5636
5637 if (BaseGV->isThreadLocal()) {
5638 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5639 } else {
5640 ResultPtr = BaseGV;
5641 }
5642 }
5643
5644 // If the real base value actually came from an inttoptr, then the matcher
5645 // will look through it and provide only the integer value. In that case,
5646 // use it here.
5647 if (!DL->isNonIntegralPointerType(Addr->getType())) {
5648 if (!ResultPtr && AddrMode.BaseReg) {
5649 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
5650 "sunkaddr");
5651 AddrMode.BaseReg = nullptr;
5652 } else if (!ResultPtr && AddrMode.Scale == 1) {
5653 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
5654 "sunkaddr");
5655 AddrMode.Scale = 0;
5656 }
5657 }
5658
5659 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
5660 !AddrMode.BaseOffs) {
5661 SunkAddr = Constant::getNullValue(Addr->getType());
5662 } else if (!ResultPtr) {
5663 return Modified;
5664 } else {
5665 Type *I8PtrTy =
5666 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
5667
5668 // Start with the base register. Do this first so that subsequent address
5669 // matching finds it last, which will prevent it from trying to match it
5670 // as the scaled value in case it happens to be a mul. That would be
5671 // problematic if we've sunk a different mul for the scale, because then
5672 // we'd end up sinking both muls.
5673 if (AddrMode.BaseReg) {
5674 Value *V = AddrMode.BaseReg;
5675 if (V->getType() != IntPtrTy)
5676 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5677
5678 ResultIndex = V;
5679 }
5680
5681 // Add the scale value.
5682 if (AddrMode.Scale) {
5683 Value *V = AddrMode.ScaledReg;
5684 if (V->getType() == IntPtrTy) {
5685 // done.
5686 } else {
5687 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5688 cast<IntegerType>(V->getType())->getBitWidth() &&
5689 "We can't transform if ScaledReg is too narrow");
5690 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5691 }
5692
5693 if (AddrMode.Scale != 1)
5694 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5695 "sunkaddr");
5696 if (ResultIndex)
5697 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5698 else
5699 ResultIndex = V;
5700 }
5701
5702 // Add in the Base Offset if present.
5703 if (AddrMode.BaseOffs) {
5704 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5705 if (ResultIndex) {
5706 // We need to add this separately from the scale above to help with
5707 // SDAG consecutive load/store merging.
5708 if (ResultPtr->getType() != I8PtrTy)
5709 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5710 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5711 AddrMode.InBounds);
5712 }
5713
5714 ResultIndex = V;
5715 }
5716
5717 if (!ResultIndex) {
5718 SunkAddr = ResultPtr;
5719 } else {
5720 if (ResultPtr->getType() != I8PtrTy)
5721 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5722 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5723 AddrMode.InBounds);
5724 }
5725
5726 if (SunkAddr->getType() != Addr->getType()) {
5727 if (SunkAddr->getType()->getPointerAddressSpace() !=
5728 Addr->getType()->getPointerAddressSpace() &&
5729 !DL->isNonIntegralPointerType(Addr->getType())) {
5730 // There are two reasons the address spaces might not match: a no-op
5731 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5732 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5733 // TODO: allow bitcast between different address space pointers with
5734 // the same size.
5735 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5736 SunkAddr =
5737 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5738 } else
5739 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5740 }
5741 }
5742 } else {
5743 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5744 // non-integral pointers, so in that case bail out now.
5745 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5746 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5747 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5748 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5749 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5750 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5751 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5752 (AddrMode.BaseGV &&
5753 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5754 return Modified;
5755
5756 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5757 << " for " << *MemoryInst << "\n");
5758 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5759 Value *Result = nullptr;
5760
5761 // Start with the base register. Do this first so that subsequent address
5762 // matching finds it last, which will prevent it from trying to match it
5763 // as the scaled value in case it happens to be a mul. That would be
5764 // problematic if we've sunk a different mul for the scale, because then
5765 // we'd end up sinking both muls.
5766 if (AddrMode.BaseReg) {
5767 Value *V = AddrMode.BaseReg;
5768 if (V->getType()->isPointerTy())
5769 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5770 if (V->getType() != IntPtrTy)
5771 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5772 Result = V;
5773 }
5774
5775 // Add the scale value.
5776 if (AddrMode.Scale) {
5777 Value *V = AddrMode.ScaledReg;
5778 if (V->getType() == IntPtrTy) {
5779 // done.
5780 } else if (V->getType()->isPointerTy()) {
5781 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5782 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5783 cast<IntegerType>(V->getType())->getBitWidth()) {
5784 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5785 } else {
5786 // It is only safe to sign extend the BaseReg if we know that the math
5787 // required to create it did not overflow before we extend it. Since
5788 // the original IR value was tossed in favor of a constant back when
5789 // the AddrMode was created we need to bail out gracefully if widths
5790 // do not match instead of extending it.
5791 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5792 if (I && (Result != AddrMode.BaseReg))
5793 I->eraseFromParent();
5794 return Modified;
5795 }
5796 if (AddrMode.Scale != 1)
5797 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5798 "sunkaddr");
5799 if (Result)
5800 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5801 else
5802 Result = V;
5803 }
5804
5805 // Add in the BaseGV if present.
5806 GlobalValue *BaseGV = AddrMode.BaseGV;
5807 if (BaseGV != nullptr) {
5808 Value *BaseGVPtr;
5809 if (BaseGV->isThreadLocal()) {
5810 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
5811 } else {
5812 BaseGVPtr = BaseGV;
5813 }
5814 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
5815 if (Result)
5816 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5817 else
5818 Result = V;
5819 }
5820
5821 // Add in the Base Offset if present.
5822 if (AddrMode.BaseOffs) {
5823 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5824 if (Result)
5825 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5826 else
5827 Result = V;
5828 }
5829
5830 if (!Result)
5831 SunkAddr = Constant::getNullValue(Addr->getType());
5832 else
5833 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5834 }
5835
5836 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5837 // Store the newly computed address into the cache. In the case we reused a
5838 // value, this should be idempotent.
5839 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5840
5841 // If we have no uses, recursively delete the value and all dead instructions
5842 // using it.
5843 if (Repl->use_empty()) {
5844 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
5845 RecursivelyDeleteTriviallyDeadInstructions(
5846 Repl, TLInfo, nullptr,
5847 [&](Value *V) { removeAllAssertingVHReferences(V); });
5848 });
5849 }
5850 ++NumMemoryInsts;
5851 return true;
5852}
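Illustrative only: in the integer fallback path above, the sunk address is rebuilt as base + scaled * scale + offset before a final inttoptr. The arithmetic the emitted "sunkaddr" chain computes is just:

  #include <cassert>
  #include <cstdint>

  // Sketch of the value computed by the ptrtoint/mul/add chain.
  static uint64_t materializeSketch(uint64_t Base, uint64_t Scaled,
                                    uint64_t Scale, int64_t BaseOffs) {
    uint64_t Addr = Base;
    if (Scale)
      Addr += Scaled * Scale;     // ScaledReg * Scale
    Addr += (uint64_t)BaseOffs;   // constant displacement
    return Addr;
  }

  int main() {
    assert(materializeSketch(0x1000, 3, 8, 16) == 0x1000 + 24 + 16);
    return 0;
  }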
5853
5854/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
5855/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
5856/// only handle a 2 operand GEP in the same basic block or a splat constant
5857/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
5858/// index.
5859///
5860/// If the existing GEP has a vector base pointer that is splat, we can look
5861/// through the splat to find the scalar pointer. If we can't find a scalar
5862/// pointer there's nothing we can do.
5863///
5864/// If we have a GEP with more than 2 indices where the middle indices are all
5865/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
5866///
5867/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
5868/// followed by a GEP with an all zeroes vector index. This will enable
5869/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
5870/// zero index.
5871bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
5872 Value *Ptr) {
5873 Value *NewAddr;
5874
5875 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
5876 // Don't optimize GEPs that don't have indices.
5877 if (!GEP->hasIndices())
5878 return false;
5879
5880 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
5881 // FIXME: We should support this by sinking the GEP.
5882 if (MemoryInst->getParent() != GEP->getParent())
5883 return false;
5884
5885 SmallVector<Value *, 2> Ops(GEP->operands());
5886
5887 bool RewriteGEP = false;
5888
5889 if (Ops[0]->getType()->isVectorTy()) {
5890 Ops[0] = getSplatValue(Ops[0]);
5891 if (!Ops[0])
5892 return false;
5893 RewriteGEP = true;
5894 }
5895
5896 unsigned FinalIndex = Ops.size() - 1;
5897
5898 // Ensure all but the last index is 0.
5899 // FIXME: This isn't strictly required. All that's required is that they are
5900 // all scalars or splats.
5901 for (unsigned i = 1; i < FinalIndex; ++i) {
5902 auto *C = dyn_cast<Constant>(Ops[i]);
5903 if (!C)
5904 return false;
5905 if (isa<VectorType>(C->getType()))
5906 C = C->getSplatValue();
5907 auto *CI = dyn_cast_or_null<ConstantInt>(C);
5908 if (!CI || !CI->isZero())
5909 return false;
5910 // Scalarize the index if needed.
5911 Ops[i] = CI;
5912 }
5913
5914 // Try to scalarize the final index.
5915 if (Ops[FinalIndex]->getType()->isVectorTy()) {
5916 if (Value *V = getSplatValue(Ops[FinalIndex])) {
5917 auto *C = dyn_cast<ConstantInt>(V);
5918 // Don't scalarize an all-zeros vector.
5919 if (!C || !C->isZero()) {
5920 Ops[FinalIndex] = V;
5921 RewriteGEP = true;
5922 }
5923 }
5924 }
5925
5926 // If we made any changes or we have extra operands, we need to generate
5927 // new instructions.
5928 if (!RewriteGEP && Ops.size() == 2)
5929 return false;
5930
5931 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5932
5933 IRBuilder<> Builder(MemoryInst);
5934
5935 Type *SourceTy = GEP->getSourceElementType();
5936 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
5937
5938 // If the final index isn't a vector, emit a scalar GEP containing all ops
5939 // and a vector GEP with all zeroes final index.
5940 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
5941 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
5942 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5943 auto *SecondTy = GetElementPtrInst::getIndexedType(
5944 SourceTy, ArrayRef(Ops).drop_front());
5945 NewAddr =
5946 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
5947 } else {
5948 Value *Base = Ops[0];
5949 Value *Index = Ops[FinalIndex];
5950
5951 // Create a scalar GEP if there are more than 2 operands.
5952 if (Ops.size() != 2) {
5953 // Replace the last index with 0.
5954 Ops[FinalIndex] =
5955 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
5956 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
5957 SourceTy = GetElementPtrInst::getIndexedType(
5958 SourceTy, ArrayRef(Ops).drop_front());
5959 }
5960
5961 // Now create the GEP with scalar pointer and vector index.
5962 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
5963 }
5964 } else if (!isa<Constant>(Ptr)) {
5965 // Not a GEP; maybe it's a splat and we can create a GEP to enable
5966 // SelectionDAGBuilder to use it as a uniform base.
5967 Value *V = getSplatValue(Ptr);
5968 if (!V)
5969 return false;
5970
5971 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
5972
5973 IRBuilder<> Builder(MemoryInst);
5974
5975 // Emit a vector GEP with a scalar pointer and all 0s vector index.
5976 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
5977 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
5978 Type *ScalarTy;
5979 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
5980 Intrinsic::masked_gather) {
5981 ScalarTy = MemoryInst->getType()->getScalarType();
5982 } else {
5983 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
5984 Intrinsic::masked_scatter);
5985 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
5986 }
5987 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
5988 } else {
5990 // Constant; SelectionDAGBuilder knows to check if it's a splat.
5990 return false;
5991 }
5992
5993 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
5994
5995 // If we have no uses, recursively delete the value and all dead instructions
5996 // using it.
5997 if (Ptr->use_empty())
5998 RecursivelyDeleteTriviallyDeadInstructions(
5999 Ptr, TLInfo, nullptr,
6000 [&](Value *V) { removeAllAssertingVHReferences(V); });
6001
6002 return true;
6003}
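A sketch of the operand check above on plain integers: every index between the pointer and the final one must be a (splatted) zero, otherwise the GEP is left alone. Splat detection is reduced to plain values here to keep the sketch simple.

  #include <cassert>
  #include <vector>

  // Ops mirrors the GEP operand list (pointer followed by indices); a negative
  // value stands in for "not a constant zero".
  static bool middleIndicesAreZero(const std::vector<int> &Ops) {
    // Ops[0] is the pointer; Ops.back() is the final (possibly vector) index.
    for (size_t I = 1; I + 1 < Ops.size(); ++I)
      if (Ops[I] != 0)
        return false;
    return true;
  }

  int main() {
    assert(middleIndicesAreZero({/*ptr*/ -1, 0, 0, /*final idx*/ 7}));
    assert(!middleIndicesAreZero({/*ptr*/ -1, 2, /*final idx*/ 7}));
    return 0;
  }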
6004
6005/// If there are any memory operands, use OptimizeMemoryInst to sink their
6006/// address computing into the block when possible / profitable.
6007bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6008 bool MadeChange = false;
6009
6010 const TargetRegisterInfo *TRI =
6011 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6012 TargetLowering::AsmOperandInfoVector TargetConstraints =
6013 TLI->ParseConstraints(*DL, TRI, *CS);
6014 unsigned ArgNo = 0;
6015 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6016 // Compute the constraint code and ConstraintType to use.
6017 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6018
6019 // TODO: Also handle C_Address?
6020 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6021 OpInfo.isIndirect) {
6022 Value *OpVal = CS->getArgOperand(ArgNo++);
6023 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6024 } else if (OpInfo.Type == InlineAsm::isInput)
6025 ArgNo++;
6026 }
6027
6028 return MadeChange;
6029}
6030
6031/// Check if all the uses of \p Val are equivalent (or free) zero or
6032/// sign extensions.
6033static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6034 assert(!Val->use_empty() && "Input must have at least one use");
6035 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6036 bool IsSExt = isa<SExtInst>(FirstUser);
6037 Type *ExtTy = FirstUser->getType();
6038 for (const User *U : Val->users()) {
6039 const Instruction *UI = cast<Instruction>(U);
6040 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6041 return false;
6042 Type *CurTy = UI->getType();
6043 // Same input and output types: Same instruction after CSE.
6044 if (CurTy == ExtTy)
6045 continue;
6046
6047 // If IsSExt is true, we are in this situation:
6048 // a = Val
6049 // b = sext ty1 a to ty2
6050 // c = sext ty1 a to ty3
6051 // Assuming ty2 is shorter than ty3, this could be turned into:
6052 // a = Val
6053 // b = sext ty1 a to ty2
6054 // c = sext ty2 b to ty3
6055 // However, the last sext is not free.
6056 if (IsSExt)
6057 return false;
6058
6059 // This is a ZExt, maybe this is free to extend from one type to another.
6060 // In that case, we would not account for a different use.
6061 Type *NarrowTy;
6062 Type *LargeTy;
6063 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6064 CurTy->getScalarType()->getIntegerBitWidth()) {
6065 NarrowTy = CurTy;
6066 LargeTy = ExtTy;
6067 } else {
6068 NarrowTy = ExtTy;
6069 LargeTy = CurTy;
6070 }
6071
6072 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6073 return false;
6074 }
6075 // All uses are the same or can be derived from one another for free.
6076 return true;
6077}
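Illustrative only: chaining zero extensions through a narrower intermediate type is lossless, which is why zext users of different widths can still be treated as "the same" when the extra zext is free; the analogous sext chain would need a real, non-free sext, so it is rejected above.

  #include <cassert>
  #include <cstdint>

  int main() {
    uint8_t X = 0xAB;
    uint32_t Direct = X;              // zext i8 -> i32
    uint32_t Chained = (uint16_t)X;   // zext i8 -> i16, then i16 -> i32
    assert(Direct == Chained);        // one use can be derived from the other
    return 0;
  }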
6078
6079/// Try to speculatively promote extensions in \p Exts and continue
6080/// promoting through newly promoted operands recursively as far as doing so is
6081/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6082/// When some promotion happened, \p TPT contains the proper state to revert
6083/// them.
6084///
6085/// \return true if some promotion happened, false otherwise.
6086bool CodeGenPrepare::tryToPromoteExts(
6087 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6088 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6089 unsigned CreatedInstsCost) {
6090 bool Promoted = false;
6091
6092 // Iterate over all the extensions to try to promote them.
6093 for (auto *I : Exts) {
6094 // Early check if we directly have ext(load).
6095 if (isa<LoadInst>(I->getOperand(0))) {
6096 ProfitablyMovedExts.push_back(I);
6097 continue;
6098 }
6099
6100 // Check whether or not we want to do any promotion. The reason we have
6101 // this check inside the for loop is to catch the case where an extension
6102 // is directly fed by a load because in such case the extension can be moved
6103 // up without any promotion on its operands.
6104 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6105 return false;
6106
6107 // Get the action to perform the promotion.
6108 TypePromotionHelper::Action TPH =
6109 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6110 // Check if we can promote.
6111 if (!TPH) {
6112 // Save the current extension as we cannot move up through its operand.
6113 ProfitablyMovedExts.push_back(I);
6114 continue;
6115 }
6116
6117 // Save the current state.
6118 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6119 TPT.getRestorationPoint();
6120 SmallVector<Instruction *, 4> NewExts;
6121 unsigned NewCreatedInstsCost = 0;
6122 unsigned ExtCost = !TLI->isExtFree(I);
6123 // Promote.
6124 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6125 &NewExts, nullptr, *TLI);
6126 assert(PromotedVal &&
6127 "TypePromotionHelper should have filtered out those cases");
6128
6129 // We would be able to merge only one extension into a load.
6130 // Therefore, if we have more than 1 new extension we heuristically
6131 // cut this search path, because it means we degrade the code quality.
6132 // With exactly 2, the transformation is neutral, because we will merge
6133 // one extension but leave one. However, we optimistically keep going,
6134 // because the new extension may be removed too. Also avoid replacing a
6135 // single free extension with multiple extensions, as this increases the
6136 // number of IR instructions while not providing any savings.
6137 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6138 // FIXME: It would be possible to propagate a negative value instead of
6139 // conservatively ceiling it to 0.
6140 TotalCreatedInstsCost =
6141 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6142 if (!StressExtLdPromotion &&
6143 (TotalCreatedInstsCost > 1 ||
6144 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6145 (ExtCost == 0 && NewExts.size() > 1))) {
6146 // This promotion is not profitable, rollback to the previous state, and
6147 // save the current extension in ProfitablyMovedExts as the latest
6148 // speculative promotion turned out to be unprofitable.
6149 TPT.rollback(LastKnownGood);
6150 ProfitablyMovedExts.push_back(I);
6151 continue;
6152 }
6153 // Continue promoting NewExts as far as doing so is profitable.
6154 SmallVector<Instruction *, 2> NewlyMovedExts;
6155 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6156 bool NewPromoted = false;
6157 for (auto *ExtInst : NewlyMovedExts) {
6158 Instruction *MovedExt = cast<Instruction>(ExtInst);
6159 Value *ExtOperand = MovedExt->getOperand(0);
6160 // If we have reached to a load, we need this extra profitability check
6161 // as it could potentially be merged into an ext(load).
6162 if (isa<LoadInst>(ExtOperand) &&
6163 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6164 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6165 continue;
6166
6167 ProfitablyMovedExts.push_back(MovedExt);
6168 NewPromoted = true;
6169 }
6170
6171 // If none of speculative promotions for NewExts is profitable, rollback
6172 // and save the current extension (I) as the last profitable extension.
6173 if (!NewPromoted) {
6174 TPT.rollback(LastKnownGood);
6175 ProfitablyMovedExts.push_back(I);
6176 continue;
6177 }
6178 // The promotion is profitable.
6179 Promoted = true;
6180 }
6181 return Promoted;
6182}
6183
6184/// Merging redundant sexts when one is dominating the other.
6185bool CodeGenPrepare::mergeSExts(Function &F) {
6186 bool Changed = false;
6187 for (auto &Entry : ValToSExtendedUses) {
6188 SExts &Insts = Entry.second;
6189 SExts CurPts;
6190 for (Instruction *Inst : Insts) {
6191 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6192 Inst->getOperand(0) != Entry.first)
6193 continue;
6194 bool inserted = false;
6195 for (auto &Pt : CurPts) {
6196 if (getDT(F).dominates(Inst, Pt)) {
6197 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6198 RemovedInsts.insert(Pt);
6199 Pt->removeFromParent();
6200 Pt = Inst;
6201 inserted = true;
6202 Changed = true;
6203 break;
6204 }
6205 if (!getDT(F).dominates(Pt, Inst))
6206 // Give up if we need to merge in a common dominator as the
6207 // experiments show it is not profitable.
6208 continue;
6209 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6210 RemovedInsts.insert(Inst);
6211 Inst->removeFromParent();
6212 inserted = true;
6213 Changed = true;
6214 break;
6215 }
6216 if (!inserted)
6217 CurPts.push_back(Inst);
6218 }
6219 }
6220 return Changed;
6221}
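// Illustrative sketch (hypothetical IR, for exposition only): if
// ValToSExtendedUses recorded two sign extensions of the same value and one
// dominates the other,
//   bb0:  %s1 = sext i32 %x to i64
//   bb1:  %s2 = sext i32 %x to i64   ; bb0 dominates bb1
// mergeSExts rewrites all uses of %s2 to %s1 and marks %s2 for removal; it
// gives up when the two extensions only share a common dominator.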
6222
6223// Splitting large data structures so that the GEPs accessing them can have
6224// smaller offsets so that they can be sunk to the same blocks as their users.
6225// For example, a large struct starting from %base is split into two parts
6226// where the second part starts from %new_base.
6227//
6228// Before:
6229// BB0:
6230// %base =
6231//
6232// BB1:
6233// %gep0 = gep %base, off0
6234// %gep1 = gep %base, off1
6235// %gep2 = gep %base, off2
6236//
6237// BB2:
6238// %load1 = load %gep0
6239// %load2 = load %gep1
6240// %load3 = load %gep2
6241//
6242// After:
6243// BB0:
6244// %base =
6245// %new_base = gep %base, off0
6246//
6247// BB1:
6248// %new_gep0 = %new_base
6249// %new_gep1 = gep %new_base, off1 - off0
6250// %new_gep2 = gep %new_base, off2 - off0
6251//
6252// BB2:
6253// %load1 = load i32, i32* %new_gep0
6254// %load2 = load i32, i32* %new_gep1
6255// %load3 = load i32, i32* %new_gep2
6256//
6257// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6258 // their offsets are small enough to fit into the addressing mode.
6259bool CodeGenPrepare::splitLargeGEPOffsets() {
6260 bool Changed = false;
6261 for (auto &Entry : LargeOffsetGEPMap) {
6262 Value *OldBase = Entry.first;
6263 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6264 &LargeOffsetGEPs = Entry.second;
6265 auto compareGEPOffset =
6266 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6267 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6268 if (LHS.first == RHS.first)
6269 return false;
6270 if (LHS.second != RHS.second)
6271 return LHS.second < RHS.second;
6272 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6273 };
6274 // Sorting all the GEPs of the same data structures based on the offsets.
6275 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6276 LargeOffsetGEPs.erase(
6277 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
6278 LargeOffsetGEPs.end());
6279 // Skip if all the GEPs have the same offsets.
6280 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6281 continue;
6282 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6283 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6284 Value *NewBaseGEP = nullptr;
6285
6286 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6287 GetElementPtrInst *GEP) {
6288 LLVMContext &Ctx = GEP->getContext();
6289 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6290 Type *I8PtrTy =
6291 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6292
6293 BasicBlock::iterator NewBaseInsertPt;
6294 BasicBlock *NewBaseInsertBB;
6295 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6296 // If the base of the struct is an instruction, the new base will be
6297 // inserted close to it.
6298 NewBaseInsertBB = BaseI->getParent();
6299 if (isa<PHINode>(BaseI))
6300 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6301 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6302 NewBaseInsertBB =
6303 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6304 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6305 } else
6306 NewBaseInsertPt = std::next(BaseI->getIterator());
6307 } else {
6308 // If the current base is an argument or global value, the new base
6309 // will be inserted to the entry block.
6310 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6311 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6312 }
6313 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6314 // Create a new base.
6315 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6316 NewBaseGEP = OldBase;
6317 if (NewBaseGEP->getType() != I8PtrTy)
6318 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6319 NewBaseGEP =
6320 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6321 NewGEPBases.insert(NewBaseGEP);
6322 return;
6323 };
6324
6325 // Check whether all the offsets can be encoded with the preferred common base.
6326 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6327 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6328 BaseOffset = PreferBase;
6329 // Create a new base if the offset of the BaseGEP can be decoded with one
6330 // instruction.
6331 createNewBase(BaseOffset, OldBase, BaseGEP);
6332 }
6333
6334 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6335 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6336 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6337 int64_t Offset = LargeOffsetGEP->second;
6338 if (Offset != BaseOffset) {
6339 TargetLowering::AddrMode AddrMode;
6340 AddrMode.HasBaseReg = true;
6341 AddrMode.BaseOffs = Offset - BaseOffset;
6342 // The result type of the GEP might not be the type of the memory
6343 // access.
6344 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6345 GEP->getResultElementType(),
6346 GEP->getAddressSpace())) {
6347 // We need to create a new base if the offset to the current base is
6348 // too large to fit into the addressing mode. So, a very large struct
6349 // may be split into several parts.
6350 BaseGEP = GEP;
6351 BaseOffset = Offset;
6352 NewBaseGEP = nullptr;
6353 }
6354 }
6355
6356 // Generate a new GEP to replace the current one.
6357 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6358
6359 if (!NewBaseGEP) {
6360 // Create a new base if we don't have one yet. Find the insertion
6361 // pointer for the new base first.
6362 createNewBase(BaseOffset, OldBase, GEP);
6363 }
6364
6365 IRBuilder<> Builder(GEP);
6366 Value *NewGEP = NewBaseGEP;
6367 if (Offset != BaseOffset) {
6368 // Calculate the new offset for the new GEP.
6369 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6370 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6371 }
6372 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6373 LargeOffsetGEPID.erase(GEP);
6374 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6375 GEP->eraseFromParent();
6376 Changed = true;
6377 }
6378 }
6379 return Changed;
6380}
6381
6382 bool CodeGenPrepare::optimizePhiType(
6383 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6384 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6385 // We are looking for a collection of interconnected phi nodes that together
6386 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6387 // are of the same type. Convert the whole set of nodes to the type of the
6388 // bitcast.
6389 Type *PhiTy = I->getType();
6390 Type *ConvertTy = nullptr;
6391 if (Visited.count(I) ||
6392 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6393 return false;
6394
6395 SmallVector<Instruction *, 4> Worklist;
6396 Worklist.push_back(cast<Instruction>(I));
6397 SmallPtrSet<PHINode *, 4> PhiNodes;
6398 SmallPtrSet<ConstantData *, 4> Constants;
6399 PhiNodes.insert(I);
6400 Visited.insert(I);
6401 SmallPtrSet<Instruction *, 4> Defs;
6402 SmallPtrSet<Instruction *, 4> Uses;
6403 // This works by adding extra bitcasts between load/stores and removing
6404 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6405 // we can get in the situation where we remove a bitcast in one iteration
6406 // just to add it again in the next. We need to ensure that at least one
6407 // bitcast we remove is anchored to something that will not change back.
6408 bool AnyAnchored = false;
6409
6410 while (!Worklist.empty()) {
6411 Instruction *II = Worklist.pop_back_val();
6412
6413 if (auto *Phi = dyn_cast<PHINode>(II)) {
6414 // Handle Defs, which might also be PHI's
6415 for (Value *V : Phi->incoming_values()) {
6416 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6417 if (!PhiNodes.count(OpPhi)) {
6418 if (!Visited.insert(OpPhi).second)
6419 return false;
6420 PhiNodes.insert(OpPhi);
6421 Worklist.push_back(OpPhi);
6422 }
6423 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6424 if (!OpLoad->isSimple())
6425 return false;
6426 if (Defs.insert(OpLoad).second)
6427 Worklist.push_back(OpLoad);
6428 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6429 if (Defs.insert(OpEx).second)
6430 Worklist.push_back(OpEx);
6431 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6432 if (!ConvertTy)
6433 ConvertTy = OpBC->getOperand(0)->getType();
6434 if (OpBC->getOperand(0)->getType() != ConvertTy)
6435 return false;
6436 if (Defs.insert(OpBC).second) {
6437 Worklist.push_back(OpBC);
6438 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6439 !isa<ExtractElementInst>(OpBC->getOperand(0));
6440 }
6441 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6442 Constants.insert(OpC);
6443 else
6444 return false;
6445 }
6446 }
6447
6448 // Handle uses which might also be phi's
6449 for (User *V : II->users()) {
6450 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6451 if (!PhiNodes.count(OpPhi)) {
6452 if (Visited.count(OpPhi))
6453 return false;
6454 PhiNodes.insert(OpPhi);
6455 Visited.insert(OpPhi);
6456 Worklist.push_back(OpPhi);
6457 }
6458 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6459 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6460 return false;
6461 Uses.insert(OpStore);
6462 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6463 if (!ConvertTy)
6464 ConvertTy = OpBC->getType();
6465 if (OpBC->getType() != ConvertTy)
6466 return false;
6467 Uses.insert(OpBC);
6468 AnyAnchored |=
6469 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6470 } else {
6471 return false;
6472 }
6473 }
6474 }
6475
6476 if (!ConvertTy || !AnyAnchored ||
6477 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6478 return false;
6479
6480 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6481 << *ConvertTy << "\n");
6482
6483 // Create all the new phi nodes of the new type, and bitcast any loads to the
6484 // correct type.
6485 ValueToValueMap ValMap;
6486 for (ConstantData *C : Constants)
6487 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6488 for (Instruction *D : Defs) {
6489 if (isa<BitCastInst>(D)) {
6490 ValMap[D] = D->getOperand(0);
6491 DeletedInstrs.insert(D);
6492 } else {
6493 BasicBlock::iterator insertPt = std::next(D->getIterator());
6494 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6495 }
6496 }
6497 for (PHINode *Phi : PhiNodes)
6498 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6499 Phi->getName() + ".tc", Phi->getIterator());
6500 // Pipe together all the PhiNodes.
6501 for (PHINode *Phi : PhiNodes) {
6502 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6503 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6504 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6505 Phi->getIncomingBlock(i));
6506 Visited.insert(NewPhi);
6507 }
6508 // And finally pipe up the stores and bitcasts
6509 for (Instruction *U : Uses) {
6510 if (isa<BitCastInst>(U)) {
6511 DeletedInstrs.insert(U);
6512 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6513 } else {
6514 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6515 U->getIterator()));
6516 }
6517 }
6518
6519 // Save the removed phis to be deleted later.
6520 for (PHINode *Phi : PhiNodes)
6521 DeletedInstrs.insert(Phi);
6522 return true;
6523}
6524
6525bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6526 if (!OptimizePhiTypes)
6527 return false;
6528
6529 bool Changed = false;
6530 SmallPtrSet<PHINode *, 4> Visited;
6531 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6532
6533 // Attempt to optimize all the phis in the function to the correct type.
6534 for (auto &BB : F)
6535 for (auto &Phi : BB.phis())
6536 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6537
6538 // Remove any old phi's that have been converted.
6539 for (auto *I : DeletedInstrs) {
6540 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6541 I->eraseFromParent();
6542 }
6543
6544 return Changed;
6545}
6546
6547/// Return true, if an ext(load) can be formed from an extension in
6548/// \p MovedExts.
6549bool CodeGenPrepare::canFormExtLd(
6550 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6551 Instruction *&Inst, bool HasPromoted) {
6552 for (auto *MovedExtInst : MovedExts) {
6553 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6554 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6555 Inst = MovedExtInst;
6556 break;
6557 }
6558 }
6559 if (!LI)
6560 return false;
6561
6562 // If they're already in the same block, there's nothing to do.
6563 // Make the cheap checks first if we did not promote.
6564 // If we promoted, we need to check if it is indeed profitable.
6565 if (!HasPromoted && LI->getParent() == Inst->getParent())
6566 return false;
6567
6568 return TLI->isExtLoad(LI, Inst, *DL);
6569}
6570
6571/// Move a zext or sext fed by a load into the same basic block as the load,
6572/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6573/// extend into the load.
6574///
6575/// E.g.,
6576/// \code
6577/// %ld = load i32* %addr
6578/// %add = add nuw i32 %ld, 4
6579/// %zext = zext i32 %add to i64
6580 /// \endcode
6581/// =>
6582/// \code
6583/// %ld = load i32* %addr
6584/// %zext = zext i32 %ld to i64
6585/// %add = add nuw i64 %zext, 4
6586 /// \endcode
6587 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
6588 /// allows us to match zext(load i32*) to i64.
6589///
6590/// Also, try to promote the computations used to obtain a sign extended
6591 /// value used in memory accesses.
6592/// E.g.,
6593/// \code
6594/// a = add nsw i32 b, 3
6595/// d = sext i32 a to i64
6596/// e = getelementptr ..., i64 d
6597/// \endcode
6598/// =>
6599/// \code
6600/// f = sext i32 b to i64
6601/// a = add nsw i64 f, 3
6602/// e = getelementptr ..., i64 a
6603/// \endcode
6604///
6605/// \p Inst[in/out] the extension may be modified during the process if some
6606/// promotions apply.
6607bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6608 bool AllowPromotionWithoutCommonHeader = false;
6609 /// See if it is an interesting sext operation for the address type
6610 /// promotion before trying to promote it, e.g., the ones with the right
6611 /// type and used in memory accesses.
6612 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6613 *Inst, AllowPromotionWithoutCommonHeader);
6614 TypePromotionTransaction TPT(RemovedInsts);
6615 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6616 TPT.getRestorationPoint();
6617 SmallVector<Instruction *, 1> Exts;
6618 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6619 Exts.push_back(Inst);
6620
6621 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6622
6623 // Look for a load being extended.
6624 LoadInst *LI = nullptr;
6625 Instruction *ExtFedByLoad;
6626
6627 // Try to promote a chain of computation if it allows to form an extended
6628 // load.
6629 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6630 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
6631 TPT.commit();
6632 // Move the extend into the same block as the load.
6633 ExtFedByLoad->moveAfter(LI);
6634 ++NumExtsMoved;
6635 Inst = ExtFedByLoad;
6636 return true;
6637 }
6638
6639 // Continue promoting SExts if known as considerable depending on targets.
6640 if (ATPConsiderable &&
6641 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6642 HasPromoted, TPT, SpeculativelyMovedExts))
6643 return true;
6644
6645 TPT.rollback(LastKnownGood);
6646 return false;
6647}
6648
6649// Perform address type promotion if doing so is profitable.
6650// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6651// instructions that sign extended the same initial value. However, if
6652// AllowPromotionWithoutCommonHeader == true, we expect promoting the
6653// extension is just profitable.
6654bool CodeGenPrepare::performAddressTypePromotion(
6655 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
6656 bool HasPromoted, TypePromotionTransaction &TPT,
6657 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6658 bool Promoted = false;
6659 SmallPtrSet<Instruction *, 1> UnhandledExts;
6660 bool AllSeenFirst = true;
6661 for (auto *I : SpeculativelyMovedExts) {
6662 Value *HeadOfChain = I->getOperand(0);
6663 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
6664 SeenChainsForSExt.find(HeadOfChain);
6665 // If there is an unhandled SExt which has the same header, try to promote
6666 // it as well.
6667 if (AlreadySeen != SeenChainsForSExt.end()) {
6668 if (AlreadySeen->second != nullptr)
6669 UnhandledExts.insert(AlreadySeen->second);
6670 AllSeenFirst = false;
6671 }
6672 }
6673
6674 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6675 SpeculativelyMovedExts.size() == 1)) {
6676 TPT.commit();
6677 if (HasPromoted)
6678 Promoted = true;
6679 for (auto *I : SpeculativelyMovedExts) {
6680 Value *HeadOfChain = I->getOperand(0);
6681 SeenChainsForSExt[HeadOfChain] = nullptr;
6682 ValToSExtendedUses[HeadOfChain].push_back(I);
6683 }
6684 // Update Inst as promotion happened.
6685 Inst = SpeculativelyMovedExts.pop_back_val();
6686 } else {
6687 // This is the first chain visited from the header, keep the current chain
6688 // as unhandled. Defer promoting this until we encounter another SExt
6689 // chain derived from the same header.
6690 for (auto *I : SpeculativelyMovedExts) {
6691 Value *HeadOfChain = I->getOperand(0);
6692 SeenChainsForSExt[HeadOfChain] = Inst;
6693 }
6694 return false;
6695 }
6696
6697 if (!AllSeenFirst && !UnhandledExts.empty())
6698 for (auto *VisitedSExt : UnhandledExts) {
6699 if (RemovedInsts.count(VisitedSExt))
6700 continue;
6701 TypePromotionTransaction TPT(RemovedInsts);
6702 SmallVector<Instruction *, 1> Exts;
6703 SmallVector<Instruction *, 2> Chains;
6704 Exts.push_back(VisitedSExt);
6705 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6706 TPT.commit();
6707 if (HasPromoted)
6708 Promoted = true;
6709 for (auto *I : Chains) {
6710 Value *HeadOfChain = I->getOperand(0);
6711 // Mark this as handled.
6712 SeenChainsForSExt[HeadOfChain] = nullptr;
6713 ValToSExtendedUses[HeadOfChain].push_back(I);
6714 }
6715 }
6716 return Promoted;
6717}
6718
6719bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
6720 BasicBlock *DefBB = I->getParent();
6721
6722 // If the result of a {s|z}ext and its source are both live out, rewrite all
6723 // other uses of the source with result of extension.
6724 Value *Src = I->getOperand(0);
6725 if (Src->hasOneUse())
6726 return false;
6727
6728 // Only do this xform if truncating is free.
6729 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
6730 return false;
6731
6732 // Only safe to perform the optimization if the source is also defined in
6733 // this block.
6734 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
6735 return false;
6736
6737 bool DefIsLiveOut = false;
6738 for (User *U : I->users()) {
6739 Instruction *UI = cast<Instruction>(U);
6740
6741 // Figure out which BB this ext is used in.
6742 BasicBlock *UserBB = UI->getParent();
6743 if (UserBB == DefBB)
6744 continue;
6745 DefIsLiveOut = true;
6746 break;
6747 }
6748 if (!DefIsLiveOut)
6749 return false;
6750
6751 // Make sure none of the uses are PHI nodes.
6752 for (User *U : Src->users()) {
6753 Instruction *UI = cast<Instruction>(U);
6754 BasicBlock *UserBB = UI->getParent();
6755 if (UserBB == DefBB)
6756 continue;
6757 // Be conservative. We don't want this xform to end up introducing
6758 // reloads just before load / store instructions.
6759 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
6760 return false;
6761 }
6762
6763 // InsertedTruncs - Only insert one trunc in each block once.
6764 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
6765
6766 bool MadeChange = false;
6767 for (Use &U : Src->uses()) {
6768 Instruction *User = cast<Instruction>(U.getUser());
6769
6770 // Figure out which BB this ext is used in.
6771 BasicBlock *UserBB = User->getParent();
6772 if (UserBB == DefBB)
6773 continue;
6774
6775 // Both src and def are live in this block. Rewrite the use.
6776 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
6777
6778 if (!InsertedTrunc) {
6779 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
6780 assert(InsertPt != UserBB->end());
6781 InsertedTrunc = new TruncInst(I, Src->getType(), "");
6782 InsertedTrunc->insertBefore(*UserBB, InsertPt);
6783 InsertedInsts.insert(InsertedTrunc);
6784 }
6785
6786 // Replace a use of the {s|z}ext source with a use of the result.
6787 U = InsertedTrunc;
6788 ++NumExtUses;
6789 MadeChange = true;
6790 }
6791
6792 return MadeChange;
6793}
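// Illustrative sketch (hypothetical IR, for exposition only): when both the
// source and the extension are live out of the defining block and truncation
// is free,
//   bb0:  %x   = add i32 %a, %b
//         %ext = zext i32 %x to i64
//   bb1:  use of i32 %x
// optimizeExtUses rewrites the out-of-block use of %x as
//   bb1:  %t = trunc i64 %ext to i32   ; one trunc per user block
//         use of i32 %t
// so that only %ext has to stay live across the block boundary.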
6794
6795// Find loads whose uses only use some of the loaded value's bits. Add an "and"
6796// just after the load if the target can fold this into one extload instruction,
6797// with the hope of eliminating some of the other later "and" instructions using
6798// the loaded value. "and"s that are made trivially redundant by the insertion
6799// of the new "and" are removed by this function, while others (e.g. those whose
6800// path from the load goes through a phi) are left for isel to potentially
6801// remove.
6802//
6803// For example:
6804//
6805// b0:
6806// x = load i32
6807// ...
6808// b1:
6809// y = and x, 0xff
6810// z = use y
6811//
6812// becomes:
6813//
6814// b0:
6815// x = load i32
6816// x' = and x, 0xff
6817// ...
6818// b1:
6819// z = use x'
6820//
6821// whereas:
6822//
6823// b0:
6824// x1 = load i32
6825// ...
6826// b1:
6827// x2 = load i32
6828// ...
6829// b2:
6830// x = phi x1, x2
6831// y = and x, 0xff
6832//
6833// becomes (after a call to optimizeLoadExt for each load):
6834//
6835// b0:
6836// x1 = load i32
6837// x1' = and x1, 0xff
6838// ...
6839// b1:
6840// x2 = load i32
6841// x2' = and x2, 0xff
6842// ...
6843// b2:
6844// x = phi x1', x2'
6845// y = and x, 0xff
6846bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
6847 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
6848 return false;
6849
6850 // Skip loads we've already transformed.
6851 if (Load->hasOneUse() &&
6852 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
6853 return false;
6854
6855 // Look at all uses of Load, looking through phis, to determine how many bits
6856 // of the loaded value are needed.
6857 SmallVector<Instruction *, 8> WorkList;
6858 SmallPtrSet<Instruction *, 16> Visited;
6859 SmallVector<Instruction *, 8> AndsToMaybeRemove;
6860 for (auto *U : Load->users())
6861 WorkList.push_back(cast<Instruction>(U));
6862
6863 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
6864 unsigned BitWidth = LoadResultVT.getSizeInBits();
6865 // If the BitWidth is 0, do not try to optimize the type
6866 if (BitWidth == 0)
6867 return false;
6868
6869 APInt DemandBits(BitWidth, 0);
6870 APInt WidestAndBits(BitWidth, 0);
6871
6872 while (!WorkList.empty()) {
6873 Instruction *I = WorkList.pop_back_val();
6874
6875 // Break use-def graph loops.
6876 if (!Visited.insert(I).second)
6877 continue;
6878
6879 // For a PHI node, push all of its users.
6880 if (auto *Phi = dyn_cast<PHINode>(I)) {
6881 for (auto *U : Phi->users())
6882 WorkList.push_back(cast<Instruction>(U));
6883 continue;
6884 }
6885
6886 switch (I->getOpcode()) {
6887 case Instruction::And: {
6888 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
6889 if (!AndC)
6890 return false;
6891 APInt AndBits = AndC->getValue();
6892 DemandBits |= AndBits;
6893 // Keep track of the widest and mask we see.
6894 if (AndBits.ugt(WidestAndBits))
6895 WidestAndBits = AndBits;
6896 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
6897 AndsToMaybeRemove.push_back(I);
6898 break;
6899 }
6900
6901 case Instruction::Shl: {
6902 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
6903 if (!ShlC)
6904 return false;
6905 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
6906 DemandBits.setLowBits(BitWidth - ShiftAmt);
6907 break;
6908 }
6909
6910 case Instruction::Trunc: {
6911 EVT TruncVT = TLI->getValueType(*DL, I->getType());
6912 unsigned TruncBitWidth = TruncVT.getSizeInBits();
6913 DemandBits.setLowBits(TruncBitWidth);
6914 break;
6915 }
6916
6917 default:
6918 return false;
6919 }
6920 }
6921
6922 uint32_t ActiveBits = DemandBits.getActiveBits();
6923 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
6924 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
6925 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
6926 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
6927 // followed by an AND.
6928 // TODO: Look into removing this restriction by fixing backends to either
6929 // return false for isLoadExtLegal for i1 or have them select this pattern to
6930 // a single instruction.
6931 //
6932 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
6933 // mask, since these are the only ands that will be removed by isel.
6934 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
6935 WidestAndBits != DemandBits)
6936 return false;
6937
6938 LLVMContext &Ctx = Load->getType()->getContext();
6939 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
6940 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
6941
6942 // Reject cases that won't be matched as extloads.
6943 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
6944 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
6945 return false;
6946
6947 IRBuilder<> Builder(Load->getNextNonDebugInstruction());
6948 auto *NewAnd = cast<Instruction>(
6949 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
6950 // Mark this instruction as "inserted by CGP", so that other
6951 // optimizations don't touch it.
6952 InsertedInsts.insert(NewAnd);
6953
6954 // Replace all uses of load with new and (except for the use of load in the
6955 // new and itself).
6956 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
6957 NewAnd->setOperand(0, Load);
6958
6959 // Remove any and instructions that are now redundant.
6960 for (auto *And : AndsToMaybeRemove)
6961 // Check that the and mask is the same as the one we decided to put on the
6962 // new and.
6963 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
6964 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
6965 if (&*CurInstIterator == And)
6966 CurInstIterator = std::next(And->getIterator());
6967 And->eraseFromParent();
6968 ++NumAndUses;
6969 }
6970
6971 ++NumAndsAdded;
6972 return true;
6973}
6974
6975/// Check if V (an operand of a select instruction) is an expensive instruction
6976 /// that is only used once.
6977 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
6978 auto *I = dyn_cast<Instruction>(V);
6979 // If it's safe to speculatively execute, then it should not have side
6980 // effects; therefore, it's safe to sink and possibly *not* execute.
6981 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
6982 TTI->isExpensiveToSpeculativelyExecute(I);
6983}
6984
6985 /// Returns true if a SelectInst should be turned into an explicit branch.
6986 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
6987 const TargetLowering *TLI,
6988 SelectInst *SI) {
6989 // If even a predictable select is cheap, then a branch can't be cheaper.
6990 if (!TLI->isPredictableSelectExpensive())
6991 return false;
6992
6993 // FIXME: This should use the same heuristics as IfConversion to determine
6994 // whether a select is better represented as a branch.
6995
6996 // If metadata tells us that the select condition is obviously predictable,
6997 // then we want to replace the select with a branch.
6998 uint64_t TrueWeight, FalseWeight;
6999 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7000 uint64_t Max = std::max(TrueWeight, FalseWeight);
7001 uint64_t Sum = TrueWeight + FalseWeight;
7002 if (Sum != 0) {
7003 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7004 if (Probability > TTI->getPredictableBranchThreshold())
7005 return true;
7006 }
7007 }
7008
7009 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7010
7011 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7012 // comparison condition. If the compare has more than one use, there's
7013 // probably another cmov or setcc around, so it's not worth emitting a branch.
7014 if (!Cmp || !Cmp->hasOneUse())
7015 return false;
7016
7017 // If either operand of the select is expensive and only needed on one side
7018 // of the select, we should form a branch.
7019 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7020 sinkSelectOperand(TTI, SI->getFalseValue()))
7021 return true;
7022
7023 return false;
7024}
7025
7026/// If \p isTrue is true, return the true value of \p SI, otherwise return
7027/// false value of \p SI. If the true/false value of \p SI is defined by any
7028/// select instructions in \p Selects, look through the defining select
7029/// instruction until the true/false value is not defined in \p Selects.
7030static Value *
7032 const SmallPtrSet<const Instruction *, 2> &Selects) {
7033 Value *V = nullptr;
7034
7035 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7036 DefSI = dyn_cast<SelectInst>(V)) {
7037 assert(DefSI->getCondition() == SI->getCondition() &&
7038 "The condition of DefSI does not match with SI");
7039 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7040 }
7041
7042 assert(V && "Failed to get select true/false value");
7043 return V;
7044}
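// Illustrative sketch (hypothetical IR, for exposition only): for a chain of
// selects that share one condition,
//   %s1 = select i1 %c, i32 %a, i32 %b
//   %s2 = select i1 %c, i32 %s1, i32 %d
// getTrueOrFalseValue(%s2, /*isTrue=*/true, {%s1, %s2}) looks through %s1 and
// returns %a, while the false value of %s2 is simply %d.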
7045
7046bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7047 assert(Shift->isShift() && "Expected a shift");
7048
7049 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7050 // general vector shifts, and (3) the shift amount is a select-of-splatted
7051 // values, hoist the shifts before the select:
7052 // shift Op0, (select Cond, TVal, FVal) -->
7053 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7054 //
7055 // This is inverting a generic IR transform when we know that the cost of a
7056 // general vector shift is more than the cost of 2 shift-by-scalars.
7057 // We can't do this effectively in SDAG because we may not be able to
7058 // determine if the select operands are splats from within a basic block.
7059 Type *Ty = Shift->getType();
7060 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7061 return false;
7062 Value *Cond, *TVal, *FVal;
7063 if (!match(Shift->getOperand(1),
7064 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7065 return false;
7066 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7067 return false;
7068
7069 IRBuilder<> Builder(Shift);
7070 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7071 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7072 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7073 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7074 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7075 Shift->eraseFromParent();
7076 return true;
7077}
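// Illustrative sketch (hypothetical IR, for exposition only): on a target
// where vector shifts by a scalar are cheaper than general vector shifts,
//   %amt = select i1 %c, <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
//                        <4 x i32> <i32 3, i32 3, i32 3, i32 3>
//   %r   = shl <4 x i32> %x, %amt
// is rewritten by optimizeShiftInst into
//   %t = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
//   %f = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
//   %r = select i1 %c, <4 x i32> %t, <4 x i32> %f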
7078
7079bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7080 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7081 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7082 "Expected a funnel shift");
7083
7084 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7085 // than general vector shifts, and (3) the shift amount is select-of-splatted
7086 // values, hoist the funnel shifts before the select:
7087 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7088 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7089 //
7090 // This is inverting a generic IR transform when we know that the cost of a
7091 // general vector shift is more than the cost of 2 shift-by-scalars.
7092 // We can't do this effectively in SDAG because we may not be able to
7093 // determine if the select operands are splats from within a basic block.
7094 Type *Ty = Fsh->getType();
7095 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7096 return false;
7097 Value *Cond, *TVal, *FVal;
7098 if (!match(Fsh->getOperand(2),
7099 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7100 return false;
7101 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7102 return false;
7103
7104 IRBuilder<> Builder(Fsh);
7105 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7106 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7107 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7108 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7109 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7110 Fsh->eraseFromParent();
7111 return true;
7112}
7113
7114/// If we have a SelectInst that will likely profit from branch prediction,
7115/// turn it into a branch.
7116 bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7117 if (DisableSelectToBranch)
7118 return false;
7119
7120 // If the SelectOptimize pass is enabled, selects have already been optimized.
7121 if (!getCGPassBuilderOption().DisableSelectOptimize)
7122 return false;
7123
7124 // Find all consecutive select instructions that share the same condition.
7125 SmallVector<SelectInst *, 2> ASI;
7126 ASI.push_back(SI);
7127 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7128 It != SI->getParent()->end(); ++It) {
7129 SelectInst *I = dyn_cast<SelectInst>(&*It);
7130 if (I && SI->getCondition() == I->getCondition()) {
7131 ASI.push_back(I);
7132 } else {
7133 break;
7134 }
7135 }
7136
7137 SelectInst *LastSI = ASI.back();
7138 // Increment the current iterator to skip all the rest of select instructions
7139 // because they will be either "not lowered" or "all lowered" to branch.
7140 CurInstIterator = std::next(LastSI->getIterator());
7141 // Examine debug-info attached to the consecutive select instructions. They
7142 // won't be individually optimised by optimizeInst, so we need to perform
7143 // DbgVariableRecord maintenance here instead.
7144 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7145 fixupDbgVariableRecordsOnInst(*SI);
7146
7147 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7148
7149 // Can we convert the 'select' to CF ?
7150 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7151 return false;
7152
7153 TargetLowering::SelectSupportKind SelectKind;
7154 if (SI->getType()->isVectorTy())
7155 SelectKind = TargetLowering::ScalarCondVectorVal;
7156 else
7157 SelectKind = TargetLowering::ScalarValSelect;
7158
7159 if (TLI->isSelectSupported(SelectKind) &&
7160 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
7161 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7162 return false;
7163
7164 // The DominatorTree needs to be rebuilt by any consumers after this
7165 // transformation. We simply reset here rather than setting the ModifiedDT
7166 // flag to avoid restarting the function walk in runOnFunction for each
7167 // select optimized.
7168 DT.reset();
7169
7170 // Transform a sequence like this:
7171 // start:
7172 // %cmp = cmp uge i32 %a, %b
7173 // %sel = select i1 %cmp, i32 %c, i32 %d
7174 //
7175 // Into:
7176 // start:
7177 // %cmp = cmp uge i32 %a, %b
7178 // %cmp.frozen = freeze %cmp
7179 // br i1 %cmp.frozen, label %select.true, label %select.false
7180 // select.true:
7181 // br label %select.end
7182 // select.false:
7183 // br label %select.end
7184 // select.end:
7185 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7186 //
7187 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7188 // In addition, we may sink instructions that produce %c or %d from
7189 // the entry block into the destination(s) of the new branch.
7190 // If the true or false blocks do not contain a sunken instruction, that
7191 // block and its branch may be optimized away. In that case, one side of the
7192 // first branch will point directly to select.end, and the corresponding PHI
7193 // predecessor block will be the start block.
7194
7195 // Collect values that go on the true side and the values that go on the false
7196 // side.
7197 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7198 for (SelectInst *SI : ASI) {
7199 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7200 TrueInstrs.push_back(cast<Instruction>(V));
7201 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7202 FalseInstrs.push_back(cast<Instruction>(V));
7203 }
7204
7205 // Split the select block, according to how many (if any) values go on each
7206 // side.
7207 BasicBlock *StartBlock = SI->getParent();
7208 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7209 // We should split before any debug-info.
7210 SplitPt.setHeadBit(true);
7211
7212 IRBuilder<> IB(SI);
7213 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7214
7215 BasicBlock *TrueBlock = nullptr;
7216 BasicBlock *FalseBlock = nullptr;
7217 BasicBlock *EndBlock = nullptr;
7218 BranchInst *TrueBranch = nullptr;
7219 BranchInst *FalseBranch = nullptr;
7220 if (TrueInstrs.size() == 0) {
7221 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7222 CondFr, SplitPt, false, nullptr, nullptr, LI));
7223 FalseBlock = FalseBranch->getParent();
7224 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7225 } else if (FalseInstrs.size() == 0) {
7226 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7227 CondFr, SplitPt, false, nullptr, nullptr, LI));
7228 TrueBlock = TrueBranch->getParent();
7229 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7230 } else {
7231 Instruction *ThenTerm = nullptr;
7232 Instruction *ElseTerm = nullptr;
7233 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7234 nullptr, nullptr, LI);
7235 TrueBranch = cast<BranchInst>(ThenTerm);
7236 FalseBranch = cast<BranchInst>(ElseTerm);
7237 TrueBlock = TrueBranch->getParent();
7238 FalseBlock = FalseBranch->getParent();
7239 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7240 }
7241
7242 EndBlock->setName("select.end");
7243 if (TrueBlock)
7244 TrueBlock->setName("select.true.sink");
7245 if (FalseBlock)
7246 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7247 : "select.false.sink");
7248
7249 if (IsHugeFunc) {
7250 if (TrueBlock)
7251 FreshBBs.insert(TrueBlock);
7252 if (FalseBlock)
7253 FreshBBs.insert(FalseBlock);
7254 FreshBBs.insert(EndBlock);
7255 }
7256
7257 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7258
7259 static const unsigned MD[] = {
7260 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7261 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7262 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7263
7264 // Sink expensive instructions into the conditional blocks to avoid executing
7265 // them speculatively.
7266 for (Instruction *I : TrueInstrs)
7267 I->moveBefore(TrueBranch);
7268 for (Instruction *I : FalseInstrs)
7269 I->moveBefore(FalseBranch);
7270
7271 // If we did not create a new block for one of the 'true' or 'false' paths
7272 // of the condition, it means that side of the branch goes to the end block
7273 // directly and the path originates from the start block from the point of
7274 // view of the new PHI.
7275 if (TrueBlock == nullptr)
7276 TrueBlock = StartBlock;
7277 else if (FalseBlock == nullptr)
7278 FalseBlock = StartBlock;
7279
7280 SmallPtrSet<const Instruction *, 2> INS;
7281 INS.insert(ASI.begin(), ASI.end());
7282 // Use reverse iterator because later select may use the value of the
7283 // earlier select, and we need to propagate value through earlier select
7284 // to get the PHI operand.
7285 for (SelectInst *SI : llvm::reverse(ASI)) {
7286 // The select itself is replaced with a PHI Node.
7287 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7288 PN->insertBefore(EndBlock->begin());
7289 PN->takeName(SI);
7290 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7291 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7292 PN->setDebugLoc(SI->getDebugLoc());
7293
7294 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7295 SI->eraseFromParent();
7296 INS.erase(SI);
7297 ++NumSelectsExpanded;
7298 }
7299
7300 // Instruct OptimizeBlock to skip to the next block.
7301 CurInstIterator = StartBlock->end();
7302 return true;
7303}
7304
7305 /// Some targets only accept certain types for splat inputs. For example, a VDUP
7306 /// in MVE takes a GPR (integer) register, and the instructions that incorporate
7307 /// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7308bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7309 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7310 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7311 m_Undef(), m_ZeroMask())))
7312 return false;
7313 Type *NewType = TLI->shouldConvertSplatType(SVI);
7314 if (!NewType)
7315 return false;
7316
7317 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7318 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7319 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7320 "Expected a type of the same size!");
7321 auto *NewVecType =
7322 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7323
7324 // Create a bitcast (shuffle (insert (bitcast(..))))
7325 IRBuilder<> Builder(SVI->getContext());
7326 Builder.SetInsertPoint(SVI);
7327 Value *BC1 = Builder.CreateBitCast(
7328 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7329 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7330 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7331
7332 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7333 RecursivelyDeleteTriviallyDeadInstructions(
7334 SVI, TLInfo, nullptr,
7335 [&](Value *V) { removeAllAssertingVHReferences(V); });
7336
7337 // Also hoist the bitcast up to its operand if they are not in the same
7338 // block.
7339 if (auto *BCI = dyn_cast<Instruction>(BC1))
7340 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7341 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7342 !Op->isTerminator() && !Op->isEHPad())
7343 BCI->moveAfter(Op);
7344
7345 return true;
7346}
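// Illustrative sketch (hypothetical IR, for exposition only): if
// shouldConvertSplatType asks for an i32 splat (as on MVE), a float splat
//   %ins = insertelement <4 x float> poison, float %f, i64 0
//   %spl = shufflevector <4 x float> %ins, <4 x float> poison, <4 x i32> zeroinitializer
// becomes a splat of the bitcast scalar, cast back to the original type:
//   %b    = bitcast float %f to i32
//   %ins2 = insertelement <4 x i32> poison, i32 %b, i64 0
//   %spl2 = shufflevector <4 x i32> %ins2, <4 x i32> poison, <4 x i32> zeroinitializer
//   %res  = bitcast <4 x i32> %spl2 to <4 x float>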
7347
7348bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7349 // If the operands of I can be folded into a target instruction together with
7350 // I, duplicate and sink them.
7351 SmallVector<Use *, 4> OpsToSink;
7352 if (!TLI->shouldSinkOperands(I, OpsToSink))
7353 return false;
7354
7355 // OpsToSink can contain multiple uses in a use chain (e.g.
7356 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7357 // uses must come first, so we process the ops in reverse order so as to not
7358 // create invalid IR.
7359 BasicBlock *TargetBB = I->getParent();
7360 bool Changed = false;
7361 SmallVector<Use *, 4> ToReplace;
7362 Instruction *InsertPoint = I;
7363 DenseMap<const Instruction *, unsigned long> InstOrdering;
7364 unsigned long InstNumber = 0;
7365 for (const auto &I : *TargetBB)
7366 InstOrdering[&I] = InstNumber++;
7367
7368 for (Use *U : reverse(OpsToSink)) {
7369 auto *UI = cast<Instruction>(U->get());
7370 if (isa<PHINode>(UI))
7371 continue;
7372 if (UI->getParent() == TargetBB) {
7373 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7374 InsertPoint = UI;
7375 continue;
7376 }
7377 ToReplace.push_back(U);
7378 }
7379
7380 SetVector<Instruction *> MaybeDead;
7381 DenseMap<Instruction *, Instruction *> NewInstructions;
7382 for (Use *U : ToReplace) {
7383 auto *UI = cast<Instruction>(U->get());
7384 Instruction *NI = UI->clone();
7385
7386 if (IsHugeFunc) {
7387 // Now that we have cloned this instruction, its operands' defs may be sunk
7388 // into this BB as well, so put those defs' BBs into FreshBBs for re-optimization.
7389 for (unsigned I = 0; I < NI->getNumOperands(); ++I) {
7390 auto *OpDef = dyn_cast<Instruction>(NI->getOperand(I));
7391 if (!OpDef)
7392 continue;
7393 FreshBBs.insert(OpDef->getParent());
7394 }
7395 }
7396
7397 NewInstructions[UI] = NI;
7398 MaybeDead.insert(UI);
7399 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7400 NI->insertBefore(InsertPoint);
7401 InsertPoint = NI;
7402 InsertedInsts.insert(NI);
7403
7404 // Update the use for the new instruction, making sure that we update the
7405 // sunk instruction uses, if it is part of a chain that has already been
7406 // sunk.
7407 Instruction *OldI = cast<Instruction>(U->getUser());
7408 if (NewInstructions.count(OldI))
7409 NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
7410 else
7411 U->set(NI);
7412 Changed = true;
7413 }
7414
7415 // Remove instructions that are dead after sinking.
7416 for (auto *I : MaybeDead) {
7417 if (!I->hasNUsesOrMore(1)) {
7418 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7419 I->eraseFromParent();
7420 }
7421 }
7422
7423 return Changed;
7424}
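// Illustrative sketch (hypothetical IR, for exposition only): if the target's
// shouldSinkOperands hook reports that a splat feeding a multiply can be
// folded into it (e.g. a multiply-by-lane instruction),
//   bb0:  %s = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> zeroinitializer
//   bb1:  %m = mul <4 x i32> %w, %s
// the splat is duplicated and sunk into bb1 next to %m, so instruction
// selection sees the whole pattern in one block; the original %s is erased
// if it has no remaining users.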
7425
7426bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7427 Value *Cond = SI->getCondition();
7428 Type *OldType = Cond->getType();
7429 LLVMContext &Context = Cond->getContext();
7430 EVT OldVT = TLI->getValueType(*DL, OldType);
7431 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7432 unsigned RegWidth = RegType.getSizeInBits();
7433
7434 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7435 return false;
7436
7437 // If the register width is greater than the type width, expand the condition
7438 // of the switch instruction and each case constant to the width of the
7439 // register. By widening the type of the switch condition, subsequent
7440 // comparisons (for case comparisons) will not need to be extended to the
7441 // preferred register width, so we will potentially eliminate N-1 extends,
7442 // where N is the number of cases in the switch.
7443 auto *NewType = Type::getIntNTy(Context, RegWidth);
7444
7445 // Extend the switch condition and case constants using the target preferred
7446 // extend unless the switch condition is a function argument with an extend
7447 // attribute. In that case, we can avoid an unnecessary mask/extension by
7448 // matching the argument extension instead.
7449 Instruction::CastOps ExtType = Instruction::ZExt;
7450 // Some targets prefer SExt over ZExt.
7451 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7452 ExtType = Instruction::SExt;
7453
7454 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7455 if (Arg->hasSExtAttr())
7456 ExtType = Instruction::SExt;
7457 if (Arg->hasZExtAttr())
7458 ExtType = Instruction::ZExt;
7459 }
7460
7461 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7462 ExtInst->insertBefore(SI);
7463 ExtInst->setDebugLoc(SI->getDebugLoc());
7464 SI->setCondition(ExtInst);
7465 for (auto Case : SI->cases()) {
7466 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7467 APInt WideConst = (ExtType == Instruction::ZExt)
7468 ? NarrowConst.zext(RegWidth)
7469 : NarrowConst.sext(RegWidth);
7470 Case.setValue(ConstantInt::get(Context, WideConst));
7471 }
7472
7473 return true;
7474}
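// Illustrative sketch (hypothetical IR, for exposition only): with a
// preferred switch-condition register width of 32 bits,
//   switch i8 %c, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// is widened by optimizeSwitchType to
//   %c.ext = zext i8 %c to i32
//   switch i32 %c.ext, label %def [ i32 1, label %a
//                                   i32 2, label %b ]
// using sext instead when the target prefers it or the argument already
// carries a signext attribute.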
7475
7476bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7477 // The SCCP optimization tends to produce code like this:
7478 // switch(x) { case 42: phi(42, ...) }
7479 // Materializing the constant for the phi-argument needs instructions; So we
7480 // change the code to:
7481 // switch(x) { case 42: phi(x, ...) }
7482
7483 Value *Condition = SI->getCondition();
7484 // Avoid endless loop in degenerate case.
7485 if (isa<ConstantInt>(*Condition))
7486 return false;
7487
7488 bool Changed = false;
7489 BasicBlock *SwitchBB = SI->getParent();
7490 Type *ConditionType = Condition->getType();
7491
7492 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7493 ConstantInt *CaseValue = Case.getCaseValue();
7494 BasicBlock *CaseBB = Case.getCaseSuccessor();
7495 // Set to true if we previously checked that `CaseBB` is only reached by
7496 // a single case from this switch.
7497 bool CheckedForSinglePred = false;
7498 for (PHINode &PHI : CaseBB->phis()) {
7499 Type *PHIType = PHI.getType();
7500 // If ZExt is free then we can also catch patterns like this:
7501 // switch((i32)x) { case 42: phi((i64)42, ...); }
7502 // and replace `(i64)42` with `zext i32 %x to i64`.
7503 bool TryZExt =
7504 PHIType->isIntegerTy() &&
7505 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7506 TLI->isZExtFree(ConditionType, PHIType);
7507 if (PHIType == ConditionType || TryZExt) {
7508 // Set to true to skip this case because of multiple preds.
7509 bool SkipCase = false;
7510 Value *Replacement = nullptr;
7511 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7512 Value *PHIValue = PHI.getIncomingValue(I);
7513 if (PHIValue != CaseValue) {
7514 if (!TryZExt)
7515 continue;
7516 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7517 if (!PHIValueInt ||
7518 PHIValueInt->getValue() !=
7519 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7520 continue;
7521 }
7522 if (PHI.getIncomingBlock(I) != SwitchBB)
7523 continue;
7524 // We cannot optimize if there are multiple case labels jumping to
7525 // this block. This check may get expensive when there are many
7526 // case labels so we test for it last.
7527 if (!CheckedForSinglePred) {
7528 CheckedForSinglePred = true;
7529 if (SI->findCaseDest(CaseBB) == nullptr) {
7530 SkipCase = true;
7531 break;
7532 }
7533 }
7534
7535 if (Replacement == nullptr) {
7536 if (PHIValue == CaseValue) {
7537 Replacement = Condition;
7538 } else {
7539 IRBuilder<> Builder(SI);
7540 Replacement = Builder.CreateZExt(Condition, PHIType);
7541 }
7542 }
7543 PHI.setIncomingValue(I, Replacement);
7544 Changed = true;
7545 }
7546 if (SkipCase)
7547 break;
7548 }
7549 }
7550 }
7551 return Changed;
7552}
7553
7554bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7555 bool Changed = optimizeSwitchType(SI);
7556 Changed |= optimizeSwitchPhiConstants(SI);
7557 return Changed;
7558}
7559
7560namespace {
7561
7562/// Helper class to promote a scalar operation to a vector one.
7563 /// This class is used to move an extractelement transition downward.
7564/// E.g.,
7565/// a = vector_op <2 x i32>
7566/// b = extractelement <2 x i32> a, i32 0
7567/// c = scalar_op b
7568/// store c
7569///
7570/// =>
7571/// a = vector_op <2 x i32>
7572/// c = vector_op a (equivalent to scalar_op on the related lane)
7573/// * d = extractelement <2 x i32> c, i32 0
7574/// * store d
7575 /// Assuming both extractelement and store can be combined, we get rid of the
7576/// transition.
7577class VectorPromoteHelper {
7578 /// DataLayout associated with the current module.
7579 const DataLayout &DL;
7580
7581 /// Used to perform some checks on the legality of vector operations.
7582 const TargetLowering &TLI;
7583
7584 /// Used to estimate the cost of the promoted chain.
7585 const TargetTransformInfo &TTI;
7586
7587 /// The transition being moved downwards.
7588 Instruction *Transition;
7589
7590 /// The sequence of instructions to be promoted.
7591 SmallVector<Instruction *, 4> InstsToBePromoted;
7592
7593 /// Cost of combining a store and an extract.
7594 unsigned StoreExtractCombineCost;
7595
7596 /// Instruction that will be combined with the transition.
7597 Instruction *CombineInst = nullptr;
7598
7599 /// The instruction that represents the current end of the transition.
7600 /// Since we are faking the promotion until we reach the end of the chain
7601 /// of computation, we need a way to get the current end of the transition.
7602 Instruction *getEndOfTransition() const {
7603 if (InstsToBePromoted.empty())
7604 return Transition;
7605 return InstsToBePromoted.back();
7606 }
7607
7608 /// Return the index of the original value in the transition.
7609 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7610 /// c, is at index 0.
7611 unsigned getTransitionOriginalValueIdx() const {
7612 assert(isa<ExtractElementInst>(Transition) &&
7613 "Other kind of transitions are not supported yet");
7614 return 0;
7615 }
7616
7617 /// Return the index of the index in the transition.
7618 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7619 /// is at index 1.
7620 unsigned getTransitionIdx() const {
7621 assert(isa<ExtractElementInst>(Transition) &&
7622 "Other kind of transitions are not supported yet");
7623 return 1;
7624 }
7625
7626 /// Get the type of the transition.
7627 /// This is the type of the original value.
7628 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7629 /// transition is <2 x i32>.
7630 Type *getTransitionType() const {
7631 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7632 }
7633
7634 /// Promote \p ToBePromoted by moving \p Def downward through it.
7635 /// I.e., we have the following sequence:
7636 /// Def = Transition <ty1> a to <ty2>
7637 /// b = ToBePromoted <ty2> Def, ...
7638 /// =>
7639 /// b = ToBePromoted <ty1> a, ...
7640 /// Def = Transition <ty1> ToBePromoted to <ty2>
7641 void promoteImpl(Instruction *ToBePromoted);
7642
7643 /// Check whether or not it is profitable to promote all the
7644 /// instructions enqueued to be promoted.
7645 bool isProfitableToPromote() {
7646 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7647 unsigned Index = isa<ConstantInt>(ValIdx)
7648 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7649 : -1;
7650 Type *PromotedType = getTransitionType();
7651
7652 StoreInst *ST = cast<StoreInst>(CombineInst);
7653 unsigned AS = ST->getPointerAddressSpace();
7654     // Check if this store is supported.
7655     if (!TLI.allowsMisalignedMemoryAccesses(
7656 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
7657 ST->getAlign())) {
7658 // If this is not supported, there is no way we can combine
7659 // the extract with the store.
7660 return false;
7661 }
7662
7663 // The scalar chain of computation has to pay for the transition
7664 // scalar to vector.
7665 // The vector chain has to account for the combining cost.
7666     enum TargetTransformInfo::TargetCostKind CostKind =
7667         TargetTransformInfo::TCK_RecipThroughput;
7668     InstructionCost ScalarCost =
7669 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
7670 InstructionCost VectorCost = StoreExtractCombineCost;
7671 for (const auto &Inst : InstsToBePromoted) {
7672 // Compute the cost.
7673 // By construction, all instructions being promoted are arithmetic ones.
7674 // Moreover, one argument is a constant that can be viewed as a splat
7675 // constant.
7676 Value *Arg0 = Inst->getOperand(0);
7677 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7678 isa<ConstantFP>(Arg0);
7679 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
7680       if (IsArg0Constant)
7681         Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7682       else
7683         Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7684
7685 ScalarCost += TTI.getArithmeticInstrCost(
7686 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
7687 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7688 CostKind, Arg0Info, Arg1Info);
7689 }
7690 LLVM_DEBUG(
7691 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
7692 << ScalarCost << "\nVector: " << VectorCost << '\n');
7693 return ScalarCost > VectorCost;
7694 }
7695
7696 /// Generate a constant vector with \p Val with the same
7697 /// number of elements as the transition.
7698 /// \p UseSplat defines whether or not \p Val should be replicated
7699 /// across the whole vector.
7700 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7701 /// otherwise we generate a vector with as many undef as possible:
7702 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
7703 /// used at the index of the extract.
7704 Value *getConstantVector(Constant *Val, bool UseSplat) const {
7705 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7706 if (!UseSplat) {
7707 // If we cannot determine where the constant must be, we have to
7708 // use a splat constant.
7709 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
7710 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
7711 ExtractIdx = CstVal->getSExtValue();
7712 else
7713 UseSplat = true;
7714 }
7715
7716 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
7717 if (UseSplat)
7718 return ConstantVector::getSplat(EC, Val);
7719
7720     if (!EC.isScalable()) {
7721       SmallVector<Constant *, 4> ConstVec;
7722 UndefValue *UndefVal = UndefValue::get(Val->getType());
7723 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
7724 if (Idx == ExtractIdx)
7725 ConstVec.push_back(Val);
7726 else
7727 ConstVec.push_back(UndefVal);
7728 }
7729 return ConstantVector::get(ConstVec);
7730     } else
7731       llvm_unreachable(
7732 "Generate scalable vector for non-splat is unimplemented");
7733 }
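  // Illustrative example (hypothetical values): for Val = i32 7 on a <4 x i32>
  // transition whose extract index is the constant 2, UseSplat == false yields
  // <i32 undef, i32 undef, i32 7, i32 undef>, while UseSplat == true (or a
  // non-constant extract index) yields <i32 7, i32 7, i32 7, i32 7>.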
7734
7735   /// Check whether promoting the operand at \p OperandIdx of \p Use to a
7736   /// vector type can trigger undefined behavior.
7737 static bool canCauseUndefinedBehavior(const Instruction *Use,
7738 unsigned OperandIdx) {
7739     // It is not safe to introduce undef when the operand is on
7740     // the right-hand side of a division-like instruction.
7741 if (OperandIdx != 1)
7742 return false;
7743 switch (Use->getOpcode()) {
7744 default:
7745 return false;
7746 case Instruction::SDiv:
7747 case Instruction::UDiv:
7748 case Instruction::SRem:
7749 case Instruction::URem:
7750 return true;
7751 case Instruction::FDiv:
7752 case Instruction::FRem:
7753 return !Use->hasNoNaNs();
7754 }
7755 llvm_unreachable(nullptr);
7756 }
7757
7758public:
7759 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
7760 const TargetTransformInfo &TTI, Instruction *Transition,
7761 unsigned CombineCost)
7762 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
7763 StoreExtractCombineCost(CombineCost) {
7764 assert(Transition && "Do not know how to promote null");
7765 }
7766
7767 /// Check if we can promote \p ToBePromoted to \p Type.
7768 bool canPromote(const Instruction *ToBePromoted) const {
7769 // We could support CastInst too.
7770 return isa<BinaryOperator>(ToBePromoted);
7771 }
7772
7773 /// Check if it is profitable to promote \p ToBePromoted
7774   /// by moving the transition downward through it.
7775 bool shouldPromote(const Instruction *ToBePromoted) const {
7776 // Promote only if all the operands can be statically expanded.
7777 // Indeed, we do not want to introduce any new kind of transitions.
7778 for (const Use &U : ToBePromoted->operands()) {
7779 const Value *Val = U.get();
7780 if (Val == getEndOfTransition()) {
7781 // If the use is a division and the transition is on the rhs,
7782 // we cannot promote the operation, otherwise we may create a
7783 // division by zero.
7784 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
7785 return false;
7786 continue;
7787 }
7788 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
7789 !isa<ConstantFP>(Val))
7790 return false;
7791 }
7792 // Check that the resulting operation is legal.
7793 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
7794 if (!ISDOpcode)
7795 return false;
7796     return StressStoreExtract ||
7797            TLI.isOperationLegalOrCustom(
7798 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
7799 }
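  // Illustrative example (hypothetical IR): with the transition
  //   %e = extractelement <2 x i32> %v, i32 0
  // a use such as "add i32 %e, 7" can be promoted, because the other operand is
  // a ConstantInt that can be expanded to a splat, whereas "add i32 %e, %x"
  // cannot, since %x would require introducing another scalar-to-vector
  // transition.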
7800
7801 /// Check whether or not \p Use can be combined
7802 /// with the transition.
7803 /// I.e., is it possible to do Use(Transition) => AnotherUse?
7804 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
7805
7806 /// Record \p ToBePromoted as part of the chain to be promoted.
7807 void enqueueForPromotion(Instruction *ToBePromoted) {
7808 InstsToBePromoted.push_back(ToBePromoted);
7809 }
7810
7811 /// Set the instruction that will be combined with the transition.
7812 void recordCombineInstruction(Instruction *ToBeCombined) {
7813 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
7814 CombineInst = ToBeCombined;
7815 }
7816
7817   /// Promote all the instructions enqueued for promotion if it is
7818   /// profitable.
7819 /// \return True if the promotion happened, false otherwise.
7820 bool promote() {
7821 // Check if there is something to promote.
7822 // Right now, if we do not have anything to combine with,
7823 // we assume the promotion is not profitable.
7824 if (InstsToBePromoted.empty() || !CombineInst)
7825 return false;
7826
7827 // Check cost.
7828 if (!StressStoreExtract && !isProfitableToPromote())
7829 return false;
7830
7831 // Promote.
7832 for (auto &ToBePromoted : InstsToBePromoted)
7833 promoteImpl(ToBePromoted);
7834 InstsToBePromoted.clear();
7835 return true;
7836 }
7837};
7838
7839} // end anonymous namespace
7840
7841void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
7842 // At this point, we know that all the operands of ToBePromoted but Def
7843 // can be statically promoted.
7844 // For Def, we need to use its parameter in ToBePromoted:
7845 // b = ToBePromoted ty1 a
7846 // Def = Transition ty1 b to ty2
7847 // Move the transition down.
7848 // 1. Replace all uses of the promoted operation by the transition.
7849 // = ... b => = ... Def.
7850 assert(ToBePromoted->getType() == Transition->getType() &&
7851 "The type of the result of the transition does not match "
7852 "the final type");
7853 ToBePromoted->replaceAllUsesWith(Transition);
7854 // 2. Update the type of the uses.
7855 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
7856 Type *TransitionTy = getTransitionType();
7857 ToBePromoted->mutateType(TransitionTy);
7858 // 3. Update all the operands of the promoted operation with promoted
7859 // operands.
7860 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
7861 for (Use &U : ToBePromoted->operands()) {
7862 Value *Val = U.get();
7863 Value *NewVal = nullptr;
7864 if (Val == Transition)
7865 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
7866 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
7867 isa<ConstantFP>(Val)) {
7868 // Use a splat constant if it is not safe to use undef.
7869 NewVal = getConstantVector(
7870 cast<Constant>(Val),
7871 isa<UndefValue>(Val) ||
7872 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
7873 } else
7874       llvm_unreachable("Did you modify shouldPromote and forget to update "
7875                        "this?");
7876 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
7877 }
7878 Transition->moveAfter(ToBePromoted);
7879 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
7880}
7881
7882/// Some targets can do store(extractelement) with one instruction.
7883/// Try to push the extractelement towards the stores when the target
7884/// has this feature and this is profitable.
7885bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
7886 unsigned CombineCost = std::numeric_limits<unsigned>::max();
7887   if (DisableStoreExtract ||
7888       (!StressStoreExtract &&
7889        !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
7890                                        Inst->getOperand(1), CombineCost)))
7891 return false;
7892
7893 // At this point we know that Inst is a vector to scalar transition.
7894 // Try to move it down the def-use chain, until:
7895 // - We can combine the transition with its single use
7896 // => we got rid of the transition.
7897 // - We escape the current basic block
7898 // => we would need to check that we are moving it at a cheaper place and
7899 // we do not do that for now.
7900 BasicBlock *Parent = Inst->getParent();
7901 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
7902 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
7903 // If the transition has more than one use, assume this is not going to be
7904 // beneficial.
7905 while (Inst->hasOneUse()) {
7906 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
7907 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
7908
7909 if (ToBePromoted->getParent() != Parent) {
7910 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
7911 << ToBePromoted->getParent()->getName()
7912 << ") than the transition (" << Parent->getName()
7913 << ").\n");
7914 return false;
7915 }
7916
7917 if (VPH.canCombine(ToBePromoted)) {
7918 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
7919 << "will be combined with: " << *ToBePromoted << '\n');
7920 VPH.recordCombineInstruction(ToBePromoted);
7921 bool Changed = VPH.promote();
7922 NumStoreExtractExposed += Changed;
7923 return Changed;
7924 }
7925
7926 LLVM_DEBUG(dbgs() << "Try promoting.\n");
7927 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
7928 return false;
7929
7930 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
7931
7932 VPH.enqueueForPromotion(ToBePromoted);
7933 Inst = ToBePromoted;
7934 }
7935 return false;
7936}
7937
7938/// For the instruction sequence of store below, F and I values
7939/// are bundled together as an i64 value before being stored into memory.
7940/// Sometimes it is more efficient to generate separate stores for F and I,
7941/// which can remove the bitwise instructions or sink them to colder places.
7942///
7943/// (store (or (zext (bitcast F to i32) to i64),
7944/// (shl (zext I to i64), 32)), addr) -->
7945/// (store F, addr) and (store I, addr+4)
7946///
7947/// Similarly, splitting for other merged store can also be beneficial, like:
7948/// For pair of {i32, i32}, i64 store --> two i32 stores.
7949/// For pair of {i32, i16}, i64 store --> two i32 stores.
7950/// For pair of {i16, i16}, i32 store --> two i16 stores.
7951/// For pair of {i16, i8}, i32 store --> two i16 stores.
7952/// For pair of {i8, i8}, i16 store --> two i8 stores.
7953///
7954/// We allow each target to determine specifically which kind of splitting is
7955/// supported.
7956///
7957/// The store patterns are commonly seen from the simple code snippet below
7958 /// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
7959/// void goo(const std::pair<int, float> &);
7960/// hoo() {
7961/// ...
7962/// goo(std::make_pair(tmp, ftmp));
7963/// ...
7964/// }
7965///
7966/// Although we already have similar splitting in DAG Combine, we duplicate
7967 /// it in CodeGenPrepare to catch the case in which the pattern spans
7968 /// multiple BBs. The logic in DAG Combine is kept to catch cases generated
7969 /// during code expansion.
7970 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
7971                                 const TargetLowering &TLI) {
7972 // Handle simple but common cases only.
7973 Type *StoreType = SI.getValueOperand()->getType();
7974
7975 // The code below assumes shifting a value by <number of bits>,
7976 // whereas scalable vectors would have to be shifted by
7977 // <2log(vscale) + number of bits> in order to store the
7978 // low/high parts. Bailing out for now.
7979 if (StoreType->isScalableTy())
7980 return false;
7981
7982 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
7983 DL.getTypeSizeInBits(StoreType) == 0)
7984 return false;
7985
7986 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
7987 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
7988 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
7989 return false;
7990
7991 // Don't split the store if it is volatile.
7992 if (SI.isVolatile())
7993 return false;
7994
7995 // Match the following patterns:
7996 // (store (or (zext LValue to i64),
7997 // (shl (zext HValue to i64), 32)), HalfValBitSize)
7998   // or
7999   // (store (or (shl (zext HValue to i64), 32),
8000   //            (zext LValue to i64)), HalfValBitSize)
8001   // Expect both operands of the OR and the first operand of the SHL to have
8002   // only one use.
8003   Value *LValue, *HValue;
8004   if (!match(SI.getValueOperand(),
8005              m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8006                     m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8007                                    m_SpecificInt(HalfValBitSize))))))
8008 return false;
8009
8010   // Check that LValue and HValue are integers no wider than HalfValBitSize.
8011 if (!LValue->getType()->isIntegerTy() ||
8012 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8013 !HValue->getType()->isIntegerTy() ||
8014 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8015 return false;
8016
8017 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8018 // as the input of target query.
8019 auto *LBC = dyn_cast<BitCastInst>(LValue);
8020 auto *HBC = dyn_cast<BitCastInst>(HValue);
8021 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8022 : EVT::getEVT(LValue->getType());
8023 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8024 : EVT::getEVT(HValue->getType());
8025 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8026 return false;
8027
8028 // Start to split store.
8029 IRBuilder<> Builder(SI.getContext());
8030 Builder.SetInsertPoint(&SI);
8031
8032 // If LValue/HValue is a bitcast in another BB, create a new one in current
8033   // BB so it may be merged with the split stores by the DAG combiner.
8034 if (LBC && LBC->getParent() != SI.getParent())
8035 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8036 if (HBC && HBC->getParent() != SI.getParent())
8037 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8038
8039 bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
8040 auto CreateSplitStore = [&](Value *V, bool Upper) {
8041 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8042 Value *Addr = SI.getPointerOperand();
8043 Align Alignment = SI.getAlign();
8044 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8045 if (IsOffsetStore) {
8046 Addr = Builder.CreateGEP(
8047 SplitStoreType, Addr,
8048 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8049
8050 // When splitting the store in half, naturally one half will retain the
8051 // alignment of the original wider store, regardless of whether it was
8052 // over-aligned or not, while the other will require adjustment.
8053 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8054 }
8055 Builder.CreateAlignedStore(V, Addr, Alignment);
8056 };
8057
8058 CreateSplitStore(LValue, false);
8059 CreateSplitStore(HValue, true);
8060
8061 // Delete the old store.
8062 SI.eraseFromParent();
8063 return true;
8064}
8065
8066// Return true if the GEP has two operands, the first operand is of a sequential
8067 // type, and the second operand is a constant.
8068 static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8069   gep_type_iterator I = gep_type_begin(*GEP);
8070   return GEP->getNumOperands() == 2 && I.isSequential() &&
8071 isa<ConstantInt>(GEP->getOperand(1));
8072}
8073
8074// Try unmerging GEPs to reduce liveness interference (register pressure) across
8075// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8076// reducing liveness interference across those edges benefits global register
8077// allocation. Currently handles only certain cases.
8078//
8079// For example, unmerge %GEPI and %UGEPI as below.
8080//
8081// ---------- BEFORE ----------
8082// SrcBlock:
8083// ...
8084// %GEPIOp = ...
8085// ...
8086// %GEPI = gep %GEPIOp, Idx
8087// ...
8088// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8089// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8090 // (* %GEPIOp is alive on the indirectbr edges only because it is used by
8091// %UGEPI)
8092//
8093// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8094// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8095// ...
8096//
8097// DstBi:
8098// ...
8099// %UGEPI = gep %GEPIOp, UIdx
8100// ...
8101// ---------------------------
8102//
8103// ---------- AFTER ----------
8104// SrcBlock:
8105// ... (same as above)
8106// (* %GEPI is still alive on the indirectbr edges)
8107// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8108// unmerging)
8109// ...
8110//
8111// DstBi:
8112// ...
8113// %UGEPI = gep %GEPI, (UIdx-Idx)
8114// ...
8115// ---------------------------
8116//
8117// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8118// no longer alive on them.
8119//
8120 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8121 // of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8122 // not to disable further simplifications and optimizations as a result of GEP
8123// merging.
8124//
8125// Note this unmerging may increase the length of the data flow critical path
8126// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8127// between the register pressure and the length of data-flow critical
8128// path. Restricting this to the uncommon IndirectBr case would minimize the
8129// impact of potentially longer critical path, if any, and the impact on compile
8130// time.
8131 static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8132                                              const TargetTransformInfo *TTI) {
8133 BasicBlock *SrcBlock = GEPI->getParent();
8134 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8135 // (non-IndirectBr) cases exit early here.
8136 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8137 return false;
8138 // Check that GEPI is a simple gep with a single constant index.
8139 if (!GEPSequentialConstIndexed(GEPI))
8140 return false;
8141 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8142 // Check that GEPI is a cheap one.
8143   if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8144                          TargetTransformInfo::TCK_SizeAndLatency) >
8145       TargetTransformInfo::TCC_Basic)
8146     return false;
8147 Value *GEPIOp = GEPI->getOperand(0);
8148 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8149 if (!isa<Instruction>(GEPIOp))
8150 return false;
8151 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8152 if (GEPIOpI->getParent() != SrcBlock)
8153 return false;
8154 // Check that GEP is used outside the block, meaning it's alive on the
8155 // IndirectBr edge(s).
8156 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8157 if (auto *I = dyn_cast<Instruction>(Usr)) {
8158 if (I->getParent() != SrcBlock) {
8159 return true;
8160 }
8161 }
8162 return false;
8163 }))
8164 return false;
8165 // The second elements of the GEP chains to be unmerged.
8166 std::vector<GetElementPtrInst *> UGEPIs;
8167 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8168 // on IndirectBr edges.
8169 for (User *Usr : GEPIOp->users()) {
8170 if (Usr == GEPI)
8171 continue;
8172 // Check if Usr is an Instruction. If not, give up.
8173 if (!isa<Instruction>(Usr))
8174 return false;
8175 auto *UI = cast<Instruction>(Usr);
8176     // If Usr is in the same block as GEPIOp, that is fine; skip it.
8177 if (UI->getParent() == SrcBlock)
8178 continue;
8179 // Check if Usr is a GEP. If not, give up.
8180 if (!isa<GetElementPtrInst>(Usr))
8181 return false;
8182 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8183 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8184 // the pointer operand to it. If so, record it in the vector. If not, give
8185 // up.
8186 if (!GEPSequentialConstIndexed(UGEPI))
8187 return false;
8188 if (UGEPI->getOperand(0) != GEPIOp)
8189 return false;
8190 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8191 return false;
8192 if (GEPIIdx->getType() !=
8193 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8194 return false;
8195 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8196     if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8197                            TargetTransformInfo::TCK_SizeAndLatency) >
8198         TargetTransformInfo::TCC_Basic)
8199       return false;
8200 UGEPIs.push_back(UGEPI);
8201 }
8202 if (UGEPIs.size() == 0)
8203 return false;
8204 // Check the materializing cost of (Uidx-Idx).
8205 for (GetElementPtrInst *UGEPI : UGEPIs) {
8206 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8207 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8208     InstructionCost ImmCost = TTI->getIntImmCost(
8209         NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8210 if (ImmCost > TargetTransformInfo::TCC_Basic)
8211 return false;
8212 }
8213 // Now unmerge between GEPI and UGEPIs.
8214 for (GetElementPtrInst *UGEPI : UGEPIs) {
8215 UGEPI->setOperand(0, GEPI);
8216 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8217 Constant *NewUGEPIIdx = ConstantInt::get(
8218 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8219 UGEPI->setOperand(1, NewUGEPIIdx);
8220 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8221 // inbounds to avoid UB.
8222 if (!GEPI->isInBounds()) {
8223 UGEPI->setIsInBounds(false);
8224 }
8225 }
8226 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8227 // alive on IndirectBr edges).
8228 assert(llvm::none_of(GEPIOp->users(),
8229 [&](User *Usr) {
8230 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8231 }) &&
8232 "GEPIOp is used outside SrcBlock");
8233 return true;
8234}
8235
8236static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8237                            SmallSet<BasicBlock *, 32> &FreshBBs,
8238                            bool IsHugeFunc) {
8239 // Try and convert
8240 // %c = icmp ult %x, 8
8241 // br %c, bla, blb
8242 // %tc = lshr %x, 3
8243 // to
8244 // %tc = lshr %x, 3
8245 // %c = icmp eq %tc, 0
8246 // br %c, bla, blb
8247 // Creating the cmp to zero can be better for the backend, especially if the
8248 // lshr produces flags that can be used automatically.
8249 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8250 return false;
8251
8252 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8253 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8254 return false;
8255
8256 Value *X = Cmp->getOperand(0);
8257 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8258
8259 for (auto *U : X->users()) {
8260 Instruction *UI = dyn_cast<Instruction>(U);
8261 // A quick dominance check
8262 if (!UI ||
8263 (UI->getParent() != Branch->getParent() &&
8264 UI->getParent() != Branch->getSuccessor(0) &&
8265 UI->getParent() != Branch->getSuccessor(1)) ||
8266         (UI->getParent() != Branch->getParent() &&
8267          !UI->getParent()->getSinglePredecessor()))
8268       continue;
8269
8270 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8271 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8272 IRBuilder<> Builder(Branch);
8273 if (UI->getParent() != Branch->getParent())
8274 UI->moveBefore(Branch);
8275 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8276 ConstantInt::get(UI->getType(), 0));
8277 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8278 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8279 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8280 return true;
8281 }
8282 if (Cmp->isEquality() &&
8283 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8284 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
8285 IRBuilder<> Builder(Branch);
8286 if (UI->getParent() != Branch->getParent())
8287 UI->moveBefore(Branch);
8288 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8289 ConstantInt::get(UI->getType(), 0));
8290 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8291 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8292 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8293 return true;
8294 }
8295 }
8296 return false;
8297}
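// Illustrative example (hypothetical IR) of the equality form handled above:
//   %c = icmp eq i32 %x, 5
//   br i1 %c, label %bla, label %blb
//   ...
//   %d = sub i32 %x, 5
// becomes
//   %d = sub i32 %x, 5
//   %c = icmp eq i32 %d, 0
//   br i1 %c, label %bla, label %blb
// so the compare can reuse flags produced by the subtraction.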
8298
8299bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8300 bool AnyChange = false;
8301 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8302
8303 // Bail out if we inserted the instruction to prevent optimizations from
8304 // stepping on each other's toes.
8305 if (InsertedInsts.count(I))
8306 return AnyChange;
8307
8308 // TODO: Move into the switch on opcode below here.
8309 if (PHINode *P = dyn_cast<PHINode>(I)) {
8310 // It is possible for very late stage optimizations (such as SimplifyCFG)
8311 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8312 // trivial PHI, go ahead and zap it here.
8313 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8314 LargeOffsetGEPMap.erase(P);
8315 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8316 P->eraseFromParent();
8317 ++NumPHIsElim;
8318 return true;
8319 }
8320 return AnyChange;
8321 }
8322
8323 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8324 // If the source of the cast is a constant, then this should have
8325 // already been constant folded. The only reason NOT to constant fold
8326 // it is if something (e.g. LSR) was careful to place the constant
8327     // evaluation in a block other than the one that uses it (e.g. to hoist
8328 // the address of globals out of a loop). If this is the case, we don't
8329 // want to forward-subst the cast.
8330 if (isa<Constant>(CI->getOperand(0)))
8331 return AnyChange;
8332
8333 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8334 return true;
8335
8336 if ((isa<UIToFPInst>(I) || isa<FPToUIInst>(I) || isa<TruncInst>(I)) &&
8337         TLI->optimizeExtendOrTruncateConversion(
8338             I, LI->getLoopFor(I->getParent()), *TTI))
8339 return true;
8340
8341 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8342 /// Sink a zext or sext into its user blocks if the target type doesn't
8343 /// fit in one register
8344 if (TLI->getTypeAction(CI->getContext(),
8345 TLI->getValueType(*DL, CI->getType())) ==
8346 TargetLowering::TypeExpandInteger) {
8347 return SinkCast(CI);
8348 } else {
8349         if (TLI->optimizeExtendOrTruncateConversion(
8350                 I, LI->getLoopFor(I->getParent()), *TTI))
8351 return true;
8352
8353 bool MadeChange = optimizeExt(I);
8354 return MadeChange | optimizeExtUses(I);
8355 }
8356 }
8357 return AnyChange;
8358 }
8359
8360 if (auto *Cmp = dyn_cast<CmpInst>(I))
8361 if (optimizeCmp(Cmp, ModifiedDT))
8362 return true;
8363
8364 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8365 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8366 bool Modified = optimizeLoadExt(LI);
8367 unsigned AS = LI->getPointerAddressSpace();
8368 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8369 return Modified;
8370 }
8371
8372 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8373 if (splitMergedValStore(*SI, *DL, *TLI))
8374 return true;
8375 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8376 unsigned AS = SI->getPointerAddressSpace();
8377 return optimizeMemoryInst(I, SI->getOperand(1),
8378 SI->getOperand(0)->getType(), AS);
8379 }
8380
8381 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8382 unsigned AS = RMW->getPointerAddressSpace();
8383 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8384 }
8385
8386 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8387 unsigned AS = CmpX->getPointerAddressSpace();
8388 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8389 CmpX->getCompareOperand()->getType(), AS);
8390 }
8391
8392 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8393
8394 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8395 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8396 return true;
8397
8398 // TODO: Move this into the switch on opcode - it handles shifts already.
8399 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8400 BinOp->getOpcode() == Instruction::LShr)) {
8401 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8402 if (CI && TLI->hasExtractBitsInsn())
8403 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8404 return true;
8405 }
8406
8407 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8408 if (GEPI->hasAllZeroIndices()) {
8409 /// The GEP operand must be a pointer, so must its result -> BitCast
8410 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8411 GEPI->getName(), GEPI->getIterator());
8412 NC->setDebugLoc(GEPI->getDebugLoc());
8413 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8414       RecursivelyDeleteTriviallyDeadInstructions(
8415           GEPI, TLInfo, nullptr,
8416 [&](Value *V) { removeAllAssertingVHReferences(V); });
8417 ++NumGEPsElim;
8418 optimizeInst(NC, ModifiedDT);
8419 return true;
8420 }
8421     if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8422       return true;
8423 }
8424 }
8425
8426 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8427     // freeze(icmp(a, const)) -> icmp(freeze(a), const)
8428 // This helps generate efficient conditional jumps.
8429 Instruction *CmpI = nullptr;
8430 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8431 CmpI = II;
8432 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8433 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8434
8435 if (CmpI && CmpI->hasOneUse()) {
8436 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8437 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8438 isa<ConstantPointerNull>(Op0);
8439 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8440 isa<ConstantPointerNull>(Op1);
8441 if (Const0 || Const1) {
8442 if (!Const0 || !Const1) {
8443 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8444 F->takeName(FI);
8445 CmpI->setOperand(Const0 ? 1 : 0, F);
8446 }
8447 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8448 FI->eraseFromParent();
8449 return true;
8450 }
8451 }
8452 return AnyChange;
8453 }
8454
8455 if (tryToSinkFreeOperands(I))
8456 return true;
8457
8458 switch (I->getOpcode()) {
8459 case Instruction::Shl:
8460 case Instruction::LShr:
8461 case Instruction::AShr:
8462 return optimizeShiftInst(cast<BinaryOperator>(I));
8463 case Instruction::Call:
8464 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8465 case Instruction::Select:
8466 return optimizeSelectInst(cast<SelectInst>(I));
8467 case Instruction::ShuffleVector:
8468 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8469 case Instruction::Switch:
8470 return optimizeSwitchInst(cast<SwitchInst>(I));
8471 case Instruction::ExtractElement:
8472 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8473 case Instruction::Br:
8474 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8475 }
8476
8477 return AnyChange;
8478}
8479
8480/// Given an OR instruction, check to see if this is a bitreverse
8481/// idiom. If so, insert the new intrinsic and return true.
8482bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8483 if (!I.getType()->isIntegerTy() ||
8484       !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8485           TLI->getValueType(*DL, I.getType(), true)))
8486 return false;
8487
8489 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8490 return false;
8491 Instruction *LastInst = Insts.back();
8492 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8493   RecursivelyDeleteTriviallyDeadInstructions(
8494       &I, TLInfo, nullptr,
8495 [&](Value *V) { removeAllAssertingVHReferences(V); });
8496 return true;
8497}
8498
8499// In this pass we look for GEP and cast instructions that are used
8500// across basic blocks and rewrite them to improve basic-block-at-a-time
8501// selection.
8502bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8503 SunkAddrs.clear();
8504 bool MadeChange = false;
8505
8506 do {
8507 CurInstIterator = BB.begin();
8508 ModifiedDT = ModifyDT::NotModifyDT;
8509 while (CurInstIterator != BB.end()) {
8510 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8511 if (ModifiedDT != ModifyDT::NotModifyDT) {
8512         // For huge functions we tend to quickly go through the inner optimization
8513         // opportunities in the BB. So we go back to the BB head to re-optimize
8514         // each instruction instead of going back to the function head.
8515 if (IsHugeFunc) {
8516 DT.reset();
8517 getDT(*BB.getParent());
8518 break;
8519 } else {
8520 return true;
8521 }
8522 }
8523 }
8524 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8525
8526 bool MadeBitReverse = true;
8527 while (MadeBitReverse) {
8528 MadeBitReverse = false;
8529 for (auto &I : reverse(BB)) {
8530 if (makeBitReverse(I)) {
8531 MadeBitReverse = MadeChange = true;
8532 break;
8533 }
8534 }
8535 }
8536 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8537
8538 return MadeChange;
8539}
8540
8541// Some CGP optimizations may move or alter what's computed in a block. Check
8542// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8543bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8544 assert(isa<DbgValueInst>(I));
8545 DbgValueInst &DVI = *cast<DbgValueInst>(I);
8546
8547 // Does this dbg.value refer to a sunk address calculation?
8548 bool AnyChange = false;
8549 SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8550 DVI.location_ops().end());
8551 for (Value *Location : LocationOps) {
8552 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8553 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8554 if (SunkAddr) {
8555 // Point dbg.value at locally computed address, which should give the best
8556 // opportunity to be accurately lowered. This update may change the type
8557 // of pointer being referred to; however this makes no difference to
8558 // debugging information, and we can't generate bitcasts that may affect
8559 // codegen.
8560 DVI.replaceVariableLocationOp(Location, SunkAddr);
8561 AnyChange = true;
8562 }
8563 }
8564 return AnyChange;
8565}
8566
8567bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8568 bool AnyChange = false;
8569 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8570 AnyChange |= fixupDbgVariableRecord(DVR);
8571 return AnyChange;
8572}
8573
8574// FIXME: should updating debug-info really cause the "changed" flag to fire,
8575// which can cause a function to be reprocessed?
8576bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8577 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8578 DVR.Type != DbgVariableRecord::LocationType::Assign)
8579 return false;
8580
8581 // Does this DbgVariableRecord refer to a sunk address calculation?
8582 bool AnyChange = false;
8583 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8584 DVR.location_ops().end());
8585 for (Value *Location : LocationOps) {
8586 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8587 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8588 if (SunkAddr) {
8589 // Point dbg.value at locally computed address, which should give the best
8590 // opportunity to be accurately lowered. This update may change the type
8591 // of pointer being referred to; however this makes no difference to
8592 // debugging information, and we can't generate bitcasts that may affect
8593 // codegen.
8594 DVR.replaceVariableLocationOp(Location, SunkAddr);
8595 AnyChange = true;
8596 }
8597 }
8598 return AnyChange;
8599}
8600
8601 static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
8602   DVI->removeFromParent();
8603 if (isa<PHINode>(VI))
8604 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
8605 else
8606 DVI->insertAfter(VI);
8607}
8608
8609 static void DbgInserterHelper(DbgVariableRecord *DVR, Instruction *VI) {
8610   DVR->removeFromParent();
8611 BasicBlock *VIBB = VI->getParent();
8612 if (isa<PHINode>(VI))
8613 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8614 else
8615 VIBB->insertDbgRecordAfter(DVR, VI);
8616}
8617
8618// A llvm.dbg.value may be using a value before its definition, due to
8619// optimizations in this pass and others. Scan for such dbg.values, and rescue
8620// them by moving the dbg.value to immediately after the value definition.
8621// FIXME: Ideally this should never be necessary, and this has the potential
8622// to re-order dbg.value intrinsics.
8623bool CodeGenPrepare::placeDbgValues(Function &F) {
8624 bool MadeChange = false;
8625 DominatorTree DT(F);
8626
8627 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
8628     SmallVector<Instruction *, 4> VIs;
8629     for (Value *V : DbgItem->location_ops())
8630 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8631 VIs.push_back(VI);
8632
8633 // This item may depend on multiple instructions, complicating any
8634 // potential sink. This block takes the defensive approach, opting to
8635 // "undef" the item if it has more than one instruction and any of them do
8636 // not dominate iem.
8637 for (Instruction *VI : VIs) {
8638 if (VI->isTerminator())
8639 continue;
8640
8641 // If VI is a phi in a block with an EHPad terminator, we can't insert
8642 // after it.
8643 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
8644 continue;
8645
8646 // If the defining instruction dominates the dbg.value, we do not need
8647 // to move the dbg.value.
8648 if (DT.dominates(VI, Position))
8649 continue;
8650
8651 // If we depend on multiple instructions and any of them doesn't
8652 // dominate this DVI, we probably can't salvage it: moving it to
8653 // after any of the instructions could cause us to lose the others.
8654 if (VIs.size() > 1) {
8655 LLVM_DEBUG(
8656 dbgs()
8657 << "Unable to find valid location for Debug Value, undefing:\n"
8658 << *DbgItem);
8659 DbgItem->setKillLocation();
8660 break;
8661 }
8662
8663 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
8664 << *DbgItem << ' ' << *VI);
8665 DbgInserterHelper(DbgItem, VI);
8666 MadeChange = true;
8667 ++NumDbgValueMoved;
8668 }
8669 };
8670
8671 for (BasicBlock &BB : F) {
8672     for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
8673       // Process dbg.value intrinsics.
8674 DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
8675 if (DVI) {
8676 DbgProcessor(DVI, DVI);
8677 continue;
8678 }
8679
8680 // If this isn't a dbg.value, process any attached DbgVariableRecord
8681 // records attached to this instruction.
8682       for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
8683                filterDbgVars(Insn.getDbgRecordRange()))) {
8684 if (DVR.Type != DbgVariableRecord::LocationType::Value)
8685 continue;
8686 DbgProcessor(&DVR, &Insn);
8687 }
8688 }
8689 }
8690
8691 return MadeChange;
8692}
8693
8694// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
8695// probes can be chained dependencies of other regular DAG nodes and block DAG
8696// combine optimizations.
8697bool CodeGenPrepare::placePseudoProbes(Function &F) {
8698 bool MadeChange = false;
8699 for (auto &Block : F) {
8700     // Move the remaining probes to the beginning of the block.
8701 auto FirstInst = Block.getFirstInsertionPt();
8702 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
8703 ++FirstInst;
8704 BasicBlock::iterator I(FirstInst);
8705 I++;
8706 while (I != Block.end()) {
8707 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
8708 II->moveBefore(&*FirstInst);
8709 MadeChange = true;
8710 }
8711 }
8712 }
8713 return MadeChange;
8714}
8715
8716/// Scale down both weights to fit into uint32_t.
8717static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
8718 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
8719 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
8720 NewTrue = NewTrue / Scale;
8721 NewFalse = NewFalse / Scale;
8722}
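// A minimal standalone sketch (not part of the pass) of the scaling rule above
// applied to concrete numbers, kept in '#if 0' so it does not affect the build;
// the function name is hypothetical, and <cassert>, <cstdint> and <limits> are
// already included by this file.
#if 0
static void scaleWeightsExample() {
  uint64_t NewTrue = 10000000000ULL;  // exceeds UINT32_MAX
  uint64_t NewFalse = 5000000000ULL;  // exceeds UINT32_MAX
  scaleWeights(NewTrue, NewFalse);
  // Scale == 10000000000 / 4294967295 + 1 == 3, so NewTrue == 3333333333 and
  // NewFalse == 1666666666; both now fit into uint32_t.
  assert(NewTrue <= std::numeric_limits<uint32_t>::max() &&
         NewFalse <= std::numeric_limits<uint32_t>::max());
}
#endif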
8723
8724/// Some targets prefer to split a conditional branch like:
8725/// \code
8726/// %0 = icmp ne i32 %a, 0
8727/// %1 = icmp ne i32 %b, 0
8728/// %or.cond = or i1 %0, %1
8729/// br i1 %or.cond, label %TrueBB, label %FalseBB
8730/// \endcode
8731/// into multiple branch instructions like:
8732/// \code
8733/// bb1:
8734/// %0 = icmp ne i32 %a, 0
8735/// br i1 %0, label %TrueBB, label %bb2
8736/// bb2:
8737/// %1 = icmp ne i32 %b, 0
8738/// br i1 %1, label %TrueBB, label %FalseBB
8739/// \endcode
8740/// This usually allows instruction selection to do even further optimizations
8741/// and combine the compare with the branch instruction. Currently this is
8742/// applied for targets which have "cheap" jump instructions.
8743///
8744/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
8745///
8746bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
8747 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
8748 return false;
8749
8750 bool MadeChange = false;
8751 for (auto &BB : F) {
8752 // Does this BB end with the following?
8753 // %cond1 = icmp|fcmp|binary instruction ...
8754 // %cond2 = icmp|fcmp|binary instruction ...
8755     // %cond.or = or|and i1 %cond1, %cond2
8756     // br i1 %cond.or, label %dest1, label %dest2
8757 Instruction *LogicOp;
8758 BasicBlock *TBB, *FBB;
8759 if (!match(BB.getTerminator(),
8760 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
8761 continue;
8762
8763 auto *Br1 = cast<BranchInst>(BB.getTerminator());
8764 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
8765 continue;
8766
8767 // The merging of mostly empty BB can cause a degenerate branch.
8768 if (TBB == FBB)
8769 continue;
8770
8771 unsigned Opc;
8772 Value *Cond1, *Cond2;
8773 if (match(LogicOp,
8774 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
8775 Opc = Instruction::And;
8776 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
8777 m_OneUse(m_Value(Cond2)))))
8778 Opc = Instruction::Or;
8779 else
8780 continue;
8781
8782 auto IsGoodCond = [](Value *Cond) {
8783 return match(
8784 Cond,
8786 m_LogicalOr(m_Value(), m_Value()))));
8787 };
8788 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
8789 continue;
8790
8791 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
8792
8793 // Create a new BB.
8794 auto *TmpBB =
8795 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
8796 BB.getParent(), BB.getNextNode());
8797 if (IsHugeFunc)
8798 FreshBBs.insert(TmpBB);
8799
8800 // Update original basic block by using the first condition directly by the
8801 // branch instruction and removing the no longer needed and/or instruction.
8802 Br1->setCondition(Cond1);
8803 LogicOp->eraseFromParent();
8804
8805 // Depending on the condition we have to either replace the true or the
8806 // false successor of the original branch instruction.
8807 if (Opc == Instruction::And)
8808 Br1->setSuccessor(0, TmpBB);
8809 else
8810 Br1->setSuccessor(1, TmpBB);
8811
8812 // Fill in the new basic block.
8813 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
8814 if (auto *I = dyn_cast<Instruction>(Cond2)) {
8815 I->removeFromParent();
8816 I->insertBefore(Br2);
8817 }
8818
8819 // Update PHI nodes in both successors. The original BB needs to be
8820     // replaced in one successor's PHI nodes, because the branch now comes from
8821     // the newly generated BB (TmpBB). In the other successor we need to add one
8822 // incoming edge to the PHI nodes, because both branch instructions target
8823 // now the same successor. Depending on the original branch condition
8824 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
8825 // we perform the correct update for the PHI nodes.
8826 // This doesn't change the successor order of the just created branch
8827 // instruction (or any other instruction).
8828 if (Opc == Instruction::Or)
8829 std::swap(TBB, FBB);
8830
8831 // Replace the old BB with the new BB.
8832 TBB->replacePhiUsesWith(&BB, TmpBB);
8833
8834 // Add another incoming edge from the new BB.
8835 for (PHINode &PN : FBB->phis()) {
8836 auto *Val = PN.getIncomingValueForBlock(&BB);
8837 PN.addIncoming(Val, TmpBB);
8838 }
8839
8840 // Update the branch weights (from SelectionDAGBuilder::
8841 // FindMergedConditions).
8842 if (Opc == Instruction::Or) {
8843 // Codegen X | Y as:
8844 // BB1:
8845 // jmp_if_X TBB
8846 // jmp TmpBB
8847 // TmpBB:
8848 // jmp_if_Y TBB
8849 // jmp FBB
8850 //
8851
8852 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
8853 // The requirement is that
8854 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
8855 // = TrueProb for original BB.
8856 // Assuming the original weights are A and B, one choice is to set BB1's
8857 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
8858 // assumes that
8859 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
8860 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
8861 // TmpBB, but the math is more complicated.
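      // Worked example (illustrative): with original weights A = 3 (true) and
      // B = 5 (false), Br1 gets {3, 3 + 2*5} = {3, 13} and Br2 gets
      // {3, 2*5} = {3, 10}; then 3/16 + (13/16)*(3/13) = 6/16 matches the
      // original TrueProb of A/(A+B) = 3/8.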
8862 uint64_t TrueWeight, FalseWeight;
8863 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
8864 uint64_t NewTrueWeight = TrueWeight;
8865 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
8866 scaleWeights(NewTrueWeight, NewFalseWeight);
8867 Br1->setMetadata(LLVMContext::MD_prof,
8868 MDBuilder(Br1->getContext())
8869                            .createBranchWeights(NewTrueWeight, NewFalseWeight));
8870
8871 NewTrueWeight = TrueWeight;
8872 NewFalseWeight = 2 * FalseWeight;
8873 scaleWeights(NewTrueWeight, NewFalseWeight);
8874 Br2->setMetadata(LLVMContext::MD_prof,
8875 MDBuilder(Br2->getContext())
8876                            .createBranchWeights(NewTrueWeight, NewFalseWeight));
8877 }
8878 } else {
8879 // Codegen X & Y as:
8880 // BB1:
8881 // jmp_if_X TmpBB
8882 // jmp FBB
8883 // TmpBB:
8884 // jmp_if_Y TBB
8885 // jmp FBB
8886 //
8887 // This requires creation of TmpBB after CurBB.
8888
8889 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
8890 // The requirement is that
8891 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
8892 // = FalseProb for original BB.
8893 // Assuming the original weights are A and B, one choice is to set BB1's
8894 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
8895 // assumes that
8896 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
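      // Worked example (illustrative): with original weights A = 3 (true) and
      // B = 5 (false), Br1 gets {2*3 + 5, 5} = {11, 5} and Br2 gets
      // {2*3, 5} = {6, 5}; then 5/16 + (11/16)*(5/11) = 10/16 matches the
      // original FalseProb of B/(A+B) = 5/8.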
8897 uint64_t TrueWeight, FalseWeight;
8898 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
8899 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
8900 uint64_t NewFalseWeight = FalseWeight;
8901 scaleWeights(NewTrueWeight, NewFalseWeight);
8902 Br1->setMetadata(LLVMContext::MD_prof,
8903 MDBuilder(Br1->getContext())
8904                            .createBranchWeights(NewTrueWeight, NewFalseWeight));
8905
8906 NewTrueWeight = 2 * TrueWeight;
8907 NewFalseWeight = FalseWeight;
8908 scaleWeights(NewTrueWeight, NewFalseWeight);
8909 Br2->setMetadata(LLVMContext::MD_prof,
8910 MDBuilder(Br2->getContext())
8911                            .createBranchWeights(NewTrueWeight, NewFalseWeight));
8912 }
8913 }
8914
8915 ModifiedDT = ModifyDT::ModifyBBDT;
8916 MadeChange = true;
8917
8918 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
8919 TmpBB->dump());
8920 }
8921 return MadeChange;
8922}
Returns the sub-type a function will return at a given Idx; should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
Hexagon Common GEP
IRTranslator LLVM IR MI
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1497
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
This defines the Use class.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1482
unsigned logBase2() const
Definition: APInt.h:1703
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1513
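The APInt helpers listed above are typically combined along the following lines. This is a minimal sketch: the helper name describeImmediate, the 64-bit target width, and the assumption that Imm is at most 64 bits wide are illustrative, not code from this pass.

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Hypothetical helper: widen an immediate (assumed <= 64 bits wide) and
// classify it with the APInt predicates documented above.
static int64_t describeImmediate(const APInt &Imm) {
  APInt Wide = Imm.sext(64);      // sign extend to a new width
  if (Wide.isZero())
    return 0;                     // all bits clear
  if (Wide.isPowerOf2())
    return Wide.logBase2();       // exponent of a power of two
  return Wide.getSExtValue();     // plain sign-extended value
}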
an instruction to allocate memory on the stack
Definition: Instructions.h:59
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:132
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:125
void setAlignment(Align Align)
Definition: Instructions.h:136
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:321
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:492
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:473
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:539
static unsigned getPointerOperandIndex()
Definition: Instructions.h:675
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:748
static unsigned getPointerOperandIndex()
Definition: Instructions.h:912
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:60
iterator end()
Definition: BasicBlock.h:443
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:430
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:499
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:409
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:640
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:360
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:199
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:570
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:452
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:460
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:482
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:206
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:276
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:379
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:165
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:65
void reinsertInstInDbgRecords(Instruction *I, std::optional< DbgRecord::self_iterator > Pos)
In rare circumstances instructions can be speculatively removed from blocks, and then be re-inserted ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:221
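As a rough sketch of how the block-level accessors above fit together: splitAtFirstInsertionPt is a hypothetical helper, not a transformation performed by this pass, and BB is assumed to be a populated, well-formed block.

#include <cassert>
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical helper: split BB at its first insertion point. Everything
// from that point onwards moves into the new "tail" block, and BB is left
// ending in an unconditional branch to it.
static BasicBlock *splitAtFirstInsertionPt(BasicBlock *BB) {
  BasicBlock *Tail = BB->splitBasicBlock(BB->getFirstInsertionPt(), "tail");
  assert(Tail->getSinglePredecessor() == BB && "split keeps a single edge");
  assert(BB->getTerminator() && "split block ends in a branch");
  return Tail;
}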
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a binary instruction, given the opcode and the two operands.
BinaryOps getOpcode() const
Definition: InstrTypes.h:513
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1809
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1742
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1828
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1687
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1692
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1678
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:601
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Definition: InstrTypes.h:930
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name, BasicBlock::iterator InsertBefore)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:983
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:993
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:1022
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:1016
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:1020
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:1018
@ ICMP_EQ
equal
Definition: InstrTypes.h:1014
@ ICMP_NE
not equal
Definition: InstrTypes.h:1015
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:1167
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name, BasicBlock::iterator InsertBefore)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:1105
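A small sketch of how getSwappedPredicate is normally used when commuting the operands of an integer compare; canonicalizeCompare is a hypothetical helper, not a routine of this pass.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: move a constant operand to the right-hand side,
// swapping the predicate so the comparison keeps its meaning
// (e.g. ICMP_SLT becomes ICMP_SGT).
static void canonicalizeCompare(ICmpInst *Cmp) {
  if (!isa<Constant>(Cmp->getOperand(0)) || isa<Constant>(Cmp->getOperand(1)))
    return;
  Value *L = Cmp->getOperand(0), *R = Cmp->getOperand(1);
  Cmp->setPredicate(Cmp->getSwappedPredicate());
  Cmp->setOperand(0, R);
  Cmp->setOperand(1, L);
}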
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
Definition: Constants.h:52
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1017
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2140
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2523
This is the shared class of boolean and integer constants.
Definition: Constants.h:80
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:849
bool isZero() const
This is just a convenience method to make client code smaller for a common case.
Definition: Constants.h:205
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:160
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:145
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1449
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1398
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:878
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue)
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:329
unsigned size() const
Definition: DenseMap.h:99
bool empty() const
Definition: DenseMap.h:98
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
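The find/insert pair above is the common DenseMap access pattern; the following minimal sketch uses a hypothetical block-numbering map and helper name that are not state kept by this pass.

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
using namespace llvm;

// Hypothetical helper: return an existing number for BB or assign the next
// free one, using find/insert as documented above.
static unsigned lookupOrNumber(DenseMap<const BasicBlock *, unsigned> &Numbers,
                               const BasicBlock *BB) {
  auto It = Numbers.find(BB);
  if (It != Numbers.end())
    return It->second;            // already numbered
  unsigned Next = Numbers.size(); // number of entries so far
  Numbers.insert({BB, Next});     // insert returns {iterator, bool}
  return Next;
}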
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:692
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns a random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
Definition: Function.h:783
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:973
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:128
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition: Globals.cpp:295
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
Type * getValueType() const
Definition: GlobalValue.h:296
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2137
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:466
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2535
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:220
Value * createIsFPClass(Value *FPNum, unsigned Test)
Definition: IRBuilder.cpp:1309
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2366
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2397
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2241
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2127
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1120
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1826
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", bool IsInBounds=false)
Definition: IRBuilder.h:1866
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:502
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2666
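A minimal sketch of the IRBuilder calls documented above: compare a value against zero and branch on the result. The helper name and all arguments are assumptions, and the block passed in is assumed to have no terminator yet.

#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: append "icmp eq X, 0" and a conditional branch to a
// block that does not yet have a terminator.
static void emitZeroCheck(Value *X, BasicBlock *InsertAtEnd,
                          BasicBlock *TrueBB, BasicBlock *FalseBB) {
  IRBuilder<> Builder(InsertAtEnd);   // insert at the end of the block
  Value *IsZero = Builder.CreateICmpEQ(
      X, Constant::getNullValue(X->getType()), "is.zero");
  Builder.CreateCondBr(IsZero, TrueBB, FalseBB);
}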
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:90
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:454
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:812
const BasicBlock * getParent() const
Definition: Instruction.h:152
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:149
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:86
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1636
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:252
bool isShift() const
Definition: Instruction.h:259
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:451
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:47
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:54
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:184
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:286
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:44
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
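A sketch of attaching branch-weight metadata with createBranchWeights; the 1000:1 ratio is an illustrative assumption rather than a value used by this pass, and real weights would normally come from profile data.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

// Hypothetical helper: mark the true edge of a conditional branch as the
// overwhelmingly likely one via !prof metadata.
static void markLikelyTaken(BranchInst *Br) {
  if (!Br->isConditional())
    return;
  MDBuilder MDB(Br->getContext());
  Br->setMetadata(LLVMContext::MD_prof,
                  MDB.createBranchWeights(/*TrueWeight=*/1000,
                                          /*FalseWeight=*/1));
}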
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
void clear()
Definition: MapVector.h:88
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:756
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr, BasicBlock::iterator InsertBefore)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
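Sketch of building a two-input phi with the accessors above; mergeValues and all of its parameters are hypothetical.

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

// Hypothetical helper: merge one value per predecessor into a phi placed at
// the top of the merge block.
static PHINode *mergeValues(BasicBlock *MergeBB, Value *FromLeft,
                            BasicBlock *LeftBB, Value *FromRight,
                            BasicBlock *RightBB) {
  IRBuilder<> Builder(MergeBB, MergeBB->begin());   // phis go at the top
  PHINode *PN = Builder.CreatePHI(FromLeft->getType(),
                                  /*NumReservedValues=*/2, "merge");
  PN->addIncoming(FromLeft, LeftBB);
  PN->addIncoming(FromRight, RightBB);
  return PN;
}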
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
PointerIntPair - This class implements a pair of a pointer and small integer.
static PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
Definition: Constants.cpp:1827
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:109
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:115
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:129
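The PreservedAnalyses protocol above is conventionally used like this in a new-pass-manager pass. HypotheticalPass is not a real pass; it only illustrates the return pattern, and the targeted preserve of the dominator tree is an assumed example.

#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Hypothetical pass skeleton: report everything preserved when nothing
// changed, otherwise name the analyses that survive.
struct HypotheticalPass : PassInfoMixin<HypotheticalPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    bool Changed = false;
    // ... transform F, setting Changed as appropriate ...
    if (!Changed)
      return PreservedAnalyses::all();
    PreservedAnalyses PA;
    PA.preserve<DominatorTreeAnalysis>();   // example of a targeted preserve
    return PA;
  }
};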
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr, BasicBlock::iterator InsertBefore, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
size_type size() const
Definition: SmallPtrSet.h:94
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:321
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:236
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
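SmallVector and SmallPtrSet are most often paired as a worklist plus a visited set; the traversal below and the helper name countReachableUsers are illustrative assumptions only.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Hypothetical helper: count Root and every instruction transitively reachable
// through its users, using the worklist/visited-set idiom.
static unsigned countReachableUsers(Instruction *Root) {
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 8> Visited;
  Worklist.push_back(Root);
  unsigned N = 0;
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)   // second is false if already present
      continue;
    ++N;
    for (User *U : I->users())
      if (auto *UI = dyn_cast<Instruction>(U))
        Worklist.push_back(UI);
  }
  return N;
}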
An instruction for storing to memory.
Definition: Instructions.h:317
static unsigned getPointerOperandIndex()
Definition: Instructions.h:419
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:622
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:651
Class to represent struct types.
Definition: DerivedTypes.h:216
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
virtual bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the target's addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
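A sketch of querying isLegalAddressingMode for a hypothetical "[base + 4*index + 16]" mode; the concrete scale and displacement are assumptions, and TLI and DL are presumed to come from the current target and module.

#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// Hypothetical helper: ask the target whether a scaled-index addressing mode
// with a small displacement is legal for a memory access of type AccessTy.
static bool queryAddrMode(const TargetLowering &TLI, const DataLayout &DL,
                          Type *AccessTy, unsigned AddrSpace) {
  TargetLowering::AddrMode AM;
  AM.HasBaseReg = true;   // base register present
  AM.Scale = 4;           // scaled index register
  AM.BaseOffs = 16;       // constant displacement
  return TLI.isLegalAddressingMode(DL, AM, AccessTy, AddrSpace);
}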
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=ArrayRef< const Value * >(), const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:265
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:243
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:228
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:348
'undef' values are things that do not have specified contents.
Definition: Constants.h:1348
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1808
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
See the file comment.
Definition: ValueMap.h:84
void clear()
Definition: ValueMap.h:145
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
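replaceAllUsesWith is almost always followed by erasing the now-dead instruction; a minimal sketch with hypothetical Old/New values follows.

#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical helper: redirect all uses of Old to New, then delete Old.
static void replaceAndErase(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New);   // every use of Old now points at New
  Old->eraseFromParent();         // unlink and delete the dead instruction
}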
iterator_range< user_iterator > users()
Definition: Value.h:421
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:926
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:234
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:153
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:693
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1074
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
iterator_range< use_iterator > uses()
Definition: Value.h:376
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:815
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5239
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
This class represents zero extension of integer types.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:187
constexpr bool isNonZero() const
Definition: TypeSize.h:158
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
TypeSize getSequentialElementStride(const DataLayout &DL) const
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:477
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:160
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:918
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:765
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:821
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches right-shift operations (lshr or ashr).
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:163
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:541
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:548
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignores it.
Definition: PatternMatch.h:105
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:294
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:234
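The matchers above compose as in the following sketch, which recognises "icmp eq (add X, C), 0" with a single-use add; the pattern is illustrative and not one of the specific matches this pass performs.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: bind X and C when V is "icmp eq (add X, C), 0" and the
// add has a single use.
static bool matchAddCompareZero(Value *V, Value *&X, const APInt *&C) {
  ICmpInst::Predicate Pred;
  if (!match(V, m_ICmp(Pred,
                       m_OneUse(m_Add(m_Value(X), m_APInt(C))),
                       m_ZeroInt())))
    return false;
  return Pred == ICmpInst::ICMP_EQ;
}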
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
ManagedStatic< cl::opt< FnT >, OptCreatorT > Action
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:456
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:539
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branc...
Definition: Local.cpp:129
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2165
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing t...
Definition: Utils.cpp:1650
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditi...
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2043
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:141
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:241
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2059
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
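For illustration, a trivial range-based predicate over an instruction's operands:

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// True if any operand of I is itself produced by an instruction.
static bool hasInstructionOperand(const Instruction &I) {
  return any_of(I.operands(),
                [](const Value *Op) { return isa<Instruction>(Op); });
}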
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned el...
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:3938
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instructio...
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point con...
Definition: Analysis.cpp:199
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:50
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test if given that the input instruction is in the tail call position, if there is an attribute misma...
Definition: Analysis.cpp:581
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
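A sketch of a typical cleanup loop (no analyses are updated here, which is only valid when none need preserving):

#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Fold every block whose unique predecessor ends in an unconditional branch
// to it into that predecessor; the merged block is erased.
static bool mergeTrivialBlocks(Function &F) {
  bool Changed = false;
  for (BasicBlock &BB : make_early_inc_range(F))
    Changed |= MergeBlockIntoPredecessor(&BB);
  return Changed;
}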
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if the instruction does not have any effects besides calculating the result and does not ...
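A hedged sketch of how this query is commonly combined with a poison check before hoisting; the helper name and the exact policy are illustrative, not this pass's logic.

#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Conservative hypothetical check: I may be hoisted to InsertPt only if it is
// free of side effects and traps, and (extra-conservatively here) all of its
// operands are known not to be undef or poison.
static bool mayHoistTo(const Instruction *I, const Instruction *InsertPt) {
  return isSafeToSpeculativelyExecute(I, InsertPt) &&
         all_of(I->operands(), [](const Value *Op) {
           return isGuaranteedNotToBeUndefOrPoison(Op);
         });
}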
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
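A sketch of reading the weights off a conditional branch, assuming the usual two-successor !prof form; this uses the MDNode-based overload shown in the signature above.

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ProfDataUtils.h"
using namespace llvm;

// Fetch the taken/not-taken weights attached to BI, if any.
static bool getTakenWeights(const BranchInst *BI, uint32_t &TrueWeight,
                            uint32_t &FalseWeight) {
  SmallVector<uint32_t, 2> Weights;
  if (!extractBranchWeights(BI->getMetadata(LLVMContext::MD_prof), Weights) ||
      Weights.size() != 2)
    return false;
  TrueWeight = Weights[0];
  FalseWeight = Weights[1];
  return true;
}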
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out...
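A rough sketch of driving the transform over one block; the 64-to-32-bit width mapping is an assumption chosen for illustration, whereas real callers take the mapping from the target.

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
using namespace llvm;

// Try a cheap 32-bit divide first whenever a 64-bit udiv/sdiv in BB has
// operands that dynamically fit in 32 bits.
static bool bypass64BitDivides(BasicBlock *BB) {
  DenseMap<unsigned, unsigned> BypassWidths;
  BypassWidths[64] = 32;
  return bypassSlowDivision(BB, BypassWidths);
}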
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with...
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
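A tiny, self-contained illustration of commonAlignment (and the isAligned predicate listed earlier), with arbitrarily chosen values:

#include "llvm/Support/Alignment.h"
#include <cassert>
using namespace llvm;

static void alignmentExamples() {
  Align A(16);
  assert(isAligned(A, 32));                  // 32 is a multiple of 16.
  assert(!isAligned(A, 24));                 // 24 is not.
  // A pointer known to be 16-byte aligned, offset by 4 bytes, is only
  // guaranteed to be 4-byte aligned.
  assert(commonAlignment(A, 4) == Align(4));
}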
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
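A sketch of the usual shape of a call (the null-check guard is a made-up transform, not something this pass performs in this form): split at the store, take the terminator of the new "then" block, and sink the guarded instruction into it.

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Rewrite 'store v, p' into 'if (p != null) store v, p'.
static void guardStoreWithNullCheck(StoreInst *Store) {
  IRBuilder<> B(Store);
  Value *NotNull = B.CreateIsNotNull(Store->getPointerOperand());
  // Everything from Store onwards moves to a new tail block; ThenTerm is the
  // unconditional branch ending the freshly created "then" block.
  Instruction *ThenTerm = SplitBlockAndInsertIfThen(
      NotNull, Store->getIterator(), /*Unreachable=*/false);
  Store->moveBefore(ThenTerm);
}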
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:274
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:290
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:628
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:238
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:151
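A hedged example of how these EVT queries combine when reasoning about integer widening; the type choices and helper name are illustrative.

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;

// True if extending from SrcTy to DstTy goes between byte-sized, power-of-two
// integer types and actually widens the value.
static bool isRoundIntegerWidening(Type *SrcTy, Type *DstTy) {
  EVT SrcVT = EVT::getEVT(SrcTy);
  EVT DstVT = EVT::getEVT(DstTy);
  return SrcVT.isInteger() && DstVT.isInteger() && SrcVT.isRound() &&
         DstVT.isRound() && DstVT.bitsGT(SrcVT);
}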
Used to describe an addressing mode similar to ExtAddrMode in CodeGenPrepare.
ExtAddrMode()=default
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
const DataLayout & DL
Definition: SimplifyQuery.h:61
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*...
This contains information for each constraint that we are lowering.