1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
16#include "llvm/ADT/APInt.h"
17#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/MapVector.h"
21#include "llvm/ADT/STLExtras.h"
24#include "llvm/ADT/Statistic.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Argument.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/BasicBlock.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugInfo.h"
53#include "llvm/IR/Dominators.h"
54#include "llvm/IR/Function.h"
56#include "llvm/IR/GlobalValue.h"
58#include "llvm/IR/IRBuilder.h"
59#include "llvm/IR/InlineAsm.h"
60#include "llvm/IR/InstrTypes.h"
61#include "llvm/IR/Instruction.h"
64#include "llvm/IR/Intrinsics.h"
65#include "llvm/IR/IntrinsicsAArch64.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/MDBuilder.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
72#include "llvm/IR/Statepoint.h"
73#include "llvm/IR/Type.h"
74#include "llvm/IR/Use.h"
75#include "llvm/IR/User.h"
76#include "llvm/IR/Value.h"
77#include "llvm/IR/ValueHandle.h"
78#include "llvm/IR/ValueMap.h"
80#include "llvm/Pass.h"
86#include "llvm/Support/Debug.h"
97#include <algorithm>
98#include <cassert>
99#include <cstdint>
100#include <iterator>
101#include <limits>
102#include <memory>
103#include <optional>
104#include <utility>
105#include <vector>
106
107using namespace llvm;
108using namespace llvm::PatternMatch;
109
110#define DEBUG_TYPE "codegenprepare"
111
112STATISTIC(NumBlocksElim, "Number of blocks eliminated");
113STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
114STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
115STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
116 "sunken Cmps");
117STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
118 "of sunken Casts");
119STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
120 "computations were sunk");
121STATISTIC(NumMemoryInstsPhiCreated,
122 "Number of phis created when address "
123 "computations were sunk to memory instructions");
124STATISTIC(NumMemoryInstsSelectCreated,
125 "Number of select created when address "
126 "computations were sunk to memory instructions");
127STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
128STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
129STATISTIC(NumAndsAdded,
130 "Number of and mask instructions added to form ext loads");
131STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
132STATISTIC(NumRetsDup, "Number of return instructions duplicated");
133STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
134STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
135STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
136
138 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
139 cl::desc("Disable branch optimizations in CodeGenPrepare"));
140
141static cl::opt<bool>
142 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
143 cl::desc("Disable GC optimizations in CodeGenPrepare"));
144
145static cl::opt<bool>
146 DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden,
147 cl::init(false),
148 cl::desc("Disable select to branch conversion."));
149
150static cl::opt<bool>
151 AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true),
152 cl::desc("Address sinking in CGP using GEPs."));
153
154static cl::opt<bool>
155 EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true),
156 cl::desc("Enable sinkinig and/cmp into branches."));
157
159 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
160 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
161
163 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
164 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
165
167 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
168 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
169 "CodeGenPrepare"));
170
172 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
173 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
174 "optimization in CodeGenPrepare"));
175
177 "disable-preheader-prot", cl::Hidden, cl::init(false),
178 cl::desc("Disable protection against removing loop preheaders"));
179
181 "profile-guided-section-prefix", cl::Hidden, cl::init(true),
182 cl::desc("Use profile info to add section prefix for hot/cold functions"));
183
185 "profile-unknown-in-special-section", cl::Hidden,
186 cl::desc("In profiling mode like sampleFDO, if a function doesn't have "
187 "profile, we cannot tell the function is cold for sure because "
188 "it may be a function newly added without ever being sampled. "
189 "With the flag enabled, compiler can put such profile unknown "
190 "functions into a special section, so runtime system can choose "
191 "to handle it in a different way than .text section, to save "
192 "RAM for example. "));
193
195 "bbsections-guided-section-prefix", cl::Hidden, cl::init(true),
196 cl::desc("Use the basic-block-sections profile to determine the text "
197 "section prefix for hot functions. Functions with "
198 "basic-block-sections profile will be placed in `.text.hot` "
199 "regardless of their FDO profile info. Other functions won't be "
200 "impacted, i.e., their prefixes will be decided by FDO/sampleFDO "
201 "profiles."));
202
204 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
205 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
206 "(frequency of destination block) is greater than this ratio"));
207
209 "force-split-store", cl::Hidden, cl::init(false),
210 cl::desc("Force store splitting no matter what the target query says."));
211
213 "cgp-type-promotion-merge", cl::Hidden,
214 cl::desc("Enable merging of redundant sexts when one is dominating"
215 " the other."),
216 cl::init(true));
217
219 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
220 cl::desc("Disables combining addressing modes with different parts "
221 "in optimizeMemoryInst."));
222
223static cl::opt<bool>
224 AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
225 cl::desc("Allow creation of Phis in Address sinking."));
226
228 "addr-sink-new-select", cl::Hidden, cl::init(true),
229 cl::desc("Allow creation of selects in Address sinking."));
230
232 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
233 cl::desc("Allow combining of BaseReg field in Address sinking."));
234
236 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
237 cl::desc("Allow combining of BaseGV field in Address sinking."));
238
240 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
241 cl::desc("Allow combining of BaseOffs field in Address sinking."));
242
244 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
245 cl::desc("Allow combining of ScaledReg field in Address sinking."));
246
247static cl::opt<bool>
248 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
249 cl::init(true),
250 cl::desc("Enable splitting large offset of GEP."));
251
253 "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false),
254 cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."));
255
256static cl::opt<bool>
257 VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false),
258 cl::desc("Enable BFI update verification for "
259 "CodeGenPrepare."));
260
261static cl::opt<bool>
262 OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true),
263 cl::desc("Enable converting phi types in CodeGenPrepare"));
264
265static cl::opt<unsigned>
266 HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden,
267 cl::desc("Least BB number of huge function."));
268
269static cl::opt<unsigned>
270 MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100),
271 cl::Hidden,
272 cl::desc("Max number of address users to look at"));
273
274static cl::opt<bool>
275 DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false),
276 cl::desc("Disable elimination of dead PHI nodes."));
277
278namespace {
279
280enum ExtType {
281 ZeroExtension, // Zero extension has been seen.
282 SignExtension, // Sign extension has been seen.
283 BothExtension // This extension type is used if we saw sext after
284 // ZeroExtension had been set, or if we saw zext after
285 // SignExtension had been set. It makes the type
286 // information of a promoted instruction invalid.
287};
288
289enum ModifyDT {
290 NotModifyDT, // Do not modify any dominator tree.
291 ModifyBBDT, // Modify the basic block dominator tree.
292 ModifyInstDT // Modify instruction dominance within a basic block.
293 // This usually means we move/delete/insert an instruction
294 // in a basic block, so we should re-iterate the instructions
295 // in such a basic block.
296};
297
298using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
299using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
300using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
301using SExts = SmallVector<Instruction *, 16>;
302using ValueToSExts = MapVector<Value *, SExts>;
303
304class TypePromotionTransaction;
305
306class CodeGenPrepare {
307 friend class CodeGenPrepareLegacyPass;
308 const TargetMachine *TM = nullptr;
309 const TargetSubtargetInfo *SubtargetInfo = nullptr;
310 const TargetLowering *TLI = nullptr;
311 const TargetRegisterInfo *TRI = nullptr;
312 const TargetTransformInfo *TTI = nullptr;
313 const BasicBlockSectionsProfileReader *BBSectionsProfileReader = nullptr;
314 const TargetLibraryInfo *TLInfo = nullptr;
315 LoopInfo *LI = nullptr;
316 std::unique_ptr<BlockFrequencyInfo> BFI;
317 std::unique_ptr<BranchProbabilityInfo> BPI;
318 ProfileSummaryInfo *PSI = nullptr;
319
320 /// As we scan instructions optimizing them, this is the next instruction
321 /// to optimize. Transforms that can invalidate this should update it.
322 BasicBlock::iterator CurInstIterator;
323
324 /// Keeps track of non-local addresses that have been sunk into a block.
325 /// This allows us to avoid inserting duplicate code for blocks with
326 /// multiple load/stores of the same address. The usage of WeakTrackingVH
327 /// enables SunkAddrs to be treated as a cache whose entries can be
328 /// invalidated if a sunken address computation has been erased.
329 ValueMap<Value *, WeakTrackingVH> SunkAddrs;
330
331 /// Keeps track of all instructions inserted for the current function.
332 SetOfInstrs InsertedInsts;
333
334 /// Keeps track of the type of the related instruction before their
335 /// promotion for the current function.
336 InstrToOrigTy PromotedInsts;
337
338 /// Keep track of instructions removed during promotion.
339 SetOfInstrs RemovedInsts;
340
341 /// Keep track of sext chains based on their initial value.
342 DenseMap<Value *, Instruction *> SeenChainsForSExt;
343
344 /// Keep track of GEPs accessing the same data structures such as structs or
345 /// arrays that are candidates to be split later because of their large
346 /// size.
347 MapVector<AssertingVH<Value>,
348 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
349 LargeOffsetGEPMap;
350
351 /// Keep track of new GEP base after splitting the GEPs having large offset.
352 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
353
354 /// Map serial numbers to Large offset GEPs.
355 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
356
357 /// Keep track of SExt promoted.
358 ValueToSExts ValToSExtendedUses;
359
360 /// True if the function has the OptSize attribute.
361 bool OptSize;
362
363 /// DataLayout for the Function being processed.
364 const DataLayout *DL = nullptr;
365
366 /// Building the dominator tree can be expensive, so we only build it
367 /// lazily and update it when required.
368 std::unique_ptr<DominatorTree> DT;
369
370public:
371 CodeGenPrepare(){};
372 CodeGenPrepare(const TargetMachine *TM) : TM(TM){};
373 /// If we encounter a huge function, we need to limit the build time.
374 bool IsHugeFunc = false;
375
376 /// FreshBBs is like a worklist: it collects the updated BBs which need
377 /// to be optimized again.
378 /// Note: to limit build time in this pass, when a BB is updated we need
379 /// to insert such a BB into FreshBBs for huge functions.
380 SmallSet<BasicBlock *, 32> FreshBBs;
381
382 void releaseMemory() {
383 // Clear per function information.
384 InsertedInsts.clear();
385 PromotedInsts.clear();
386 FreshBBs.clear();
387 BPI.reset();
388 BFI.reset();
389 }
390
391 bool run(Function &F, FunctionAnalysisManager &AM);
392
393private:
394 template <typename F>
395 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
396 // Substituting can cause recursive simplifications, which can invalidate
397 // our iterator. Use a WeakTrackingVH to hold onto it in case this
398 // happens.
399 Value *CurValue = &*CurInstIterator;
400 WeakTrackingVH IterHandle(CurValue);
401
402 f();
403
404 // If the iterator instruction was recursively deleted, start over at the
405 // start of the block.
406 if (IterHandle != CurValue) {
407 CurInstIterator = BB->begin();
408 SunkAddrs.clear();
409 }
410 }
411
412 // Get the DominatorTree, building if necessary.
413 DominatorTree &getDT(Function &F) {
414 if (!DT)
415 DT = std::make_unique<DominatorTree>(F);
416 return *DT;
417 }
418
419 void removeAllAssertingVHReferences(Value *V);
420 bool eliminateAssumptions(Function &F);
421 bool eliminateFallThrough(Function &F, DominatorTree *DT = nullptr);
422 bool eliminateMostlyEmptyBlocks(Function &F);
423 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
424 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
425 void eliminateMostlyEmptyBlock(BasicBlock *BB);
426 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
427 bool isPreheader);
428 bool makeBitReverse(Instruction &I);
429 bool optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT);
430 bool optimizeInst(Instruction *I, ModifyDT &ModifiedDT);
431 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy,
432 unsigned AddrSpace);
433 bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr);
434 bool optimizeInlineAsmInst(CallInst *CS);
435 bool optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT);
436 bool optimizeExt(Instruction *&I);
437 bool optimizeExtUses(Instruction *I);
438 bool optimizeLoadExt(LoadInst *Load);
439 bool optimizeShiftInst(BinaryOperator *BO);
440 bool optimizeFunnelShift(IntrinsicInst *Fsh);
441 bool optimizeSelectInst(SelectInst *SI);
442 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
443 bool optimizeSwitchType(SwitchInst *SI);
444 bool optimizeSwitchPhiConstants(SwitchInst *SI);
445 bool optimizeSwitchInst(SwitchInst *SI);
446 bool optimizeExtractElementInst(Instruction *Inst);
447 bool dupRetToEnableTailCallOpts(BasicBlock *BB, ModifyDT &ModifiedDT);
448 bool fixupDbgValue(Instruction *I);
449 bool fixupDbgVariableRecord(DbgVariableRecord &I);
450 bool fixupDbgVariableRecordsOnInst(Instruction &I);
451 bool placeDbgValues(Function &F);
452 bool placePseudoProbes(Function &F);
453 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
454 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
455 bool tryToPromoteExts(TypePromotionTransaction &TPT,
457 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
458 unsigned CreatedInstsCost = 0);
459 bool mergeSExts(Function &F);
460 bool splitLargeGEPOffsets();
461 bool optimizePhiType(PHINode *Inst, SmallPtrSetImpl<PHINode *> &Visited,
462 SmallPtrSetImpl<Instruction *> &DeletedInstrs);
463 bool optimizePhiTypes(Function &F);
464 bool performAddressTypePromotion(
465 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
466 bool HasPromoted, TypePromotionTransaction &TPT,
467 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
468 bool splitBranchCondition(Function &F, ModifyDT &ModifiedDT);
469 bool simplifyOffsetableRelocate(GCStatepointInst &I);
470
471 bool tryToSinkFreeOperands(Instruction *I);
472 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
473 CmpInst *Cmp, Intrinsic::ID IID);
474 bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
475 bool optimizeURem(Instruction *Rem);
476 bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
477 bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
478 void verifyBFIUpdates(Function &F);
479 bool _run(Function &F);
480};
481
482class CodeGenPrepareLegacyPass : public FunctionPass {
483public:
484 static char ID; // Pass identification, replacement for typeid
485
486 CodeGenPrepareLegacyPass() : FunctionPass(ID) {
487 initializeCodeGenPrepareLegacyPassPass(*PassRegistry::getPassRegistry());
488 }
489
490 bool runOnFunction(Function &F) override;
491
492 StringRef getPassName() const override { return "CodeGen Prepare"; }
493
494 void getAnalysisUsage(AnalysisUsage &AU) const override {
495 // FIXME: When we can selectively preserve passes, preserve the domtree.
502 }
503};
504
505} // end anonymous namespace
506
507char CodeGenPrepareLegacyPass::ID = 0;
508
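// Legacy pass manager entry point: gather the target machine and analysis
// results from the wrapper passes, populate a CodeGenPrepare instance, and
// delegate to the shared _run() implementation.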
509bool CodeGenPrepareLegacyPass::runOnFunction(Function &F) {
510 if (skipFunction(F))
511 return false;
512 auto TM = &getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
513 CodeGenPrepare CGP(TM);
514 CGP.DL = &F.getDataLayout();
515 CGP.SubtargetInfo = TM->getSubtargetImpl(F);
516 CGP.TLI = CGP.SubtargetInfo->getTargetLowering();
517 CGP.TRI = CGP.SubtargetInfo->getRegisterInfo();
518 CGP.TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
519 CGP.TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
520 CGP.LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
521 CGP.BPI.reset(new BranchProbabilityInfo(F, *CGP.LI));
522 CGP.BFI.reset(new BlockFrequencyInfo(F, *CGP.BPI, *CGP.LI));
523 CGP.PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
524 auto BBSPRWP =
525 getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
526 CGP.BBSectionsProfileReader = BBSPRWP ? &BBSPRWP->getBBSPR() : nullptr;
527
528 return CGP._run(F);
529}
530
531INITIALIZE_PASS_BEGIN(CodeGenPrepareLegacyPass, DEBUG_TYPE,
532 "Optimize for code generation", false, false)
539INITIALIZE_PASS_END(CodeGenPrepareLegacyPass, DEBUG_TYPE,
540 "Optimize for code generation", false, false)
541
542FunctionPass *llvm::createCodeGenPrepareLegacyPass() {
543 return new CodeGenPrepareLegacyPass();
544}
545
546PreservedAnalyses CodeGenPreparePass::run(Function &F,
547 FunctionAnalysisManager &AM) {
548 CodeGenPrepare CGP(TM);
549
550 bool Changed = CGP.run(F, AM);
551 if (!Changed)
552 return PreservedAnalyses::all();
553
554 PreservedAnalyses PA;
555 PA.preserve<TargetLibraryAnalysis>();
556 PA.preserve<TargetIRAnalysis>();
557 PA.preserve<LoopAnalysis>();
558 return PA;
559}
560
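// New pass manager entry point: the same setup as the legacy pass above, but
// the analyses are pulled from the FunctionAnalysisManager before delegating
// to _run().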
561bool CodeGenPrepare::run(Function &F, FunctionAnalysisManager &AM) {
562 DL = &F.getDataLayout();
563 SubtargetInfo = TM->getSubtargetImpl(F);
564 TLI = SubtargetInfo->getTargetLowering();
565 TRI = SubtargetInfo->getRegisterInfo();
566 TLInfo = &AM.getResult<TargetLibraryAnalysis>(F);
568 LI = &AM.getResult<LoopAnalysis>(F);
569 BPI.reset(new BranchProbabilityInfo(F, *LI));
570 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
571 auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
572 PSI = MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
573 BBSectionsProfileReader =
574 AM.getCachedResult<BasicBlockSectionsProfileReaderAnalysis>(F);
575 return _run(F);
576}
577
578bool CodeGenPrepare::_run(Function &F) {
579 bool EverMadeChange = false;
580
581 OptSize = F.hasOptSize();
582 // Use the basic-block-sections profile to promote hot functions to .text.hot
583 // if requested.
584 if (BBSectionsGuidedSectionPrefix && BBSectionsProfileReader &&
585 BBSectionsProfileReader->isFunctionHot(F.getName())) {
586 F.setSectionPrefix("hot");
587 } else if (ProfileGuidedSectionPrefix) {
588 // The hot attribute overrides profile-count-based hotness, while
589 // profile-count-based hotness overrides the cold attribute.
590 // This is a conservative behavior.
591 if (F.hasFnAttribute(Attribute::Hot) ||
592 PSI->isFunctionHotInCallGraph(&F, *BFI))
593 F.setSectionPrefix("hot");
594 // If PSI shows this function is not hot, we place the function
595 // into the unlikely section if (1) PSI shows this is a cold function, or
596 // (2) the function has the cold attribute.
597 else if (PSI->isFunctionColdInCallGraph(&F, *BFI) ||
598 F.hasFnAttribute(Attribute::Cold))
599 F.setSectionPrefix("unlikely");
600 else if (ProfileUnknownInSpecialSection && PSI->hasPartialSampleProfile() &&
601 PSI->isFunctionHotnessUnknown(F))
602 F.setSectionPrefix("unknown");
603 }
604
605 /// This optimization identifies DIV instructions that can be
606 /// profitably bypassed and carried out with a shorter, faster divide.
607 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI->isSlowDivBypassed()) {
608 const DenseMap<unsigned int, unsigned int> &BypassWidths =
610 BasicBlock *BB = &*F.begin();
611 while (BB != nullptr) {
612 // bypassSlowDivision may create new BBs, but we don't want to reapply the
613 // optimization to those blocks.
614 BasicBlock *Next = BB->getNextNode();
615 // F.hasOptSize is already checked in the outer if statement.
616 if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
617 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
618 BB = Next;
619 }
620 }
621
622 // Get rid of @llvm.assume builtins before attempting to eliminate empty
623 // blocks, since there might be blocks that only contain @llvm.assume calls
624 // (plus arguments that we can get rid of).
625 EverMadeChange |= eliminateAssumptions(F);
626
627 // Eliminate blocks that contain only PHI nodes and an
628 // unconditional branch.
629 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
630
631 ModifyDT ModifiedDT = ModifyDT::NotModifyDT;
632 if (!DisableBranchOpts)
633 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
634
635 // Split some critical edges where one of the sources is an indirect branch,
636 // to help generate sane code for PHIs involving such edges.
637 EverMadeChange |=
638 SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/true);
639
640 // If we are optimizing a huge function, we need to consider the build time.
641 // Because the basic algorithm's complexity is nearly O(N!).
642 IsHugeFunc = F.size() > HugeFuncThresholdInCGPP;
643
644 // Transformations above may invalidate dominator tree and/or loop info.
645 DT.reset();
646 LI->releaseMemory();
647 LI->analyze(getDT(F));
648
649 bool MadeChange = true;
650 bool FuncIterated = false;
651 while (MadeChange) {
652 MadeChange = false;
653
654 for (BasicBlock &BB : llvm::make_early_inc_range(F)) {
655 if (FuncIterated && !FreshBBs.contains(&BB))
656 continue;
657
658 ModifyDT ModifiedDTOnIteration = ModifyDT::NotModifyDT;
659 bool Changed = optimizeBlock(BB, ModifiedDTOnIteration);
660
661 if (ModifiedDTOnIteration == ModifyDT::ModifyBBDT)
662 DT.reset();
663
664 MadeChange |= Changed;
665 if (IsHugeFunc) {
666 // If the BB is updated, it may still have a chance to be optimized.
667 // This usually happens during sink optimization.
668 // For example:
669 //
670 // bb0:
671 // %and = and i32 %a, 4
672 // %cmp = icmp eq i32 %and, 0
673 //
674 // If %cmp is sunk to another BB, %and will have a chance to sink too.
675 if (Changed)
676 FreshBBs.insert(&BB);
677 else if (FuncIterated)
678 FreshBBs.erase(&BB);
679 } else {
680 // For small/normal functions, we restart BB iteration if the dominator
681 // tree of the Function was changed.
682 if (ModifiedDTOnIteration != ModifyDT::NotModifyDT)
683 break;
684 }
685 }
686 // We have iterated over all BBs in the function (only relevant for huge functions).
687 FuncIterated = IsHugeFunc;
688
689 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
690 MadeChange |= mergeSExts(F);
691 if (!LargeOffsetGEPMap.empty())
692 MadeChange |= splitLargeGEPOffsets();
693 MadeChange |= optimizePhiTypes(F);
694
695 if (MadeChange)
696 eliminateFallThrough(F, DT.get());
697
698#ifndef NDEBUG
699 if (MadeChange && VerifyLoopInfo)
700 LI->verify(getDT(F));
701#endif
702
703 // Really free removed instructions during promotion.
704 for (Instruction *I : RemovedInsts)
705 I->deleteValue();
706
707 EverMadeChange |= MadeChange;
708 SeenChainsForSExt.clear();
709 ValToSExtendedUses.clear();
710 RemovedInsts.clear();
711 LargeOffsetGEPMap.clear();
712 LargeOffsetGEPID.clear();
713 }
714
715 NewGEPBases.clear();
716 SunkAddrs.clear();
717
718 if (!DisableBranchOpts) {
719 MadeChange = false;
720 // Use a set vector to get deterministic iteration order. The order the
721 // blocks are removed may affect whether or not PHI nodes in successors
722 // are removed.
723 SmallSetVector<BasicBlock *, 8> WorkList;
724 for (BasicBlock &BB : F) {
725 SmallVector<BasicBlock *, 2> Successors(successors(&BB));
726 MadeChange |= ConstantFoldTerminator(&BB, true);
727 if (!MadeChange)
728 continue;
729
730 for (BasicBlock *Succ : Successors)
731 if (pred_empty(Succ))
732 WorkList.insert(Succ);
733 }
734
735 // Delete the dead blocks and any of their dead successors.
736 MadeChange |= !WorkList.empty();
737 while (!WorkList.empty()) {
738 BasicBlock *BB = WorkList.pop_back_val();
739 SmallVector<BasicBlock *, 2> Successors(successors(BB));
740
741 DeleteDeadBlock(BB);
742
743 for (BasicBlock *Succ : Successors)
744 if (pred_empty(Succ))
745 WorkList.insert(Succ);
746 }
747
748 // Merge pairs of basic blocks with unconditional branches, connected by
749 // a single edge.
750 if (EverMadeChange || MadeChange)
751 MadeChange |= eliminateFallThrough(F);
752
753 EverMadeChange |= MadeChange;
754 }
755
756 if (!DisableGCOpts) {
758 for (BasicBlock &BB : F)
759 for (Instruction &I : BB)
760 if (auto *SP = dyn_cast<GCStatepointInst>(&I))
761 Statepoints.push_back(SP);
762 for (auto &I : Statepoints)
763 EverMadeChange |= simplifyOffsetableRelocate(*I);
764 }
765
766 // Do this last to clean up use-before-def scenarios introduced by other
767 // preparatory transforms.
768 EverMadeChange |= placeDbgValues(F);
769 EverMadeChange |= placePseudoProbes(F);
770
771#ifndef NDEBUG
772 if (VerifyBFIUpdates)
773 verifyBFIUpdates(F);
774#endif
775
776 return EverMadeChange;
777}
778
779bool CodeGenPrepare::eliminateAssumptions(Function &F) {
780 bool MadeChange = false;
781 for (BasicBlock &BB : F) {
782 CurInstIterator = BB.begin();
783 while (CurInstIterator != BB.end()) {
784 Instruction *I = &*(CurInstIterator++);
785 if (auto *Assume = dyn_cast<AssumeInst>(I)) {
786 MadeChange = true;
787 Value *Operand = Assume->getOperand(0);
788 Assume->eraseFromParent();
789
790 resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
791 RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
792 });
793 }
794 }
795 }
796 return MadeChange;
797}
798
799/// An instruction is about to be deleted, so remove all references to it in our
800/// GEP-tracking data structures.
801void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
802 LargeOffsetGEPMap.erase(V);
803 NewGEPBases.erase(V);
804
805 auto GEP = dyn_cast<GetElementPtrInst>(V);
806 if (!GEP)
807 return;
808
809 LargeOffsetGEPID.erase(GEP);
810
811 auto VecI = LargeOffsetGEPMap.find(GEP->getPointerOperand());
812 if (VecI == LargeOffsetGEPMap.end())
813 return;
814
815 auto &GEPVector = VecI->second;
816 llvm::erase_if(GEPVector, [=](auto &Elt) { return Elt.first == GEP; });
817
818 if (GEPVector.empty())
819 LargeOffsetGEPMap.erase(VecI);
820}
821
822// Verify BFI has been updated correctly by recomputing BFI and comparing them.
823void LLVM_ATTRIBUTE_UNUSED CodeGenPrepare::verifyBFIUpdates(Function &F) {
824 DominatorTree NewDT(F);
825 LoopInfo NewLI(NewDT);
826 BranchProbabilityInfo NewBPI(F, NewLI, TLInfo);
827 BlockFrequencyInfo NewBFI(F, NewBPI, NewLI);
828 NewBFI.verifyMatch(*BFI);
829}
830
831/// Merge basic blocks which are connected by a single edge, where one of the
832/// basic blocks has a single successor pointing to the other basic block,
833/// which has a single predecessor.
834bool CodeGenPrepare::eliminateFallThrough(Function &F, DominatorTree *DT) {
835 bool Changed = false;
836 // Scan all of the blocks in the function, except for the entry block.
837 // Use a temporary array to avoid iterator being invalidated when
838 // deleting blocks.
840 for (auto &Block : llvm::drop_begin(F))
841 Blocks.push_back(&Block);
842
844 for (auto &Block : Blocks) {
845 auto *BB = cast_or_null<BasicBlock>(Block);
846 if (!BB)
847 continue;
848 // If the destination block has a single pred, then this is a trivial
849 // edge, just collapse it.
850 BasicBlock *SinglePred = BB->getSinglePredecessor();
851
852 // Don't merge if BB's address is taken.
853 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken())
854 continue;
855
856 // Make an effort to skip unreachable blocks.
857 if (DT && !DT->isReachableFromEntry(BB))
858 continue;
859
860 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
861 if (Term && !Term->isConditional()) {
862 Changed = true;
863 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
864
865 // Merge BB into SinglePred and delete it.
866 MergeBlockIntoPredecessor(BB, /* DTU */ nullptr, LI, /* MSSAU */ nullptr,
867 /* MemDep */ nullptr,
868 /* PredecessorWithTwoSuccessors */ false, DT);
869 Preds.insert(SinglePred);
870
871 if (IsHugeFunc) {
872 // Update FreshBBs to optimize the merged BB.
873 FreshBBs.insert(SinglePred);
874 FreshBBs.erase(BB);
875 }
876 }
877 }
878
879 // (Repeatedly) merging blocks into their predecessors can create redundant
880 // debug intrinsics.
881 for (const auto &Pred : Preds)
882 if (auto *BB = cast_or_null<BasicBlock>(Pred))
883 RemoveRedundantDbgInstrs(BB);
884
885 return Changed;
886}
887
888/// Find a destination block from BB if BB is mergeable empty block.
889BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
890 // If this block doesn't end with an uncond branch, ignore it.
891 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
892 if (!BI || !BI->isUnconditional())
893 return nullptr;
894
895 // If the instruction before the branch (skipping debug info) isn't a phi
896 // node, then other stuff is happening here.
898 if (BBI != BB->begin()) {
899 --BBI;
900 while (isa<DbgInfoIntrinsic>(BBI)) {
901 if (BBI == BB->begin())
902 break;
903 --BBI;
904 }
905 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
906 return nullptr;
907 }
908
909 // Do not break infinite loops.
910 BasicBlock *DestBB = BI->getSuccessor(0);
911 if (DestBB == BB)
912 return nullptr;
913
914 if (!canMergeBlocks(BB, DestBB))
915 DestBB = nullptr;
916
917 return DestBB;
918}
919
920/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
921/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
922/// edges in ways that are non-optimal for isel. Start by eliminating these
923/// blocks so we can split them the way we want them.
924bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
926 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
927 while (!LoopList.empty()) {
928 Loop *L = LoopList.pop_back_val();
929 llvm::append_range(LoopList, *L);
930 if (BasicBlock *Preheader = L->getLoopPreheader())
931 Preheaders.insert(Preheader);
932 }
933
934 bool MadeChange = false;
935 // Copy blocks into a temporary array to avoid iterator invalidation issues
936 // as we remove them.
937 // Note that this intentionally skips the entry block.
939 for (auto &Block : llvm::drop_begin(F)) {
940 // Delete phi nodes that could block deleting other empty blocks.
941 if (!DisableDeletePHIs)
942 MadeChange |= DeleteDeadPHIs(&Block, TLInfo);
943 Blocks.push_back(&Block);
944 }
945
946 for (auto &Block : Blocks) {
947 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
948 if (!BB)
949 continue;
950 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
951 if (!DestBB ||
952 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
953 continue;
954
955 eliminateMostlyEmptyBlock(BB);
956 MadeChange = true;
957 }
958 return MadeChange;
959}
960
961bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
962 BasicBlock *DestBB,
963 bool isPreheader) {
964 // Do not delete loop preheaders if doing so would create a critical edge.
965 // Loop preheaders can be good locations to spill registers. If the
966 // preheader is deleted and we create a critical edge, registers may be
967 // spilled in the loop body instead.
968 if (!DisablePreheaderProtect && isPreheader &&
969 !(BB->getSinglePredecessor() &&
971 return false;
972
973 // Skip merging if the block's successor is also a successor to any callbr
974 // that leads to this block.
975 // FIXME: Is this really needed? Is this a correctness issue?
976 for (BasicBlock *Pred : predecessors(BB)) {
977 if (isa<CallBrInst>(Pred->getTerminator()) &&
978 llvm::is_contained(successors(Pred), DestBB))
979 return false;
980 }
981
982 // Try to skip merging if the unique predecessor of BB is terminated by a
983 // switch or indirect branch instruction, and BB is used as an incoming block
984 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
985 // add COPY instructions in the predecessor of BB instead of BB (if it is not
986 // merged). Note that the critical edge created by merging such blocks won't be
987 // split in MachineSink because the jump table is not analyzable. By keeping
988 // such empty block (BB), ISel will place COPY instructions in BB, not in the
989 // predecessor of BB.
990 BasicBlock *Pred = BB->getUniquePredecessor();
991 if (!Pred || !(isa<SwitchInst>(Pred->getTerminator()) ||
992 isa<IndirectBrInst>(Pred->getTerminator())))
993 return true;
994
995 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
996 return true;
997
998 // We use a simple cost heuristic that determines skipping merging to be
999 // profitable if the cost of skipping merging is less than the cost of
1000 // merging : Cost(skipping merging) < Cost(merging BB), where the
1001 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
1002 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
1003 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
1004 // Freq(Pred) / Freq(BB) > 2.
1005 // Note that if there are multiple empty blocks sharing the same incoming
1006 // value for the PHIs in the DestBB, we consider them together. In such
1007 // case, Cost(merging BB) will be the sum of their frequencies.
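 // For example, with the default cgp-freq-ratio-to-skip-merge of 2, merging
 // is skipped only when Freq(Pred) > 2 * Freq(BB) (see the Limit check below).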
1008
1009 if (!isa<PHINode>(DestBB->begin()))
1010 return true;
1011
1012 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
1013
1014 // Find all other incoming blocks from which incoming values of all PHIs in
1015 // DestBB are the same as the ones from BB.
1016 for (BasicBlock *DestBBPred : predecessors(DestBB)) {
1017 if (DestBBPred == BB)
1018 continue;
1019
1020 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
1021 return DestPN.getIncomingValueForBlock(BB) ==
1022 DestPN.getIncomingValueForBlock(DestBBPred);
1023 }))
1024 SameIncomingValueBBs.insert(DestBBPred);
1025 }
1026
1027 // See if all BB's incoming values are same as the value from Pred. In this
1028 // case, there is no reason to skip merging because COPYs are expected to be placed in
1029 // Pred already.
1030 if (SameIncomingValueBBs.count(Pred))
1031 return true;
1032
1033 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
1034 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
1035
1036 for (auto *SameValueBB : SameIncomingValueBBs)
1037 if (SameValueBB->getUniquePredecessor() == Pred &&
1038 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
1039 BBFreq += BFI->getBlockFreq(SameValueBB);
1040
1041 std::optional<BlockFrequency> Limit = BBFreq.mul(FreqRatioToSkipMerge);
1042 return !Limit || PredFreq <= *Limit;
1043}
1044
1045/// Return true if we can merge BB into DestBB if there is a single
1046/// unconditional branch between them, and BB contains no other non-phi
1047/// instructions.
1048bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
1049 const BasicBlock *DestBB) const {
1050 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
1051 // the successor. If there are more complex condition (e.g. preheaders),
1052 // don't mess around with them.
1053 for (const PHINode &PN : BB->phis()) {
1054 for (const User *U : PN.users()) {
1055 const Instruction *UI = cast<Instruction>(U);
1056 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
1057 return false;
1058 // If User is inside DestBB block and it is a PHINode then check
1059 // incoming value. If incoming value is not from BB then this is
1060 // a complex condition (e.g. preheaders) we want to avoid here.
1061 if (UI->getParent() == DestBB) {
1062 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
1063 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
1064 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
1065 if (Insn && Insn->getParent() == BB &&
1066 Insn->getParent() != UPN->getIncomingBlock(I))
1067 return false;
1068 }
1069 }
1070 }
1071 }
1072
1073 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
1074 // and DestBB may have conflicting incoming values for the block. If so, we
1075 // can't merge the block.
1076 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
1077 if (!DestBBPN)
1078 return true; // no conflict.
1079
1080 // Collect the preds of BB.
1081 SmallPtrSet<const BasicBlock *, 16> BBPreds;
1082 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1083 // It is faster to get preds from a PHI than with pred_iterator.
1084 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1085 BBPreds.insert(BBPN->getIncomingBlock(i));
1086 } else {
1087 BBPreds.insert(pred_begin(BB), pred_end(BB));
1088 }
1089
1090 // Walk the preds of DestBB.
1091 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
1092 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
1093 if (BBPreds.count(Pred)) { // Common predecessor?
1094 for (const PHINode &PN : DestBB->phis()) {
1095 const Value *V1 = PN.getIncomingValueForBlock(Pred);
1096 const Value *V2 = PN.getIncomingValueForBlock(BB);
1097
1098 // If V2 is a phi node in BB, look up what the mapped value will be.
1099 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
1100 if (V2PN->getParent() == BB)
1101 V2 = V2PN->getIncomingValueForBlock(Pred);
1102
1103 // If there is a conflict, bail out.
1104 if (V1 != V2)
1105 return false;
1106 }
1107 }
1108 }
1109
1110 return true;
1111}
1112
1113/// Replace all old uses with new ones, and push the updated BBs into FreshBBs.
1114static void replaceAllUsesWith(Value *Old, Value *New,
1115 SmallSet<BasicBlock *, 32> &FreshBBs,
1116 bool IsHuge) {
1117 auto *OldI = dyn_cast<Instruction>(Old);
1118 if (OldI) {
1119 for (Value::user_iterator UI = OldI->user_begin(), E = OldI->user_end();
1120 UI != E; ++UI) {
1121 Instruction *User = cast<Instruction>(*UI);
1122 if (IsHuge)
1123 FreshBBs.insert(User->getParent());
1124 }
1125 }
1126 Old->replaceAllUsesWith(New);
1127}
1128
1129/// Eliminate a basic block that has only phi's and an unconditional branch in
1130/// it.
1131void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
1132 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
1133 BasicBlock *DestBB = BI->getSuccessor(0);
1134
1135 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
1136 << *BB << *DestBB);
1137
1138 // If the destination block has a single pred, then this is a trivial edge,
1139 // just collapse it.
1140 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
1141 if (SinglePred != DestBB) {
1142 assert(SinglePred == BB &&
1143 "Single predecessor not the same as predecessor");
1144 // Merge DestBB into SinglePred/BB and delete it.
1146 // Note: BB(=SinglePred) will not be deleted on this path.
1147 // DestBB(=its single successor) is the one that was deleted.
1148 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
1149
1150 if (IsHugeFunc) {
1151 // Update FreshBBs to optimize the merged BB.
1152 FreshBBs.insert(SinglePred);
1153 FreshBBs.erase(DestBB);
1154 }
1155 return;
1156 }
1157 }
1158
1159 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
1160 // to handle the new incoming edges it is about to have.
1161 for (PHINode &PN : DestBB->phis()) {
1162 // Remove the incoming value for BB, and remember it.
1163 Value *InVal = PN.removeIncomingValue(BB, false);
1164
1165 // Two options: either the InVal is a phi node defined in BB or it is some
1166 // value that dominates BB.
1167 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
1168 if (InValPhi && InValPhi->getParent() == BB) {
1169 // Add all of the input values of the input PHI as inputs of this phi.
1170 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
1171 PN.addIncoming(InValPhi->getIncomingValue(i),
1172 InValPhi->getIncomingBlock(i));
1173 } else {
1174 // Otherwise, add one instance of the dominating value for each edge that
1175 // we will be adding.
1176 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
1177 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
1178 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
1179 } else {
1180 for (BasicBlock *Pred : predecessors(BB))
1181 PN.addIncoming(InVal, Pred);
1182 }
1183 }
1184 }
1185
1186 // The PHIs are now updated, change everything that refers to BB to use
1187 // DestBB and remove BB.
1188 BB->replaceAllUsesWith(DestBB);
1189 BB->eraseFromParent();
1190 ++NumBlocksElim;
1191
1192 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
1193}
1194
1195// Computes a map of base pointer relocation instructions to corresponding
1196// derived pointer relocation instructions given a vector of all relocate calls
1198 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
1200 &RelocateInstMap) {
1201 // Collect information in two maps: one primarily for locating the base object
1202 // while filling the second map; the second map is the final structure holding
1203 // a mapping between Base and corresponding Derived relocate calls
1204 MapVector<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
1205 for (auto *ThisRelocate : AllRelocateCalls) {
1206 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
1207 ThisRelocate->getDerivedPtrIndex());
1208 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
1209 }
1210 for (auto &Item : RelocateIdxMap) {
1211 std::pair<unsigned, unsigned> Key = Item.first;
1212 if (Key.first == Key.second)
1213 // Base relocation: nothing to insert
1214 continue;
1215
1216 GCRelocateInst *I = Item.second;
1217 auto BaseKey = std::make_pair(Key.first, Key.first);
1218
1219 // We're iterating over RelocateIdxMap so we cannot modify it.
1220 auto MaybeBase = RelocateIdxMap.find(BaseKey);
1221 if (MaybeBase == RelocateIdxMap.end())
1222 // TODO: We might want to insert a new base object relocate and gep off
1223 // that, if there are enough derived object relocates.
1224 continue;
1225
1226 RelocateInstMap[MaybeBase->second].push_back(I);
1227 }
1228}
1229
1230// Accepts a GEP and extracts the operands into a vector provided they're all
1231// small integer constants
1233 SmallVectorImpl<Value *> &OffsetV) {
1234 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
1235 // Only accept small constant integer operands
1236 auto *Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
1237 if (!Op || Op->getZExtValue() > 20)
1238 return false;
1239 }
1240
1241 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
1242 OffsetV.push_back(GEP->getOperand(i));
1243 return true;
1244}
1245
1246// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
1247// replace, computes a replacement, and applies it.
1248static bool
1249simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
1250 const SmallVectorImpl<GCRelocateInst *> &Targets) {
1251 bool MadeChange = false;
1252 // We must ensure the relocation of derived pointer is defined after
1253 // relocation of base pointer. If we find a relocation corresponding to base
1254 // defined earlier than relocation of base then we move relocation of base
1255 // right before found relocation. We consider only relocation in the same
1256 // basic block as relocation of base. Relocations from other basic block will
1257 // be skipped by optimization and we do not care about them.
1258 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
1259 &*R != RelocatedBase; ++R)
1260 if (auto *RI = dyn_cast<GCRelocateInst>(R))
1261 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
1262 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
1263 RelocatedBase->moveBefore(RI);
1264 MadeChange = true;
1265 break;
1266 }
1267
1268 for (GCRelocateInst *ToReplace : Targets) {
1269 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
1270 "Not relocating a derived object of the original base object");
1271 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
1272 // A duplicate relocate call. TODO: coalesce duplicates.
1273 continue;
1274 }
1275
1276 if (RelocatedBase->getParent() != ToReplace->getParent()) {
1277 // Base and derived relocates are in different basic blocks.
1278 // In this case transform is only valid when base dominates derived
1279 // relocate. However it would be too expensive to check dominance
1280 // for each such relocate, so we skip the whole transformation.
1281 continue;
1282 }
1283
1284 Value *Base = ToReplace->getBasePtr();
1285 auto *Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
1286 if (!Derived || Derived->getPointerOperand() != Base)
1287 continue;
1289 SmallVector<Value *, 2> OffsetV;
1290 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
1291 continue;
1292
1293 // Create a Builder and replace the target callsite with a gep
1294 assert(RelocatedBase->getNextNode() &&
1295 "Should always have one since it's not a terminator");
1296
1297 // Insert after RelocatedBase
1298 IRBuilder<> Builder(RelocatedBase->getNextNode());
1299 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1300
1301 // If gc_relocate does not match the actual type, cast it to the right type.
1302 // In theory, there must be a bitcast after gc_relocate if the type does not
1303 // match, and we should reuse it to get the derived pointer. But it could be
1304 // cases like this:
1305 // bb1:
1306 // ...
1307 // %g1 = call coldcc i8 addrspace(1)*
1308 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1309 //
1310 // bb2:
1311 // ...
1312 // %g2 = call coldcc i8 addrspace(1)*
1313 // @llvm.experimental.gc.relocate.p1i8(...) br label %merge
1314 //
1315 // merge:
1316 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
1317 // %cast = bitcast i8 addrspace(1)* %p1 to i32 addrspace(1)*
1318 //
1319 // In this case, we can not find the bitcast any more. So we insert a new
1320 // bitcast no matter there is already one or not. In this way, we can handle
1321 // all cases, and the extra bitcast should be optimized away in later
1322 // passes.
1323 Value *ActualRelocatedBase = RelocatedBase;
1324 if (RelocatedBase->getType() != Base->getType()) {
1325 ActualRelocatedBase =
1326 Builder.CreateBitCast(RelocatedBase, Base->getType());
1327 }
1328 Value *Replacement =
1329 Builder.CreateGEP(Derived->getSourceElementType(), ActualRelocatedBase,
1330 ArrayRef(OffsetV));
1331 Replacement->takeName(ToReplace);
1332 // If the newly generated derived pointer's type does not match the original
1333 // derived pointer's type, cast the new derived pointer to match it. Same
1334 // reasoning as above.
1335 Value *ActualReplacement = Replacement;
1336 if (Replacement->getType() != ToReplace->getType()) {
1337 ActualReplacement =
1338 Builder.CreateBitCast(Replacement, ToReplace->getType());
1339 }
1340 ToReplace->replaceAllUsesWith(ActualReplacement);
1341 ToReplace->eraseFromParent();
1342
1343 MadeChange = true;
1344 }
1345 return MadeChange;
1346}
1347
1348// Turns this:
1349//
1350// %base = ...
1351// %ptr = gep %base + 15
1352// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1353// %base' = relocate(%tok, i32 4, i32 4)
1354// %ptr' = relocate(%tok, i32 4, i32 5)
1355// %val = load %ptr'
1356//
1357// into this:
1358//
1359// %base = ...
1360// %ptr = gep %base + 15
1361// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1362// %base' = gc.relocate(%tok, i32 4, i32 4)
1363// %ptr' = gep %base' + 15
1364// %val = load %ptr'
1365bool CodeGenPrepare::simplifyOffsetableRelocate(GCStatepointInst &I) {
1366 bool MadeChange = false;
1367 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1368 for (auto *U : I.users())
1369 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1370 // Collect all the relocate calls associated with a statepoint
1371 AllRelocateCalls.push_back(Relocate);
1372
1373 // We need at least one base pointer relocation + one derived pointer
1374 // relocation to mangle
1375 if (AllRelocateCalls.size() < 2)
1376 return false;
1377
1378 // RelocateInstMap is a mapping from the base relocate instruction to the
1379 // corresponding derived relocate instructions
1380 MapVector<GCRelocateInst *, SmallVector<GCRelocateInst *, 0>> RelocateInstMap;
1381 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1382 if (RelocateInstMap.empty())
1383 return false;
1384
1385 for (auto &Item : RelocateInstMap)
1386 // Item.first is the RelocatedBase to offset against
1387 // Item.second is the vector of Targets to replace
1388 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1389 return MadeChange;
1390}
1391
1392/// Sink the specified cast instruction into its user blocks.
1393static bool SinkCast(CastInst *CI) {
1394 BasicBlock *DefBB = CI->getParent();
1395
1396 /// InsertedCasts - Only insert a cast in each block once.
1397 DenseMap<BasicBlock *, CastInst *> InsertedCasts;
1398
1399 bool MadeChange = false;
1400 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1401 UI != E;) {
1402 Use &TheUse = UI.getUse();
1403 Instruction *User = cast<Instruction>(*UI);
1404
1405 // Figure out which BB this cast is used in. For PHI's this is the
1406 // appropriate predecessor block.
1407 BasicBlock *UserBB = User->getParent();
1408 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1409 UserBB = PN->getIncomingBlock(TheUse);
1410 }
1411
1412 // Preincrement use iterator so we don't invalidate it.
1413 ++UI;
1414
1415 // The first insertion point of a block containing an EH pad is after the
1416 // pad. If the pad is the user, we cannot sink the cast past the pad.
1417 if (User->isEHPad())
1418 continue;
1419
1420 // If the block selected to receive the cast is an EH pad that does not
1421 // allow non-PHI instructions before the terminator, we can't sink the
1422 // cast.
1423 if (UserBB->getTerminator()->isEHPad())
1424 continue;
1425
1426 // If this user is in the same block as the cast, don't change the cast.
1427 if (UserBB == DefBB)
1428 continue;
1429
1430 // If we have already inserted a cast into this block, use it.
1431 CastInst *&InsertedCast = InsertedCasts[UserBB];
1432
1433 if (!InsertedCast) {
1434 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1435 assert(InsertPt != UserBB->end());
1436 InsertedCast = cast<CastInst>(CI->clone());
1437 InsertedCast->insertBefore(*UserBB, InsertPt);
1438 }
1439
1440 // Replace a use of the cast with a use of the new cast.
1441 TheUse = InsertedCast;
1442 MadeChange = true;
1443 ++NumCastUses;
1444 }
1445
1446 // If we removed all uses, nuke the cast.
1447 if (CI->use_empty()) {
1448 salvageDebugInfo(*CI);
1449 CI->eraseFromParent();
1450 MadeChange = true;
1451 }
1452
1453 return MadeChange;
1454}
1455
1456/// If the specified cast instruction is a noop copy (e.g. it's casting from
1457/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1458/// reduce the number of virtual registers that must be created and coalesced.
1459///
1460/// Return true if any changes are made.
1462 const DataLayout &DL) {
1463 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1464 // than sinking only nop casts, but is helpful on some platforms.
1465 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1466 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1467 ASC->getDestAddressSpace()))
1468 return false;
1469 }
1470
1471 // If this is a noop copy,
1472 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1473 EVT DstVT = TLI.getValueType(DL, CI->getType());
1474
1475 // This is an fp<->int conversion?
1476 if (SrcVT.isInteger() != DstVT.isInteger())
1477 return false;
1478
1479 // If this is an extension, it will be a zero or sign extension, which
1480 // isn't a noop.
1481 if (SrcVT.bitsLT(DstVT))
1482 return false;
1483
1484 // If these values will be promoted, find out what they will be promoted
1485 // to. This helps us consider truncates on PPC as noop copies when they
1486 // are.
1487 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1489 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1490 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1492 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1493
1494 // If, after promotion, these are the same types, this is a noop copy.
1495 if (SrcVT != DstVT)
1496 return false;
1497
1498 return SinkCast(CI);
1499}
1500
1501// Match a simple increment by constant operation. Note that if a sub is
1502// matched, the step is negated (as if the step had been canonicalized to
1503// an add, even though we leave the instruction alone.)
1504static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS,
1505 Constant *&Step) {
1506 if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
1507 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
1508 m_Instruction(LHS), m_Constant(Step)))))
1509 return true;
1510 if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
1511 match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
1512 m_Instruction(LHS), m_Constant(Step))))) {
1513 Step = ConstantExpr::getNeg(Step);
1514 return true;
1515 }
1516 return false;
1517}
1518
1519/// If given \p PN is an inductive variable with value IVInc coming from the
1520/// backedge, and on each iteration it gets increased by Step, return pair
1521/// <IVInc, Step>. Otherwise, return std::nullopt.
1522static std::optional<std::pair<Instruction *, Constant *>>
1523getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
1524 const Loop *L = LI->getLoopFor(PN->getParent());
1525 if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
1526 return std::nullopt;
1527 auto *IVInc =
1528 dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
1529 if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
1530 return std::nullopt;
1531 Instruction *LHS = nullptr;
1532 Constant *Step = nullptr;
1533 if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
1534 return std::make_pair(IVInc, Step);
1535 return std::nullopt;
1536}
1537
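/// Return true if \p V is an increment of an induction variable: an add/sub
/// by a constant step (as matched by matchIncrement) whose LHS is the
/// corresponding loop PHI, as recognized by getIVIncrement.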
1538static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
1539 auto *I = dyn_cast<Instruction>(V);
1540 if (!I)
1541 return false;
1542 Instruction *LHS = nullptr;
1543 Constant *Step = nullptr;
1544 if (!matchIncrement(I, LHS, Step))
1545 return false;
1546 if (auto *PN = dyn_cast<PHINode>(LHS))
1547 if (auto IVInc = getIVIncrement(PN, LI))
1548 return IVInc->first == I;
1549 return false;
1550}
1551
1552bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1553 Value *Arg0, Value *Arg1,
1554 CmpInst *Cmp,
1555 Intrinsic::ID IID) {
1556 auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
1557 if (!isIVIncrement(BO, LI))
1558 return false;
1559 const Loop *L = LI->getLoopFor(BO->getParent());
1560 assert(L && "L should not be null after isIVIncrement()");
1561 // Do not risk on moving increment into a child loop.
1562 if (LI->getLoopFor(Cmp->getParent()) != L)
1563 return false;
1564
1565 // Finally, we need to ensure that the insert point will dominate all
1566 // existing uses of the increment.
1567
1568 auto &DT = getDT(*BO->getParent()->getParent());
1569 if (DT.dominates(Cmp->getParent(), BO->getParent()))
1570 // If we're moving up the dom tree, all uses are trivially dominated.
1571 // (This is the common case for code produced by LSR.)
1572 return true;
1573
1574 // Otherwise, special case the single use in the phi recurrence.
1575 return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
1576 };
1577 if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
1578 // We used to use a dominator tree here to allow multi-block optimization.
1579 // But that was problematic because:
1580 // 1. It could cause a perf regression by hoisting the math op into the
1581 // critical path.
1582 // 2. It could cause a perf regression by creating a value that was live
1583 // across multiple blocks and increasing register pressure.
1584 // 3. Use of a dominator tree could cause large compile-time regression.
1585 // This is because we recompute the DT on every change in the main CGP
1586 // run-loop. The recomputing is probably unnecessary in many cases, so if
1587 // that was fixed, using a DT here would be ok.
1588 //
1589 // There is one important particular case we still want to handle: if BO is
1590 // the IV increment. Important properties that make it profitable:
1591 // - We can speculate IV increment anywhere in the loop (as long as the
1592 // indvar Phi is its only user);
1593 // - Upon computing Cmp, we effectively compute something equivalent to the
1594 // IV increment (even though it looks different in the IR). So moving it up
1595 // to the cmp point does not really increase register pressure.
1596 return false;
1597 }
1598
1599 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1600 if (BO->getOpcode() == Instruction::Add &&
1601 IID == Intrinsic::usub_with_overflow) {
1602 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1603 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1604 }
1605
1606 // Insert at the first instruction of the pair.
1607 Instruction *InsertPt = nullptr;
1608 for (Instruction &Iter : *Cmp->getParent()) {
1609 // If BO is an XOR, it is not guaranteed that it comes after both inputs to
1610 // the overflow intrinsic are defined.
1611 if ((BO->getOpcode() != Instruction::Xor && &Iter == BO) || &Iter == Cmp) {
1612 InsertPt = &Iter;
1613 break;
1614 }
1615 }
1616 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1617
1618 IRBuilder<> Builder(InsertPt);
1619 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1620 if (BO->getOpcode() != Instruction::Xor) {
1621 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1622 replaceAllUsesWith(BO, Math, FreshBBs, IsHugeFunc);
1623 } else
1624 assert(BO->hasOneUse() &&
1625 "Patterns with XOr should use the BO only in the compare");
1626 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1627 replaceAllUsesWith(Cmp, OV, FreshBBs, IsHugeFunc);
1628 Cmp->eraseFromParent();
1629 BO->eraseFromParent();
1630 return true;
1631}
1632
1633/// Match special-case patterns that check for unsigned add overflow.
1634static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1635 BinaryOperator *&Add) {
1636 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1637 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1638 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1639
1640 // We are not expecting non-canonical/degenerate code. Just bail out.
1641 if (isa<Constant>(A))
1642 return false;
1643
1644 ICmpInst::Predicate Pred = Cmp->getPredicate();
1645 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1646 B = ConstantInt::get(B->getType(), 1);
1647 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1648 B = Constant::getAllOnesValue(B->getType());
1649 else
1650 return false;
1651
1652 // Check the users of the variable operand of the compare looking for an add
1653 // with the adjusted constant.
1654 for (User *U : A->users()) {
1655 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1656 Add = cast<BinaryOperator>(U);
1657 return true;
1658 }
1659 }
1660 return false;
1661}
1662
1663/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1664/// intrinsic. Return true if any changes were made.
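/// As a rough illustrative sketch (made-up IR), the rewrite turns
///   %add = add i32 %x, %y
///   %cmp = icmp ult i32 %add, %x
/// into
///   %m = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
///   %math = extractvalue { i32, i1 } %m, 0
///   %ov = extractvalue { i32, i1 } %m, 1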
1665bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1666 ModifyDT &ModifiedDT) {
1667 bool EdgeCase = false;
1668 Value *A, *B;
1669 BinaryOperator *Add;
1670 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) {
1671 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1672 return false;
1673 // Set A and B in case we match matchUAddWithOverflowConstantEdgeCases.
1674 A = Add->getOperand(0);
1675 B = Add->getOperand(1);
1676 EdgeCase = true;
1677 }
1678
1679 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1680 TLI->getValueType(*DL, Add->getType()),
1681 Add->hasNUsesOrMore(EdgeCase ? 1 : 2)))
1682 return false;
1683
1684 // We don't want to move around uses of condition values this late, so we
1685 // check if it is legal to create the call to the intrinsic in the basic
1686 // block containing the icmp.
1687 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1688 return false;
1689
1690 if (!replaceMathCmpWithIntrinsic(Add, A, B, Cmp,
1691 Intrinsic::uadd_with_overflow))
1692 return false;
1693
1694 // Reset callers - do not crash by iterating over a dead instruction.
1695 ModifiedDT = ModifyDT::ModifyInstDT;
1696 return true;
1697}
1698
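/// Try to combine the compare into a call to the llvm.usub.with.overflow
/// intrinsic, mirroring combineToUAddWithOverflow above. As a rough sketch
/// (made-up IR), a "%sub = sub i32 %a, %b" paired with "%cmp = icmp ult i32
/// %a, %b" becomes one @llvm.usub.with.overflow.i32 call whose two
/// extractvalues replace the sub and the cmp.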
1699bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1700 ModifyDT &ModifiedDT) {
1701 // We are not expecting non-canonical/degenerate code. Just bail out.
1702 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1703 if (isa<Constant>(A) && isa<Constant>(B))
1704 return false;
1705
1706 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1707 ICmpInst::Predicate Pred = Cmp->getPredicate();
1708 if (Pred == ICmpInst::ICMP_UGT) {
1709 std::swap(A, B);
1710 Pred = ICmpInst::ICMP_ULT;
1711 }
1712 // Convert special-case: (A == 0) is the same as (A u< 1).
1713 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1714 B = ConstantInt::get(B->getType(), 1);
1715 Pred = ICmpInst::ICMP_ULT;
1716 }
1717 // Convert special-case: (A != 0) is the same as (0 u< A).
1718 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1719 std::swap(A, B);
1720 Pred = ICmpInst::ICMP_ULT;
1721 }
1722 if (Pred != ICmpInst::ICMP_ULT)
1723 return false;
1724
1725 // Walk the users of a variable operand of a compare looking for a subtract or
1726 // add with that same operand. Also match the 2nd operand of the compare to
1727 // the add/sub, but that may be a negated constant operand of an add.
1728 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1729 BinaryOperator *Sub = nullptr;
1730 for (User *U : CmpVariableOperand->users()) {
1731 // A - B, A u< B --> usubo(A, B)
1732 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1733 Sub = cast<BinaryOperator>(U);
1734 break;
1735 }
1736
1737 // A + (-C), A u< C (canonicalized form of (sub A, C))
1738 const APInt *CmpC, *AddC;
1739 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1740 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1741 Sub = cast<BinaryOperator>(U);
1742 break;
1743 }
1744 }
1745 if (!Sub)
1746 return false;
1747
1748 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1749 TLI->getValueType(*DL, Sub->getType()),
1750 Sub->hasNUsesOrMore(1)))
1751 return false;
1752
1753 if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
1754 Cmp, Intrinsic::usub_with_overflow))
1755 return false;
1756
1757 // Reset callers - do not crash by iterating over a dead instruction.
1758 ModifiedDT = ModifyDT::ModifyInstDT;
1759 return true;
1760}
1761
1762/// Sink the given CmpInst into user blocks to reduce the number of virtual
1763/// registers that must be created and coalesced. This is a clear win except on
1764/// targets with multiple condition code registers (PowerPC), where it might
1765/// lose; some adjustment may be wanted there.
1766///
1767/// Return true if any changes are made.
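/// For illustration (made-up blocks): a cmp defined in BB0 but only used by
/// branches in BB1 and BB2 gets a clone in each of BB1 and BB2, so every
/// branch can fold with a local compare instead of keeping a cross-block i1
/// value live.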
1768static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1769 if (TLI.hasMultipleConditionRegisters())
1770 return false;
1771
1772 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1773 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1774 return false;
1775
1776 // Only insert a cmp in each block once.
1777 DenseMap<BasicBlock *, CmpInst *> InsertedCmps;
1778
1779 bool MadeChange = false;
1780 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1781 UI != E;) {
1782 Use &TheUse = UI.getUse();
1783 Instruction *User = cast<Instruction>(*UI);
1784
1785 // Preincrement use iterator so we don't invalidate it.
1786 ++UI;
1787
1788 // Don't bother for PHI nodes.
1789 if (isa<PHINode>(User))
1790 continue;
1791
1792 // Figure out which BB this cmp is used in.
1793 BasicBlock *UserBB = User->getParent();
1794 BasicBlock *DefBB = Cmp->getParent();
1795
1796 // If this user is in the same block as the cmp, don't change the cmp.
1797 if (UserBB == DefBB)
1798 continue;
1799
1800 // If we have already inserted a cmp into this block, use it.
1801 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1802
1803 if (!InsertedCmp) {
1804 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1805 assert(InsertPt != UserBB->end());
1806 InsertedCmp = CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1807 Cmp->getOperand(0), Cmp->getOperand(1), "");
1808 InsertedCmp->insertBefore(*UserBB, InsertPt);
1809 // Propagate the debug info.
1810 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1811 }
1812
1813 // Replace a use of the cmp with a use of the new cmp.
1814 TheUse = InsertedCmp;
1815 MadeChange = true;
1816 ++NumCmpUses;
1817 }
1818
1819 // If we removed all uses, nuke the cmp.
1820 if (Cmp->use_empty()) {
1821 Cmp->eraseFromParent();
1822 MadeChange = true;
1823 }
1824
1825 return MadeChange;
1826}
1827
1828/// For pattern like:
1829///
1830/// DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB)
1831/// ...
1832/// DomBB:
1833/// ...
1834/// br DomCond, TrueBB, CmpBB
1835/// CmpBB: (with DomBB being the single predecessor)
1836/// ...
1837/// Cmp = icmp eq CmpOp0, CmpOp1
1838/// ...
1839///
1840/// It would use two comparisons on targets where the lowering of icmp sgt/slt
1841/// differs from the lowering of icmp eq (PowerPC). This function tries to convert
1842/// 'Cmp = icmp eq CmpOp0, CmpOp1' to 'Cmp = icmp slt/sgt CmpOp0, CmpOp1'.
1843/// After that, DomCond and Cmp can use the same comparison, eliminating one
1844/// comparison.
1845///
1846/// Return true if any changes are made.
1847static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
1848 const TargetLowering &TLI) {
1849 if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
1850 return false;
1851
1852 ICmpInst::Predicate Pred = Cmp->getPredicate();
1853 if (Pred != ICmpInst::ICMP_EQ)
1854 return false;
1855
1856 // If icmp eq has users other than BranchInst and SelectInst, converting it to
1857 // icmp slt/sgt would introduce more redundant LLVM IR.
1858 for (User *U : Cmp->users()) {
1859 if (isa<BranchInst>(U))
1860 continue;
1861 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
1862 continue;
1863 return false;
1864 }
1865
1866 // This is a cheap/incomplete check for dominance - just match a single
1867 // predecessor with a conditional branch.
1868 BasicBlock *CmpBB = Cmp->getParent();
1869 BasicBlock *DomBB = CmpBB->getSinglePredecessor();
1870 if (!DomBB)
1871 return false;
1872
1873 // We want to ensure that the only way control gets to the comparison of
1874 // interest is that a less/greater than comparison on the same operands is
1875 // false.
1876 Value *DomCond;
1877 BasicBlock *TrueBB, *FalseBB;
1878 if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
1879 return false;
1880 if (CmpBB != FalseBB)
1881 return false;
1882
1883 Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
1884 ICmpInst::Predicate DomPred;
1885 if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
1886 return false;
1887 if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
1888 return false;
1889
1890 // Convert the equality comparison to the opposite of the dominating
1891 // comparison and swap the direction for all branch/select users.
1892 // We have conceptually converted:
1893 // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
1894 // to
1895 // Res = (a < b) ? <LT_RES> : (a > b) ? <GT_RES> : <EQ_RES>;
1896 // And similarly for branches.
1897 for (User *U : Cmp->users()) {
1898 if (auto *BI = dyn_cast<BranchInst>(U)) {
1899 assert(BI->isConditional() && "Must be conditional");
1900 BI->swapSuccessors();
1901 continue;
1902 }
1903 if (auto *SI = dyn_cast<SelectInst>(U)) {
1904 // Swap operands
1905 SI->swapValues();
1906 SI->swapProfMetadata();
1907 continue;
1908 }
1909 llvm_unreachable("Must be a branch or a select");
1910 }
1911 Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred));
1912 return true;
1913}
1914
1915/// Many architectures use the same instruction for both subtract and cmp. Try
1916/// to swap cmp operands to match subtract operations to allow for CSE.
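/// For illustration (made-up IR): given "%s = sub i32 %b, %a" and
/// "%c = icmp ugt i32 %a, %b", swapping the compare's operands lets the
/// backend share one subtract-style instruction between the sub and the cmp.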
1917static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp) {
1918 Value *Op0 = Cmp->getOperand(0);
1919 Value *Op1 = Cmp->getOperand(1);
1920 if (!Op0->getType()->isIntegerTy() || isa<Constant>(Op0) ||
1921 isa<Constant>(Op1) || Op0 == Op1)
1922 return false;
1923
1924 // If a subtract already has the same operands as a compare, swapping would be
1925 // bad. If a subtract has the same operands as a compare but in reverse order,
1926 // then swapping is good.
1927 int GoodToSwap = 0;
1928 unsigned NumInspected = 0;
1929 for (const User *U : Op0->users()) {
1930 // Avoid walking many users.
1931 if (++NumInspected > 128)
1932 return false;
1933 if (match(U, m_Sub(m_Specific(Op1), m_Specific(Op0))))
1934 GoodToSwap++;
1935 else if (match(U, m_Sub(m_Specific(Op0), m_Specific(Op1))))
1936 GoodToSwap--;
1937 }
1938
1939 if (GoodToSwap > 0) {
1940 Cmp->swapOperands();
1941 return true;
1942 }
1943 return false;
1944}
1945
1946static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
1947 const DataLayout &DL) {
1948 FCmpInst *FCmp = dyn_cast<FCmpInst>(Cmp);
1949 if (!FCmp)
1950 return false;
1951
1952 // Don't fold if the target offers free fabs and the predicate is legal.
1953 EVT VT = TLI.getValueType(DL, Cmp->getOperand(0)->getType());
1954 if (TLI.isFAbsFree(VT) &&
1955 TLI.isCondCodeLegal(getFCmpCondCode(FCmp->getPredicate()),
1956 VT.getSimpleVT()))
1957 return false;
1958
1959 // Reverse the canonicalization if it is a FP class test
1960 auto ShouldReverseTransform = [](FPClassTest ClassTest) {
1961 return ClassTest == fcInf || ClassTest == (fcInf | fcNan);
1962 };
1963 auto [ClassVal, ClassTest] =
1964 fcmpToClassTest(FCmp->getPredicate(), *FCmp->getParent()->getParent(),
1965 FCmp->getOperand(0), FCmp->getOperand(1));
1966 if (!ClassVal)
1967 return false;
1968
1969 if (!ShouldReverseTransform(ClassTest) && !ShouldReverseTransform(~ClassTest))
1970 return false;
1971
1972 IRBuilder<> Builder(Cmp);
1973 Value *IsFPClass = Builder.createIsFPClass(ClassVal, ClassTest);
1974 Cmp->replaceAllUsesWith(IsFPClass);
1975 RecursivelyDeleteTriviallyDeadInstructions(Cmp);
1976 return true;
1977}
1978
1979static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
1980 const LoopInfo *LI,
1981 Value *&RemAmtOut,
1982 PHINode *&LoopIncrPNOut) {
1983 Value *Incr, *RemAmt;
1984 // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
1985 if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
1986 return false;
1987
1988 // Find the loop increment PHI.
1989 auto *PN = dyn_cast<PHINode>(Incr);
1990 if (!PN)
1991 return false;
1992
1993 // This isn't strictly necessary; what we really need is one increment and any
1994 // number of initial values that are all the same.
1995 if (PN->getNumIncomingValues() != 2)
1996 return false;
1997
1998 // Only trivially analyzable loops.
1999 Loop *L = LI->getLoopFor(PN->getParent());
2000 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
2001 return false;
2002
2003 // Require that the remainder is in the loop.
2004 if (!L->contains(Rem))
2005 return false;
2006
2007 // Only works if the remainder amount is a loop invariant.
2008 if (!L->isLoopInvariant(RemAmt))
2009 return false;
2010
2011 // Is the PHI a loop increment?
2012 auto LoopIncrInfo = getIVIncrement(PN, LI);
2013 if (!LoopIncrInfo)
2014 return false;
2015
2016 // We need remainder_amount % increment_amount to be zero. Increment of one
2017 // satisfies that without any special logic and is overwhelmingly the common
2018 // case.
2019 if (!match(LoopIncrInfo->second, m_One()))
2020 return false;
2021
2022 // Need the increment to not overflow.
2023 if (!match(LoopIncrInfo->first, m_c_NUWAdd(m_Specific(PN), m_Value())))
2024 return false;
2025
2026 // Set output variables.
2027 RemAmtOut = RemAmt;
2028 LoopIncrPNOut = PN;
2029
2030 return true;
2031}
2032
2033// Try to transform:
2034//
2035// for(i = Start; i < End; ++i)
2036// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
2037//
2038// ->
2039//
2040// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
2041// for(i = Start; i < End; ++i, ++rem)
2042// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
2043//
2044// Currently only implemented for `IncrLoopInvariant` being zero.
2045static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
2046 const LoopInfo *LI,
2047 SmallSet<BasicBlock *, 32> &FreshBBs,
2048 bool IsHuge) {
2049 Value *RemAmt;
2050 PHINode *LoopIncrPN;
2051 if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, LoopIncrPN))
2052 return false;
2053
2054 // Only handle a non-constant remainder, as the extra IV is probably not
2055 // profitable in that case.
2056 //
2057 // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
2058 // we can rule out register pressure and ensure this `urem` is executed each
2059 // iteration, it's probably profitable to handle the const case as well.
2060 //
2061 // Potential TODO(2): Should we have a check for how "nested" this remainder
2062 // operation is? The new code runs every iteration so if the remainder is
2063 // guarded behind unlikely conditions this might not be worth it.
2064 if (match(RemAmt, m_ImmConstant()))
2065 return false;
2066
2067 Loop *L = LI->getLoopFor(LoopIncrPN->getParent());
2068 Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
2069 // If we can't fully optimize out the `rem`, skip this transform.
2070 Start = simplifyURemInst(Start, RemAmt, *DL);
2071 if (!Start)
2072 return false;
2073
2074 // Create new remainder with induction variable.
2075 Type *Ty = Rem->getType();
2076 IRBuilder<> Builder(Rem->getContext());
2077
2078 Builder.SetInsertPoint(LoopIncrPN);
2079 PHINode *NewRem = Builder.CreatePHI(Ty, 2);
2080
2081 Builder.SetInsertPoint(cast<Instruction>(
2082 LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
2083 // `(add (urem x, y), 1)` is always nuw.
2084 Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
2085 Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
2086 Value *RemSel =
2087 Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
2088
2089 NewRem->addIncoming(Start, L->getLoopPreheader());
2090 NewRem->addIncoming(RemSel, L->getLoopLatch());
2091
2092 // Insert all touched BBs.
2093 FreshBBs.insert(LoopIncrPN->getParent());
2094 FreshBBs.insert(L->getLoopLatch());
2095 FreshBBs.insert(Rem->getParent());
2096
2097 replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
2098 Rem->eraseFromParent();
2099 return true;
2100}
2101
2102bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
2103 if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
2104 return true;
2105 return false;
2106}
2107
2108bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
2109 if (sinkCmpExpression(Cmp, *TLI))
2110 return true;
2111
2112 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
2113 return true;
2114
2115 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
2116 return true;
2117
2118 if (foldICmpWithDominatingICmp(Cmp, *TLI))
2119 return true;
2120
2121 if (swapICmpOperandsToExposeCSEOpportunities(Cmp))
2122 return true;
2123
2124 if (foldFCmpToFPClassTest(Cmp, *TLI, *DL))
2125 return true;
2126
2127 return false;
2128}
2129
2130/// Duplicate and sink the given 'and' instruction into user blocks where it is
2131/// used in a compare to allow isel to generate better code for targets where
2132/// this operation can be combined.
2133///
2134/// Return true if any changes are made.
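/// For illustration (made-up blocks): an "%m = and i64 %x, 255" in BB1 whose
/// only uses are "icmp eq i64 %m, 0" instructions in BB2 and BB3 gets a
/// duplicate 'and' placed next to each icmp, so isel can fold every
/// mask-and-compare pair.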
2135static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI,
2136 SetOfInstrs &InsertedInsts) {
2137 // Double-check that we're not trying to optimize an instruction that was
2138 // already optimized by some other part of this pass.
2139 assert(!InsertedInsts.count(AndI) &&
2140 "Attempting to optimize already optimized and instruction");
2141 (void)InsertedInsts;
2142
2143 // Nothing to do for single use in same basic block.
2144 if (AndI->hasOneUse() &&
2145 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
2146 return false;
2147
2148 // Try to avoid cases where sinking/duplicating is likely to increase register
2149 // pressure.
2150 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
2151 !isa<ConstantInt>(AndI->getOperand(1)) &&
2152 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
2153 return false;
2154
2155 for (auto *U : AndI->users()) {
2156 Instruction *User = cast<Instruction>(U);
2157
2158 // Only sink 'and' feeding icmp with 0.
2159 if (!isa<ICmpInst>(User))
2160 return false;
2161
2162 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
2163 if (!CmpC || !CmpC->isZero())
2164 return false;
2165 }
2166
2167 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
2168 return false;
2169
2170 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
2171 LLVM_DEBUG(AndI->getParent()->dump());
2172
2173 // Push the 'and' into the same block as the icmp 0. There should only be
2174 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
2175 // others, so we don't need to keep track of which BBs we insert into.
2176 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
2177 UI != E;) {
2178 Use &TheUse = UI.getUse();
2179 Instruction *User = cast<Instruction>(*UI);
2180
2181 // Preincrement use iterator so we don't invalidate it.
2182 ++UI;
2183
2184 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
2185
2186 // Keep the 'and' in the same place if the use is already in the same block.
2187 Instruction *InsertPt =
2188 User->getParent() == AndI->getParent() ? AndI : User;
2189 Instruction *InsertedAnd = BinaryOperator::Create(
2190 Instruction::And, AndI->getOperand(0), AndI->getOperand(1), "",
2191 InsertPt->getIterator());
2192 // Propagate the debug info.
2193 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
2194
2195 // Replace a use of the 'and' with a use of the new 'and'.
2196 TheUse = InsertedAnd;
2197 ++NumAndUses;
2198 LLVM_DEBUG(User->getParent()->dump());
2199 }
2200
2201 // We removed all uses, nuke the and.
2202 AndI->eraseFromParent();
2203 return true;
2204}
2205
2206/// Check if the candidates could be combined with a shift instruction, which
2207/// includes:
2208/// 1. Truncate instruction
2209/// 2. And instruction and the imm is a mask of the low bits:
2210/// imm & (imm+1) == 0
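/// (Illustratively, masks such as 0xFF and 0xFFFF qualify, while 0xF0 does
/// not, since 0xF0 & 0xF1 != 0.)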
2211static bool isExtractBitsCandidateUse(Instruction *User) {
2212 if (!isa<TruncInst>(User)) {
2213 if (User->getOpcode() != Instruction::And ||
2214 !isa<ConstantInt>(User->getOperand(1)))
2215 return false;
2216
2217 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
2218
2219 if ((Cimm & (Cimm + 1)).getBoolValue())
2220 return false;
2221 }
2222 return true;
2223}
2224
2225/// Sink both shift and truncate instruction to the use of truncate's BB.
2226static bool
2227SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
2228 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
2229 const TargetLowering &TLI, const DataLayout &DL) {
2230 BasicBlock *UserBB = User->getParent();
2231 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
2232 auto *TruncI = cast<TruncInst>(User);
2233 bool MadeChange = false;
2234
2235 for (Value::user_iterator TruncUI = TruncI->user_begin(),
2236 TruncE = TruncI->user_end();
2237 TruncUI != TruncE;) {
2238
2239 Use &TruncTheUse = TruncUI.getUse();
2240 Instruction *TruncUser = cast<Instruction>(*TruncUI);
2241 // Preincrement use iterator so we don't invalidate it.
2242
2243 ++TruncUI;
2244
2245 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
2246 if (!ISDOpcode)
2247 continue;
2248
2249 // If the use is actually a legal node, there will not be an
2250 // implicit truncate.
2251 // FIXME: always querying the result type is just an
2252 // approximation; some nodes' legality is determined by the
2253 // operand or other means. There's no good way to find out though.
2254 if (TLI.isOperationLegalOrCustom(
2255 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
2256 continue;
2257
2258 // Don't bother for PHI nodes.
2259 if (isa<PHINode>(TruncUser))
2260 continue;
2261
2262 BasicBlock *TruncUserBB = TruncUser->getParent();
2263
2264 if (UserBB == TruncUserBB)
2265 continue;
2266
2267 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
2268 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
2269
2270 if (!InsertedShift && !InsertedTrunc) {
2271 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
2272 assert(InsertPt != TruncUserBB->end());
2273 // Sink the shift
2274 if (ShiftI->getOpcode() == Instruction::AShr)
2275 InsertedShift =
2276 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2277 else
2278 InsertedShift =
2279 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2280 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2281 InsertedShift->insertBefore(*TruncUserBB, InsertPt);
2282
2283 // Sink the trunc
2284 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
2285 TruncInsertPt++;
2286 // It will go ahead of any debug-info.
2287 TruncInsertPt.setHeadBit(true);
2288 assert(TruncInsertPt != TruncUserBB->end());
2289
2290 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
2291 TruncI->getType(), "");
2292 InsertedTrunc->insertBefore(*TruncUserBB, TruncInsertPt);
2293 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
2294
2295 MadeChange = true;
2296
2297 TruncTheUse = InsertedTrunc;
2298 }
2299 }
2300 return MadeChange;
2301}
2302
2303/// Sink the shift *right* instruction into user blocks if the uses could
2304/// potentially be combined with this shift instruction to generate a BitExtract
2305/// instruction. It will only be applied if the architecture supports a
2306/// BitExtract instruction. Here is an example:
2307/// BB1:
2308/// %x.extract.shift = lshr i64 %arg1, 32
2309/// BB2:
2310/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
2311/// ==>
2312///
2313/// BB2:
2314/// %x.extract.shift.1 = lshr i64 %arg1, 32
2315/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
2316///
2317/// CodeGen will recognize the pattern in BB2 and generate BitExtract
2318/// instruction.
2319/// Return true if any changes are made.
2320static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
2321 const TargetLowering &TLI,
2322 const DataLayout &DL) {
2323 BasicBlock *DefBB = ShiftI->getParent();
2324
2325 /// Only insert instructions in each block once.
2326 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
2327
2328 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
2329
2330 bool MadeChange = false;
2331 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
2332 UI != E;) {
2333 Use &TheUse = UI.getUse();
2334 Instruction *User = cast<Instruction>(*UI);
2335 // Preincrement use iterator so we don't invalidate it.
2336 ++UI;
2337
2338 // Don't bother for PHI nodes.
2339 if (isa<PHINode>(User))
2340 continue;
2341
2342 if (!isExtractBitsCandidateUse(User))
2343 continue;
2344
2345 BasicBlock *UserBB = User->getParent();
2346
2347 if (UserBB == DefBB) {
2348 // If the shift and truncate instructions are in the same BB, the use of
2349 // the truncate (TruncUse) may still introduce another truncate if its type
2350 // is not legal. In this case, we would like to sink both the shift and the
2351 // truncate instruction to the BB of TruncUse.
2352 // for example:
2353 // BB1:
2354 // i64 shift.result = lshr i64 opnd, imm
2355 // trunc.result = trunc shift.result to i16
2356 //
2357 // BB2:
2358 // ----> We will have an implicit truncate here if the architecture does
2359 // not have an i16 compare.
2360 // cmp i16 trunc.result, opnd2
2361 //
2362 if (isa<TruncInst>(User) &&
2363 shiftIsLegal
2364 // If the type of the truncate is legal, no truncate will be
2365 // introduced in other basic blocks.
2366 && (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
2367 MadeChange =
2368 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
2369
2370 continue;
2371 }
2372 // If we have already inserted a shift into this block, use it.
2373 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
2374
2375 if (!InsertedShift) {
2376 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
2377 assert(InsertPt != UserBB->end());
2378
2379 if (ShiftI->getOpcode() == Instruction::AShr)
2380 InsertedShift =
2381 BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, "");
2382 else
2383 InsertedShift =
2384 BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, "");
2385 InsertedShift->insertBefore(*UserBB, InsertPt);
2386 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
2387
2388 MadeChange = true;
2389 }
2390
2391 // Replace a use of the shift with a use of the new shift.
2392 TheUse = InsertedShift;
2393 }
2394
2395 // If we removed all uses, or there are none, nuke the shift.
2396 if (ShiftI->use_empty()) {
2397 salvageDebugInfo(*ShiftI);
2398 ShiftI->eraseFromParent();
2399 MadeChange = true;
2400 }
2401
2402 return MadeChange;
2403}
2404
2405/// If counting leading or trailing zeros is an expensive operation and a zero
2406/// input is defined, add a check for zero to avoid calling the intrinsic.
2407///
2408/// We want to transform:
2409/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
2410///
2411/// into:
2412/// entry:
2413/// %cmpz = icmp eq i64 %A, 0
2414/// br i1 %cmpz, label %cond.end, label %cond.false
2415/// cond.false:
2416/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
2417/// br label %cond.end
2418/// cond.end:
2419/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
2420///
2421/// If the transform is performed, return true and set ModifiedDT to true.
2422static bool despeculateCountZeros(IntrinsicInst *CountZeros,
2423 LoopInfo &LI,
2424 const TargetLowering *TLI,
2425 const DataLayout *DL, ModifyDT &ModifiedDT,
2426 SmallSet<BasicBlock *, 32> &FreshBBs,
2427 bool IsHugeFunc) {
2428 // If a zero input is undefined, it doesn't make sense to despeculate that.
2429 if (match(CountZeros->getOperand(1), m_One()))
2430 return false;
2431
2432 // If it's cheap to speculate, there's nothing to do.
2433 Type *Ty = CountZeros->getType();
2434 auto IntrinsicID = CountZeros->getIntrinsicID();
2435 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz(Ty)) ||
2436 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz(Ty)))
2437 return false;
2438
2439 // Only handle legal scalar cases. Anything else requires too much work.
2440 unsigned SizeInBits = Ty->getScalarSizeInBits();
2441 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
2442 return false;
2443
2444 // Bail if the value is never zero.
2445 Use &Op = CountZeros->getOperandUse(0);
2446 if (isKnownNonZero(Op, *DL))
2447 return false;
2448
2449 // The intrinsic will be sunk behind a compare against zero and branch.
2450 BasicBlock *StartBlock = CountZeros->getParent();
2451 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
2452 if (IsHugeFunc)
2453 FreshBBs.insert(CallBlock);
2454
2455 // Create another block after the count zero intrinsic. A PHI will be added
2456 // in this block to select the result of the intrinsic or the bit-width
2457 // constant if the input to the intrinsic is zero.
2458 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(CountZeros));
2459 // Any debug-info after CountZeros should not be included.
2460 SplitPt.setHeadBit(true);
2461 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
2462 if (IsHugeFunc)
2463 FreshBBs.insert(EndBlock);
2464
2465 // Update the LoopInfo. The new blocks are in the same loop as the start
2466 // block.
2467 if (Loop *L = LI.getLoopFor(StartBlock)) {
2468 L->addBasicBlockToLoop(CallBlock, LI);
2469 L->addBasicBlockToLoop(EndBlock, LI);
2470 }
2471
2472 // Set up a builder to create a compare, conditional branch, and PHI.
2473 IRBuilder<> Builder(CountZeros->getContext());
2474 Builder.SetInsertPoint(StartBlock->getTerminator());
2475 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
2476
2477 // Replace the unconditional branch that was created by the first split with
2478 // a compare against zero and a conditional branch.
2479 Value *Zero = Constant::getNullValue(Ty);
2480 // Avoid introducing branch on poison. This also replaces the ctz operand.
2481 if (!isGuaranteedNotToBeUndefOrPoison(Op))
2482 Op = Builder.CreateFreeze(Op, Op->getName() + ".fr");
2483 Value *Cmp = Builder.CreateICmpEQ(Op, Zero, "cmpz");
2484 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
2485 StartBlock->getTerminator()->eraseFromParent();
2486
2487 // Create a PHI in the end block to select either the output of the intrinsic
2488 // or the bit width of the operand.
2489 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
2490 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
2491 replaceAllUsesWith(CountZeros, PN, FreshBBs, IsHugeFunc);
2492 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
2493 PN->addIncoming(BitWidth, StartBlock);
2494 PN->addIncoming(CountZeros, CallBlock);
2495
2496 // We are explicitly handling the zero case, so we can set the intrinsic's
2497 // undefined zero argument to 'true'. This will also prevent reprocessing the
2498 // intrinsic; we only despeculate when a zero input is defined.
2499 CountZeros->setArgOperand(1, Builder.getTrue());
2500 ModifiedDT = ModifyDT::ModifyBBDT;
2501 return true;
2502}
2503
2504bool CodeGenPrepare::optimizeCallInst(CallInst *CI, ModifyDT &ModifiedDT) {
2505 BasicBlock *BB = CI->getParent();
2506
2507 // Lower inline assembly if we can.
2508 // If we found an inline asm expression, and if the target knows how to
2509 // lower it to normal LLVM code, do so now.
2510 if (CI->isInlineAsm()) {
2511 if (TLI->ExpandInlineAsm(CI)) {
2512 // Avoid invalidating the iterator.
2513 CurInstIterator = BB->begin();
2514 // Avoid processing instructions out of order, which could cause
2515 // reuse before a value is defined.
2516 SunkAddrs.clear();
2517 return true;
2518 }
2519 // Sink address computing for memory operands into the block.
2520 if (optimizeInlineAsmInst(CI))
2521 return true;
2522 }
2523
2524 // Align the pointer arguments to this call if the target thinks it's a good
2525 // idea
2526 unsigned MinSize;
2527 Align PrefAlign;
2528 if (TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
2529 for (auto &Arg : CI->args()) {
2530 // We want to align both objects whose address is used directly and
2531 // objects whose address is used in casts and GEPs, though it only makes
2532 // sense for GEPs if the offset is a multiple of the desired alignment and
2533 // if size - offset meets the size threshold.
2534 if (!Arg->getType()->isPointerTy())
2535 continue;
2536 APInt Offset(DL->getIndexSizeInBits(
2537 cast<PointerType>(Arg->getType())->getAddressSpace()),
2538 0);
2539 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
2540 uint64_t Offset2 = Offset.getLimitedValue();
2541 if (!isAligned(PrefAlign, Offset2))
2542 continue;
2543 AllocaInst *AI;
2544 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlign() < PrefAlign &&
2545 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
2546 AI->setAlignment(PrefAlign);
2547 // Global variables can only be aligned if they are defined in this
2548 // object (i.e. they are uniquely initialized in this object), and
2549 // over-aligning global variables that have an explicit section is
2550 // forbidden.
2551 GlobalVariable *GV;
2552 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
2553 GV->getPointerAlignment(*DL) < PrefAlign &&
2554 DL->getTypeAllocSize(GV->getValueType()) >= MinSize + Offset2)
2555 GV->setAlignment(PrefAlign);
2556 }
2557 }
2558 // If this is a memcpy (or similar) then we may be able to improve the
2559 // alignment.
2560 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
2561 Align DestAlign = getKnownAlignment(MI->getDest(), *DL);
2562 MaybeAlign MIDestAlign = MI->getDestAlign();
2563 if (!MIDestAlign || DestAlign > *MIDestAlign)
2564 MI->setDestAlignment(DestAlign);
2565 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
2566 MaybeAlign MTISrcAlign = MTI->getSourceAlign();
2567 Align SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
2568 if (!MTISrcAlign || SrcAlign > *MTISrcAlign)
2569 MTI->setSourceAlignment(SrcAlign);
2570 }
2571 }
2572
2573 // If we have a cold call site, try to sink addressing computation into the
2574 // cold block. This interacts with our handling for loads and stores to
2575 // ensure that we can fold all uses of a potential addressing computation
2576 // into their uses. TODO: generalize this to work over profiling data
2577 if (CI->hasFnAttr(Attribute::Cold) && !OptSize &&
2578 !llvm::shouldOptimizeForSize(BB, PSI, BFI.get()))
2579 for (auto &Arg : CI->args()) {
2580 if (!Arg->getType()->isPointerTy())
2581 continue;
2582 unsigned AS = Arg->getType()->getPointerAddressSpace();
2583 if (optimizeMemoryInst(CI, Arg, Arg->getType(), AS))
2584 return true;
2585 }
2586
2587 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
2588 if (II) {
2589 switch (II->getIntrinsicID()) {
2590 default:
2591 break;
2592 case Intrinsic::assume:
2593 llvm_unreachable("llvm.assume should have been removed already");
2594 case Intrinsic::allow_runtime_check:
2595 case Intrinsic::allow_ubsan_check:
2596 case Intrinsic::experimental_widenable_condition: {
2597 // Give up on future widening opportunities so that we can fold away dead
2598 // paths and merge blocks before going into block-local instruction
2599 // selection.
2600 if (II->use_empty()) {
2601 II->eraseFromParent();
2602 return true;
2603 }
2604 Constant *RetVal = ConstantInt::getTrue(II->getContext());
2605 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
2606 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
2607 });
2608 return true;
2609 }
2610 case Intrinsic::objectsize:
2611 llvm_unreachable("llvm.objectsize.* should have been lowered already");
2612 case Intrinsic::is_constant:
2613 llvm_unreachable("llvm.is.constant.* should have been lowered already");
2614 case Intrinsic::aarch64_stlxr:
2615 case Intrinsic::aarch64_stxr: {
2616 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
2617 if (!ExtVal || !ExtVal->hasOneUse() ||
2618 ExtVal->getParent() == CI->getParent())
2619 return false;
2620 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
2621 ExtVal->moveBefore(CI);
2622 // Mark this instruction as "inserted by CGP", so that other
2623 // optimizations don't touch it.
2624 InsertedInsts.insert(ExtVal);
2625 return true;
2626 }
2627
2628 case Intrinsic::launder_invariant_group:
2629 case Intrinsic::strip_invariant_group: {
2630 Value *ArgVal = II->getArgOperand(0);
2631 auto it = LargeOffsetGEPMap.find(II);
2632 if (it != LargeOffsetGEPMap.end()) {
2633 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
2634 // Make sure not to have to deal with iterator invalidation
2635 // after possibly adding ArgVal to LargeOffsetGEPMap.
2636 auto GEPs = std::move(it->second);
2637 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
2638 LargeOffsetGEPMap.erase(II);
2639 }
2640
2641 replaceAllUsesWith(II, ArgVal, FreshBBs, IsHugeFunc);
2642 II->eraseFromParent();
2643 return true;
2644 }
2645 case Intrinsic::cttz:
2646 case Intrinsic::ctlz:
2647 // If counting zeros is expensive, try to avoid it.
2648 return despeculateCountZeros(II, *LI, TLI, DL, ModifiedDT, FreshBBs,
2649 IsHugeFunc);
2650 case Intrinsic::fshl:
2651 case Intrinsic::fshr:
2652 return optimizeFunnelShift(II);
2653 case Intrinsic::dbg_assign:
2654 case Intrinsic::dbg_value:
2655 return fixupDbgValue(II);
2656 case Intrinsic::masked_gather:
2657 return optimizeGatherScatterInst(II, II->getArgOperand(0));
2658 case Intrinsic::masked_scatter:
2659 return optimizeGatherScatterInst(II, II->getArgOperand(1));
2660 }
2661
2662 SmallVector<Value *, 2> PtrOps;
2663 Type *AccessTy;
2664 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
2665 while (!PtrOps.empty()) {
2666 Value *PtrVal = PtrOps.pop_back_val();
2667 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
2668 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
2669 return true;
2670 }
2671 }
2672
2673 // From here on out we're working with named functions.
2674 if (!CI->getCalledFunction())
2675 return false;
2676
2677 // Lower all default uses of _chk calls. This is very similar
2678 // to what InstCombineCalls does, but here we are only lowering calls
2679 // to fortified library functions (e.g. __memcpy_chk) that have the default
2680 // "don't know" as the objectsize. Anything else should be left alone.
2681 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
2682 IRBuilder<> Builder(CI);
2683 if (Value *V = Simplifier.optimizeCall(CI, Builder)) {
2684 replaceAllUsesWith(CI, V, FreshBBs, IsHugeFunc);
2685 CI->eraseFromParent();
2686 return true;
2687 }
2688
2689 return false;
2690}
2691
2692static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo,
2693 const CallInst *CI) {
2694 assert(CI && CI->use_empty());
2695
2696 if (const auto *II = dyn_cast<IntrinsicInst>(CI))
2697 switch (II->getIntrinsicID()) {
2698 case Intrinsic::memset:
2699 case Intrinsic::memcpy:
2700 case Intrinsic::memmove:
2701 return true;
2702 default:
2703 return false;
2704 }
2705
2706 LibFunc LF;
2707 Function *Callee = CI->getCalledFunction();
2708 if (Callee && TLInfo && TLInfo->getLibFunc(*Callee, LF))
2709 switch (LF) {
2710 case LibFunc_strcpy:
2711 case LibFunc_strncpy:
2712 case LibFunc_strcat:
2713 case LibFunc_strncat:
2714 return true;
2715 default:
2716 return false;
2717 }
2718
2719 return false;
2720}
2721
2722/// Look for opportunities to duplicate return instructions to the predecessor
2723/// to enable tail call optimizations. The case it is currently looking for is
2724/// the following one. Known intrinsics or library functions that may be tail
2725/// called are taken into account as well.
2726/// @code
2727/// bb0:
2728/// %tmp0 = tail call i32 @f0()
2729/// br label %return
2730/// bb1:
2731/// %tmp1 = tail call i32 @f1()
2732/// br label %return
2733/// bb2:
2734/// %tmp2 = tail call i32 @f2()
2735/// br label %return
2736/// return:
2737/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
2738/// ret i32 %retval
2739/// @endcode
2740///
2741/// =>
2742///
2743/// @code
2744/// bb0:
2745/// %tmp0 = tail call i32 @f0()
2746/// ret i32 %tmp0
2747/// bb1:
2748/// %tmp1 = tail call i32 @f1()
2749/// ret i32 %tmp1
2750/// bb2:
2751/// %tmp2 = tail call i32 @f2()
2752/// ret i32 %tmp2
2753/// @endcode
2754bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB,
2755 ModifyDT &ModifiedDT) {
2756 if (!BB->getTerminator())
2757 return false;
2758
2759 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
2760 if (!RetI)
2761 return false;
2762
2763 assert(LI->getLoopFor(BB) == nullptr && "A return block cannot be in a loop");
2764
2765 PHINode *PN = nullptr;
2766 ExtractValueInst *EVI = nullptr;
2767 BitCastInst *BCI = nullptr;
2768 Value *V = RetI->getReturnValue();
2769 if (V) {
2770 BCI = dyn_cast<BitCastInst>(V);
2771 if (BCI)
2772 V = BCI->getOperand(0);
2773
2774 EVI = dyn_cast<ExtractValueInst>(V);
2775 if (EVI) {
2776 V = EVI->getOperand(0);
2777 if (!llvm::all_of(EVI->indices(), [](unsigned idx) { return idx == 0; }))
2778 return false;
2779 }
2780
2781 PN = dyn_cast<PHINode>(V);
2782 }
2783
2784 if (PN && PN->getParent() != BB)
2785 return false;
2786
2787 auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
2788 const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
2789 if (BC && BC->hasOneUse())
2790 Inst = BC->user_back();
2791
2792 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
2793 return II->getIntrinsicID() == Intrinsic::lifetime_end;
2794 return false;
2795 };
2796
2797 // Make sure there are no instructions between the first instruction
2798 // and return.
2799 const Instruction *BI = BB->getFirstNonPHI();
2800 // Skip over debug and the bitcast.
2801 while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
2802 isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
2803 BI = BI->getNextNode();
2804 if (BI != RetI)
2805 return false;
2806
2807 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2808 /// call.
2809 const Function *F = BB->getParent();
2810 SmallVector<BasicBlock *, 4> TailCallBBs;
2811 if (PN) {
2812 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2813 // Look through bitcasts.
2814 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2815 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2816 BasicBlock *PredBB = PN->getIncomingBlock(I);
2817 // Make sure the phi value is indeed produced by the tail call.
2818 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2819 TLI->mayBeEmittedAsTailCall(CI) &&
2820 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2821 TailCallBBs.push_back(PredBB);
2822 } else {
2823 // Consider the cases in which the phi value is indirectly produced by
2824 // the tail call, for example when encountering memset(), memmove(),
2825 // strcpy(), whose return value may have been optimized out. In such
2826 // cases, the value needs to be the first function argument.
2827 //
2828 // bb0:
2829 // tail call void @llvm.memset.p0.i64(ptr %0, i8 0, i64 %1)
2830 // br label %return
2831 // return:
2832 // %phi = phi ptr [ %0, %bb0 ], [ %2, %entry ]
2833 if (PredBB && PredBB->getSingleSuccessor() == BB)
2834 CI = dyn_cast_or_null<CallInst>(
2835 PredBB->getTerminator()->getPrevNonDebugInstruction(true));
2836
2837 if (CI && CI->use_empty() &&
2838 isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2839 IncomingVal == CI->getArgOperand(0) &&
2840 TLI->mayBeEmittedAsTailCall(CI) &&
2841 attributesPermitTailCall(F, CI, RetI, *TLI))
2842 TailCallBBs.push_back(PredBB);
2843 }
2844 }
2845 } else {
2846 SmallPtrSet<BasicBlock *, 4> VisitedBBs;
2847 for (BasicBlock *Pred : predecessors(BB)) {
2848 if (!VisitedBBs.insert(Pred).second)
2849 continue;
2850 if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
2851 CallInst *CI = dyn_cast<CallInst>(I);
2852 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2853 attributesPermitTailCall(F, CI, RetI, *TLI)) {
2854 // Either we return void or the return value must be the first
2855 // argument of a known intrinsic or library function.
2856 if (!V || isa<UndefValue>(V) ||
2857 (isIntrinsicOrLFToBeTailCalled(TLInfo, CI) &&
2858 V == CI->getArgOperand(0))) {
2859 TailCallBBs.push_back(Pred);
2860 }
2861 }
2862 }
2863 }
2864 }
2865
2866 bool Changed = false;
2867 for (auto const &TailCallBB : TailCallBBs) {
2868 // Make sure the call instruction is followed by an unconditional branch to
2869 // the return block.
2870 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2871 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2872 continue;
2873
2874 // Duplicate the return into TailCallBB.
2875 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2876 assert(!VerifyBFIUpdates ||
2877 BFI->getBlockFreq(BB) >= BFI->getBlockFreq(TailCallBB));
2878 BFI->setBlockFreq(BB,
2879 (BFI->getBlockFreq(BB) - BFI->getBlockFreq(TailCallBB)));
2880 ModifiedDT = ModifyDT::ModifyBBDT;
2881 Changed = true;
2882 ++NumRetsDup;
2883 }
2884
2885 // If we eliminated all predecessors of the block, delete the block now.
2886 if (Changed && !BB->hasAddressTaken() && pred_empty(BB))
2887 BB->eraseFromParent();
2888
2889 return Changed;
2890}
2891
2892//===----------------------------------------------------------------------===//
2893// Memory Optimization
2894//===----------------------------------------------------------------------===//
2895
2896namespace {
2897
2898/// This is an extended version of TargetLowering::AddrMode
2899/// which holds actual Value*'s for register values.
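/// Conceptually it describes an address of the form
///   BaseGV + BaseReg + BaseOffs + ScaledReg * Scale
/// e.g. (illustrative) "getelementptr i8, ptr %p, i64 %i" folds to
/// BaseReg = %p, ScaledReg = %i, Scale = 1.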
2900struct ExtAddrMode : public TargetLowering::AddrMode {
2901 Value *BaseReg = nullptr;
2902 Value *ScaledReg = nullptr;
2903 Value *OriginalValue = nullptr;
2904 bool InBounds = true;
2905
2906 enum FieldName {
2907 NoField = 0x00,
2908 BaseRegField = 0x01,
2909 BaseGVField = 0x02,
2910 BaseOffsField = 0x04,
2911 ScaledRegField = 0x08,
2912 ScaleField = 0x10,
2913 MultipleFields = 0xff
2914 };
2915
2916 ExtAddrMode() = default;
2917
2918 void print(raw_ostream &OS) const;
2919 void dump() const;
2920
2921 FieldName compare(const ExtAddrMode &other) {
2922 // First check that the types are the same on each field, as differing types
2923 // are something we can't cope with later on.
2924 if (BaseReg && other.BaseReg &&
2925 BaseReg->getType() != other.BaseReg->getType())
2926 return MultipleFields;
2927 if (BaseGV && other.BaseGV && BaseGV->getType() != other.BaseGV->getType())
2928 return MultipleFields;
2929 if (ScaledReg && other.ScaledReg &&
2930 ScaledReg->getType() != other.ScaledReg->getType())
2931 return MultipleFields;
2932
2933 // Conservatively reject 'inbounds' mismatches.
2934 if (InBounds != other.InBounds)
2935 return MultipleFields;
2936
2937 // Check each field to see if it differs.
2938 unsigned Result = NoField;
2939 if (BaseReg != other.BaseReg)
2940 Result |= BaseRegField;
2941 if (BaseGV != other.BaseGV)
2942 Result |= BaseGVField;
2943 if (BaseOffs != other.BaseOffs)
2944 Result |= BaseOffsField;
2945 if (ScaledReg != other.ScaledReg)
2946 Result |= ScaledRegField;
2947 // Don't count 0 as being a different scale, because that actually means
2948 // unscaled (which will already be counted by having no ScaledReg).
2949 if (Scale && other.Scale && Scale != other.Scale)
2950 Result |= ScaleField;
2951
2952 if (llvm::popcount(Result) > 1)
2953 return MultipleFields;
2954 else
2955 return static_cast<FieldName>(Result);
2956 }
2957
2958 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2959 // with no offset.
2960 bool isTrivial() {
2961 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2962 // trivial if at most one of these terms is nonzero, except that BaseGV and
2963 // BaseReg both being zero actually means a null pointer value, which we
2964 // consider to be 'non-zero' here.
2965 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2966 }
2967
2968 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2969 switch (Field) {
2970 default:
2971 return nullptr;
2972 case BaseRegField:
2973 return BaseReg;
2974 case BaseGVField:
2975 return BaseGV;
2976 case ScaledRegField:
2977 return ScaledReg;
2978 case BaseOffsField:
2979 return ConstantInt::get(IntPtrTy, BaseOffs);
2980 }
2981 }
2982
2983 void SetCombinedField(FieldName Field, Value *V,
2984 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2985 switch (Field) {
2986 default:
2987 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2988 break;
2989 case ExtAddrMode::BaseRegField:
2990 BaseReg = V;
2991 break;
2992 case ExtAddrMode::BaseGVField:
2993 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2994 // in the BaseReg field.
2995 assert(BaseReg == nullptr);
2996 BaseReg = V;
2997 BaseGV = nullptr;
2998 break;
2999 case ExtAddrMode::ScaledRegField:
3000 ScaledReg = V;
3001 // If we have a mix of scaled and unscaled addrmodes then we want scale
3002 // to be the scale and not zero.
3003 if (!Scale)
3004 for (const ExtAddrMode &AM : AddrModes)
3005 if (AM.Scale) {
3006 Scale = AM.Scale;
3007 break;
3008 }
3009 break;
3010 case ExtAddrMode::BaseOffsField:
3011 // The offset is no longer a constant, so it goes in ScaledReg with a
3012 // scale of 1.
3013 assert(ScaledReg == nullptr);
3014 ScaledReg = V;
3015 Scale = 1;
3016 BaseOffs = 0;
3017 break;
3018 }
3019 }
3020};
3021
3022#ifndef NDEBUG
3023static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
3024 AM.print(OS);
3025 return OS;
3026}
3027#endif
3028
3029#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
3030void ExtAddrMode::print(raw_ostream &OS) const {
3031 bool NeedPlus = false;
3032 OS << "[";
3033 if (InBounds)
3034 OS << "inbounds ";
3035 if (BaseGV) {
3036 OS << "GV:";
3037 BaseGV->printAsOperand(OS, /*PrintType=*/false);
3038 NeedPlus = true;
3039 }
3040
3041 if (BaseOffs) {
3042 OS << (NeedPlus ? " + " : "") << BaseOffs;
3043 NeedPlus = true;
3044 }
3045
3046 if (BaseReg) {
3047 OS << (NeedPlus ? " + " : "") << "Base:";
3048 BaseReg->printAsOperand(OS, /*PrintType=*/false);
3049 NeedPlus = true;
3050 }
3051 if (Scale) {
3052 OS << (NeedPlus ? " + " : "") << Scale << "*";
3053 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
3054 }
3055
3056 OS << ']';
3057}
3058
3059LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
3060 print(dbgs());
3061 dbgs() << '\n';
3062}
3063#endif
3064
3065} // end anonymous namespace
3066
3067namespace {
3068
3069/// This class provides transaction based operation on the IR.
3070/// Every change made through this class is recorded in the internal state and
3071/// can be undone (rollback) until commit is called.
3072/// CGP does not check if instructions could be speculatively executed when
3073/// moved. Preserving the original location would pessimize the debugging
3074/// experience, as well as negatively impact the quality of sample PGO.
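/// A typical use is a sketch of the form below (assuming the usual CGP wiring;
/// 'Profitable' stands in for whatever check the caller performs):
///   TypePromotionTransaction TPT(RemovedInsts);
///   auto RestorePt = TPT.getRestorationPoint();
///   // ... speculatively mutate the IR through TPT ...
///   if (!Profitable)
///     TPT.rollback(RestorePt);
///   else
///     TPT.commit();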
3075class TypePromotionTransaction {
3076 /// This represents the common interface of the individual transaction.
3077 /// Each class implements the logic for doing one specific modification on
3078 /// the IR via the TypePromotionTransaction.
3079 class TypePromotionAction {
3080 protected:
3081 /// The Instruction modified.
3082 Instruction *Inst;
3083
3084 public:
3085 /// Constructor of the action.
3086 /// The constructor performs the related action on the IR.
3087 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
3088
3089 virtual ~TypePromotionAction() = default;
3090
3091 /// Undo the modification done by this action.
3092 /// When this method is called, the IR must be in the same state as it was
3093 /// before this action was applied.
3094 /// \pre Undoing the action works if and only if the IR is in the exact same
3095 /// state as it was directly after this action was applied.
3096 virtual void undo() = 0;
3097
3098 /// Commit every change made by this action.
3099 /// When the results on the IR of the action are to be kept, it is important
3100 /// to call this function, otherwise hidden information may be kept forever.
3101 virtual void commit() {
3102 // Nothing to be done, this action is not doing anything.
3103 }
3104 };
3105
3106 /// Utility to remember the position of an instruction.
3107 class InsertionHandler {
3108 /// Position of an instruction.
3109 /// Either an instruction:
3110 /// - Is the first in a basic block: BB is used.
3111 /// - Has a previous instruction: PrevInst is used.
3112 union {
3113 Instruction *PrevInst;
3114 BasicBlock *BB;
3115 } Point;
3116 std::optional<DbgRecord::self_iterator> BeforeDbgRecord = std::nullopt;
3117
3118 /// Remember whether or not the instruction had a previous instruction.
3119 bool HasPrevInstruction;
3120
3121 public:
3122 /// Record the position of \p Inst.
3123 InsertionHandler(Instruction *Inst) {
3124 HasPrevInstruction = (Inst != &*(Inst->getParent()->begin()));
3125 BasicBlock *BB = Inst->getParent();
3126
3127 // Record where we would have to re-insert the instruction in the sequence
3128 // of DbgRecords, if we ended up reinserting.
3129 if (BB->IsNewDbgInfoFormat)
3130 BeforeDbgRecord = Inst->getDbgReinsertionPosition();
3131
3132 if (HasPrevInstruction) {
3133 Point.PrevInst = &*std::prev(Inst->getIterator());
3134 } else {
3135 Point.BB = BB;
3136 }
3137 }
3138
3139 /// Insert \p Inst at the recorded position.
3140 void insert(Instruction *Inst) {
3141 if (HasPrevInstruction) {
3142 if (Inst->getParent())
3143 Inst->removeFromParent();
3144 Inst->insertAfter(&*Point.PrevInst);
3145 } else {
3146 BasicBlock::iterator Position = Point.BB->getFirstInsertionPt();
3147 if (Inst->getParent())
3148 Inst->moveBefore(*Point.BB, Position);
3149 else
3150 Inst->insertBefore(*Point.BB, Position);
3151 }
3152
3153 Inst->getParent()->reinsertInstInDbgRecords(Inst, BeforeDbgRecord);
3154 }
3155 };
3156
3157 /// Move an instruction before another.
3158 class InstructionMoveBefore : public TypePromotionAction {
3159 /// Original position of the instruction.
3160 InsertionHandler Position;
3161
3162 public:
3163 /// Move \p Inst before \p Before.
3164 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
3165 : TypePromotionAction(Inst), Position(Inst) {
3166 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
3167 << "\n");
3168 Inst->moveBefore(Before);
3169 }
3170
3171 /// Move the instruction back to its original position.
3172 void undo() override {
3173 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
3174 Position.insert(Inst);
3175 }
3176 };
3177
3178 /// Set the operand of an instruction with a new value.
3179 class OperandSetter : public TypePromotionAction {
3180 /// Original operand of the instruction.
3181 Value *Origin;
3182
3183 /// Index of the modified instruction.
3184 unsigned Idx;
3185
3186 public:
3187 /// Set \p Idx operand of \p Inst with \p NewVal.
3188 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
3189 : TypePromotionAction(Inst), Idx(Idx) {
3190 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
3191 << "for:" << *Inst << "\n"
3192 << "with:" << *NewVal << "\n");
3193 Origin = Inst->getOperand(Idx);
3194 Inst->setOperand(Idx, NewVal);
3195 }
3196
3197 /// Restore the original value of the instruction.
3198 void undo() override {
3199 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
3200 << "for: " << *Inst << "\n"
3201 << "with: " << *Origin << "\n");
3202 Inst->setOperand(Idx, Origin);
3203 }
3204 };
3205
3206 /// Hide the operands of an instruction.
3207 /// Do as if this instruction was not using any of its operands.
3208 class OperandsHider : public TypePromotionAction {
3209 /// The list of original operands.
3210 SmallVector<Value *, 4> OriginalValues;
3211
3212 public:
3213 /// Remove \p Inst from the uses of the operands of \p Inst.
3214 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
3215 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
3216 unsigned NumOpnds = Inst->getNumOperands();
3217 OriginalValues.reserve(NumOpnds);
3218 for (unsigned It = 0; It < NumOpnds; ++It) {
3219 // Save the current operand.
3220 Value *Val = Inst->getOperand(It);
3221 OriginalValues.push_back(Val);
3222 // Set a dummy one.
3223 // We could use OperandSetter here, but that would imply an overhead
3224 // that we are not willing to pay.
3225 Inst->setOperand(It, UndefValue::get(Val->getType()));
3226 }
3227 }
3228
3229 /// Restore the original list of uses.
3230 void undo() override {
3231 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
3232 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
3233 Inst->setOperand(It, OriginalValues[It]);
3234 }
3235 };
3236
3237 /// Build a truncate instruction.
3238 class TruncBuilder : public TypePromotionAction {
3239 Value *Val;
3240
3241 public:
3242 /// Build a truncate instruction of \p Opnd producing a \p Ty
3243 /// result.
3244 /// trunc Opnd to Ty.
3245 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
3246 IRBuilder<> Builder(Opnd);
3247 Builder.SetCurrentDebugLocation(DebugLoc());
3248 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
3249 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
3250 }
3251
3252 /// Get the built value.
3253 Value *getBuiltValue() { return Val; }
3254
3255 /// Remove the built instruction.
3256 void undo() override {
3257 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
3258 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3259 IVal->eraseFromParent();
3260 }
3261 };
3262
3263 /// Build a sign extension instruction.
3264 class SExtBuilder : public TypePromotionAction {
3265 Value *Val;
3266
3267 public:
3268 /// Build a sign extension instruction of \p Opnd producing a \p Ty
3269 /// result.
3270 /// sext Opnd to Ty.
3271 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3272 : TypePromotionAction(InsertPt) {
3273 IRBuilder<> Builder(InsertPt);
3274 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
3275 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
3276 }
3277
3278 /// Get the built value.
3279 Value *getBuiltValue() { return Val; }
3280
3281 /// Remove the built instruction.
3282 void undo() override {
3283 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
3284 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3285 IVal->eraseFromParent();
3286 }
3287 };
3288
3289 /// Build a zero extension instruction.
3290 class ZExtBuilder : public TypePromotionAction {
3291 Value *Val;
3292
3293 public:
3294 /// Build a zero extension instruction of \p Opnd producing a \p Ty
3295 /// result.
3296 /// zext Opnd to Ty.
3297 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
3298 : TypePromotionAction(InsertPt) {
3299 IRBuilder<> Builder(InsertPt);
3300 Builder.SetCurrentDebugLocation(DebugLoc());
3301 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
3302 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
3303 }
3304
3305 /// Get the built value.
3306 Value *getBuiltValue() { return Val; }
3307
3308 /// Remove the built instruction.
3309 void undo() override {
3310 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
3311 if (Instruction *IVal = dyn_cast<Instruction>(Val))
3312 IVal->eraseFromParent();
3313 }
3314 };
3315
3316 /// Mutate an instruction to another type.
3317 class TypeMutator : public TypePromotionAction {
3318 /// Record the original type.
3319 Type *OrigTy;
3320
3321 public:
3322 /// Mutate the type of \p Inst into \p NewTy.
3323 TypeMutator(Instruction *Inst, Type *NewTy)
3324 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
3325 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
3326 << "\n");
3327 Inst->mutateType(NewTy);
3328 }
3329
3330 /// Mutate the instruction back to its original type.
3331 void undo() override {
3332 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
3333 << "\n");
3334 Inst->mutateType(OrigTy);
3335 }
3336 };
3337
3338 /// Replace the uses of an instruction by another instruction.
3339 class UsesReplacer : public TypePromotionAction {
3340 /// Helper structure to keep track of the replaced uses.
3341 struct InstructionAndIdx {
3342 /// The instruction using the instruction.
3343 Instruction *Inst;
3344
3345 /// The index where this instruction is used for Inst.
3346 unsigned Idx;
3347
3348 InstructionAndIdx(Instruction *Inst, unsigned Idx)
3349 : Inst(Inst), Idx(Idx) {}
3350 };
3351
3352 /// Keep track of the original uses (pair Instruction, Index).
3353 SmallVector<InstructionAndIdx, 4> OriginalUses;
3354 /// Keep track of the debug users.
3355 SmallVector<DbgValueInst *, 1> DbgValues;
3356 /// And non-instruction debug-users too.
3357 SmallVector<DbgVariableRecord *, 1> DbgVariableRecords;
3358
3359 /// Keep track of the new value so that we can undo it by replacing
3360 /// instances of the new value with the original value.
3361 Value *New;
3362
3364
3365 public:
3366 /// Replace all the use of \p Inst by \p New.
3367 UsesReplacer(Instruction *Inst, Value *New)
3368 : TypePromotionAction(Inst), New(New) {
3369 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
3370 << "\n");
3371 // Record the original uses.
3372 for (Use &U : Inst->uses()) {
3373 Instruction *UserI = cast<Instruction>(U.getUser());
3374 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
3375 }
3376 // Record the debug uses separately. They are not in the instruction's
3377 // use list, but they are replaced by RAUW.
3378 findDbgValues(DbgValues, Inst, &DbgVariableRecords);
3379
3380 // Now, we can replace the uses.
3381 Inst->replaceAllUsesWith(New);
3382 }
3383
3384 /// Reassign the original uses of Inst to Inst.
3385 void undo() override {
3386 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
3387 for (InstructionAndIdx &Use : OriginalUses)
3388 Use.Inst->setOperand(Use.Idx, Inst);
3389 // RAUW has replaced all original uses with references to the new value,
3390 // including the debug uses. Since we are undoing the replacements,
3391 // the original debug uses must also be reinstated to maintain the
3392 // correctness and utility of debug value instructions.
3393 for (auto *DVI : DbgValues)
3394 DVI->replaceVariableLocationOp(New, Inst);
3395 // Similar story with DbgVariableRecords, the non-instruction
3396 // representation of dbg.values.
3397 for (DbgVariableRecord *DVR : DbgVariableRecords)
3398 DVR->replaceVariableLocationOp(New, Inst);
3399 }
3400 };
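// Illustrative IR (a sketch, not taken from this file): given
//   %old = add i32 %a, %b
//   call void @llvm.dbg.value(metadata i32 %old, ...)
// replaceAllUsesWith(%new) also rewrites the debug location to %new, because
// debug users track the value through metadata rather than the use list.
// That is why undo() must call replaceVariableLocationOp(New, Inst) on each
// recorded debug user; restoring the ordinary operands alone would leave the
// debug info pointing at the replacement value.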
3401
3402 /// Remove an instruction from the IR.
3403 class InstructionRemover : public TypePromotionAction {
3404 /// Original position of the instruction.
3405 InsertionHandler Inserter;
3406
3407 /// Helper structure to hide all the links to the instruction. In other
3408 /// words, this helps to act as if the instruction were removed.
3409 OperandsHider Hider;
3410
3411 /// Keep track of the uses replaced, if any.
3412 UsesReplacer *Replacer = nullptr;
3413
3414 /// Keep track of instructions removed.
3415 SetOfInstrs &RemovedInsts;
3416
3417 public:
3418 /// Remove all references to \p Inst and optionally replace all its
3419 /// uses with \p New.
3420 /// \p RemovedInsts Keep track of the instructions removed by this Action.
3421 /// \pre If !Inst->use_empty(), then New != nullptr
3422 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
3423 Value *New = nullptr)
3424 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
3425 RemovedInsts(RemovedInsts) {
3426 if (New)
3427 Replacer = new UsesReplacer(Inst, New);
3428 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
3429 RemovedInsts.insert(Inst);
3430 /// The instructions removed here will be freed after completing
3431 /// optimizeBlock() for all blocks as we need to keep track of the
3432 /// removed instructions during promotion.
3433 Inst->removeFromParent();
3434 }
3435
3436 ~InstructionRemover() override { delete Replacer; }
3437
3438 InstructionRemover &operator=(const InstructionRemover &other) = delete;
3439 InstructionRemover(const InstructionRemover &other) = delete;
3440
3441 /// Resurrect the instruction and reassign it to the proper uses if a
3442 /// new value was provided when building this action.
3443 void undo() override {
3444 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
3445 Inserter.insert(Inst);
3446 if (Replacer)
3447 Replacer->undo();
3448 Hider.undo();
3449 RemovedInsts.erase(Inst);
3450 }
3451 };
3452
3453public:
3454 /// Restoration point.
3455 /// The restoration point is a pointer to an action instead of an iterator
3456 /// because the iterator may be invalidated but not the pointer.
3457 using ConstRestorationPt = const TypePromotionAction *;
3458
3459 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
3460 : RemovedInsts(RemovedInsts) {}
3461
3462 /// Commit every change made in this transaction. Return true if any change
3463 /// happened.
3464 bool commit();
3465
3466 /// Undo all the changes made after the given point.
3467 void rollback(ConstRestorationPt Point);
3468
3469 /// Get the current restoration point.
3470 ConstRestorationPt getRestorationPoint() const;
3471
3472 /// \name API for IR modification with state keeping to support rollback.
3473 /// @{
3474 /// Same as Instruction::setOperand.
3475 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
3476
3477 /// Same as Instruction::eraseFromParent.
3478 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
3479
3480 /// Same as Value::replaceAllUsesWith.
3481 void replaceAllUsesWith(Instruction *Inst, Value *New);
3482
3483 /// Same as Value::mutateType.
3484 void mutateType(Instruction *Inst, Type *NewTy);
3485
3486 /// Same as IRBuilder::createTrunc.
3487 Value *createTrunc(Instruction *Opnd, Type *Ty);
3488
3489 /// Same as IRBuilder::createSExt.
3490 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
3491
3492 /// Same as IRBuilder::createZExt.
3493 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
3494
3495private:
3496 /// The ordered list of actions made so far.
3498
3499 using CommitPt =
3501
3502 SetOfInstrs &RemovedInsts;
3503};
3504
3505} // end anonymous namespace
3506
3507void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
3508 Value *NewVal) {
3509 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
3510 Inst, Idx, NewVal));
3511}
3512
3513void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
3514 Value *NewVal) {
3515 Actions.push_back(
3516 std::make_unique<TypePromotionTransaction::InstructionRemover>(
3517 Inst, RemovedInsts, NewVal));
3518}
3519
3520void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
3521 Value *New) {
3522 Actions.push_back(
3523 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
3524}
3525
3526void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
3527 Actions.push_back(
3528 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
3529}
3530
3531Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, Type *Ty) {
3532 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
3533 Value *Val = Ptr->getBuiltValue();
3534 Actions.push_back(std::move(Ptr));
3535 return Val;
3536}
3537
3538Value *TypePromotionTransaction::createSExt(Instruction *Inst, Value *Opnd,
3539 Type *Ty) {
3540 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
3541 Value *Val = Ptr->getBuiltValue();
3542 Actions.push_back(std::move(Ptr));
3543 return Val;
3544}
3545
3546Value *TypePromotionTransaction::createZExt(Instruction *Inst, Value *Opnd,
3547 Type *Ty) {
3548 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
3549 Value *Val = Ptr->getBuiltValue();
3550 Actions.push_back(std::move(Ptr));
3551 return Val;
3552}
3553
3554TypePromotionTransaction::ConstRestorationPt
3555TypePromotionTransaction::getRestorationPoint() const {
3556 return !Actions.empty() ? Actions.back().get() : nullptr;
3557}
3558
3559bool TypePromotionTransaction::commit() {
3560 for (std::unique_ptr<TypePromotionAction> &Action : Actions)
3561 Action->commit();
3562 bool Modified = !Actions.empty();
3563 Actions.clear();
3564 return Modified;
3565}
3566
3567void TypePromotionTransaction::rollback(
3568 TypePromotionTransaction::ConstRestorationPt Point) {
3569 while (!Actions.empty() && Point != Actions.back().get()) {
3570 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
3571 Curr->undo();
3572 }
3573}
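// A sketch of how the transaction is typically driven (Inst, NewTy,
// PromotedVal and ProfitabilityCheck are placeholders for this illustration,
// not names from this file): speculative mutations are recorded as actions,
// and a failed match rolls everything back, in reverse order, to the saved
// restoration point.
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt LastKnownGood =
//       TPT.getRestorationPoint();
//   TPT.mutateType(Inst, NewTy);
//   TPT.setOperand(Inst, 0, PromotedVal);
//   if (!ProfitabilityCheck())
//     TPT.rollback(LastKnownGood); // undoes setOperand, then mutateType
//   else
//     TPT.commit();                // drops the undo information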
3574
3575namespace {
3576
3577/// A helper class for matching addressing modes.
3578///
3579/// This encapsulates the logic for matching the target-legal addressing modes.
3580class AddressingModeMatcher {
3581 SmallVectorImpl<Instruction *> &AddrModeInsts;
3582 const TargetLowering &TLI;
3583 const TargetRegisterInfo &TRI;
3584 const DataLayout &DL;
3585 const LoopInfo &LI;
3586 const std::function<const DominatorTree &()> getDTFn;
3587
3588 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
3589 /// the memory instruction that we're computing this address for.
3590 Type *AccessTy;
3591 unsigned AddrSpace;
3592 Instruction *MemoryInst;
3593
3594 /// This is the addressing mode that we're building up. This is
3595 /// part of the return value of this addressing mode matching stuff.
3596 ExtAddrMode &AddrMode;
3597
3598 /// The instructions inserted by other CodeGenPrepare optimizations.
3599 const SetOfInstrs &InsertedInsts;
3600
3601 /// A map from the instructions to their type before promotion.
3602 InstrToOrigTy &PromotedInsts;
3603
3604 /// The ongoing transaction where every action should be registered.
3605 TypePromotionTransaction &TPT;
3606
3607 // A GEP which has too large an offset to be folded into the addressing mode.
3608 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
3609
3610 /// This is set to true when we should not do profitability checks.
3611 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
3612 bool IgnoreProfitability;
3613
3614 /// True if we are optimizing for size.
3615 bool OptSize = false;
3616
3617 ProfileSummaryInfo *PSI;
3618 BlockFrequencyInfo *BFI;
3619
3620 AddressingModeMatcher(
3621 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
3622 const TargetRegisterInfo &TRI, const LoopInfo &LI,
3623 const std::function<const DominatorTree &()> getDTFn, Type *AT,
3624 unsigned AS, Instruction *MI, ExtAddrMode &AM,
3625 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
3626 TypePromotionTransaction &TPT,
3627 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3628 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
3629 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
3630 DL(MI->getDataLayout()), LI(LI), getDTFn(getDTFn),
3631 AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
3632 InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
3633 LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
3634 IgnoreProfitability = false;
3635 }
3636
3637public:
3638 /// Find the maximal addressing mode that a load/store of V can fold,
3639 /// given an access type of AccessTy. This returns a list of involved
3640 /// instructions in AddrModeInsts.
3641 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
3642 /// optimizations.
3643 /// \p PromotedInsts maps the instructions to their type before promotion.
3644 /// \p TPT The ongoing transaction where every action should be registered.
3645 static ExtAddrMode
3646 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
3647 SmallVectorImpl<Instruction *> &AddrModeInsts,
3648 const TargetLowering &TLI, const LoopInfo &LI,
3649 const std::function<const DominatorTree &()> getDTFn,
3650 const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
3651 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
3652 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
3653 bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
3654 ExtAddrMode Result;
3655
3656 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, LI, getDTFn,
3657 AccessTy, AS, MemoryInst, Result,
3658 InsertedInsts, PromotedInsts, TPT,
3659 LargeOffsetGEP, OptSize, PSI, BFI)
3660 .matchAddr(V, 0);
3661 (void)Success;
3662 assert(Success && "Couldn't select *anything*?");
3663 return Result;
3664 }
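// The returned ExtAddrMode describes an address of the general form
//   BaseGV + BaseReg + BaseOffs + ScaledReg * Scale
// and AddrModeInsts lists the instructions whose computation was folded into
// that form, so the caller can sink them next to MemoryInst.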
3665
3666private:
3667 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
3668 bool matchAddr(Value *Addr, unsigned Depth);
3669 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
3670 bool *MovedAway = nullptr);
3671 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
3672 ExtAddrMode &AMBefore,
3673 ExtAddrMode &AMAfter);
3674 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
3675 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
3676 Value *PromotedOperand) const;
3677};
3678
3679class PhiNodeSet;
3680
3681/// An iterator for PhiNodeSet.
3682class PhiNodeSetIterator {
3683 PhiNodeSet *const Set;
3684 size_t CurrentIndex = 0;
3685
3686public:
3687 /// The constructor. Start should point to either a valid element, or be equal
3688 /// to the size of the underlying SmallVector of the PhiNodeSet.
3689 PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start);
3690 PHINode *operator*() const;
3691 PhiNodeSetIterator &operator++();
3692 bool operator==(const PhiNodeSetIterator &RHS) const;
3693 bool operator!=(const PhiNodeSetIterator &RHS) const;
3694};
3695
3696/// Keeps a set of PHINodes.
3697///
3698/// This is a minimal set implementation for a specific use case:
3699/// It is very fast when there are very few elements, but also provides good
3700/// performance when there are many. It is similar to SmallPtrSet, but also
3701/// provides iteration by insertion order, which is deterministic and stable
3702/// across runs. It is also similar to SmallSetVector, but provides removing
3703/// elements in O(1) time. This is achieved by not actually removing the element
3704/// from the underlying vector, so comes at the cost of using more memory, but
3705/// that is fine, since PhiNodeSets are used as short lived objects.
3706class PhiNodeSet {
3707 friend class PhiNodeSetIterator;
3708
3708
3709 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
3710 using iterator = PhiNodeSetIterator;
3711
3712 /// Keeps the elements in the order of their insertion in the underlying
3713 /// vector. To achieve constant time removal, it never deletes any element.
3714 SmallVector<PHINode *, 32> NodeList;
3715
3716 /// Keeps the elements in the underlying set implementation. This (and not the
3717 /// NodeList defined above) is the source of truth on whether an element
3718 /// is actually in the collection.
3719 MapType NodeMap;
3720
3721 /// Points to the first valid (not deleted) element when the set is not empty
3722 /// and the value is not zero. Equals to the size of the underlying vector
3723 /// when the set is empty. When the value is 0, as in the beginning, the
3724 /// first element may or may not be valid.
3725 size_t FirstValidElement = 0;
3726
3727public:
3728 /// Inserts a new element to the collection.
3729 /// \returns true if the element is actually added, i.e. was not in the
3730 /// collection before the operation.
3731 bool insert(PHINode *Ptr) {
3732 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
3733 NodeList.push_back(Ptr);
3734 return true;
3735 }
3736 return false;
3737 }
3738
3739 /// Removes the element from the collection.
3740 /// \returns whether the element is actually removed, i.e. was in the
3741 /// collection before the operation.
3742 bool erase(PHINode *Ptr) {
3743 if (NodeMap.erase(Ptr)) {
3744 SkipRemovedElements(FirstValidElement);
3745 return true;
3746 }
3747 return false;
3748 }
3749
3750 /// Removes all elements and clears the collection.
3751 void clear() {
3752 NodeMap.clear();
3753 NodeList.clear();
3754 FirstValidElement = 0;
3755 }
3756
3757 /// \returns an iterator that will iterate the elements in the order of
3758 /// insertion.
3759 iterator begin() {
3760 if (FirstValidElement == 0)
3761 SkipRemovedElements(FirstValidElement);
3762 return PhiNodeSetIterator(this, FirstValidElement);
3763 }
3764
3765 /// \returns an iterator that points to the end of the collection.
3766 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
3767
3768 /// Returns the number of elements in the collection.
3769 size_t size() const { return NodeMap.size(); }
3770
3771 /// \returns 1 if the given element is in the collection, and 0 if otherwise.
3772 size_t count(PHINode *Ptr) const { return NodeMap.count(Ptr); }
3773
3774private:
3775 /// Updates the CurrentIndex so that it will point to a valid element.
3776 ///
3777 /// If the element of NodeList at CurrentIndex is valid, it does not
3778 /// change it. If there are no more valid elements, it updates CurrentIndex
3779 /// to point to the end of the NodeList.
3780 void SkipRemovedElements(size_t &CurrentIndex) {
3781 while (CurrentIndex < NodeList.size()) {
3782 auto it = NodeMap.find(NodeList[CurrentIndex]);
3783 // If the element has been deleted and added again later, NodeMap will
3784 // point to a different index, so CurrentIndex will still be invalid.
3785 if (it != NodeMap.end() && it->second == CurrentIndex)
3786 break;
3787 ++CurrentIndex;
3788 }
3789 }
3790};
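// A sketch of the lazy-deletion behaviour (PN1 and PN2 stand for arbitrary
// PHINode pointers; they and visit() are placeholders for this illustration):
//
//   PhiNodeSet Set;
//   Set.insert(PN1);
//   Set.insert(PN2);
//   Set.erase(PN1);          // O(1): removed from NodeMap only; NodeList keeps the slot
//   for (PHINode *P : Set)   // iteration skips erased slots, in insertion order
//     visit(P);              // visits PN2 only
//   assert(Set.count(PN1) == 0 && Set.size() == 1);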
3791
3792PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
3793 : Set(Set), CurrentIndex(Start) {}
3794
3795PHINode *PhiNodeSetIterator::operator*() const {
3796 assert(CurrentIndex < Set->NodeList.size() &&
3797 "PhiNodeSet access out of range");
3798 return Set->NodeList[CurrentIndex];
3799}
3800
3801PhiNodeSetIterator &PhiNodeSetIterator::operator++() {
3802 assert(CurrentIndex < Set->NodeList.size() &&
3803 "PhiNodeSet access out of range");
3804 ++CurrentIndex;
3805 Set->SkipRemovedElements(CurrentIndex);
3806 return *this;
3807}
3808
3809bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
3810 return CurrentIndex == RHS.CurrentIndex;
3811}
3812
3813bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
3814 return !((*this) == RHS);
3815}
3816
3817 /// Keep track of the simplification of Phi nodes.
3818 /// Accept the set of all phi nodes and erase a phi node from this set
3819 /// when it is simplified.
3820class SimplificationTracker {
3821 DenseMap<Value *, Value *> Storage;
3822 const SimplifyQuery &SQ;
3823 // Tracks newly created Phi nodes. The elements are iterated by insertion
3824 // order.
3825 PhiNodeSet AllPhiNodes;
3826 // Tracks newly created Select nodes.
3827 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3828
3829public:
3830 SimplificationTracker(const SimplifyQuery &sq) : SQ(sq) {}
3831
3832 Value *Get(Value *V) {
3833 do {
3834 auto SV = Storage.find(V);
3835 if (SV == Storage.end())
3836 return V;
3837 V = SV->second;
3838 } while (true);
3839 }
3840
3841 Value *Simplify(Value *Val) {
3842 SmallVector<Value *, 32> WorkList;
3843 SmallPtrSet<Value *, 32> Visited;
3844 WorkList.push_back(Val);
3845 while (!WorkList.empty()) {
3846 auto *P = WorkList.pop_back_val();
3847 if (!Visited.insert(P).second)
3848 continue;
3849 if (auto *PI = dyn_cast<Instruction>(P))
3850 if (Value *V = simplifyInstruction(cast<Instruction>(PI), SQ)) {
3851 for (auto *U : PI->users())
3852 WorkList.push_back(cast<Value>(U));
3853 Put(PI, V);
3854 PI->replaceAllUsesWith(V);
3855 if (auto *PHI = dyn_cast<PHINode>(PI))
3856 AllPhiNodes.erase(PHI);
3857 if (auto *Select = dyn_cast<SelectInst>(PI))
3858 AllSelectNodes.erase(Select);
3859 PI->eraseFromParent();
3860 }
3861 }
3862 return Get(Val);
3863 }
3864
3865 void Put(Value *From, Value *To) { Storage.insert({From, To}); }
3866
3867 void ReplacePhi(PHINode *From, PHINode *To) {
3868 Value *OldReplacement = Get(From);
3869 while (OldReplacement != From) {
3870 From = To;
3871 To = dyn_cast<PHINode>(OldReplacement);
3872 OldReplacement = Get(From);
3873 }
3874 assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
3875 Put(From, To);
3876 From->replaceAllUsesWith(To);
3877 AllPhiNodes.erase(From);
3878 From->eraseFromParent();
3879 }
3880
3881 PhiNodeSet &newPhiNodes() { return AllPhiNodes; }
3882
3883 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3884
3885 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3886
3887 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3888
3889 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3890
3891 void destroyNewNodes(Type *CommonType) {
3892 // For safe erasing, replace the uses with dummy value first.
3893 auto *Dummy = PoisonValue::get(CommonType);
3894 for (auto *I : AllPhiNodes) {
3895 I->replaceAllUsesWith(Dummy);
3896 I->eraseFromParent();
3897 }
3898 AllPhiNodes.clear();
3899 for (auto *I : AllSelectNodes) {
3900 I->replaceAllUsesWith(Dummy);
3901 I->eraseFromParent();
3902 }
3903 AllSelectNodes.clear();
3904 }
3905};
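// Get() follows replacement chains transitively, so queries always return the
// latest surviving value. A small sketch (A, B and C are placeholder values):
//
//   SimplificationTracker ST(SQ);
//   ST.Put(A, B);          // A was simplified to B
//   ST.Put(B, C);          // later, B itself was simplified to C
//   Value *V = ST.Get(A);  // yields C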
3906
3907/// A helper class for combining addressing modes.
3908class AddressingModeCombiner {
3909 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3910 typedef std::pair<PHINode *, PHINode *> PHIPair;
3911
3912private:
3913 /// The addressing modes we've collected.
3914 SmallVector<ExtAddrMode, 16> AddrModes;
3915
3916 /// The field in which the AddrModes differ, when we have more than one.
3917 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3918
3919 /// Are the AddrModes that we have all just equal to their original values?
3920 bool AllAddrModesTrivial = true;
3921
3922 /// Common Type for all different fields in addressing modes.
3923 Type *CommonType = nullptr;
3924
3925 /// SimplifyQuery for simplifyInstruction utility.
3926 const SimplifyQuery &SQ;
3927
3928 /// Original Address.
3929 Value *Original;
3930
3931 /// Common value among addresses
3932 Value *CommonValue = nullptr;
3933
3934public:
3935 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3936 : SQ(_SQ), Original(OriginalValue) {}
3937
3938 ~AddressingModeCombiner() { eraseCommonValueIfDead(); }
3939
3940 /// Get the combined AddrMode
3941 const ExtAddrMode &getAddrMode() const { return AddrModes[0]; }
3942
3943 /// Add a new AddrMode if it's compatible with the AddrModes we already
3944 /// have.
3945 /// \return True iff we succeeded in doing so.
3946 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3947 // Take note of whether we have any non-trivial AddrModes; we need to detect
3948 // when all AddrModes are trivial, as then we would introduce a phi or select
3949 // which just duplicates what's already there.
3950 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3951
3952 // If this is the first addrmode then everything is fine.
3953 if (AddrModes.empty()) {
3954 AddrModes.emplace_back(NewAddrMode);
3955 return true;
3956 }
3957
3958 // Figure out how different this is from the other address modes, which we
3959 // can do just by comparing against the first one given that we only care
3960 // about the cumulative difference.
3961 ExtAddrMode::FieldName ThisDifferentField =
3962 AddrModes[0].compare(NewAddrMode);
3963 if (DifferentField == ExtAddrMode::NoField)
3964 DifferentField = ThisDifferentField;
3965 else if (DifferentField != ThisDifferentField)
3966 DifferentField = ExtAddrMode::MultipleFields;
3967
3968 // If NewAddrMode differs in more than one dimension we cannot handle it.
3969 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3970
3971 // If Scale Field is different then we reject.
3972 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3973
3974 // We must also reject the case where the base offset differs and the
3975 // scaled register is not null; we cannot handle this case because the
3976 // merge of the different offsets would have to be used as the ScaledReg.
3977 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3978 !NewAddrMode.ScaledReg);
3979
3980 // We must also reject the case where the GV differs and a BaseReg is
3981 // installed, because we want to use the base reg as a merge of the GV values.
3982 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3983 !NewAddrMode.HasBaseReg);
3984
3985 // Even if NewAddrMode is the same, we still need to collect it because the
3986 // original value is different, and later we will need all original values
3987 // as anchors when finding the common Phi node.
3988 if (CanHandle)
3989 AddrModes.emplace_back(NewAddrMode);
3990 else
3991 AddrModes.clear();
3992
3993 return CanHandle;
3994 }
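// For example (an illustrative sketch): the two modes [%b1 + 8] and [%b2 + 8]
// differ only in the base register, so both are kept and can later be merged
// through a phi of %b1 and %b2; whereas [%b + 8 + %s*4] and [%b + 16 + %s*4]
// differ in the base offset while a scaled register is present, which the
// checks above reject.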
3995
3996 /// Combine the addressing modes we've collected into a single
3997 /// addressing mode.
3998 /// \return True iff we successfully combined them or we only had one so
3999 /// didn't need to combine them anyway.
4000 bool combineAddrModes() {
4001 // If we have no AddrModes then they can't be combined.
4002 if (AddrModes.size() == 0)
4003 return false;
4004
4005 // A single AddrMode can trivially be combined.
4006 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
4007 return true;
4008
4009 // If the AddrModes we collected are all just equal to the value they are
4010 // derived from then combining them wouldn't do anything useful.
4011 if (AllAddrModesTrivial)
4012 return false;
4013
4014 if (!addrModeCombiningAllowed())
4015 return false;
4016
4017 // Build a map between <original value, basic block where we saw it> to
4018 // value of base register.
4019 // Bail out if there is no common type.
4020 FoldAddrToValueMapping Map;
4021 if (!initializeMap(Map))
4022 return false;
4023
4024 CommonValue = findCommon(Map);
4025 if (CommonValue)
4026 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
4027 return CommonValue != nullptr;
4028 }
4029
4030private:
4031 /// `CommonValue` may be a placeholder inserted by us.
4032 /// If the placeholder is not used, we should remove this dead instruction.
4033 void eraseCommonValueIfDead() {
4034 if (CommonValue && CommonValue->getNumUses() == 0)
4035 if (Instruction *CommonInst = dyn_cast<Instruction>(CommonValue))
4036 CommonInst->eraseFromParent();
4037 }
4038
4039 /// Initialize Map with anchor values. For each address seen,
4040 /// we record the value of the differing field in that address.
4041 /// At the same time we find a common type for the differing field, which we
4042 /// will use to create new Phi/Select nodes. Keep it in the CommonType field.
4043 /// Return false if no common type is found.
4044 bool initializeMap(FoldAddrToValueMapping &Map) {
4045 // Keep track of keys where the value is null. We will need to replace it
4046 // with constant null when we know the common type.
4047 SmallVector<Value *, 2> NullValue;
4048 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
4049 for (auto &AM : AddrModes) {
4050 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
4051 if (DV) {
4052 auto *Type = DV->getType();
4053 if (CommonType && CommonType != Type)
4054 return false;
4055 CommonType = Type;
4056 Map[AM.OriginalValue] = DV;
4057 } else {
4058 NullValue.push_back(AM.OriginalValue);
4059 }
4060 }
4061 assert(CommonType && "At least one non-null value must be!");
4062 for (auto *V : NullValue)
4063 Map[V] = Constant::getNullValue(CommonType);
4064 return true;
4065 }
4066
4067 /// We have a mapping between a value A and another value B, where B was a
4068 /// field in the addressing mode represented by A. We also have an original
4069 /// value C representing the address we start with. Traversing from C through
4070 /// phis and selects, we ended up with the A's in the map. This utility function
4071 /// tries to find a value V which is a field in addressing mode C such that,
4072 /// traversing through phi nodes and selects, we end up at the corresponding
4073 /// values B in the map. The utility will create new Phis/Selects if needed.
4074 // The simple example looks as follows:
4075 // BB1:
4076 // p1 = b1 + 40
4077 // br cond BB2, BB3
4078 // BB2:
4079 // p2 = b2 + 40
4080 // br BB3
4081 // BB3:
4082 // p = phi [p1, BB1], [p2, BB2]
4083 // v = load p
4084 // Map is
4085 // p1 -> b1
4086 // p2 -> b2
4087 // Request is
4088 // p -> ?
4089 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
4090 Value *findCommon(FoldAddrToValueMapping &Map) {
4091 // Tracks the simplification of newly created phi nodes. The reason we use
4092 // this mapping is because we will add new created Phi nodes in AddrToBase.
4093 // Simplification of Phi nodes is recursive, so some Phi node may
4094 // be simplified after we added it to AddrToBase. In reality this
4095 // simplification is possible only if original phi/selects were not
4096 // simplified yet.
4097 // Using this mapping we can find the current value in AddrToBase.
4098 SimplificationTracker ST(SQ);
4099
4100 // First step, DFS to create PHI nodes for all intermediate blocks.
4101 // Also fill traverse order for the second step.
4102 SmallVector<Value *, 32> TraverseOrder;
4103 InsertPlaceholders(Map, TraverseOrder, ST);
4104
4105 // Second Step, fill new nodes by merged values and simplify if possible.
4106 FillPlaceholders(Map, TraverseOrder, ST);
4107
4108 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
4109 ST.destroyNewNodes(CommonType);
4110 return nullptr;
4111 }
4112
4113 // Now we'd like to match the new Phi nodes to existing ones.
4114 unsigned PhiNotMatchedCount = 0;
4115 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
4116 ST.destroyNewNodes(CommonType);
4117 return nullptr;
4118 }
4119
4120 auto *Result = ST.Get(Map.find(Original)->second);
4121 if (Result) {
4122 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
4123 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
4124 }
4125 return Result;
4126 }
4127
4128 /// Try to match PHI node to Candidate.
4129 /// Matcher tracks the matched Phi nodes.
4130 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
4131 SmallSetVector<PHIPair, 8> &Matcher,
4132 PhiNodeSet &PhiNodesToMatch) {
4133 SmallVector<PHIPair, 8> WorkList;
4134 Matcher.insert({PHI, Candidate});
4135 SmallSet<PHINode *, 8> MatchedPHIs;
4136 MatchedPHIs.insert(PHI);
4137 WorkList.push_back({PHI, Candidate});
4138 SmallSet<PHIPair, 8> Visited;
4139 while (!WorkList.empty()) {
4140 auto Item = WorkList.pop_back_val();
4141 if (!Visited.insert(Item).second)
4142 continue;
4143 // We iterate over all incoming values of the Phi to compare them.
4144 // If the values are different, both of them are Phis, the first one is a
4145 // Phi we added (subject to matching), and both of them are in the same
4146 // basic block, then we can match this pair if their values match. So we
4147 // state that these values match and add the pair to the worklist to verify that.
4148 for (auto *B : Item.first->blocks()) {
4149 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
4150 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
4151 if (FirstValue == SecondValue)
4152 continue;
4153
4154 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
4155 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
4156
4157 // If one of them is not a Phi, or
4158 // the first one is not a Phi node from the set we'd like to match, or
4159 // the Phi nodes are from different basic blocks, then
4160 // we will not be able to match.
4161 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
4162 FirstPhi->getParent() != SecondPhi->getParent())
4163 return false;
4164
4165 // If we already matched them then continue.
4166 if (Matcher.count({FirstPhi, SecondPhi}))
4167 continue;
4168 // So the values are different and do not match yet; we need them to
4169 // match. (But we register no more than one match per PHI node, so that
4170 // we won't later try to replace them twice.)
4171 if (MatchedPHIs.insert(FirstPhi).second)
4172 Matcher.insert({FirstPhi, SecondPhi});
4173 // But we must check it.
4174 WorkList.push_back({FirstPhi, SecondPhi});
4175 }
4176 }
4177 return true;
4178 }
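// For instance (illustrative IR, not from a test): a newly created
//   %sunk_phi = phi ptr [ %p1, %bb1 ], [ %p2, %bb2 ]
// matches an existing
//   %old_phi  = phi ptr [ %p1, %bb1 ], [ %p2, %bb2 ]
// immediately; if some incoming values are themselves distinct Phis, the pair
// is speculatively added to Matcher and pushed on the worklist, and matching
// recurses until a mismatch is found or every speculated pair is consistent.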
4179
4180 /// For the given set of PHI nodes (in the SimplificationTracker) try
4181 /// to find their equivalents.
4182 /// Returns false if this matching fails and creation of new Phi is disabled.
4183 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
4184 unsigned &PhiNotMatchedCount) {
4185 // Matched and PhiNodesToMatch iterate their elements in a deterministic
4186 // order, so the replacements (ReplacePhi) are also done in a deterministic
4187 // order.
4188 SmallSetVector<PHIPair, 8> Matched;
4189 SmallPtrSet<PHINode *, 8> WillNotMatch;
4190 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
4191 while (PhiNodesToMatch.size()) {
4192 PHINode *PHI = *PhiNodesToMatch.begin();
4193
4194 // Add ourselves; if no Phi node in the basic block matches, we do not match.
4195 WillNotMatch.clear();
4196 WillNotMatch.insert(PHI);
4197
4198 // Traverse all Phis until we found equivalent or fail to do that.
4199 bool IsMatched = false;
4200 for (auto &P : PHI->getParent()->phis()) {
4201 // Skip new Phi nodes.
4202 if (PhiNodesToMatch.count(&P))
4203 continue;
4204 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
4205 break;
4206 // If it does not match, collect all Phi nodes from the matcher;
4207 // if we end up with no match, then all these Phi nodes will not match
4208 // later.
4209 for (auto M : Matched)
4210 WillNotMatch.insert(M.first);
4211 Matched.clear();
4212 }
4213 if (IsMatched) {
4214 // Replace all matched values and erase them.
4215 for (auto MV : Matched)
4216 ST.ReplacePhi(MV.first, MV.second);
4217 Matched.clear();
4218 continue;
4219 }
4220 // If we are not allowed to create new nodes then bail out.
4221 if (!AllowNewPhiNodes)
4222 return false;
4223 // Just remove all seen values in matcher. They will not match anything.
4224 PhiNotMatchedCount += WillNotMatch.size();
4225 for (auto *P : WillNotMatch)
4226 PhiNodesToMatch.erase(P);
4227 }
4228 return true;
4229 }
4230 /// Fill the placeholders with values from predecessors and simplify them.
4231 void FillPlaceholders(FoldAddrToValueMapping &Map,
4232 SmallVectorImpl<Value *> &TraverseOrder,
4233 SimplificationTracker &ST) {
4234 while (!TraverseOrder.empty()) {
4235 Value *Current = TraverseOrder.pop_back_val();
4236 assert(Map.contains(Current) && "No node to fill!!!");
4237 Value *V = Map[Current];
4238
4239 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
4240 // CurrentValue also must be Select.
4241 auto *CurrentSelect = cast<SelectInst>(Current);
4242 auto *TrueValue = CurrentSelect->getTrueValue();
4243 assert(Map.contains(TrueValue) && "No True Value!");
4244 Select->setTrueValue(ST.Get(Map[TrueValue]));
4245 auto *FalseValue = CurrentSelect->getFalseValue();
4246 assert(Map.contains(FalseValue) && "No False Value!");
4247 Select->setFalseValue(ST.Get(Map[FalseValue]));
4248 } else {
4249 // Must be a Phi node then.
4250 auto *PHI = cast<PHINode>(V);
4251 // Fill the Phi node with values from predecessors.
4252 for (auto *B : predecessors(PHI->getParent())) {
4253 Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
4254 assert(Map.contains(PV) && "No predecessor Value!");
4255 PHI->addIncoming(ST.Get(Map[PV]), B);
4256 }
4257 }
4258 Map[Current] = ST.Simplify(V);
4259 }
4260 }
4261
4262 /// Starting from the original value, recursively iterates over the def-use
4263 /// chain up to known ending values represented in a map. For each traversed
4264 /// phi/select, inserts a placeholder Phi or Select.
4265 /// Reports all newly created Phi/Select nodes by adding them to the set.
4266 /// Also reports the order in which the values have been traversed.
4267 void InsertPlaceholders(FoldAddrToValueMapping &Map,
4268 SmallVectorImpl<Value *> &TraverseOrder,
4269 SimplificationTracker &ST) {
4270 SmallVector<Value *, 32> Worklist;
4271 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
4272 "Address must be a Phi or Select node");
4273 auto *Dummy = PoisonValue::get(CommonType);
4274 Worklist.push_back(Original);
4275 while (!Worklist.empty()) {
4276 Value *Current = Worklist.pop_back_val();
4277 // if it is already visited or it is an ending value then skip it.
4278 if (Map.contains(Current))
4279 continue;
4280 TraverseOrder.push_back(Current);
4281
4282 // CurrentValue must be a Phi node or select. All others must be covered
4283 // by anchors.
4284 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
4285 // Is it OK to get metadata from OrigSelect?!
4286 // Create a Select placeholder with dummy value.
4287 SelectInst *Select =
4288 SelectInst::Create(CurrentSelect->getCondition(), Dummy, Dummy,
4289 CurrentSelect->getName(),
4290 CurrentSelect->getIterator(), CurrentSelect);
4291 Map[Current] = Select;
4292 ST.insertNewSelect(Select);
4293 // We are interested in True and False values.
4294 Worklist.push_back(CurrentSelect->getTrueValue());
4295 Worklist.push_back(CurrentSelect->getFalseValue());
4296 } else {
4297 // It must be a Phi node then.
4298 PHINode *CurrentPhi = cast<PHINode>(Current);
4299 unsigned PredCount = CurrentPhi->getNumIncomingValues();
4300 PHINode *PHI =
4301 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi->getIterator());
4302 Map[Current] = PHI;
4303 ST.insertNewPhi(PHI);
4304 append_range(Worklist, CurrentPhi->incoming_values());
4305 }
4306 }
4307 }
4308
4309 bool addrModeCombiningAllowed() {
4310 if (DisableComplexAddrModes)
4311 return false;
4312 switch (DifferentField) {
4313 default:
4314 return false;
4315 case ExtAddrMode::BaseRegField:
4316 return AddrSinkCombineBaseReg;
4317 case ExtAddrMode::BaseGVField:
4318 return AddrSinkCombineBaseGV;
4319 case ExtAddrMode::BaseOffsField:
4320 return AddrSinkCombineBaseOffs;
4321 case ExtAddrMode::ScaledRegField:
4322 return AddrSinkCombineScaledReg;
4323 }
4324 }
4325};
4326} // end anonymous namespace
4327
4328/// Try adding ScaleReg*Scale to the current addressing mode.
4329/// Return true and update AddrMode if this addr mode is legal for the target,
4330/// false if not.
4331bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
4332 unsigned Depth) {
4333 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
4334 // mode. Just process that directly.
4335 if (Scale == 1)
4336 return matchAddr(ScaleReg, Depth);
4337
4338 // If the scale is 0, it takes nothing to add this.
4339 if (Scale == 0)
4340 return true;
4341
4342 // If we already have a scale of this value, we can add to it, otherwise, we
4343 // need an available scale field.
4344 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
4345 return false;
4346
4347 ExtAddrMode TestAddrMode = AddrMode;
4348
4349 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
4350 // [A+B + A*7] -> [B+A*8].
4351 TestAddrMode.Scale += Scale;
4352 TestAddrMode.ScaledReg = ScaleReg;
4353
4354 // If the new address isn't legal, bail out.
4355 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
4356 return false;
4357
4358 // It was legal, so commit it.
4359 AddrMode = TestAddrMode;
4360
4361 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
4362 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
4363 // X*Scale + C*Scale to addr mode. If we found available IV increment, do not
4364 // go any further: we can reuse it and cannot eliminate it.
4365 ConstantInt *CI = nullptr;
4366 Value *AddLHS = nullptr;
4367 if (isa<Instruction>(ScaleReg) && // not a constant expr.
4368 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
4369 !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
4370 TestAddrMode.InBounds = false;
4371 TestAddrMode.ScaledReg = AddLHS;
4372 TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
4373
4374 // If this addressing mode is legal, commit it and remember that we folded
4375 // this instruction.
4376 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
4377 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
4378 AddrMode = TestAddrMode;
4379 return true;
4380 }
4381 // Restore status quo.
4382 TestAddrMode = AddrMode;
4383 }
4384
4385 // If this is an add recurrence with a constant step, return the increment
4386 // instruction and the canonicalized step.
4387 auto GetConstantStep =
4388 [this](const Value *V) -> std::optional<std::pair<Instruction *, APInt>> {
4389 auto *PN = dyn_cast<PHINode>(V);
4390 if (!PN)
4391 return std::nullopt;
4392 auto IVInc = getIVIncrement(PN, &LI);
4393 if (!IVInc)
4394 return std::nullopt;
4395 // TODO: The result of the intrinsics above is two's complement. However, when
4396 // the IV inc is expressed as an add or sub, iv.next is potentially a poison value.
4397 // If it has nuw or nsw flags, we need to make sure that these flags are
4398 // inferrable at the point of the memory instruction. Otherwise we are replacing
4399 // a well-defined two's complement computation with poison. Currently, to avoid
4400 // the potentially complex analysis needed to prove this, we reject such cases.
4401 if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
4402 if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
4403 return std::nullopt;
4404 if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
4405 return std::make_pair(IVInc->first, ConstantStep->getValue());
4406 return std::nullopt;
4407 };
4408
4409 // Try to account for the following special case:
4410 // 1. ScaleReg is an inductive variable;
4411 // 2. We use it with non-zero offset;
4412 // 3. IV's increment is available at the point of memory instruction.
4413 //
4414 // In this case, we may reuse the IV increment instead of the IV Phi to
4415 // achieve the following advantages:
4416 // 1. If IV step matches the offset, we will have no need in the offset;
4417 // 2. Even if they don't match, we will reduce the overlap of the live IV
4418 // and the IV increment, which will potentially lead to better register
4419 // assignment (see the sketch below).
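// Illustrative IR for case 1 (a sketch, not from a test): with
//   %iv      = phi i64 [ 0, %preheader ], [ %iv.next, %loop ]
//   %iv.next = add i64 %iv, 1
// an access at [%base + 4 + %iv*4] can be rewritten to use the increment,
// [%base + %iv.next*4], because %iv.next*4 == %iv*4 + 4, folding the offset
// into the step.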
4420 if (AddrMode.BaseOffs) {
4421 if (auto IVStep = GetConstantStep(ScaleReg)) {
4422 Instruction *IVInc = IVStep->first;
4423 // The following assert is important to ensure a lack of infinite loops.
4424 // This transform is (intentionally) the inverse of the one just above.
4425 // If they don't agree on the definition of an increment, we'd alternate
4426 // back and forth indefinitely.
4427 assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
4428 APInt Step = IVStep->second;
4429 APInt Offset = Step * AddrMode.Scale;
4430 if (Offset.isSignedIntN(64)) {
4431 TestAddrMode.InBounds = false;
4432 TestAddrMode.ScaledReg = IVInc;
4433 TestAddrMode.BaseOffs -= Offset.getLimitedValue();
4434 // If this addressing mode is legal, commit it..
4435 // (Note that we defer the (expensive) domtree base legality check
4436 // to the very last possible point.)
4437 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
4438 getDTFn().dominates(IVInc, MemoryInst)) {
4439 AddrModeInsts.push_back(cast<Instruction>(IVInc));
4440 AddrMode = TestAddrMode;
4441 return true;
4442 }
4443 // Restore status quo.
4444 TestAddrMode = AddrMode;
4445 }
4446 }
4447 }
4448
4449 // Otherwise, just return what we have.
4450 return true;
4451}
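// A sketch of the ScaleReg = X + C rewrite handled above (illustrative
// values): for an access at [%base + (%x + 3)*4], the matcher retries with
// ScaledReg = %x and BaseOffs increased by 3*4, i.e. [%base + 12 + %x*4],
// and keeps the original form only if the target rejects the folded mode.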
4452
4453/// This is a little filter, which returns true if an addressing computation
4454/// involving I might be folded into a load/store accessing it.
4455/// This doesn't need to be perfect, but needs to accept at least
4456/// the set of instructions that MatchOperationAddr can.
4457static bool MightBeFoldableInst(Instruction *I) {
4458 switch (I->getOpcode()) {
4459 case Instruction::BitCast:
4460 case Instruction::AddrSpaceCast:
4461 // Don't touch identity bitcasts.
4462 if (I->getType() == I->getOperand(0)->getType())
4463 return false;
4464 return I->getType()->isIntOrPtrTy();
4465 case Instruction::PtrToInt:
4466 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4467 return true;
4468 case Instruction::IntToPtr:
4469 // We know the input is intptr_t, so this is foldable.
4470 return true;
4471 case Instruction::Add:
4472 return true;
4473 case Instruction::Mul:
4474 case Instruction::Shl:
4475 // Can only handle X*C and X << C.
4476 return isa<ConstantInt>(I->getOperand(1));
4477 case Instruction::GetElementPtr:
4478 return true;
4479 default:
4480 return false;
4481 }
4482}
4483
4484/// Check whether or not \p Val is a legal instruction for \p TLI.
4485/// \note \p Val is assumed to be the product of some type promotion.
4486/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
4487/// to be legal, as the non-promoted value would have had the same state.
4488static bool isPromotedInstructionLegal(const TargetLowering &TLI,
4489 const DataLayout &DL, Value *Val) {
4490 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
4491 if (!PromotedInst)
4492 return false;
4493 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
4494 // If the ISDOpcode is undefined, it was undefined before the promotion.
4495 if (!ISDOpcode)
4496 return true;
4497 // Otherwise, check if the promoted instruction is legal or not.
4498 return TLI.isOperationLegalOrCustom(
4499 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
4500}
4501
4502namespace {
4503
4504/// Helper class to perform type promotion.
4505class TypePromotionHelper {
4506 /// Utility function to add a promoted instruction \p ExtOpnd to
4507 /// \p PromotedInsts and record the type of extension we have seen.
4508 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
4509 Instruction *ExtOpnd, bool IsSExt) {
4510 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4511 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
4512 if (It != PromotedInsts.end()) {
4513 // If the new extension is same as original, the information in
4514 // PromotedInsts[ExtOpnd] is still correct.
4515 if (It->second.getInt() == ExtTy)
4516 return;
4517
4518 // Now that the new extension is different from the old extension, we
4519 // invalidate the type information by setting the extension type to
4520 // BothExtension.
4521 ExtTy = BothExtension;
4522 }
4523 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
4524 }
4525
4526 /// Utility function to query the original type of instruction \p Opnd
4527 /// with a matched extension type. If the extension doesn't match, we
4528 /// cannot use the information we had on the original type.
4529 /// BothExtension doesn't match any extension type.
4530 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
4531 Instruction *Opnd, bool IsSExt) {
4532 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
4533 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
4534 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
4535 return It->second.getPointer();
4536 return nullptr;
4537 }
4538
4539 /// Utility function to check whether or not a sign or zero extension
4540 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
4541 /// either using the operands of \p Inst or promoting \p Inst.
4542 /// The type of the extension is defined by \p IsSExt.
4543 /// In other words, check if:
4544 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
4545 /// #1 Promotion applies:
4546 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
4547 /// #2 Operand reuses:
4548 /// ext opnd1 to ConsideredExtType.
4549 /// \p PromotedInsts maps the instructions to their type before promotion.
4550 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
4551 const InstrToOrigTy &PromotedInsts, bool IsSExt);
4552
4553 /// Utility function to determine if \p OpIdx should be promoted when
4554 /// promoting \p Inst.
4555 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
4556 return !(isa<SelectInst>(Inst) && OpIdx == 0);
4557 }
4558
4559 /// Utility function to promote the operand of \p Ext when this
4560 /// operand is a promotable trunc or sext or zext.
4561 /// \p PromotedInsts maps the instructions to their type before promotion.
4562 /// \p CreatedInstsCost[out] contains the cost of all instructions
4563 /// created to promote the operand of Ext.
4564 /// Newly added extensions are inserted in \p Exts.
4565 /// Newly added truncates are inserted in \p Truncs.
4566 /// Should never be called directly.
4567 /// \return The promoted value which is used instead of Ext.
4568 static Value *promoteOperandForTruncAndAnyExt(
4569 Instruction *Ext, TypePromotionTransaction &TPT,
4570 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4571 SmallVectorImpl<Instruction *> *Exts,
4572 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
4573
4574 /// Utility function to promote the operand of \p Ext when this
4575 /// operand is promotable and is not a supported trunc or sext.
4576 /// \p PromotedInsts maps the instructions to their type before promotion.
4577 /// \p CreatedInstsCost[out] contains the cost of all the instructions
4578 /// created to promote the operand of Ext.
4579 /// Newly added extensions are inserted in \p Exts.
4580 /// Newly added truncates are inserted in \p Truncs.
4581 /// Should never be called directly.
4582 /// \return The promoted value which is used instead of Ext.
4583 static Value *promoteOperandForOther(Instruction *Ext,
4584 TypePromotionTransaction &TPT,
4585 InstrToOrigTy &PromotedInsts,
4586 unsigned &CreatedInstsCost,
4587 SmallVectorImpl<Instruction *> *Exts,
4588 SmallVectorImpl<Instruction *> *Truncs,
4589 const TargetLowering &TLI, bool IsSExt);
4590
4591 /// \see promoteOperandForOther.
4592 static Value *signExtendOperandForOther(
4593 Instruction *Ext, TypePromotionTransaction &TPT,
4594 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4595 SmallVectorImpl<Instruction *> *Exts,
4596 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4597 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4598 Exts, Truncs, TLI, true);
4599 }
4600
4601 /// \see promoteOperandForOther.
4602 static Value *zeroExtendOperandForOther(
4603 Instruction *Ext, TypePromotionTransaction &TPT,
4604 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4605 SmallVectorImpl<Instruction *> *Exts,
4606 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4607 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
4608 Exts, Truncs, TLI, false);
4609 }
4610
4611public:
4612 /// Type for the utility function that promotes the operand of Ext.
4613 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
4614 InstrToOrigTy &PromotedInsts,
4615 unsigned &CreatedInstsCost,
4616 SmallVectorImpl<Instruction *> *Exts,
4617 SmallVectorImpl<Instruction *> *Truncs,
4618 const TargetLowering &TLI);
4619
4620 /// Given a sign/zero extend instruction \p Ext, return the appropriate
4621 /// action to promote the operand of \p Ext instead of using Ext.
4622 /// \return NULL if no promotable action is possible with the current
4623 /// sign extension.
4624 /// \p InsertedInsts keeps track of all the instructions inserted by the
4625 /// other CodeGenPrepare optimizations. This information is important
4626 /// because we do not want to promote these instructions as CodeGenPrepare
4627 /// will reinsert them later. Thus creating an infinite loop: create/remove.
4628 /// \p PromotedInsts maps the instructions to their type before promotion.
4629 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
4630 const TargetLowering &TLI,
4631 const InstrToOrigTy &PromotedInsts);
4632};
4633
4634} // end anonymous namespace
4635
4636bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
4637 Type *ConsideredExtType,
4638 const InstrToOrigTy &PromotedInsts,
4639 bool IsSExt) {
4640 // The promotion helper does not know how to deal with vector types yet.
4641 // To be able to fix that, we would need to fix the places where we
4642 // statically extend, e.g., constants and such.
4643 if (Inst->getType()->isVectorTy())
4644 return false;
4645
4646 // We can always get through zext.
4647 if (isa<ZExtInst>(Inst))
4648 return true;
4649
4650 // sext(sext) is ok too.
4651 if (IsSExt && isa<SExtInst>(Inst))
4652 return true;
4653
4654 // We can get through binary operator, if it is legal. In other words, the
4655 // binary operator must have a nuw or nsw flag.
4656 if (const auto *BinOp = dyn_cast<BinaryOperator>(Inst))
4657 if (isa<OverflowingBinaryOperator>(BinOp) &&
4658 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
4659 (IsSExt && BinOp->hasNoSignedWrap())))
4660 return true;
4661
4662 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
4663 if ((Inst->getOpcode() == Instruction::And ||
4664 Inst->getOpcode() == Instruction::Or))
4665 return true;
4666
4667 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
4668 if (Inst->getOpcode() == Instruction::Xor) {
4669 // Make sure it is not a NOT.
4670 if (const auto *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)))
4671 if (!Cst->getValue().isAllOnes())
4672 return true;
4673 }
4674
4675 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
4676 // It may change a poisoned value into a regular value, like
4677 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
4678 // poisoned value regular value
4679 // It should be OK since undef covers valid value.
4680 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
4681 return true;
4682
4683 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
4684 // It may change a poisoned value into a regular value, like
4685 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
4686 // poisoned value regular value
4687 // It should be OK since undef covers valid value.
4688 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
4689 const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
4690 if (ExtInst->hasOneUse()) {
4691 const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
4692 if (AndInst && AndInst->getOpcode() == Instruction::And) {
4693 const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
4694 if (Cst &&
4695 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
4696 return true;
4697 }
4698 }
4699 }
4700
4701 // Check if we can do the following simplification.
4702 // ext(trunc(opnd)) --> ext(opnd)
4703 if (!isa<TruncInst>(Inst))
4704 return false;
4705
4706 Value *OpndVal = Inst->getOperand(0);
4707 // Check if we can use this operand in the extension.
4708 // If the type is larger than the result type of the extension, we cannot.
4709 if (!OpndVal->getType()->isIntegerTy() ||
4710 OpndVal->getType()->getIntegerBitWidth() >
4711 ConsideredExtType->getIntegerBitWidth())
4712 return false;
4713
4714 // If the operand of the truncate is not an instruction, we will not have
4715 // any information on the dropped bits.
4716 // (Actually we could for constants, but it is not worth the extra logic.)
4717 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
4718 if (!Opnd)
4719 return false;
4720
4721 // Check that the source of the truncate is narrow enough.
4722 // I.e., check that the trunc just drops extended bits of the same kind as
4723 // the extension.
4724 // #1 get the type of the operand and check the kind of the extended bits.
4725 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
4726 if (OpndType)
4727 ;
4728 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
4729 OpndType = Opnd->getOperand(0)->getType();
4730 else
4731 return false;
4732
4733 // #2 check that the truncate just drops extended bits.
4734 return Inst->getType()->getIntegerBitWidth() >=
4735 OpndType->getIntegerBitWidth();
4736}
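// Illustrative sketch (hypothetical IR, for exposition only): the trunc case
// accepted at the end of canGetThrough looks like
//   %s = sext i8 %v to i32
//   %t = trunc i32 %s to i16
//   %e = sext i16 %t to i64
// The truncate only drops bits that were produced by a sign extension of the
// same kind, so the promotion can safely look through it.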
4737
4738TypePromotionHelper::Action TypePromotionHelper::getAction(
4739 Instruction *Ext, const SetOfInstrs &InsertedInsts,
4740 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
4741 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
4742 "Unexpected instruction type");
4743 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
4744 Type *ExtTy = Ext->getType();
4745 bool IsSExt = isa<SExtInst>(Ext);
4746 // If the operand of the extension is not an instruction, we cannot
4747 // get through it.
4748 // If it is, check whether we can get through it.
4749 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
4750 return nullptr;
4751
4752 // Do not promote if the operand has been added by codegenprepare.
4753 // Otherwise, it means we are undoing an optimization that is likely to be
4754 // redone, thus causing potential infinite loop.
4755 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
4756 return nullptr;
4757
4758 // SExt or Trunc instructions.
4759 // Return the related handler.
4760 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
4761 isa<ZExtInst>(ExtOpnd))
4762 return promoteOperandForTruncAndAnyExt;
4763
4764 // Regular instruction.
4765 // Abort early if we will have to insert non-free instructions.
4766 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
4767 return nullptr;
4768 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
4769}
4770
4771Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
4772 Instruction *SExt, TypePromotionTransaction &TPT,
4773 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4774 SmallVectorImpl<Instruction *> *Exts,
4775 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
4776 // By construction, the operand of SExt is an instruction. Otherwise we cannot
4777 // get through it and this method should not be called.
4778 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
4779 Value *ExtVal = SExt;
4780 bool HasMergedNonFreeExt = false;
4781 if (isa<ZExtInst>(SExtOpnd)) {
4782 // Replace s|zext(zext(opnd))
4783 // => zext(opnd).
4784 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
4785 Value *ZExt =
4786 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
4787 TPT.replaceAllUsesWith(SExt, ZExt);
4788 TPT.eraseInstruction(SExt);
4789 ExtVal = ZExt;
4790 } else {
4791 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
4792 // => z|sext(opnd).
4793 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
4794 }
4795 CreatedInstsCost = 0;
4796
4797 // Remove dead code.
4798 if (SExtOpnd->use_empty())
4799 TPT.eraseInstruction(SExtOpnd);
4800
4801 // Check if the extension is still needed.
4802 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
4803 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
4804 if (ExtInst) {
4805 if (Exts)
4806 Exts->push_back(ExtInst);
4807 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
4808 }
4809 return ExtVal;
4810 }
4811
4812 // At this point we have: ext ty opnd to ty.
4813 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
4814 Value *NextVal = ExtInst->getOperand(0);
4815 TPT.eraseInstruction(ExtInst, NextVal);
4816 return NextVal;
4817}
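// Illustrative sketch (hypothetical IR): for the zext-of-zext case handled
// above,
//   %z = zext i8 %v to i16
//   %e = sext i16 %z to i32
// collapses to a single
//   %e = zext i8 %v to i32
// the original sext is erased, and %z is removed as well once it has no
// other uses.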
4818
4819Value *TypePromotionHelper::promoteOperandForOther(
4820 Instruction *Ext, TypePromotionTransaction &TPT,
4821 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
4822 SmallVectorImpl<Instruction *> *Exts,
4823 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
4824 bool IsSExt) {
4825 // By construction, the operand of Ext is an instruction. Otherwise we cannot
4826 // get through it and this method should not be called.
4827 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
4828 CreatedInstsCost = 0;
4829 if (!ExtOpnd->hasOneUse()) {
4830 // ExtOpnd will be promoted.
4831 // All its uses, except Ext, will need to use a truncated value of the
4832 // promoted version.
4833 // Create the truncate now.
4834 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
4835 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
4836 // Insert it just after the definition.
4837 ITrunc->moveAfter(ExtOpnd);
4838 if (Truncs)
4839 Truncs->push_back(ITrunc);
4840 }
4841
4842 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
4843 // Restore the operand of Ext (which has been replaced by the previous call
4844 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
4845 TPT.setOperand(Ext, 0, ExtOpnd);
4846 }
4847
4848 // Get through the Instruction:
4849 // 1. Update its type.
4850 // 2. Replace the uses of Ext by Inst.
4851 // 3. Extend each operand that needs to be extended.
4852
4853 // Remember the original type of the instruction before promotion.
4854 // This is useful to know that the high bits are sign extended bits.
4855 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
4856 // Step #1.
4857 TPT.mutateType(ExtOpnd, Ext->getType());
4858 // Step #2.
4859 TPT.replaceAllUsesWith(Ext, ExtOpnd);
4860 // Step #3.
4861 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
4862 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
4863 ++OpIdx) {
4864 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
4865 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
4866 !shouldExtOperand(ExtOpnd, OpIdx)) {
4867 LLVM_DEBUG(dbgs() << "No need to propagate\n");
4868 continue;
4869 }
4870 // Check if we can statically extend the operand.
4871 Value *Opnd = ExtOpnd->getOperand(OpIdx);
4872 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
4873 LLVM_DEBUG(dbgs() << "Statically extend\n");
4874 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
4875 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
4876 : Cst->getValue().zext(BitWidth);
4877 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
4878 continue;
4879 }
4880 // UndefValues are typed, so we have to statically extend them.
4881 if (isa<UndefValue>(Opnd)) {
4882 LLVM_DEBUG(dbgs() << "Statically extend\n");
4883 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
4884 continue;
4885 }
4886
4887 // Otherwise we have to explicitly sign or zero extend the operand.
4888 Value *ValForExtOpnd = IsSExt
4889 ? TPT.createSExt(ExtOpnd, Opnd, Ext->getType())
4890 : TPT.createZExt(ExtOpnd, Opnd, Ext->getType());
4891 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
4892 Instruction *InstForExtOpnd = dyn_cast<Instruction>(ValForExtOpnd);
4893 if (!InstForExtOpnd)
4894 continue;
4895
4896 if (Exts)
4897 Exts->push_back(InstForExtOpnd);
4898
4899 CreatedInstsCost += !TLI.isExtFree(InstForExtOpnd);
4900 }
4901 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4902 TPT.eraseInstruction(Ext);
4903 return ExtOpnd;
4904}
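// Illustrative sketch (hypothetical IR): promoting the operand of an
// extension through an nsw add, i.e.
//   %a = add nsw i32 %x, 1
//   %s = sext i32 %a to i64
// is rewritten by the steps above into
//   %x.ext = sext i32 %x to i64
//   %a     = add nsw i64 %x.ext, 1
// with the constant statically extended, the original sext erased, and a
// trunc back to i32 inserted for any other users of %a.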
4905
4906/// Check whether or not promoting an instruction to a wider type is profitable.
4907/// \p NewCost gives the cost of extension instructions created by the
4908/// promotion.
4909/// \p OldCost gives the cost of extension instructions before the promotion
4910/// plus the number of instructions that have been
4911 /// matched in the addressing mode thanks to the promotion.
4912/// \p PromotedOperand is the value that has been promoted.
4913/// \return True if the promotion is profitable, false otherwise.
4914bool AddressingModeMatcher::isPromotionProfitable(
4915 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4916 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4917 << '\n');
4918 // The cost of the new extensions is greater than the cost of the
4919 // old extension plus what we folded.
4920 // This is not profitable.
4921 if (NewCost > OldCost)
4922 return false;
4923 if (NewCost < OldCost)
4924 return true;
4925 // The promotion is neutral but it may help folding the sign extension in
4926 // loads for instance.
4927 // Check that we did not create an illegal instruction.
4928 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4929}
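// Worked example (hypothetical numbers): if the promotion creates one
// non-free extension (NewCost = 1) while removing one extension and folding
// one extra instruction into the addressing mode (OldCost = 2), it pays off.
// When NewCost == OldCost, the promotion is kept only if the promoted
// instruction is still legal for the target.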
4930
4931/// Given an instruction or constant expr, see if we can fold the operation
4932/// into the addressing mode. If so, update the addressing mode and return
4933/// true, otherwise return false without modifying AddrMode.
4934 /// If \p MovedAway is not NULL, it reports whether or not AddrInst has been
4935 /// moved away instead of being folded into the addressing mode on success.
4936 /// If \p MovedAway == true, \p AddrInst will not be part of the addressing
4937 /// mode because it has been moved away.
4938 /// Thus AddrInst must not be added to the matched instructions.
4939/// This state can happen when AddrInst is a sext, since it may be moved away.
4940/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4941/// not be referenced anymore.
4942bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4943 unsigned Depth,
4944 bool *MovedAway) {
4945 // Avoid exponential behavior on extremely deep expression trees.
4946 if (Depth >= 5)
4947 return false;
4948
4949 // By default, all matched instructions stay in place.
4950 if (MovedAway)
4951 *MovedAway = false;
4952
4953 switch (Opcode) {
4954 case Instruction::PtrToInt:
4955 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4956 return matchAddr(AddrInst->getOperand(0), Depth);
4957 case Instruction::IntToPtr: {
4958 auto AS = AddrInst->getType()->getPointerAddressSpace();
4959 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4960 // This inttoptr is a no-op if the integer type is pointer sized.
4961 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4962 return matchAddr(AddrInst->getOperand(0), Depth);
4963 return false;
4964 }
4965 case Instruction::BitCast:
4966 // BitCast is always a noop, and we can handle it as long as it is
4967 // int->int or pointer->pointer (we don't want int<->fp or something).
4968 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4969 // Don't touch identity bitcasts. These were probably put here by LSR,
4970 // and we don't want to mess around with them. Assume it knows what it
4971 // is doing.
4972 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4973 return matchAddr(AddrInst->getOperand(0), Depth);
4974 return false;
4975 case Instruction::AddrSpaceCast: {
4976 unsigned SrcAS =
4977 AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4978 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4979 if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS))
4980 return matchAddr(AddrInst->getOperand(0), Depth);
4981 return false;
4982 }
4983 case Instruction::Add: {
4984 // Check to see if we can merge in one operand, then the other. If so, we
4985 // win.
4986 ExtAddrMode BackupAddrMode = AddrMode;
4987 unsigned OldSize = AddrModeInsts.size();
4988 // Start a transaction at this point.
4989 // The LHS may match but not the RHS.
4990 // Therefore, we need a higher level restoration point to undo partially
4991 // matched operation.
4992 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4993 TPT.getRestorationPoint();
4994
4995 // Try to match an integer constant second to increase its chance of ending
4996 // up in `BaseOffs`, resp. decrease its chance of ending up in `BaseReg`.
4997 int First = 0, Second = 1;
4998 if (isa<ConstantInt>(AddrInst->getOperand(First))
4999 && !isa<ConstantInt>(AddrInst->getOperand(Second)))
5000 std::swap(First, Second);
5001 AddrMode.InBounds = false;
5002 if (matchAddr(AddrInst->getOperand(First), Depth + 1) &&
5003 matchAddr(AddrInst->getOperand(Second), Depth + 1))
5004 return true;
5005
5006 // Restore the old addr mode info.
5007 AddrMode = BackupAddrMode;
5008 AddrModeInsts.resize(OldSize);
5009 TPT.rollback(LastKnownGood);
5010
5011 // Otherwise this was over-aggressive. Try merging operands in the opposite
5012 // order.
5013 if (matchAddr(AddrInst->getOperand(Second), Depth + 1) &&
5014 matchAddr(AddrInst->getOperand(First), Depth + 1))
5015 return true;
5016
5017 // Otherwise we definitely can't merge the ADD in.
5018 AddrMode = BackupAddrMode;
5019 AddrModeInsts.resize(OldSize);
5020 TPT.rollback(LastKnownGood);
5021 break;
5022 }
5023 // case Instruction::Or:
5024 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
5025 // break;
5026 case Instruction::Mul:
5027 case Instruction::Shl: {
5028 // Can only handle X*C and X << C.
5029 AddrMode.InBounds = false;
5030 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
5031 if (!RHS || RHS->getBitWidth() > 64)
5032 return false;
5033 int64_t Scale = Opcode == Instruction::Shl
5034 ? 1LL << RHS->getLimitedValue(RHS->getBitWidth() - 1)
5035 : RHS->getSExtValue();
5036
5037 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
5038 }
5039 case Instruction::GetElementPtr: {
5040 // Scan the GEP. We can handle it if it contains constant offsets and at
5041 // most one variable offset.
5042 int VariableOperand = -1;
5043 unsigned VariableScale = 0;
5044
5045 int64_t ConstantOffset = 0;
5046 gep_type_iterator GTI = gep_type_begin(AddrInst);
5047 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
5048 if (StructType *STy = GTI.getStructTypeOrNull()) {
5049 const StructLayout *SL = DL.getStructLayout(STy);
5050 unsigned Idx =
5051 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
5052 ConstantOffset += SL->getElementOffset(Idx);
5053 } else {
5054 TypeSize TS = GTI.getSequentialElementStride(DL);
5055 if (TS.isNonZero()) {
5056 // The optimisations below currently only work for fixed offsets.
5057 if (TS.isScalable())
5058 return false;
5059 int64_t TypeSize = TS.getFixedValue();
5060 if (ConstantInt *CI =
5061 dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
5062 const APInt &CVal = CI->getValue();
5063 if (CVal.getSignificantBits() <= 64) {
5064 ConstantOffset += CVal.getSExtValue() * TypeSize;
5065 continue;
5066 }
5067 }
5068 // We only allow one variable index at the moment.
5069 if (VariableOperand != -1)
5070 return false;
5071
5072 // Remember the variable index.
5073 VariableOperand = i;
5074 VariableScale = TypeSize;
5075 }
5076 }
5077 }
5078
5079 // A common case is for the GEP to only do a constant offset. In this case,
5080 // just add it to the disp field and check validity.
5081 if (VariableOperand == -1) {
5082 AddrMode.BaseOffs += ConstantOffset;
5083 if (matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5084 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5085 AddrMode.InBounds = false;
5086 return true;
5087 }
5088 AddrMode.BaseOffs -= ConstantOffset;
5089
5090 if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
5091 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
5092 ConstantOffset > 0) {
5093 // Record GEPs with non-zero offsets as candidates for splitting in
5094 // the event that the offset cannot fit into the r+i addressing mode.
5095 // Simple and common case that only one GEP is used in calculating the
5096 // address for the memory access.
5097 Value *Base = AddrInst->getOperand(0);
5098 auto *BaseI = dyn_cast<Instruction>(Base);
5099 auto *GEP = cast<GetElementPtrInst>(AddrInst);
5100 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
5101 (BaseI && !isa<CastInst>(BaseI) &&
5102 !isa<GetElementPtrInst>(BaseI))) {
5103 // Make sure the parent block allows inserting non-PHI instructions
5104 // before the terminator.
5105 BasicBlock *Parent = BaseI ? BaseI->getParent()
5106 : &GEP->getFunction()->getEntryBlock();
5107 if (!Parent->getTerminator()->isEHPad())
5108 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
5109 }
5110 }
5111
5112 return false;
5113 }
5114
5115 // Save the valid addressing mode in case we can't match.
5116 ExtAddrMode BackupAddrMode = AddrMode;
5117 unsigned OldSize = AddrModeInsts.size();
5118
5119 // See if the scale and offset amount is valid for this target.
5120 AddrMode.BaseOffs += ConstantOffset;
5121 if (!cast<GEPOperator>(AddrInst)->isInBounds())
5122 AddrMode.InBounds = false;
5123
5124 // Match the base operand of the GEP.
5125 if (!matchAddr(AddrInst->getOperand(0), Depth + 1)) {
5126 // If it couldn't be matched, just stuff the value in a register.
5127 if (AddrMode.HasBaseReg) {
5128 AddrMode = BackupAddrMode;
5129 AddrModeInsts.resize(OldSize);
5130 return false;
5131 }
5132 AddrMode.HasBaseReg = true;
5133 AddrMode.BaseReg = AddrInst->getOperand(0);
5134 }
5135
5136 // Match the remaining variable portion of the GEP.
5137 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
5138 Depth)) {
5139 // If it couldn't be matched, try stuffing the base into a register
5140 // instead of matching it, and retrying the match of the scale.
5141 AddrMode = BackupAddrMode;
5142 AddrModeInsts.resize(OldSize);
5143 if (AddrMode.HasBaseReg)
5144 return false;
5145 AddrMode.HasBaseReg = true;
5146 AddrMode.BaseReg = AddrInst->getOperand(0);
5147 AddrMode.BaseOffs += ConstantOffset;
5148 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
5149 VariableScale, Depth)) {
5150 // If even that didn't work, bail.
5151 AddrMode = BackupAddrMode;
5152 AddrModeInsts.resize(OldSize);
5153 return false;
5154 }
5155 }
5156
5157 return true;
5158 }
5159 case Instruction::SExt:
5160 case Instruction::ZExt: {
5161 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
5162 if (!Ext)
5163 return false;
5164
5165 // Try to move this ext out of the way of the addressing mode.
5166 // Ask for a method for doing so.
5167 TypePromotionHelper::Action TPH =
5168 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
5169 if (!TPH)
5170 return false;
5171
5172 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5173 TPT.getRestorationPoint();
5174 unsigned CreatedInstsCost = 0;
5175 unsigned ExtCost = !TLI.isExtFree(Ext);
5176 Value *PromotedOperand =
5177 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
5178 // SExt has been moved away.
5179 // Thus either it will be rematched later in the recursive calls or it is
5180 // gone. Anyway, we must not fold it into the addressing mode at this point.
5181 // E.g.,
5182 // op = add opnd, 1
5183 // idx = ext op
5184 // addr = gep base, idx
5185 // is now:
5186 // promotedOpnd = ext opnd <- no match here
5187 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
5188 // addr = gep base, op <- match
5189 if (MovedAway)
5190 *MovedAway = true;
5191
5192 assert(PromotedOperand &&
5193 "TypePromotionHelper should have filtered out those cases");
5194
5195 ExtAddrMode BackupAddrMode = AddrMode;
5196 unsigned OldSize = AddrModeInsts.size();
5197
5198 if (!matchAddr(PromotedOperand, Depth) ||
5199 // The total of the new cost is equal to the cost of the created
5200 // instructions.
5201 // The total of the old cost is equal to the cost of the extension plus
5202 // what we have saved in the addressing mode.
5203 !isPromotionProfitable(CreatedInstsCost,
5204 ExtCost + (AddrModeInsts.size() - OldSize),
5205 PromotedOperand)) {
5206 AddrMode = BackupAddrMode;
5207 AddrModeInsts.resize(OldSize);
5208 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
5209 TPT.rollback(LastKnownGood);
5210 return false;
5211 }
5212 return true;
5213 }
5214 case Instruction::Call:
5215 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(AddrInst)) {
5216 if (II->getIntrinsicID() == Intrinsic::threadlocal_address) {
5217 GlobalValue &GV = cast<GlobalValue>(*II->getArgOperand(0));
5218 if (TLI.addressingModeSupportsTLS(GV))
5219 return matchAddr(AddrInst->getOperand(0), Depth);
5220 }
5221 }
5222 break;
5223 }
5224 return false;
5225}
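// Illustrative sketch (hypothetical IR): for a GEP such as
//   %p = getelementptr inbounds [16 x i32], ptr %base, i64 0, i64 %i
// the GetElementPtr case above folds the constant part into AddrMode.BaseOffs
// (0 here) and hands the single variable index to matchScaledValue with
// Scale = 4, so the candidate addressing mode is roughly [%base + 4*%i].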
5226
5227/// If we can, try to add the value of 'Addr' into the current addressing mode.
5228/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
5229/// unmodified. This assumes that Addr is either a pointer type or intptr_t
5230/// for the target.
5231///
5232bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
5233 // Start a transaction at this point that we will rollback if the matching
5234 // fails.
5235 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5236 TPT.getRestorationPoint();
5237 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
5238 if (CI->getValue().isSignedIntN(64)) {
5239 // Fold in immediates if legal for the target.
5240 AddrMode.BaseOffs += CI->getSExtValue();
5241 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5242 return true;
5243 AddrMode.BaseOffs -= CI->getSExtValue();
5244 }
5245 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
5246 // If this is a global variable, try to fold it into the addressing mode.
5247 if (!AddrMode.BaseGV) {
5248 AddrMode.BaseGV = GV;
5249 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5250 return true;
5251 AddrMode.BaseGV = nullptr;
5252 }
5253 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
5254 ExtAddrMode BackupAddrMode = AddrMode;
5255 unsigned OldSize = AddrModeInsts.size();
5256
5257 // Check to see if it is possible to fold this operation.
5258 bool MovedAway = false;
5259 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
5260 // This instruction may have been moved away. If so, there is nothing
5261 // to check here.
5262 if (MovedAway)
5263 return true;
5264 // Okay, it's possible to fold this. Check to see if it is actually
5265 // *profitable* to do so. We use a simple cost model to avoid increasing
5266 // register pressure too much.
5267 if (I->hasOneUse() ||
5268 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
5269 AddrModeInsts.push_back(I);
5270 return true;
5271 }
5272
5273 // It isn't profitable to do this, roll back.
5274 AddrMode = BackupAddrMode;
5275 AddrModeInsts.resize(OldSize);
5276 TPT.rollback(LastKnownGood);
5277 }
5278 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
5279 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
5280 return true;
5281 TPT.rollback(LastKnownGood);
5282 } else if (isa<ConstantPointerNull>(Addr)) {
5283 // Null pointer gets folded without affecting the addressing mode.
5284 return true;
5285 }
5286
5287 // Worst case, the target should support [reg] addressing modes. :)
5288 if (!AddrMode.HasBaseReg) {
5289 AddrMode.HasBaseReg = true;
5290 AddrMode.BaseReg = Addr;
5291 // Still check for legality in case the target supports [imm] but not [i+r].
5292 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5293 return true;
5294 AddrMode.HasBaseReg = false;
5295 AddrMode.BaseReg = nullptr;
5296 }
5297
5298 // If the base register is already taken, see if we can do [r+r].
5299 if (AddrMode.Scale == 0) {
5300 AddrMode.Scale = 1;
5301 AddrMode.ScaledReg = Addr;
5302 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
5303 return true;
5304 AddrMode.Scale = 0;
5305 AddrMode.ScaledReg = nullptr;
5306 }
5307 // Couldn't match.
5308 TPT.rollback(LastKnownGood);
5309 return false;
5310}
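// Illustrative example: with AddrMode already holding a base register and no
// scaled register, a second unmatched value %idx can still be absorbed as
// [BaseReg + 1*%idx] via the Scale/ScaledReg fallback above, provided the
// target reports that form as a legal addressing mode.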
5311
5312/// Check to see if all uses of OpVal by the specified inline asm call are due
5313/// to memory operands. If so, return true, otherwise return false.
5314static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
5315 const TargetLowering &TLI,
5316 const TargetRegisterInfo &TRI) {
5317 const Function *F = CI->getFunction();
5318 TargetLowering::AsmOperandInfoVector TargetConstraints =
5319 TLI.ParseConstraints(F->getDataLayout(), &TRI, *CI);
5320
5321 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
5322 // Compute the constraint code and ConstraintType to use.
5323 TLI.ComputeConstraintToUse(OpInfo, SDValue());
5324
5325 // If this asm operand is our Value*, and if it isn't an indirect memory
5326 // operand, we can't fold it! TODO: Also handle C_Address?
5327 if (OpInfo.CallOperandVal == OpVal &&
5328 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
5329 !OpInfo.isIndirect))
5330 return false;
5331 }
5332
5333 return true;
5334}
5335
5336/// Recursively walk all the uses of I until we find a memory use.
5337/// If we find an obviously non-foldable instruction, return true.
5338/// Add accessed addresses and types to MemoryUses.
5339static bool FindAllMemoryUses(
5340 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5341 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
5342 const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI,
5343 BlockFrequencyInfo *BFI, unsigned &SeenInsts) {
5344 // If we already considered this instruction, we're done.
5345 if (!ConsideredInsts.insert(I).second)
5346 return false;
5347
5348 // If this is an obviously unfoldable instruction, bail out.
5349 if (!MightBeFoldableInst(I))
5350 return true;
5351
5352 // Loop over all the uses, recursively processing them.
5353 for (Use &U : I->uses()) {
5354 // Conservatively return true if we're seeing a large number or a deep chain
5355 // of users. This avoids excessive compilation times in pathological cases.
5356 if (SeenInsts++ >= MaxAddressUsersToScan)
5357 return true;
5358
5359 Instruction *UserI = cast<Instruction>(U.getUser());
5360 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
5361 MemoryUses.push_back({&U, LI->getType()});
5362 continue;
5363 }
5364
5365 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
5366 if (U.getOperandNo() != StoreInst::getPointerOperandIndex())
5367 return true; // Storing addr, not into addr.
5368 MemoryUses.push_back({&U, SI->getValueOperand()->getType()});
5369 continue;
5370 }
5371
5372 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
5373 if (U.getOperandNo() != AtomicRMWInst::getPointerOperandIndex())
5374 return true; // Storing addr, not into addr.
5375 MemoryUses.push_back({&U, RMW->getValOperand()->getType()});
5376 continue;
5377 }
5378
5379 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
5380 if (U.getOperandNo() != AtomicCmpXchgInst::getPointerOperandIndex())
5381 return true; // Storing addr, not into addr.
5382 MemoryUses.push_back({&U, CmpX->getCompareOperand()->getType()});
5383 continue;
5384 }
5385
5386 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
5387 if (CI->hasFnAttr(Attribute::Cold)) {
5388 // If this is a cold call, we can sink the addressing calculation into
5389 // the cold path. See optimizeCallInst
5390 bool OptForSize =
5391 OptSize || llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
5392 if (!OptForSize)
5393 continue;
5394 }
5395
5396 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand());
5397 if (!IA)
5398 return true;
5399
5400 // If this is a memory operand, we're cool, otherwise bail out.
5401 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
5402 return true;
5403 continue;
5404 }
5405
5406 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5407 PSI, BFI, SeenInsts))
5408 return true;
5409 }
5410
5411 return false;
5412}
5413
5414static bool FindAllMemoryUses(
5415 Instruction *I, SmallVectorImpl<std::pair<Use *, Type *>> &MemoryUses,
5416 const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize,
5417 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
5418 unsigned SeenInsts = 0;
5419 SmallPtrSet<Instruction *, 16> ConsideredInsts;
5420 return FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize,
5421 PSI, BFI, SeenInsts);
5422}
5423
5424
5425/// Return true if Val is already known to be live at the use site that we're
5426/// folding it into. If so, there is no cost to include it in the addressing
5427/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
5428/// instruction already.
5429bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,
5430 Value *KnownLive1,
5431 Value *KnownLive2) {
5432 // If Val is either of the known-live values, we know it is live!
5433 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
5434 return true;
5435
5436 // All values other than instructions and arguments (e.g. constants) are live.
5437 if (!isa<Instruction>(Val) && !isa<Argument>(Val))
5438 return true;
5439
5440 // If Val is a constant-sized alloca in the entry block, it is live; this is
5441 // true because it is just a reference to the stack/frame pointer, which is
5442 // live for the whole function.
5443 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
5444 if (AI->isStaticAlloca())
5445 return true;
5446
5447 // Check to see if this value is already used in the memory instruction's
5448 // block. If so, it's already live into the block at the very least, so we
5449 // can reasonably fold it.
5450 return Val->isUsedInBasicBlock(MemoryInst->getParent());
5451}
5452
5453/// It is possible for the addressing mode of the machine to fold the specified
5454/// instruction into a load or store that ultimately uses it.
5455/// However, the specified instruction has multiple uses.
5456/// Given this, it may actually increase register pressure to fold it
5457/// into the load. For example, consider this code:
5458///
5459/// X = ...
5460/// Y = X+1
5461/// use(Y) -> nonload/store
5462/// Z = Y+1
5463/// load Z
5464///
5465/// In this case, Y has multiple uses, and can be folded into the load of Z
5466/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
5467/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
5468/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
5469/// number of computations either.
5470///
5471/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
5472/// X was live across 'load Z' for other reasons, we actually *would* want to
5473/// fold the addressing mode in the Z case. This would make Y die earlier.
5474bool AddressingModeMatcher::isProfitableToFoldIntoAddressingMode(
5475 Instruction *I, ExtAddrMode &AMBefore, ExtAddrMode &AMAfter) {
5476 if (IgnoreProfitability)
5477 return true;
5478
5479 // AMBefore is the addressing mode before this instruction was folded into it,
5480 // and AMAfter is the addressing mode after the instruction was folded. Get
5481 // the set of registers referenced by AMAfter and subtract out those
5482 // referenced by AMBefore: this is the set of values which folding in this
5483 // address extends the lifetime of.
5484 //
5485 // Note that there are only two potential values being referenced here,
5486 // BaseReg and ScaleReg (global addresses are always available, as are any
5487 // folded immediates).
5488 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
5489
5490 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
5491 // lifetime wasn't extended by adding this instruction.
5492 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5493 BaseReg = nullptr;
5494 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
5495 ScaledReg = nullptr;
5496
5497 // If folding this instruction (and its subexprs) didn't extend any live
5498 // ranges, we're ok with it.
5499 if (!BaseReg && !ScaledReg)
5500 return true;
5501
5502 // If all uses of this instruction can have the address mode sunk into them,
5503 // we can remove the addressing mode and effectively trade one live register
5504 // for another (at worst.) In this context, folding an addressing mode into
5505 // the use is just a particularly nice way of sinking it.
5506 SmallVector<std::pair<Use *, Type *>, 16> MemoryUses;
5507 if (FindAllMemoryUses(I, MemoryUses, TLI, TRI, OptSize, PSI, BFI))
5508 return false; // Has a non-memory, non-foldable use!
5509
5510 // Now that we know that all uses of this instruction are part of a chain of
5511 // computation involving only operations that could theoretically be folded
5512 // into a memory use, loop over each of these memory operation uses and see
5513 // if they could *actually* fold the instruction. The assumption is that
5514 // addressing modes are cheap and that duplicating the computation involved
5515 // many times is worthwhile, even on a fastpath. For sinking candidates
5516 // (i.e. cold call sites), this serves as a way to prevent excessive code
5517 // growth since most architectures have some reasonably small and fast way to
5518 // compute an effective address (e.g., LEA on x86).
5519 SmallVector<Instruction *, 32> MatchedAddrModeInsts;
5520 for (const std::pair<Use *, Type *> &Pair : MemoryUses) {
5521 Value *Address = Pair.first->get();
5522 Instruction *UserI = cast<Instruction>(Pair.first->getUser());
5523 Type *AddressAccessTy = Pair.second;
5524 unsigned AS = Address->getType()->getPointerAddressSpace();
5525
5526 // Do a match against the root of this address, ignoring profitability. This
5527 // will tell us if the addressing mode for the memory operation will
5528 // *actually* cover the shared instruction.
5529 ExtAddrMode Result;
5530 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5531 0);
5532 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5533 TPT.getRestorationPoint();
5534 AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
5535 AddressAccessTy, AS, UserI, Result,
5536 InsertedInsts, PromotedInsts, TPT,
5537 LargeOffsetGEP, OptSize, PSI, BFI);
5538 Matcher.IgnoreProfitability = true;
5539 bool Success = Matcher.matchAddr(Address, 0);
5540 (void)Success;
5541 assert(Success && "Couldn't select *anything*?");
5542
5543 // The match was to check the profitability, the changes made are not
5544 // part of the original matcher. Therefore, they should be dropped
5545 // otherwise the original matcher will not present the right state.
5546 TPT.rollback(LastKnownGood);
5547
5548 // If the match didn't cover I, then it won't be shared by it.
5549 if (!is_contained(MatchedAddrModeInsts, I))
5550 return false;
5551
5552 MatchedAddrModeInsts.clear();
5553 }
5554
5555 return true;
5556}
5557
5558/// Return true if the specified values are defined in a
5559/// different basic block than BB.
5560static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
5561 if (Instruction *I = dyn_cast<Instruction>(V))
5562 return I->getParent() != BB;
5563 return false;
5564}
5565
5566 /// Sink addressing mode computation immediately before MemoryInst if doing so
5567/// can be done without increasing register pressure. The need for the
5568/// register pressure constraint means this can end up being an all or nothing
5569/// decision for all uses of the same addressing computation.
5570///
5571/// Load and Store Instructions often have addressing modes that can do
5572/// significant amounts of computation. As such, instruction selection will try
5573/// to get the load or store to do as much computation as possible for the
5574/// program. The problem is that isel can only see within a single block. As
5575/// such, we sink as much legal addressing mode work into the block as possible.
5576///
5577/// This method is used to optimize both load/store and inline asms with memory
5578/// operands. It's also used to sink addressing computations feeding into cold
5579/// call sites into their (cold) basic block.
5580///
5581/// The motivation for handling sinking into cold blocks is that doing so can
5582/// both enable other address mode sinking (by satisfying the register pressure
5583/// constraint above), and reduce register pressure globally (by removing the
5584 /// addressing mode computation from the fast path entirely).
5585bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
5586 Type *AccessTy, unsigned AddrSpace) {
5587 Value *Repl = Addr;
5588
5589 // Try to collapse single-value PHI nodes. This is necessary to undo
5590 // unprofitable PRE transformations.
5591 SmallVector<Value *, 8> worklist;
5592 SmallPtrSet<Value *, 16> Visited;
5593 worklist.push_back(Addr);
5594
5595 // Use a worklist to iteratively look through PHI and select nodes, and
5596 // ensure that the addressing mode obtained from the non-PHI/select roots of
5597 // the graph are compatible.
5598 bool PhiOrSelectSeen = false;
5599 SmallVector<Instruction *, 16> AddrModeInsts;
5600 const SimplifyQuery SQ(*DL, TLInfo);
5601 AddressingModeCombiner AddrModes(SQ, Addr);
5602 TypePromotionTransaction TPT(RemovedInsts);
5603 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5604 TPT.getRestorationPoint();
5605 while (!worklist.empty()) {
5606 Value *V = worklist.pop_back_val();
5607
5608 // We allow traversing cyclic Phi nodes.
5609 // In case of success after this loop we ensure that traversing through
5610 // Phi nodes ends up with all cases to compute address of the form
5611 // BaseGV + Base + Scale * Index + Offset
5612 // where Scale and Offset are constants and BaseGV, Base and Index
5613 // are exactly the same Values in all cases.
5614 // It means that BaseGV, Scale and Offset dominate our memory instruction
5615 // and have the same value as they had in address computation represented
5616 // as Phi. So we can safely sink address computation to memory instruction.
5617 if (!Visited.insert(V).second)
5618 continue;
5619
5620 // For a PHI node, push all of its incoming values.
5621 if (PHINode *P = dyn_cast<PHINode>(V)) {
5622 append_range(worklist, P->incoming_values());
5623 PhiOrSelectSeen = true;
5624 continue;
5625 }
5626 // Similar for select.
5627 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
5628 worklist.push_back(SI->getFalseValue());
5629 worklist.push_back(SI->getTrueValue());
5630 PhiOrSelectSeen = true;
5631 continue;
5632 }
5633
5634 // For non-PHIs, determine the addressing mode being computed. Note that
5635 // the result may differ depending on what other uses our candidate
5636 // addressing instructions might have.
5637 AddrModeInsts.clear();
5638 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
5639 0);
5640 // Defer the query (and possible computation of) the dom tree to point of
5641 // actual use. It's expected that most address matches don't actually need
5642 // the domtree.
5643 auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
5644 Function *F = MemoryInst->getParent()->getParent();
5645 return this->getDT(*F);
5646 };
5647 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
5648 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
5649 *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
5650 BFI.get());
5651
5652 GetElementPtrInst *GEP = LargeOffsetGEP.first;
5653 if (GEP && !NewGEPBases.count(GEP)) {
5654 // If splitting the underlying data structure can reduce the offset of a
5655 // GEP, collect the GEP. Skip the GEPs that are the new bases of
5656 // previously split data structures.
5657 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
5658 LargeOffsetGEPID.insert(std::make_pair(GEP, LargeOffsetGEPID.size()));
5659 }
5660
5661 NewAddrMode.OriginalValue = V;
5662 if (!AddrModes.addNewAddrMode(NewAddrMode))
5663 break;
5664 }
5665
5666 // Try to combine the AddrModes we've collected. If we couldn't collect any,
5667 // or we have multiple but either couldn't combine them or combining them
5668 // wouldn't do anything useful, bail out now.
5669 if (!AddrModes.combineAddrModes()) {
5670 TPT.rollback(LastKnownGood);
5671 return false;
5672 }
5673 bool Modified = TPT.commit();
5674
5675 // Get the combined AddrMode (or the only AddrMode, if we only had one).
5676 ExtAddrMode AddrMode = AddrModes.getAddrMode();
5677
5678 // If all the instructions matched are already in this BB, don't do anything.
5679 // If we saw a Phi node then it is definitely not local, and if we saw a
5680 // select then we want to push the address calculation past it even if it's
5681 // already in this BB.
5682 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
5683 return IsNonLocalValue(V, MemoryInst->getParent());
5684 })) {
5685 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
5686 << "\n");
5687 return Modified;
5688 }
5689
5690 // Insert this computation right after this user. Since our caller is
5691 // scanning from the top of the BB to the bottom, reuses of the expr are
5692 // guaranteed to happen later.
5693 IRBuilder<> Builder(MemoryInst);
5694
5695 // Now that we've determined the addressing expression we want to use and
5696 // know that we have to sink it into this block, check to see if we have
5697 // already done this for some other load/store instr in this block. If so,
5698 // reuse the computation. Before attempting reuse, check if the address is
5699 // valid as it may have been erased.
5700
5701 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
5702
5703 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
5704 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5705 if (SunkAddr) {
5706 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
5707 << " for " << *MemoryInst << "\n");
5708 if (SunkAddr->getType() != Addr->getType()) {
5709 if (SunkAddr->getType()->getPointerAddressSpace() !=
5710 Addr->getType()->getPointerAddressSpace() &&
5711 !DL->isNonIntegralPointerType(Addr->getType())) {
5712 // There are two reasons the address spaces might not match: a no-op
5713 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5714 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5715 // TODO: allow bitcast between different address space pointers with the
5716 // same size.
5717 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5718 SunkAddr =
5719 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5720 } else
5721 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5722 }
5723 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
5724 SubtargetInfo->addrSinkUsingGEPs())) {
5725 // By default, we use the GEP-based method when AA is used later. This
5726 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
5727 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5728 << " for " << *MemoryInst << "\n");
5729 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
5730
5731 // First, find the pointer.
5732 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
5733 ResultPtr = AddrMode.BaseReg;
5734 AddrMode.BaseReg = nullptr;
5735 }
5736
5737 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
5738 // We can't add more than one pointer together, nor can we scale a
5739 // pointer (both of which seem meaningless).
5740 if (ResultPtr || AddrMode.Scale != 1)
5741 return Modified;
5742
5743 ResultPtr = AddrMode.ScaledReg;
5744 AddrMode.Scale = 0;
5745 }
5746
5747 // It is only safe to sign extend the BaseReg if we know that the math
5748 // required to create it did not overflow before we extend it. Since
5749 // the original IR value was tossed in favor of a constant back when
5750 // the AddrMode was created we need to bail out gracefully if widths
5751 // do not match instead of extending it.
5752 //
5753 // (See below for code to add the scale.)
5754 if (AddrMode.Scale) {
5755 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
5756 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
5757 cast<IntegerType>(ScaledRegTy)->getBitWidth())
5758 return Modified;
5759 }
5760
5761 GlobalValue *BaseGV = AddrMode.BaseGV;
5762 if (BaseGV != nullptr) {
5763 if (ResultPtr)
5764 return Modified;
5765
5766 if (BaseGV->isThreadLocal()) {
5767 ResultPtr = Builder.CreateThreadLocalAddress(BaseGV);
5768 } else {
5769 ResultPtr = BaseGV;
5770 }
5771 }
5772
5773 // If the real base value actually came from an inttoptr, then the matcher
5774 // will look through it and provide only the integer value. In that case,
5775 // use it here.
5776 if (!DL->isNonIntegralPointerType(Addr->getType())) {
5777 if (!ResultPtr && AddrMode.BaseReg) {
5778 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
5779 "sunkaddr");
5780 AddrMode.BaseReg = nullptr;
5781 } else if (!ResultPtr && AddrMode.Scale == 1) {
5782 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
5783 "sunkaddr");
5784 AddrMode.Scale = 0;
5785 }
5786 }
5787
5788 if (!ResultPtr && !AddrMode.BaseReg && !AddrMode.Scale &&
5789 !AddrMode.BaseOffs) {
5790 SunkAddr = Constant::getNullValue(Addr->getType());
5791 } else if (!ResultPtr) {
5792 return Modified;
5793 } else {
5794 Type *I8PtrTy =
5795 Builder.getPtrTy(Addr->getType()->getPointerAddressSpace());
5796
5797 // Start with the base register. Do this first so that subsequent address
5798 // matching finds it last, which will prevent it from trying to match it
5799 // as the scaled value in case it happens to be a mul. That would be
5800 // problematic if we've sunk a different mul for the scale, because then
5801 // we'd end up sinking both muls.
5802 if (AddrMode.BaseReg) {
5803 Value *V = AddrMode.BaseReg;
5804 if (V->getType() != IntPtrTy)
5805 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5806
5807 ResultIndex = V;
5808 }
5809
5810 // Add the scale value.
5811 if (AddrMode.Scale) {
5812 Value *V = AddrMode.ScaledReg;
5813 if (V->getType() == IntPtrTy) {
5814 // done.
5815 } else {
5816 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
5817 cast<IntegerType>(V->getType())->getBitWidth() &&
5818 "We can't transform if ScaledReg is too narrow");
5819 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5820 }
5821
5822 if (AddrMode.Scale != 1)
5823 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5824 "sunkaddr");
5825 if (ResultIndex)
5826 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
5827 else
5828 ResultIndex = V;
5829 }
5830
5831 // Add in the Base Offset if present.
5832 if (AddrMode.BaseOffs) {
5833 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5834 if (ResultIndex) {
5835 // We need to add this separately from the scale above to help with
5836 // SDAG consecutive load/store merging.
5837 if (ResultPtr->getType() != I8PtrTy)
5838 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5839 ResultPtr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5840 AddrMode.InBounds);
5841 }
5842
5843 ResultIndex = V;
5844 }
5845
5846 if (!ResultIndex) {
5847 SunkAddr = ResultPtr;
5848 } else {
5849 if (ResultPtr->getType() != I8PtrTy)
5850 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
5851 SunkAddr = Builder.CreatePtrAdd(ResultPtr, ResultIndex, "sunkaddr",
5852 AddrMode.InBounds);
5853 }
5854
5855 if (SunkAddr->getType() != Addr->getType()) {
5856 if (SunkAddr->getType()->getPointerAddressSpace() !=
5857 Addr->getType()->getPointerAddressSpace() &&
5858 !DL->isNonIntegralPointerType(Addr->getType())) {
5859 // There are two reasons the address spaces might not match: a no-op
5860 // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a
5861 // ptrtoint/inttoptr pair to ensure we match the original semantics.
5862 // TODO: allow bitcast between different address space pointers with
5863 // the same size.
5864 SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr");
5865 SunkAddr =
5866 Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr");
5867 } else
5868 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
5869 }
5870 }
5871 } else {
5872 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
5873 // non-integral pointers, so in that case bail out now.
5874 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
5875 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
5876 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
5877 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
5878 if (DL->isNonIntegralPointerType(Addr->getType()) ||
5879 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
5880 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
5881 (AddrMode.BaseGV &&
5882 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
5883 return Modified;
5884
5885 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
5886 << " for " << *MemoryInst << "\n");
5887 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
5888 Value *Result = nullptr;
5889
5890 // Start with the base register. Do this first so that subsequent address
5891 // matching finds it last, which will prevent it from trying to match it
5892 // as the scaled value in case it happens to be a mul. That would be
5893 // problematic if we've sunk a different mul for the scale, because then
5894 // we'd end up sinking both muls.
5895 if (AddrMode.BaseReg) {
5896 Value *V = AddrMode.BaseReg;
5897 if (V->getType()->isPointerTy())
5898 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5899 if (V->getType() != IntPtrTy)
5900 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
5901 Result = V;
5902 }
5903
5904 // Add the scale value.
5905 if (AddrMode.Scale) {
5906 Value *V = AddrMode.ScaledReg;
5907 if (V->getType() == IntPtrTy) {
5908 // done.
5909 } else if (V->getType()->isPointerTy()) {
5910 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
5911 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
5912 cast<IntegerType>(V->getType())->getBitWidth()) {
5913 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
5914 } else {
5915 // It is only safe to sign extend the BaseReg if we know that the math
5916 // required to create it did not overflow before we extend it. Since
5917 // the original IR value was tossed in favor of a constant back when
5918 // the AddrMode was created we need to bail out gracefully if widths
5919 // do not match instead of extending it.
5920 Instruction *I = dyn_cast_or_null<Instruction>(Result);
5921 if (I && (Result != AddrMode.BaseReg))
5922 I->eraseFromParent();
5923 return Modified;
5924 }
5925 if (AddrMode.Scale != 1)
5926 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
5927 "sunkaddr");
5928 if (Result)
5929 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5930 else
5931 Result = V;
5932 }
5933
5934 // Add in the BaseGV if present.
5935 GlobalValue *BaseGV = AddrMode.BaseGV;
5936 if (BaseGV != nullptr) {
5937 Value *BaseGVPtr;
5938 if (BaseGV->isThreadLocal()) {
5939 BaseGVPtr = Builder.CreateThreadLocalAddress(BaseGV);
5940 } else {
5941 BaseGVPtr = BaseGV;
5942 }
5943 Value *V = Builder.CreatePtrToInt(BaseGVPtr, IntPtrTy, "sunkaddr");
5944 if (Result)
5945 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5946 else
5947 Result = V;
5948 }
5949
5950 // Add in the Base Offset if present.
5951 if (AddrMode.BaseOffs) {
5952 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5953 if (Result)
5954 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5955 else
5956 Result = V;
5957 }
5958
5959 if (!Result)
5960 SunkAddr = Constant::getNullValue(Addr->getType());
5961 else
5962 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5963 }
5964
5965 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5966 // Store the newly computed address into the cache. In the case we reused a
5967 // value, this should be idempotent.
5968 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5969
5970 // If we have no uses, recursively delete the value and all dead instructions
5971 // using it.
5972 if (Repl->use_empty()) {
5973 resetIteratorIfInvalidatedWhileCalling(CurInstIterator->getParent(), [&]() {
5974 RecursivelyDeleteTriviallyDeadInstructions(
5975 Repl, TLInfo, nullptr,
5976 [&](Value *V) { removeAllAssertingVHReferences(V); });
5977 });
5978 }
5979 ++NumMemoryInsts;
5980 return true;
5981}
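// Illustrative sketch (hypothetical IR, names approximate): for an address
// mode of the form [%base + 4*%i + 16], the GEP-based path above emits
//   %sunk.mul  = mul i64 %i, 4
//   %sunkaddr  = getelementptr i8, ptr %base, i64 %sunk.mul
//   %sunkaddr1 = getelementptr i8, ptr %sunkaddr, i64 16
// right before the memory instruction and rewrites it to use the final
// pointer.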
5982
5983/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find
5984/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can
5985/// only handle a 2 operand GEP in the same basic block or a splat constant
5986/// vector. The 2 operands to the GEP must have a scalar pointer and a vector
5987/// index.
5988///
5989/// If the existing GEP has a vector base pointer that is splat, we can look
5990/// through the splat to find the scalar pointer. If we can't find a scalar
5991/// pointer there's nothing we can do.
5992///
5993/// If we have a GEP with more than 2 indices where the middle indices are all
5994/// zeroes, we can replace it with 2 GEPs where the second has 2 operands.
5995///
5996/// If the final index isn't a vector or is a splat, we can emit a scalar GEP
5997/// followed by a GEP with an all zeroes vector index. This will enable
5998/// SelectionDAGBuilder to use the scalar GEP as the uniform base and have a
5999/// zero index.
6000bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
6001 Value *Ptr) {
6002 Value *NewAddr;
6003
6004 if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
6005 // Don't optimize GEPs that don't have indices.
6006 if (!GEP->hasIndices())
6007 return false;
6008
6009 // If the GEP and the gather/scatter aren't in the same BB, don't optimize.
6010 // FIXME: We should support this by sinking the GEP.
6011 if (MemoryInst->getParent() != GEP->getParent())
6012 return false;
6013
6014 SmallVector<Value *, 2> Ops(GEP->operands());
6015
6016 bool RewriteGEP = false;
6017
6018 if (Ops[0]->getType()->isVectorTy()) {
6019 Ops[0] = getSplatValue(Ops[0]);
6020 if (!Ops[0])
6021 return false;
6022 RewriteGEP = true;
6023 }
6024
6025 unsigned FinalIndex = Ops.size() - 1;
6026
6027 // Ensure that all indices but the last are 0.
6028 // FIXME: This isn't strictly required. All that's required is that they are
6029 // all scalars or splats.
6030 for (unsigned i = 1; i < FinalIndex; ++i) {
6031 auto *C = dyn_cast<Constant>(Ops[i]);
6032 if (!C)
6033 return false;
6034 if (isa<VectorType>(C->getType()))
6035 C = C->getSplatValue();
6036 auto *CI = dyn_cast_or_null<ConstantInt>(C);
6037 if (!CI || !CI->isZero())
6038 return false;
6039 // Scalarize the index if needed.
6040 Ops[i] = CI;
6041 }
6042
6043 // Try to scalarize the final index.
6044 if (Ops[FinalIndex]->getType()->isVectorTy()) {
6045 if (Value *V = getSplatValue(Ops[FinalIndex])) {
6046 auto *C = dyn_cast<ConstantInt>(V);
6047 // Don't scalarize all zeros vector.
6048 if (!C || !C->isZero()) {
6049 Ops[FinalIndex] = V;
6050 RewriteGEP = true;
6051 }
6052 }
6053 }
6054
6055 // If we made any changes or we have extra operands, we need to generate
6056 // new instructions.
6057 if (!RewriteGEP && Ops.size() == 2)
6058 return false;
6059
6060 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6061
6062 IRBuilder<> Builder(MemoryInst);
6063
6064 Type *SourceTy = GEP->getSourceElementType();
6065 Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
6066
6067 // If the final index isn't a vector, emit a scalar GEP containing all ops
6068 // and a vector GEP with all zeroes final index.
6069 if (!Ops[FinalIndex]->getType()->isVectorTy()) {
6070 NewAddr = Builder.CreateGEP(SourceTy, Ops[0], ArrayRef(Ops).drop_front());
6071 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6072 auto *SecondTy = GetElementPtrInst::getIndexedType(
6073 SourceTy, ArrayRef(Ops).drop_front());
6074 NewAddr =
6075 Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
6076 } else {
6077 Value *Base = Ops[0];
6078 Value *Index = Ops[FinalIndex];
6079
6080 // Create a scalar GEP if there are more than 2 operands.
6081 if (Ops.size() != 2) {
6082 // Replace the last index with 0.
6083 Ops[FinalIndex] =
6084 Constant::getNullValue(Ops[FinalIndex]->getType()->getScalarType());
6085 Base = Builder.CreateGEP(SourceTy, Base, ArrayRef(Ops).drop_front());
6086 SourceTy = GetElementPtrInst::getIndexedType(
6087 SourceTy, ArrayRef(Ops).drop_front());
6088 }
6089
6090 // Now create the GEP with scalar pointer and vector index.
6091 NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
6092 }
6093 } else if (!isa<Constant>(Ptr)) {
6094 // Not a GEP; maybe it's a splat and we can create a GEP to enable
6095 // SelectionDAGBuilder to use it as a uniform base.
6096 Value *V = getSplatValue(Ptr);
6097 if (!V)
6098 return false;
6099
6100 auto NumElts = cast<VectorType>(Ptr->getType())->getElementCount();
6101
6102 IRBuilder<> Builder(MemoryInst);
6103
6104 // Emit a vector GEP with a scalar pointer and all 0s vector index.
6105 Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
6106 auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
6107 Type *ScalarTy;
6108 if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6109 Intrinsic::masked_gather) {
6110 ScalarTy = MemoryInst->getType()->getScalarType();
6111 } else {
6112 assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
6113 Intrinsic::masked_scatter);
6114 ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
6115 }
6116 NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
6117 } else {
6118 // Constant, SelectionDAGBuilder knows to check if it's a splat.
6119 return false;
6120 }
6121
6122 MemoryInst->replaceUsesOfWith(Ptr, NewAddr);
6123
6124 // If we have no uses, recursively delete the value and all dead instructions
6125 // using it.
6126 if (Ptr->use_empty())
6127 RecursivelyDeleteTriviallyDeadInstructions(
6128 Ptr, TLInfo, nullptr,
6129 [&](Value *V) { removeAllAssertingVHReferences(V); });
6130
6131 return true;
6132}
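// A rough IR sketch of the rewrite above (value names invented, trailing
// gather operands elided): a gather whose GEP has a uniform (splat) vector
// base and a scalar index,
//
//   %ptrs = getelementptr i32, <4 x ptr> %splat.base, i64 %idx
//   %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, ...)
//
// is rebuilt so SelectionDAGBuilder can see a scalar uniform base:
//
//   %scalar = getelementptr i32, ptr %base, i64 %idx
//   %vptrs = getelementptr i32, ptr %scalar, <4 x i64> zeroinitializer
//   %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %vptrs, ...)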
6133
6134/// If there are any memory operands, use OptimizeMemoryInst to sink their
6135/// address computing into the block when possible / profitable.
6136bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
6137 bool MadeChange = false;
6138
6139 const TargetRegisterInfo *TRI =
6140 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
6141 TargetLowering::AsmOperandInfoVector TargetConstraints =
6142 TLI->ParseConstraints(*DL, TRI, *CS);
6143 unsigned ArgNo = 0;
6144 for (TargetLowering::AsmOperandInfo &OpInfo : TargetConstraints) {
6145 // Compute the constraint code and ConstraintType to use.
6146 TLI->ComputeConstraintToUse(OpInfo, SDValue());
6147
6148 // TODO: Also handle C_Address?
6149 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
6150 OpInfo.isIndirect) {
6151 Value *OpVal = CS->getArgOperand(ArgNo++);
6152 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
6153 } else if (OpInfo.Type == InlineAsm::isInput)
6154 ArgNo++;
6155 }
6156
6157 return MadeChange;
6158}
6159
6160/// Check if all the uses of \p Val are equivalent (or free) zero or
6161/// sign extensions.
6162static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
6163 assert(!Val->use_empty() && "Input must have at least one use");
6164 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
6165 bool IsSExt = isa<SExtInst>(FirstUser);
6166 Type *ExtTy = FirstUser->getType();
6167 for (const User *U : Val->users()) {
6168 const Instruction *UI = cast<Instruction>(U);
6169 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
6170 return false;
6171 Type *CurTy = UI->getType();
6172 // Same input and output types: Same instruction after CSE.
6173 if (CurTy == ExtTy)
6174 continue;
6175
6176 // If IsSExt is true, we are in this situation:
6177 // a = Val
6178 // b = sext ty1 a to ty2
6179 // c = sext ty1 a to ty3
6180 // Assuming ty2 is shorter than ty3, this could be turned into:
6181 // a = Val
6182 // b = sext ty1 a to ty2
6183 // c = sext ty2 b to ty3
6184 // However, the last sext is not free.
6185 if (IsSExt)
6186 return false;
6187
6188 // This is a ZExt, maybe this is free to extend from one type to another.
6189 // In that case, we would not account for a different use.
6190 Type *NarrowTy;
6191 Type *LargeTy;
6192 if (ExtTy->getScalarType()->getIntegerBitWidth() >
6193 CurTy->getScalarType()->getIntegerBitWidth()) {
6194 NarrowTy = CurTy;
6195 LargeTy = ExtTy;
6196 } else {
6197 NarrowTy = ExtTy;
6198 LargeTy = CurTy;
6199 }
6200
6201 if (!TLI.isZExtFree(NarrowTy, LargeTy))
6202 return false;
6203 }
6204 // All uses are the same or can be derived from one another for free.
6205 return true;
6206}
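// Illustrative example (invented names): if %a has exactly these users,
//   %b = zext i16 %a to i32
//   %c = zext i16 %a to i64
// and the target reports zext from i32 to i64 as free, the two extensions are
// treated as equivalent and this returns true; with sexts of different widths
// it returns false, for the reason explained above.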
6207
6208/// Try to speculatively promote extensions in \p Exts and continue
6209/// promoting through newly promoted operands recursively as far as doing so is
6210/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
6211/// When some promotion happened, \p TPT contains the proper state to revert
6212/// them.
6213///
6214/// \return true if some promotion happened, false otherwise.
6215bool CodeGenPrepare::tryToPromoteExts(
6216 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
6217 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
6218 unsigned CreatedInstsCost) {
6219 bool Promoted = false;
6220
6221 // Iterate over all the extensions to try to promote them.
6222 for (auto *I : Exts) {
6223 // Early check if we directly have ext(load).
6224 if (isa<LoadInst>(I->getOperand(0))) {
6225 ProfitablyMovedExts.push_back(I);
6226 continue;
6227 }
6228
6229 // Check whether or not we want to do any promotion. The reason we have
6230 // this check inside the for loop is to catch the case where an extension
6231 // is directly fed by a load, because in that case the extension can be moved
6232 // up without any promotion on its operands.
6233 if (!TLI->enableExtLdPromotion() || DisableExtLdPromotion)
6234 return false;
6235
6236 // Get the action to perform the promotion.
6237 TypePromotionHelper::Action TPH =
6238 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
6239 // Check if we can promote.
6240 if (!TPH) {
6241 // Save the current extension as we cannot move up through its operand.
6242 ProfitablyMovedExts.push_back(I);
6243 continue;
6244 }
6245
6246 // Save the current state.
6247 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6248 TPT.getRestorationPoint();
6249 SmallVector<Instruction *, 4> NewExts;
6250 unsigned NewCreatedInstsCost = 0;
6251 unsigned ExtCost = !TLI->isExtFree(I);
6252 // Promote.
6253 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
6254 &NewExts, nullptr, *TLI);
6255 assert(PromotedVal &&
6256 "TypePromotionHelper should have filtered out those cases");
6257
6258 // We would be able to merge only one extension into a load.
6259 // Therefore, if we have more than 1 new extension we heuristically
6260 // cut this search path, because it means we degrade the code quality.
6261 // With exactly 2, the transformation is neutral, because we will merge
6262 // one extension but leave one. However, we optimistically keep going,
6263 // because the new extension may be removed too. Also avoid replacing a
6264 // single free extension with multiple extensions, as this increases the
6265 // number of IR instructions while not providing any savings.
6266 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
6267 // FIXME: It would be possible to propagate a negative value instead of
6268 // conservatively ceiling it to 0.
6269 TotalCreatedInstsCost =
6270 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
6271 if (!StressExtLdPromotion &&
6272 (TotalCreatedInstsCost > 1 ||
6273 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal) ||
6274 (ExtCost == 0 && NewExts.size() > 1))) {
6275 // This promotion is not profitable, rollback to the previous state, and
6276 // save the current extension in ProfitablyMovedExts as the latest
6277 // speculative promotion turned out to be unprofitable.
6278 TPT.rollback(LastKnownGood);
6279 ProfitablyMovedExts.push_back(I);
6280 continue;
6281 }
6282 // Continue promoting NewExts as far as doing so is profitable.
6283 SmallVector<Instruction *, 2> NewlyMovedExts;
6284 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
6285 bool NewPromoted = false;
6286 for (auto *ExtInst : NewlyMovedExts) {
6287 Instruction *MovedExt = cast<Instruction>(ExtInst);
6288 Value *ExtOperand = MovedExt->getOperand(0);
6289 // If we have reached to a load, we need this extra profitability check
6290 // as it could potentially be merged into an ext(load).
6291 if (isa<LoadInst>(ExtOperand) &&
6292 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
6293 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
6294 continue;
6295
6296 ProfitablyMovedExts.push_back(MovedExt);
6297 NewPromoted = true;
6298 }
6299
6300 // If none of speculative promotions for NewExts is profitable, rollback
6301 // and save the current extension (I) as the last profitable extension.
6302 if (!NewPromoted) {
6303 TPT.rollback(LastKnownGood);
6304 ProfitablyMovedExts.push_back(I);
6305 continue;
6306 }
6307 // The promotion is profitable.
6308 Promoted = true;
6309 }
6310 return Promoted;
6311}
6312
6313/// Merge redundant sexts when one dominates the other.
6314bool CodeGenPrepare::mergeSExts(Function &F) {
6315 bool Changed = false;
6316 for (auto &Entry : ValToSExtendedUses) {
6317 SExts &Insts = Entry.second;
6318 SExts CurPts;
6319 for (Instruction *Inst : Insts) {
6320 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
6321 Inst->getOperand(0) != Entry.first)
6322 continue;
6323 bool inserted = false;
6324 for (auto &Pt : CurPts) {
6325 if (getDT(F).dominates(Inst, Pt)) {
6326 replaceAllUsesWith(Pt, Inst, FreshBBs, IsHugeFunc);
6327 RemovedInsts.insert(Pt);
6328 Pt->removeFromParent();
6329 Pt = Inst;
6330 inserted = true;
6331 Changed = true;
6332 break;
6333 }
6334 if (!getDT(F).dominates(Pt, Inst))
6335 // Give up if we need to merge in a common dominator as the
6336 // experiments show it is not profitable.
6337 continue;
6338 replaceAllUsesWith(Inst, Pt, FreshBBs, IsHugeFunc);
6339 RemovedInsts.insert(Inst);
6340 Inst->removeFromParent();
6341 inserted = true;
6342 Changed = true;
6343 break;
6344 }
6345 if (!inserted)
6346 CurPts.push_back(Inst);
6347 }
6348 }
6349 return Changed;
6350}
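// Illustrative example (invented names): given two identical sexts of the same
// value recorded in ValToSExtendedUses,
//   bb0:  %s1 = sext i32 %x to i64
//   bb1:  %s2 = sext i32 %x to i64   ; bb0 dominates bb1
// the dominated %s2 is replaced by %s1 and queued for removal. If neither sext
// dominates the other, the pair is left untouched.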
6351
6352 // Split large data structures so that the GEPs accessing them can have
6353 // smaller offsets, allowing them to be sunk to the same blocks as their users.
6354// For example, a large struct starting from %base is split into two parts
6355// where the second part starts from %new_base.
6356//
6357// Before:
6358// BB0:
6359// %base =
6360//
6361// BB1:
6362// %gep0 = gep %base, off0
6363// %gep1 = gep %base, off1
6364// %gep2 = gep %base, off2
6365//
6366// BB2:
6367// %load1 = load %gep0
6368// %load2 = load %gep1
6369// %load3 = load %gep2
6370//
6371// After:
6372// BB0:
6373// %base =
6374// %new_base = gep %base, off0
6375//
6376// BB1:
6377// %new_gep0 = %new_base
6378// %new_gep1 = gep %new_base, off1 - off0
6379// %new_gep2 = gep %new_base, off2 - off0
6380//
6381// BB2:
6382// %load1 = load i32, i32* %new_gep0
6383// %load2 = load i32, i32* %new_gep1
6384// %load3 = load i32, i32* %new_gep2
6385//
6386// %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
6387 // their offsets are small enough to fit into the addressing mode.
6388bool CodeGenPrepare::splitLargeGEPOffsets() {
6389 bool Changed = false;
6390 for (auto &Entry : LargeOffsetGEPMap) {
6391 Value *OldBase = Entry.first;
6392 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
6393 &LargeOffsetGEPs = Entry.second;
6394 auto compareGEPOffset =
6395 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
6396 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
6397 if (LHS.first == RHS.first)
6398 return false;
6399 if (LHS.second != RHS.second)
6400 return LHS.second < RHS.second;
6401 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
6402 };
6403 // Sorting all the GEPs of the same data structures based on the offsets.
6404 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
6405 LargeOffsetGEPs.erase(llvm::unique(LargeOffsetGEPs), LargeOffsetGEPs.end());
6406 // Skip if all the GEPs have the same offsets.
6407 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
6408 continue;
6409 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
6410 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
6411 Value *NewBaseGEP = nullptr;
6412
6413 auto createNewBase = [&](int64_t BaseOffset, Value *OldBase,
6414 GetElementPtrInst *GEP) {
6415 LLVMContext &Ctx = GEP->getContext();
6416 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6417 Type *I8PtrTy =
6418 PointerType::get(Ctx, GEP->getType()->getPointerAddressSpace());
6419
6420 BasicBlock::iterator NewBaseInsertPt;
6421 BasicBlock *NewBaseInsertBB;
6422 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
6423 // If the base of the struct is an instruction, the new base will be
6424 // inserted close to it.
6425 NewBaseInsertBB = BaseI->getParent();
6426 if (isa<PHINode>(BaseI))
6427 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6428 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
6429 NewBaseInsertBB =
6430 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest(), DT.get(), LI);
6431 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6432 } else
6433 NewBaseInsertPt = std::next(BaseI->getIterator());
6434 } else {
6435 // If the current base is an argument or global value, the new base
6436 // will be inserted to the entry block.
6437 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
6438 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
6439 }
6440 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
6441 // Create a new base.
6442 Value *BaseIndex = ConstantInt::get(PtrIdxTy, BaseOffset);
6443 NewBaseGEP = OldBase;
6444 if (NewBaseGEP->getType() != I8PtrTy)
6445 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
6446 NewBaseGEP =
6447 NewBaseBuilder.CreatePtrAdd(NewBaseGEP, BaseIndex, "splitgep");
6448 NewGEPBases.insert(NewBaseGEP);
6449 return;
6450 };
6451
6452 // Check whether all the offsets can be encoded with the preferred common base.
6453 if (int64_t PreferBase = TLI->getPreferredLargeGEPBaseOffset(
6454 LargeOffsetGEPs.front().second, LargeOffsetGEPs.back().second)) {
6455 BaseOffset = PreferBase;
6456 // Create a new base if the offset of the BaseGEP can be decoded with one
6457 // instruction.
6458 createNewBase(BaseOffset, OldBase, BaseGEP);
6459 }
6460
6461 auto *LargeOffsetGEP = LargeOffsetGEPs.begin();
6462 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
6463 GetElementPtrInst *GEP = LargeOffsetGEP->first;
6464 int64_t Offset = LargeOffsetGEP->second;
6465 if (Offset != BaseOffset) {
6466 TargetLowering::AddrMode AddrMode;
6467 AddrMode.HasBaseReg = true;
6468 AddrMode.BaseOffs = Offset - BaseOffset;
6469 // The result type of the GEP might not be the type of the memory
6470 // access.
6471 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
6472 GEP->getResultElementType(),
6473 GEP->getAddressSpace())) {
6474 // We need to create a new base if the offset to the current base is
6475 // too large to fit into the addressing mode. So, a very large struct
6476 // may be split into several parts.
6477 BaseGEP = GEP;
6478 BaseOffset = Offset;
6479 NewBaseGEP = nullptr;
6480 }
6481 }
6482
6483 // Generate a new GEP to replace the current one.
6484 Type *PtrIdxTy = DL->getIndexType(GEP->getType());
6485
6486 if (!NewBaseGEP) {
6487 // Create a new base if we don't have one yet. Find the insertion
6488 // pointer for the new base first.
6489 createNewBase(BaseOffset, OldBase, GEP);
6490 }
6491
6492 IRBuilder<> Builder(GEP);
6493 Value *NewGEP = NewBaseGEP;
6494 if (Offset != BaseOffset) {
6495 // Calculate the new offset for the new GEP.
6496 Value *Index = ConstantInt::get(PtrIdxTy, Offset - BaseOffset);
6497 NewGEP = Builder.CreatePtrAdd(NewBaseGEP, Index);
6498 }
6499 replaceAllUsesWith(GEP, NewGEP, FreshBBs, IsHugeFunc);
6500 LargeOffsetGEPID.erase(GEP);
6501 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
6502 GEP->eraseFromParent();
6503 Changed = true;
6504 }
6505 }
6506 return Changed;
6507}
6508
6509bool CodeGenPrepare::optimizePhiType(
6510 PHINode *I, SmallPtrSetImpl<PHINode *> &Visited,
6511 SmallPtrSetImpl<Instruction *> &DeletedInstrs) {
6512 // We are looking for a collection of interconnected phi nodes that together
6513 // only use loads/bitcasts and are used by stores/bitcasts, and the bitcasts
6514 // are of the same type. Convert the whole set of nodes to the type of the
6515 // bitcast.
6516 Type *PhiTy = I->getType();
6517 Type *ConvertTy = nullptr;
6518 if (Visited.count(I) ||
6519 (!I->getType()->isIntegerTy() && !I->getType()->isFloatingPointTy()))
6520 return false;
6521
6522 SmallVector<Instruction *, 4> Worklist;
6523 Worklist.push_back(cast<Instruction>(I));
6524 SmallPtrSet<PHINode *, 4> PhiNodes;
6525 SmallPtrSet<ConstantData *, 4> Constants;
6526 PhiNodes.insert(I);
6527 Visited.insert(I);
6528 SmallPtrSet<Instruction *, 4> Defs;
6529 SmallPtrSet<Instruction *, 4> Uses;
6530 // This works by adding extra bitcasts between load/stores and removing
6531 // existing bitcasts. If we have a phi(bitcast(load)) or a store(bitcast(phi))
6532 // we can get in the situation where we remove a bitcast in one iteration
6533 // just to add it again in the next. We need to ensure that at least one
6534 // bitcast we remove is anchored to something that will not change back.
6535 bool AnyAnchored = false;
6536
6537 while (!Worklist.empty()) {
6538 Instruction *II = Worklist.pop_back_val();
6539
6540 if (auto *Phi = dyn_cast<PHINode>(II)) {
6541 // Handle Defs, which might also be PHI's
6542 for (Value *V : Phi->incoming_values()) {
6543 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6544 if (!PhiNodes.count(OpPhi)) {
6545 if (!Visited.insert(OpPhi).second)
6546 return false;
6547 PhiNodes.insert(OpPhi);
6548 Worklist.push_back(OpPhi);
6549 }
6550 } else if (auto *OpLoad = dyn_cast<LoadInst>(V)) {
6551 if (!OpLoad->isSimple())
6552 return false;
6553 if (Defs.insert(OpLoad).second)
6554 Worklist.push_back(OpLoad);
6555 } else if (auto *OpEx = dyn_cast<ExtractElementInst>(V)) {
6556 if (Defs.insert(OpEx).second)
6557 Worklist.push_back(OpEx);
6558 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6559 if (!ConvertTy)
6560 ConvertTy = OpBC->getOperand(0)->getType();
6561 if (OpBC->getOperand(0)->getType() != ConvertTy)
6562 return false;
6563 if (Defs.insert(OpBC).second) {
6564 Worklist.push_back(OpBC);
6565 AnyAnchored |= !isa<LoadInst>(OpBC->getOperand(0)) &&
6566 !isa<ExtractElementInst>(OpBC->getOperand(0));
6567 }
6568 } else if (auto *OpC = dyn_cast<ConstantData>(V))
6569 Constants.insert(OpC);
6570 else
6571 return false;
6572 }
6573 }
6574
6575 // Handle uses which might also be phi's
6576 for (User *V : II->users()) {
6577 if (auto *OpPhi = dyn_cast<PHINode>(V)) {
6578 if (!PhiNodes.count(OpPhi)) {
6579 if (Visited.count(OpPhi))
6580 return false;
6581 PhiNodes.insert(OpPhi);
6582 Visited.insert(OpPhi);
6583 Worklist.push_back(OpPhi);
6584 }
6585 } else if (auto *OpStore = dyn_cast<StoreInst>(V)) {
6586 if (!OpStore->isSimple() || OpStore->getOperand(0) != II)
6587 return false;
6588 Uses.insert(OpStore);
6589 } else if (auto *OpBC = dyn_cast<BitCastInst>(V)) {
6590 if (!ConvertTy)
6591 ConvertTy = OpBC->getType();
6592 if (OpBC->getType() != ConvertTy)
6593 return false;
6594 Uses.insert(OpBC);
6595 AnyAnchored |=
6596 any_of(OpBC->users(), [](User *U) { return !isa<StoreInst>(U); });
6597 } else {
6598 return false;
6599 }
6600 }
6601 }
6602
6603 if (!ConvertTy || !AnyAnchored ||
6604 !TLI->shouldConvertPhiType(PhiTy, ConvertTy))
6605 return false;
6606
6607 LLVM_DEBUG(dbgs() << "Converting " << *I << "\n and connected nodes to "
6608 << *ConvertTy << "\n");
6609
6610 // Create all the new phi nodes of the new type, and bitcast any loads to the
6611 // correct type.
6612 ValueToValueMap ValMap;
6613 for (ConstantData *C : Constants)
6614 ValMap[C] = ConstantExpr::getBitCast(C, ConvertTy);
6615 for (Instruction *D : Defs) {
6616 if (isa<BitCastInst>(D)) {
6617 ValMap[D] = D->getOperand(0);
6618 DeletedInstrs.insert(D);
6619 } else {
6620 BasicBlock::iterator insertPt = std::next(D->getIterator());
6621 ValMap[D] = new BitCastInst(D, ConvertTy, D->getName() + ".bc", insertPt);
6622 }
6623 }
6624 for (PHINode *Phi : PhiNodes)
6625 ValMap[Phi] = PHINode::Create(ConvertTy, Phi->getNumIncomingValues(),
6626 Phi->getName() + ".tc", Phi->getIterator());
6627 // Pipe together all the PhiNodes.
6628 for (PHINode *Phi : PhiNodes) {
6629 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
6630 for (int i = 0, e = Phi->getNumIncomingValues(); i < e; i++)
6631 NewPhi->addIncoming(ValMap[Phi->getIncomingValue(i)],
6632 Phi->getIncomingBlock(i));
6633 Visited.insert(NewPhi);
6634 }
6635 // And finally pipe up the stores and bitcasts
6636 for (Instruction *U : Uses) {
6637 if (isa<BitCastInst>(U)) {
6638 DeletedInstrs.insert(U);
6639 replaceAllUsesWith(U, ValMap[U->getOperand(0)], FreshBBs, IsHugeFunc);
6640 } else {
6641 U->setOperand(0, new BitCastInst(ValMap[U->getOperand(0)], PhiTy, "bc",
6642 U->getIterator()));
6643 }
6644 }
6645
6646 // Save the removed phis to be deleted later.
6647 for (PHINode *Phi : PhiNodes)
6648 DeletedInstrs.insert(Phi);
6649 return true;
6650}
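// Rough sketch of the conversion (invented names; whether it fires depends on
// TLI->shouldConvertPhiType): a float phi fed only by bitcasts of integer
// loads and consumed only by a bitcast feeding an integer store,
//   %b = bitcast i32 %load to float
//   %p = phi float [ %b, %bb0 ], [ %p2, %bb1 ]
//   %c = bitcast float %p to i32
//   store i32 %c, ptr %q
// is rewritten as an integer phi, and the surrounding bitcasts are removed:
//   %p = phi i32 [ %load, %bb0 ], [ %p2.tc, %bb1 ]
//   store i32 %p, ptr %q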
6651
6652bool CodeGenPrepare::optimizePhiTypes(Function &F) {
6653 if (!OptimizePhiTypes)
6654 return false;
6655
6656 bool Changed = false;
6657 SmallPtrSet<PHINode *, 4> Visited;
6658 SmallPtrSet<Instruction *, 4> DeletedInstrs;
6659
6660 // Attempt to optimize all the phis in the function to the correct type.
6661 for (auto &BB : F)
6662 for (auto &Phi : BB.phis())
6663 Changed |= optimizePhiType(&Phi, Visited, DeletedInstrs);
6664
6665 // Remove any old phis that have been converted.
6666 for (auto *I : DeletedInstrs) {
6667 replaceAllUsesWith(I, PoisonValue::get(I->getType()), FreshBBs, IsHugeFunc);
6668 I->eraseFromParent();
6669 }
6670
6671 return Changed;
6672}
6673
6674/// Return true, if an ext(load) can be formed from an extension in
6675/// \p MovedExts.
6676bool CodeGenPrepare::canFormExtLd(
6677 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
6678 Instruction *&Inst, bool HasPromoted) {
6679 for (auto *MovedExtInst : MovedExts) {
6680 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
6681 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
6682 Inst = MovedExtInst;
6683 break;
6684 }
6685 }
6686 if (!LI)
6687 return false;
6688
6689 // If they're already in the same block, there's nothing to do.
6690 // Make the cheap checks first if we did not promote.
6691 // If we promoted, we need to check if it is indeed profitable.
6692 if (!HasPromoted && LI->getParent() == Inst->getParent())
6693 return false;
6694
6695 return TLI->isExtLoad(LI, Inst, *DL);
6696}
6697
6698/// Move a zext or sext fed by a load into the same basic block as the load,
6699/// unless conditions are unfavorable. This allows SelectionDAG to fold the
6700/// extend into the load.
6701///
6702/// E.g.,
6703/// \code
6704/// %ld = load i32* %addr
6705/// %add = add nuw i32 %ld, 4
6706/// %zext = zext i32 %add to i64
6707/// \endcode
6708/// =>
6709/// \code
6710/// %ld = load i32* %addr
6711/// %zext = zext i32 %ld to i64
6712/// %add = add nuw i64 %zext, 4
6713/// \endcode
6714/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
6715/// allows us to match zext(load i32*) to i64.
6716///
6717/// Also, try to promote the computations used to obtain a sign extended
6718/// value used into memory accesses.
6719/// E.g.,
6720/// \code
6721/// a = add nsw i32 b, 3
6722/// d = sext i32 a to i64
6723/// e = getelementptr ..., i64 d
6724/// \endcode
6725/// =>
6726/// \code
6727/// f = sext i32 b to i64
6728/// a = add nsw i64 f, 3
6729/// e = getelementptr ..., i64 a
6730/// \endcode
6731///
6732/// \p Inst[in/out] the extension may be modified during the process if some
6733/// promotions apply.
6734bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
6735 bool AllowPromotionWithoutCommonHeader = false;
6736 /// See if it is an interesting sext operation for the address type
6737 /// promotion before trying to promote it, e.g., the ones with the right
6738 /// type and used in memory accesses.
6739 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
6740 *Inst, AllowPromotionWithoutCommonHeader);
6741 TypePromotionTransaction TPT(RemovedInsts);
6742 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
6743 TPT.getRestorationPoint();
6744 SmallVector<Instruction *, 1> Exts;
6745 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
6746 Exts.push_back(Inst);
6747
6748 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
6749
6750 // Look for a load being extended.
6751 LoadInst *LI = nullptr;
6752 Instruction *ExtFedByLoad;
6753
6754 // Try to promote a chain of computation if doing so lets us form an extended
6755 // load.
6756 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
6757 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
6758 TPT.commit();
6759 // Move the extend into the same block as the load.
6760 ExtFedByLoad->moveAfter(LI);
6761 ++NumExtsMoved;
6762 Inst = ExtFedByLoad;
6763 return true;
6764 }
6765
6766 // Continue promoting SExts if the target considers address type promotion worthwhile.
6767 if (ATPConsiderable &&
6768 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
6769 HasPromoted, TPT, SpeculativelyMovedExts))
6770 return true;
6771
6772 TPT.rollback(LastKnownGood);
6773 return false;
6774}
6775
6776// Perform address type promotion if doing so is profitable.
6777// If AllowPromotionWithoutCommonHeader == false, we should find other sext
6778// instructions that sign extended the same initial value. However, if
6779 // AllowPromotionWithoutCommonHeader == true, we assume promoting the
6780 // extension is profitable on its own.
6781bool CodeGenPrepare::performAddressTypePromotion(
6782 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
6783 bool HasPromoted, TypePromotionTransaction &TPT,
6784 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
6785 bool Promoted = false;
6786 SmallPtrSet<Instruction *, 1> UnhandledExts;
6787 bool AllSeenFirst = true;
6788 for (auto *I : SpeculativelyMovedExts) {
6789 Value *HeadOfChain = I->getOperand(0);
6790 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
6791 SeenChainsForSExt.find(HeadOfChain);
6792 // If there is an unhandled SExt which has the same header, try to promote
6793 // it as well.
6794 if (AlreadySeen != SeenChainsForSExt.end()) {
6795 if (AlreadySeen->second != nullptr)
6796 UnhandledExts.insert(AlreadySeen->second);
6797 AllSeenFirst = false;
6798 }
6799 }
6800
6801 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
6802 SpeculativelyMovedExts.size() == 1)) {
6803 TPT.commit();
6804 if (HasPromoted)
6805 Promoted = true;
6806 for (auto *I : SpeculativelyMovedExts) {
6807 Value *HeadOfChain = I->getOperand(0);
6808 SeenChainsForSExt[HeadOfChain] = nullptr;
6809 ValToSExtendedUses[HeadOfChain].push_back(I);
6810 }
6811 // Update Inst as promotion happened.
6812 Inst = SpeculativelyMovedExts.pop_back_val();
6813 } else {
6814 // This is the first chain visited from the header; keep the current chain
6815 // as unhandled. Defer promoting it until we encounter another SExt
6816 // chain derived from the same header.
6817 for (auto *I : SpeculativelyMovedExts) {
6818 Value *HeadOfChain = I->getOperand(0);
6819 SeenChainsForSExt[HeadOfChain] = Inst;
6820 }
6821 return false;
6822 }
6823
6824 if (!AllSeenFirst && !UnhandledExts.empty())
6825 for (auto *VisitedSExt : UnhandledExts) {
6826 if (RemovedInsts.count(VisitedSExt))
6827 continue;
6828 TypePromotionTransaction TPT(RemovedInsts);
6829 SmallVector<Instruction *, 1> Exts;
6830 SmallVector<Instruction *, 2> Chains;
6831 Exts.push_back(VisitedSExt);
6832 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
6833 TPT.commit();
6834 if (HasPromoted)
6835 Promoted = true;
6836 for (auto *I : Chains) {
6837 Value *HeadOfChain = I->getOperand(0);
6838 // Mark this as handled.
6839 SeenChainsForSExt[HeadOfChain] = nullptr;
6840 ValToSExtendedUses[HeadOfChain].push_back(I);
6841 }
6842 }
6843 return Promoted;
6844}
6845
6846bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
6847 BasicBlock *DefBB = I->getParent();
6848
6849 // If the result of a {s|z}ext and its source are both live out, rewrite all
6850 // other uses of the source with result of extension.
6851 Value *Src = I->getOperand(0);
6852 if (Src->hasOneUse())
6853 return false;
6854
6855 // Only do this xform if truncating is free.
6856 if (!TLI->isTruncateFree(I->getType(), Src->getType()))
6857 return false;
6858
6859 // Only safe to perform the optimization if the source is also defined in
6860 // this block.
6861 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
6862 return false;
6863
6864 bool DefIsLiveOut = false;
6865 for (User *U : I->users()) {
6866 Instruction *UI = cast<Instruction>(U);
6867
6868 // Figure out which BB this ext is used in.
6869 BasicBlock *UserBB = UI->getParent();
6870 if (UserBB == DefBB)
6871 continue;
6872 DefIsLiveOut = true;
6873 break;
6874 }
6875 if (!DefIsLiveOut)
6876 return false;
6877
6878 // Make sure none of the uses are PHI nodes.
6879 for (User *U : Src->users()) {
6880 Instruction *UI = cast<Instruction>(U);
6881 BasicBlock *UserBB = UI->getParent();
6882 if (UserBB == DefBB)
6883 continue;
6884 // Be conservative. We don't want this xform to end up introducing
6885 // reloads just before load / store instructions.
6886 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
6887 return false;
6888 }
6889
6890 // InsertedTruncs - Only insert one trunc in each block once.
6891 DenseMap<BasicBlock *, Instruction *> InsertedTruncs;
6892
6893 bool MadeChange = false;
6894 for (Use &U : Src->uses()) {
6895 Instruction *User = cast<Instruction>(U.getUser());
6896
6897 // Figure out which BB this ext is used in.
6898 BasicBlock *UserBB = User->getParent();
6899 if (UserBB == DefBB)
6900 continue;
6901
6902 // Both src and def are live in this block. Rewrite the use.
6903 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
6904
6905 if (!InsertedTrunc) {
6906 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
6907 assert(InsertPt != UserBB->end());
6908 InsertedTrunc = new TruncInst(I, Src->getType(), "");
6909 InsertedTrunc->insertBefore(*UserBB, InsertPt);
6910 InsertedInsts.insert(InsertedTrunc);
6911 }
6912
6913 // Replace a use of the {s|z}ext source with a use of the result.
6914 U = InsertedTrunc;
6915 ++NumExtUses;
6916 MadeChange = true;
6917 }
6918
6919 return MadeChange;
6920}
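// Illustrative example (invented names): with %x and its extension %ext both
// live out of the defining block,
//   bb0:  %x = add i32 %a, %b
//         %ext = zext i32 %x to i64
//   bb1:  %use = mul i32 %x, 3
// the use in bb1 is rewritten to go through a trunc of the extension,
//   bb1:  %t = trunc i64 %ext to i32
//         %use = mul i32 %t, 3
// so only %ext needs to stay live across the block boundary.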
6921
6922// Find loads whose uses only use some of the loaded value's bits. Add an "and"
6923// just after the load if the target can fold this into one extload instruction,
6924// with the hope of eliminating some of the other later "and" instructions using
6925// the loaded value. "and"s that are made trivially redundant by the insertion
6926// of the new "and" are removed by this function, while others (e.g. those whose
6927// path from the load goes through a phi) are left for isel to potentially
6928// remove.
6929//
6930// For example:
6931//
6932// b0:
6933// x = load i32
6934// ...
6935// b1:
6936// y = and x, 0xff
6937// z = use y
6938//
6939// becomes:
6940//
6941// b0:
6942// x = load i32
6943// x' = and x, 0xff
6944// ...
6945// b1:
6946// z = use x'
6947//
6948// whereas:
6949//
6950// b0:
6951// x1 = load i32
6952// ...
6953// b1:
6954// x2 = load i32
6955// ...
6956// b2:
6957// x = phi x1, x2
6958// y = and x, 0xff
6959//
6960// becomes (after a call to optimizeLoadExt for each load):
6961//
6962// b0:
6963// x1 = load i32
6964// x1' = and x1, 0xff
6965// ...
6966// b1:
6967// x2 = load i32
6968// x2' = and x2, 0xff
6969// ...
6970// b2:
6971// x = phi x1', x2'
6972// y = and x, 0xff
6973bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
6974 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
6975 return false;
6976
6977 // Skip loads we've already transformed.
6978 if (Load->hasOneUse() &&
6979 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
6980 return false;
6981
6982 // Look at all uses of Load, looking through phis, to determine how many bits
6983 // of the loaded value are needed.
6984 SmallVector<Instruction *, 8> WorkList;
6985 SmallPtrSet<Instruction *, 16> Visited;
6986 SmallVector<Instruction *, 8> AndsToMaybeRemove;
6987 for (auto *U : Load->users())
6988 WorkList.push_back(cast<Instruction>(U));
6989
6990 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
6991 unsigned BitWidth = LoadResultVT.getSizeInBits();
6992 // If the BitWidth is 0, do not try to optimize the type
6993 if (BitWidth == 0)
6994 return false;
6995
6996 APInt DemandBits(BitWidth, 0);
6997 APInt WidestAndBits(BitWidth, 0);
6998
6999 while (!WorkList.empty()) {
7000 Instruction *I = WorkList.pop_back_val();
7001
7002 // Break use-def graph loops.
7003 if (!Visited.insert(I).second)
7004 continue;
7005
7006 // For a PHI node, push all of its users.
7007 if (auto *Phi = dyn_cast<PHINode>(I)) {
7008 for (auto *U : Phi->users())
7009 WorkList.push_back(cast<Instruction>(U));
7010 continue;
7011 }
7012
7013 switch (I->getOpcode()) {
7014 case Instruction::And: {
7015 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
7016 if (!AndC)
7017 return false;
7018 APInt AndBits = AndC->getValue();
7019 DemandBits |= AndBits;
7020 // Keep track of the widest and mask we see.
7021 if (AndBits.ugt(WidestAndBits))
7022 WidestAndBits = AndBits;
7023 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
7024 AndsToMaybeRemove.push_back(I);
7025 break;
7026 }
7027
7028 case Instruction::Shl: {
7029 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
7030 if (!ShlC)
7031 return false;
7032 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
7033 DemandBits.setLowBits(BitWidth - ShiftAmt);
7034 break;
7035 }
7036
7037 case Instruction::Trunc: {
7038 EVT TruncVT = TLI->getValueType(*DL, I->getType());
7039 unsigned TruncBitWidth = TruncVT.getSizeInBits();
7040 DemandBits.setLowBits(TruncBitWidth);
7041 break;
7042 }
7043
7044 default:
7045 return false;
7046 }
7047 }
7048
7049 uint32_t ActiveBits = DemandBits.getActiveBits();
7050 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
7051 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
7052 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
7053 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
7054 // followed by an AND.
7055 // TODO: Look into removing this restriction by fixing backends to either
7056 // return false for isLoadExtLegal for i1 or have them select this pattern to
7057 // a single instruction.
7058 //
7059 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
7060 // mask, since these are the only ands that will be removed by isel.
7061 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
7062 WidestAndBits != DemandBits)
7063 return false;
7064
7065 LLVMContext &Ctx = Load->getType()->getContext();
7066 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
7067 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
7068
7069 // Reject cases that won't be matched as extloads.
7070 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
7071 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
7072 return false;
7073
7074 IRBuilder<> Builder(Load->getNextNonDebugInstruction());
7075 auto *NewAnd = cast<Instruction>(
7076 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
7077 // Mark this instruction as "inserted by CGP", so that other
7078 // optimizations don't touch it.
7079 InsertedInsts.insert(NewAnd);
7080
7081 // Replace all uses of load with new and (except for the use of load in the
7082 // new and itself).
7083 replaceAllUsesWith(Load, NewAnd, FreshBBs, IsHugeFunc);
7084 NewAnd->setOperand(0, Load);
7085
7086 // Remove any and instructions that are now redundant.
7087 for (auto *And : AndsToMaybeRemove)
7088 // Check that the and mask is the same as the one we decided to put on the
7089 // new and.
7090 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
7091 replaceAllUsesWith(And, NewAnd, FreshBBs, IsHugeFunc);
7092 if (&*CurInstIterator == And)
7093 CurInstIterator = std::next(And->getIterator());
7094 And->eraseFromParent();
7095 ++NumAndUses;
7096 }
7097
7098 ++NumAndsAdded;
7099 return true;
7100}
7101
7102/// Check if V (an operand of a select instruction) is an expensive instruction
7103/// that is only used once.
7104 static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
7105 auto *I = dyn_cast<Instruction>(V);
7106 // If it's safe to speculatively execute, then it should not have side
7107 // effects; therefore, it's safe to sink and possibly *not* execute.
7108 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
7109 TTI->isExpensiveToSpeculativelyExecute(I);
7110}
7111
7112/// Returns true if a SelectInst should be turned into an explicit branch.
7113 static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
7114 const TargetLowering *TLI,
7115 SelectInst *SI) {
7116 // If even a predictable select is cheap, then a branch can't be cheaper.
7117 if (!TLI->isPredictableSelectExpensive())
7118 return false;
7119
7120 // FIXME: This should use the same heuristics as IfConversion to determine
7121 // whether a select is better represented as a branch.
7122
7123 // If metadata tells us that the select condition is obviously predictable,
7124 // then we want to replace the select with a branch.
7125 uint64_t TrueWeight, FalseWeight;
7126 if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
7127 uint64_t Max = std::max(TrueWeight, FalseWeight);
7128 uint64_t Sum = TrueWeight + FalseWeight;
7129 if (Sum != 0) {
7130 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
7131 if (Probability > TTI->getPredictableBranchThreshold())
7132 return true;
7133 }
7134 }
7135
7136 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
7137
7138 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
7139 // comparison condition. If the compare has more than one use, there's
7140 // probably another cmov or setcc around, so it's not worth emitting a branch.
7141 if (!Cmp || !Cmp->hasOneUse())
7142 return false;
7143
7144 // If either operand of the select is expensive and only needed on one side
7145 // of the select, we should form a branch.
7146 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
7147 sinkSelectOperand(TTI, SI->getFalseValue()))
7148 return true;
7149
7150 return false;
7151}
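// For example (illustrative metadata, not from the source): a select whose
// profile says one side is taken almost always,
//   %r = select i1 %cmp, i32 %a, i32 %b, !prof !0
//   !0 = !{!"branch_weights", i32 2000, i32 1}
// exceeds a typical predictable-branch threshold, so this helper votes for an
// explicit branch rather than a conditional move.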
7152
7153/// If \p isTrue is true, return the true value of \p SI, otherwise return
7154/// false value of \p SI. If the true/false value of \p SI is defined by any
7155/// select instructions in \p Selects, look through the defining select
7156/// instruction until the true/false value is not defined in \p Selects.
7157static Value *
7158 getTrueOrFalseValue(SelectInst *SI, bool isTrue,
7159 const SmallPtrSet<const Instruction *, 2> &Selects) {
7160 Value *V = nullptr;
7161
7162 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
7163 DefSI = dyn_cast<SelectInst>(V)) {
7164 assert(DefSI->getCondition() == SI->getCondition() &&
7165 "The condition of DefSI does not match with SI");
7166 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
7167 }
7168
7169 assert(V && "Failed to get select true/false value");
7170 return V;
7171}
7172
7173bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
7174 assert(Shift->isShift() && "Expected a shift");
7175
7176 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
7177 // general vector shifts, and (3) the shift amount is a select-of-splatted
7178 // values, hoist the shifts before the select:
7179 // shift Op0, (select Cond, TVal, FVal) -->
7180 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
7181 //
7182 // This is inverting a generic IR transform when we know that the cost of a
7183 // general vector shift is more than the cost of 2 shift-by-scalars.
7184 // We can't do this effectively in SDAG because we may not be able to
7185 // determine if the select operands are splats from within a basic block.
7186 Type *Ty = Shift->getType();
7187 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7188 return false;
7189 Value *Cond, *TVal, *FVal;
7190 if (!match(Shift->getOperand(1),
7191 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7192 return false;
7193 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7194 return false;
7195
7196 IRBuilder<> Builder(Shift);
7197 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
7198 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
7199 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
7200 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7201 replaceAllUsesWith(Shift, NewSel, FreshBBs, IsHugeFunc);
7202 Shift->eraseFromParent();
7203 return true;
7204}
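// Rough sketch (invented values), valid when TLI->isVectorShiftByScalarCheap:
//   %amt = select i1 %c, <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
//                        <4 x i32> <i32 3, i32 3, i32 3, i32 3>
//   %r = shl <4 x i32> %x, %amt
// becomes
//   %s2 = shl <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
//   %s3 = shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
//   %r = select i1 %c, <4 x i32> %s2, <4 x i32> %s3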
7205
7206bool CodeGenPrepare::optimizeFunnelShift(IntrinsicInst *Fsh) {
7207 Intrinsic::ID Opcode = Fsh->getIntrinsicID();
7208 assert((Opcode == Intrinsic::fshl || Opcode == Intrinsic::fshr) &&
7209 "Expected a funnel shift");
7210
7211 // If this is (1) a vector funnel shift, (2) shifts by scalars are cheaper
7212 // than general vector shifts, and (3) the shift amount is select-of-splatted
7213 // values, hoist the funnel shifts before the select:
7214 // fsh Op0, Op1, (select Cond, TVal, FVal) -->
7215 // select Cond, (fsh Op0, Op1, TVal), (fsh Op0, Op1, FVal)
7216 //
7217 // This is inverting a generic IR transform when we know that the cost of a
7218 // general vector shift is more than the cost of 2 shift-by-scalars.
7219 // We can't do this effectively in SDAG because we may not be able to
7220 // determine if the select operands are splats from within a basic block.
7221 Type *Ty = Fsh->getType();
7222 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
7223 return false;
7224 Value *Cond, *TVal, *FVal;
7225 if (!match(Fsh->getOperand(2),
7226 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
7227 return false;
7228 if (!isSplatValue(TVal) || !isSplatValue(FVal))
7229 return false;
7230
7231 IRBuilder<> Builder(Fsh);
7232 Value *X = Fsh->getOperand(0), *Y = Fsh->getOperand(1);
7233 Value *NewTVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, TVal});
7234 Value *NewFVal = Builder.CreateIntrinsic(Opcode, Ty, {X, Y, FVal});
7235 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
7236 replaceAllUsesWith(Fsh, NewSel, FreshBBs, IsHugeFunc);
7237 Fsh->eraseFromParent();
7238 return true;
7239}
7240
7241/// If we have a SelectInst that will likely profit from branch prediction,
7242/// turn it into a branch.
7243bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
7244 if (DisableSelectToBranch)
7245 return false;
7246
7247 // If the SelectOptimize pass is enabled, selects have already been optimized.
7248 if (!getCGPassBuilderOption().DisableSelectOptimize)
7249 return false;
7250
7251 // Find all consecutive select instructions that share the same condition.
7252 SmallVector<SelectInst *, 2> ASI;
7253 ASI.push_back(SI);
7254 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
7255 It != SI->getParent()->end(); ++It) {
7256 SelectInst *I = dyn_cast<SelectInst>(&*It);
7257 if (I && SI->getCondition() == I->getCondition()) {
7258 ASI.push_back(I);
7259 } else {
7260 break;
7261 }
7262 }
7263
7264 SelectInst *LastSI = ASI.back();
7265 // Increment the current iterator to skip all the rest of select instructions
7266 // because they will be either "not lowered" or "all lowered" to branch.
7267 CurInstIterator = std::next(LastSI->getIterator());
7268 // Examine debug-info attached to the consecutive select instructions. They
7269 // won't be individually optimised by optimizeInst, so we need to perform
7270 // DbgVariableRecord maintenance here instead.
7271 for (SelectInst *SI : ArrayRef(ASI).drop_front())
7272 fixupDbgVariableRecordsOnInst(*SI);
7273
7274 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
7275
7276 // Can we convert the 'select' to CF ?
7277 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
7278 return false;
7279
7280 TargetLowering::SelectSupportKind SelectKind;
7281 if (SI->getType()->isVectorTy())
7282 SelectKind = TargetLowering::ScalarCondVectorVal;
7283 else
7284 SelectKind = TargetLowering::ScalarValSelect;
7285
7286 if (TLI->isSelectSupported(SelectKind) &&
7287 (!isFormingBranchFromSelectProfitable(TTI, TLI, SI) || OptSize ||
7288 llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get())))
7289 return false;
7290
7291 // The DominatorTree needs to be rebuilt by any consumers after this
7292 // transformation. We simply reset here rather than setting the ModifiedDT
7293 // flag to avoid restarting the function walk in runOnFunction for each
7294 // select optimized.
7295 DT.reset();
7296
7297 // Transform a sequence like this:
7298 // start:
7299 // %cmp = cmp uge i32 %a, %b
7300 // %sel = select i1 %cmp, i32 %c, i32 %d
7301 //
7302 // Into:
7303 // start:
7304 // %cmp = cmp uge i32 %a, %b
7305 // %cmp.frozen = freeze %cmp
7306 // br i1 %cmp.frozen, label %select.true, label %select.false
7307 // select.true:
7308 // br label %select.end
7309 // select.false:
7310 // br label %select.end
7311 // select.end:
7312 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
7313 //
7314 // %cmp should be frozen, otherwise it may introduce undefined behavior.
7315 // In addition, we may sink instructions that produce %c or %d from
7316 // the entry block into the destination(s) of the new branch.
7317 // If the true or false blocks do not contain a sunken instruction, that
7318 // block and its branch may be optimized away. In that case, one side of the
7319 // first branch will point directly to select.end, and the corresponding PHI
7320 // predecessor block will be the start block.
7321
7322 // Collect values that go on the true side and the values that go on the false
7323 // side.
7324 SmallVector<Instruction *> TrueInstrs, FalseInstrs;
7325 for (SelectInst *SI : ASI) {
7326 if (Value *V = SI->getTrueValue(); sinkSelectOperand(TTI, V))
7327 TrueInstrs.push_back(cast<Instruction>(V));
7328 if (Value *V = SI->getFalseValue(); sinkSelectOperand(TTI, V))
7329 FalseInstrs.push_back(cast<Instruction>(V));
7330 }
7331
7332 // Split the select block, according to how many (if any) values go on each
7333 // side.
7334 BasicBlock *StartBlock = SI->getParent();
7335 BasicBlock::iterator SplitPt = std::next(BasicBlock::iterator(LastSI));
7336 // We should split before any debug-info.
7337 SplitPt.setHeadBit(true);
7338
7339 IRBuilder<> IB(SI);
7340 auto *CondFr = IB.CreateFreeze(SI->getCondition(), SI->getName() + ".frozen");
7341
7342 BasicBlock *TrueBlock = nullptr;
7343 BasicBlock *FalseBlock = nullptr;
7344 BasicBlock *EndBlock = nullptr;
7345 BranchInst *TrueBranch = nullptr;
7346 BranchInst *FalseBranch = nullptr;
7347 if (TrueInstrs.size() == 0) {
7348 FalseBranch = cast<BranchInst>(SplitBlockAndInsertIfElse(
7349 CondFr, SplitPt, false, nullptr, nullptr, LI));
7350 FalseBlock = FalseBranch->getParent();
7351 EndBlock = cast<BasicBlock>(FalseBranch->getOperand(0));
7352 } else if (FalseInstrs.size() == 0) {
7353 TrueBranch = cast<BranchInst>(SplitBlockAndInsertIfThen(
7354 CondFr, SplitPt, false, nullptr, nullptr, LI));
7355 TrueBlock = TrueBranch->getParent();
7356 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7357 } else {
7358 Instruction *ThenTerm = nullptr;
7359 Instruction *ElseTerm = nullptr;
7360 SplitBlockAndInsertIfThenElse(CondFr, SplitPt, &ThenTerm, &ElseTerm,
7361 nullptr, nullptr, LI);
7362 TrueBranch = cast<BranchInst>(ThenTerm);
7363 FalseBranch = cast<BranchInst>(ElseTerm);
7364 TrueBlock = TrueBranch->getParent();
7365 FalseBlock = FalseBranch->getParent();
7366 EndBlock = cast<BasicBlock>(TrueBranch->getOperand(0));
7367 }
7368
7369 EndBlock->setName("select.end");
7370 if (TrueBlock)
7371 TrueBlock->setName("select.true.sink");
7372 if (FalseBlock)
7373 FalseBlock->setName(FalseInstrs.size() == 0 ? "select.false"
7374 : "select.false.sink");
7375
7376 if (IsHugeFunc) {
7377 if (TrueBlock)
7378 FreshBBs.insert(TrueBlock);
7379 if (FalseBlock)
7380 FreshBBs.insert(FalseBlock);
7381 FreshBBs.insert(EndBlock);
7382 }
7383
7384 BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
7385
7386 static const unsigned MD[] = {
7387 LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
7388 LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
7389 StartBlock->getTerminator()->copyMetadata(*SI, MD);
7390
7391 // Sink expensive instructions into the conditional blocks to avoid executing
7392 // them speculatively.
7393 for (Instruction *I : TrueInstrs)
7394 I->moveBefore(TrueBranch);
7395 for (Instruction *I : FalseInstrs)
7396 I->moveBefore(FalseBranch);
7397
7398 // If we did not create a new block for one of the 'true' or 'false' paths
7399 // of the condition, it means that side of the branch goes to the end block
7400 // directly and the path originates from the start block from the point of
7401 // view of the new PHI.
7402 if (TrueBlock == nullptr)
7403 TrueBlock = StartBlock;
7404 else if (FalseBlock == nullptr)
7405 FalseBlock = StartBlock;
7406
7408 INS.insert(ASI.begin(), ASI.end());
7409 // Use reverse iterator because later select may use the value of the
7410 // earlier select, and we need to propagate value through earlier select
7411 // to get the PHI operand.
7412 for (SelectInst *SI : llvm::reverse(ASI)) {
7413 // The select itself is replaced with a PHI Node.
7414 PHINode *PN = PHINode::Create(SI->getType(), 2, "");
7415 PN->insertBefore(EndBlock->begin());
7416 PN->takeName(SI);
7417 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
7418 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
7419 PN->setDebugLoc(SI->getDebugLoc());
7420
7421 replaceAllUsesWith(SI, PN, FreshBBs, IsHugeFunc);
7422 SI->eraseFromParent();
7423 INS.erase(SI);
7424 ++NumSelectsExpanded;
7425 }
7426
7427 // Instruct OptimizeBlock to skip to the next block.
7428 CurInstIterator = StartBlock->end();
7429 return true;
7430}
7431
7432/// Some targets only accept certain types for splat inputs. For example a VDUP
7433/// in MVE takes a GPR (integer) register, and instructions that incorporate
7434/// a VDUP (such as a VADD qd, qm, rm) also require a GPR register.
7435bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
7436 // Accept shuf(insertelem(undef/poison, val, 0), undef/poison, <0,0,..>) only
7437 if (!match(SVI, m_Shuffle(m_InsertElt(m_Undef(), m_Value(), m_ZeroInt()),
7438 m_Undef(), m_ZeroMask())))
7439 return false;
7440 Type *NewType = TLI->shouldConvertSplatType(SVI);
7441 if (!NewType)
7442 return false;
7443
7444 auto *SVIVecType = cast<FixedVectorType>(SVI->getType());
7445 assert(!NewType->isVectorTy() && "Expected a scalar type!");
7446 assert(NewType->getScalarSizeInBits() == SVIVecType->getScalarSizeInBits() &&
7447 "Expected a type of the same size!");
7448 auto *NewVecType =
7449 FixedVectorType::get(NewType, SVIVecType->getNumElements());
7450
7451 // Create a bitcast (shuffle (insert (bitcast(..))))
7452 IRBuilder<> Builder(SVI->getContext());
7453 Builder.SetInsertPoint(SVI);
7454 Value *BC1 = Builder.CreateBitCast(
7455 cast<Instruction>(SVI->getOperand(0))->getOperand(1), NewType);
7456 Value *Shuffle = Builder.CreateVectorSplat(NewVecType->getNumElements(), BC1);
7457 Value *BC2 = Builder.CreateBitCast(Shuffle, SVIVecType);
7458
7459 replaceAllUsesWith(SVI, BC2, FreshBBs, IsHugeFunc);
7460 RecursivelyDeleteTriviallyDeadInstructions(
7461 SVI, TLInfo, nullptr,
7462 [&](Value *V) { removeAllAssertingVHReferences(V); });
7463
7464 // Also hoist the bitcast up to its operand if they are not in the same
7465 // block.
7466 if (auto *BCI = dyn_cast<Instruction>(BC1))
7467 if (auto *Op = dyn_cast<Instruction>(BCI->getOperand(0)))
7468 if (BCI->getParent() != Op->getParent() && !isa<PHINode>(Op) &&
7469 !Op->isTerminator() && !Op->isEHPad())
7470 BCI->moveAfter(Op);
7471
7472 return true;
7473}
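// Rough sketch (invented names; fires only when shouldConvertSplatType returns
// a scalar type, e.g. i32 for MVE): a float splat
//   %i = insertelement <4 x float> poison, float %f, i64 0
//   %s = shufflevector <4 x float> %i, <4 x float> poison, <4 x i32> zeroinitializer
// is rebuilt as bitcast -> integer splat -> bitcast, so the splat input can
// live in a GPR:
//   %b = bitcast float %f to i32
//   %bi = insertelement <4 x i32> poison, i32 %b, i64 0
//   %bs = shufflevector <4 x i32> %bi, <4 x i32> poison, <4 x i32> zeroinitializer
//   %s = bitcast <4 x i32> %bs to <4 x float>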
7474
7475bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
7476 // If the operands of I can be folded into a target instruction together with
7477 // I, duplicate and sink them.
7478 SmallVector<Use *, 4> OpsToSink;
7479 if (!TLI->shouldSinkOperands(I, OpsToSink))
7480 return false;
7481
7482 // OpsToSink can contain multiple uses in a use chain (e.g.
7483 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
7484 // uses must come first, so we process the ops in reverse order so as to not
7485 // create invalid IR.
7486 BasicBlock *TargetBB = I->getParent();
7487 bool Changed = false;
7488 SmallVector<Use *, 4> ToReplace;
7489 Instruction *InsertPoint = I;
7490 DenseMap<const Instruction *, unsigned long> InstOrdering;
7491 unsigned long InstNumber = 0;
7492 for (const auto &I : *TargetBB)
7493 InstOrdering[&I] = InstNumber++;
7494
7495 for (Use *U : reverse(OpsToSink)) {
7496 auto *UI = cast<Instruction>(U->get());
7497 if (isa<PHINode>(UI))
7498 continue;
7499 if (UI->getParent() == TargetBB) {
7500 if (InstOrdering[UI] < InstOrdering[InsertPoint])
7501 InsertPoint = UI;
7502 continue;
7503 }
7504 ToReplace.push_back(U);
7505 }
7506
7507 SetVector<Instruction *> MaybeDead;
7508 DenseMap<Instruction *, Instruction *> NewInstructions;
7509 for (Use *U : ToReplace) {
7510 auto *UI = cast<Instruction>(U->get());
7511 Instruction *NI = UI->clone();
7512
7513 if (IsHugeFunc) {
7514 // Now that we have cloned an instruction, its operands' defs may sink into
7515 // this BB too, so put those defs' BBs into FreshBBs for optimization.
7516 for (Value *Op : NI->operands())
7517 if (auto *OpDef = dyn_cast<Instruction>(Op))
7518 FreshBBs.insert(OpDef->getParent());
7519 }
7520
7521 NewInstructions[UI] = NI;
7522 MaybeDead.insert(UI);
7523 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
7524 NI->insertBefore(InsertPoint);
7525 InsertPoint = NI;
7526 InsertedInsts.insert(NI);
7527
7528 // Update the use for the new instruction, making sure that we update the
7529 // sunk instruction uses, if it is part of a chain that has already been
7530 // sunk.
7531 Instruction *OldI = cast<Instruction>(U->getUser());
7532 if (NewInstructions.count(OldI))
7533 NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
7534 else
7535 U->set(NI);
7536 Changed = true;
7537 }
7538
7539 // Remove instructions that are dead after sinking.
7540 for (auto *I : MaybeDead) {
7541 if (!I->hasNUsesOrMore(1)) {
7542 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
7543 I->eraseFromParent();
7544 }
7545 }
7546
7547 return Changed;
7548}
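// Illustrative example (invented names): if the target reports that the zext
// below can be folded into the multiply (e.g. into a widening multiply),
//   bb0:  %z = zext <8 x i8> %a to <8 x i16>
//   bb1:  %m = mul <8 x i16> %z, %w
// the zext is cloned right before %m in bb1 and the use is redirected to the
// clone; the original zext is erased once it has no remaining uses.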
7549
7550bool CodeGenPrepare::optimizeSwitchType(SwitchInst *SI) {
7551 Value *Cond = SI->getCondition();
7552 Type *OldType = Cond->getType();
7553 LLVMContext &Context = Cond->getContext();
7554 EVT OldVT = TLI->getValueType(*DL, OldType);
7555 MVT RegType = TLI->getPreferredSwitchConditionType(Context, OldVT);
7556 unsigned RegWidth = RegType.getSizeInBits();
7557
7558 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
7559 return false;
7560
7561 // If the register width is greater than the type width, expand the condition
7562 // of the switch instruction and each case constant to the width of the
7563 // register. By widening the type of the switch condition, subsequent
7564 // comparisons (for case comparisons) will not need to be extended to the
7565 // preferred register width, so we will potentially eliminate N-1 extends,
7566 // where N is the number of cases in the switch.
7567 auto *NewType = Type::getIntNTy(Context, RegWidth);
7568
7569 // Extend the switch condition and case constants using the target preferred
7570 // extend unless the switch condition is a function argument with an extend
7571 // attribute. In that case, we can avoid an unnecessary mask/extension by
7572 // matching the argument extension instead.
7573 Instruction::CastOps ExtType = Instruction::ZExt;
7574 // Some targets prefer SExt over ZExt.
7575 if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
7576 ExtType = Instruction::SExt;
7577
7578 if (auto *Arg = dyn_cast<Argument>(Cond)) {
7579 if (Arg->hasSExtAttr())
7580 ExtType = Instruction::SExt;
7581 if (Arg->hasZExtAttr())
7582 ExtType = Instruction::ZExt;
7583 }
7584
7585 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
7586 ExtInst->insertBefore(SI);
7587 ExtInst->setDebugLoc(SI->getDebugLoc());
7588 SI->setCondition(ExtInst);
7589 for (auto Case : SI->cases()) {
7590 const APInt &NarrowConst = Case.getCaseValue()->getValue();
7591 APInt WideConst = (ExtType == Instruction::ZExt)
7592 ? NarrowConst.zext(RegWidth)
7593 : NarrowConst.sext(RegWidth);
7594 Case.setValue(ConstantInt::get(Context, WideConst));
7595 }
7596
7597 return true;
7598}
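// Illustrative example (invented names): with a preferred 32-bit switch
// condition register and zero-extension,
//   switch i8 %x, label %def [ i8 1, label %a
//                              i8 2, label %b ]
// becomes
//   %x.ext = zext i8 %x to i32
//   switch i32 %x.ext, label %def [ i32 1, label %a
//                                   i32 2, label %b ]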
7599
7600bool CodeGenPrepare::optimizeSwitchPhiConstants(SwitchInst *SI) {
7601 // The SCCP optimization tends to produce code like this:
7602 // switch(x) { case 42: phi(42, ...) }
7603 // Materializing the constant for the phi-argument needs instructions, so we
7604 // change the code to:
7605 // switch(x) { case 42: phi(x, ...) }
7606
7607 Value *Condition = SI->getCondition();
7608 // Avoid endless loop in degenerate case.
7609 if (isa<ConstantInt>(*Condition))
7610 return false;
7611
7612 bool Changed = false;
7613 BasicBlock *SwitchBB = SI->getParent();
7614 Type *ConditionType = Condition->getType();
7615
7616 for (const SwitchInst::CaseHandle &Case : SI->cases()) {
7617 ConstantInt *CaseValue = Case.getCaseValue();
7618 BasicBlock *CaseBB = Case.getCaseSuccessor();
7619 // Set to true if we previously checked that `CaseBB` is only reached by
7620 // a single case from this switch.
7621 bool CheckedForSinglePred = false;
7622 for (PHINode &PHI : CaseBB->phis()) {
7623 Type *PHIType = PHI.getType();
7624 // If ZExt is free then we can also catch patterns like this:
7625 // switch((i32)x) { case 42: phi((i64)42, ...); }
7626 // and replace `(i64)42` with `zext i32 %x to i64`.
7627 bool TryZExt =
7628 PHIType->isIntegerTy() &&
7629 PHIType->getIntegerBitWidth() > ConditionType->getIntegerBitWidth() &&
7630 TLI->isZExtFree(ConditionType, PHIType);
7631 if (PHIType == ConditionType || TryZExt) {
7632 // Set to true to skip this case because of multiple preds.
7633 bool SkipCase = false;
7634 Value *Replacement = nullptr;
7635 for (unsigned I = 0, E = PHI.getNumIncomingValues(); I != E; I++) {
7636 Value *PHIValue = PHI.getIncomingValue(I);
7637 if (PHIValue != CaseValue) {
7638 if (!TryZExt)
7639 continue;
7640 ConstantInt *PHIValueInt = dyn_cast<ConstantInt>(PHIValue);
7641 if (!PHIValueInt ||
7642 PHIValueInt->getValue() !=
7643 CaseValue->getValue().zext(PHIType->getIntegerBitWidth()))
7644 continue;
7645 }
7646 if (PHI.getIncomingBlock(I) != SwitchBB)
7647 continue;
7648 // We cannot optimize if there are multiple case labels jumping to
7649 // this block. This check may get expensive when there are many
7650 // case labels so we test for it last.
7651 if (!CheckedForSinglePred) {
7652 CheckedForSinglePred = true;
7653 if (SI->findCaseDest(CaseBB) == nullptr) {
7654 SkipCase = true;
7655 break;
7656 }
7657 }
7658
7659 if (Replacement == nullptr) {
7660 if (PHIValue == CaseValue) {
7661 Replacement = Condition;
7662 } else {
7663 IRBuilder<> Builder(SI);
7664 Replacement = Builder.CreateZExt(Condition, PHIType);
7665 }
7666 }
7667 PHI.setIncomingValue(I, Replacement);
7668 Changed = true;
7669 }
7670 if (SkipCase)
7671 break;
7672 }
7673 }
7674 }
7675 return Changed;
7676}
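// Illustrative example of the zext-free case handled above (a sketch, assuming
// TLI->isZExtFree(i32, i64) and that %bb is reached only from this case):
// entry:
//   switch i32 %x, label %def [ i32 42, label %bb ]
//   ...
// bb:
//   %p = phi i64 [ 42, %entry ], [ %v, %other ]
// becomes
// entry:
//   %x.ext = zext i32 %x to i64        ; built by the IRBuilder at the switch
//   switch i32 %x, label %def [ i32 42, label %bb ]
//   ...
// bb:
//   %p = phi i64 [ %x.ext, %entry ], [ %v, %other ]
// which avoids materializing the constant 42 in %bb.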
7677
7678bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
7679 bool Changed = optimizeSwitchType(SI);
7680 Changed |= optimizeSwitchPhiConstants(SI);
7681 return Changed;
7682}
7683
7684namespace {
7685
7686/// Helper class to promote a scalar operation to a vector one.
7687/// This class is used to move downward extractelement transition.
7688/// E.g.,
7689/// a = vector_op <2 x i32>
7690/// b = extractelement <2 x i32> a, i32 0
7691/// c = scalar_op b
7692/// store c
7693///
7694/// =>
7695/// a = vector_op <2 x i32>
7696/// c = vector_op a (equivalent to scalar_op on the related lane)
7697/// * d = extractelement <2 x i32> c, i32 0
7698/// * store d
7699/// Assuming both extractelement and store can be combined, we get rid of the
7700/// transition.
7701class VectorPromoteHelper {
7702 /// DataLayout associated with the current module.
7703 const DataLayout &DL;
7704
7705 /// Used to perform some checks on the legality of vector operations.
7706 const TargetLowering &TLI;
7707
7708 /// Used to estimate the cost of the promoted chain.
7709 const TargetTransformInfo &TTI;
7710
7711 /// The transition being moved downwards.
7712 Instruction *Transition;
7713
7714 /// The sequence of instructions to be promoted.
7715 SmallVector<Instruction *, 4> InstsToBePromoted;
7716
7717 /// Cost of combining a store and an extract.
7718 unsigned StoreExtractCombineCost;
7719
7720 /// Instruction that will be combined with the transition.
7721 Instruction *CombineInst = nullptr;
7722
7723 /// The instruction that represents the current end of the transition.
7724 /// Since we are faking the promotion until we reach the end of the chain
7725 /// of computation, we need a way to get the current end of the transition.
7726 Instruction *getEndOfTransition() const {
7727 if (InstsToBePromoted.empty())
7728 return Transition;
7729 return InstsToBePromoted.back();
7730 }
7731
7732 /// Return the index of the original value in the transition.
7733 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
7734 /// c, is at index 0.
7735 unsigned getTransitionOriginalValueIdx() const {
7736 assert(isa<ExtractElementInst>(Transition) &&
7737 "Other kind of transitions are not supported yet");
7738 return 0;
7739 }
7740
7741 /// Return the index of the index in the transition.
7742 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
7743 /// is at index 1.
7744 unsigned getTransitionIdx() const {
7745 assert(isa<ExtractElementInst>(Transition) &&
7746 "Other kind of transitions are not supported yet");
7747 return 1;
7748 }
7749
7750 /// Get the type of the transition.
7751 /// This is the type of the original value.
7752 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
7753 /// transition is <2 x i32>.
7754 Type *getTransitionType() const {
7755 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
7756 }
7757
7758 /// Promote \p ToBePromoted by moving \p Def downward through it.
7759 /// I.e., we have the following sequence:
7760 /// Def = Transition <ty1> a to <ty2>
7761 /// b = ToBePromoted <ty2> Def, ...
7762 /// =>
7763 /// b = ToBePromoted <ty1> a, ...
7764 /// Def = Transition <ty1> ToBePromoted to <ty2>
7765 void promoteImpl(Instruction *ToBePromoted);
7766
7767 /// Check whether or not it is profitable to promote all the
7768 /// instructions enqueued to be promoted.
7769 bool isProfitableToPromote() {
7770 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
7771 unsigned Index = isa<ConstantInt>(ValIdx)
7772 ? cast<ConstantInt>(ValIdx)->getZExtValue()
7773 : -1;
7774 Type *PromotedType = getTransitionType();
7775
7776 StoreInst *ST = cast<StoreInst>(CombineInst);
7777 unsigned AS = ST->getPointerAddressSpace();
7778 // Check if this store is supported.
7779 if (!TLI.allowsMisalignedMemoryAccesses(
7780 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
7781 ST->getAlign())) {
7782 // If this is not supported, there is no way we can combine
7783 // the extract with the store.
7784 return false;
7785 }
7786
7787 // The scalar chain of computation has to pay for the transition
7788 // scalar to vector.
7789 // The vector chain has to account for the combining cost.
7790 TargetTransformInfo::TargetCostKind CostKind =
7791 TargetTransformInfo::TCK_RecipThroughput;
7792 InstructionCost ScalarCost =
7793 TTI.getVectorInstrCost(*Transition, PromotedType, CostKind, Index);
7794 InstructionCost VectorCost = StoreExtractCombineCost;
7795 for (const auto &Inst : InstsToBePromoted) {
7796 // Compute the cost.
7797 // By construction, all instructions being promoted are arithmetic ones.
7798 // Moreover, one argument is a constant that can be viewed as a splat
7799 // constant.
7800 Value *Arg0 = Inst->getOperand(0);
7801 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
7802 isa<ConstantFP>(Arg0);
7803 TargetTransformInfo::OperandValueInfo Arg0Info, Arg1Info;
7804 if (IsArg0Constant)
7805 Arg0Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7806 else
7807 Arg1Info.Kind = TargetTransformInfo::OK_UniformConstantValue;
7808
7809 ScalarCost += TTI.getArithmeticInstrCost(
7810 Inst->getOpcode(), Inst->getType(), CostKind, Arg0Info, Arg1Info);
7811 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
7812 CostKind, Arg0Info, Arg1Info);
7813 }
7814 LLVM_DEBUG(
7815 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
7816 << ScalarCost << "\nVector: " << VectorCost << '\n');
7817 return ScalarCost > VectorCost;
7818 }
7819
7820 /// Generate a constant vector with \p Val with the same
7821 /// number of elements as the transition.
7822 /// \p UseSplat defines whether or not \p Val should be replicated
7823 /// across the whole vector.
7824 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
7825 /// otherwise we generate a vector with as many undef as possible:
7826 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
7827 /// used at the index of the extract.
7828 Value *getConstantVector(Constant *Val, bool UseSplat) const {
7829 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
7830 if (!UseSplat) {
7831 // If we cannot determine where the constant must be, we have to
7832 // use a splat constant.
7833 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
7834 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
7835 ExtractIdx = CstVal->getSExtValue();
7836 else
7837 UseSplat = true;
7838 }
7839
7840 ElementCount EC = cast<VectorType>(getTransitionType())->getElementCount();
7841 if (UseSplat)
7842 return ConstantVector::getSplat(EC, Val);
7843
7844 if (!EC.isScalable()) {
7845 SmallVector<Constant *, 4> ConstVec;
7846 UndefValue *UndefVal = UndefValue::get(Val->getType());
7847 for (unsigned Idx = 0; Idx != EC.getKnownMinValue(); ++Idx) {
7848 if (Idx == ExtractIdx)
7849 ConstVec.push_back(Val);
7850 else
7851 ConstVec.push_back(UndefVal);
7852 }
7853 return ConstantVector::get(ConstVec);
7854 } else
7855 llvm_unreachable(
7856 "Generate scalable vector for non-splat is unimplemented");
7857 }
7858
7859 /// Check if promoting to a vector type an operand at \p OperandIdx
7860 /// in \p Use can trigger undefined behavior.
7861 static bool canCauseUndefinedBehavior(const Instruction *Use,
7862 unsigned OperandIdx) {
7863 // It is not safe to introduce undef when the operand is on
7864 // the right hand side of a division-like instruction.
7865 if (OperandIdx != 1)
7866 return false;
7867 switch (Use->getOpcode()) {
7868 default:
7869 return false;
7870 case Instruction::SDiv:
7871 case Instruction::UDiv:
7872 case Instruction::SRem:
7873 case Instruction::URem:
7874 return true;
7875 case Instruction::FDiv:
7876 case Instruction::FRem:
7877 return !Use->hasNoNaNs();
7878 }
7879 llvm_unreachable(nullptr);
7880 }
7881
7882public:
7883 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
7884 const TargetTransformInfo &TTI, Instruction *Transition,
7885 unsigned CombineCost)
7886 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
7887 StoreExtractCombineCost(CombineCost) {
7888 assert(Transition && "Do not know how to promote null");
7889 }
7890
7891 /// Check if we can promote \p ToBePromoted to \p Type.
7892 bool canPromote(const Instruction *ToBePromoted) const {
7893 // We could support CastInst too.
7894 return isa<BinaryOperator>(ToBePromoted);
7895 }
7896
7897 /// Check if it is profitable to promote \p ToBePromoted
7898 /// by moving the transition downward through it.
7899 bool shouldPromote(const Instruction *ToBePromoted) const {
7900 // Promote only if all the operands can be statically expanded.
7901 // Indeed, we do not want to introduce any new kind of transitions.
7902 for (const Use &U : ToBePromoted->operands()) {
7903 const Value *Val = U.get();
7904 if (Val == getEndOfTransition()) {
7905 // If the use is a division and the transition is on the rhs,
7906 // we cannot promote the operation, otherwise we may create a
7907 // division by zero.
7908 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
7909 return false;
7910 continue;
7911 }
7912 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
7913 !isa<ConstantFP>(Val))
7914 return false;
7915 }
7916 // Check that the resulting operation is legal.
7917 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
7918 if (!ISDOpcode)
7919 return false;
7920 return StressStoreExtract ||
7921 TLI.isOperationLegalOrCustom(
7922 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
7923 }
7924
7925 /// Check whether or not \p Use can be combined
7926 /// with the transition.
7927 /// I.e., is it possible to do Use(Transition) => AnotherUse?
7928 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
7929
7930 /// Record \p ToBePromoted as part of the chain to be promoted.
7931 void enqueueForPromotion(Instruction *ToBePromoted) {
7932 InstsToBePromoted.push_back(ToBePromoted);
7933 }
7934
7935 /// Set the instruction that will be combined with the transition.
7936 void recordCombineInstruction(Instruction *ToBeCombined) {
7937 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
7938 CombineInst = ToBeCombined;
7939 }
7940
7941 /// Promote all the instructions enqueued for promotion if it
7942 /// is profitable.
7943 /// \return True if the promotion happened, false otherwise.
7944 bool promote() {
7945 // Check if there is something to promote.
7946 // Right now, if we do not have anything to combine with,
7947 // we assume the promotion is not profitable.
7948 if (InstsToBePromoted.empty() || !CombineInst)
7949 return false;
7950
7951 // Check cost.
7952 if (!StressStoreExtract && !isProfitableToPromote())
7953 return false;
7954
7955 // Promote.
7956 for (auto &ToBePromoted : InstsToBePromoted)
7957 promoteImpl(ToBePromoted);
7958 InstsToBePromoted.clear();
7959 return true;
7960 }
7961};
7962
7963} // end anonymous namespace
7964
7965void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
7966 // At this point, we know that all the operands of ToBePromoted but Def
7967 // can be statically promoted.
7968 // For Def, we need to use its parameter in ToBePromoted:
7969 // b = ToBePromoted ty1 a
7970 // Def = Transition ty1 b to ty2
7971 // Move the transition down.
7972 // 1. Replace all uses of the promoted operation by the transition.
7973 // = ... b => = ... Def.
7974 assert(ToBePromoted->getType() == Transition->getType() &&
7975 "The type of the result of the transition does not match "
7976 "the final type");
7977 ToBePromoted->replaceAllUsesWith(Transition);
7978 // 2. Update the type of the uses.
7979 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
7980 Type *TransitionTy = getTransitionType();
7981 ToBePromoted->mutateType(TransitionTy);
7982 // 3. Update all the operands of the promoted operation with promoted
7983 // operands.
7984 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
7985 for (Use &U : ToBePromoted->operands()) {
7986 Value *Val = U.get();
7987 Value *NewVal = nullptr;
7988 if (Val == Transition)
7989 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
7990 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
7991 isa<ConstantFP>(Val)) {
7992 // Use a splat constant if it is not safe to use undef.
7993 NewVal = getConstantVector(
7994 cast<Constant>(Val),
7995 isa<UndefValue>(Val) ||
7996 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
7997 } else
7998 llvm_unreachable("Did you modify shouldPromote and forget to update "
7999 "this?");
8000 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
8001 }
8002 Transition->moveAfter(ToBePromoted);
8003 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
8004}
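// Illustrative effect of promoteImpl (a sketch, names chosen for clarity): with
//   %b = extractelement <2 x i32> %a, i32 0    ; Transition
//   %c = udiv i32 %b, 7                        ; ToBePromoted
// the scalar udiv is rewritten on the vector type and the extract moves
// below it:
//   %c = udiv <2 x i32> %a, <i32 7, i32 7>     ; splat constant, because an
//                                              ; undef divisor lane could be UB
//   %b = extractelement <2 x i32> %c, i32 0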
8005
8006/// Some targets can do store(extractelement) with one instruction.
8007/// Try to push the extractelement towards the stores when the target
8008/// has this feature and this is profitable.
8009bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
8010 unsigned CombineCost = std::numeric_limits<unsigned>::max();
8011 if (DisableStoreExtract ||
8012 (!StressStoreExtract &&
8013 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
8014 Inst->getOperand(1), CombineCost)))
8015 return false;
8016
8017 // At this point we know that Inst is a vector to scalar transition.
8018 // Try to move it down the def-use chain, until:
8019 // - We can combine the transition with its single use
8020 // => we got rid of the transition.
8021 // - We escape the current basic block
8022 // => we would need to check that we are moving it at a cheaper place and
8023 // we do not do that for now.
8024 BasicBlock *Parent = Inst->getParent();
8025 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
8026 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
8027 // If the transition has more than one use, assume this is not going to be
8028 // beneficial.
8029 while (Inst->hasOneUse()) {
8030 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
8031 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
8032
8033 if (ToBePromoted->getParent() != Parent) {
8034 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
8035 << ToBePromoted->getParent()->getName()
8036 << ") than the transition (" << Parent->getName()
8037 << ").\n");
8038 return false;
8039 }
8040
8041 if (VPH.canCombine(ToBePromoted)) {
8042 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
8043 << "will be combined with: " << *ToBePromoted << '\n');
8044 VPH.recordCombineInstruction(ToBePromoted);
8045 bool Changed = VPH.promote();
8046 NumStoreExtractExposed += Changed;
8047 return Changed;
8048 }
8049
8050 LLVM_DEBUG(dbgs() << "Try promoting.\n");
8051 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
8052 return false;
8053
8054 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
8055
8056 VPH.enqueueForPromotion(ToBePromoted);
8057 Inst = ToBePromoted;
8058 }
8059 return false;
8060}
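// Illustrative end-to-end example (a sketch, assuming the target reports via
// canCombineStoreAndExtract that the store/extract pair is cheap to combine):
//   %e = extractelement <2 x i32> %v, i32 1
//   %s = add i32 %e, 5
//   store i32 %s, ptr %p
// is rewritten, when profitable, to
//   %s.vec = add <2 x i32> %v, <i32 undef, i32 5>  ; 5 is only needed in lane 1
//   %e = extractelement <2 x i32> %s.vec, i32 1
//   store i32 %e, ptr %p
// so the extract can be folded into the store by the target.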
8061
8062/// For the instruction sequence of store below, F and I values
8063/// are bundled together as an i64 value before being stored into memory.
8064/// Sometimes it is more efficient to generate separate stores for F and I,
8065/// which can remove the bitwise instructions or sink them to colder places.
8066///
8067/// (store (or (zext (bitcast F to i32) to i64),
8068/// (shl (zext I to i64), 32)), addr) -->
8069/// (store F, addr) and (store I, addr+4)
8070///
8071/// Similarly, splitting for other merged stores can also be beneficial, like:
8072/// For pair of {i32, i32}, i64 store --> two i32 stores.
8073/// For pair of {i32, i16}, i64 store --> two i32 stores.
8074/// For pair of {i16, i16}, i32 store --> two i16 stores.
8075/// For pair of {i16, i8}, i32 store --> two i16 stores.
8076/// For pair of {i8, i8}, i16 store --> two i8 stores.
8077///
8078/// We allow each target to determine specifically which kind of splitting is
8079/// supported.
8080///
8081/// The store patterns are commonly seen from the simple code snippet below
8082/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
8083/// void goo(const std::pair<int, float> &);
8084/// hoo() {
8085/// ...
8086/// goo(std::make_pair(tmp, ftmp));
8087/// ...
8088/// }
8089///
8090/// Although we already have similar splitting in DAG Combine, we duplicate
8091/// it in CodeGenPrepare to catch the case in which the pattern spans
8092/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
8093/// during code expansion.
8094 static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
8095 const TargetLowering &TLI) {
8096 // Handle simple but common cases only.
8097 Type *StoreType = SI.getValueOperand()->getType();
8098
8099 // The code below assumes shifting a value by <number of bits>,
8100 // whereas scalable vectors would have to be shifted by
8101 // <2log(vscale) + number of bits> in order to store the
8102 // low/high parts. Bailing out for now.
8103 if (StoreType->isScalableTy())
8104 return false;
8105
8106 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
8107 DL.getTypeSizeInBits(StoreType) == 0)
8108 return false;
8109
8110 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
8111 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
8112 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
8113 return false;
8114
8115 // Don't split the store if it is volatile.
8116 if (SI.isVolatile())
8117 return false;
8118
8119 // Match the following patterns:
8120 // (store (or (zext LValue to i64),
8121 // (shl (zext HValue to i64), 32)), HalfValBitSize)
8122 // or
8123 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
8124 // (zext LValue to i64),
8125 // Expect both operands of OR and the first operand of SHL have only
8126 // one use.
8127 Value *LValue, *HValue;
8128 if (!match(SI.getValueOperand(),
8129 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
8130 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
8131 m_SpecificInt(HalfValBitSize))))))
8132 return false;
8133
8134 // Check that LValue and HValue are integers no wider than half the store size.
8135 if (!LValue->getType()->isIntegerTy() ||
8136 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
8137 !HValue->getType()->isIntegerTy() ||
8138 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
8139 return false;
8140
8141 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
8142 // as the input of target query.
8143 auto *LBC = dyn_cast<BitCastInst>(LValue);
8144 auto *HBC = dyn_cast<BitCastInst>(HValue);
8145 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
8146 : EVT::getEVT(LValue->getType());
8147 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
8148 : EVT::getEVT(HValue->getType());
8149 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
8150 return false;
8151
8152 // Start to split store.
8153 IRBuilder<> Builder(SI.getContext());
8154 Builder.SetInsertPoint(&SI);
8155
8156 // If LValue/HValue is a bitcast in another BB, create a new one in current
8157 // BB so it may be merged with the split stores by the DAG combiner.
8158 if (LBC && LBC->getParent() != SI.getParent())
8159 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
8160 if (HBC && HBC->getParent() != SI.getParent())
8161 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
8162
8163 bool IsLE = SI.getDataLayout().isLittleEndian();
8164 auto CreateSplitStore = [&](Value *V, bool Upper) {
8165 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
8166 Value *Addr = SI.getPointerOperand();
8167 Align Alignment = SI.getAlign();
8168 const bool IsOffsetStore = (IsLE && Upper) || (!IsLE && !Upper);
8169 if (IsOffsetStore) {
8170 Addr = Builder.CreateGEP(
8171 SplitStoreType, Addr,
8172 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
8173
8174 // When splitting the store in half, naturally one half will retain the
8175 // alignment of the original wider store, regardless of whether it was
8176 // over-aligned or not, while the other will require adjustment.
8177 Alignment = commonAlignment(Alignment, HalfValBitSize / 8);
8178 }
8179 Builder.CreateAlignedStore(V, Addr, Alignment);
8180 };
8181
8182 CreateSplitStore(LValue, false);
8183 CreateSplitStore(HValue, true);
8184
8185 // Delete the old store.
8186 SI.eraseFromParent();
8187 return true;
8188}
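// Worked example (a sketch; little-endian target where
// isMultiStoresCheaperThanBitsMerge(i32, i32) holds, or -force-split-store):
//   %zl = zext i32 %lo to i64
//   %zh = zext i32 %hi to i64
//   %sh = shl i64 %zh, 32
//   %m  = or i64 %zl, %sh
//   store i64 %m, ptr %p
// becomes
//   store i32 %lo, ptr %p
//   %p.hi = getelementptr i32, ptr %p, i32 1
//   store i32 %hi, ptr %p.hi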
8189
8190// Return true if the GEP has two operands, the first operand is of a sequential
8191// type, and the second operand is a constant.
8192 static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
8193 gep_type_iterator I = gep_type_begin(*GEP);
8194 return GEP->getNumOperands() == 2 && I.isSequential() &&
8195 isa<ConstantInt>(GEP->getOperand(1));
8196}
8197
8198// Try unmerging GEPs to reduce liveness interference (register pressure) across
8199// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
8200// reducing liveness interference across those edges benefits global register
8201// allocation. Currently handles only certain cases.
8202//
8203// For example, unmerge %GEPI and %UGEPI as below.
8204//
8205// ---------- BEFORE ----------
8206// SrcBlock:
8207// ...
8208// %GEPIOp = ...
8209// ...
8210// %GEPI = gep %GEPIOp, Idx
8211// ...
8212// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
8213// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
8214 // (* %GEPIOp is alive on the indirectbr edges only because it's used by
8215// %UGEPI)
8216//
8217// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
8218// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
8219// ...
8220//
8221// DstBi:
8222// ...
8223// %UGEPI = gep %GEPIOp, UIdx
8224// ...
8225// ---------------------------
8226//
8227// ---------- AFTER ----------
8228// SrcBlock:
8229// ... (same as above)
8230// (* %GEPI is still alive on the indirectbr edges)
8231// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
8232// unmerging)
8233// ...
8234//
8235// DstBi:
8236// ...
8237// %UGEPI = gep %GEPI, (UIdx-Idx)
8238// ...
8239// ---------------------------
8240//
8241// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
8242// no longer alive on them.
8243//
8244 // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
8245// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
8246 // not to disable further simplifications and optimizations as a result of GEP
8247// merging.
8248//
8249// Note this unmerging may increase the length of the data flow critical path
8250// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
8251// between the register pressure and the length of data-flow critical
8252// path. Restricting this to the uncommon IndirectBr case would minimize the
8253// impact of potentially longer critical path, if any, and the impact on compile
8254// time.
8255 static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
8256 const TargetTransformInfo *TTI) {
8257 BasicBlock *SrcBlock = GEPI->getParent();
8258 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
8259 // (non-IndirectBr) cases exit early here.
8260 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
8261 return false;
8262 // Check that GEPI is a simple gep with a single constant index.
8263 if (!GEPSequentialConstIndexed(GEPI))
8264 return false;
8265 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
8266 // Check that GEPI is a cheap one.
8267 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType(),
8268 TargetTransformInfo::TCK_SizeAndLatency) >
8269 TargetTransformInfo::TCC_Basic)
8270 return false;
8271 Value *GEPIOp = GEPI->getOperand(0);
8272 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
8273 if (!isa<Instruction>(GEPIOp))
8274 return false;
8275 auto *GEPIOpI = cast<Instruction>(GEPIOp);
8276 if (GEPIOpI->getParent() != SrcBlock)
8277 return false;
8278 // Check that GEP is used outside the block, meaning it's alive on the
8279 // IndirectBr edge(s).
8280 if (llvm::none_of(GEPI->users(), [&](User *Usr) {
8281 if (auto *I = dyn_cast<Instruction>(Usr)) {
8282 if (I->getParent() != SrcBlock) {
8283 return true;
8284 }
8285 }
8286 return false;
8287 }))
8288 return false;
8289 // The second elements of the GEP chains to be unmerged.
8290 std::vector<GetElementPtrInst *> UGEPIs;
8291 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
8292 // on IndirectBr edges.
8293 for (User *Usr : GEPIOp->users()) {
8294 if (Usr == GEPI)
8295 continue;
8296 // Check if Usr is an Instruction. If not, give up.
8297 if (!isa<Instruction>(Usr))
8298 return false;
8299 auto *UI = cast<Instruction>(Usr);
8300 // If Usr is in the same block as GEPIOp, that is fine; skip it.
8301 if (UI->getParent() == SrcBlock)
8302 continue;
8303 // Check if Usr is a GEP. If not, give up.
8304 if (!isa<GetElementPtrInst>(Usr))
8305 return false;
8306 auto *UGEPI = cast<GetElementPtrInst>(Usr);
8307 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
8308 // the pointer operand to it. If so, record it in the vector. If not, give
8309 // up.
8310 if (!GEPSequentialConstIndexed(UGEPI))
8311 return false;
8312 if (UGEPI->getOperand(0) != GEPIOp)
8313 return false;
8314 if (UGEPI->getSourceElementType() != GEPI->getSourceElementType())
8315 return false;
8316 if (GEPIIdx->getType() !=
8317 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
8318 return false;
8319 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8320 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType(),
8321 TargetTransformInfo::TCK_SizeAndLatency) >
8322 TargetTransformInfo::TCC_Basic)
8323 return false;
8324 UGEPIs.push_back(UGEPI);
8325 }
8326 if (UGEPIs.size() == 0)
8327 return false;
8328 // Check the materializing cost of (Uidx-Idx).
8329 for (GetElementPtrInst *UGEPI : UGEPIs) {
8330 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8331 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
8332 InstructionCost ImmCost = TTI->getIntImmCost(
8333 NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
8334 if (ImmCost > TargetTransformInfo::TCC_Basic)
8335 return false;
8336 }
8337 // Now unmerge between GEPI and UGEPIs.
8338 for (GetElementPtrInst *UGEPI : UGEPIs) {
8339 UGEPI->setOperand(0, GEPI);
8340 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
8341 Constant *NewUGEPIIdx = ConstantInt::get(
8342 GEPIIdx->getType(), UGEPIIdx->getValue() - GEPIIdx->getValue());
8343 UGEPI->setOperand(1, NewUGEPIIdx);
8344 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
8345 // inbounds to avoid UB.
8346 if (!GEPI->isInBounds()) {
8347 UGEPI->setIsInBounds(false);
8348 }
8349 }
8350 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
8351 // alive on IndirectBr edges).
8352 assert(llvm::none_of(GEPIOp->users(),
8353 [&](User *Usr) {
8354 return cast<Instruction>(Usr)->getParent() != SrcBlock;
8355 }) &&
8356 "GEPIOp is used outside SrcBlock");
8357 return true;
8358}
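// Concrete instance of the unmerging above (illustrative): with Idx = 4 and
// UIdx = 6,
//   %UGEPI = getelementptr i32, ptr %GEPIOp, i64 6
// in a destination block becomes
//   %UGEPI = getelementptr i32, ptr %GEPI, i64 2
// so only %GEPI (not %GEPIOp) remains live across the indirectbr edges.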
8359
8360static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI,
8361 SmallSet<BasicBlock *, 32> &FreshBBs,
8362 bool IsHugeFunc) {
8363 // Try and convert
8364 // %c = icmp ult %x, 8
8365 // br %c, bla, blb
8366 // %tc = lshr %x, 3
8367 // to
8368 // %tc = lshr %x, 3
8369 // %c = icmp eq %tc, 0
8370 // br %c, bla, blb
8371 // Creating the cmp to zero can be better for the backend, especially if the
8372 // lshr produces flags that can be used automatically.
8373 if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
8374 return false;
8375
8376 ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
8377 if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
8378 return false;
8379
8380 Value *X = Cmp->getOperand(0);
8381 APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
8382
8383 for (auto *U : X->users()) {
8384 Instruction *UI = dyn_cast<Instruction>(U);
8385 // A quick dominance check
8386 if (!UI ||
8387 (UI->getParent() != Branch->getParent() &&
8388 UI->getParent() != Branch->getSuccessor(0) &&
8389 UI->getParent() != Branch->getSuccessor(1)) ||
8390 (UI->getParent() != Branch->getParent() &&
8391 !UI->getParent()->getSinglePredecessor()))
8392 continue;
8393
8394 if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
8395 match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
8396 IRBuilder<> Builder(Branch);
8397 if (UI->getParent() != Branch->getParent())
8398 UI->moveBefore(Branch);
8399 UI->dropPoisonGeneratingFlags();
8400 Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
8401 ConstantInt::get(UI->getType(), 0));
8402 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8403 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8404 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8405 return true;
8406 }
8407 if (Cmp->isEquality() &&
8408 (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
8409 match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
8410 IRBuilder<> Builder(Branch);
8411 if (UI->getParent() != Branch->getParent())
8412 UI->moveBefore(Branch);
8413 UI->dropPoisonGeneratingFlags();
8414 Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
8415 ConstantInt::get(UI->getType(), 0));
8416 LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
8417 LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
8418 replaceAllUsesWith(Cmp, NewCmp, FreshBBs, IsHugeFunc);
8419 return true;
8420 }
8421 }
8422 return false;
8423}
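// The second (equality) pattern handled above, illustrated as a sketch:
//   %c = icmp eq i32 %x, 8
//   br i1 %c, label %bla, label %blb
//   ...
//   %t = sub i32 %x, 8
// becomes
//   %t = sub i32 %x, 8
//   %c = icmp eq i32 %t, 0
//   br i1 %c, label %bla, label %blb
// letting the backend reuse the flags produced by the subtract.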
8424
8425bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
8426 bool AnyChange = false;
8427 AnyChange = fixupDbgVariableRecordsOnInst(*I);
8428
8429 // Bail out if we inserted the instruction to prevent optimizations from
8430 // stepping on each other's toes.
8431 if (InsertedInsts.count(I))
8432 return AnyChange;
8433
8434 // TODO: Move into the switch on opcode below here.
8435 if (PHINode *P = dyn_cast<PHINode>(I)) {
8436 // It is possible for very late stage optimizations (such as SimplifyCFG)
8437 // to introduce PHI nodes too late to be cleaned up. If we detect such a
8438 // trivial PHI, go ahead and zap it here.
8439 if (Value *V = simplifyInstruction(P, {*DL, TLInfo})) {
8440 LargeOffsetGEPMap.erase(P);
8441 replaceAllUsesWith(P, V, FreshBBs, IsHugeFunc);
8442 P->eraseFromParent();
8443 ++NumPHIsElim;
8444 return true;
8445 }
8446 return AnyChange;
8447 }
8448
8449 if (CastInst *CI = dyn_cast<CastInst>(I)) {
8450 // If the source of the cast is a constant, then this should have
8451 // already been constant folded. The only reason NOT to constant fold
8452 // it is if something (e.g. LSR) was careful to place the constant
8453 // evaluation in a block other than the one that uses it (e.g. to hoist
8454 // the address of globals out of a loop). If this is the case, we don't
8455 // want to forward-subst the cast.
8456 if (isa<Constant>(CI->getOperand(0)))
8457 return AnyChange;
8458
8459 if (OptimizeNoopCopyExpression(CI, *TLI, *DL))
8460 return true;
8461
8462 if ((isa<UIToFPInst>(I) || isa<SIToFPInst>(I) || isa<FPToUIInst>(I) ||
8463 isa<TruncInst>(I)) &&
8464 TLI->optimizeExtendOrTruncateConversion(
8465 I, LI->getLoopFor(I->getParent()), *TTI))
8466 return true;
8467
8468 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
8469 /// Sink a zext or sext into its user blocks if the target type doesn't
8470 /// fit in one register
8471 if (TLI->getTypeAction(CI->getContext(),
8472 TLI->getValueType(*DL, CI->getType())) ==
8473 TargetLowering::TypeExpandInteger) {
8474 return SinkCast(CI);
8475 } else {
8476 if (TLI->optimizeExtendOrTruncateConversion(
8477 I, LI->getLoopFor(I->getParent()), *TTI))
8478 return true;
8479
8480 bool MadeChange = optimizeExt(I);
8481 return MadeChange | optimizeExtUses(I);
8482 }
8483 }
8484 return AnyChange;
8485 }
8486
8487 if (auto *Cmp = dyn_cast<CmpInst>(I))
8488 if (optimizeCmp(Cmp, ModifiedDT))
8489 return true;
8490
8491 if (match(I, m_URem(m_Value(), m_Value())))
8492 if (optimizeURem(I))
8493 return true;
8494
8495 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
8496 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8497 bool Modified = optimizeLoadExt(LI);
8498 unsigned AS = LI->getPointerAddressSpace();
8499 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
8500 return Modified;
8501 }
8502
8503 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
8504 if (splitMergedValStore(*SI, *DL, *TLI))
8505 return true;
8506 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
8507 unsigned AS = SI->getPointerAddressSpace();
8508 return optimizeMemoryInst(I, SI->getOperand(1),
8509 SI->getOperand(0)->getType(), AS);
8510 }
8511
8512 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
8513 unsigned AS = RMW->getPointerAddressSpace();
8514 return optimizeMemoryInst(I, RMW->getPointerOperand(), RMW->getType(), AS);
8515 }
8516
8517 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
8518 unsigned AS = CmpX->getPointerAddressSpace();
8519 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
8520 CmpX->getCompareOperand()->getType(), AS);
8521 }
8522
8523 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
8524
8525 if (BinOp && BinOp->getOpcode() == Instruction::And && EnableAndCmpSinking &&
8526 sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts))
8527 return true;
8528
8529 // TODO: Move this into the switch on opcode - it handles shifts already.
8530 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
8531 BinOp->getOpcode() == Instruction::LShr)) {
8532 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
8533 if (CI && TLI->hasExtractBitsInsn())
8534 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
8535 return true;
8536 }
8537
8538 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
8539 if (GEPI->hasAllZeroIndices()) {
8540 /// The GEP operand must be a pointer, so must its result -> BitCast
8541 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
8542 GEPI->getName(), GEPI->getIterator());
8543 NC->setDebugLoc(GEPI->getDebugLoc());
8544 replaceAllUsesWith(GEPI, NC, FreshBBs, IsHugeFunc);
8545 RecursivelyDeleteTriviallyDeadInstructions(
8546 GEPI, TLInfo, nullptr,
8547 [&](Value *V) { removeAllAssertingVHReferences(V); });
8548 ++NumGEPsElim;
8549 optimizeInst(NC, ModifiedDT);
8550 return true;
8551 }
8552 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
8553 return true;
8554 }
8555 }
8556
8557 if (FreezeInst *FI = dyn_cast<FreezeInst>(I)) {
8558 // freeze(icmp a, const)) -> icmp (freeze a), const
8559 // This helps generate efficient conditional jumps.
8560 Instruction *CmpI = nullptr;
8561 if (ICmpInst *II = dyn_cast<ICmpInst>(FI->getOperand(0)))
8562 CmpI = II;
8563 else if (FCmpInst *F = dyn_cast<FCmpInst>(FI->getOperand(0)))
8564 CmpI = F->getFastMathFlags().none() ? F : nullptr;
8565
8566 if (CmpI && CmpI->hasOneUse()) {
8567 auto Op0 = CmpI->getOperand(0), Op1 = CmpI->getOperand(1);
8568 bool Const0 = isa<ConstantInt>(Op0) || isa<ConstantFP>(Op0) ||
8569 isa<ConstantPointerNull>(Op0);
8570 bool Const1 = isa<ConstantInt>(Op1) || isa<ConstantFP>(Op1) ||
8571 isa<ConstantPointerNull>(Op1);
8572 if (Const0 || Const1) {
8573 if (!Const0 || !Const1) {
8574 auto *F = new FreezeInst(Const0 ? Op1 : Op0, "", CmpI->getIterator());
8575 F->takeName(FI);
8576 CmpI->setOperand(Const0 ? 1 : 0, F);
8577 }
8578 replaceAllUsesWith(FI, CmpI, FreshBBs, IsHugeFunc);
8579 FI->eraseFromParent();
8580 return true;
8581 }
8582 }
8583 return AnyChange;
8584 }
8585
8586 if (tryToSinkFreeOperands(I))
8587 return true;
8588
8589 switch (I->getOpcode()) {
8590 case Instruction::Shl:
8591 case Instruction::LShr:
8592 case Instruction::AShr:
8593 return optimizeShiftInst(cast<BinaryOperator>(I));
8594 case Instruction::Call:
8595 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
8596 case Instruction::Select:
8597 return optimizeSelectInst(cast<SelectInst>(I));
8598 case Instruction::ShuffleVector:
8599 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
8600 case Instruction::Switch:
8601 return optimizeSwitchInst(cast<SwitchInst>(I));
8602 case Instruction::ExtractElement:
8603 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
8604 case Instruction::Br:
8605 return optimizeBranch(cast<BranchInst>(I), *TLI, FreshBBs, IsHugeFunc);
8606 }
8607
8608 return AnyChange;
8609}
8610
8611/// Given an OR instruction, check to see if this is a bitreverse
8612/// idiom. If so, insert the new intrinsic and return true.
8613bool CodeGenPrepare::makeBitReverse(Instruction &I) {
8614 if (!I.getType()->isIntegerTy() ||
8615 !TLI->isOperationLegalOrCustom(ISD::BITREVERSE,
8616 TLI->getValueType(*DL, I.getType(), true)))
8617 return false;
8618
8619 SmallVector<Instruction *, 4> Insts;
8620 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
8621 return false;
8622 Instruction *LastInst = Insts.back();
8623 replaceAllUsesWith(&I, LastInst, FreshBBs, IsHugeFunc);
8624 RecursivelyDeleteTriviallyDeadInstructions(
8625 &I, TLInfo, nullptr,
8626 [&](Value *V) { removeAllAssertingVHReferences(V); });
8627 return true;
8628}
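// Illustrative result (a sketch, assuming the target reports ISD::BITREVERSE
// as legal or custom): a shift/and/or chain that reverses the bits of an i8
// collapses into
//   %rev = call i8 @llvm.bitreverse.i8(i8 %x)
// and the original chain is deleted once it becomes dead.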
8629
8630// In this pass we look for GEP and cast instructions that are used
8631// across basic blocks and rewrite them to improve basic-block-at-a-time
8632// selection.
8633bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, ModifyDT &ModifiedDT) {
8634 SunkAddrs.clear();
8635 bool MadeChange = false;
8636
8637 do {
8638 CurInstIterator = BB.begin();
8639 ModifiedDT = ModifyDT::NotModifyDT;
8640 while (CurInstIterator != BB.end()) {
8641 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
8642 if (ModifiedDT != ModifyDT::NotModifyDT) {
8643 // For huge functions we tend to quickly go through the inner optimization
8644 // opportunities in the BB. So we go back to the BB head to re-optimize
8645 // each instruction instead of going back to the function head.
8646 if (IsHugeFunc) {
8647 DT.reset();
8648 getDT(*BB.getParent());
8649 break;
8650 } else {
8651 return true;
8652 }
8653 }
8654 }
8655 } while (ModifiedDT == ModifyDT::ModifyInstDT);
8656
8657 bool MadeBitReverse = true;
8658 while (MadeBitReverse) {
8659 MadeBitReverse = false;
8660 for (auto &I : reverse(BB)) {
8661 if (makeBitReverse(I)) {
8662 MadeBitReverse = MadeChange = true;
8663 break;
8664 }
8665 }
8666 }
8667 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
8668
8669 return MadeChange;
8670}
8671
8672// Some CGP optimizations may move or alter what's computed in a block. Check
8673// whether a dbg.value intrinsic could be pointed at a more appropriate operand.
8674bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
8675 assert(isa<DbgValueInst>(I));
8676 DbgValueInst &DVI = *cast<DbgValueInst>(I);
8677
8678 // Does this dbg.value refer to a sunk address calculation?
8679 bool AnyChange = false;
8680 SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
8681 DVI.location_ops().end());
8682 for (Value *Location : LocationOps) {
8683 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8684 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8685 if (SunkAddr) {
8686 // Point dbg.value at locally computed address, which should give the best
8687 // opportunity to be accurately lowered. This update may change the type
8688 // of pointer being referred to; however this makes no difference to
8689 // debugging information, and we can't generate bitcasts that may affect
8690 // codegen.
8691 DVI.replaceVariableLocationOp(Location, SunkAddr);
8692 AnyChange = true;
8693 }
8694 }
8695 return AnyChange;
8696}
8697
8698bool CodeGenPrepare::fixupDbgVariableRecordsOnInst(Instruction &I) {
8699 bool AnyChange = false;
8700 for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
8701 AnyChange |= fixupDbgVariableRecord(DVR);
8702 return AnyChange;
8703}
8704
8705// FIXME: should updating debug-info really cause the "changed" flag to fire,
8706// which can cause a function to be reprocessed?
8707bool CodeGenPrepare::fixupDbgVariableRecord(DbgVariableRecord &DVR) {
8708 if (DVR.Type != DbgVariableRecord::LocationType::Value &&
8709 DVR.Type != DbgVariableRecord::LocationType::Assign)
8710 return false;
8711
8712 // Does this DbgVariableRecord refer to a sunk address calculation?
8713 bool AnyChange = false;
8714 SmallDenseSet<Value *> LocationOps(DVR.location_ops().begin(),
8715 DVR.location_ops().end());
8716 for (Value *Location : LocationOps) {
8717 WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
8718 Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
8719 if (SunkAddr) {
8720 // Point dbg.value at locally computed address, which should give the best
8721 // opportunity to be accurately lowered. This update may change the type
8722 // of pointer being referred to; however this makes no difference to
8723 // debugging information, and we can't generate bitcasts that may affect
8724 // codegen.
8725 DVR.replaceVariableLocationOp(Location, SunkAddr);
8726 AnyChange = true;
8727 }
8728 }
8729 return AnyChange;
8730}
8731
8732 static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI) {
8733 DVI->removeFromParent();
8734 if (isa<PHINode>(VI))
8735 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
8736 else
8737 DVI->insertAfter(VI);
8738}
8739
8740 static void DbgInserterHelper(DbgVariableRecord *DVR, Instruction *VI) {
8741 DVR->removeFromParent();
8742 BasicBlock *VIBB = VI->getParent();
8743 if (isa<PHINode>(VI))
8744 VIBB->insertDbgRecordBefore(DVR, VIBB->getFirstInsertionPt());
8745 else
8746 VIBB->insertDbgRecordAfter(DVR, VI);
8747}
8748
8749// A llvm.dbg.value may be using a value before its definition, due to
8750// optimizations in this pass and others. Scan for such dbg.values, and rescue
8751// them by moving the dbg.value to immediately after the value definition.
8752// FIXME: Ideally this should never be necessary, and this has the potential
8753// to re-order dbg.value intrinsics.
8754bool CodeGenPrepare::placeDbgValues(Function &F) {
8755 bool MadeChange = false;
8756 DominatorTree DT(F);
8757
8758 auto DbgProcessor = [&](auto *DbgItem, Instruction *Position) {
8759 SmallVector<Instruction *, 4> VIs;
8760 for (Value *V : DbgItem->location_ops())
8761 if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
8762 VIs.push_back(VI);
8763
8764 // This item may depend on multiple instructions, complicating any
8765 // potential sink. This block takes the defensive approach, opting to
8766 // "undef" the item if it has more than one instruction and any of them do
8768 // not dominate it.
8768 for (Instruction *VI : VIs) {
8769 if (VI->isTerminator())
8770 continue;
8771
8772 // If VI is a phi in a block with an EHPad terminator, we can't insert
8773 // after it.
8774 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
8775 continue;
8776
8777 // If the defining instruction dominates the dbg.value, we do not need
8778 // to move the dbg.value.
8779 if (DT.dominates(VI, Position))
8780 continue;
8781
8782 // If we depend on multiple instructions and any of them doesn't
8783 // dominate this DVI, we probably can't salvage it: moving it to
8784 // after any of the instructions could cause us to lose the others.
8785 if (VIs.size() > 1) {
8786 LLVM_DEBUG(
8787 dbgs()
8788 << "Unable to find valid location for Debug Value, undefing:\n"
8789 << *DbgItem);
8790 DbgItem->setKillLocation();
8791 break;
8792 }
8793
8794 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
8795 << *DbgItem << ' ' << *VI);
8796 DbgInserterHelper(DbgItem, VI);
8797 MadeChange = true;
8798 ++NumDbgValueMoved;
8799 }
8800 };
8801
8802 for (BasicBlock &BB : F) {
8803 for (Instruction &Insn : llvm::make_early_inc_range(BB)) {
8804 // Process dbg.value intrinsics.
8805 DbgValueInst *DVI = dyn_cast<DbgValueInst>(&Insn);
8806 if (DVI) {
8807 DbgProcessor(DVI, DVI);
8808 continue;
8809 }
8810
8811 // If this isn't a dbg.value, process any attached DbgVariableRecord
8812 // records attached to this instruction.
8813 for (DbgVariableRecord &DVR : llvm::make_early_inc_range(
8814 filterDbgVars(Insn.getDbgRecordRange()))) {
8815 if (DVR.Type != DbgVariableRecord::LocationType::Value)
8816 continue;
8817 DbgProcessor(&DVR, &Insn);
8818 }
8819 }
8820 }
8821
8822 return MadeChange;
8823}
8824
8825// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
8826// probes can be chained dependencies of other regular DAG nodes and block DAG
8827// combine optimizations.
8828bool CodeGenPrepare::placePseudoProbes(Function &F) {
8829 bool MadeChange = false;
8830 for (auto &Block : F) {
8831 // Move the remaining probes to the beginning of the block.
8832 auto FirstInst = Block.getFirstInsertionPt();
8833 while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
8834 ++FirstInst;
8835 BasicBlock::iterator I(FirstInst);
8836 I++;
8837 while (I != Block.end()) {
8838 if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
8839 II->moveBefore(&*FirstInst);
8840 MadeChange = true;
8841 }
8842 }
8843 }
8844 return MadeChange;
8845}
8846
8847/// Scale down both weights to fit into uint32_t.
8848static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
8849 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
8850 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
8851 NewTrue = NewTrue / Scale;
8852 NewFalse = NewFalse / Scale;
8853}
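// Worked example of the scaling above (illustrative numbers): NewTrue =
// 6,000,000,000 and NewFalse = 2,000,000,000 give NewMax = 6,000,000,000 and
// Scale = 6,000,000,000 / UINT32_MAX + 1 = 2, so the weights become
// 3,000,000,000 and 1,000,000,000, both of which now fit in uint32_t.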
8854
8855/// Some targets prefer to split a conditional branch like:
8856/// \code
8857/// %0 = icmp ne i32 %a, 0
8858/// %1 = icmp ne i32 %b, 0
8859/// %or.cond = or i1 %0, %1
8860/// br i1 %or.cond, label %TrueBB, label %FalseBB
8861/// \endcode
8862/// into multiple branch instructions like:
8863/// \code
8864/// bb1:
8865/// %0 = icmp ne i32 %a, 0
8866/// br i1 %0, label %TrueBB, label %bb2
8867/// bb2:
8868/// %1 = icmp ne i32 %b, 0
8869/// br i1 %1, label %TrueBB, label %FalseBB
8870/// \endcode
8871/// This usually allows instruction selection to do even further optimizations
8872/// and combine the compare with the branch instruction. Currently this is
8873/// applied for targets which have "cheap" jump instructions.
8874///
8875/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
8876///
8877bool CodeGenPrepare::splitBranchCondition(Function &F, ModifyDT &ModifiedDT) {
8878 if (!TM->Options.EnableFastISel || TLI->isJumpExpensive())
8879 return false;
8880
8881 bool MadeChange = false;
8882 for (auto &BB : F) {
8883 // Does this BB end with the following?
8884 // %cond1 = icmp|fcmp|binary instruction ...
8885 // %cond2 = icmp|fcmp|binary instruction ...
8886 // %cond.or = or|and i1 %cond1, %cond2
8887 // br i1 %cond.or, label %dest1, label %dest2
8888 Instruction *LogicOp;
8889 BasicBlock *TBB, *FBB;
8890 if (!match(BB.getTerminator(),
8891 m_Br(m_OneUse(m_Instruction(LogicOp)), TBB, FBB)))
8892 continue;
8893
8894 auto *Br1 = cast<BranchInst>(BB.getTerminator());
8895 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
8896 continue;
8897
8898 // The merging of mostly empty BB can cause a degenerate branch.
8899 if (TBB == FBB)
8900 continue;
8901
8902 unsigned Opc;
8903 Value *Cond1, *Cond2;
8904 if (match(LogicOp,
8905 m_LogicalAnd(m_OneUse(m_Value(Cond1)), m_OneUse(m_Value(Cond2)))))
8906 Opc = Instruction::And;
8907 else if (match(LogicOp, m_LogicalOr(m_OneUse(m_Value(Cond1)),
8908 m_OneUse(m_Value(Cond2)))))
8909 Opc = Instruction::Or;
8910 else
8911 continue;
8912
8913 auto IsGoodCond = [](Value *Cond) {
8914 return match(
8915 Cond,
8916 m_CombineOr(m_Cmp(), m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
8917 m_LogicalOr(m_Value(), m_Value()))));
8918 };
8919 if (!IsGoodCond(Cond1) || !IsGoodCond(Cond2))
8920 continue;
8921
8922 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
8923
8924 // Create a new BB.
8925 auto *TmpBB =
8926 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
8927 BB.getParent(), BB.getNextNode());
8928 if (IsHugeFunc)
8929 FreshBBs.insert(TmpBB);
8930
8931 // Update the original basic block: use the first condition directly in the
8932 // branch instruction and remove the no-longer-needed and/or instruction.
8933 Br1->setCondition(Cond1);
8934 LogicOp->eraseFromParent();
8935
8936 // Depending on the condition we have to either replace the true or the
8937 // false successor of the original branch instruction.
8938 if (Opc == Instruction::And)
8939 Br1->setSuccessor(0, TmpBB);
8940 else
8941 Br1->setSuccessor(1, TmpBB);
8942
8943 // Fill in the new basic block.
8944 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
8945 if (auto *I = dyn_cast<Instruction>(Cond2)) {
8946 I->removeFromParent();
8947 I->insertBefore(Br2);
8948 }
8949
8950 // Update PHI nodes in both successors. The original BB needs to be
8951 // replaced in one successor's PHI nodes, because the branch comes now from
8952 // the newly generated BB (NewBB). In the other successor we need to add one
8953 // incoming edge to the PHI nodes, because both branch instructions target
8954 // now the same successor. Depending on the original branch condition
8955 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
8956 // we perform the correct update for the PHI nodes.
8957 // This doesn't change the successor order of the just created branch
8958 // instruction (or any other instruction).
8959 if (Opc == Instruction::Or)
8960 std::swap(TBB, FBB);
8961
8962 // Replace the old BB with the new BB.
8963 TBB->replacePhiUsesWith(&BB, TmpBB);
8964
8965 // Add another incoming edge from the new BB.
8966 for (PHINode &PN : FBB->phis()) {
8967 auto *Val = PN.getIncomingValueForBlock(&BB);
8968 PN.addIncoming(Val, TmpBB);
8969 }
8970
8971 // Update the branch weights (from SelectionDAGBuilder::
8972 // FindMergedConditions).
8973 if (Opc == Instruction::Or) {
8974 // Codegen X | Y as:
8975 // BB1:
8976 // jmp_if_X TBB
8977 // jmp TmpBB
8978 // TmpBB:
8979 // jmp_if_Y TBB
8980 // jmp FBB
8981 //
8982
8983 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
8984 // The requirement is that
8985 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
8986 // = TrueProb for original BB.
8987 // Assuming the original weights are A and B, one choice is to set BB1's
8988 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
8989 // assumes that
8990 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
8991 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
8992 // TmpBB, but the math is more complicated.
8993 uint64_t TrueWeight, FalseWeight;
8994 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
8995 uint64_t NewTrueWeight = TrueWeight;
8996 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
8997 scaleWeights(NewTrueWeight, NewFalseWeight);
8998 Br1->setMetadata(LLVMContext::MD_prof,
8999 MDBuilder(Br1->getContext())
9000 .createBranchWeights(TrueWeight, FalseWeight,
9001 hasBranchWeightOrigin(*Br1)));
9002
9003 NewTrueWeight = TrueWeight;
9004 NewFalseWeight = 2 * FalseWeight;
9005 scaleWeights(NewTrueWeight, NewFalseWeight);
9006 Br2->setMetadata(LLVMContext::MD_prof,
9007 MDBuilder(Br2->getContext())
9008 .createBranchWeights(TrueWeight, FalseWeight));
9009 }
9010 } else {
9011 // Codegen X & Y as:
9012 // BB1:
9013 // jmp_if_X TmpBB
9014 // jmp FBB
9015 // TmpBB:
9016 // jmp_if_Y TBB
9017 // jmp FBB
9018 //
9019 // This requires creation of TmpBB after CurBB.
9020
9021 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
9022 // The requirement is that
9023 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
9024 // = FalseProb for original BB.
9025 // Assuming the original weights are A and B, one choice is to set BB1's
9026 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
9027 // assumes that
9028 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
9029 uint64_t TrueWeight, FalseWeight;
9030 if (extractBranchWeights(*Br1, TrueWeight, FalseWeight)) {
9031 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
9032 uint64_t NewFalseWeight = FalseWeight;
9033 scaleWeights(NewTrueWeight, NewFalseWeight);
9034 Br1->setMetadata(LLVMContext::MD_prof,
9035 MDBuilder(Br1->getContext())
9036 .createBranchWeights(TrueWeight, FalseWeight));
9037
9038 NewTrueWeight = 2 * TrueWeight;
9039 NewFalseWeight = FalseWeight;
9040 scaleWeights(NewTrueWeight, NewFalseWeight);
9041 Br2->setMetadata(LLVMContext::MD_prof,
9042 MDBuilder(Br2->getContext())
9043 .createBranchWeights(TrueWeight, FalseWeight));
9044 }
9045 }
9046
9047 ModifiedDT = ModifyDT::ModifyBBDT;
9048 MadeChange = true;
9049
9050 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
9051 TmpBB->dump());
9052 }
9053 return MadeChange;
9054}
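// Worked example for the weight scheme described in the comments above (a
// sketch, not asserting what the emitted metadata contains): for "or" with
// original weights A = 1 (true) and B = 3 (false), the scheme assigns Br1 the
// weights {A, A + 2B} = {1, 7} and Br2 the weights {A, 2B} = {1, 6} before
// scaling, preserving the original probability of reaching TrueBB.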
#define Success
for(const MachineOperand &MO :llvm::drop_begin(OldMI.operands(), Desc.getNumOperands()))
static unsigned getIntrinsicID(const SDNode *N)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
amdgpu AMDGPU Register Bank Select
Rewrite undef for PHI
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static bool sinkAndCmp0Expression(Instruction *AndI, const TargetLowering &TLI, SetOfInstrs &InsertedInsts)
Duplicate and sink the given 'and' instruction into user blocks where it is used in a compare to allo...
static bool SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, DenseMap< BasicBlock *, BinaryOperator * > &InsertedShifts, const TargetLowering &TLI, const DataLayout &DL)
Sink both shift and truncate instruction to the use of truncate's BB.
static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, SmallVectorImpl< Value * > &OffsetV)
Optimize for code generation
static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V)
Check if V (an operand of a select instruction) is an expensive instruction that is only used once.
static void replaceAllUsesWith(Value *Old, Value *New, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
Replace all old uses with new ones, and push the updated BBs into FreshBBs.
static bool isExtractBitsCandidateUse(Instruction *User)
Check if the candidates could be combined with a shift instruction, which includes:
static cl::opt< unsigned > MaxAddressUsersToScan("cgp-max-address-users-to-scan", cl::init(100), cl::Hidden, cl::desc("Max number of address users to look at"))
static cl::opt< bool > OptimizePhiTypes("cgp-optimize-phi-types", cl::Hidden, cl::init(true), cl::desc("Enable converting phi types in CodeGenPrepare"))
static cl::opt< bool > DisableStoreExtract("disable-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Disable store(extract) optimizations in CodeGenPrepare"))
static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI, const DataLayout &DL)
static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI)
Sink the given CmpInst into user blocks to reduce the number of virtual registers that must be create...
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse)
Scale down both weights to fit into uint32_t.
static cl::opt< bool > ProfileUnknownInSpecialSection("profile-unknown-in-special-section", cl::Hidden, cl::desc("In profiling mode like sampleFDO, if a function doesn't have " "profile, we cannot tell the function is cold for sure because " "it may be a function newly added without ever being sampled. " "With the flag enabled, compiler can put such profile unknown " "functions into a special section, so runtime system can choose " "to handle it in a different way than .text section, to save " "RAM for example. "))
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI, const TargetLowering &TLI, const DataLayout &DL)
Sink the shift right instruction into user blocks if the uses could potentially be combined with this...
static cl::opt< bool > DisableExtLdPromotion("disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " "CodeGenPrepare"))
static cl::opt< bool > DisablePreheaderProtect("disable-preheader-prot", cl::Hidden, cl::init(false), cl::desc("Disable protection against removing loop preheaders"))
static cl::opt< bool > AddrSinkCombineBaseOffs("addr-sink-combine-base-offs", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseOffs field in Address sinking."))
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, const DataLayout &DL)
If the specified cast instruction is a noop copy (e.g.
static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, const TargetLowering &TLI)
For the instruction sequence of store below, F and I values are bundled together as an i64 value befo...
static bool SinkCast(CastInst *CI)
Sink the specified cast instruction into its user blocks.
static bool swapICmpOperandsToExposeCSEOpportunities(CmpInst *Cmp)
Many architectures use the same instruction for both subtract and cmp.
static cl::opt< bool > AddrSinkCombineBaseReg("addr-sink-combine-base-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseReg field in Address sinking."))
static bool FindAllMemoryUses(Instruction *I, SmallVectorImpl< std::pair< Use *, Type * > > &MemoryUses, SmallPtrSetImpl< Instruction * > &ConsideredInsts, const TargetLowering &TLI, const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, unsigned &SeenInsts)
Recursively walk all the uses of I until we find a memory use.
static cl::opt< bool > StressStoreExtract("stress-cgp-store-extract", cl::Hidden, cl::init(false), cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"))
static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, const TargetLowering *TLI, SelectInst *SI)
Returns true if a SelectInst should be turned into an explicit branch.
static std::optional< std::pair< Instruction *, Constant * > > getIVIncrement(const PHINode *PN, const LoopInfo *LI)
If given PN is an inductive variable with value IVInc coming from the backedge, and on each iteration...
static cl::opt< bool > AddrSinkCombineBaseGV("addr-sink-combine-base-gv", cl::Hidden, cl::init(true), cl::desc("Allow combining of BaseGV field in Address sinking."))
static cl::opt< bool > AddrSinkUsingGEPs("addr-sink-using-gep", cl::Hidden, cl::init(true), cl::desc("Address sinking in CGP using GEPs."))
static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem, const LoopInfo *LI, Value *&RemAmtOut, PHINode *&LoopIncrPNOut)
static Value * getTrueOrFalseValue(SelectInst *SI, bool isTrue, const SmallPtrSet< const Instruction *, 2 > &Selects)
If isTrue is true, return the true value of SI; otherwise return the false value of SI.
static void DbgInserterHelper(DbgValueInst *DVI, Instruction *VI)
static cl::opt< bool > DisableBranchOpts("disable-cgp-branch-opts", cl::Hidden, cl::init(false), cl::desc("Disable branch optimizations in CodeGenPrepare"))
static cl::opt< bool > EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, cl::desc("Enable merging of redundant sexts when one is dominating" " the other."), cl::init(true))
static cl::opt< bool > EnableAndCmpSinking("enable-andcmp-sinking", cl::Hidden, cl::init(true), cl::desc("Enable sinking and/cmp into branches."))
static cl::opt< bool > ProfileGuidedSectionPrefix("profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use profile info to add section prefix for hot/cold functions"))
static cl::opt< unsigned > HugeFuncThresholdInCGPP("cgpp-huge-func", cl::init(10000), cl::Hidden, cl::desc("Least BB number of huge function."))
static cl::opt< bool > AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), cl::desc("Allow creation of selects in Address sinking."))
static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, const TargetTransformInfo *TTI)
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, const TargetLowering &TLI, const TargetRegisterInfo &TRI)
Check to see if all uses of OpVal by the specified inline asm call are due to memory operands.
static bool isIntrinsicOrLFToBeTailCalled(const TargetLibraryInfo *TLInfo, const CallInst *CI)
static cl::opt< bool > ForceSplitStore("force-split-store", cl::Hidden, cl::init(false), cl::desc("Force store splitting no matter what the target query says."))
static void computeBaseDerivedRelocateMap(const SmallVectorImpl< GCRelocateInst * > &AllRelocateCalls, MapVector< GCRelocateInst *, SmallVector< GCRelocateInst *, 0 > > &RelocateInstMap)
static bool simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, const SmallVectorImpl< GCRelocateInst * > &Targets)
static cl::opt< bool > AddrSinkCombineScaledReg("addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), cl::desc("Allow combining of ScaledReg field in Address sinking."))
static bool foldICmpWithDominatingICmp(CmpInst *Cmp, const TargetLowering &TLI)
For pattern like:
static bool MightBeFoldableInst(Instruction *I)
This is a little filter, which returns true if an addressing computation involving I might be folded ...
static bool matchIncrement(const Instruction *IVInc, Instruction *&LHS, Constant *&Step)
static cl::opt< bool > EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, cl::init(true), cl::desc("Enable splitting large offset of GEP."))
static cl::opt< bool > DisableComplexAddrModes("disable-complex-addr-modes", cl::Hidden, cl::init(false), cl::desc("Disables combining addressing modes with different parts " "in optimizeMemoryInst."))
static cl::opt< bool > EnableICMP_EQToICMP_ST("cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion."))
static cl::opt< bool > VerifyBFIUpdates("cgp-verify-bfi-updates", cl::Hidden, cl::init(false), cl::desc("Enable BFI update verification for " "CodeGenPrepare."))
static cl::opt< bool > BBSectionsGuidedSectionPrefix("bbsections-guided-section-prefix", cl::Hidden, cl::init(true), cl::desc("Use the basic-block-sections profile to determine the text " "section prefix for hot functions. Functions with " "basic-block-sections profile will be placed in `.text.hot` " "regardless of their FDO profile info. Other functions won't be " "impacted, i.e., their prefixes will be decided by FDO/sampleFDO " "profiles."))
static bool isIVIncrement(const Value *V, const LoopInfo *LI)
static cl::opt< bool > DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), cl::desc("Disable GC optimizations in CodeGenPrepare"))
static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP)
static bool isPromotedInstructionLegal(const TargetLowering &TLI, const DataLayout &DL, Value *Val)
Check whether or not Val is a legal instruction for TLI.
static cl::opt< uint64_t > FreqRatioToSkipMerge("cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), cl::desc("Skip merging empty blocks if (frequency of empty block) / " "(frequency of destination block) is greater than this ratio"))
static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
#define DEBUG_TYPE
static bool IsNonLocalValue(Value *V, BasicBlock *BB)
Return true if the specified values are defined in a different basic block than BB.
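The check is simple enough to sketch directly (assumed shape, matching the description above):
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// A value is non-local to BB if it is an instruction defined in another block;
// arguments and constants are never block-local.
static bool isNonLocalValueSketch(Value *V, BasicBlock *BB) {
  if (auto *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}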
static bool despeculateCountZeros(IntrinsicInst *CountZeros, LoopInfo &LI, const TargetLowering *TLI, const DataLayout *DL, ModifyDT &ModifiedDT, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHugeFunc)
If counting leading or trailing zeros is an expensive operation and a zero input is defined,...
static bool hasSameExtUse(Value *Val, const TargetLowering &TLI)
Check if all the uses of Val are equivalent (or free) zero or sign extensions.
static cl::opt< bool > StressExtLdPromotion("stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " "optimization in CodeGenPrepare"))
static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, BinaryOperator *&Add)
Match special-case patterns that check for unsigned add overflow.
static cl::opt< bool > DisableSelectToBranch("disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion."))
static cl::opt< bool > DisableDeletePHIs("disable-cgp-delete-phis", cl::Hidden, cl::init(false), cl::desc("Disable elimination of dead PHI nodes."))
static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL, const LoopInfo *LI, SmallSet< BasicBlock *, 32 > &FreshBBs, bool IsHuge)
static cl::opt< bool > AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), cl::desc("Allow creation of Phis in Address sinking."))
Defines an IR pass for CodeGen Prepare.
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:533
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:199
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:148
static cl::opt< TargetTransformInfo::TargetCostKind > CostKind("cost-kind", cl::desc("Target cost kind"), cl::init(TargetTransformInfo::TCK_RecipThroughput), cl::values(clEnumValN(TargetTransformInfo::TCK_RecipThroughput, "throughput", "Reciprocal throughput"), clEnumValN(TargetTransformInfo::TCK_Latency, "latency", "Instruction latency"), clEnumValN(TargetTransformInfo::TCK_CodeSize, "code-size", "Code size"), clEnumValN(TargetTransformInfo::TCK_SizeAndLatency, "size-latency", "Code size and latency")))
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
uint64_t Addr
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Rewrite Partial Register Uses
Hexagon Common GEP
IRTranslator LLVM IR MI
This defines the Use class.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
Definition: LICM.cpp:1502
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
This file implements a map that provides insertion order iteration.
Module.h This file contains the declarations for the Module class.
uint64_t IntrinsicInst * II
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
#define P(N)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
This file defines the PointerIntPair class.
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
static bool dominates(InstrPosIndexes &PosIndexes, const MachineInstr &A, const MachineInstr &B)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static bool optimizeCallInst(CallInst *CI, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
static bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT, const TargetTransformInfo &TTI, const DataLayout &DL, DomTreeUpdater *DTU)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
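Usage sketch for the STATISTIC macro (the counter name here is hypothetical): DEBUG_TYPE must be defined first, and the counter behaves like a plain integer that is reported under -stats.
#include "llvm/ADT/Statistic.h"

#define DEBUG_TYPE "example-pass"                        // required before STATISTIC
STATISTIC(NumWidgetsFolded, "Number of widgets folded"); // hypothetical counter

static void noteFold() {
  ++NumWidgetsFolded;  // incremented like a counter; printed with -stats
}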
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
This file describes how to lower LLVM code to machine code.
static cl::opt< bool > DisableSelectOptimize("disable-select-optimize", cl::init(true), cl::Hidden, cl::desc("Disable the select-optimization pass from running"))
Disable the select optimization pass.
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
static Constant * getConstantVector(MVT VT, ArrayRef< APInt > Bits, const APInt &Undefs, LLVMContext &C)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:78
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:981
bool ugt(const APInt &RHS) const
Unsigned greater than comparison.
Definition: APInt.h:1160
bool isZero() const
Determine if this value is zero, i.e. all bits are clear.
Definition: APInt.h:358
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:413
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
Definition: APInt.h:1489
unsigned logBase2() const
Definition: APInt.h:1717
APInt sext(unsigned width) const
Sign extend to a new width.
Definition: APInt.cpp:954
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1520
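A small usage sketch of the APInt operations listed above (the values are illustrative):
#include "llvm/ADT/APInt.h"
using namespace llvm;

static void apintDemo() {
  APInt A(32, 40);                        // 32-bit constant 40
  APInt Wide = A.zext(64);                // zero-extend to 64 bits
  APInt Neg(32, -8, /*isSigned=*/true);   // signed constant -8
  (void)Neg.getSExtValue();               // recovers -8 as int64_t
  if (A.isPowerOf2())                     // false for 40
    (void)A.logBase2();
  (void)Wide.ugt(A.zext(64));             // unsigned compare at matching widths
}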
an instruction to allocate memory on the stack
Definition: Instructions.h:61
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:122
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:115
void setAlignment(Align Align)
Definition: Instructions.h:126
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result * getCachedResult(IRUnitT &IR) const
Get the cached result of an analysis pass for a given IR unit.
Definition: PassManager.h:424
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
Represent the analysis usage information of a pass.
AnalysisUsage & addUsedIfAvailable()
Add the specified Pass class to the set of analyses used by this pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
Value handle that asserts if the Value is deleted.
Definition: ValueHandle.h:264
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
static unsigned getPointerOperandIndex()
Definition: Instructions.h:623
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
static unsigned getPointerOperandIndex()
Definition: Instructions.h:854
Analysis pass providing the BasicBlockSectionsProfileReader.
bool isFunctionHot(StringRef FuncName) const
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches,...
Definition: BasicBlock.h:658
void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:459
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:489
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
Definition: BasicBlock.cpp:279
void insertDbgRecordAfter(DbgRecord *DR, Instruction *I)
Insert a DbgRecord into a block at the position given by I.
const Instruction * getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
Definition: BasicBlock.cpp:386
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:177
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:168
bool IsNewDbgInfoFormat
Flag recording whether or not this block stores debug-info in the form of intrinsic instructions (fal...
Definition: BasicBlock.h:67
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
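Illustrative use of a few of the BasicBlock queries above (a sketch with an assumed helper name): split a block before an instruction and inspect the resulting edge.
#include <cassert>
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void splitBeforeSketch(Instruction *I) {
  BasicBlock *BB = I->getParent();
  // Everything from I onwards moves to the new block; BB now ends in an
  // unconditional branch to it.
  BasicBlock *Tail = BB->splitBasicBlock(I->getIterator(), "split.tail");
  assert(Tail->getSinglePredecessor() == BB);
  (void)BB->getTerminator();   // the newly created unconditional branch
  (void)Tail;
}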
BinaryOps getOpcode() const
Definition: InstrTypes.h:442
static BinaryOperator * Create(BinaryOps Op, Value *S1, Value *S2, const Twine &Name=Twine(), InsertPosition InsertBefore=nullptr)
Construct a binary instruction, given the opcode and the two operands.
This class represents a no-op cast from one type to another.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
Conditional or Unconditional Branch instruction.
void swapSuccessors()
Swap the successors of this branch instruction.
bool isConditional() const
BasicBlock * getSuccessor(unsigned i) const
bool isUnconditional() const
Analysis providing branch probability information.
static BranchProbability getBranchProbability(uint64_t Numerator, uint64_t Denominator)
bool isInlineAsm() const
Check if this call is an inline asm statement.
Definition: InstrTypes.h:1532
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1465
bool hasFnAttr(Attribute::AttrKind Kind) const
Determine whether this call has the given attribute.
Definition: InstrTypes.h:1551
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1410
void setArgOperand(unsigned i, Value *v)
Definition: InstrTypes.h:1415
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
Definition: InstrTypes.h:1401
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Definition: InstrTypes.h:530
static CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
@ ICMP_SLT
signed less than
Definition: InstrTypes.h:786
@ ICMP_UGT
unsigned greater than
Definition: InstrTypes.h:780
@ ICMP_SGT
signed greater than
Definition: InstrTypes.h:784
@ ICMP_ULT
unsigned less than
Definition: InstrTypes.h:782
@ ICMP_EQ
equal
Definition: InstrTypes.h:778
@ ICMP_NE
not equal
Definition: InstrTypes.h:779
Predicate getSwappedPredicate() const
For example, EQ->EQ, SLE->SGE, ULT->UGT, OEQ->OEQ, ULE->UGE, OLT->OGT, etc.
Definition: InstrTypes.h:909
static CmpInst * Create(OtherOps Op, Predicate Pred, Value *S1, Value *S2, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Construct a compare instruction, given the opcode, the predicate and the two operands.
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:847
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Base class for constants with no operands.
Definition: Constants.h:53
A constant value that is initialized with an expression using other constant values.
Definition: Constants.h:1097
static Constant * getBitCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2295
static Constant * getNeg(Constant *C, bool HasNSW=false)
Definition: Constants.cpp:2599
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:206
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:161
const APInt & getValue() const
Return the constant as an APInt value reference.
Definition: Constants.h:146
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1450
static Constant * get(ArrayRef< Constant * > V)
Definition: Constants.cpp:1399
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getAllOnesValue(Type *Ty)
Definition: Constants.cpp:417
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:846
This represents the llvm.dbg.value instruction.
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Record of a variable value-assignment, aka a non-instruction representation of the dbg....
LocationType Type
Classification of the debug-info record that this DbgVariableRecord represents.
void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
iterator_range< location_op_iterator > location_ops() const
Get the locations corresponding to the variable referenced by the debug info intrinsic.
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
bool erase(const KeyT &Val)
Definition: DenseMap.h:336
unsigned size() const
Definition: DenseMap.h:99
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
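Usage sketch for the DenseMap operations above: memoise a per-instruction property, inserting on first query (the property itself is a stand-in).
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool sideEffectCached(DenseMap<Instruction *, bool> &Cache,
                             Instruction *I) {
  auto It = Cache.find(I);
  if (It != Cache.end())
    return It->second;                    // cache hit
  bool Result = I->mayHaveSideEffects();  // stand-in for a costlier analysis
  Cache.insert({I, Result});
  return Result;
}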
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
This instruction extracts a struct member or array element value from an aggregate value.
iterator_range< idx_iterator > indices() const
This instruction compares its operands according to the predicate given to the constructor.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:680
This class implements simplifications for calls to fortified library functions (__st*cpy_chk,...
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
const BasicBlock & getEntryBlock() const
Definition: Function.h:807
const Value * getStatepoint() const
The statepoint with which this gc.relocate is associated.
Represents calls to the gc.relocate intrinsic.
unsigned getBasePtrIndex() const
The index into the associated statepoint's argument list which contains the base pointer of the pointe...
Represents a gc.statepoint intrinsic call.
Definition: Statepoint.h:61
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:915
static Type * getIndexedType(Type *Ty, ArrayRef< Value * > IdxList)
Returns the result type of a getelementptr with the given source element type and indexes.
void setAlignment(Align Align)
Sets the alignment attribute of the GlobalObject.
Definition: Globals.cpp:137
bool canIncreaseAlignment() const
Returns true if the alignment of the value can be unilaterally increased.
Definition: Globals.cpp:304
bool isThreadLocal() const
If the value is "Thread Local", its value isn't shared by the threads.
Definition: GlobalValue.h:263
Type * getValueType() const
Definition: GlobalValue.h:296
This instruction compares its operands according to the predicate given to the constructor.
Value * CreateZExtOrBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2157
ConstantInt * getTrue()
Get the constant value for i1 true.
Definition: IRBuilder.h:463
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.cpp:1091
Value * CreateFreeze(Value *V, const Twine &Name="")
Definition: IRBuilder.h:2555
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition: IRBuilder.h:217
Value * CreateNUWAdd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1357
Value * CreateGEP(Type *Ty, Value *Ptr, ArrayRef< Value * > IdxList, const Twine &Name="", GEPNoWrapFlags NW=GEPNoWrapFlags::none())
Definition: IRBuilder.h:1883
Value * createIsFPClass(Value *FPNum, unsigned Test)
Definition: IRBuilder.cpp:1288
Value * CreateCmp(CmpInst::Predicate Pred, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2386
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2417
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2261
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2147
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1137
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1843
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2371
ConstantInt * getInt(const APInt &AI)
Get a constant integer value.
Definition: IRBuilder.h:499
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2686
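A short sketch tying together several of the IRBuilder calls above (names are illustrative): build an icmp-eq-zero followed by a select immediately before an existing instruction.
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *emitSelectIfZero(Instruction *InsertPt, Value *X, Value *A,
                               Value *B) {
  IRBuilder<> Builder(InsertPt);   // new instructions go before InsertPt
  Value *IsZero =
      Builder.CreateICmpEQ(X, Constant::getNullValue(X->getType()), "is.zero");
  return Builder.CreateSelect(IsZero, A, B, "sel");
}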
Instruction * clone() const
Create a copy of 'this' instruction that is identical in all ways except the following:
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:78
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
bool isEHPad() const
Return true if the instruction is a variety of EH-block.
Definition: Instruction.h:824
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
Definition: Instruction.h:169
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:274
bool isShift() const
Definition: Instruction.h:281
void dropPoisonGeneratingFlags()
Drops flags that may cause this instruction to evaluate to poison despite having non-poison inputs.
std::optional< simple_ilist< DbgRecord >::iterator > getDbgReinsertionPosition()
Return an iterator to the position of the "Next" DbgRecord after this instruction,...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:463
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:48
Intrinsic::ID getIntrinsicID() const
Return the intrinsic ID of this intrinsic.
Definition: IntrinsicInst.h:55
Invoke instruction.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
An instruction for reading from memory.
Definition: Instructions.h:174
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:259
Analysis pass that exposes the LoopInfo for a function.
Definition: LoopInfo.h:566
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
The legacy pass manager's analysis pass to compute loop information.
Definition: LoopInfo.h:593
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:39
MDNode * createBranchWeights(uint32_t TrueWeight, uint32_t FalseWeight, bool IsExpected=false)
Return metadata containing two branch weights.
Definition: MDBuilder.cpp:37
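Sketch of attaching the resulting metadata to a conditional branch (the weights are illustrative):
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
using namespace llvm;

static void markLikelyTaken(BranchInst *BI) {
  MDBuilder MDB(BI->getContext());
  BI->setMetadata(LLVMContext::MD_prof,
                  MDB.createBranchWeights(/*TrueWeight=*/2000,
                                          /*FalseWeight=*/1));
}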
Machine Value Type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getIntegerVT(unsigned BitWidth)
void replacePhiUsesWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Update all phi nodes in this basic block to refer to basic block New instead of basic block Old.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:36
iterator end()
Definition: MapVector.h:71
VectorType::iterator erase(typename VectorType::iterator Iterator)
Remove the element given by Iterator.
Definition: MapVector.h:193
iterator find(const KeyT &Key)
Definition: MapVector.h:167
bool empty() const
Definition: MapVector.h:79
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: MapVector.h:141
void clear()
Definition: MapVector.h:88
This is the common base class for memset/memcpy/memmove.
This class wraps the llvm.memcpy/memmove intrinsics.
An analysis over an "inner" IR unit that provides access to an analysis manager over a "outer" IR uni...
Definition: PassManager.h:688
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
Value * getIncomingValueForBlock(const BasicBlock *BB) const
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
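Usage sketch for the PHINode API above (block and value names are assumptions): merge two incoming definitions at a join block.
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static PHINode *mergeAtJoin(BasicBlock *Join, Value *FromThen,
                            BasicBlock *Then, Value *FromElse,
                            BasicBlock *Else) {
  PHINode *PN =
      PHINode::Create(FromThen->getType(), /*NumReservedValues=*/2, "merge");
  PN->insertBefore(&*Join->begin());   // PHIs must lead the block
  PN->addIncoming(FromThen, Then);
  PN->addIncoming(FromElse, Else);
  return PN;
}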
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
PointerIntPair - This class implements a pair of a pointer and small integer.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
An analysis pass based on the new PM to deliver ProfileSummaryInfo.
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Analysis providing profile information.
Return a value (possibly void), from a function.
Value * getReturnValue() const
Convenience accessor. Returns null if there is no return value.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
This class represents the LLVM 'select' instruction.
static SelectInst * Create(Value *C, Value *S1, Value *S2, const Twine &NameStr="", InsertPosition InsertBefore=nullptr, Instruction *MDFrom=nullptr)
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
value_type pop_back_val()
Definition: SetVector.h:285
This instruction constructs a fixed permutation of two input vectors.
VectorType * getType() const
Overload to return most specific vector type.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition: DenseSet.h:290
size_type size() const
Definition: SmallPtrSet.h:95
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
bool erase(PtrType Ptr)
Remove pointer from the set.
Definition: SmallPtrSet.h:384
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
bool erase(const T &V)
Definition: SmallSet.h:207
void clear()
Definition: SmallSet.h:218
bool contains(const T &V) const
Check if the SmallSet contains the given element.
Definition: SmallSet.h:236
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:950
void reserve(size_type N)
Definition: SmallVector.h:676
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
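Sketch of the worklist idiom these containers are typically combined for: visit the transitive users of an instruction exactly once, without recursion.
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static void visitUsersOnce(Instruction *Root) {
  SmallVector<Instruction *, 8> Worklist;
  SmallPtrSet<Instruction *, 16> Visited;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;                             // already processed
    for (User *U : I->users())
      if (auto *UI = dyn_cast<Instruction>(U))
        Worklist.push_back(UI);
  }
}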
An instruction for storing to memory.
Definition: Instructions.h:290
static unsigned getPointerOperandIndex()
Definition: Instructions.h:379
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
Definition: DataLayout.h:571
TypeSize getElementOffset(unsigned Idx) const
Definition: DataLayout.h:600
Class to represent struct types.
Definition: DerivedTypes.h:216
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
virtual bool isVectorShiftByScalarCheap(Type *Ty) const
Return true if it's significantly cheaper to shift a vector by a uniform scalar than by an amount whi...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool isSelectSupported(SelectSupportKind) const
virtual bool isEqualityCmpFoldedWithSignedCmp() const
Return true if instruction generated for equality comparison is folded with instruction generated for...
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, bool MathUsed) const
Try to convert math with an overflow comparison into the corresponding DAG node operation.
virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const
Return if the target supports combining a chain like:
bool isExtLoad(const LoadInst *Load, const Instruction *Ext, const DataLayout &DL) const
Return true if Load and Ext can form an ExtLoad.
virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const
Return true if sign-extension from FromTy to ToTy is cheaper than zero-extension.
const TargetMachine & getTargetMachine() const
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool enableExtLdPromotion() const
Return true if the target wants to use the optimization that turns ext(promotableInst1(....
virtual bool isCheapToSpeculateCttz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic cttz.
bool isJumpExpensive() const
Return true if Flow Control is an expensive operation that should be avoided.
bool hasExtractBitsInsn() const
Return true if the target has BitExtract instructions.
SelectSupportKind
Enum that describes what type of support for selects the target has.
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *=nullptr) const
Determine if the target supports unaligned memory accesses.
bool isSlowDivBypassed() const
Returns true if target has indicated at least one type should be bypassed.
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it's free to truncate a value of type FromTy to type ToTy.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
virtual MVT getPreferredSwitchConditionType(LLVMContext &Context, EVT ConditionVT) const
Returns preferred type for switch condition.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, unsigned &Cost) const
Return true if the target can combine store(extractelement VectorTy, Idx).
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g.
virtual bool shouldConsiderGEPOffsetSplit() const
bool hasMultipleConditionRegisters() const
Return true if multiple condition registers are available.
bool isExtFree(const Instruction *I) const
Return true if the extension represented by I is free.
virtual bool getAddrModeArguments(IntrinsicInst *, SmallVectorImpl< Value * > &, Type *&) const
CodeGenPrepare sinks address calculations into the same BB as Load/Store instructions reading the add...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
bool isPredictableSelectExpensive() const
Return true if selects are only cheaper than branches if the branch is unlikely to be predicted right...
virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const
Return true if it is cheaper to split the store of a merged int val from a pair of smaller values int...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
const DenseMap< unsigned int, unsigned int > & getBypassSlowDivWidths() const
Returns map of slow types for division or remainder with corresponding fast types.
virtual bool isCheapToSpeculateCtlz(Type *Ty) const
Return true if it is cheap to speculate a call to intrinsic ctlz.
virtual bool useSoftFloat() const
virtual int64_t getPreferredLargeGEPBaseOffset(int64_t MinOffset, int64_t MaxOffset) const
Return the preferred common base offset.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
virtual bool shouldAlignPointerArgs(CallInst *, unsigned &, Align &) const
Return true if the pointer arguments to CI should be aligned by aligning the object whose address is ...
virtual Type * shouldConvertSplatType(ShuffleVectorInst *SVI) const
Given a shuffle vector SVI representing a vector splat, return a new scalar type of size equal to SVI...
virtual bool addressingModeSupportsTLS(const GlobalValue &) const
Returns true if the targets addressing mode can target thread local storage (TLS).
virtual bool shouldConvertPhiType(Type *From, Type *To) const
Given a set of interconnected phis of type 'From' that are loaded/stored or bitcast to type 'To',...
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
virtual bool preferZeroCompareBranch() const
Return true if the heuristic to prefer icmp eq zero should be used in code gen prepare.
virtual bool shouldSinkOperands(Instruction *I, SmallVectorImpl< Use * > &Ops) const
Return true if sinking I's operands to the same basic block as I is profitable, e....
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
virtual bool optimizeExtendOrTruncateConversion(Instruction *I, Loop *L, const TargetTransformInfo &TTI) const
Try to optimize extending or truncating conversion instructions (like zext, trunc,...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
std::vector< AsmOperandInfo > AsmOperandInfoVector
virtual bool ExpandInlineAsm(CallInst *) const
This hook allows the target to expand an inline asm call to be explicit llvm code if it wants to.
virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, const TargetRegisterInfo *TRI, const CallBase &Call) const
Split up the constraint string from the inline assembly value into the specific constraints and their...
virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, SelectionDAG *DAG=nullptr) const
Determines the constraint code and constraint type to use for the specific AsmOperandInfo,...
virtual bool mayBeEmittedAsTailCall(const CallInst *) const
Return true if the target may be able to emit the call instruction as a tail call.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
Target-Independent Code Generator Pass Configuration Options.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetLowering * getTargetLowering() const
virtual bool addrSinkUsingGEPs() const
Sink addresses into blocks using GEP instructions rather than pointer casts and arithmetic.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
bool isExpensiveToSpeculativelyExecute(const Instruction *I) const
Return true if the cost of the instruction is too high to speculatively execute and should be kept be...
InstructionCost getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind=TTI::TCK_RecipThroughput, TTI::OperandValueInfo Opd1Info={TTI::OK_AnyValue, TTI::OP_None}, TTI::OperandValueInfo Opd2Info={TTI::OK_AnyValue, TTI::OP_None}, ArrayRef< const Value * > Args=std::nullopt, const Instruction *CxtI=nullptr, const TargetLibraryInfo *TLibInfo=nullptr) const
This is an approximation of reciprocal throughput of a math/logic op.
TargetCostKind
The kind of cost model.
@ TCK_RecipThroughput
Reciprocal throughput.
@ TCK_SizeAndLatency
The weighted sum of size and latency.
InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const
Return the expected cost of materializing for the given integer immediate of the specified type.
bool shouldConsiderAddressTypePromotion(const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const
BranchProbability getPredictableBranchThreshold() const
If a branch or a select condition is skewed in one direction by more than this factor,...
@ TCC_Basic
The cost of a typical 'add' instruction.
InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, TTI::TargetCostKind CostKind, unsigned Index=-1, Value *Op0=nullptr, Value *Op1=nullptr) const
This class represents a truncation of integer types.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
unsigned getIntegerBitWidth() const
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isScalableTy() const
Return true if this is a type whose size is a known multiple of vscale.
bool isIntOrPtrTy() const
Return true if this is an integer type or a pointer type.
Definition: Type.h:239
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:224
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:343
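Small sketch using the Type queries above (the helper and the 32-bit floor are assumptions for illustration):
#include <algorithm>
#include "llvm/IR/Type.h"
using namespace llvm;

// Pick an integer type at least as wide as Ty's scalar width, never narrower
// than i32.
static Type *widenedIntTy(Type *Ty, LLVMContext &Ctx) {
  unsigned Bits = std::max(Ty->getScalarSizeInBits(), 32u);
  return Type::getIntNTy(Ctx, Bits);
}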
'undef' values are things that do not have specified contents.
Definition: Constants.h:1398
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1833
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
op_range operands()
Definition: User.h:242
bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:21
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
void setOperand(unsigned i, Value *Val)
Definition: User.h:174
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
See the file comment.
Definition: ValueMap.h:84
void clear()
Definition: ValueMap.h:145
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
Definition: Value.h:736
user_iterator user_begin()
Definition: Value.h:397
void setName(const Twine &Name)
Change the name of the value.
Definition: Value.cpp:377
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:927
bool isUsedInBasicBlock(const BasicBlock *BB) const
Check if this value is used in the specified basic block.
Definition: Value.cpp:234
bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition: Value.cpp:153
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition: Value.cpp:694
bool use_empty() const
Definition: Value.h:344
user_iterator user_end()
Definition: Value.h:405
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
unsigned getNumUses() const
This method computes the number of uses of this Value.
Definition: Value.cpp:255
iterator_range< use_iterator > uses()
Definition: Value.h:376
void mutateType(Type *Ty)
Mutate the type of this Value to be of the specified type.
Definition: Value.h:815
user_iterator_impl< User > user_iterator
Definition: Value.h:390
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:383
void dump() const
Support for debugging, callable in GDB: V->dump()
Definition: AsmWriter.cpp:5266
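Sketch of the use-list APIs above in their most common pairing: redirect every use of an instruction and delete it once nothing refers to it.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Value.h"
using namespace llvm;

static void replaceAndErase(Instruction *Old, Value *New) {
  Old->replaceAllUsesWith(New);   // rewrite every Use edge in place
  if (Old->use_empty())
    Old->eraseFromParent();       // unlink from the block and delete
}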
Value handle that is nullable, but tries to track the Value.
Definition: ValueHandle.h:204
bool pointsToAliveValue() const
Definition: ValueHandle.h:224
This class represents zero extension of integer types.
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
constexpr bool isNonZero() const
Definition: TypeSize.h:158
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
TypeSize getSequentialElementStride(const DataLayout &DL) const
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ Entry
Definition: COFF.h:826
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
unsigned getAddrMode(MCInstrInfo const &MCII, MCInst const &MCI)
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
Definition: PatternMatch.h:524
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
class_match< BinaryOperator > m_BinOp()
Match an arbitrary binary operation and ignore it.
Definition: PatternMatch.h:100
BinaryOp_match< LHS, RHS, Instruction::URem > m_URem(const LHS &L, const RHS &R)
class_match< Constant > m_Constant()
Match an arbitrary Constant and ignore it.
Definition: PatternMatch.h:165
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:972
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:816
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:875
BinOpPred_match< LHS, RHS, is_right_shift_op > m_Shr(const LHS &L, const RHS &R)
Matches logical shift operations.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Add, OverflowingBinaryOperator::NoUnsignedWrap, true > m_c_NUWAdd(const LHS &L, const RHS &R)
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:168
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:592
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:599
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
auto m_LogicalOr()
Matches L || R where L and R are arbitrary values.
TwoOps_match< V1_t, V2_t, Instruction::ShuffleVector > m_Shuffle(const V1_t &v1, const V2_t &v2)
Matches ShuffleVectorInst independently of mask value.
match_combine_and< class_match< Constant >, match_unless< constantexpr_match > > m_ImmConstant()
Match an arbitrary immediate Constant and ignore it.
Definition: PatternMatch.h:854
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
class_match< CmpInst > m_Cmp()
Matches any compare instruction and ignore it.
Definition: PatternMatch.h:105
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:299
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:92
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
UAddWithOverflow_match< LHS_t, RHS_t, Sum_t > m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S)
Match an icmp instruction checking for unsigned overflow on addition.
auto m_LogicalAnd()
Matches L && R where L and R are arbitrary values.
auto m_Undef()
Match an arbitrary undef constant.
Definition: PatternMatch.h:152
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
ThreeOps_match< Val_t, Elt_t, Idx_t, Instruction::InsertElement > m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx)
Matches InsertElementInst.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
Definition: PatternMatch.h:239
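Usage sketch for the matchers above (an illustrative helper, not code from the pass): recognise a shift-right of a shift-left by constant amounts and capture the pieces.
#include "llvm/ADT/APInt.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Value.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Match `(X << C1) >> C2` (logical or arithmetic right shift), the kind of
// shape the extract-bits logic above is interested in.
static bool matchShlThenShr(Value *V, Value *&X, const APInt *&C1,
                            const APInt *&C2) {
  return match(V, m_Shr(m_Shl(m_Value(X), m_APInt(C1)), m_APInt(C2)));
}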
int compare(DigitsT LDigits, int16_t LScale, DigitsT RDigits, int16_t RScale)
Compare two scaled numbers.
Definition: ScaledNumber.h:252
ManagedStatic< cl::opt< FnT >, OptCreatorT > Action
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< PhiNode * > Phi
Definition: RDFGraph.h:390
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
pred_iterator pred_end(BasicBlock *BB)
Definition: CFG.h:114
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:480
bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from the given basic block.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Definition: Local.cpp:540
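A small usage sketch (assumed context: Old is a value whose uses were just rewritten and TLI is an optional TargetLibraryInfo; both names are illustrative):
#include "llvm/IR/Value.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
// After replacing all uses of Old, delete it (and any operands that become
// trivially dead as a result) if nothing references it any more.
static void cleanUpAfterRewrite(Value *Old, const TargetLibraryInfo *TLI) {
  if (Old->use_empty())
    RecursivelyDeleteTriviallyDeadInstructions(Old, TLI);
}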
bool ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions=false, const TargetLibraryInfo *TLI=nullptr, DomTreeUpdater *DTU=nullptr)
If a terminator instruction is predicated on a constant value, convert it into an unconditional branch to the constant destination.
Definition: Local.cpp:130
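Sketch of the typical call pattern (the driver loop here is illustrative; the pass invokes this from its own iteration):
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
// Fold branches and switches whose condition is a constant, across a function.
static bool foldConstantTerminators(Function &F) {
  bool Changed = false;
  for (BasicBlock &BB : F)
    Changed |= ConstantFoldTerminator(&BB, /*DeleteDeadConditions=*/true);
  return Changed;
}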
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2182
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:145
void salvageDebugInfo(const MachineRegisterInfo &MRI, MachineInstr &MI)
Assuming the instruction MI is going to be deleted, attempt to salvage debug users of MI by writing the effect of MI in a DIExpression.
Definition: Utils.cpp:1678
auto successors(const MachineBasicBlock *BB)
ReturnInst * FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB, BasicBlock *Pred, DomTreeUpdater *DTU=nullptr)
This method duplicates the specified return instruction into a predecessor which ends in an unconditional branch.
bool operator!=(uint64_t V1, const APInt &V2)
Definition: APInt.h:2060
Instruction * SplitBlockAndInsertIfElse(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ElseBlock=nullptr)
Similar to SplitBlockAndInsertIfThen, but the inserted block is on the false path of the branch.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2098
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iterators into the range.
Definition: STLExtras.h:656
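Illustrative sketch of the usual pattern (not specific to this pass): erase instructions while walking a block without invalidating the loop iterator.
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
// Delete every non-terminator instruction in BB that has no uses and no side
// effects; the early-increment range keeps iteration valid across
// eraseFromParent().
static void dropUnusedInstrs(BasicBlock &BB) {
  for (Instruction &I : make_early_inc_range(BB))
    if (I.use_empty() && !I.mayHaveSideEffects() && !I.isTerminator())
      I.eraseFromParent();
}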
void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified block, which must have no predecessors.
auto unique(Range &&R, Predicate P)
Definition: STLExtras.h:2038
Value * getSplatValue(const Value *V)
Get splat value if the input is a splat vector or return nullptr.
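A sketch under assumed context (Amt is a vector operand such as a shift amount; the helper name is made up): prefer the scalar lane when the vector is a splat.
#include "llvm/Analysis/VectorUtils.h"
using namespace llvm;
// If Amt is a splat vector, return the broadcast scalar; otherwise keep the
// vector value itself.
static Value *preferScalarAmount(Value *Amt) {
  if (Value *Scalar = getSplatValue(Amt))
    return Scalar;
  return Amt;
}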
void initializeCodeGenPrepareLegacyPassPass(PassRegistry &)
bool hasBranchWeightOrigin(const Instruction &I)
Check if Branch Weight Metadata has an "expected" field from an llvm.expect* intrinsic.
void findDbgValues(SmallVectorImpl< DbgValueInst * > &DbgValues, Value *V, SmallVectorImpl< DbgVariableRecord * > *DbgVariableRecords=nullptr)
Finds the llvm.dbg.value intrinsics describing a value.
Definition: DebugInfo.cpp:138
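Usage sketch (variable names are illustrative): collect the debug users of a value before rewriting it, in both intrinsic and record form.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DebugInfo.h"
using namespace llvm;
// Gather the llvm.dbg.value users of V (and the non-intrinsic
// DbgVariableRecords) so they can be updated after V is replaced.
static void collectDebugUsers(Value *V,
                              SmallVectorImpl<DbgValueInst *> &DbgValues,
                              SmallVectorImpl<DbgVariableRecord *> &DVRs) {
  findDbgValues(DbgValues, V, &DVRs);
}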
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool SplitIndirectBrCriticalEdges(Function &F, bool IgnoreBlocksWithoutPHI, BranchProbabilityInfo *BPI=nullptr, BlockFrequencyInfo *BFI=nullptr)
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
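Common call pattern, sketched (the SimplifyQuery SQ is assumed to be set up by the caller):
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;
// If InstSimplify can fold I outright, replace its uses and erase it.
static bool foldIfSimplifiable(Instruction *I, const SimplifyQuery &SQ) {
  if (Value *V = simplifyInstruction(I, SQ)) {
    I->replaceAllUsesWith(V);
    I->eraseFromParent();
    return true;
  }
  return false;
}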
Align getKnownAlignment(Value *V, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to infer an alignment for the specified pointer.
Definition: Local.h:242
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container: C.erase(std::remove(C.begin(), C.end(), V), C.end());
Definition: STLExtras.h:2090
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool isSplatValue(const Value *V, int Index=-1, unsigned Depth=0)
Return true if each element of the vector value V is poisoned or equal to every other non-poisoned element.
pred_iterator pred_begin(BasicBlock *BB)
Definition: CFG.h:110
bool DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr)
Examine each PHI in the given block and delete it if it is dead.
bool replaceAndRecursivelySimplify(Instruction *I, Value *SimpleV, const TargetLibraryInfo *TLI=nullptr, const DominatorTree *DT=nullptr, AssumptionCache *AC=nullptr, SmallSetVector< Instruction *, 8 > *UnsimplifiedUsers=nullptr)
Replace all uses of 'I' with 'SimpleV' and simplify the uses recursively.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool recognizeBSwapOrBitReverseIdiom(Instruction *I, bool MatchBSwaps, bool MatchBitReversals, SmallVectorImpl< Instruction * > &InsertedInsts)
Try to match a bswap or bitreverse idiom.
Definition: Local.cpp:4001
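Sketch of the typical use (I is assumed to be the root of a hand-rolled byte-swap, e.g. an "or" that merges shifted bytes; the wrapper name is hypothetical):
#include "llvm/ADT/SmallVector.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
// Try to collapse a byte-swap idiom rooted at I into an llvm.bswap intrinsic;
// InsertedInsts receives any helper instructions that were created.
static bool tryFormBSwap(Instruction *I) {
  SmallVector<Instruction *, 4> InsertedInsts;
  return recognizeBSwapOrBitReverseIdiom(I, /*MatchBSwaps=*/true,
                                         /*MatchBitReversals=*/false,
                                         InsertedInsts);
}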
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
FPClassTest
Floating-point class tests, supported by 'is_fpclass' intrinsic.
void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1736
bool isSafeToSpeculativelyExecute(const Instruction *I, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr, bool UseVariableInfo=true)
Return true if the instruction does not have any effects besides calculating the result and does not have undefined behavior.
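Sketch (assumed context: we want to hoist I above the conditional branch Br that currently guards it, which is only sound if I cannot trap or have side effects):
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;
// Hoist I in front of the guarding branch only when it is speculatable.
static bool hoistIfSpeculatable(Instruction *I, BranchInst *Br) {
  if (!isSafeToSpeculativelyExecute(I))
    return false;
  I->moveBefore(Br);
  return true;
}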
FunctionPass * createCodeGenPrepareLegacyPass()
createCodeGenPrepareLegacyPass - Transform the code to expose more pattern matching during instruction selection.
ISD::CondCode getFCmpCondCode(FCmpInst::Predicate Pred)
getFCmpCondCode - Return the ISD condition code corresponding to the given LLVM IR floating-point condition code.
Definition: Analysis.cpp:199
bool VerifyLoopInfo
Enable verification of loop info.
Definition: LoopInfo.cpp:51
bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
bool attributesPermitTailCall(const Function *F, const Instruction *I, const ReturnInst *Ret, const TargetLoweringBase &TLI, bool *AllowDifferingSizes=nullptr)
Test whether, given that the input instruction is in the tail call position, there is an attribute mismatch between the caller and the callee that would prohibit forming a tail call.
Definition: Analysis.cpp:583
bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
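Sketch of the usual pattern (the driver function is illustrative): after branch folding, blocks with a single unconditional-branch predecessor can often be folded away. The helper itself checks legality and returns false when the merge is not possible.
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Function.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
// Try to merge every non-entry block into its predecessor; the early-increment
// range keeps iteration valid when a merged block is erased.
static bool mergeTrivialBlocks(Function &F) {
  bool Changed = false;
  for (BasicBlock &BB : make_early_inc_range(drop_begin(F)))
    Changed |= MergeBlockIntoPredecessor(&BB);
  return Changed;
}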
@ And
Bitwise or logical AND of integers.
@ Add
Sum of integers.
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the given range.
Definition: STLExtras.h:1921
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
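Sketch (Br is assumed to be a conditional BranchInst): read profile weights off the branch, if present.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/ProfDataUtils.h"
using namespace llvm;
// Returns true and fills Weights (taken, not-taken) when Br carries
// !prof branch_weights metadata.
static bool readBranchWeights(const BranchInst *Br,
                              SmallVectorImpl<uint32_t> &Weights) {
  const MDNode *ProfMD = Br->getMetadata(LLVMContext::MD_prof);
  return ProfMD && extractBranchWeights(ProfMD, Weights);
}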
bool bypassSlowDivision(BasicBlock *BB, const DenseMap< unsigned int, unsigned int > &BypassWidth)
This optimization identifies DIV instructions in a BB that can be profitably bypassed and carried out with a shorter, faster divide.
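A sketch of how a caller sets this up (the 64-to-32 mapping is an assumption for illustration; in practice the widths come from the target's slow-division table):
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
using namespace llvm;
// Ask for 64-bit divisions/remainders in BB to be guarded and executed as
// 32-bit operations when both operands fit in 32 bits.
static bool bypass64BitDivides(BasicBlock *BB) {
  DenseMap<unsigned, unsigned> BypassWidths;
  BypassWidths[64] = 32; // divide bit width -> narrower bit width
  return bypassSlowDivision(BB, BypassWidths);
}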
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C.begin(), C.end(), pred), C.end());
Definition: STLExtras.h:2082
std::pair< Value *, FPClassTest > fcmpToClassTest(CmpInst::Predicate Pred, const Function &F, Value *LHS, Value *RHS, bool LookThroughSrc=true)
Returns a pair of values, which if passed to llvm.is.fpclass, returns the same result as an fcmp with the given operands.
auto predecessors(const MachineBasicBlock *BB)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the old basic block, and the rest of the instructions in the BB are moved to a new block.
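Usage sketch (assumed context: Cond is an i1 value available before I, and I should only execute when Cond is true; the wrapper name is made up):
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
// Split the block at I, branch on Cond, and move I into the new "then" block
// so it only runs when Cond holds.
static void guardWithCondition(Instruction *I, Value *Cond) {
  Instruction *ThenTerm =
      SplitBlockAndInsertIfThen(Cond, I->getIterator(), /*Unreachable=*/false);
  I->moveBefore(ThenTerm);
}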
BasicBlock * SplitEdge(BasicBlock *From, BasicBlock *To, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the edge connecting the specified blocks, and return the newly created basic block between From and To.
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
Value * simplifyURemInst(Value *LHS, Value *RHS, const SimplifyQuery &Q)
Given operands for a URem, fold the result or return null.
CGPassBuilderOption getCGPassBuilderOption()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define NC
Definition: regutils.h:42
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:35
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:275
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:291
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:359
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:275
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:307
bool isRound() const
Return true if the size is a power-of-two number of bytes.
Definition: ValueTypes.h:239
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:152
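Sketch of how pass-level code typically compares IR types through EVT (the helper and its arguments are placeholders):
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Type.h"
using namespace llvm;
// Return true if a cast from SrcTy to DstTy would be a truncation, judged by
// the bit widths of the corresponding EVTs.
static bool isTruncatingCast(Type *SrcTy, Type *DstTy) {
  EVT SrcVT = EVT::getEVT(SrcTy);
  EVT DstVT = EVT::getEVT(DstTy);
  return SrcVT.isInteger() && DstVT.isInteger() && DstVT.bitsLT(SrcVT);
}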
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
ExtAddrMode()=default
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
const DataLayout & DL
Definition: SimplifyQuery.h:71
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg + ScalableOffset*vscale.
This contains information for each constraint that we are lowering.