Bug Summary

File: lib/CodeGen/CodeGenPrepare.cpp
Warning: line 5826, column 3
Called C++ object pointer is null
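
In short, the path the analyzer reports (events 1-12 below, continuing inside optimizeBlock) reduces to this shape: the TLI member starts out null, is only assigned when a TargetPassConfig is available, and line 5826 is reached on a path where the analyzer assumes no TargetPassConfig exists, so a member call is made through a null pointer. The following is a minimal sketch of that shape only, assuming nothing about the real code at line 5826 (which lies outside this excerpt); the stub names are illustrative, not LLVM APIs.

  // Illustrative sketch of the reported pattern; not the actual code at line 5826.
  struct TargetLoweringStub {                  // stand-in for the real TargetLowering
    bool isSlowDivBypassed() const { return false; }
  };

  struct PassStub {                            // stand-in for CodeGenPrepare
    const TargetLoweringStub *TLI = nullptr;   // stays null if no TargetPassConfig

    void init(const TargetLoweringStub *FromTPC) {
      if (FromTPC)                             // event 3: "Assuming 'TPC' is null" -> not taken
        TLI = FromTPC;
    }

    bool laterUse() const {
      // Guarded uses such as "TLI && TLI->isSlowDivBypassed()" are safe;
      // an unguarded member call on the possibly-null TLI is what gets flagged.
      return TLI->isSlowDivBypassed();         // "Called C++ object pointer is null"
    }
  };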

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CodeGenPrepare.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~svn373517/build-llvm/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen -I /build/llvm-toolchain-snapshot-10~svn373517/build-llvm/include -I /build/llvm-toolchain-snapshot-10~svn373517/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~svn373517/build-llvm/lib/CodeGen -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~svn373517=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-10-02-234743-9763-1 -x c++ /build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp

/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp

1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass munges the code in the input function to better prepare it for
10// SelectionDAG-based code generation. This works around limitations in its
11// basic-block-at-a-time approach. It should eventually be removed.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ADT/APInt.h"
16#include "llvm/ADT/ArrayRef.h"
17#include "llvm/ADT/DenseMap.h"
18#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/PointerIntPair.h"
20#include "llvm/ADT/STLExtras.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/Analysis/BlockFrequencyInfo.h"
25#include "llvm/Analysis/BranchProbabilityInfo.h"
26#include "llvm/Analysis/ConstantFolding.h"
27#include "llvm/Analysis/InstructionSimplify.h"
28#include "llvm/Analysis/LoopInfo.h"
29#include "llvm/Analysis/MemoryBuiltins.h"
30#include "llvm/Analysis/ProfileSummaryInfo.h"
31#include "llvm/Analysis/TargetLibraryInfo.h"
32#include "llvm/Analysis/TargetTransformInfo.h"
33#include "llvm/Transforms/Utils/Local.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/Analysis/VectorUtils.h"
36#include "llvm/CodeGen/Analysis.h"
37#include "llvm/CodeGen/ISDOpcodes.h"
38#include "llvm/CodeGen/SelectionDAGNodes.h"
39#include "llvm/CodeGen/TargetLowering.h"
40#include "llvm/CodeGen/TargetPassConfig.h"
41#include "llvm/CodeGen/TargetSubtargetInfo.h"
42#include "llvm/CodeGen/ValueTypes.h"
43#include "llvm/Config/llvm-config.h"
44#include "llvm/IR/Argument.h"
45#include "llvm/IR/Attributes.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/CallSite.h"
48#include "llvm/IR/Constant.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Dominators.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/GlobalVariable.h"
57#include "llvm/IR/IRBuilder.h"
58#include "llvm/IR/InlineAsm.h"
59#include "llvm/IR/InstrTypes.h"
60#include "llvm/IR/Instruction.h"
61#include "llvm/IR/Instructions.h"
62#include "llvm/IR/IntrinsicInst.h"
63#include "llvm/IR/Intrinsics.h"
64#include "llvm/IR/LLVMContext.h"
65#include "llvm/IR/MDBuilder.h"
66#include "llvm/IR/Module.h"
67#include "llvm/IR/Operator.h"
68#include "llvm/IR/PatternMatch.h"
69#include "llvm/IR/Statepoint.h"
70#include "llvm/IR/Type.h"
71#include "llvm/IR/Use.h"
72#include "llvm/IR/User.h"
73#include "llvm/IR/Value.h"
74#include "llvm/IR/ValueHandle.h"
75#include "llvm/IR/ValueMap.h"
76#include "llvm/Pass.h"
77#include "llvm/Support/BlockFrequency.h"
78#include "llvm/Support/BranchProbability.h"
79#include "llvm/Support/Casting.h"
80#include "llvm/Support/CommandLine.h"
81#include "llvm/Support/Compiler.h"
82#include "llvm/Support/Debug.h"
83#include "llvm/Support/ErrorHandling.h"
84#include "llvm/Support/MachineValueType.h"
85#include "llvm/Support/MathExtras.h"
86#include "llvm/Support/raw_ostream.h"
87#include "llvm/Target/TargetMachine.h"
88#include "llvm/Target/TargetOptions.h"
89#include "llvm/Transforms/Utils/BasicBlockUtils.h"
90#include "llvm/Transforms/Utils/BypassSlowDivision.h"
91#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
92#include <algorithm>
93#include <cassert>
94#include <cstdint>
95#include <iterator>
96#include <limits>
97#include <memory>
98#include <utility>
99#include <vector>
100
101using namespace llvm;
102using namespace llvm::PatternMatch;
103
104#define DEBUG_TYPE "codegenprepare"
105
106STATISTIC(NumBlocksElim, "Number of blocks eliminated");
107STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
108STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
109STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
110 "sunken Cmps");
111STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
112 "of sunken Casts");
113STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
114 "computations were sunk");
115STATISTIC(NumMemoryInstsPhiCreated,
116 "Number of phis created when address "
117 "computations were sunk to memory instructions");
118STATISTIC(NumMemoryInstsSelectCreated,
119 "Number of select created when address "
120 "computations were sunk to memory instructions");
121STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
122STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
123STATISTIC(NumAndsAdded,
124 "Number of and mask instructions added to form ext loads");
125STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
126STATISTIC(NumRetsDup, "Number of return instructions duplicated");
127STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
128STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
129STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
130
131static cl::opt<bool> DisableBranchOpts(
132 "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
133 cl::desc("Disable branch optimizations in CodeGenPrepare"));
134
135static cl::opt<bool>
136 DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false),
137 cl::desc("Disable GC optimizations in CodeGenPrepare"));
138
139static cl::opt<bool> DisableSelectToBranch(
140 "disable-cgp-select2branch", cl::Hidden, cl::init(false),
141 cl::desc("Disable select to branch conversion."));
142
143static cl::opt<bool> AddrSinkUsingGEPs(
144 "addr-sink-using-gep", cl::Hidden, cl::init(true),
145 cl::desc("Address sinking in CGP using GEPs."));
146
147static cl::opt<bool> EnableAndCmpSinking(
148 "enable-andcmp-sinking", cl::Hidden, cl::init(true),
149 cl::desc("Enable sinkinig and/cmp into branches."));
150
151static cl::opt<bool> DisableStoreExtract(
152 "disable-cgp-store-extract", cl::Hidden, cl::init(false),
153 cl::desc("Disable store(extract) optimizations in CodeGenPrepare"));
154
155static cl::opt<bool> StressStoreExtract(
156 "stress-cgp-store-extract", cl::Hidden, cl::init(false),
157 cl::desc("Stress test store(extract) optimizations in CodeGenPrepare"));
158
159static cl::opt<bool> DisableExtLdPromotion(
160 "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
161 cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in "
162 "CodeGenPrepare"));
163
164static cl::opt<bool> StressExtLdPromotion(
165 "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false),
166 cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) "
167 "optimization in CodeGenPrepare"));
168
169static cl::opt<bool> DisablePreheaderProtect(
170 "disable-preheader-prot", cl::Hidden, cl::init(false),
171 cl::desc("Disable protection against removing loop preheaders"));
172
173static cl::opt<bool> ProfileGuidedSectionPrefix(
174 "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore,
175 cl::desc("Use profile info to add section prefix for hot/cold functions"));
176
177static cl::opt<unsigned> FreqRatioToSkipMerge(
178 "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
179 cl::desc("Skip merging empty blocks if (frequency of empty block) / "
180 "(frequency of destination block) is greater than this ratio"));
181
182static cl::opt<bool> ForceSplitStore(
183 "force-split-store", cl::Hidden, cl::init(false),
184 cl::desc("Force store splitting no matter what the target query says."));
185
186static cl::opt<bool>
187EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
188 cl::desc("Enable merging of redundant sexts when one is dominating"
189 " the other."), cl::init(true));
190
191static cl::opt<bool> DisableComplexAddrModes(
192 "disable-complex-addr-modes", cl::Hidden, cl::init(false),
193 cl::desc("Disables combining addressing modes with different parts "
194 "in optimizeMemoryInst."));
195
196static cl::opt<bool>
197AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false),
198 cl::desc("Allow creation of Phis in Address sinking."));
199
200static cl::opt<bool>
201AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true),
202 cl::desc("Allow creation of selects in Address sinking."));
203
204static cl::opt<bool> AddrSinkCombineBaseReg(
205 "addr-sink-combine-base-reg", cl::Hidden, cl::init(true),
206 cl::desc("Allow combining of BaseReg field in Address sinking."));
207
208static cl::opt<bool> AddrSinkCombineBaseGV(
209 "addr-sink-combine-base-gv", cl::Hidden, cl::init(true),
210 cl::desc("Allow combining of BaseGV field in Address sinking."));
211
212static cl::opt<bool> AddrSinkCombineBaseOffs(
213 "addr-sink-combine-base-offs", cl::Hidden, cl::init(true),
214 cl::desc("Allow combining of BaseOffs field in Address sinking."));
215
216static cl::opt<bool> AddrSinkCombineScaledReg(
217 "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true),
218 cl::desc("Allow combining of ScaledReg field in Address sinking."));
219
220static cl::opt<bool>
221 EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden,
222 cl::init(true),
223 cl::desc("Enable splitting large offset of GEP."));
224
225namespace {
226
227enum ExtType {
228 ZeroExtension, // Zero extension has been seen.
229 SignExtension, // Sign extension has been seen.
230 BothExtension // This extension type is used if we saw sext after
231 // ZeroExtension had been set, or if we saw zext after
232 // SignExtension had been set. It makes the type
233 // information of a promoted instruction invalid.
234};
235
236using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
237using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
238using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
239using SExts = SmallVector<Instruction *, 16>;
240using ValueToSExts = DenseMap<Value *, SExts>;
241
242class TypePromotionTransaction;
243
244 class CodeGenPrepare : public FunctionPass {
245 const TargetMachine *TM = nullptr;
246 const TargetSubtargetInfo *SubtargetInfo;
247 const TargetLowering *TLI = nullptr;
248 const TargetRegisterInfo *TRI;
249 const TargetTransformInfo *TTI = nullptr;
250 const TargetLibraryInfo *TLInfo;
251 const LoopInfo *LI;
252 std::unique_ptr<BlockFrequencyInfo> BFI;
253 std::unique_ptr<BranchProbabilityInfo> BPI;
254
255 /// As we scan instructions optimizing them, this is the next instruction
256 /// to optimize. Transforms that can invalidate this should update it.
257 BasicBlock::iterator CurInstIterator;
258
259 /// Keeps track of non-local addresses that have been sunk into a block.
260 /// This allows us to avoid inserting duplicate code for blocks with
261 /// multiple load/stores of the same address. The usage of WeakTrackingVH
262 /// enables SunkAddrs to be treated as a cache whose entries can be
263 /// invalidated if a sunken address computation has been erased.
264 ValueMap<Value*, WeakTrackingVH> SunkAddrs;
265
266 /// Keeps track of all instructions inserted for the current function.
267 SetOfInstrs InsertedInsts;
268
269 /// Keeps track of the type of the related instruction before their
270 /// promotion for the current function.
271 InstrToOrigTy PromotedInsts;
272
273 /// Keep track of instructions removed during promotion.
274 SetOfInstrs RemovedInsts;
275
276 /// Keep track of sext chains based on their initial value.
277 DenseMap<Value *, Instruction *> SeenChainsForSExt;
278
279 /// Keep track of GEPs accessing the same data structures such as structs or
280 /// arrays that are candidates to be split later because of their large
281 /// size.
282 MapVector<
283 AssertingVH<Value>,
284 SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
285 LargeOffsetGEPMap;
286
287 /// Keep track of new GEP base after splitting the GEPs having large offset.
288 SmallSet<AssertingVH<Value>, 2> NewGEPBases;
289
290 /// Map serial numbers to Large offset GEPs.
291 DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;
292
293 /// Keep track of SExt promoted.
294 ValueToSExts ValToSExtendedUses;
295
296 /// True if optimizing for size.
297 bool OptSize;
298
299 /// DataLayout for the Function being processed.
300 const DataLayout *DL = nullptr;
301
302 /// Building the dominator tree can be expensive, so we only build it
303 /// lazily and update it when required.
304 std::unique_ptr<DominatorTree> DT;
305
306 public:
307 static char ID; // Pass identification, replacement for typeid
308
309 CodeGenPrepare() : FunctionPass(ID) {
310 initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
311 }
312
313 bool runOnFunction(Function &F) override;
314
315 StringRef getPassName() const override { return "CodeGen Prepare"; }
316
317 void getAnalysisUsage(AnalysisUsage &AU) const override {
318 // FIXME: When we can selectively preserve passes, preserve the domtree.
319 AU.addRequired<ProfileSummaryInfoWrapperPass>();
320 AU.addRequired<TargetLibraryInfoWrapperPass>();
321 AU.addRequired<TargetTransformInfoWrapperPass>();
322 AU.addRequired<LoopInfoWrapperPass>();
323 }
324
325 private:
326 template <typename F>
327 void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
328 // Substituting can cause recursive simplifications, which can invalidate
329 // our iterator. Use a WeakTrackingVH to hold onto it in case this
330 // happens.
331 Value *CurValue = &*CurInstIterator;
332 WeakTrackingVH IterHandle(CurValue);
333
334 f();
335
336 // If the iterator instruction was recursively deleted, start over at the
337 // start of the block.
338 if (IterHandle != CurValue) {
339 CurInstIterator = BB->begin();
340 SunkAddrs.clear();
341 }
342 }
343
344 // Get the DominatorTree, building if necessary.
345 DominatorTree &getDT(Function &F) {
346 if (!DT)
347 DT = std::make_unique<DominatorTree>(F);
348 return *DT;
349 }
350
351 bool eliminateFallThrough(Function &F);
352 bool eliminateMostlyEmptyBlocks(Function &F);
353 BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
354 bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
355 void eliminateMostlyEmptyBlock(BasicBlock *BB);
356 bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
357 bool isPreheader);
358 bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
359 bool optimizeInst(Instruction *I, bool &ModifiedDT);
360 bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
361 Type *AccessTy, unsigned AddrSpace);
362 bool optimizeInlineAsmInst(CallInst *CS);
363 bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
364 bool optimizeExt(Instruction *&I);
365 bool optimizeExtUses(Instruction *I);
366 bool optimizeLoadExt(LoadInst *Load);
367 bool optimizeShiftInst(BinaryOperator *BO);
368 bool optimizeSelectInst(SelectInst *SI);
369 bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
370 bool optimizeSwitchInst(SwitchInst *SI);
371 bool optimizeExtractElementInst(Instruction *Inst);
372 bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
373 bool placeDbgValues(Function &F);
374 bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
375 LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
376 bool tryToPromoteExts(TypePromotionTransaction &TPT,
377 const SmallVectorImpl<Instruction *> &Exts,
378 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
379 unsigned CreatedInstsCost = 0);
380 bool mergeSExts(Function &F);
381 bool splitLargeGEPOffsets();
382 bool performAddressTypePromotion(
383 Instruction *&Inst,
384 bool AllowPromotionWithoutCommonHeader,
385 bool HasPromoted, TypePromotionTransaction &TPT,
386 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
387 bool splitBranchCondition(Function &F, bool &ModifiedDT);
388 bool simplifyOffsetableRelocate(Instruction &I);
389
390 bool tryToSinkFreeOperands(Instruction *I);
391 bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
392 Intrinsic::ID IID);
393 bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
394 bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
395 bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
396 };
397
398} // end anonymous namespace
399
400char CodeGenPrepare::ID = 0;
401
402INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,
403 "Optimize for code generation", false, false)
404INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
405INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,
406 "Optimize for code generation", false, false)
407
408FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); }
409
410bool CodeGenPrepare::runOnFunction(Function &F) {
411 if (skipFunction(F))
1. Assuming the condition is false
2. Taking false branch
412 return false;
413
414 DL = &F.getParent()->getDataLayout();
415
416 bool EverMadeChange = false;
417 // Clear per function information.
418 InsertedInsts.clear();
419 PromotedInsts.clear();
420
421 if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
3. Assuming 'TPC' is null
4. Taking false branch
422 TM = &TPC->getTM<TargetMachine>();
423 SubtargetInfo = TM->getSubtargetImpl(F);
424 TLI = SubtargetInfo->getTargetLowering();
425 TRI = SubtargetInfo->getRegisterInfo();
426 }
427 TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
428 TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
429 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
430 BPI.reset(new BranchProbabilityInfo(F, *LI));
431 BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
432 OptSize = F.hasOptSize();
433
434 ProfileSummaryInfo *PSI =
435 &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
436 if (ProfileGuidedSectionPrefix) {
5. Assuming the condition is false
6. Taking false branch
437 if (PSI->isFunctionHotInCallGraph(&F, *BFI))
438 F.setSectionPrefix(".hot");
439 else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
440 F.setSectionPrefix(".unlikely");
441 }
442
443 /// This optimization identifies DIV instructions that can be
444 /// profitably bypassed and carried out with a shorter, faster divide.
445 if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
7. Assuming field 'OptSize' is true
446 TLI->isSlowDivBypassed()) {
447 const DenseMap<unsigned int, unsigned int> &BypassWidths =
448 TLI->getBypassSlowDivWidths();
449 BasicBlock* BB = &*F.begin();
450 while (BB != nullptr) {
451 // bypassSlowDivision may create new BBs, but we don't want to reapply the
452 // optimization to those blocks.
453 BasicBlock* Next = BB->getNextNode();
454 EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
455 BB = Next;
456 }
457 }
458
459 // Eliminate blocks that contain only PHI nodes and an
460 // unconditional branch.
461 EverMadeChange |= eliminateMostlyEmptyBlocks(F);
462
463 bool ModifiedDT = false;
464 if (!DisableBranchOpts)
8. Assuming the condition is false
9. Taking false branch
465 EverMadeChange |= splitBranchCondition(F, ModifiedDT);
466
467 // Split some critical edges where one of the sources is an indirect branch,
468 // to help generate sane code for PHIs involving such edges.
469 EverMadeChange |= SplitIndirectBrCriticalEdges(F);
470
471 bool MadeChange = true;
472 while (MadeChange) {
10. Loop condition is true. Entering loop body
473 MadeChange = false;
474 DT.reset();
475 for (Function::iterator I = F.begin(); I != F.end(); ) {
11. Loop condition is true. Entering loop body
476 BasicBlock *BB = &*I++;
477 bool ModifiedDTOnIteration = false;
478 MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);
12. Calling 'CodeGenPrepare::optimizeBlock'
479
480 // Restart BB iteration if the dominator tree of the Function was changed
481 if (ModifiedDTOnIteration)
482 break;
483 }
484 if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
485 MadeChange |= mergeSExts(F);
486 if (!LargeOffsetGEPMap.empty())
487 MadeChange |= splitLargeGEPOffsets();
488
489 // Really free removed instructions during promotion.
490 for (Instruction *I : RemovedInsts)
491 I->deleteValue();
492
493 EverMadeChange |= MadeChange;
494 SeenChainsForSExt.clear();
495 ValToSExtendedUses.clear();
496 RemovedInsts.clear();
497 LargeOffsetGEPMap.clear();
498 LargeOffsetGEPID.clear();
499 }
500
501 SunkAddrs.clear();
502
503 if (!DisableBranchOpts) {
504 MadeChange = false;
505 // Use a set vector to get deterministic iteration order. The order the
506 // blocks are removed may affect whether or not PHI nodes in successors
507 // are removed.
508 SmallSetVector<BasicBlock*, 8> WorkList;
509 for (BasicBlock &BB : F) {
510 SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
511 MadeChange |= ConstantFoldTerminator(&BB, true);
512 if (!MadeChange) continue;
513
514 for (SmallVectorImpl<BasicBlock*>::iterator
515 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
516 if (pred_begin(*II) == pred_end(*II))
517 WorkList.insert(*II);
518 }
519
520 // Delete the dead blocks and any of their dead successors.
521 MadeChange |= !WorkList.empty();
522 while (!WorkList.empty()) {
523 BasicBlock *BB = WorkList.pop_back_val();
524 SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
525
526 DeleteDeadBlock(BB);
527
528 for (SmallVectorImpl<BasicBlock*>::iterator
529 II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
530 if (pred_begin(*II) == pred_end(*II))
531 WorkList.insert(*II);
532 }
533
534 // Merge pairs of basic blocks with unconditional branches, connected by
535 // a single edge.
536 if (EverMadeChange || MadeChange)
537 MadeChange |= eliminateFallThrough(F);
538
539 EverMadeChange |= MadeChange;
540 }
541
542 if (!DisableGCOpts) {
543 SmallVector<Instruction *, 2> Statepoints;
544 for (BasicBlock &BB : F)
545 for (Instruction &I : BB)
546 if (isStatepoint(I))
547 Statepoints.push_back(&I);
548 for (auto &I : Statepoints)
549 EverMadeChange |= simplifyOffsetableRelocate(*I);
550 }
551
552 // Do this last to clean up use-before-def scenarios introduced by other
553 // preparatory transforms.
554 EverMadeChange |= placeDbgValues(F);
555
556 return EverMadeChange;
557}
558
559/// Merge basic blocks which are connected by a single edge, where one of the
560/// basic blocks has a single successor pointing to the other basic block,
561/// which has a single predecessor.
562bool CodeGenPrepare::eliminateFallThrough(Function &F) {
563 bool Changed = false;
564 // Scan all of the blocks in the function, except for the entry block.
565 // Use a temporary array to avoid iterator being invalidated when
566 // deleting blocks.
567 SmallVector<WeakTrackingVH, 16> Blocks;
568 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
569 Blocks.push_back(&Block);
570
571 for (auto &Block : Blocks) {
572 auto *BB = cast_or_null<BasicBlock>(Block);
573 if (!BB)
574 continue;
575 // If the destination block has a single pred, then this is a trivial
576 // edge, just collapse it.
577 BasicBlock *SinglePred = BB->getSinglePredecessor();
578
579 // Don't merge if BB's address is taken.
580 if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
581
582 BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
583 if (Term && !Term->isConditional()) {
584 Changed = true;
585 LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n");
586
587 // Merge BB into SinglePred and delete it.
588 MergeBlockIntoPredecessor(BB);
589 }
590 }
591 return Changed;
592}
593
594/// Find a destination block from BB if BB is mergeable empty block.
595BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
596 // If this block doesn't end with an uncond branch, ignore it.
597 BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
598 if (!BI || !BI->isUnconditional())
599 return nullptr;
600
601 // If the instruction before the branch (skipping debug info) isn't a phi
602 // node, then other stuff is happening here.
603 BasicBlock::iterator BBI = BI->getIterator();
604 if (BBI != BB->begin()) {
605 --BBI;
606 while (isa<DbgInfoIntrinsic>(BBI)) {
607 if (BBI == BB->begin())
608 break;
609 --BBI;
610 }
611 if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
612 return nullptr;
613 }
614
615 // Do not break infinite loops.
616 BasicBlock *DestBB = BI->getSuccessor(0);
617 if (DestBB == BB)
618 return nullptr;
619
620 if (!canMergeBlocks(BB, DestBB))
621 DestBB = nullptr;
622
623 return DestBB;
624}
625
626/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
627/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
628/// edges in ways that are non-optimal for isel. Start by eliminating these
629/// blocks so we can split them the way we want them.
630bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
631 SmallPtrSet<BasicBlock *, 16> Preheaders;
632 SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end());
633 while (!LoopList.empty()) {
634 Loop *L = LoopList.pop_back_val();
635 LoopList.insert(LoopList.end(), L->begin(), L->end());
636 if (BasicBlock *Preheader = L->getLoopPreheader())
637 Preheaders.insert(Preheader);
638 }
639
640 bool MadeChange = false;
641 // Copy blocks into a temporary array to avoid iterator invalidation issues
642 // as we remove them.
643 // Note that this intentionally skips the entry block.
644 SmallVector<WeakTrackingVH, 16> Blocks;
645 for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
646 Blocks.push_back(&Block);
647
648 for (auto &Block : Blocks) {
649 BasicBlock *BB = cast_or_null<BasicBlock>(Block);
650 if (!BB)
651 continue;
652 BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
653 if (!DestBB ||
654 !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
655 continue;
656
657 eliminateMostlyEmptyBlock(BB);
658 MadeChange = true;
659 }
660 return MadeChange;
661}
662
663bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
664 BasicBlock *DestBB,
665 bool isPreheader) {
666 // Do not delete loop preheaders if doing so would create a critical edge.
667 // Loop preheaders can be good locations to spill registers. If the
668 // preheader is deleted and we create a critical edge, registers may be
669 // spilled in the loop body instead.
670 if (!DisablePreheaderProtect && isPreheader &&
671 !(BB->getSinglePredecessor() &&
672 BB->getSinglePredecessor()->getSingleSuccessor()))
673 return false;
674
675 // Skip merging if the block's successor is also a successor to any callbr
676 // that leads to this block.
677 // FIXME: Is this really needed? Is this a correctness issue?
678 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
679 if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
680 for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
681 if (DestBB == CBI->getSuccessor(i))
682 return false;
683 }
684
685 // Try to skip merging if the unique predecessor of BB is terminated by a
686 // switch or indirect branch instruction, and BB is used as an incoming block
687 // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
688 // add COPY instructions in the predecessor of BB instead of BB (if it is not
689 // merged). Note that the critical edge created by merging such blocks won't be
690 // split in MachineSink because the jump table is not analyzable. By keeping
691 // such empty block (BB), ISel will place COPY instructions in BB, not in the
692 // predecessor of BB.
693 BasicBlock *Pred = BB->getUniquePredecessor();
694 if (!Pred ||
695 !(isa<SwitchInst>(Pred->getTerminator()) ||
696 isa<IndirectBrInst>(Pred->getTerminator())))
697 return true;
698
699 if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
700 return true;
701
702 // We use a simple cost heuristic which determine skipping merging is
703 // profitable if the cost of skipping merging is less than the cost of
704 // merging : Cost(skipping merging) < Cost(merging BB), where the
705 // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
706 // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
707 // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
708 // Freq(Pred) / Freq(BB) > 2.
709 // Note that if there are multiple empty blocks sharing the same incoming
710 // value for the PHIs in the DestBB, we consider them together. In such
711 // case, Cost(merging BB) will be the sum of their frequencies.
712
713 if (!isa<PHINode>(DestBB->begin()))
714 return true;
715
716 SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
717
718 // Find all other incoming blocks from which incoming values of all PHIs in
719 // DestBB are the same as the ones from BB.
720 for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
721 ++PI) {
722 BasicBlock *DestBBPred = *PI;
723 if (DestBBPred == BB)
724 continue;
725
726 if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
727 return DestPN.getIncomingValueForBlock(BB) ==
728 DestPN.getIncomingValueForBlock(DestBBPred);
729 }))
730 SameIncomingValueBBs.insert(DestBBPred);
731 }
732
733 // See if all BB's incoming values are same as the value from Pred. In this
734 // case, no reason to skip merging because COPYs are expected to be placed in
735 // Pred already.
736 if (SameIncomingValueBBs.count(Pred))
737 return true;
738
739 BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
740 BlockFrequency BBFreq = BFI->getBlockFreq(BB);
741
742 for (auto SameValueBB : SameIncomingValueBBs)
743 if (SameValueBB->getUniquePredecessor() == Pred &&
744 DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
745 BBFreq += BFI->getBlockFreq(SameValueBB);
746
747 return PredFreq.getFrequency() <=
748 BBFreq.getFrequency() * FreqRatioToSkipMerge;
749}
750
751/// Return true if we can merge BB into DestBB if there is a single
752/// unconditional branch between them, and BB contains no other non-phi
753/// instructions.
754bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
755 const BasicBlock *DestBB) const {
756 // We only want to eliminate blocks whose phi nodes are used by phi nodes in
757 // the successor. If there are more complex condition (e.g. preheaders),
758 // don't mess around with them.
759 for (const PHINode &PN : BB->phis()) {
760 for (const User *U : PN.users()) {
761 const Instruction *UI = cast<Instruction>(U);
762 if (UI->getParent() != DestBB || !isa<PHINode>(UI))
763 return false;
764 // If User is inside DestBB block and it is a PHINode then check
765 // incoming value. If incoming value is not from BB then this is
766 // a complex condition (e.g. preheaders) we want to avoid here.
767 if (UI->getParent() == DestBB) {
768 if (const PHINode *UPN = dyn_cast<PHINode>(UI))
769 for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
770 Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
771 if (Insn && Insn->getParent() == BB &&
772 Insn->getParent() != UPN->getIncomingBlock(I))
773 return false;
774 }
775 }
776 }
777 }
778
779 // If BB and DestBB contain any common predecessors, then the phi nodes in BB
780 // and DestBB may have conflicting incoming values for the block. If so, we
781 // can't merge the block.
782 const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
783 if (!DestBBPN) return true; // no conflict.
784
785 // Collect the preds of BB.
786 SmallPtrSet<const BasicBlock*, 16> BBPreds;
787 if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
788 // It is faster to get preds from a PHI than with pred_iterator.
789 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
790 BBPreds.insert(BBPN->getIncomingBlock(i));
791 } else {
792 BBPreds.insert(pred_begin(BB), pred_end(BB));
793 }
794
795 // Walk the preds of DestBB.
796 for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
797 BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
798 if (BBPreds.count(Pred)) { // Common predecessor?
799 for (const PHINode &PN : DestBB->phis()) {
800 const Value *V1 = PN.getIncomingValueForBlock(Pred);
801 const Value *V2 = PN.getIncomingValueForBlock(BB);
802
803 // If V2 is a phi node in BB, look up what the mapped value will be.
804 if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
805 if (V2PN->getParent() == BB)
806 V2 = V2PN->getIncomingValueForBlock(Pred);
807
808 // If there is a conflict, bail out.
809 if (V1 != V2) return false;
810 }
811 }
812 }
813
814 return true;
815}
816
817/// Eliminate a basic block that has only phi's and an unconditional branch in
818/// it.
819void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
820 BranchInst *BI = cast<BranchInst>(BB->getTerminator());
821 BasicBlock *DestBB = BI->getSuccessor(0);
822
823 LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"
824 << *BB << *DestBB);
825
826 // If the destination block has a single pred, then this is a trivial edge,
827 // just collapse it.
828 if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
829 if (SinglePred != DestBB) {
830 assert(SinglePred == BB &&
831 "Single predecessor not the same as predecessor");
832 // Merge DestBB into SinglePred/BB and delete it.
833 MergeBlockIntoPredecessor(DestBB);
834 // Note: BB(=SinglePred) will not be deleted on this path.
835 // DestBB(=its single successor) is the one that was deleted.
836 LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n");
837 return;
838 }
839 }
840
841 // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
842 // to handle the new incoming edges it is about to have.
843 for (PHINode &PN : DestBB->phis()) {
844 // Remove the incoming value for BB, and remember it.
845 Value *InVal = PN.removeIncomingValue(BB, false);
846
847 // Two options: either the InVal is a phi node defined in BB or it is some
848 // value that dominates BB.
849 PHINode *InValPhi = dyn_cast<PHINode>(InVal);
850 if (InValPhi && InValPhi->getParent() == BB) {
851 // Add all of the input values of the input PHI as inputs of this phi.
852 for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
853 PN.addIncoming(InValPhi->getIncomingValue(i),
854 InValPhi->getIncomingBlock(i));
855 } else {
856 // Otherwise, add one instance of the dominating value for each edge that
857 // we will be adding.
858 if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
859 for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
860 PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
861 } else {
862 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
863 PN.addIncoming(InVal, *PI);
864 }
865 }
866 }
867
868 // The PHIs are now updated, change everything that refers to BB to use
869 // DestBB and remove BB.
870 BB->replaceAllUsesWith(DestBB);
871 BB->eraseFromParent();
872 ++NumBlocksElim;
873
874 LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
875}
876
877// Computes a map of base pointer relocation instructions to corresponding
878// derived pointer relocation instructions given a vector of all relocate calls
879static void computeBaseDerivedRelocateMap(
880 const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls,
881 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>>
882 &RelocateInstMap) {
883 // Collect information in two maps: one primarily for locating the base object
884 // while filling the second map; the second map is the final structure holding
885 // a mapping between Base and corresponding Derived relocate calls
886 DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap;
887 for (auto *ThisRelocate : AllRelocateCalls) {
888 auto K = std::make_pair(ThisRelocate->getBasePtrIndex(),
889 ThisRelocate->getDerivedPtrIndex());
890 RelocateIdxMap.insert(std::make_pair(K, ThisRelocate));
891 }
892 for (auto &Item : RelocateIdxMap) {
893 std::pair<unsigned, unsigned> Key = Item.first;
894 if (Key.first == Key.second)
895 // Base relocation: nothing to insert
896 continue;
897
898 GCRelocateInst *I = Item.second;
899 auto BaseKey = std::make_pair(Key.first, Key.first);
900
901 // We're iterating over RelocateIdxMap so we cannot modify it.
902 auto MaybeBase = RelocateIdxMap.find(BaseKey);
903 if (MaybeBase == RelocateIdxMap.end())
904 // TODO: We might want to insert a new base object relocate and gep off
905 // that, if there are enough derived object relocates.
906 continue;
907
908 RelocateInstMap[MaybeBase->second].push_back(I);
909 }
910}
911
912// Accepts a GEP and extracts the operands into a vector provided they're all
913// small integer constants
914static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP,
915 SmallVectorImpl<Value *> &OffsetV) {
916 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
917 // Only accept small constant integer operands
918 auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i));
919 if (!Op || Op->getZExtValue() > 20)
920 return false;
921 }
922
923 for (unsigned i = 1; i < GEP->getNumOperands(); i++)
924 OffsetV.push_back(GEP->getOperand(i));
925 return true;
926}
927
928// Takes a RelocatedBase (base pointer relocation instruction) and Targets to
929// replace, computes a replacement, and affects it.
930static bool
931simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase,
932 const SmallVectorImpl<GCRelocateInst *> &Targets) {
933 bool MadeChange = false;
934 // We must ensure the relocation of derived pointer is defined after
935 // relocation of base pointer. If we find a relocation corresponding to base
936 // defined earlier than relocation of base then we move relocation of base
937 // right before found relocation. We consider only relocation in the same
938 // basic block as relocation of base. Relocations from other basic block will
939 // be skipped by optimization and we do not care about them.
940 for (auto R = RelocatedBase->getParent()->getFirstInsertionPt();
941 &*R != RelocatedBase; ++R)
942 if (auto RI = dyn_cast<GCRelocateInst>(R))
943 if (RI->getStatepoint() == RelocatedBase->getStatepoint())
944 if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) {
945 RelocatedBase->moveBefore(RI);
946 break;
947 }
948
949 for (GCRelocateInst *ToReplace : Targets) {
950 assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&
951 "Not relocating a derived object of the original base object");
952 if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) {
953 // A duplicate relocate call. TODO: coalesce duplicates.
954 continue;
955 }
956
957 if (RelocatedBase->getParent() != ToReplace->getParent()) {
958 // Base and derived relocates are in different basic blocks.
959 // In this case transform is only valid when base dominates derived
960 // relocate. However it would be too expensive to check dominance
961 // for each such relocate, so we skip the whole transformation.
962 continue;
963 }
964
965 Value *Base = ToReplace->getBasePtr();
966 auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr());
967 if (!Derived || Derived->getPointerOperand() != Base)
968 continue;
969
970 SmallVector<Value *, 2> OffsetV;
971 if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV))
972 continue;
973
974 // Create a Builder and replace the target callsite with a gep
975 assert(RelocatedBase->getNextNode() &&
976 "Should always have one since it's not a terminator");
977
978 // Insert after RelocatedBase
979 IRBuilder<> Builder(RelocatedBase->getNextNode());
980 Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
981
982 // If gc_relocate does not match the actual type, cast it to the right type.
983 // In theory, there must be a bitcast after gc_relocate if the type does not
984 // match, and we should reuse it to get the derived pointer. But it could be
985 // cases like this:
986 // bb1:
987 // ...
988 // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
989 // br label %merge
990 //
991 // bb2:
992 // ...
993 // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...)
994 // br label %merge
995 //
996 // merge:
997 // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ]
998 // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)*
999 //
1000 // In this case, we can not find the bitcast any more. So we insert a new bitcast
1001 // no matter there is already one or not. In this way, we can handle all cases, and
1002 // the extra bitcast should be optimized away in later passes.
1003 Value *ActualRelocatedBase = RelocatedBase;
1004 if (RelocatedBase->getType() != Base->getType()) {
1005 ActualRelocatedBase =
1006 Builder.CreateBitCast(RelocatedBase, Base->getType());
1007 }
1008 Value *Replacement = Builder.CreateGEP(
1009 Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV));
1010 Replacement->takeName(ToReplace);
1011 // If the newly generated derived pointer's type does not match the original derived
1012 // pointer's type, cast the new derived pointer to match it. Same reasoning as above.
1013 Value *ActualReplacement = Replacement;
1014 if (Replacement->getType() != ToReplace->getType()) {
1015 ActualReplacement =
1016 Builder.CreateBitCast(Replacement, ToReplace->getType());
1017 }
1018 ToReplace->replaceAllUsesWith(ActualReplacement);
1019 ToReplace->eraseFromParent();
1020
1021 MadeChange = true;
1022 }
1023 return MadeChange;
1024}
1025
1026// Turns this:
1027//
1028// %base = ...
1029// %ptr = gep %base + 15
1030// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1031// %base' = relocate(%tok, i32 4, i32 4)
1032// %ptr' = relocate(%tok, i32 4, i32 5)
1033// %val = load %ptr'
1034//
1035// into this:
1036//
1037// %base = ...
1038// %ptr = gep %base + 15
1039// %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr)
1040// %base' = gc.relocate(%tok, i32 4, i32 4)
1041// %ptr' = gep %base' + 15
1042// %val = load %ptr'
1043bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
1044 bool MadeChange = false;
1045 SmallVector<GCRelocateInst *, 2> AllRelocateCalls;
1046
1047 for (auto *U : I.users())
1048 if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U))
1049 // Collect all the relocate calls associated with a statepoint
1050 AllRelocateCalls.push_back(Relocate);
1051
1052 // We need at least one base pointer relocation + one derived pointer
1053 // relocation to mangle
1054 if (AllRelocateCalls.size() < 2)
1055 return false;
1056
1057 // RelocateInstMap is a mapping from the base relocate instruction to the
1058 // corresponding derived relocate instructions
1059 DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap;
1060 computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap);
1061 if (RelocateInstMap.empty())
1062 return false;
1063
1064 for (auto &Item : RelocateInstMap)
1065 // Item.first is the RelocatedBase to offset against
1066 // Item.second is the vector of Targets to replace
1067 MadeChange = simplifyRelocatesOffABase(Item.first, Item.second);
1068 return MadeChange;
1069}
1070
1071/// Sink the specified cast instruction into its user blocks.
1072static bool SinkCast(CastInst *CI) {
1073 BasicBlock *DefBB = CI->getParent();
1074
1075 /// InsertedCasts - Only insert a cast in each block once.
1076 DenseMap<BasicBlock*, CastInst*> InsertedCasts;
1077
1078 bool MadeChange = false;
1079 for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
1080 UI != E; ) {
1081 Use &TheUse = UI.getUse();
1082 Instruction *User = cast<Instruction>(*UI);
1083
1084 // Figure out which BB this cast is used in. For PHI's this is the
1085 // appropriate predecessor block.
1086 BasicBlock *UserBB = User->getParent();
1087 if (PHINode *PN = dyn_cast<PHINode>(User)) {
1088 UserBB = PN->getIncomingBlock(TheUse);
1089 }
1090
1091 // Preincrement use iterator so we don't invalidate it.
1092 ++UI;
1093
1094 // The first insertion point of a block containing an EH pad is after the
1095 // pad. If the pad is the user, we cannot sink the cast past the pad.
1096 if (User->isEHPad())
1097 continue;
1098
1099 // If the block selected to receive the cast is an EH pad that does not
1100 // allow non-PHI instructions before the terminator, we can't sink the
1101 // cast.
1102 if (UserBB->getTerminator()->isEHPad())
1103 continue;
1104
1105 // If this user is in the same block as the cast, don't change the cast.
1106 if (UserBB == DefBB) continue;
1107
1108 // If we have already inserted a cast into this block, use it.
1109 CastInst *&InsertedCast = InsertedCasts[UserBB];
1110
1111 if (!InsertedCast) {
1112 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1113 assert(InsertPt != UserBB->end());
1114 InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0),
1115 CI->getType(), "", &*InsertPt);
1116 InsertedCast->setDebugLoc(CI->getDebugLoc());
1117 }
1118
1119 // Replace a use of the cast with a use of the new cast.
1120 TheUse = InsertedCast;
1121 MadeChange = true;
1122 ++NumCastUses;
1123 }
1124
1125 // If we removed all uses, nuke the cast.
1126 if (CI->use_empty()) {
1127 salvageDebugInfo(*CI);
1128 CI->eraseFromParent();
1129 MadeChange = true;
1130 }
1131
1132 return MadeChange;
1133}
1134
1135/// If the specified cast instruction is a noop copy (e.g. it's casting from
1136/// one pointer type to another, i32->i8 on PPC), sink it into user blocks to
1137/// reduce the number of virtual registers that must be created and coalesced.
1138///
1139/// Return true if any changes are made.
1140static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
1141 const DataLayout &DL) {
1142 // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
1143 // than sinking only nop casts, but is helpful on some platforms.
1144 if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
1145 if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
1146 ASC->getDestAddressSpace()))
1147 return false;
1148 }
1149
1150 // If this is a noop copy,
1151 EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
1152 EVT DstVT = TLI.getValueType(DL, CI->getType());
1153
1154 // This is an fp<->int conversion?
1155 if (SrcVT.isInteger() != DstVT.isInteger())
1156 return false;
1157
1158 // If this is an extension, it will be a zero or sign extension, which
1159 // isn't a noop.
1160 if (SrcVT.bitsLT(DstVT)) return false;
1161
1162 // If these values will be promoted, find out what they will be promoted
1163 // to. This helps us consider truncates on PPC as noop copies when they
1164 // are.
1165 if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
1166 TargetLowering::TypePromoteInteger)
1167 SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
1168 if (TLI.getTypeAction(CI->getContext(), DstVT) ==
1169 TargetLowering::TypePromoteInteger)
1170 DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
1171
1172 // If, after promotion, these are the same types, this is a noop copy.
1173 if (SrcVT != DstVT)
1174 return false;
1175
1176 return SinkCast(CI);
1177}
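// Worked example for the promotion logic above (the target behaviour is
// assumed for illustration): on a target that promotes both i8 and i32 to
// the same wider integer type, a truncate such as
//   %t = trunc i32 %x to i8
// has SrcVT = i32 and DstVT = i8, both of which become the promoted type, so
// the trunc is treated as a noop copy and handed to SinkCast. A widening
// extension or an int<->fp conversion bails out on the earlier checks
// instead.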
1178
1179bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
1180 CmpInst *Cmp,
1181 Intrinsic::ID IID) {
1182 if (BO->getParent() != Cmp->getParent()) {
1183 // We used to use a dominator tree here to allow multi-block optimization.
1184 // But that was problematic because:
1185 // 1. It could cause a perf regression by hoisting the math op into the
1186 // critical path.
1187 // 2. It could cause a perf regression by creating a value that was live
1188 // across multiple blocks and increasing register pressure.
1189 // 3. Use of a dominator tree could cause large compile-time regression.
1190 // This is because we recompute the DT on every change in the main CGP
1191 // run-loop. The recomputing is probably unnecessary in many cases, so if
1192 // that was fixed, using a DT here would be ok.
1193 return false;
1194 }
1195
1196 // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
1197 Value *Arg0 = BO->getOperand(0);
1198 Value *Arg1 = BO->getOperand(1);
1199 if (BO->getOpcode() == Instruction::Add &&
1200 IID == Intrinsic::usub_with_overflow) {
1201 assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
1202 Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
1203 }
1204
1205 // Insert at the first instruction of the pair.
1206 Instruction *InsertPt = nullptr;
1207 for (Instruction &Iter : *Cmp->getParent()) {
1208 if (&Iter == BO || &Iter == Cmp) {
1209 InsertPt = &Iter;
1210 break;
1211 }
1212 }
1213 assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
1214
1215 IRBuilder<> Builder(InsertPt);
1216 Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
1217 Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
1218 Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
1219 BO->replaceAllUsesWith(Math);
1220 Cmp->replaceAllUsesWith(OV);
1221 BO->eraseFromParent();
1222 Cmp->eraseFromParent();
1223 return true;
1224}
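// Rough before/after for replaceMathCmpWithIntrinsic with
// Intrinsic::uadd_with_overflow (value names are illustrative):
//   %add = add i32 %a, %b
//   %ov  = icmp ugt i32 %a, %add
// becomes
//   %mo   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %mo, 0
//   %ov   = extractvalue { i32, i1 } %mo, 1
// and the original add/icmp pair is erased after RAUW.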
1225
1226/// Match special-case patterns that check for unsigned add overflow.
1227static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
1228 BinaryOperator *&Add) {
1229 // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
1230 // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
1231 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1232
1233 // We are not expecting non-canonical/degenerate code. Just bail out.
1234 if (isa<Constant>(A))
1235 return false;
1236
1237 ICmpInst::Predicate Pred = Cmp->getPredicate();
1238 if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
1239 B = ConstantInt::get(B->getType(), 1);
1240 else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
1241 B = ConstantInt::get(B->getType(), -1);
1242 else
1243 return false;
1244
1245 // Check the users of the variable operand of the compare looking for an add
1246 // with the adjusted constant.
1247 for (User *U : A->users()) {
1248 if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
1249 Add = cast<BinaryOperator>(U);
1250 return true;
1251 }
1252 }
1253 return false;
1254}
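// Concrete instance of the first edge case above (types and names chosen for
// illustration):
//   %add = add i8 %a, 1
//   %cmp = icmp eq i8 %a, -1
// The compare constant -1 is adjusted to 1, the matching add is found among
// the users of %a, and the caller can then form
// @llvm.uadd.with.overflow.i8(%a, 1).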
1255
1256/// Try to combine the compare into a call to the llvm.uadd.with.overflow
1257/// intrinsic. Return true if any changes were made.
1258bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
1259 bool &ModifiedDT) {
1260 Value *A, *B;
1261 BinaryOperator *Add;
1262 if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
1263 if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
1264 return false;
1265
1266 if (!TLI->shouldFormOverflowOp(ISD::UADDO,
1267 TLI->getValueType(*DL, Add->getType())))
1268 return false;
1269
1270 // We don't want to move around uses of condition values this late, so we
1271 // check if it is legal to create the call to the intrinsic in the basic
1272 // block containing the icmp.
1273 if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
1274 return false;
1275
1276 if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
1277 return false;
1278
1279 // Reset callers - do not crash by iterating over a dead instruction.
1280 ModifiedDT = true;
1281 return true;
1282}
1283
1284bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
1285 bool &ModifiedDT) {
1286 // We are not expecting non-canonical/degenerate code. Just bail out.
1287 Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
1288 if (isa<Constant>(A) && isa<Constant>(B))
1289 return false;
1290
1291 // Convert (A u> B) to (A u< B) to simplify pattern matching.
1292 ICmpInst::Predicate Pred = Cmp->getPredicate();
1293 if (Pred == ICmpInst::ICMP_UGT) {
1294 std::swap(A, B);
1295 Pred = ICmpInst::ICMP_ULT;
1296 }
1297 // Convert special-case: (A == 0) is the same as (A u< 1).
1298 if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
1299 B = ConstantInt::get(B->getType(), 1);
1300 Pred = ICmpInst::ICMP_ULT;
1301 }
1302 // Convert special-case: (A != 0) is the same as (0 u< A).
1303 if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
1304 std::swap(A, B);
1305 Pred = ICmpInst::ICMP_ULT;
1306 }
1307 if (Pred != ICmpInst::ICMP_ULT)
1308 return false;
1309
1310 // Walk the users of a variable operand of a compare looking for a subtract or
1311 // add with that same operand. Also match the 2nd operand of the compare to
1312 // the add/sub, but that may be a negated constant operand of an add.
1313 Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
1314 BinaryOperator *Sub = nullptr;
1315 for (User *U : CmpVariableOperand->users()) {
1316 // A - B, A u< B --> usubo(A, B)
1317 if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
1318 Sub = cast<BinaryOperator>(U);
1319 break;
1320 }
1321
1322 // A + (-C), A u< C (canonicalized form of (sub A, C))
1323 const APInt *CmpC, *AddC;
1324 if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
1325 match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
1326 Sub = cast<BinaryOperator>(U);
1327 break;
1328 }
1329 }
1330 if (!Sub)
1331 return false;
1332
1333 if (!TLI->shouldFormOverflowOp(ISD::USUBO,
1334 TLI->getValueType(*DL, Sub->getType())))
1335 return false;
1336
1337 if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
1338 return false;
1339
1340 // Reset callers - do not crash by iterating over a dead instruction.
1341 ModifiedDT = true;
1342 return true;
1343}
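// Rough example of the usubo rewrite above (names illustrative):
//   %sub = sub i32 %a, %b
//   %ov  = icmp ult i32 %a, %b
// becomes
//   %so   = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
//   %math = extractvalue { i32, i1 } %so, 0
//   %ov   = extractvalue { i32, i1 } %so, 1
// The canonicalized "add %a, -C" paired with "icmp ult %a, C" is matched the
// same way, with the negated constant restored in replaceMathCmpWithIntrinsic.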
1344
1345/// Sink the given CmpInst into user blocks to reduce the number of virtual
1346/// registers that must be created and coalesced. This is a clear win except on
1347/// targets with multiple condition code registers (PowerPC), where it might
1348/// lose; some adjustment may be wanted there.
1349///
1350/// Return true if any changes are made.
1351static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
1352 if (TLI.hasMultipleConditionRegisters())
1353 return false;
1354
1355 // Avoid sinking soft-FP comparisons, since this can move them into a loop.
1356 if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
1357 return false;
1358
1359 // Only insert a cmp in each block once.
1360 DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
1361
1362 bool MadeChange = false;
1363 for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
1364 UI != E; ) {
1365 Use &TheUse = UI.getUse();
1366 Instruction *User = cast<Instruction>(*UI);
1367
1368 // Preincrement use iterator so we don't invalidate it.
1369 ++UI;
1370
1371 // Don't bother for PHI nodes.
1372 if (isa<PHINode>(User))
1373 continue;
1374
1375 // Figure out which BB this cmp is used in.
1376 BasicBlock *UserBB = User->getParent();
1377 BasicBlock *DefBB = Cmp->getParent();
1378
1379 // If this user is in the same block as the cmp, don't change the cmp.
1380 if (UserBB == DefBB) continue;
1381
1382 // If we have already inserted a cmp into this block, use it.
1383 CmpInst *&InsertedCmp = InsertedCmps[UserBB];
1384
1385 if (!InsertedCmp) {
1386 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1387 assert(InsertPt != UserBB->end());
1388 InsertedCmp =
1389 CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
1390 Cmp->getOperand(0), Cmp->getOperand(1), "",
1391 &*InsertPt);
1392 // Propagate the debug info.
1393 InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
1394 }
1395
1396 // Replace a use of the cmp with a use of the new cmp.
1397 TheUse = InsertedCmp;
1398 MadeChange = true;
1399 ++NumCmpUses;
1400 }
1401
1402 // If we removed all uses, nuke the cmp.
1403 if (Cmp->use_empty()) {
1404 Cmp->eraseFromParent();
1405 MadeChange = true;
1406 }
1407
1408 return MadeChange;
1409}
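// Sketch of the sinking above (block names are illustrative): a compare
// defined in one block and consumed by branches in two others,
//   def:  %c = icmp eq i32 %x, 0
//   bb1:  br i1 %c, ...
//   bb2:  br i1 %c, ...
// is duplicated so each consuming block gets its own icmp, keeping the
// flag-like value local to the block that branches on it.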
1410
1411bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
1412 if (sinkCmpExpression(Cmp, *TLI))
1413 return true;
1414
1415 if (combineToUAddWithOverflow(Cmp, ModifiedDT))
1416 return true;
1417
1418 if (combineToUSubWithOverflow(Cmp, ModifiedDT))
1419 return true;
1420
1421 return false;
1422}
1423
1424/// Duplicate and sink the given 'and' instruction into user blocks where it is
1425/// used in a compare to allow isel to generate better code for targets where
1426/// this operation can be combined.
1427///
1428/// Return true if any changes are made.
1429static bool sinkAndCmp0Expression(Instruction *AndI,
1430 const TargetLowering &TLI,
1431 SetOfInstrs &InsertedInsts) {
1432 // Double-check that we're not trying to optimize an instruction that was
1433 // already optimized by some other part of this pass.
1434 assert(!InsertedInsts.count(AndI) &&
1435 "Attempting to optimize already optimized and instruction");
1436 (void) InsertedInsts;
1437
1438 // Nothing to do for single use in same basic block.
1439 if (AndI->hasOneUse() &&
1440 AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
1441 return false;
1442
1443 // Try to avoid cases where sinking/duplicating is likely to increase register
1444 // pressure.
1445 if (!isa<ConstantInt>(AndI->getOperand(0)) &&
1446 !isa<ConstantInt>(AndI->getOperand(1)) &&
1447 AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
1448 return false;
1449
1450 for (auto *U : AndI->users()) {
1451 Instruction *User = cast<Instruction>(U);
1452
1453 // Only sink 'and' feeding icmp with 0.
1454 if (!isa<ICmpInst>(User))
1455 return false;
1456
1457 auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
1458 if (!CmpC || !CmpC->isZero())
1459 return false;
1460 }
1461
1462 if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
1463 return false;
1464
1465 LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
1466 LLVM_DEBUG(AndI->getParent()->dump());
1467
1468 // Push the 'and' into the same block as the icmp 0. There should only be
1469 // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
1470 // others, so we don't need to keep track of which BBs we insert into.
1471 for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
1472 UI != E; ) {
1473 Use &TheUse = UI.getUse();
1474 Instruction *User = cast<Instruction>(*UI);
1475
1476 // Preincrement use iterator so we don't invalidate it.
1477 ++UI;
1478
1479 LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
1480
1481 // Keep the 'and' in the same place if the use is already in the same block.
1482 Instruction *InsertPt =
1483 User->getParent() == AndI->getParent() ? AndI : User;
1484 Instruction *InsertedAnd =
1485 BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
1486 AndI->getOperand(1), "", InsertPt);
1487 // Propagate the debug info.
1488 InsertedAnd->setDebugLoc(AndI->getDebugLoc());
1489
1490 // Replace a use of the 'and' with a use of the new 'and'.
1491 TheUse = InsertedAnd;
1492 ++NumAndUses;
1493 LLVM_DEBUG(User->getParent()->dump());
1494 }
1495
1496 // We removed all uses, nuke the and.
1497 AndI->eraseFromParent();
1498 return true;
1499}
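// Illustrative before/after for the duplication above (names made up): with
//   def:  %m  = and i64 %x, 255
//   bb1:  %c1 = icmp eq i64 %m, 0 ...
//   bb2:  %c2 = icmp eq i64 %m, 0 ...
// each user block receives its own "and i64 %x, 255" right before its icmp,
// so a target whose isMaskAndCmp0FoldingBeneficial hook returns true can
// match the mask-and-branch pair within a single block.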
1500
1501/// Check if the candidates could be combined with a shift instruction, which
1502/// includes:
1503/// 1. Truncate instruction
1504/// 2. And instruction and the imm is a mask of the low bits:
1505/// imm & (imm+1) == 0
1506static bool isExtractBitsCandidateUse(Instruction *User) {
1507 if (!isa<TruncInst>(User)) {
1508 if (User->getOpcode() != Instruction::And ||
1509 !isa<ConstantInt>(User->getOperand(1)))
1510 return false;
1511
1512 const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue();
1513
1514 if ((Cimm & (Cimm + 1)).getBoolValue())
1515 return false;
1516 }
1517 return true;
1518}
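// Quick check of the mask test above: 0xff & 0x100 == 0, so a contiguous
// low-bit mask like 0xff is accepted, whereas 0xfa & 0xfb != 0, so a mask
// with a cleared low bit is rejected; only masks of the form 2^n - 1 pass.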
1519
1520/// Sink both shift and truncate instruction to the use of truncate's BB.
1521static bool
1522SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
1523 DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts,
1524 const TargetLowering &TLI, const DataLayout &DL) {
1525 BasicBlock *UserBB = User->getParent();
1526 DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
1527 TruncInst *TruncI = dyn_cast<TruncInst>(User);
1528 bool MadeChange = false;
1529
1530 for (Value::user_iterator TruncUI = TruncI->user_begin(),
1531 TruncE = TruncI->user_end();
1532 TruncUI != TruncE;) {
1533
1534 Use &TruncTheUse = TruncUI.getUse();
1535 Instruction *TruncUser = cast<Instruction>(*TruncUI);
1536 // Preincrement use iterator so we don't invalidate it.
1537
1538 ++TruncUI;
1539
1540 int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode());
1541 if (!ISDOpcode)
1542 continue;
1543
1544 // If the use is actually a legal node, there will not be an
1545 // implicit truncate.
1546 // FIXME: always querying the result type is just an
1547 // approximation; some nodes' legality is determined by the
1548 // operand or other means. There's no good way to find out though.
1549 if (TLI.isOperationLegalOrCustom(
1550 ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true)))
1551 continue;
1552
1553 // Don't bother for PHI nodes.
1554 if (isa<PHINode>(TruncUser))
1555 continue;
1556
1557 BasicBlock *TruncUserBB = TruncUser->getParent();
1558
1559 if (UserBB == TruncUserBB)
1560 continue;
1561
1562 BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB];
1563 CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB];
1564
1565 if (!InsertedShift && !InsertedTrunc) {
1566 BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt();
1567 assert(InsertPt != TruncUserBB->end());
1568 // Sink the shift
1569 if (ShiftI->getOpcode() == Instruction::AShr)
1570 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1571 "", &*InsertPt);
1572 else
1573 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1574 "", &*InsertPt);
1575 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1576
1577 // Sink the trunc
1578 BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt();
1579 TruncInsertPt++;
1580 assert(TruncInsertPt != TruncUserBB->end());
1581
1582 InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift,
1583 TruncI->getType(), "", &*TruncInsertPt);
1584 InsertedTrunc->setDebugLoc(TruncI->getDebugLoc());
1585
1586 MadeChange = true;
1587
1588 TruncTheUse = InsertedTrunc;
1589 }
1590 }
1591 return MadeChange;
1592}
1593
1594/// Sink the shift *right* instruction into user blocks if the uses could
1595/// potentially be combined with this shift instruction and generate BitExtract
1596/// instruction. It will only be applied if the architecture supports BitExtract
1597/// instruction. Here is an example:
1598/// BB1:
1599/// %x.extract.shift = lshr i64 %arg1, 32
1600/// BB2:
1601/// %x.extract.trunc = trunc i64 %x.extract.shift to i16
1602/// ==>
1603///
1604/// BB2:
1605/// %x.extract.shift.1 = lshr i64 %arg1, 32
1606/// %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
1607///
1608/// CodeGen will recognize the pattern in BB2 and generate BitExtract
1609/// instruction.
1610/// Return true if any changes are made.
1611static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
1612 const TargetLowering &TLI,
1613 const DataLayout &DL) {
1614 BasicBlock *DefBB = ShiftI->getParent();
1615
1616 /// Only insert instructions in each block once.
1617 DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;
1618
1619 bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));
1620
1621 bool MadeChange = false;
1622 for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
1623 UI != E;) {
1624 Use &TheUse = UI.getUse();
1625 Instruction *User = cast<Instruction>(*UI);
1626 // Preincrement use iterator so we don't invalidate it.
1627 ++UI;
1628
1629 // Don't bother for PHI nodes.
1630 if (isa<PHINode>(User))
1631 continue;
1632
1633 if (!isExtractBitsCandidateUse(User))
1634 continue;
1635
1636 BasicBlock *UserBB = User->getParent();
1637
1638 if (UserBB == DefBB) {
1639 // If the shift and truncate instructions are in the same BB, the use of
1640 // the truncate (TruncUse) may still introduce another truncate if its
1641 // type is not legal. In this case, we would like to sink both the shift
1642 // and the truncate instruction into the BB of TruncUse.
1643 // for example:
1644 // BB1:
1645 // i64 shift.result = lshr i64 opnd, imm
1646 // trunc.result = trunc shift.result to i16
1647 //
1648 // BB2:
1649 // ----> We will have an implicit truncate here if the architecture does
1650 // not have i16 compare.
1651 // cmp i16 trunc.result, opnd2
1652 //
1653 if (isa<TruncInst>(User) && shiftIsLegal
1654 // If the type of the truncate is legal, no truncate will be
1655 // introduced in other basic blocks.
1656 &&
1657 (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
1658 MadeChange =
1659 SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);
1660
1661 continue;
1662 }
1663 // If we have already inserted a shift into this block, use it.
1664 BinaryOperator *&InsertedShift = InsertedShifts[UserBB];
1665
1666 if (!InsertedShift) {
1667 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1668 assert(InsertPt != UserBB->end());
1669
1670 if (ShiftI->getOpcode() == Instruction::AShr)
1671 InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
1672 "", &*InsertPt);
1673 else
1674 InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
1675 "", &*InsertPt);
1676 InsertedShift->setDebugLoc(ShiftI->getDebugLoc());
1677
1678 MadeChange = true;
1679 }
1680
1681 // Replace a use of the shift with a use of the new shift.
1682 TheUse = InsertedShift;
1683 }
1684
1685 // If we removed all uses, or there are none, nuke the shift.
1686 if (ShiftI->use_empty()) {
1687 salvageDebugInfo(*ShiftI);
1688 ShiftI->eraseFromParent();
1689 MadeChange = true;
1690 }
1691
1692 return MadeChange;
1693}
1694
1695/// If counting leading or trailing zeros is an expensive operation and a zero
1696/// input is defined, add a check for zero to avoid calling the intrinsic.
1697///
1698/// We want to transform:
1699/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false)
1700///
1701/// into:
1702/// entry:
1703/// %cmpz = icmp eq i64 %A, 0
1704/// br i1 %cmpz, label %cond.end, label %cond.false
1705/// cond.false:
1706/// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true)
1707/// br label %cond.end
1708/// cond.end:
1709/// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ]
1710///
1711/// If the transform is performed, return true and set ModifiedDT to true.
1712static bool despeculateCountZeros(IntrinsicInst *CountZeros,
1713 const TargetLowering *TLI,
1714 const DataLayout *DL,
1715 bool &ModifiedDT) {
1716 if (!TLI || !DL)
1717 return false;
1718
1719 // If a zero input is undefined, it doesn't make sense to despeculate that.
1720 if (match(CountZeros->getOperand(1), m_One()))
1721 return false;
1722
1723 // If it's cheap to speculate, there's nothing to do.
1724 auto IntrinsicID = CountZeros->getIntrinsicID();
1725 if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) ||
1726 (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz()))
1727 return false;
1728
1729 // Only handle legal scalar cases. Anything else requires too much work.
1730 Type *Ty = CountZeros->getType();
1731 unsigned SizeInBits = Ty->getPrimitiveSizeInBits();
1732 if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
1733 return false;
1734
1735 // The intrinsic will be sunk behind a compare against zero and branch.
1736 BasicBlock *StartBlock = CountZeros->getParent();
1737 BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
1738
1739 // Create another block after the count zero intrinsic. A PHI will be added
1740 // in this block to select the result of the intrinsic or the bit-width
1741 // constant if the input to the intrinsic is zero.
1742 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros));
1743 BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end");
1744
1745 // Set up a builder to create a compare, conditional branch, and PHI.
1746 IRBuilder<> Builder(CountZeros->getContext());
1747 Builder.SetInsertPoint(StartBlock->getTerminator());
1748 Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc());
1749
1750 // Replace the unconditional branch that was created by the first split with
1751 // a compare against zero and a conditional branch.
1752 Value *Zero = Constant::getNullValue(Ty);
1753 Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz");
1754 Builder.CreateCondBr(Cmp, EndBlock, CallBlock);
1755 StartBlock->getTerminator()->eraseFromParent();
1756
1757 // Create a PHI in the end block to select either the output of the intrinsic
1758 // or the bit width of the operand.
1759 Builder.SetInsertPoint(&EndBlock->front());
1760 PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz");
1761 CountZeros->replaceAllUsesWith(PN);
1762 Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits));
1763 PN->addIncoming(BitWidth, StartBlock);
1764 PN->addIncoming(CountZeros, CallBlock);
1765
1766 // We are explicitly handling the zero case, so we can set the intrinsic's
1767 // undefined zero argument to 'true'. This will also prevent reprocessing the
1768 // intrinsic; we only despeculate when a zero input is defined.
1769 CountZeros->setArgOperand(1, Builder.getTrue());
1770 ModifiedDT = true;
1771 return true;
1772}
1773
1774bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
1775 BasicBlock *BB = CI->getParent();
1776
1777 // Lower inline assembly if we can.
1778 // If we found an inline asm expession, and if the target knows how to
1779 // lower it to normal LLVM code, do so now.
1780 if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
1781 if (TLI->ExpandInlineAsm(CI)) {
1782 // Avoid invalidating the iterator.
1783 CurInstIterator = BB->begin();
1784 // Avoid processing instructions out of order, which could cause
1785 // reuse before a value is defined.
1786 SunkAddrs.clear();
1787 return true;
1788 }
1789 // Sink address computing for memory operands into the block.
1790 if (optimizeInlineAsmInst(CI))
1791 return true;
1792 }
1793
1794 // Align the pointer arguments to this call if the target thinks it's a good
1795 // idea
1796 unsigned MinSize, PrefAlign;
1797 if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) {
1798 for (auto &Arg : CI->arg_operands()) {
1799 // We want to align both objects whose address is used directly and
1800 // objects whose address is used in casts and GEPs, though it only makes
1801 // sense for GEPs if the offset is a multiple of the desired alignment and
1802 // if size - offset meets the size threshold.
1803 if (!Arg->getType()->isPointerTy())
1804 continue;
1805 APInt Offset(DL->getIndexSizeInBits(
1806 cast<PointerType>(Arg->getType())->getAddressSpace()),
1807 0);
1808 Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset);
1809 uint64_t Offset2 = Offset.getLimitedValue();
1810 if ((Offset2 & (PrefAlign-1)) != 0)
1811 continue;
1812 AllocaInst *AI;
1813 if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
1814 DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
1815 AI->setAlignment(MaybeAlign(PrefAlign));
1816 // Global variables can only be aligned if they are defined in this
1817 // object (i.e. they are uniquely initialized in this object), and
1818 // over-aligning global variables that have an explicit section is
1819 // forbidden.
1820 GlobalVariable *GV;
1821 if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() &&
1822 GV->getPointerAlignment(*DL) < PrefAlign &&
1823 DL->getTypeAllocSize(GV->getValueType()) >=
1824 MinSize + Offset2)
1825 GV->setAlignment(PrefAlign);
1826 }
1827 // If this is a memcpy (or similar) then we may be able to improve the
1828 // alignment
1829 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) {
1830 unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL);
1831 if (DestAlign > MI->getDestAlignment())
1832 MI->setDestAlignment(DestAlign);
1833 if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) {
1834 unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL);
1835 if (SrcAlign > MTI->getSourceAlignment())
1836 MTI->setSourceAlignment(SrcAlign);
1837 }
1838 }
1839 }
1840
1841 // If we have a cold call site, try to sink addressing computation into the
1842 // cold block. This interacts with our handling for loads and stores to
1843 // ensure that we can fold all uses of a potential addressing computation
1844 // into their uses. TODO: generalize this to work over profiling data
1845 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
1846 for (auto &Arg : CI->arg_operands()) {
1847 if (!Arg->getType()->isPointerTy())
1848 continue;
1849 unsigned AS = Arg->getType()->getPointerAddressSpace();
1850 return optimizeMemoryInst(CI, Arg, Arg->getType(), AS);
1851 }
1852
1853 IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
1854 if (II) {
1855 switch (II->getIntrinsicID()) {
1856 default: break;
1857 case Intrinsic::experimental_widenable_condition: {
1858 // Give up on future widening opportunities so that we can fold away dead
1859 // paths and merge blocks before going into block-local instruction
1860 // selection.
1861 if (II->use_empty()) {
1862 II->eraseFromParent();
1863 return true;
1864 }
1865 Constant *RetVal = ConstantInt::getTrue(II->getContext());
1866 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1867 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1868 });
1869 return true;
1870 }
1871 case Intrinsic::objectsize: {
1872 // Lower all uses of llvm.objectsize.*
1873 Value *RetVal =
1874 lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
1875
1876 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1877 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1878 });
1879 return true;
1880 }
1881 case Intrinsic::is_constant: {
1882 // If is_constant hasn't folded away yet, lower it to false now.
1883 Constant *RetVal = ConstantInt::get(II->getType(), 0);
1884 resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
1885 replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
1886 });
1887 return true;
1888 }
1889 case Intrinsic::aarch64_stlxr:
1890 case Intrinsic::aarch64_stxr: {
1891 ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
1892 if (!ExtVal || !ExtVal->hasOneUse() ||
1893 ExtVal->getParent() == CI->getParent())
1894 return false;
1895 // Sink a zext feeding stlxr/stxr before it, so it can be folded into it.
1896 ExtVal->moveBefore(CI);
1897 // Mark this instruction as "inserted by CGP", so that other
1898 // optimizations don't touch it.
1899 InsertedInsts.insert(ExtVal);
1900 return true;
1901 }
1902
1903 case Intrinsic::launder_invariant_group:
1904 case Intrinsic::strip_invariant_group: {
1905 Value *ArgVal = II->getArgOperand(0);
1906 auto it = LargeOffsetGEPMap.find(II);
1907 if (it != LargeOffsetGEPMap.end()) {
1908 // Merge entries in LargeOffsetGEPMap to reflect the RAUW.
1909 // Make sure not to have to deal with iterator invalidation
1910 // after possibly adding ArgVal to LargeOffsetGEPMap.
1911 auto GEPs = std::move(it->second);
1912 LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end());
1913 LargeOffsetGEPMap.erase(II);
1914 }
1915
1916 II->replaceAllUsesWith(ArgVal);
1917 II->eraseFromParent();
1918 return true;
1919 }
1920 case Intrinsic::cttz:
1921 case Intrinsic::ctlz:
1922 // If counting zeros is expensive, try to avoid it.
1923 return despeculateCountZeros(II, TLI, DL, ModifiedDT);
1924 }
1925
1926 if (TLI) {
1927 SmallVector<Value*, 2> PtrOps;
1928 Type *AccessTy;
1929 if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
1930 while (!PtrOps.empty()) {
1931 Value *PtrVal = PtrOps.pop_back_val();
1932 unsigned AS = PtrVal->getType()->getPointerAddressSpace();
1933 if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
1934 return true;
1935 }
1936 }
1937 }
1938
1939 // From here on out we're working with named functions.
1940 if (!CI->getCalledFunction()) return false;
1941
1942 // Lower all default uses of _chk calls. This is very similar
1943 // to what InstCombineCalls does, but here we are only lowering calls
1944 // to fortified library functions (e.g. __memcpy_chk) that have the default
1945 // "don't know" as the objectsize. Anything else should be left alone.
1946 FortifiedLibCallSimplifier Simplifier(TLInfo, true);
1947 if (Value *V = Simplifier.optimizeCall(CI)) {
1948 CI->replaceAllUsesWith(V);
1949 CI->eraseFromParent();
1950 return true;
1951 }
1952
1953 return false;
1954}
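// Example of the fortified-call lowering above (assuming the usual _chk
// prototype; names are illustrative): a call like
//   %r = call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 %n, i64 -1)
// carries the "don't know" object size of -1, so FortifiedLibCallSimplifier
// replaces it with an ordinary memcpy of %n bytes and the call's uses are
// rewritten to the simplified value; calls with a real object size are left
// untouched here.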
1955
1956/// Look for opportunities to duplicate return instructions to the predecessor
1957/// to enable tail call optimizations. The case it is currently looking for is:
1958/// @code
1959/// bb0:
1960/// %tmp0 = tail call i32 @f0()
1961/// br label %return
1962/// bb1:
1963/// %tmp1 = tail call i32 @f1()
1964/// br label %return
1965/// bb2:
1966/// %tmp2 = tail call i32 @f2()
1967/// br label %return
1968/// return:
1969/// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
1970/// ret i32 %retval
1971/// @endcode
1972///
1973/// =>
1974///
1975/// @code
1976/// bb0:
1977/// %tmp0 = tail call i32 @f0()
1978/// ret i32 %tmp0
1979/// bb1:
1980/// %tmp1 = tail call i32 @f1()
1981/// ret i32 %tmp1
1982/// bb2:
1983/// %tmp2 = tail call i32 @f2()
1984/// ret i32 %tmp2
1985/// @endcode
1986bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
1987 if (!TLI)
1988 return false;
1989
1990 ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
1991 if (!RetI)
1992 return false;
1993
1994 PHINode *PN = nullptr;
1995 BitCastInst *BCI = nullptr;
1996 Value *V = RetI->getReturnValue();
1997 if (V) {
1998 BCI = dyn_cast<BitCastInst>(V);
1999 if (BCI)
2000 V = BCI->getOperand(0);
2001
2002 PN = dyn_cast<PHINode>(V);
2003 if (!PN)
2004 return false;
2005 }
2006
2007 if (PN && PN->getParent() != BB)
2008 return false;
2009
2010 // Make sure there are no instructions between the PHI and return, or that the
2011 // return is the first instruction in the block.
2012 if (PN) {
2013 BasicBlock::iterator BI = BB->begin();
2014 // Skip over debug and the bitcast.
2015 do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
2016 if (&*BI != RetI)
2017 return false;
2018 } else {
2019 BasicBlock::iterator BI = BB->begin();
2020 while (isa<DbgInfoIntrinsic>(BI)) ++BI;
2021 if (&*BI != RetI)
2022 return false;
2023 }
2024
2025 /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
2026 /// call.
2027 const Function *F = BB->getParent();
2028 SmallVector<BasicBlock*, 4> TailCallBBs;
2029 if (PN) {
2030 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
2031 // Look through bitcasts.
2032 Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
2033 CallInst *CI = dyn_cast<CallInst>(IncomingVal);
2034 BasicBlock *PredBB = PN->getIncomingBlock(I);
2035 // Make sure the phi value is indeed produced by the tail call.
2036 if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
2037 TLI->mayBeEmittedAsTailCall(CI) &&
2038 attributesPermitTailCall(F, CI, RetI, *TLI))
2039 TailCallBBs.push_back(PredBB);
2040 }
2041 } else {
2042 SmallPtrSet<BasicBlock*, 4> VisitedBBs;
2043 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
2044 if (!VisitedBBs.insert(*PI).second)
2045 continue;
2046
2047 BasicBlock::InstListType &InstList = (*PI)->getInstList();
2048 BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
2049 BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
2050 do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
2051 if (RI == RE)
2052 continue;
2053
2054 CallInst *CI = dyn_cast<CallInst>(&*RI);
2055 if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
2056 attributesPermitTailCall(F, CI, RetI, *TLI))
2057 TailCallBBs.push_back(*PI);
2058 }
2059 }
2060
2061 bool Changed = false;
2062 for (auto const &TailCallBB : TailCallBBs) {
2063 // Make sure the call instruction is followed by an unconditional branch to
2064 // the return block.
2065 BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
2066 if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
2067 continue;
2068
2069 // Duplicate the return into TailCallBB.
2070 (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
2071 ModifiedDT = Changed = true;
2072 ++NumRetsDup;
2073 }
2074
2075 // If we eliminated all predecessors of the block, delete the block now.
2076 if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
2077 BB->eraseFromParent();
2078
2079 return Changed;
2080}
2081
2082//===----------------------------------------------------------------------===//
2083// Memory Optimization
2084//===----------------------------------------------------------------------===//
2085
2086namespace {
2087
2088/// This is an extended version of TargetLowering::AddrMode
2089/// which holds actual Value*'s for register values.
2090struct ExtAddrMode : public TargetLowering::AddrMode {
2091 Value *BaseReg = nullptr;
2092 Value *ScaledReg = nullptr;
2093 Value *OriginalValue = nullptr;
2094 bool InBounds = true;
2095
2096 enum FieldName {
2097 NoField = 0x00,
2098 BaseRegField = 0x01,
2099 BaseGVField = 0x02,
2100 BaseOffsField = 0x04,
2101 ScaledRegField = 0x08,
2102 ScaleField = 0x10,
2103 MultipleFields = 0xff
2104 };
2105
2106
2107 ExtAddrMode() = default;
2108
2109 void print(raw_ostream &OS) const;
2110 void dump() const;
2111
2112 FieldName compare(const ExtAddrMode &other) {
2113 // First check that the types are the same on each field, as differing types
2114 // is something we can't cope with later on.
2115 if (BaseReg && other.BaseReg &&
2116 BaseReg->getType() != other.BaseReg->getType())
2117 return MultipleFields;
2118 if (BaseGV && other.BaseGV &&
2119 BaseGV->getType() != other.BaseGV->getType())
2120 return MultipleFields;
2121 if (ScaledReg && other.ScaledReg &&
2122 ScaledReg->getType() != other.ScaledReg->getType())
2123 return MultipleFields;
2124
2125 // Conservatively reject 'inbounds' mismatches.
2126 if (InBounds != other.InBounds)
2127 return MultipleFields;
2128
2129 // Check each field to see if it differs.
2130 unsigned Result = NoField;
2131 if (BaseReg != other.BaseReg)
2132 Result |= BaseRegField;
2133 if (BaseGV != other.BaseGV)
2134 Result |= BaseGVField;
2135 if (BaseOffs != other.BaseOffs)
2136 Result |= BaseOffsField;
2137 if (ScaledReg != other.ScaledReg)
2138 Result |= ScaledRegField;
2139 // Don't count 0 as being a different scale, because that actually means
2140 // unscaled (which will already be counted by having no ScaledReg).
2141 if (Scale && other.Scale && Scale != other.Scale)
2142 Result |= ScaleField;
2143
2144 if (countPopulation(Result) > 1)
2145 return MultipleFields;
2146 else
2147 return static_cast<FieldName>(Result);
2148 }
2149
2150 // An AddrMode is trivial if it involves no calculation i.e. it is just a base
2151 // with no offset.
2152 bool isTrivial() {
2153 // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is
2154 // trivial if at most one of these terms is nonzero, except that BaseGV and
2155 // BaseReg both being zero actually means a null pointer value, which we
2156 // consider to be 'non-zero' here.
2157 return !BaseOffs && !Scale && !(BaseGV && BaseReg);
2158 }
2159
2160 Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) {
2161 switch (Field) {
2162 default:
2163 return nullptr;
2164 case BaseRegField:
2165 return BaseReg;
2166 case BaseGVField:
2167 return BaseGV;
2168 case ScaledRegField:
2169 return ScaledReg;
2170 case BaseOffsField:
2171 return ConstantInt::get(IntPtrTy, BaseOffs);
2172 }
2173 }
2174
2175 void SetCombinedField(FieldName Field, Value *V,
2176 const SmallVectorImpl<ExtAddrMode> &AddrModes) {
2177 switch (Field) {
2178 default:
2179 llvm_unreachable("Unhandled fields are expected to be rejected earlier");
2180 break;
2181 case ExtAddrMode::BaseRegField:
2182 BaseReg = V;
2183 break;
2184 case ExtAddrMode::BaseGVField:
2185 // A combined BaseGV is an Instruction, not a GlobalValue, so it goes
2186 // in the BaseReg field.
2187 assert(BaseReg == nullptr);
2188 BaseReg = V;
2189 BaseGV = nullptr;
2190 break;
2191 case ExtAddrMode::ScaledRegField:
2192 ScaledReg = V;
2193 // If we have a mix of scaled and unscaled addrmodes then we want scale
2194 // to be the scale and not zero.
2195 if (!Scale)
2196 for (const ExtAddrMode &AM : AddrModes)
2197 if (AM.Scale) {
2198 Scale = AM.Scale;
2199 break;
2200 }
2201 break;
2202 case ExtAddrMode::BaseOffsField:
2203 // The offset is no longer a constant, so it goes in ScaledReg with a
2204 // scale of 1.
2205 assert(ScaledReg == nullptr);
2206 ScaledReg = V;
2207 Scale = 1;
2208 BaseOffs = 0;
2209 break;
2210 }
2211 }
2212};
2213
2214} // end anonymous namespace
2215
2216#ifndef NDEBUG
2217static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
2218 AM.print(OS);
2219 return OS;
2220}
2221#endif
2222
2223#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
2224void ExtAddrMode::print(raw_ostream &OS) const {
2225 bool NeedPlus = false;
2226 OS << "[";
2227 if (InBounds)
2228 OS << "inbounds ";
2229 if (BaseGV) {
2230 OS << (NeedPlus ? " + " : "")
2231 << "GV:";
2232 BaseGV->printAsOperand(OS, /*PrintType=*/false);
2233 NeedPlus = true;
2234 }
2235
2236 if (BaseOffs) {
2237 OS << (NeedPlus ? " + " : "")
2238 << BaseOffs;
2239 NeedPlus = true;
2240 }
2241
2242 if (BaseReg) {
2243 OS << (NeedPlus ? " + " : "")
2244 << "Base:";
2245 BaseReg->printAsOperand(OS, /*PrintType=*/false);
2246 NeedPlus = true;
2247 }
2248 if (Scale) {
2249 OS << (NeedPlus ? " + " : "")
2250 << Scale << "*";
2251 ScaledReg->printAsOperand(OS, /*PrintType=*/false);
2252 }
2253
2254 OS << ']';
2255}
2256
2257 LLVM_DUMP_METHOD void ExtAddrMode::dump() const {
2258 print(dbgs());
2259 dbgs() << '\n';
2260}
2261#endif
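// Example of the printed form produced by ExtAddrMode::print above (operand
// names are illustrative): an addressing mode with a global base, a constant
// offset, a base register and a scaled index would print roughly as
//   [inbounds GV:@table + 16 + Base:%p + 4*%i]
// with each component emitted only when present.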
2262
2263namespace {
2264
2265/// This class provides transaction based operation on the IR.
2266/// Every change made through this class is recorded in the internal state and
2267/// can be undone (rollback) until commit is called.
2268class TypePromotionTransaction {
2269 /// This represents the common interface of the individual transaction.
2270 /// Each class implements the logic for doing one specific modification on
2271 /// the IR via the TypePromotionTransaction.
2272 class TypePromotionAction {
2273 protected:
2274 /// The Instruction modified.
2275 Instruction *Inst;
2276
2277 public:
2278 /// Constructor of the action.
2279 /// The constructor performs the related action on the IR.
2280 TypePromotionAction(Instruction *Inst) : Inst(Inst) {}
2281
2282 virtual ~TypePromotionAction() = default;
2283
2284 /// Undo the modification done by this action.
2285 /// When this method is called, the IR must be in the same state as it was
2286 /// before this action was applied.
2287 /// \pre Undoing the action works if and only if the IR is in the exact same
2288 /// state as it was directly after this action was applied.
2289 virtual void undo() = 0;
2290
2291 /// Advocate every change made by this action.
2292 /// When the results on the IR of the action are to be kept, it is important
2293 /// to call this function, otherwise hidden information may be kept forever.
2294 virtual void commit() {
2295 // Nothing to be done, this action is not doing anything.
2296 }
2297 };
2298
2299 /// Utility to remember the position of an instruction.
2300 class InsertionHandler {
2301 /// Position of an instruction.
2302 /// Either an instruction:
2303 /// - Is the first in a basic block: BB is used.
2304 /// - Has a previous instruction: PrevInst is used.
2305 union {
2306 Instruction *PrevInst;
2307 BasicBlock *BB;
2308 } Point;
2309
2310 /// Remember whether or not the instruction had a previous instruction.
2311 bool HasPrevInstruction;
2312
2313 public:
2314 /// Record the position of \p Inst.
2315 InsertionHandler(Instruction *Inst) {
2316 BasicBlock::iterator It = Inst->getIterator();
2317 HasPrevInstruction = (It != (Inst->getParent()->begin()));
2318 if (HasPrevInstruction)
2319 Point.PrevInst = &*--It;
2320 else
2321 Point.BB = Inst->getParent();
2322 }
2323
2324 /// Insert \p Inst at the recorded position.
2325 void insert(Instruction *Inst) {
2326 if (HasPrevInstruction) {
2327 if (Inst->getParent())
2328 Inst->removeFromParent();
2329 Inst->insertAfter(Point.PrevInst);
2330 } else {
2331 Instruction *Position = &*Point.BB->getFirstInsertionPt();
2332 if (Inst->getParent())
2333 Inst->moveBefore(Position);
2334 else
2335 Inst->insertBefore(Position);
2336 }
2337 }
2338 };
2339
2340 /// Move an instruction before another.
2341 class InstructionMoveBefore : public TypePromotionAction {
2342 /// Original position of the instruction.
2343 InsertionHandler Position;
2344
2345 public:
2346 /// Move \p Inst before \p Before.
2347 InstructionMoveBefore(Instruction *Inst, Instruction *Before)
2348 : TypePromotionAction(Inst), Position(Inst) {
2349 LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Before
2350 << "\n");
2351 Inst->moveBefore(Before);
2352 }
2353
2354 /// Move the instruction back to its original position.
2355 void undo() override {
2356 LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n");
2357 Position.insert(Inst);
2358 }
2359 };
2360
2361 /// Set the operand of an instruction with a new value.
2362 class OperandSetter : public TypePromotionAction {
2363 /// Original operand of the instruction.
2364 Value *Origin;
2365
2366 /// Index of the modified instruction.
2367 unsigned Idx;
2368
2369 public:
2370 /// Set \p Idx operand of \p Inst with \p NewVal.
2371 OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
2372 : TypePromotionAction(Inst), Idx(Idx) {
2373 LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"
2374 << "for:" << *Inst << "\n"
2375 << "with:" << *NewVal << "\n");
2376 Origin = Inst->getOperand(Idx);
2377 Inst->setOperand(Idx, NewVal);
2378 }
2379
2380 /// Restore the original value of the instruction.
2381 void undo() override {
2382 LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"
2383 << "for: " << *Inst << "\n"
2384 << "with: " << *Origin << "\n");
2385 Inst->setOperand(Idx, Origin);
2386 }
2387 };
2388
2389 /// Hide the operands of an instruction.
2390 /// Do as if this instruction was not using any of its operands.
2391 class OperandsHider : public TypePromotionAction {
2392 /// The list of original operands.
2393 SmallVector<Value *, 4> OriginalValues;
2394
2395 public:
2396 /// Remove \p Inst from the uses of the operands of \p Inst.
2397 OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
2398 LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n");
2399 unsigned NumOpnds = Inst->getNumOperands();
2400 OriginalValues.reserve(NumOpnds);
2401 for (unsigned It = 0; It < NumOpnds; ++It) {
2402 // Save the current operand.
2403 Value *Val = Inst->getOperand(It);
2404 OriginalValues.push_back(Val);
2405 // Set a dummy one.
2406 // We could use OperandSetter here, but that would imply an overhead
2407 // that we are not willing to pay.
2408 Inst->setOperand(It, UndefValue::get(Val->getType()));
2409 }
2410 }
2411
2412 /// Restore the original list of uses.
2413 void undo() override {
2414 LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n");
2415 for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
2416 Inst->setOperand(It, OriginalValues[It]);
2417 }
2418 };
2419
2420 /// Build a truncate instruction.
2421 class TruncBuilder : public TypePromotionAction {
2422 Value *Val;
2423
2424 public:
2425 /// Build a truncate instruction of \p Opnd producing a \p Ty
2426 /// result.
2427 /// trunc Opnd to Ty.
2428 TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
2429 IRBuilder<> Builder(Opnd);
2430 Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
2431 LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n");
2432 }
2433
2434 /// Get the built value.
2435 Value *getBuiltValue() { return Val; }
2436
2437 /// Remove the built instruction.
2438 void undo() override {
2439 LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n");
2440 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2441 IVal->eraseFromParent();
2442 }
2443 };
2444
2445 /// Build a sign extension instruction.
2446 class SExtBuilder : public TypePromotionAction {
2447 Value *Val;
2448
2449 public:
2450 /// Build a sign extension instruction of \p Opnd producing a \p Ty
2451 /// result.
2452 /// sext Opnd to Ty.
2453 SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2454 : TypePromotionAction(InsertPt) {
2455 IRBuilder<> Builder(InsertPt);
2456 Val = Builder.CreateSExt(Opnd, Ty, "promoted");
2457 LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n");
2458 }
2459
2460 /// Get the built value.
2461 Value *getBuiltValue() { return Val; }
2462
2463 /// Remove the built instruction.
2464 void undo() override {
2465 LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n");
2466 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2467 IVal->eraseFromParent();
2468 }
2469 };
2470
2471 /// Build a zero extension instruction.
2472 class ZExtBuilder : public TypePromotionAction {
2473 Value *Val;
2474
2475 public:
2476 /// Build a zero extension instruction of \p Opnd producing a \p Ty
2477 /// result.
2478 /// zext Opnd to Ty.
2479 ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
2480 : TypePromotionAction(InsertPt) {
2481 IRBuilder<> Builder(InsertPt);
2482 Val = Builder.CreateZExt(Opnd, Ty, "promoted");
2483 LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n");
2484 }
2485
2486 /// Get the built value.
2487 Value *getBuiltValue() { return Val; }
2488
2489 /// Remove the built instruction.
2490 void undo() override {
2491 LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n");
2492 if (Instruction *IVal = dyn_cast<Instruction>(Val))
2493 IVal->eraseFromParent();
2494 }
2495 };
2496
2497 /// Mutate an instruction to another type.
2498 class TypeMutator : public TypePromotionAction {
2499 /// Record the original type.
2500 Type *OrigTy;
2501
2502 public:
2503 /// Mutate the type of \p Inst into \p NewTy.
2504 TypeMutator(Instruction *Inst, Type *NewTy)
2505 : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
2506 LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy
2507 << "\n");
2508 Inst->mutateType(NewTy);
2509 }
2510
2511 /// Mutate the instruction back to its original type.
2512 void undo() override {
2513 LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy
2514 << "\n");
2515 Inst->mutateType(OrigTy);
2516 }
2517 };
2518
2519 /// Replace the uses of an instruction by another instruction.
2520 class UsesReplacer : public TypePromotionAction {
2521 /// Helper structure to keep track of the replaced uses.
2522 struct InstructionAndIdx {
2523 /// The instruction using the instruction.
2524 Instruction *Inst;
2525
2526 /// The index where this instruction is used for Inst.
2527 unsigned Idx;
2528
2529 InstructionAndIdx(Instruction *Inst, unsigned Idx)
2530 : Inst(Inst), Idx(Idx) {}
2531 };
2532
2533 /// Keep track of the original uses (pair Instruction, Index).
2534 SmallVector<InstructionAndIdx, 4> OriginalUses;
2535 /// Keep track of the debug users.
2536 SmallVector<DbgValueInst *, 1> DbgValues;
2537
2538 using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
2539
2540 public:
2541 /// Replace all the use of \p Inst by \p New.
2542 UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
2543 LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
2544 << "\n");
2545 // Record the original uses.
2546 for (Use &U : Inst->uses()) {
2547 Instruction *UserI = cast<Instruction>(U.getUser());
2548 OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
2549 }
2550 // Record the debug uses separately. They are not in the instruction's
2551 // use list, but they are replaced by RAUW.
2552 findDbgValues(DbgValues, Inst);
2553
2554 // Now, we can replace the uses.
2555 Inst->replaceAllUsesWith(New);
2556 }
2557
2558 /// Reassign the original uses of Inst to Inst.
2559 void undo() override {
2560 LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
2561 for (use_iterator UseIt = OriginalUses.begin(),
2562 EndIt = OriginalUses.end();
2563 UseIt != EndIt; ++UseIt) {
2564 UseIt->Inst->setOperand(UseIt->Idx, Inst);
2565 }
2566 // RAUW has replaced all original uses with references to the new value,
2567 // including the debug uses. Since we are undoing the replacements,
2568 // the original debug uses must also be reinstated to maintain the
2569 // correctness and utility of debug value instructions.
2570 for (auto *DVI: DbgValues) {
2571 LLVMContext &Ctx = Inst->getType()->getContext();
2572 auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
2573 DVI->setOperand(0, MV);
2574 }
2575 }
2576 };
2577
2578 /// Remove an instruction from the IR.
2579 class InstructionRemover : public TypePromotionAction {
2580 /// Original position of the instruction.
2581 InsertionHandler Inserter;
2582
2583 /// Helper structure to hide all the links to the instruction. In other
2584 /// words, this helps pretend the instruction has been removed.
2585 OperandsHider Hider;
2586
2587 /// Keep track of the uses replaced, if any.
2588 UsesReplacer *Replacer = nullptr;
2589
2590 /// Keep track of instructions removed.
2591 SetOfInstrs &RemovedInsts;
2592
2593 public:
2594 /// Remove all references to \p Inst and optionally replace all its
2595 /// uses with New.
2596 /// \p RemovedInsts Keep track of the instructions removed by this Action.
2597 /// \pre If !Inst->use_empty(), then New != nullptr
2598 InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
2599 Value *New = nullptr)
2600 : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
2601 RemovedInsts(RemovedInsts) {
2602 if (New)
2603 Replacer = new UsesReplacer(Inst, New);
2604 LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
2605 RemovedInsts.insert(Inst);
2606 /// The instructions removed here will be freed after completing
2607 /// optimizeBlock() for all blocks as we need to keep track of the
2608 /// removed instructions during promotion.
2609 Inst->removeFromParent();
2610 }
2611
2612 ~InstructionRemover() override { delete Replacer; }
2613
2614 /// Resurrect the instruction and reassign it to the proper uses if a
2615 /// new value was provided when building this action.
2616 void undo() override {
2617 LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n");
2618 Inserter.insert(Inst);
2619 if (Replacer)
2620 Replacer->undo();
2621 Hider.undo();
2622 RemovedInsts.erase(Inst);
2623 }
2624 };
2625
2626public:
2627 /// Restoration point.
2628 /// The restoration point is a pointer to an action instead of an iterator
2629 /// because the iterator may be invalidated but not the pointer.
2630 using ConstRestorationPt = const TypePromotionAction *;
2631
2632 TypePromotionTransaction(SetOfInstrs &RemovedInsts)
2633 : RemovedInsts(RemovedInsts) {}
2634
2635 /// Commit every change made in this transaction.
2636 void commit();
2637
2638 /// Undo all the changes made after the given point.
2639 void rollback(ConstRestorationPt Point);
2640
2641 /// Get the current restoration point.
2642 ConstRestorationPt getRestorationPoint() const;
2643
2644 /// \name API for IR modification with state keeping to support rollback.
2645 /// @{
2646 /// Same as Instruction::setOperand.
2647 void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);
2648
2649 /// Same as Instruction::eraseFromParent.
2650 void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);
2651
2652 /// Same as Value::replaceAllUsesWith.
2653 void replaceAllUsesWith(Instruction *Inst, Value *New);
2654
2655 /// Same as Value::mutateType.
2656 void mutateType(Instruction *Inst, Type *NewTy);
2657
2658 /// Same as IRBuilder::createTrunc.
2659 Value *createTrunc(Instruction *Opnd, Type *Ty);
2660
2661 /// Same as IRBuilder::createSExt.
2662 Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);
2663
2664 /// Same as IRBuilder::createZExt.
2665 Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);
2666
2667 /// Same as Instruction::moveBefore.
2668 void moveBefore(Instruction *Inst, Instruction *Before);
2669 /// @}
2670
2671private:
2672 /// The ordered list of actions made so far.
2673 SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
2674
2675 using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;
2676
2677 SetOfInstrs &RemovedInsts;
2678};
2679
2680} // end anonymous namespace
2681
2682void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
2683 Value *NewVal) {
2684 Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
2685 Inst, Idx, NewVal));
2686}
2687
2688void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
2689 Value *NewVal) {
2690 Actions.push_back(
2691 std::make_unique<TypePromotionTransaction::InstructionRemover>(
2692 Inst, RemovedInsts, NewVal));
2693}
2694
2695void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
2696 Value *New) {
2697 Actions.push_back(
2698 std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
2699}
2700
2701void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
2702 Actions.push_back(
2703 std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
2704}
2705
2706Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
2707 Type *Ty) {
2708 std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty));
2709 Value *Val = Ptr->getBuiltValue();
2710 Actions.push_back(std::move(Ptr));
2711 return Val;
2712}
2713
2714Value *TypePromotionTransaction::createSExt(Instruction *Inst,
2715 Value *Opnd, Type *Ty) {
2716 std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty));
2717 Value *Val = Ptr->getBuiltValue();
2718 Actions.push_back(std::move(Ptr));
2719 return Val;
2720}
2721
2722Value *TypePromotionTransaction::createZExt(Instruction *Inst,
2723 Value *Opnd, Type *Ty) {
2724 std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty));
2725 Value *Val = Ptr->getBuiltValue();
2726 Actions.push_back(std::move(Ptr));
2727 return Val;
2728}
2729
2730void TypePromotionTransaction::moveBefore(Instruction *Inst,
2731 Instruction *Before) {
2732 Actions.push_back(
2733 std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
2734 Inst, Before));
2735}
2736
2737TypePromotionTransaction::ConstRestorationPt
2738TypePromotionTransaction::getRestorationPoint() const {
2739 return !Actions.empty() ? Actions.back().get() : nullptr;
2740}
2741
2742void TypePromotionTransaction::commit() {
2743 for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
2744 ++It)
2745 (*It)->commit();
2746 Actions.clear();
2747}
2748
2749void TypePromotionTransaction::rollback(
2750 TypePromotionTransaction::ConstRestorationPt Point) {
2751 while (!Actions.empty() && Point != Actions.back().get()) {
2752 std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val();
2753 Curr->undo();
2754 }
2755}
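// Illustrative usage sketch, not part of the original source: a caller
// typically records a restoration point, rewrites the IR speculatively through
// the transaction, and then either commits or rolls back. The variable names
// below are hypothetical.
//
//   TypePromotionTransaction TPT(RemovedInsts);
//   TypePromotionTransaction::ConstRestorationPt Start =
//       TPT.getRestorationPoint();
//   TPT.setOperand(SomeInst, /*Idx=*/0, SomeNewVal); // recorded as an action
//   if (ProvedProfitable)
//     TPT.commit();          // keep the changes, drop the recorded actions
//   else
//     TPT.rollback(Start);   // undo every action made after Start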
2756
2757namespace {
2758
2759/// A helper class for matching addressing modes.
2760///
2761/// This encapsulates the logic for matching the target-legal addressing modes.
2762class AddressingModeMatcher {
2763 SmallVectorImpl<Instruction*> &AddrModeInsts;
2764 const TargetLowering &TLI;
2765 const TargetRegisterInfo &TRI;
2766 const DataLayout &DL;
2767
2768 /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
2769 /// the memory instruction that we're computing this address for.
2770 Type *AccessTy;
2771 unsigned AddrSpace;
2772 Instruction *MemoryInst;
2773
2774 /// This is the addressing mode that we're building up. This is
2775 /// part of the return value of this addressing mode matching stuff.
2776 ExtAddrMode &AddrMode;
2777
2778 /// The instructions inserted by other CodeGenPrepare optimizations.
2779 const SetOfInstrs &InsertedInsts;
2780
2781 /// A map from the instructions to their type before promotion.
2782 InstrToOrigTy &PromotedInsts;
2783
2784 /// The ongoing transaction where every action should be registered.
2785 TypePromotionTransaction &TPT;
2786
2787 // A GEP whose offset is too large to be folded into the addressing mode.
2788 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;
2789
2790 /// This is set to true when we should not do profitability checks.
2791 /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
2792 bool IgnoreProfitability;
2793
2794 AddressingModeMatcher(
2795 SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
2796 const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
2797 ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
2798 InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
2799 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
2800 : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
2801 DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
2802 MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
2803 PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
2804 IgnoreProfitability = false;
2805 }
2806
2807public:
2808 /// Find the maximal addressing mode that a load/store of V can fold,
2809 /// given an access type of AccessTy. This returns a list of involved
2810 /// instructions in AddrModeInsts.
2811 /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
2812 /// optimizations.
2813 /// \p PromotedInsts maps the instructions to their type before promotion.
2814 /// \p TPT The ongoing transaction where every action should be registered.
2815 static ExtAddrMode
2816 Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
2817 SmallVectorImpl<Instruction *> &AddrModeInsts,
2818 const TargetLowering &TLI, const TargetRegisterInfo &TRI,
2819 const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
2820 TypePromotionTransaction &TPT,
2821 std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
2822 ExtAddrMode Result;
2823
2824 bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
2825 MemoryInst, Result, InsertedInsts,
2826 PromotedInsts, TPT, LargeOffsetGEP)
2827 .matchAddr(V, 0);
2828 (void)Success; assert(Success && "Couldn't select *anything*?");
2829 return Result;
2830 }
2831
2832private:
2833 bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
2834 bool matchAddr(Value *Addr, unsigned Depth);
2835 bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
2836 bool *MovedAway = nullptr);
2837 bool isProfitableToFoldIntoAddressingMode(Instruction *I,
2838 ExtAddrMode &AMBefore,
2839 ExtAddrMode &AMAfter);
2840 bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
2841 bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
2842 Value *PromotedOperand) const;
2843};
2844
2845class PhiNodeSet;
2846
2847/// An iterator for PhiNodeSet.
2848class PhiNodeSetIterator {
2849 PhiNodeSet * const Set;
2850 size_t CurrentIndex = 0;
2851
2852public:
2853 /// The constructor. Start should point to either a valid element, or be equal
2854 /// to the size of the underlying SmallVector of the PhiNodeSet.
2855 PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
2856 PHINode * operator*() const;
2857 PhiNodeSetIterator& operator++();
2858 bool operator==(const PhiNodeSetIterator &RHS) const;
2859 bool operator!=(const PhiNodeSetIterator &RHS) const;
2860};
2861
2862/// Keeps a set of PHINodes.
2863///
2864/// This is a minimal set implementation for a specific use case:
2865/// It is very fast when there are very few elements, but also provides good
2866/// performance when there are many. It is similar to SmallPtrSet, but also
2867/// provides iteration by insertion order, which is deterministic and stable
2868/// across runs. It is also similar to SmallSetVector, but allows removing
2869/// elements in O(1) time. This is achieved by not actually removing the element
2870/// from the underlying vector, which comes at the cost of using more memory, but
2871/// that is fine, since PhiNodeSets are used as short-lived objects.
2872class PhiNodeSet {
2873 friend class PhiNodeSetIterator;
2874
2875 using MapType = SmallDenseMap<PHINode *, size_t, 32>;
2876 using iterator = PhiNodeSetIterator;
2877
2878 /// Keeps the elements in the order of their insertion in the underlying
2879 /// vector. To achieve constant time removal, it never deletes any element.
2880 SmallVector<PHINode *, 32> NodeList;
2881
2882 /// Keeps the elements in the underlying set implementation. This (and not the
2883 /// NodeList defined above) is the source of truth on whether an element
2884 /// is actually in the collection.
2885 MapType NodeMap;
2886
2887 /// Points to the first valid (not deleted) element when the set is not empty
2888 /// and the value is not zero. Equals the size of the underlying vector
2889 /// when the set is empty. When the value is 0, as in the beginning, the
2890 /// first element may or may not be valid.
2891 size_t FirstValidElement = 0;
2892
2893public:
2894 /// Inserts a new element to the collection.
2895 /// \returns true if the element is actually added, i.e. was not in the
2896 /// collection before the operation.
2897 bool insert(PHINode *Ptr) {
2898 if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) {
2899 NodeList.push_back(Ptr);
2900 return true;
2901 }
2902 return false;
2903 }
2904
2905 /// Removes the element from the collection.
2906 /// \returns whether the element is actually removed, i.e. was in the
2907 /// collection before the operation.
2908 bool erase(PHINode *Ptr) {
2909 auto it = NodeMap.find(Ptr);
2910 if (it != NodeMap.end()) {
2911 NodeMap.erase(Ptr);
2912 SkipRemovedElements(FirstValidElement);
2913 return true;
2914 }
2915 return false;
2916 }
2917
2918 /// Removes all elements and clears the collection.
2919 void clear() {
2920 NodeMap.clear();
2921 NodeList.clear();
2922 FirstValidElement = 0;
2923 }
2924
2925 /// \returns an iterator that will iterate the elements in the order of
2926 /// insertion.
2927 iterator begin() {
2928 if (FirstValidElement == 0)
2929 SkipRemovedElements(FirstValidElement);
2930 return PhiNodeSetIterator(this, FirstValidElement);
2931 }
2932
2933 /// \returns an iterator that points to the end of the collection.
2934 iterator end() { return PhiNodeSetIterator(this, NodeList.size()); }
2935
2936 /// Returns the number of elements in the collection.
2937 size_t size() const {
2938 return NodeMap.size();
2939 }
2940
2941 /// \returns 1 if the given element is in the collection, and 0 otherwise.
2942 size_t count(PHINode *Ptr) const {
2943 return NodeMap.count(Ptr);
2944 }
2945
2946private:
2947 /// Updates the CurrentIndex so that it will point to a valid element.
2948 ///
2949 /// If the element of NodeList at CurrentIndex is valid, it does not
2950 /// change it. If there are no more valid elements, it updates CurrentIndex
2951 /// to point to the end of the NodeList.
2952 void SkipRemovedElements(size_t &CurrentIndex) {
2953 while (CurrentIndex < NodeList.size()) {
2954 auto it = NodeMap.find(NodeList[CurrentIndex]);
2955 // If the element has been deleted and added again later, NodeMap will
2956 // point to a different index, so CurrentIndex will still be invalid.
2957 if (it != NodeMap.end() && it->second == CurrentIndex)
2958 break;
2959 ++CurrentIndex;
2960 }
2961 }
2962};
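// Illustrative sketch, not part of the original source, of the deferred-removal
// design described above: erase() only drops the entry from NodeMap, NodeList
// keeps the stale pointer, and iteration skips entries whose recorded index no
// longer matches their position. P1..P3 are hypothetical PHINode pointers.
//
//   PhiNodeSet S;
//   S.insert(P1); S.insert(P2); S.insert(P3); // NodeList: [P1, P2, P3]
//   S.erase(P2);                              // NodeList unchanged; size() == 2
//   for (PHINode *P : S)                      // visits P1 then P3,
//     use(P);                                 // in insertion order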
2963
2964PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
2965 : Set(Set), CurrentIndex(Start) {}
2966
2967PHINode * PhiNodeSetIterator::operator*() const {
2968 assert(CurrentIndex < Set->NodeList.size() &&
2969 "PhiNodeSet access out of range");
2970 return Set->NodeList[CurrentIndex];
2971}
2972
2973PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
2974 assert(CurrentIndex < Set->NodeList.size() &&
2975 "PhiNodeSet access out of range");
2976 ++CurrentIndex;
2977 Set->SkipRemovedElements(CurrentIndex);
2978 return *this;
2979}
2980
2981bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
2982 return CurrentIndex == RHS.CurrentIndex;
2983}
2984
2985bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
2986 return !((*this) == RHS);
2987}
2988
2989/// Keeps track of the simplification of Phi nodes.
2990/// It accepts the set of all phi nodes and erases a phi node from this set
2991/// if it has been simplified.
2992class SimplificationTracker {
2993 DenseMap<Value *, Value *> Storage;
2994 const SimplifyQuery &SQ;
2995 // Tracks newly created Phi nodes. The elements are iterated by insertion
2996 // order.
2997 PhiNodeSet AllPhiNodes;
2998 // Tracks newly created Select nodes.
2999 SmallPtrSet<SelectInst *, 32> AllSelectNodes;
3000
3001public:
3002 SimplificationTracker(const SimplifyQuery &sq)
3003 : SQ(sq) {}
3004
3005 Value *Get(Value *V) {
3006 do {
3007 auto SV = Storage.find(V);
3008 if (SV == Storage.end())
3009 return V;
3010 V = SV->second;
3011 } while (true);
3012 }
3013
3014 Value *Simplify(Value *Val) {
3015 SmallVector<Value *, 32> WorkList;
3016 SmallPtrSet<Value *, 32> Visited;
3017 WorkList.push_back(Val);
3018 while (!WorkList.empty()) {
3019 auto P = WorkList.pop_back_val();
3020 if (!Visited.insert(P).second)
3021 continue;
3022 if (auto *PI = dyn_cast<Instruction>(P))
3023 if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) {
3024 for (auto *U : PI->users())
3025 WorkList.push_back(cast<Value>(U));
3026 Put(PI, V);
3027 PI->replaceAllUsesWith(V);
3028 if (auto *PHI = dyn_cast<PHINode>(PI))
3029 AllPhiNodes.erase(PHI);
3030 if (auto *Select = dyn_cast<SelectInst>(PI))
3031 AllSelectNodes.erase(Select);
3032 PI->eraseFromParent();
3033 }
3034 }
3035 return Get(Val);
3036 }
3037
3038 void Put(Value *From, Value *To) {
3039 Storage.insert({ From, To });
3040 }
3041
3042 void ReplacePhi(PHINode *From, PHINode *To) {
3043 Value* OldReplacement = Get(From);
3044 while (OldReplacement != From) {
3045 From = To;
3046 To = dyn_cast<PHINode>(OldReplacement);
3047 OldReplacement = Get(From);
3048 }
3049 assert(Get(To) == To && "Replacement PHI node is already replaced.");
3050 Put(From, To);
3051 From->replaceAllUsesWith(To);
3052 AllPhiNodes.erase(From);
3053 From->eraseFromParent();
3054 }
3055
3056 PhiNodeSet& newPhiNodes() { return AllPhiNodes; }
3057
3058 void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); }
3059
3060 void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); }
3061
3062 unsigned countNewPhiNodes() const { return AllPhiNodes.size(); }
3063
3064 unsigned countNewSelectNodes() const { return AllSelectNodes.size(); }
3065
3066 void destroyNewNodes(Type *CommonType) {
3067 // For safe erasing, replace the uses with dummy value first.
3068 auto Dummy = UndefValue::get(CommonType);
3069 for (auto I : AllPhiNodes) {
3070 I->replaceAllUsesWith(Dummy);
3071 I->eraseFromParent();
3072 }
3073 AllPhiNodes.clear();
3074 for (auto I : AllSelectNodes) {
3075 I->replaceAllUsesWith(Dummy);
3076 I->eraseFromParent();
3077 }
3078 AllSelectNodes.clear();
3079 }
3080};
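// Illustrative sketch, not part of the original source: Get() follows the
// replacement chain recorded by Put(), so after nested simplifications it
// returns the final surviving value. A, B and C are hypothetical values.
//
//   SimplificationTracker ST(SQ);
//   ST.Put(A, B);          // A was simplified to B
//   ST.Put(B, C);          // later, B itself was simplified to C
//   Value *V = ST.Get(A);  // returns C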
3081
3082/// A helper class for combining addressing modes.
3083class AddressingModeCombiner {
3084 typedef DenseMap<Value *, Value *> FoldAddrToValueMapping;
3085 typedef std::pair<PHINode *, PHINode *> PHIPair;
3086
3087private:
3088 /// The addressing modes we've collected.
3089 SmallVector<ExtAddrMode, 16> AddrModes;
3090
3091 /// The field in which the AddrModes differ, when we have more than one.
3092 ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField;
3093
3094 /// Are the AddrModes that we have all just equal to their original values?
3095 bool AllAddrModesTrivial = true;
3096
3097 /// Common Type for all different fields in addressing modes.
3098 Type *CommonType;
3099
3100 /// SimplifyQuery for simplifyInstruction utility.
3101 const SimplifyQuery &SQ;
3102
3103 /// Original Address.
3104 Value *Original;
3105
3106public:
3107 AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue)
3108 : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {}
3109
3110 /// Get the combined AddrMode
3111 const ExtAddrMode &getAddrMode() const {
3112 return AddrModes[0];
3113 }
3114
3115 /// Add a new AddrMode if it's compatible with the AddrModes we already
3116 /// have.
3117 /// \return True iff we succeeded in doing so.
3118 bool addNewAddrMode(ExtAddrMode &NewAddrMode) {
3119 // Note whether we have any non-trivial AddrModes: we need to detect when all
3120 // AddrModes are trivial, because then we would only introduce a phi or select
3121 // that duplicates what is already there.
3122 AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial();
3123
3124 // If this is the first addrmode then everything is fine.
3125 if (AddrModes.empty()) {
3126 AddrModes.emplace_back(NewAddrMode);
3127 return true;
3128 }
3129
3130 // Figure out how different this is from the other address modes, which we
3131 // can do just by comparing against the first one given that we only care
3132 // about the cumulative difference.
3133 ExtAddrMode::FieldName ThisDifferentField =
3134 AddrModes[0].compare(NewAddrMode);
3135 if (DifferentField == ExtAddrMode::NoField)
3136 DifferentField = ThisDifferentField;
3137 else if (DifferentField != ThisDifferentField)
3138 DifferentField = ExtAddrMode::MultipleFields;
3139
3140 // If NewAddrMode differs in more than one dimension we cannot handle it.
3141 bool CanHandle = DifferentField != ExtAddrMode::MultipleFields;
3142
3143 // If Scale Field is different then we reject.
3144 CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField;
3145
3146 // We also must reject the case when the base offset is different and the
3147 // scale reg is not null; we cannot handle this case because the merge of the
3148 // different offsets would have to be used as the ScaleReg.
3149 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField ||
3150 !NewAddrMode.ScaledReg);
3151
3152 // We also must reject the case when the GV is different and a BaseReg is
3153 // installed, because we want to use the base reg as a merge of the GV values.
3154 CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField ||
3155 !NewAddrMode.HasBaseReg);
3156
3157 // Even if NewAddrMode is the same, we still need to collect it because the
3158 // original value is different, and later we will need all the original values
3159 // as anchors when finding the common Phi node.
3160 if (CanHandle)
3161 AddrModes.emplace_back(NewAddrMode);
3162 else
3163 AddrModes.clear();
3164
3165 return CanHandle;
3166 }
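// Illustrative example, not part of the original source: two addressing modes
// for the same address that differ only in the base register, e.g.
//   AM1: BaseReg = %a, BaseOffs = 16
//   AM2: BaseReg = %b, BaseOffs = 16
// differ in a single field (BaseRegField) and are kept; if a third mode also
// differed in Scale, DifferentField would become MultipleFields and the whole
// collection would be cleared.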
3167
3168 /// Combine the addressing modes we've collected into a single
3169 /// addressing mode.
3170 /// \return True iff we successfully combined them or we only had one so
3171 /// didn't need to combine them anyway.
3172 bool combineAddrModes() {
3173 // If we have no AddrModes then they can't be combined.
3174 if (AddrModes.size() == 0)
3175 return false;
3176
3177 // A single AddrMode can trivially be combined.
3178 if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField)
3179 return true;
3180
3181 // If the AddrModes we collected are all just equal to the value they are
3182 // derived from then combining them wouldn't do anything useful.
3183 if (AllAddrModesTrivial)
3184 return false;
3185
3186 if (!addrModeCombiningAllowed())
3187 return false;
3188
3189 // Build a map between <original value, basic block where we saw it> to
3190 // value of base register.
3191 // Bail out if there is no common type.
3192 FoldAddrToValueMapping Map;
3193 if (!initializeMap(Map))
3194 return false;
3195
3196 Value *CommonValue = findCommon(Map);
3197 if (CommonValue)
3198 AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes);
3199 return CommonValue != nullptr;
3200 }
3201
3202private:
3203 /// Initialize Map with anchor values. For each address seen,
3204 /// we record the value of the differing field in that address.
3205 /// At the same time we find a common type for the differing fields, which we
3206 /// will use to create new Phi/Select nodes; it is kept in the CommonType field.
3207 /// Return false if no common type is found.
3208 bool initializeMap(FoldAddrToValueMapping &Map) {
3209 // Keep track of keys where the value is null. We will need to replace it
3210 // with constant null when we know the common type.
3211 SmallVector<Value *, 2> NullValue;
3212 Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType());
3213 for (auto &AM : AddrModes) {
3214 Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy);
3215 if (DV) {
3216 auto *Type = DV->getType();
3217 if (CommonType && CommonType != Type)
3218 return false;
3219 CommonType = Type;
3220 Map[AM.OriginalValue] = DV;
3221 } else {
3222 NullValue.push_back(AM.OriginalValue);
3223 }
3224 }
3225 assert(CommonType && "At least one non-null value must be!");
3226 for (auto *V : NullValue)
3227 Map[V] = Constant::getNullValue(CommonType);
3228 return true;
3229 }
3230
3231 /// We have a mapping between value A and another value B, where B was a field
3232 /// in the addressing mode represented by A. We also have an original value C
3233 /// representing the address we start with. Traversing from C through phis and
3234 /// selects, we ended up with the A's in the map. This utility function tries to
3235 /// find a value V that is a field in addressing mode C such that, traversing
3236 /// through phi nodes and selects, we end up at the corresponding values B in
3237 /// the map. The utility will create new Phi/Select nodes if needed.
3238 // The simple example looks as follows:
3239 // BB1:
3240 // p1 = b1 + 40
3241 // br cond BB2, BB3
3242 // BB2:
3243 // p2 = b2 + 40
3244 // br BB3
3245 // BB3:
3246 // p = phi [p1, BB1], [p2, BB2]
3247 // v = load p
3248 // Map is
3249 // p1 -> b1
3250 // p2 -> b2
3251 // Request is
3252 // p -> ?
3253 // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3.
3254 Value *findCommon(FoldAddrToValueMapping &Map) {
3255 // Tracks the simplification of newly created phi nodes. We use this
3256 // mapping because we will add newly created Phi nodes to AddrToBase.
3257 // Simplification of Phi nodes is recursive, so some Phi node may
3258 // be simplified after we add it to AddrToBase. In practice this
3259 // simplification is possible only if the original phis/selects were not
3260 // simplified yet.
3261 // Using this mapping we can find the current value in AddrToBase.
3262 SimplificationTracker ST(SQ);
3263
3264 // First step, DFS to create PHI nodes for all intermediate blocks.
3265 // Also fill traverse order for the second step.
3266 SmallVector<Value *, 32> TraverseOrder;
3267 InsertPlaceholders(Map, TraverseOrder, ST);
3268
3269 // Second Step, fill new nodes by merged values and simplify if possible.
3270 FillPlaceholders(Map, TraverseOrder, ST);
3271
3272 if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) {
3273 ST.destroyNewNodes(CommonType);
3274 return nullptr;
3275 }
3276
3277 // Now we'd like to match the new Phi nodes to existing ones.
3278 unsigned PhiNotMatchedCount = 0;
3279 if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) {
3280 ST.destroyNewNodes(CommonType);
3281 return nullptr;
3282 }
3283
3284 auto *Result = ST.Get(Map.find(Original)->second);
3285 if (Result) {
3286 NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount;
3287 NumMemoryInstsSelectCreated += ST.countNewSelectNodes();
3288 }
3289 return Result;
3290 }
3291
3292 /// Try to match PHI node to Candidate.
3293 /// Matcher tracks the matched Phi nodes.
3294 bool MatchPhiNode(PHINode *PHI, PHINode *Candidate,
3295 SmallSetVector<PHIPair, 8> &Matcher,
3296 PhiNodeSet &PhiNodesToMatch) {
3297 SmallVector<PHIPair, 8> WorkList;
3298 Matcher.insert({ PHI, Candidate });
3299 SmallSet<PHINode *, 8> MatchedPHIs;
3300 MatchedPHIs.insert(PHI);
3301 WorkList.push_back({ PHI, Candidate });
3302 SmallSet<PHIPair, 8> Visited;
3303 while (!WorkList.empty()) {
3304 auto Item = WorkList.pop_back_val();
3305 if (!Visited.insert(Item).second)
3306 continue;
3307 // We iterate over all incoming values of the Phi to compare them.
3308 // If the values are different, both of them are Phis, the first one is a
3309 // Phi we added (subject to match), and both are in the same basic block,
3310 // then we can match our pair if the incoming values themselves match. So we
3311 // state that these values match and add them to the work list to verify that.
3312 for (auto B : Item.first->blocks()) {
3313 Value *FirstValue = Item.first->getIncomingValueForBlock(B);
3314 Value *SecondValue = Item.second->getIncomingValueForBlock(B);
3315 if (FirstValue == SecondValue)
3316 continue;
3317
3318 PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue);
3319 PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue);
3320
3321 // If one of them is not a Phi, or
3322 // the first one is not a Phi node from the set we'd like to match, or
3323 // the Phi nodes are from different basic blocks, then
3324 // we will not be able to match.
3325 if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) ||
3326 FirstPhi->getParent() != SecondPhi->getParent())
3327 return false;
3328
3329 // If we already matched them then continue.
3330 if (Matcher.count({ FirstPhi, SecondPhi }))
3331 continue;
3332 // So the values are different and do not match. So we need them to
3333 // match. (But we register no more than one match per PHI node, so that
3334 // we won't later try to replace them twice.)
3335 if (MatchedPHIs.insert(FirstPhi).second)
3336 Matcher.insert({ FirstPhi, SecondPhi });
3337 // But we must check it.
3338 WorkList.push_back({ FirstPhi, SecondPhi });
3339 }
3340 }
3341 return true;
3342 }
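// Illustrative example, not part of the original source, in the same pseudo-IR
// style as the comments above. The newly created %sunk_phi matches the
// existing %p because their incoming values either coincide or form another
// matchable phi pair that is pushed on the work list:
//   BB3:
//     %p        = phi i64 [ %x, %BB1 ], [ %q, %BB2 ]
//     %sunk_phi = phi i64 [ %x, %BB1 ], [ %sunk_q, %BB2 ]
// where %q and %sunk_q are phis in the same block with identical incoming
// values.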
3343
3344 /// For the given set of PHI nodes (in the SimplificationTracker) try
3345 /// to find their equivalents.
3346 /// Returns false if this matching fails and creation of new Phi is disabled.
3347 bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes,
3348 unsigned &PhiNotMatchedCount) {
3349 // Matched and PhiNodesToMatch iterate their elements in a deterministic
3350 // order, so the replacements (ReplacePhi) are also done in a deterministic
3351 // order.
3352 SmallSetVector<PHIPair, 8> Matched;
3353 SmallPtrSet<PHINode *, 8> WillNotMatch;
3354 PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes();
3355 while (PhiNodesToMatch.size()) {
3356 PHINode *PHI = *PhiNodesToMatch.begin();
3357
3358 // Start with this Phi; if no Phi node in the basic block matches, we do not match.
3359 WillNotMatch.clear();
3360 WillNotMatch.insert(PHI);
3361
3362 // Traverse all Phis until we find an equivalent one or fail to do so.
3363 bool IsMatched = false;
3364 for (auto &P : PHI->getParent()->phis()) {
3365 if (&P == PHI)
3366 continue;
3367 if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch)))
3368 break;
3369 // If it does not match, collect all Phi nodes from the matcher.
3370 // If we end up with no match, then all these Phi nodes will not match
3371 // later.
3372 for (auto M : Matched)
3373 WillNotMatch.insert(M.first);
3374 Matched.clear();
3375 }
3376 if (IsMatched) {
3377 // Replace all matched values and erase them.
3378 for (auto MV : Matched)
3379 ST.ReplacePhi(MV.first, MV.second);
3380 Matched.clear();
3381 continue;
3382 }
3383 // If we are not allowed to create new nodes then bail out.
3384 if (!AllowNewPhiNodes)
3385 return false;
3386 // Just remove all seen values in matcher. They will not match anything.
3387 PhiNotMatchedCount += WillNotMatch.size();
3388 for (auto *P : WillNotMatch)
3389 PhiNodesToMatch.erase(P);
3390 }
3391 return true;
3392 }
3393 /// Fill the placeholders with values from predecessors and simplify them.
3394 void FillPlaceholders(FoldAddrToValueMapping &Map,
3395 SmallVectorImpl<Value *> &TraverseOrder,
3396 SimplificationTracker &ST) {
3397 while (!TraverseOrder.empty()) {
3398 Value *Current = TraverseOrder.pop_back_val();
3399 assert(Map.find(Current) != Map.end() && "No node to fill!!!");
3400 Value *V = Map[Current];
3401
3402 if (SelectInst *Select = dyn_cast<SelectInst>(V)) {
3403 // CurrentValue also must be Select.
3404 auto *CurrentSelect = cast<SelectInst>(Current);
3405 auto *TrueValue = CurrentSelect->getTrueValue();
3406 assert(Map.find(TrueValue) != Map.end() && "No True Value!");
3407 Select->setTrueValue(ST.Get(Map[TrueValue]));
3408 auto *FalseValue = CurrentSelect->getFalseValue();
3409 assert(Map.find(FalseValue) != Map.end() && "No False Value!");
3410 Select->setFalseValue(ST.Get(Map[FalseValue]));
3411 } else {
3412 // Must be a Phi node then.
3413 PHINode *PHI = cast<PHINode>(V);
3414 auto *CurrentPhi = dyn_cast<PHINode>(Current);
3415 // Fill the Phi node with values from predecessors.
3416 for (auto B : predecessors(PHI->getParent())) {
3417 Value *PV = CurrentPhi->getIncomingValueForBlock(B);
3418 assert(Map.find(PV) != Map.end() && "No predecessor Value!");
3419 PHI->addIncoming(ST.Get(Map[PV]), B);
3420 }
3421 }
3422 Map[Current] = ST.Simplify(V);
3423 }
3424 }
3425
3426 /// Starting from the original value, recursively iterates over the def-use
3427 /// chain up to known ending values represented in a map. For each traversed
3428 /// phi/select, inserts a placeholder Phi or Select.
3429 /// Reports all newly created Phi/Select nodes by adding them to the set.
3430 /// Also reports the order in which the values have been traversed.
3431 void InsertPlaceholders(FoldAddrToValueMapping &Map,
3432 SmallVectorImpl<Value *> &TraverseOrder,
3433 SimplificationTracker &ST) {
3434 SmallVector<Value *, 32> Worklist;
3435 assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&
3436 "Address must be a Phi or Select node");
3437 auto *Dummy = UndefValue::get(CommonType);
3438 Worklist.push_back(Original);
3439 while (!Worklist.empty()) {
3440 Value *Current = Worklist.pop_back_val();
3441 // if it is already visited or it is an ending value then skip it.
3442 if (Map.find(Current) != Map.end())
3443 continue;
3444 TraverseOrder.push_back(Current);
3445
3446 // CurrentValue must be a Phi node or select. All others must be covered
3447 // by anchors.
3448 if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) {
3449 // Is it OK to get metadata from OrigSelect?!
3450 // Create a Select placeholder with dummy value.
3451 SelectInst *Select = SelectInst::Create(
3452 CurrentSelect->getCondition(), Dummy, Dummy,
3453 CurrentSelect->getName(), CurrentSelect, CurrentSelect);
3454 Map[Current] = Select;
3455 ST.insertNewSelect(Select);
3456 // We are interested in True and False values.
3457 Worklist.push_back(CurrentSelect->getTrueValue());
3458 Worklist.push_back(CurrentSelect->getFalseValue());
3459 } else {
3460 // It must be a Phi node then.
3461 PHINode *CurrentPhi = cast<PHINode>(Current);
3462 unsigned PredCount = CurrentPhi->getNumIncomingValues();
3463 PHINode *PHI =
3464 PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi);
3465 Map[Current] = PHI;
3466 ST.insertNewPhi(PHI);
3467 for (Value *P : CurrentPhi->incoming_values())
3468 Worklist.push_back(P);
3469 }
3470 }
3471 }
3472
3473 bool addrModeCombiningAllowed() {
3474 if (DisableComplexAddrModes)
3475 return false;
3476 switch (DifferentField) {
3477 default:
3478 return false;
3479 case ExtAddrMode::BaseRegField:
3480 return AddrSinkCombineBaseReg;
3481 case ExtAddrMode::BaseGVField:
3482 return AddrSinkCombineBaseGV;
3483 case ExtAddrMode::BaseOffsField:
3484 return AddrSinkCombineBaseOffs;
3485 case ExtAddrMode::ScaledRegField:
3486 return AddrSinkCombineScaledReg;
3487 }
3488 }
3489};
3490} // end anonymous namespace
3491
3492/// Try adding ScaleReg*Scale to the current addressing mode.
3493/// Return true and update AddrMode if this addr mode is legal for the target,
3494/// false if not.
3495bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
3496 unsigned Depth) {
3497 // If Scale is 1, then this is the same as adding ScaleReg to the addressing
3498 // mode. Just process that directly.
3499 if (Scale == 1)
3500 return matchAddr(ScaleReg, Depth);
3501
3502 // If the scale is 0, it takes nothing to add this.
3503 if (Scale == 0)
3504 return true;
3505
3506 // If we already have a scale of this value, we can add to it, otherwise, we
3507 // need an available scale field.
3508 if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
3509 return false;
3510
3511 ExtAddrMode TestAddrMode = AddrMode;
3512
3513 // Add scale to turn X*4+X*3 -> X*7. This could also do things like
3514 // [A+B + A*7] -> [B+A*8].
3515 TestAddrMode.Scale += Scale;
3516 TestAddrMode.ScaledReg = ScaleReg;
3517
3518 // If the new address isn't legal, bail out.
3519 if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace))
3520 return false;
3521
3522 // It was legal, so commit it.
3523 AddrMode = TestAddrMode;
3524
3525 // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
3526 // to see if ScaleReg is actually X+C. If so, we can turn this into adding
3527 // X*Scale + C*Scale to addr mode.
3528 ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
3529 if (isa<Instruction>(ScaleReg) && // not a constant expr.
3530 match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
3531 TestAddrMode.InBounds = false;
3532 TestAddrMode.ScaledReg = AddLHS;
3533 TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
3534
3535 // If this addressing mode is legal, commit it and remember that we folded
3536 // this instruction.
3537 if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) {
3538 AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
3539 AddrMode = TestAddrMode;
3540 return true;
3541 }
3542 }
3543
3544 // Otherwise, not (x+c)*scale, just return what we have.
3545 return true;
3546}
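// Illustrative example, not part of the original source: if the addressing
// mode already holds %x with Scale 4 and we match %x again with Scale 3, the
// scales are merged into Scale 7. If the scaled register is itself an add of a
// constant, e.g. %x = add i64 %base, 12 matched with Scale 2, the matcher also
// tries ScaledReg = %base with BaseOffs increased by 24, provided the target
// reports that form as a legal addressing mode.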
3547
3548/// This is a little filter, which returns true if an addressing computation
3549/// involving I might be folded into a load/store accessing it.
3550/// This doesn't need to be perfect, but needs to accept at least
3551/// the set of instructions that MatchOperationAddr can.
3552static bool MightBeFoldableInst(Instruction *I) {
3553 switch (I->getOpcode()) {
3554 case Instruction::BitCast:
3555 case Instruction::AddrSpaceCast:
3556 // Don't touch identity bitcasts.
3557 if (I->getType() == I->getOperand(0)->getType())
3558 return false;
3559 return I->getType()->isIntOrPtrTy();
3560 case Instruction::PtrToInt:
3561 // PtrToInt is always a noop, as we know that the int type is pointer sized.
3562 return true;
3563 case Instruction::IntToPtr:
3564 // We know the input is intptr_t, so this is foldable.
3565 return true;
3566 case Instruction::Add:
3567 return true;
3568 case Instruction::Mul:
3569 case Instruction::Shl:
3570 // Can only handle X*C and X << C.
3571 return isa<ConstantInt>(I->getOperand(1));
3572 case Instruction::GetElementPtr:
3573 return true;
3574 default:
3575 return false;
3576 }
3577}
3578
3579/// Check whether or not \p Val is a legal instruction for \p TLI.
3580/// \note \p Val is assumed to be the product of some type promotion.
3581/// Therefore if \p Val has an undefined state in \p TLI, this is assumed
3582/// to be legal, as the non-promoted value would have had the same state.
3583static bool isPromotedInstructionLegal(const TargetLowering &TLI,
3584 const DataLayout &DL, Value *Val) {
3585 Instruction *PromotedInst = dyn_cast<Instruction>(Val);
3586 if (!PromotedInst)
3587 return false;
3588 int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode());
3589 // If the ISDOpcode is undefined, it was undefined before the promotion.
3590 if (!ISDOpcode)
3591 return true;
3592 // Otherwise, check if the promoted instruction is legal or not.
3593 return TLI.isOperationLegalOrCustom(
3594 ISDOpcode, TLI.getValueType(DL, PromotedInst->getType()));
3595}
3596
3597namespace {
3598
3599/// Helper class to perform type promotion.
3600class TypePromotionHelper {
3601 /// Utility function to add a promoted instruction \p ExtOpnd to
3602 /// \p PromotedInsts and record the type of extension we have seen.
3603 static void addPromotedInst(InstrToOrigTy &PromotedInsts,
3604 Instruction *ExtOpnd,
3605 bool IsSExt) {
3606 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3607 InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
3608 if (It != PromotedInsts.end()) {
3609 // If the new extension is same as original, the information in
3610 // PromotedInsts[ExtOpnd] is still correct.
3611 if (It->second.getInt() == ExtTy)
3612 return;
3613
3614 // Now that the new extension is different from the old one, we make
3615 // the type information invalid by setting the extension type to
3616 // BothExtension.
3617 ExtTy = BothExtension;
3618 }
3619 PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
3620 }
3621
3622 /// Utility function to query the original type of instruction \p Opnd
3623 /// with a matched extension type. If the extension doesn't match, we
3624 /// cannot use the information we had on the original type.
3625 /// BothExtension doesn't match any extension type.
3626 static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
3627 Instruction *Opnd,
3628 bool IsSExt) {
3629 ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
3630 InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
3631 if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
3632 return It->second.getPointer();
3633 return nullptr;
3634 }
3635
3636 /// Utility function to check whether or not a sign or zero extension
3637 /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
3638 /// either using the operands of \p Inst or promoting \p Inst.
3639 /// The type of the extension is defined by \p IsSExt.
3640 /// In other words, check if:
3641 /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
3642 /// #1 Promotion applies:
3643 /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
3644 /// #2 Operand reuses:
3645 /// ext opnd1 to ConsideredExtType.
3646 /// \p PromotedInsts maps the instructions to their type before promotion.
3647 static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
3648 const InstrToOrigTy &PromotedInsts, bool IsSExt);
3649
3650 /// Utility function to determine if \p OpIdx should be promoted when
3651 /// promoting \p Inst.
3652 static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
3653 return !(isa<SelectInst>(Inst) && OpIdx == 0);
3654 }
3655
3656 /// Utility function to promote the operand of \p Ext when this
3657 /// operand is a promotable trunc or sext or zext.
3658 /// \p PromotedInsts maps the instructions to their type before promotion.
3659 /// \p CreatedInstsCost[out] contains the cost of all instructions
3660 /// created to promote the operand of Ext.
3661 /// Newly added extensions are inserted in \p Exts.
3662 /// Newly added truncates are inserted in \p Truncs.
3663 /// Should never be called directly.
3664 /// \return The promoted value which is used instead of Ext.
3665 static Value *promoteOperandForTruncAndAnyExt(
3666 Instruction *Ext, TypePromotionTransaction &TPT,
3667 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3668 SmallVectorImpl<Instruction *> *Exts,
3669 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);
3670
3671 /// Utility function to promote the operand of \p Ext when this
3672 /// operand is promotable and is not a supported trunc or sext.
3673 /// \p PromotedInsts maps the instructions to their type before promotion.
3674 /// \p CreatedInstsCost[out] contains the cost of all the instructions
3675 /// created to promote the operand of Ext.
3676 /// Newly added extensions are inserted in \p Exts.
3677 /// Newly added truncates are inserted in \p Truncs.
3678 /// Should never be called directly.
3679 /// \return The promoted value which is used instead of Ext.
3680 static Value *promoteOperandForOther(Instruction *Ext,
3681 TypePromotionTransaction &TPT,
3682 InstrToOrigTy &PromotedInsts,
3683 unsigned &CreatedInstsCost,
3684 SmallVectorImpl<Instruction *> *Exts,
3685 SmallVectorImpl<Instruction *> *Truncs,
3686 const TargetLowering &TLI, bool IsSExt);
3687
3688 /// \see promoteOperandForOther.
3689 static Value *signExtendOperandForOther(
3690 Instruction *Ext, TypePromotionTransaction &TPT,
3691 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3692 SmallVectorImpl<Instruction *> *Exts,
3693 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3694 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3695 Exts, Truncs, TLI, true);
3696 }
3697
3698 /// \see promoteOperandForOther.
3699 static Value *zeroExtendOperandForOther(
3700 Instruction *Ext, TypePromotionTransaction &TPT,
3701 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3702 SmallVectorImpl<Instruction *> *Exts,
3703 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3704 return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
3705 Exts, Truncs, TLI, false);
3706 }
3707
3708public:
3709 /// Type for the utility function that promotes the operand of Ext.
3710 using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
3711 InstrToOrigTy &PromotedInsts,
3712 unsigned &CreatedInstsCost,
3713 SmallVectorImpl<Instruction *> *Exts,
3714 SmallVectorImpl<Instruction *> *Truncs,
3715 const TargetLowering &TLI);
3716
3717 /// Given a sign/zero extend instruction \p Ext, return the appropriate
3718 /// action to promote the operand of \p Ext instead of using Ext.
3719 /// \return NULL if no promotable action is possible with the current
3720 /// sign extension.
3721 /// \p InsertedInsts keeps track of all the instructions inserted by the
3722 /// other CodeGenPrepare optimizations. This information is important
3723 /// because we do not want to promote these instructions as CodeGenPrepare
3724 /// will reinsert them later. Thus creating an infinite loop: create/remove.
3725 /// \p PromotedInsts maps the instructions to their type before promotion.
3726 static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
3727 const TargetLowering &TLI,
3728 const InstrToOrigTy &PromotedInsts);
3729};
3730
3731} // end anonymous namespace
3732
3733bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
3734 Type *ConsideredExtType,
3735 const InstrToOrigTy &PromotedInsts,
3736 bool IsSExt) {
3737 // The promotion helper does not know how to deal with vector types yet.
3738 // To be able to fix that, we would need to fix the places where we
3739 // statically extend, e.g., constants and such.
3740 if (Inst->getType()->isVectorTy())
3741 return false;
3742
3743 // We can always get through zext.
3744 if (isa<ZExtInst>(Inst))
3745 return true;
3746
3747 // sext(sext) is ok too.
3748 if (IsSExt && isa<SExtInst>(Inst))
3749 return true;
3750
3751 // We can get through binary operator, if it is legal. In other words, the
3752 // binary operator must have a nuw or nsw flag.
3753 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst);
3754 if (BinOp && isa<OverflowingBinaryOperator>(BinOp) &&
3755 ((!IsSExt && BinOp->hasNoUnsignedWrap()) ||
3756 (IsSExt && BinOp->hasNoSignedWrap())))
3757 return true;
3758
3759 // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst))
3760 if ((Inst->getOpcode() == Instruction::And ||
3761 Inst->getOpcode() == Instruction::Or))
3762 return true;
3763
3764 // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst))
3765 if (Inst->getOpcode() == Instruction::Xor) {
3766 const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1));
3767 // Make sure it is not a NOT.
3768 if (Cst && !Cst->getValue().isAllOnesValue())
3769 return true;
3770 }
3771
3772 // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst))
3773 // It may change a poisoned value into a regular value, like
3774 // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12
3775 // poisoned value regular value
3776 // It should be OK since undef covers valid value.
3777 if (Inst->getOpcode() == Instruction::LShr && !IsSExt)
3778 return true;
3779
3780 // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst)
3781 // It may change a poisoned value into a regular value, like
3782 // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12
3783 // poisoned value regular value
3784 // It should be OK since undef covers valid value.
3785 if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
3786 const Instruction *ExtInst =
3787 dyn_cast<const Instruction>(*Inst->user_begin());
3788 if (ExtInst && ExtInst->hasOneUse()) {
3789 const Instruction *AndInst =
3790 dyn_cast<const Instruction>(*ExtInst->user_begin());
3791 if (AndInst && AndInst->getOpcode() == Instruction::And) {
3792 const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
3793 if (Cst &&
3794 Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
3795 return true;
3796 }
3797 }
3798 }
3799
3800 // Check if we can do the following simplification.
3801 // ext(trunc(opnd)) --> ext(opnd)
3802 if (!isa<TruncInst>(Inst))
3803 return false;
3804
3805 Value *OpndVal = Inst->getOperand(0);
3806 // Check if we can use this operand in the extension.
3807 // If the type is larger than the result type of the extension, we cannot.
3808 if (!OpndVal->getType()->isIntegerTy() ||
3809 OpndVal->getType()->getIntegerBitWidth() >
3810 ConsideredExtType->getIntegerBitWidth())
3811 return false;
3812
3813 // If the operand of the truncate is not an instruction, we will not have
3814 // any information on the dropped bits.
3815 // (Actually we could for constant but it is not worth the extra logic).
3816 Instruction *Opnd = dyn_cast<Instruction>(OpndVal);
3817 if (!Opnd)
3818 return false;
3819
3820 // Check if the source of the type is narrow enough.
3821 // I.e., check that trunc just drops extended bits of the same kind of
3822 // the extension.
3823 // #1 get the type of the operand and check the kind of the extended bits.
3824 const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt);
3825 if (OpndType)
3826 ;
3827 else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd)))
3828 OpndType = Opnd->getOperand(0)->getType();
3829 else
3830 return false;
3831
3832 // #2 check that the truncate just drops extended bits.
3833 return Inst->getType()->getIntegerBitWidth() >=
3834 OpndType->getIntegerBitWidth();
3835}
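// Illustrative examples, not part of the original source, in the pseudo-IR
// style used by the comments above. canGetThrough accepts, among others:
//   sext i64 (add nsw i32 %a, %b)   ; nsw binary operator: promotion applies
//   zext i64 (lshr i32 %v, 3)       ; zext can be moved through lshr
//   sext i64 (trunc i32 %w to i16)  ; only if %w was itself sign extended
//                                   ; from a type no wider than i16
// whereas sext i64 (add i32 %a, %b) without the nsw flag is rejected.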
3836
3837TypePromotionHelper::Action TypePromotionHelper::getAction(
3838 Instruction *Ext, const SetOfInstrs &InsertedInsts,
3839 const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) {
3840 assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&
3841 "Unexpected instruction type");
3842 Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0));
3843 Type *ExtTy = Ext->getType();
3844 bool IsSExt = isa<SExtInst>(Ext);
3845 // If the operand of the extension is not an instruction, we cannot
3846 // get through.
3847 // If it is, check whether we can get through.
3848 if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt))
3849 return nullptr;
3850
3851 // Do not promote if the operand has been added by codegenprepare.
3852 // Otherwise, it means we are undoing an optimization that is likely to be
3853 // redone, thus causing potential infinite loop.
3854 if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd))
3855 return nullptr;
3856
3857 // SExt or Trunc instructions.
3858 // Return the related handler.
3859 if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) ||
3860 isa<ZExtInst>(ExtOpnd))
3861 return promoteOperandForTruncAndAnyExt;
3862
3863 // Regular instruction.
3864 // Abort early if we will have to insert non-free instructions.
3865 if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType()))
3866 return nullptr;
3867 return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther;
3868}
3869
3870Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt(
3871 Instruction *SExt, TypePromotionTransaction &TPT,
3872 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3873 SmallVectorImpl<Instruction *> *Exts,
3874 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
3875 // By construction, the operand of SExt is an instruction. Otherwise we cannot
3876 // get through it and this method should not be called.
3877 Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0));
3878 Value *ExtVal = SExt;
3879 bool HasMergedNonFreeExt = false;
3880 if (isa<ZExtInst>(SExtOpnd)) {
3881 // Replace s|zext(zext(opnd))
3882 // => zext(opnd).
3883 HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd);
3884 Value *ZExt =
3885 TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType());
3886 TPT.replaceAllUsesWith(SExt, ZExt);
3887 TPT.eraseInstruction(SExt);
3888 ExtVal = ZExt;
3889 } else {
3890 // Replace z|sext(trunc(opnd)) or sext(sext(opnd))
3891 // => z|sext(opnd).
3892 TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0));
3893 }
3894 CreatedInstsCost = 0;
3895
3896 // Remove dead code.
3897 if (SExtOpnd->use_empty())
3898 TPT.eraseInstruction(SExtOpnd);
3899
3900 // Check if the extension is still needed.
3901 Instruction *ExtInst = dyn_cast<Instruction>(ExtVal);
3902 if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) {
3903 if (ExtInst) {
3904 if (Exts)
3905 Exts->push_back(ExtInst);
3906 CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt;
3907 }
3908 return ExtVal;
3909 }
3910
3911 // At this point we have: ext ty opnd to ty.
3912 // Reassign the uses of ExtInst to the opnd and remove ExtInst.
3913 Value *NextVal = ExtInst->getOperand(0);
3914 TPT.eraseInstruction(ExtInst, NextVal);
3915 return NextVal;
3916}
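// Illustrative before/after sketch (hypothetical IR) of the rewrite performed
// above for the s|zext(zext(opnd)) pattern:
//   before:  %z = zext i8 %x to i16
//            %s = sext i16 %z to i32
//   after:   %z2 = zext i8 %x to i32   ; single zext, %s and %z become dead
// For sext(sext(opnd)) or ext(trunc(opnd)), the outer extension is simply
// rewired to the inner operand instead of creating a new instruction.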
3917
3918Value *TypePromotionHelper::promoteOperandForOther(
3919 Instruction *Ext, TypePromotionTransaction &TPT,
3920 InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
3921 SmallVectorImpl<Instruction *> *Exts,
3922 SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI,
3923 bool IsSExt) {
3924 // By construction, the operand of Ext is an instruction. Otherwise we cannot
3925 // get through it and this method should not be called.
3926 Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0));
3927 CreatedInstsCost = 0;
3928 if (!ExtOpnd->hasOneUse()) {
3929 // ExtOpnd will be promoted.
3930 // All its uses, except Ext, will need to use a truncated value of the
3931 // promoted version.
3932 // Create the truncate now.
3933 Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType());
3934 if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) {
3935 // Insert it just after the definition.
3936 ITrunc->moveAfter(ExtOpnd);
3937 if (Truncs)
3938 Truncs->push_back(ITrunc);
3939 }
3940
3941 TPT.replaceAllUsesWith(ExtOpnd, Trunc);
3942 // Restore the operand of Ext (which has been replaced by the previous call
3943 // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext.
3944 TPT.setOperand(Ext, 0, ExtOpnd);
3945 }
3946
3947 // Get through the Instruction:
3948 // 1. Update its type.
3949 // 2. Replace the uses of Ext by Inst.
3950 // 3. Extend each operand that needs to be extended.
3951
3952 // Remember the original type of the instruction before promotion.
3953 // This is useful to know that the high bits are sign extended bits.
3954 addPromotedInst(PromotedInsts, ExtOpnd, IsSExt);
3955 // Step #1.
3956 TPT.mutateType(ExtOpnd, Ext->getType());
3957 // Step #2.
3958 TPT.replaceAllUsesWith(Ext, ExtOpnd);
3959 // Step #3.
3960 Instruction *ExtForOpnd = Ext;
3961
3962 LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n");
3963 for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx;
3964 ++OpIdx) {
3965 LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n');
3966 if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() ||
3967 !shouldExtOperand(ExtOpnd, OpIdx)) {
3968 LLVM_DEBUG(dbgs() << "No need to propagate\n");
3969 continue;
3970 }
3971 // Check if we can statically extend the operand.
3972 Value *Opnd = ExtOpnd->getOperand(OpIdx);
3973 if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) {
3974 LLVM_DEBUG(dbgs() << "Statically extend\n");
3975 unsigned BitWidth = Ext->getType()->getIntegerBitWidth();
3976 APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth)
3977 : Cst->getValue().zext(BitWidth);
3978 TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal));
3979 continue;
3980 }
3981 // UndefValues are typed, so we have to statically sign extend them.
3982 if (isa<UndefValue>(Opnd)) {
3983 LLVM_DEBUG(dbgs() << "Statically extend\n");
3984 TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType()));
3985 continue;
3986 }
3987
3988 // Otherwise we have to explicitly sign extend the operand.
3989 // Check if Ext was reused to extend an operand.
3990 if (!ExtForOpnd) {
3991 // If yes, create a new one.
3992 LLVM_DEBUG(dbgs() << "More operands to ext\n");
3993 Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType())
3994 : TPT.createZExt(Ext, Opnd, Ext->getType());
3995 if (!isa<Instruction>(ValForExtOpnd)) {
3996 TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd);
3997 continue;
3998 }
3999 ExtForOpnd = cast<Instruction>(ValForExtOpnd);
4000 }
4001 if (Exts)
4002 Exts->push_back(ExtForOpnd);
4003 TPT.setOperand(ExtForOpnd, 0, Opnd);
4004
4005 // Move the sign extension before the insertion point.
4006 TPT.moveBefore(ExtForOpnd, ExtOpnd);
4007 TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd);
4008 CreatedInstsCost += !TLI.isExtFree(ExtForOpnd);
4009 // If more sext are required, new instructions will have to be created.
4010 ExtForOpnd = nullptr;
4011 }
4012 if (ExtForOpnd == Ext) {
4013 LLVM_DEBUG(dbgs() << "Extension is useless now\n");
4014 TPT.eraseInstruction(Ext);
4015 }
4016 return ExtOpnd;
4017}
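// Illustrative before/after sketch (hypothetical IR) of the promotion done by
// promoteOperandForOther:
//   before:  %a = add i16 %x, 1
//            %e = sext i16 %a to i32
//   after:   %px = sext i16 %x to i32
//            %pa = add i32 %px, 1      ; %a mutated to i32, constant widened
// Uses of %e now refer to %pa; if %a had other users, they would go through a
// trunc of %pa back to i16, inserted right after the promoted instruction.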
4018
4019/// Check whether or not promoting an instruction to a wider type is profitable.
4020/// \p NewCost gives the cost of extension instructions created by the
4021/// promotion.
4022/// \p OldCost gives the cost of extension instructions before the promotion
4023/// plus the number of instructions that have been
4024/// matched in the addressing mode thanks to the promotion.
4025/// \p PromotedOperand is the value that has been promoted.
4026/// \return True if the promotion is profitable, false otherwise.
4027bool AddressingModeMatcher::isPromotionProfitable(
4028 unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const {
4029 LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost
4030 << '\n');
4031 // The cost of the new extensions is greater than the cost of the
4032 // old extension plus what we folded.
4033 // This is not profitable.
4034 if (NewCost > OldCost)
4035 return false;
4036 if (NewCost < OldCost)
4037 return true;
4038 // The promotion is neutral but it may help folding the sign extension in
4039 // loads for instance.
4040 // Check that we did not create an illegal instruction.
4041 return isPromotedInstructionLegal(TLI, DL, PromotedOperand);
4042}
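// Worked example of the cost comparison above (hypothetical numbers): if
// promoting creates two non-free extensions (NewCost = 2) while the original
// code had one extension and folded one instruction into the addressing mode
// (OldCost = 1 + 1 = 2), the costs tie and the decision falls through to the
// legality check on the promoted instruction.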
4043
4044/// Given an instruction or constant expr, see if we can fold the operation
4045/// into the addressing mode. If so, update the addressing mode and return
4046/// true, otherwise return false without modifying AddrMode.
4047/// If \p MovedAway is not NULL, it indicates whether or not AddrInst has to
4048/// be folded into the addressing mode on success.
4049/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
4050/// mode because it has been moved away.
4051/// Thus AddrInst must not be added in the matched instructions.
4052/// This state can happen when AddrInst is a sext, since it may be moved away.
4053/// Therefore, AddrInst may not be valid when MovedAway is true and it must
4054/// not be referenced anymore.
4055bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
4056 unsigned Depth,
4057 bool *MovedAway) {
4058 // Avoid exponential behavior on extremely deep expression trees.
4059 if (Depth >= 5) return false;
4060
4061 // By default, all matched instructions stay in place.
4062 if (MovedAway)
4063 *MovedAway = false;
4064
4065 switch (Opcode) {
4066 case Instruction::PtrToInt:
4067 // PtrToInt is always a noop, as we know that the int type is pointer sized.
4068 return matchAddr(AddrInst->getOperand(0), Depth);
4069 case Instruction::IntToPtr: {
4070 auto AS = AddrInst->getType()->getPointerAddressSpace();
4071 auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
4072 // This inttoptr is a no-op if the integer type is pointer sized.
4073 if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
4074 return matchAddr(AddrInst->getOperand(0), Depth);
4075 return false;
4076 }
4077 case Instruction::BitCast:
4078 // BitCast is always a noop, and we can handle it as long as it is
4079 // int->int or pointer->pointer (we don't want int<->fp or something).
4080 if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
4081 // Don't touch identity bitcasts. These were probably put here by LSR,
4082 // and we don't want to mess around with them. Assume it knows what it
4083 // is doing.
4084 AddrInst->getOperand(0)->getType() != AddrInst->getType())
4085 return matchAddr(AddrInst->getOperand(0), Depth);
4086 return false;
4087 case Instruction::AddrSpaceCast: {
4088 unsigned SrcAS
4089 = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
4090 unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
4091 if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
4092 return matchAddr(AddrInst->getOperand(0), Depth);
4093 return false;
4094 }
4095 case Instruction::Add: {
4096 // Check to see if we can merge in the RHS then the LHS. If so, we win.
4097 ExtAddrMode BackupAddrMode = AddrMode;
4098 unsigned OldSize = AddrModeInsts.size();
4099 // Start a transaction at this point.
4100 // The LHS may match but not the RHS.
4101 // Therefore, we need a higher level restoration point to undo partially
4102 // matched operation.
4103 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4104 TPT.getRestorationPoint();
4105
4106 AddrMode.InBounds = false;
4107 if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
4108 matchAddr(AddrInst->getOperand(0), Depth+1))
4109 return true;
4110
4111 // Restore the old addr mode info.
4112 AddrMode = BackupAddrMode;
4113 AddrModeInsts.resize(OldSize);
4114 TPT.rollback(LastKnownGood);
4115
4116 // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
4117 if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
4118 matchAddr(AddrInst->getOperand(1), Depth+1))
4119 return true;
4120
4121 // Otherwise we definitely can't merge the ADD in.
4122 AddrMode = BackupAddrMode;
4123 AddrModeInsts.resize(OldSize);
4124 TPT.rollback(LastKnownGood);
4125 break;
4126 }
4127 //case Instruction::Or:
4128 // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
4129 //break;
4130 case Instruction::Mul:
4131 case Instruction::Shl: {
4132 // Can only handle X*C and X << C.
4133 AddrMode.InBounds = false;
4134 ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
4135 if (!RHS || RHS->getBitWidth() > 64)
4136 return false;
4137 int64_t Scale = RHS->getSExtValue();
4138 if (Opcode == Instruction::Shl)
4139 Scale = 1LL << Scale;
4140
4141 return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
4142 }
4143 case Instruction::GetElementPtr: {
4144 // Scan the GEP. We can handle it if it contains constant offsets and at most
4145 // one variable offset.
4146 int VariableOperand = -1;
4147 unsigned VariableScale = 0;
4148
4149 int64_t ConstantOffset = 0;
4150 gep_type_iterator GTI = gep_type_begin(AddrInst);
4151 for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
4152 if (StructType *STy = GTI.getStructTypeOrNull()) {
4153 const StructLayout *SL = DL.getStructLayout(STy);
4154 unsigned Idx =
4155 cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
4156 ConstantOffset += SL->getElementOffset(Idx);
4157 } else {
4158 uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
4159 if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
4160 const APInt &CVal = CI->getValue();
4161 if (CVal.getMinSignedBits() <= 64) {
4162 ConstantOffset += CVal.getSExtValue() * TypeSize;
4163 continue;
4164 }
4165 }
4166 if (TypeSize) { // Scales of zero don't do anything.
4167 // We only allow one variable index at the moment.
4168 if (VariableOperand != -1)
4169 return false;
4170
4171 // Remember the variable index.
4172 VariableOperand = i;
4173 VariableScale = TypeSize;
4174 }
4175 }
4176 }
4177
4178 // A common case is for the GEP to only do a constant offset. In this case,
4179 // just add it to the disp field and check validity.
4180 if (VariableOperand == -1) {
4181 AddrMode.BaseOffs += ConstantOffset;
4182 if (ConstantOffset == 0 ||
4183 TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
4184 // Check to see if we can fold the base pointer in too.
4185 if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
4186 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4187 AddrMode.InBounds = false;
4188 return true;
4189 }
4190 } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
4191 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
4192 ConstantOffset > 0) {
4193 // Record GEPs with non-zero offsets as candidates for splitting in the
4194 // event that the offset cannot fit into the r+i addressing mode.
4195 // Simple and common case that only one GEP is used in calculating the
4196 // address for the memory access.
4197 Value *Base = AddrInst->getOperand(0);
4198 auto *BaseI = dyn_cast<Instruction>(Base);
4199 auto *GEP = cast<GetElementPtrInst>(AddrInst);
4200 if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
4201 (BaseI && !isa<CastInst>(BaseI) &&
4202 !isa<GetElementPtrInst>(BaseI))) {
4203 // Make sure the parent block allows inserting non-PHI instructions
4204 // before the terminator.
4205 BasicBlock *Parent =
4206 BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
4207 if (!Parent->getTerminator()->isEHPad())
4208 LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
4209 }
4210 }
4211 AddrMode.BaseOffs -= ConstantOffset;
4212 return false;
4213 }
4214
4215 // Save the valid addressing mode in case we can't match.
4216 ExtAddrMode BackupAddrMode = AddrMode;
4217 unsigned OldSize = AddrModeInsts.size();
4218
4219 // See if the scale and offset amount is valid for this target.
4220 AddrMode.BaseOffs += ConstantOffset;
4221 if (!cast<GEPOperator>(AddrInst)->isInBounds())
4222 AddrMode.InBounds = false;
4223
4224 // Match the base operand of the GEP.
4225 if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
4226 // If it couldn't be matched, just stuff the value in a register.
4227 if (AddrMode.HasBaseReg) {
4228 AddrMode = BackupAddrMode;
4229 AddrModeInsts.resize(OldSize);
4230 return false;
4231 }
4232 AddrMode.HasBaseReg = true;
4233 AddrMode.BaseReg = AddrInst->getOperand(0);
4234 }
4235
4236 // Match the remaining variable portion of the GEP.
4237 if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
4238 Depth)) {
4239 // If it couldn't be matched, try stuffing the base into a register
4240 // instead of matching it, and retrying the match of the scale.
4241 AddrMode = BackupAddrMode;
4242 AddrModeInsts.resize(OldSize);
4243 if (AddrMode.HasBaseReg)
4244 return false;
4245 AddrMode.HasBaseReg = true;
4246 AddrMode.BaseReg = AddrInst->getOperand(0);
4247 AddrMode.BaseOffs += ConstantOffset;
4248 if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
4249 VariableScale, Depth)) {
4250 // If even that didn't work, bail.
4251 AddrMode = BackupAddrMode;
4252 AddrModeInsts.resize(OldSize);
4253 return false;
4254 }
4255 }
4256
4257 return true;
4258 }
4259 case Instruction::SExt:
4260 case Instruction::ZExt: {
4261 Instruction *Ext = dyn_cast<Instruction>(AddrInst);
4262 if (!Ext)
4263 return false;
4264
4265 // Try to move this ext out of the way of the addressing mode.
4266 // Ask for a method for doing so.
4267 TypePromotionHelper::Action TPH =
4268 TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
4269 if (!TPH)
4270 return false;
4271
4272 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4273 TPT.getRestorationPoint();
4274 unsigned CreatedInstsCost = 0;
4275 unsigned ExtCost = !TLI.isExtFree(Ext);
4276 Value *PromotedOperand =
4277 TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
4278 // SExt has been moved away.
4279 // Thus either it will be rematched later in the recursive calls or it is
4280 // gone. Anyway, we must not fold it into the addressing mode at this point.
4281 // E.g.,
4282 // op = add opnd, 1
4283 // idx = ext op
4284 // addr = gep base, idx
4285 // is now:
4286 // promotedOpnd = ext opnd <- no match here
4287 // op = promoted_add promotedOpnd, 1 <- match (later in recursive calls)
4288 // addr = gep base, op <- match
4289 if (MovedAway)
4290 *MovedAway = true;
4291
4292 assert(PromotedOperand &&
4293 "TypePromotionHelper should have filtered out those cases");
4294
4295 ExtAddrMode BackupAddrMode = AddrMode;
4296 unsigned OldSize = AddrModeInsts.size();
4297
4298 if (!matchAddr(PromotedOperand, Depth) ||
4299 // The total of the new cost is equal to the cost of the created
4300 // instructions.
4301 // The total of the old cost is equal to the cost of the extension plus
4302 // what we have saved in the addressing mode.
4303 !isPromotionProfitable(CreatedInstsCost,
4304 ExtCost + (AddrModeInsts.size() - OldSize),
4305 PromotedOperand)) {
4306 AddrMode = BackupAddrMode;
4307 AddrModeInsts.resize(OldSize);
4308 LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
4309 TPT.rollback(LastKnownGood);
4310 return false;
4311 }
4312 return true;
4313 }
4314 }
4315 return false;
4316}
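// Illustrative sketch (hypothetical IR and target-independent numbers): for
//   %addr = getelementptr inbounds i32, i32* %base, i64 %idx
// the GetElementPtr case above would typically record %base as the base
// register and %idx as a scaled register with Scale = 4 (the alloc size of
// i32), assuming the target reports such an addressing mode as legal.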
4317
4318/// If we can, try to add the value of 'Addr' into the current addressing mode.
4319/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
4320/// unmodified. This assumes that Addr is either a pointer type or intptr_t
4321/// for the target.
4322///
4323bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
4324 // Start a transaction at this point that we will rollback if the matching
4325 // fails.
4326 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4327 TPT.getRestorationPoint();
4328 if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
4329 // Fold in immediates if legal for the target.
4330 AddrMode.BaseOffs += CI->getSExtValue();
4331 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4332 return true;
4333 AddrMode.BaseOffs -= CI->getSExtValue();
4334 } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
4335 // If this is a global variable, try to fold it into the addressing mode.
4336 if (!AddrMode.BaseGV) {
4337 AddrMode.BaseGV = GV;
4338 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4339 return true;
4340 AddrMode.BaseGV = nullptr;
4341 }
4342 } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
4343 ExtAddrMode BackupAddrMode = AddrMode;
4344 unsigned OldSize = AddrModeInsts.size();
4345
4346 // Check to see if it is possible to fold this operation.
4347 bool MovedAway = false;
4348 if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
4349 // This instruction may have been moved away. If so, there is nothing
4350 // to check here.
4351 if (MovedAway)
4352 return true;
4353 // Okay, it's possible to fold this. Check to see if it is actually
4354 // *profitable* to do so. We use a simple cost model to avoid increasing
4355 // register pressure too much.
4356 if (I->hasOneUse() ||
4357 isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
4358 AddrModeInsts.push_back(I);
4359 return true;
4360 }
4361
4362 // It isn't profitable to do this, roll back.
4363 //cerr << "NOT FOLDING: " << *I;
4364 AddrMode = BackupAddrMode;
4365 AddrModeInsts.resize(OldSize);
4366 TPT.rollback(LastKnownGood);
4367 }
4368 } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
4369 if (matchOperationAddr(CE, CE->getOpcode(), Depth))
4370 return true;
4371 TPT.rollback(LastKnownGood);
4372 } else if (isa<ConstantPointerNull>(Addr)) {
4373 // Null pointer gets folded without affecting the addressing mode.
4374 return true;
4375 }
4376
4377 // Worst case, the target should support [reg] addressing modes. :)
4378 if (!AddrMode.HasBaseReg) {
4379 AddrMode.HasBaseReg = true;
4380 AddrMode.BaseReg = Addr;
4381 // Still check for legality in case the target supports [imm] but not [i+r].
4382 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4383 return true;
4384 AddrMode.HasBaseReg = false;
4385 AddrMode.BaseReg = nullptr;
4386 }
4387
4388 // If the base register is already taken, see if we can do [r+r].
4389 if (AddrMode.Scale == 0) {
4390 AddrMode.Scale = 1;
4391 AddrMode.ScaledReg = Addr;
4392 if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
4393 return true;
4394 AddrMode.Scale = 0;
4395 AddrMode.ScaledReg = nullptr;
4396 }
4397 // Couldn't match.
4398 TPT.rollback(LastKnownGood);
4399 return false;
4400}
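// Illustrative sketch (hypothetical IR): when matching the address operand of
//   store i32 0, i32* %p
// where %p is a plain function argument, none of the special cases above
// apply, so the matcher falls back to HasBaseReg = true, BaseReg = %p,
// i.e. a simple [reg] addressing mode, provided the target reports it legal.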
4401
4402/// Check to see if all uses of OpVal by the specified inline asm call are due
4403/// to memory operands. If so, return true, otherwise return false.
4404static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
4405 const TargetLowering &TLI,
4406 const TargetRegisterInfo &TRI) {
4407 const Function *F = CI->getFunction();
4408 TargetLowering::AsmOperandInfoVector TargetConstraints =
4409 TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
4410 ImmutableCallSite(CI));
4411
4412 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
4413 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
4414
4415 // Compute the constraint code and ConstraintType to use.
4416 TLI.ComputeConstraintToUse(OpInfo, SDValue());
4417
4418 // If this asm operand is our Value*, and if it isn't an indirect memory
4419 // operand, we can't fold it!
4420 if (OpInfo.CallOperandVal == OpVal &&
4421 (OpInfo.ConstraintType != TargetLowering::C_Memory ||
4422 !OpInfo.isIndirect))
4423 return false;
4424 }
4425
4426 return true;
4427}
4428
4429// Max number of memory uses to look at before aborting the search to conserve
4430// compile time.
4431static constexpr int MaxMemoryUsesToScan = 20;
4432
4433/// Recursively walk all the uses of I until we find a memory use.
4434/// If we find an obviously non-foldable instruction, return true.
4435/// Add the ultimately found memory instructions to MemoryUses.
4436static bool FindAllMemoryUses(
4437 Instruction *I,
4438 SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
4439 SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI,
4440 const TargetRegisterInfo &TRI, int SeenInsts = 0) {
4441 // If we already considered this instruction, we're done.
4442 if (!ConsideredInsts.insert(I).second)
4443 return false;
4444
4445 // If this is an obviously unfoldable instruction, bail out.
4446 if (!MightBeFoldableInst(I))
4447 return true;
4448
4449 const bool OptSize = I->getFunction()->hasOptSize();
4450
4451 // Loop over all the uses, recursively processing them.
4452 for (Use &U : I->uses()) {
4453 // Conservatively return true if we're seeing a large number or a deep chain
4454 // of users. This avoids excessive compilation times in pathological cases.
4455 if (SeenInsts++ >= MaxMemoryUsesToScan)
4456 return true;
4457
4458 Instruction *UserI = cast<Instruction>(U.getUser());
4459 if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) {
4460 MemoryUses.push_back(std::make_pair(LI, U.getOperandNo()));
4461 continue;
4462 }
4463
4464 if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
4465 unsigned opNo = U.getOperandNo();
4466 if (opNo != StoreInst::getPointerOperandIndex())
4467 return true; // Storing addr, not into addr.
4468 MemoryUses.push_back(std::make_pair(SI, opNo));
4469 continue;
4470 }
4471
4472 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
4473 unsigned opNo = U.getOperandNo();
4474 if (opNo != AtomicRMWInst::getPointerOperandIndex())
4475 return true; // Storing addr, not into addr.
4476 MemoryUses.push_back(std::make_pair(RMW, opNo));
4477 continue;
4478 }
4479
4480 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
4481 unsigned opNo = U.getOperandNo();
4482 if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
4483 return true; // Storing addr, not into addr.
4484 MemoryUses.push_back(std::make_pair(CmpX, opNo));
4485 continue;
4486 }
4487
4488 if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
4489 // If this is a cold call, we can sink the addressing calculation into
4490 // the cold path. See optimizeCallInst.
4491 if (!OptSize && CI->hasFnAttr(Attribute::Cold))
4492 continue;
4493
4494 InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
4495 if (!IA) return true;
4496
4497 // If this is a memory operand, we're cool, otherwise bail out.
4498 if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
4499 return true;
4500 continue;
4501 }
4502
4503 if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI,
4504 SeenInsts))
4505 return true;
4506 }
4507
4508 return false;
4509}
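// Illustrative sketch (hypothetical IR): for an address %a used as
//   %v = load i32, i32* %a            ; memory use, recorded
//   store i32* %a, i32** %slot        ; stores %a itself, not through it
// the load is appended to MemoryUses, while the store of the address value
// makes the walk return true (a non-foldable, non-memory use).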
4510
4511/// Return true if Val is already known to be live at the use site that we're
4512/// folding it into. If so, there is no cost to include it in the addressing
4513/// mode. KnownLive1 and KnownLive2 are two values that we know are live at the
4514/// instruction already.
4515bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
4516 Value *KnownLive2) {
4517 // If Val is either of the known-live values, we know it is live!
4518 if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2)
4519 return true;
4520
4521 // All values other than instructions and arguments (e.g. constants) are live.
4522 if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
4523
4524 // If Val is a constant-sized alloca in the entry block, it is live; this is
4525 // true because it is just a reference to the stack/frame pointer, which is
4526 // live for the whole function.
4527 if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
4528 if (AI->isStaticAlloca())
4529 return true;
4530
4531 // Check to see if this value is already used in the memory instruction's
4532 // block. If so, it's already live into the block at the very least, so we
4533 // can reasonably fold it.
4534 return Val->isUsedInBasicBlock(MemoryInst->getParent());
4535}
4536
4537/// It is possible for the addressing mode of the machine to fold the specified
4538/// instruction into a load or store that ultimately uses it.
4539/// However, the specified instruction has multiple uses.
4540/// Given this, it may actually increase register pressure to fold it
4541/// into the load. For example, consider this code:
4542///
4543/// X = ...
4544/// Y = X+1
4545/// use(Y) -> nonload/store
4546/// Z = Y+1
4547/// load Z
4548///
4549/// In this case, Y has multiple uses, and can be folded into the load of Z
4550/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
4551/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
4552/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
4553/// number of computations either.
4554///
4555/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
4556/// X was live across 'load Z' for other reasons, we actually *would* want to
4557/// fold the addressing mode in the Z case. This would make Y die earlier.
4558bool AddressingModeMatcher::
4559isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
4560 ExtAddrMode &AMAfter) {
4561 if (IgnoreProfitability) return true;
4562
4563 // AMBefore is the addressing mode before this instruction was folded into it,
4564 // and AMAfter is the addressing mode after the instruction was folded. Get
4565 // the set of registers referenced by AMAfter and subtract out those
4566 // referenced by AMBefore: this is the set of values which folding in this
4567 // address extends the lifetime of.
4568 //
4569 // Note that there are only two potential values being referenced here,
4570 // BaseReg and ScaleReg (global addresses are always available, as are any
4571 // folded immediates).
4572 Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
4573
4574 // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
4575 // lifetime wasn't extended by adding this instruction.
4576 if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4577 BaseReg = nullptr;
4578 if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
4579 ScaledReg = nullptr;
4580
4581 // If folding this instruction (and its subexprs) didn't extend any live
4582 // ranges, we're ok with it.
4583 if (!BaseReg && !ScaledReg)
4584 return true;
4585
4586 // If all uses of this instruction can have the address mode sunk into them,
4587 // we can remove the addressing mode and effectively trade one live register
4588 // for another (at worst). In this context, folding an addressing mode into
4589 // the use is just a particularly nice way of sinking it.
4590 SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
4591 SmallPtrSet<Instruction*, 16> ConsideredInsts;
4592 if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
4593 return false; // Has a non-memory, non-foldable use!
4594
4595 // Now that we know that all uses of this instruction are part of a chain of
4596 // computation involving only operations that could theoretically be folded
4597 // into a memory use, loop over each of these memory operation uses and see
4598 // if they could *actually* fold the instruction. The assumption is that
4599 // addressing modes are cheap and that duplicating the computation involved
4600 // many times is worthwhile, even on a fastpath. For sinking candidates
4601 // (i.e. cold call sites), this serves as a way to prevent excessive code
4602 // growth since most architectures have some reasonably small and fast way to
4603 // compute an effective address (e.g., LEA on x86).
4604 SmallVector<Instruction*, 32> MatchedAddrModeInsts;
4605 for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
4606 Instruction *User = MemoryUses[i].first;
4607 unsigned OpNo = MemoryUses[i].second;
4608
4609 // Get the access type of this use. If the use isn't a pointer, we don't
4610 // know what it accesses.
4611 Value *Address = User->getOperand(OpNo);
4612 PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
4613 if (!AddrTy)
4614 return false;
4615 Type *AddressAccessTy = AddrTy->getElementType();
4616 unsigned AS = AddrTy->getAddressSpace();
4617
4618 // Do a match against the root of this address, ignoring profitability. This
4619 // will tell us if the addressing mode for the memory operation will
4620 // *actually* cover the shared instruction.
4621 ExtAddrMode Result;
4622 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4623 0);
4624 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4625 TPT.getRestorationPoint();
4626 AddressingModeMatcher Matcher(
4627 MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
4628 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4629 Matcher.IgnoreProfitability = true;
4630 bool Success = Matcher.matchAddr(Address, 0);
4631 (void)Success; assert(Success && "Couldn't select *anything*?");
4632
4633 // The match was to check the profitability, the changes made are not
4634 // part of the original matcher. Therefore, they should be dropped
4635 // otherwise the original matcher will not present the right state.
4636 TPT.rollback(LastKnownGood);
4637
4638 // If the match didn't cover I, then it won't be shared by it.
4639 if (!is_contained(MatchedAddrModeInsts, I))
4640 return false;
4641
4642 MatchedAddrModeInsts.clear();
4643 }
4644
4645 return true;
4646}
4647
4648/// Return true if the specified values are defined in a
4649/// different basic block than BB.
4650static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
4651 if (Instruction *I = dyn_cast<Instruction>(V))
4652 return I->getParent() != BB;
4653 return false;
4654}
4655
4656/// Sink addressing mode computation immediately before MemoryInst if doing so
4657/// can be done without increasing register pressure. The need for the
4658/// register pressure constraint means this can end up being an all or nothing
4659/// decision for all uses of the same addressing computation.
4660///
4661/// Load and Store Instructions often have addressing modes that can do
4662/// significant amounts of computation. As such, instruction selection will try
4663/// to get the load or store to do as much computation as possible for the
4664/// program. The problem is that isel can only see within a single block. As
4665/// such, we sink as much legal addressing mode work into the block as possible.
4666///
4667/// This method is used to optimize both load/store and inline asms with memory
4668/// operands. It's also used to sink addressing computations feeding into cold
4669/// call sites into their (cold) basic block.
4670///
4671/// The motivation for handling sinking into cold blocks is that doing so can
4672/// both enable other address mode sinking (by satisfying the register pressure
4673/// constraint above), and reduce register pressure globally (by removing the
4674/// addressing mode computation from the fast path entirely).
4675bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
4676 Type *AccessTy, unsigned AddrSpace) {
4677 Value *Repl = Addr;
4678
4679 // Try to collapse single-value PHI nodes. This is necessary to undo
4680 // unprofitable PRE transformations.
4681 SmallVector<Value*, 8> worklist;
4682 SmallPtrSet<Value*, 16> Visited;
4683 worklist.push_back(Addr);
4684
4685 // Use a worklist to iteratively look through PHI and select nodes, and
4686 // ensure that the addressing mode obtained from the non-PHI/select roots of
4687 // the graph are compatible.
4688 bool PhiOrSelectSeen = false;
4689 SmallVector<Instruction*, 16> AddrModeInsts;
4690 const SimplifyQuery SQ(*DL, TLInfo);
4691 AddressingModeCombiner AddrModes(SQ, Addr);
4692 TypePromotionTransaction TPT(RemovedInsts);
4693 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
4694 TPT.getRestorationPoint();
4695 while (!worklist.empty()) {
4696 Value *V = worklist.back();
4697 worklist.pop_back();
4698
4699 // We allow traversing cyclic Phi nodes.
4700 // In case of success after this loop we ensure that traversing through
4701 // Phi nodes ends up with all cases computing an address of the form
4702 // BaseGV + Base + Scale * Index + Offset
4703 // where Scale and Offset are constants and BaseGV, Base and Index
4704 // are exactly the same Values in all cases.
4705 // It means that BaseGV, Scale and Offset dominate our memory instruction
4706 // and have the same value as they had in address computation represented
4707 // as Phi. So we can safely sink address computation to memory instruction.
4708 if (!Visited.insert(V).second)
4709 continue;
4710
4711 // For a PHI node, push all of its incoming values.
4712 if (PHINode *P = dyn_cast<PHINode>(V)) {
4713 for (Value *IncValue : P->incoming_values())
4714 worklist.push_back(IncValue);
4715 PhiOrSelectSeen = true;
4716 continue;
4717 }
4718 // Similar for select.
4719 if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
4720 worklist.push_back(SI->getFalseValue());
4721 worklist.push_back(SI->getTrueValue());
4722 PhiOrSelectSeen = true;
4723 continue;
4724 }
4725
4726 // For non-PHIs, determine the addressing mode being computed. Note that
4727 // the result may differ depending on what other uses our candidate
4728 // addressing instructions might have.
4729 AddrModeInsts.clear();
4730 std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
4731 0);
4732 ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
4733 V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
4734 InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
4735
4736 GetElementPtrInst *GEP = LargeOffsetGEP.first;
4737 if (GEP && !NewGEPBases.count(GEP)) {
4738 // If splitting the underlying data structure can reduce the offset of a
4739 // GEP, collect the GEP. Skip the GEPs that are the new bases of
4740 // previously split data structures.
4741 LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP);
4742 if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end())
4743 LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size();
4744 }
4745
4746 NewAddrMode.OriginalValue = V;
4747 if (!AddrModes.addNewAddrMode(NewAddrMode))
4748 break;
4749 }
4750
4751 // Try to combine the AddrModes we've collected. If we couldn't collect any,
4752 // or we have multiple but either couldn't combine them or combining them
4753 // wouldn't do anything useful, bail out now.
4754 if (!AddrModes.combineAddrModes()) {
4755 TPT.rollback(LastKnownGood);
4756 return false;
4757 }
4758 TPT.commit();
4759
4760 // Get the combined AddrMode (or the only AddrMode, if we only had one).
4761 ExtAddrMode AddrMode = AddrModes.getAddrMode();
4762
4763 // If all the instructions matched are already in this BB, don't do anything.
4764 // If we saw a Phi node then it is definitely not local, and if we saw a select
4765 // then we want to push the address calculation past it even if it's already
4766 // in this BB.
4767 if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) {
4768 return IsNonLocalValue(V, MemoryInst->getParent());
4769 })) {
4770 LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode
4771 << "\n");
4772 return false;
4773 }
4774
4775 // Insert this computation right after this user. Since our caller is
4776 // scanning from the top of the BB to the bottom, reuses of the expr are
4777 // guaranteed to happen later.
4778 IRBuilder<> Builder(MemoryInst);
4779
4780 // Now that we have determined the addressing expression we want to use and
4781 // know that we have to sink it into this block, check to see if we have
4782 // already done this for some other load/store instr in this block. If so, reuse
4783 // the computation. Before attempting reuse, check if the address is valid
4784 // as it may have been erased.
4785
4786 WeakTrackingVH SunkAddrVH = SunkAddrs[Addr];
4787
4788 Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
4789 if (SunkAddr) {
4790 LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode
4791 << " for " << *MemoryInst << "\n");
4792 if (SunkAddr->getType() != Addr->getType())
4793 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4794 } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
4795 TM && SubtargetInfo->addrSinkUsingGEPs())) {
4796 // By default, we use the GEP-based method when AA is used later. This
4797 // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
4798 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4799 << " for " << *MemoryInst << "\n");
4800 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4801 Value *ResultPtr = nullptr, *ResultIndex = nullptr;
4802
4803 // First, find the pointer.
4804 if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) {
4805 ResultPtr = AddrMode.BaseReg;
4806 AddrMode.BaseReg = nullptr;
4807 }
4808
4809 if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) {
4810 // We can't add more than one pointer together, nor can we scale a
4811 // pointer (both of which seem meaningless).
4812 if (ResultPtr || AddrMode.Scale != 1)
4813 return false;
4814
4815 ResultPtr = AddrMode.ScaledReg;
4816 AddrMode.Scale = 0;
4817 }
4818
4819 // It is only safe to sign extend the BaseReg if we know that the math
4820 // required to create it did not overflow before we extend it. Since
4821 // the original IR value was tossed in favor of a constant back when
4822 // the AddrMode was created we need to bail out gracefully if widths
4823 // do not match instead of extending it.
4824 //
4825 // (See below for code to add the scale.)
4826 if (AddrMode.Scale) {
4827 Type *ScaledRegTy = AddrMode.ScaledReg->getType();
4828 if (cast<IntegerType>(IntPtrTy)->getBitWidth() >
4829 cast<IntegerType>(ScaledRegTy)->getBitWidth())
4830 return false;
4831 }
4832
4833 if (AddrMode.BaseGV) {
4834 if (ResultPtr)
4835 return false;
4836
4837 ResultPtr = AddrMode.BaseGV;
4838 }
4839
4840 // If the real base value actually came from an inttoptr, then the matcher
4841 // will look through it and provide only the integer value. In that case,
4842 // use it here.
4843 if (!DL->isNonIntegralPointerType(Addr->getType())) {
4844 if (!ResultPtr && AddrMode.BaseReg) {
4845 ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(),
4846 "sunkaddr");
4847 AddrMode.BaseReg = nullptr;
4848 } else if (!ResultPtr && AddrMode.Scale == 1) {
4849 ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(),
4850 "sunkaddr");
4851 AddrMode.Scale = 0;
4852 }
4853 }
4854
4855 if (!ResultPtr &&
4856 !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) {
4857 SunkAddr = Constant::getNullValue(Addr->getType());
4858 } else if (!ResultPtr) {
4859 return false;
4860 } else {
4861 Type *I8PtrTy =
4862 Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace());
4863 Type *I8Ty = Builder.getInt8Ty();
4864
4865 // Start with the base register. Do this first so that subsequent address
4866 // matching finds it last, which will prevent it from trying to match it
4867 // as the scaled value in case it happens to be a mul. That would be
4868 // problematic if we've sunk a different mul for the scale, because then
4869 // we'd end up sinking both muls.
4870 if (AddrMode.BaseReg) {
4871 Value *V = AddrMode.BaseReg;
4872 if (V->getType() != IntPtrTy)
4873 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4874
4875 ResultIndex = V;
4876 }
4877
4878 // Add the scale value.
4879 if (AddrMode.Scale) {
4880 Value *V = AddrMode.ScaledReg;
4881 if (V->getType() == IntPtrTy) {
4882 // done.
4883 } else {
4884 assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <
4885 cast<IntegerType>(V->getType())->getBitWidth() &&
4886 "We can't transform if ScaledReg is too narrow");
4887 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4888 }
4889
4890 if (AddrMode.Scale != 1)
4891 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4892 "sunkaddr");
4893 if (ResultIndex)
4894 ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr");
4895 else
4896 ResultIndex = V;
4897 }
4898
4899 // Add in the Base Offset if present.
4900 if (AddrMode.BaseOffs) {
4901 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
4902 if (ResultIndex) {
4903 // We need to add this separately from the scale above to help with
4904 // SDAG consecutive load/store merging.
4905 if (ResultPtr->getType() != I8PtrTy)
4906 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4907 ResultPtr =
4908 AddrMode.InBounds
4909 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
4910 "sunkaddr")
4911 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4912 }
4913
4914 ResultIndex = V;
4915 }
4916
4917 if (!ResultIndex) {
4918 SunkAddr = ResultPtr;
4919 } else {
4920 if (ResultPtr->getType() != I8PtrTy)
4921 ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
4922 SunkAddr =
4923 AddrMode.InBounds
4924 ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
4925 "sunkaddr")
4926 : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
4927 }
4928
4929 if (SunkAddr->getType() != Addr->getType())
4930 SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
4931 }
4932 } else {
4933 // We'd require a ptrtoint/inttoptr down the line, which we can't do for
4934 // non-integral pointers, so in that case bail out now.
4935 Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr;
4936 Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr;
4937 PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy);
4938 PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy);
4939 if (DL->isNonIntegralPointerType(Addr->getType()) ||
4940 (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) ||
4941 (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) ||
4942 (AddrMode.BaseGV &&
4943 DL->isNonIntegralPointerType(AddrMode.BaseGV->getType())))
4944 return false;
4945
4946 LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
4947 << " for " << *MemoryInst << "\n");
4948 Type *IntPtrTy = DL->getIntPtrType(Addr->getType());
4949 Value *Result = nullptr;
4950
4951 // Start with the base register. Do this first so that subsequent address
4952 // matching finds it last, which will prevent it from trying to match it
4953 // as the scaled value in case it happens to be a mul. That would be
4954 // problematic if we've sunk a different mul for the scale, because then
4955 // we'd end up sinking both muls.
4956 if (AddrMode.BaseReg) {
4957 Value *V = AddrMode.BaseReg;
4958 if (V->getType()->isPointerTy())
4959 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4960 if (V->getType() != IntPtrTy)
4961 V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
4962 Result = V;
4963 }
4964
4965 // Add the scale value.
4966 if (AddrMode.Scale) {
4967 Value *V = AddrMode.ScaledReg;
4968 if (V->getType() == IntPtrTy) {
4969 // done.
4970 } else if (V->getType()->isPointerTy()) {
4971 V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
4972 } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
4973 cast<IntegerType>(V->getType())->getBitWidth()) {
4974 V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
4975 } else {
4976 // It is only safe to sign extend the BaseReg if we know that the math
4977 // required to create it did not overflow before we extend it. Since
4978 // the original IR value was tossed in favor of a constant back when
4979 // the AddrMode was created we need to bail out gracefully if widths
4980 // do not match instead of extending it.
4981 Instruction *I = dyn_cast_or_null<Instruction>(Result);
4982 if (I && (Result != AddrMode.BaseReg))
4983 I->eraseFromParent();
4984 return false;
4985 }
4986 if (AddrMode.Scale != 1)
4987 V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
4988 "sunkaddr");
4989 if (Result)
4990 Result = Builder.CreateAdd(Result, V, "sunkaddr");
4991 else
4992 Result = V;
4993 }
4994
4995 // Add in the BaseGV if present.
4996 if (AddrMode.BaseGV) {
4997 Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
4998 if (Result)
4999 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5000 else
5001 Result = V;
5002 }
5003
5004 // Add in the Base Offset if present.
5005 if (AddrMode.BaseOffs) {
5006 Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
5007 if (Result)
5008 Result = Builder.CreateAdd(Result, V, "sunkaddr");
5009 else
5010 Result = V;
5011 }
5012
5013 if (!Result)
5014 SunkAddr = Constant::getNullValue(Addr->getType());
5015 else
5016 SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
5017 }
5018
5019 MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
5020 // Store the newly computed address into the cache. In the case we reused a
5021 // value, this should be idempotent.
5022 SunkAddrs[Addr] = WeakTrackingVH(SunkAddr);
5023
5024 // If we have no uses, recursively delete the value and all dead instructions
5025 // using it.
5026 if (Repl->use_empty()) {
5027 // This can cause recursive deletion, which can invalidate our iterator.
5028 // Use a WeakTrackingVH to hold onto it in case this happens.
5029 Value *CurValue = &*CurInstIterator;
5030 WeakTrackingVH IterHandle(CurValue);
5031 BasicBlock *BB = CurInstIterator->getParent();
5032
5033 RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
5034
5035 if (IterHandle != CurValue) {
5036 // If the iterator instruction was recursively deleted, start over at the
5037 // start of the block.
5038 CurInstIterator = BB->begin();
5039 SunkAddrs.clear();
5040 }
5041 }
5042 ++NumMemoryInsts;
5043 return true;
5044}
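// Illustrative before/after sketch (hypothetical IR) of the sinking performed
// above, using the GEP-based lowering path:
//   before (in a predecessor block):  %addr = getelementptr i8, i8* %p, i64 40
//   in this block:                    %v = load i8, i8* %addr
//   after:   %sunkaddr = getelementptr i8, i8* %p, i64 40   ; emitted here
//            %v = load i8, i8* %sunkaddr
// The original computation becomes dead in its block and is cleaned up by
// RecursivelyDeleteTriviallyDeadInstructions once it has no remaining uses.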
5045
5046/// If there are any memory operands, use OptimizeMemoryInst to sink their
5047/// address computing into the block when possible / profitable.
5048bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
5049 bool MadeChange = false;
5050
5051 const TargetRegisterInfo *TRI =
5052 TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo();
5053 TargetLowering::AsmOperandInfoVector TargetConstraints =
5054 TLI->ParseConstraints(*DL, TRI, CS);
5055 unsigned ArgNo = 0;
5056 for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
5057 TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
5058
5059 // Compute the constraint code and ConstraintType to use.
5060 TLI->ComputeConstraintToUse(OpInfo, SDValue());
5061
5062 if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
5063 OpInfo.isIndirect) {
5064 Value *OpVal = CS->getArgOperand(ArgNo++);
5065 MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u);
5066 } else if (OpInfo.Type == InlineAsm::isInput)
5067 ArgNo++;
5068 }
5069
5070 return MadeChange;
5071}
5072
5073/// Check if all the uses of \p Val are equivalent (or free) zero or
5074/// sign extensions.
5075static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
5076 assert(!Val->use_empty() && "Input must have at least one use");
5077 const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
5078 bool IsSExt = isa<SExtInst>(FirstUser);
5079 Type *ExtTy = FirstUser->getType();
5080 for (const User *U : Val->users()) {
5081 const Instruction *UI = cast<Instruction>(U);
5082 if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
5083 return false;
5084 Type *CurTy = UI->getType();
5085 // Same input and output types: Same instruction after CSE.
5086 if (CurTy == ExtTy)
5087 continue;
5088
5089 // If IsSExt is true, we are in this situation:
5090 // a = Val
5091 // b = sext ty1 a to ty2
5092 // c = sext ty1 a to ty3
5093 // Assuming ty2 is shorter than ty3, this could be turned into:
5094 // a = Val
5095 // b = sext ty1 a to ty2
5096 // c = sext ty2 b to ty3
5097 // However, the last sext is not free.
5098 if (IsSExt)
5099 return false;
5100
5101 // This is a ZExt; maybe it is free to extend from one type to another.
5102 // In that case, we would not account for a different use.
5103 Type *NarrowTy;
5104 Type *LargeTy;
5105 if (ExtTy->getScalarType()->getIntegerBitWidth() >
5106 CurTy->getScalarType()->getIntegerBitWidth()) {
5107 NarrowTy = CurTy;
5108 LargeTy = ExtTy;
5109 } else {
5110 NarrowTy = ExtTy;
5111 LargeTy = CurTy;
5112 }
5113
5114 if (!TLI.isZExtFree(NarrowTy, LargeTy))
5115 return false;
5116 }
5117 // All uses are the same or can be derived from one another for free.
5118 return true;
5119}
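// Illustrative sketch (hypothetical IR): for a value %a of type i16 whose only
// users are
//   %u1 = zext i16 %a to i32
//   %u2 = zext i16 %a to i64
// hasSameExtUse returns true when TLI.isZExtFree(i32, i64) holds, since the
// wider use could be re-derived from the narrower one for free. Mixing a sext
// user with zext users (or two sexts to different types) returns false.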
5120
5121/// Try to speculatively promote extensions in \p Exts and continue
5122/// promoting through newly promoted operands recursively as far as doing so is
5123/// profitable. Save the extensions profitably moved up in \p ProfitablyMovedExts.
5124/// When some promotion happened, \p TPT contains the proper state to revert
5125/// them.
5126///
5127/// \return true if some promotion happened, false otherwise.
5128bool CodeGenPrepare::tryToPromoteExts(
5129 TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
5130 SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
5131 unsigned CreatedInstsCost) {
5132 bool Promoted = false;
5133
5134 // Iterate over all the extensions to try to promote them.
5135 for (auto I : Exts) {
5136 // Early check if we directly have ext(load).
5137 if (isa<LoadInst>(I->getOperand(0))) {
5138 ProfitablyMovedExts.push_back(I);
5139 continue;
5140 }
5141
5142 // Check whether or not we want to do any promotion. The reason we have
5143 // this check inside the for loop is to catch the case where an extension
5144 // is directly fed by a load, because in such a case the extension can be
5145 // moved up without any promotion on its operands.
5146 if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
5147 return false;
5148
5149 // Get the action to perform the promotion.
5150 TypePromotionHelper::Action TPH =
5151 TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
5152 // Check if we can promote.
5153 if (!TPH) {
5154 // Save the current extension as we cannot move up through its operand.
5155 ProfitablyMovedExts.push_back(I);
5156 continue;
5157 }
5158
5159 // Save the current state.
5160 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5161 TPT.getRestorationPoint();
5162 SmallVector<Instruction *, 4> NewExts;
5163 unsigned NewCreatedInstsCost = 0;
5164 unsigned ExtCost = !TLI->isExtFree(I);
5165 // Promote.
5166 Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost,
5167 &NewExts, nullptr, *TLI);
5168 assert(PromotedVal &&
5169 "TypePromotionHelper should have filtered out those cases");
5170
5171 // Only one extension can be merged into a load.
5172 // Therefore, if we create more than one new extension, we heuristically
5173 // cut this search path, because it would degrade the code quality.
5174 // With exactly two, the transformation is neutral, because we will merge
5175 // one extension but leave another. However, we optimistically keep going,
5176 // because the new extension may be removed too.
5177 long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
5178 // FIXME: It would be possible to propagate a negative value instead of
5179 // conservatively ceiling it to 0.
5180 TotalCreatedInstsCost =
5181 std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
5182 if (!StressExtLdPromotion &&
5183 (TotalCreatedInstsCost > 1 ||
5184 !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
5185 // This promotion is not profitable; roll back to the previous state, and
5186 // save the current extension in ProfitablyMovedExts as the latest
5187 // speculative promotion turned out to be unprofitable.
5188 TPT.rollback(LastKnownGood);
5189 ProfitablyMovedExts.push_back(I);
5190 continue;
5191 }
5192 // Continue promoting NewExts as far as doing so is profitable.
5193 SmallVector<Instruction *, 2> NewlyMovedExts;
5194 (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
5195 bool NewPromoted = false;
5196 for (auto ExtInst : NewlyMovedExts) {
5197 Instruction *MovedExt = cast<Instruction>(ExtInst);
5198 Value *ExtOperand = MovedExt->getOperand(0);
5199 // If we have reached a load, we need this extra profitability check
5200 // as it could potentially be merged into an ext(load).
5201 if (isa<LoadInst>(ExtOperand) &&
5202 !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
5203 (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
5204 continue;
5205
5206 ProfitablyMovedExts.push_back(MovedExt);
5207 NewPromoted = true;
5208 }
5209
5210 // If none of the speculative promotions for NewExts is profitable, roll
5211 // back and save the current extension (I) as the last profitable extension.
5212 if (!NewPromoted) {
5213 TPT.rollback(LastKnownGood);
5214 ProfitablyMovedExts.push_back(I);
5215 continue;
5216 }
5217 // The promotion is profitable.
5218 Promoted = true;
5219 }
5220 return Promoted;
5221}
5222
5223 /// Merge redundant sexts when one dominates the other.
5224bool CodeGenPrepare::mergeSExts(Function &F) {
5225 bool Changed = false;
5226 for (auto &Entry : ValToSExtendedUses) {
5227 SExts &Insts = Entry.second;
5228 SExts CurPts;
5229 for (Instruction *Inst : Insts) {
5230 if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
5231 Inst->getOperand(0) != Entry.first)
5232 continue;
5233 bool inserted = false;
5234 for (auto &Pt : CurPts) {
5235 if (getDT(F).dominates(Inst, Pt)) {
5236 Pt->replaceAllUsesWith(Inst);
5237 RemovedInsts.insert(Pt);
5238 Pt->removeFromParent();
5239 Pt = Inst;
5240 inserted = true;
5241 Changed = true;
5242 break;
5243 }
5244 if (!getDT(F).dominates(Pt, Inst))
5245 // Give up if we need to merge in a common dominator as the
5246 // experiments show it is not profitable.
5247 continue;
5248 Inst->replaceAllUsesWith(Pt);
5249 RemovedInsts.insert(Inst);
5250 Inst->removeFromParent();
5251 inserted = true;
5252 Changed = true;
5253 break;
5254 }
5255 if (!inserted)
5256 CurPts.push_back(Inst);
5257 }
5258 }
5259 return Changed;
5260}
5261
5262 // Split large data structures so that the GEPs accessing them have smaller
5263 // offsets and can therefore be sunk to the same blocks as their users.
5264 // For example, a large struct starting from %base is split into two parts
5265 // where the second part starts from %new_base.
5266//
5267// Before:
5268// BB0:
5269// %base =
5270//
5271// BB1:
5272// %gep0 = gep %base, off0
5273// %gep1 = gep %base, off1
5274// %gep2 = gep %base, off2
5275//
5276// BB2:
5277// %load1 = load %gep0
5278// %load2 = load %gep1
5279// %load3 = load %gep2
5280//
5281// After:
5282// BB0:
5283// %base =
5284// %new_base = gep %base, off0
5285//
5286// BB1:
5287// %new_gep0 = %new_base
5288// %new_gep1 = gep %new_base, off1 - off0
5289// %new_gep2 = gep %new_base, off2 - off0
5290//
5291// BB2:
5292// %load1 = load i32, i32* %new_gep0
5293// %load2 = load i32, i32* %new_gep1
5294// %load3 = load i32, i32* %new_gep2
5295//
5296 // %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because
5297 // their offsets are small enough to fit into the addressing mode.
5298bool CodeGenPrepare::splitLargeGEPOffsets() {
5299 bool Changed = false;
5300 for (auto &Entry : LargeOffsetGEPMap) {
5301 Value *OldBase = Entry.first;
5302 SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
5303 &LargeOffsetGEPs = Entry.second;
5304 auto compareGEPOffset =
5305 [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
5306 const std::pair<GetElementPtrInst *, int64_t> &RHS) {
5307 if (LHS.first == RHS.first)
5308 return false;
5309 if (LHS.second != RHS.second)
5310 return LHS.second < RHS.second;
5311 return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
5312 };
5313 // Sort all the GEPs of the same data structure based on their offsets.
5314 llvm::sort(LargeOffsetGEPs, compareGEPOffset);
5315 LargeOffsetGEPs.erase(
5316 std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
5317 LargeOffsetGEPs.end());
5318 // Skip if all the GEPs have the same offsets.
5319 if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
5320 continue;
5321 GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
5322 int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
5323 Value *NewBaseGEP = nullptr;
5324
5325 auto LargeOffsetGEP = LargeOffsetGEPs.begin();
5326 while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
5327 GetElementPtrInst *GEP = LargeOffsetGEP->first;
5328 int64_t Offset = LargeOffsetGEP->second;
5329 if (Offset != BaseOffset) {
5330 TargetLowering::AddrMode AddrMode;
5331 AddrMode.BaseOffs = Offset - BaseOffset;
5332 // The result type of the GEP might not be the type of the memory
5333 // access.
5334 if (!TLI->isLegalAddressingMode(*DL, AddrMode,
5335 GEP->getResultElementType(),
5336 GEP->getAddressSpace())) {
5337 // We need to create a new base if the offset to the current base is
5338 // too large to fit into the addressing mode. So, a very large struct
5339 // may be split into several parts.
5340 BaseGEP = GEP;
5341 BaseOffset = Offset;
5342 NewBaseGEP = nullptr;
5343 }
5344 }
5345
5346 // Generate a new GEP to replace the current one.
5347 LLVMContext &Ctx = GEP->getContext();
5348 Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
5349 Type *I8PtrTy =
5350 Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
5351 Type *I8Ty = Type::getInt8Ty(Ctx);
5352
5353 if (!NewBaseGEP) {
5354 // Create a new base if we don't have one yet. Find the insertion
5355 // pointer for the new base first.
5356 BasicBlock::iterator NewBaseInsertPt;
5357 BasicBlock *NewBaseInsertBB;
5358 if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
5359 // If the base of the struct is an instruction, the new base will be
5360 // inserted close to it.
5361 NewBaseInsertBB = BaseI->getParent();
5362 if (isa<PHINode>(BaseI))
5363 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5364 else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
5365 NewBaseInsertBB =
5366 SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
5367 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5368 } else
5369 NewBaseInsertPt = std::next(BaseI->getIterator());
5370 } else {
5371 // If the current base is an argument or global value, the new base
5372 // will be inserted to the entry block.
5373 NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
5374 NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
5375 }
5376 IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
5377 // Create a new base.
5378 Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
5379 NewBaseGEP = OldBase;
5380 if (NewBaseGEP->getType() != I8PtrTy)
5381 NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
5382 NewBaseGEP =
5383 NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
5384 NewGEPBases.insert(NewBaseGEP);
5385 }
5386
5387 IRBuilder<> Builder(GEP);
5388 Value *NewGEP = NewBaseGEP;
5389 if (Offset == BaseOffset) {
5390 if (GEP->getType() != I8PtrTy)
5391 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5392 } else {
5393 // Calculate the new offset for the new GEP.
5394 Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
5395 NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);
5396
5397 if (GEP->getType() != I8PtrTy)
5398 NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
5399 }
5400 GEP->replaceAllUsesWith(NewGEP);
5401 LargeOffsetGEPID.erase(GEP);
5402 LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
5403 GEP->eraseFromParent();
5404 Changed = true;
5405 }
5406 }
5407 return Changed;
5408}
5409
5410/// Return true, if an ext(load) can be formed from an extension in
5411/// \p MovedExts.
5412bool CodeGenPrepare::canFormExtLd(
5413 const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
5414 Instruction *&Inst, bool HasPromoted) {
5415 for (auto *MovedExtInst : MovedExts) {
5416 if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
5417 LI = cast<LoadInst>(MovedExtInst->getOperand(0));
5418 Inst = MovedExtInst;
5419 break;
5420 }
5421 }
5422 if (!LI)
5423 return false;
5424
5425 // If they're already in the same block, there's nothing to do.
5426 // Make the cheap checks first if we did not promote.
5427 // If we promoted, we need to check if it is indeed profitable.
5428 if (!HasPromoted && LI->getParent() == Inst->getParent())
5429 return false;
5430
5431 return TLI->isExtLoad(LI, Inst, *DL);
5432}
5433
5434/// Move a zext or sext fed by a load into the same basic block as the load,
5435/// unless conditions are unfavorable. This allows SelectionDAG to fold the
5436/// extend into the load.
5437///
5438/// E.g.,
5439/// \code
5440/// %ld = load i32* %addr
5441/// %add = add nuw i32 %ld, 4
5442/// %zext = zext i32 %add to i64
5443 /// \endcode
5444/// =>
5445/// \code
5446/// %ld = load i32* %addr
5447/// %zext = zext i32 %ld to i64
5448/// %add = add nuw i64 %zext, 4
5449 /// \endcode
5450 /// Note that the promotion of %add to i64 is done in tryToPromoteExts(), which
5451 /// allows us to match zext(load i32*) to i64.
5452///
5453/// Also, try to promote the computations used to obtain a sign extended
5454 /// value used in memory accesses.
5455/// E.g.,
5456/// \code
5457/// a = add nsw i32 b, 3
5458/// d = sext i32 a to i64
5459/// e = getelementptr ..., i64 d
5460/// \endcode
5461/// =>
5462/// \code
5463/// f = sext i32 b to i64
5464/// a = add nsw i64 f, 3
5465/// e = getelementptr ..., i64 a
5466/// \endcode
5467///
5468/// \p Inst[in/out] the extension may be modified during the process if some
5469/// promotions apply.
5470bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
5471 // ExtLoad formation and address type promotion infrastructure requires TLI to
5472 // be effective.
5473 if (!TLI)
5474 return false;
5475
5476 bool AllowPromotionWithoutCommonHeader = false;
5477 /// See if this is an interesting sext operation for address type
5478 /// promotion before trying to promote it, e.g., one with the right
5479 /// type that is used in memory accesses.
5480 bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
5481 *Inst, AllowPromotionWithoutCommonHeader);
5482 TypePromotionTransaction TPT(RemovedInsts);
5483 TypePromotionTransaction::ConstRestorationPt LastKnownGood =
5484 TPT.getRestorationPoint();
5485 SmallVector<Instruction *, 1> Exts;
5486 SmallVector<Instruction *, 2> SpeculativelyMovedExts;
5487 Exts.push_back(Inst);
5488
5489 bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
5490
5491 // Look for a load being extended.
5492 LoadInst *LI = nullptr;
5493 Instruction *ExtFedByLoad;
5494
5495 // Try to promote a chain of computation if it allows to form an extended
5496 // load.
5497 if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
5498 assert(LI && ExtFedByLoad && "Expect a valid load and extension");
5499 TPT.commit();
5500 // Move the extend into the same block as the load
5501 ExtFedByLoad->moveAfter(LI);
5502 // CGP does not check if the zext would be speculatively executed when moved
5503 // to the same basic block as the load. Preserving its original location
5504 // would pessimize the debugging experience, as well as negatively impact
5505 // the quality of sample pgo. We don't want to use "line 0" as that has a
5506 // size cost in the line-table section and logically the zext can be seen as
5507 // part of the load. Therefore we conservatively reuse the same debug
5508 // location for the load and the zext.
5509 ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
5510 ++NumExtsMoved;
5511 Inst = ExtFedByLoad;
5512 return true;
5513 }
5514
5515 // Continue promoting SExts if the target considers address type promotion worthwhile.
5516 if (ATPConsiderable &&
5517 performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
5518 HasPromoted, TPT, SpeculativelyMovedExts))
5519 return true;
5520
5521 TPT.rollback(LastKnownGood);
5522 return false;
5523}
5524
5525// Perform address type promotion if doing so is profitable.
5526// If AllowPromotionWithoutCommonHeader == false, we should find other sext
5527 // instructions that sign extended the same initial value. However, if
5528 // AllowPromotionWithoutCommonHeader == true, we assume promoting the
5529 // extension is profitable on its own.
5530bool CodeGenPrepare::performAddressTypePromotion(
5531 Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
5532 bool HasPromoted, TypePromotionTransaction &TPT,
5533 SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
5534 bool Promoted = false;
5535 SmallPtrSet<Instruction *, 1> UnhandledExts;
5536 bool AllSeenFirst = true;
5537 for (auto I : SpeculativelyMovedExts) {
5538 Value *HeadOfChain = I->getOperand(0);
5539 DenseMap<Value *, Instruction *>::iterator AlreadySeen =
5540 SeenChainsForSExt.find(HeadOfChain);
5541 // If there is an unhandled SExt which has the same header, try to promote
5542 // it as well.
5543 if (AlreadySeen != SeenChainsForSExt.end()) {
5544 if (AlreadySeen->second != nullptr)
5545 UnhandledExts.insert(AlreadySeen->second);
5546 AllSeenFirst = false;
5547 }
5548 }
5549
5550 if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
5551 SpeculativelyMovedExts.size() == 1)) {
5552 TPT.commit();
5553 if (HasPromoted)
5554 Promoted = true;
5555 for (auto I : SpeculativelyMovedExts) {
5556 Value *HeadOfChain = I->getOperand(0);
5557 SeenChainsForSExt[HeadOfChain] = nullptr;
5558 ValToSExtendedUses[HeadOfChain].push_back(I);
5559 }
5560 // Update Inst as promotion happened.
5561 Inst = SpeculativelyMovedExts.pop_back_val();
5562 } else {
5563 // This is the first chain visited from the header; keep the current chain
5564 // as unhandled. Defer promoting it until we encounter another SExt
5565 // chain derived from the same header.
5566 for (auto I : SpeculativelyMovedExts) {
5567 Value *HeadOfChain = I->getOperand(0);
5568 SeenChainsForSExt[HeadOfChain] = Inst;
5569 }
5570 return false;
5571 }
5572
5573 if (!AllSeenFirst && !UnhandledExts.empty())
5574 for (auto VisitedSExt : UnhandledExts) {
5575 if (RemovedInsts.count(VisitedSExt))
5576 continue;
5577 TypePromotionTransaction TPT(RemovedInsts);
5578 SmallVector<Instruction *, 1> Exts;
5579 SmallVector<Instruction *, 2> Chains;
5580 Exts.push_back(VisitedSExt);
5581 bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
5582 TPT.commit();
5583 if (HasPromoted)
5584 Promoted = true;
5585 for (auto I : Chains) {
5586 Value *HeadOfChain = I->getOperand(0);
5587 // Mark this as handled.
5588 SeenChainsForSExt[HeadOfChain] = nullptr;
5589 ValToSExtendedUses[HeadOfChain].push_back(I);
5590 }
5591 }
5592 return Promoted;
5593}
5594
5595bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
5596 BasicBlock *DefBB = I->getParent();
5597
5598 // If the result of a {s|z}ext and its source are both live out, rewrite all
5599 // other uses of the source with result of extension.
5600 Value *Src = I->getOperand(0);
5601 if (Src->hasOneUse())
5602 return false;
5603
5604 // Only do this xform if truncating is free.
5605 if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
5606 return false;
5607
5608 // Only safe to perform the optimization if the source is also defined in
5609 // this block.
5610 if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
5611 return false;
5612
5613 bool DefIsLiveOut = false;
5614 for (User *U : I->users()) {
5615 Instruction *UI = cast<Instruction>(U);
5616
5617 // Figure out which BB this ext is used in.
5618 BasicBlock *UserBB = UI->getParent();
5619 if (UserBB == DefBB) continue;
5620 DefIsLiveOut = true;
5621 break;
5622 }
5623 if (!DefIsLiveOut)
5624 return false;
5625
5626 // Make sure none of the uses are PHI nodes.
5627 for (User *U : Src->users()) {
5628 Instruction *UI = cast<Instruction>(U);
5629 BasicBlock *UserBB = UI->getParent();
5630 if (UserBB == DefBB) continue;
5631 // Be conservative. We don't want this xform to end up introducing
5632 // reloads just before load / store instructions.
5633 if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI))
5634 return false;
5635 }
5636
5637 // InsertedTruncs - Only insert one trunc in each block.
5638 DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
5639
5640 bool MadeChange = false;
5641 for (Use &U : Src->uses()) {
5642 Instruction *User = cast<Instruction>(U.getUser());
5643
5644 // Figure out which BB this ext is used in.
5645 BasicBlock *UserBB = User->getParent();
5646 if (UserBB == DefBB) continue;
5647
5648 // Both src and def are live in this block. Rewrite the use.
5649 Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
5650
5651 if (!InsertedTrunc) {
5652 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
5653 assert(InsertPt != UserBB->end());
5654 InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt);
5655 InsertedInsts.insert(InsertedTrunc);
5656 }
5657
5658 // Replace a use of the {s|z}ext source with a use of the result.
5659 U = InsertedTrunc;
5660 ++NumExtUses;
5661 MadeChange = true;
5662 }
5663
5664 return MadeChange;
5665}
5666
5667// Find loads whose uses only use some of the loaded value's bits. Add an "and"
5668// just after the load if the target can fold this into one extload instruction,
5669// with the hope of eliminating some of the other later "and" instructions using
5670// the loaded value. "and"s that are made trivially redundant by the insertion
5671// of the new "and" are removed by this function, while others (e.g. those whose
5672// path from the load goes through a phi) are left for isel to potentially
5673// remove.
5674//
5675// For example:
5676//
5677// b0:
5678// x = load i32
5679// ...
5680// b1:
5681// y = and x, 0xff
5682// z = use y
5683//
5684// becomes:
5685//
5686// b0:
5687// x = load i32
5688// x' = and x, 0xff
5689// ...
5690// b1:
5691// z = use x'
5692//
5693// whereas:
5694//
5695// b0:
5696// x1 = load i32
5697// ...
5698// b1:
5699// x2 = load i32
5700// ...
5701// b2:
5702// x = phi x1, x2
5703// y = and x, 0xff
5704//
5705// becomes (after a call to optimizeLoadExt for each load):
5706//
5707// b0:
5708// x1 = load i32
5709// x1' = and x1, 0xff
5710// ...
5711// b1:
5712// x2 = load i32
5713// x2' = and x2, 0xff
5714// ...
5715// b2:
5716// x = phi x1', x2'
5717// y = and x, 0xff
5718bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
5719 if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy())
[50] Assuming the condition is false
[51] Assuming the condition is false
[52] Taking false branch
5720 return false;
5721
5722 // Skip loads we've already transformed.
5723 if (Load->hasOneUse() &&
[53] Assuming the condition is false
[54] Taking false branch
5724 InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
5725 return false;
5726
5727 // Look at all uses of Load, looking through phis, to determine how many bits
5728 // of the loaded value are needed.
5729 SmallVector<Instruction *, 8> WorkList;
5730 SmallPtrSet<Instruction *, 16> Visited;
5731 SmallVector<Instruction *, 8> AndsToMaybeRemove;
5732 for (auto *U : Load->users())
5733 WorkList.push_back(cast<Instruction>(U));
5734
5735 EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
5736 unsigned BitWidth = LoadResultVT.getSizeInBits();
5737 APInt DemandBits(BitWidth, 0);
5738 APInt WidestAndBits(BitWidth, 0);
5739
5740 while (!WorkList.empty()) {
[55] Calling 'SmallVectorBase::empty'
[58] Returning from 'SmallVectorBase::empty'
[59] Loop condition is false. Execution continues on line 5791
5741 Instruction *I = WorkList.back();
5742 WorkList.pop_back();
5743
5744 // Break use-def graph loops.
5745 if (!Visited.insert(I).second)
5746 continue;
5747
5748 // For a PHI node, push all of its users.
5749 if (auto *Phi = dyn_cast<PHINode>(I)) {
5750 for (auto *U : Phi->users())
5751 WorkList.push_back(cast<Instruction>(U));
5752 continue;
5753 }
5754
5755 switch (I->getOpcode()) {
5756 case Instruction::And: {
5757 auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1));
5758 if (!AndC)
5759 return false;
5760 APInt AndBits = AndC->getValue();
5761 DemandBits |= AndBits;
5762 // Keep track of the widest and mask we see.
5763 if (AndBits.ugt(WidestAndBits))
5764 WidestAndBits = AndBits;
5765 if (AndBits == WidestAndBits && I->getOperand(0) == Load)
5766 AndsToMaybeRemove.push_back(I);
5767 break;
5768 }
5769
5770 case Instruction::Shl: {
5771 auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1));
5772 if (!ShlC)
5773 return false;
5774 uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1);
5775 DemandBits.setLowBits(BitWidth - ShiftAmt);
5776 break;
5777 }
5778
5779 case Instruction::Trunc: {
5780 EVT TruncVT = TLI->getValueType(*DL, I->getType());
5781 unsigned TruncBitWidth = TruncVT.getSizeInBits();
5782 DemandBits.setLowBits(TruncBitWidth);
5783 break;
5784 }
5785
5786 default:
5787 return false;
5788 }
5789 }
5790
5791 uint32_t ActiveBits = DemandBits.getActiveBits();
5792 // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the
5793 // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example,
5794 // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but
5795 // (and (load x) 1) is not matched as a single instruction, rather as a LDR
5796 // followed by an AND.
5797 // TODO: Look into removing this restriction by fixing backends to either
5798 // return false for isLoadExtLegal for i1 or have them select this pattern to
5799 // a single instruction.
5800 //
5801 // Also avoid hoisting if we didn't see any ands with the exact DemandBits
5802 // mask, since these are the only ands that will be removed by isel.
5803 if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
[60] Assuming 'ActiveBits' is > 1
[61] Assuming the condition is false
[66] Taking false branch
5804 WidestAndBits != DemandBits)
[62] Calling 'APInt::operator!='
[65] Returning from 'APInt::operator!='
5805 return false;
5806
5807 LLVMContext &Ctx = Load->getType()->getContext();
5808 Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits);
5809 EVT TruncVT = TLI->getValueType(*DL, TruncTy);
5810
5811 // Reject cases that won't be matched as extloads.
5812 if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() ||
[67] Assuming the condition is false
[68] Assuming the condition is false
[73] Taking false branch
5813 !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT))
[69] Calling 'TargetLoweringBase::isLoadExtLegal'
[72] Returning from 'TargetLoweringBase::isLoadExtLegal'
5814 return false;
5815
5816 IRBuilder<> Builder(Load->getNextNode());
5817 auto *NewAnd = dyn_cast<Instruction>(
[74] Assuming the object is not a 'Instruction'
[75] 'NewAnd' initialized to a null pointer value
5818 Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
5819 // Mark this instruction as "inserted by CGP", so that other
5820 // optimizations don't touch it.
5821 InsertedInsts.insert(NewAnd);
5822
5823 // Replace all uses of load with new and (except for the use of load in the
5824 // new and itself).
5825 Load->replaceAllUsesWith(NewAnd);
5826 NewAnd->setOperand(0, Load);
[76] Called C++ object pointer is null
5827
5828 // Remove any and instructions that are now redundant.
5829 for (auto *And : AndsToMaybeRemove)
5830 // Check that the and mask is the same as the one we decided to put on the
5831 // new and.
5832 if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) {
5833 And->replaceAllUsesWith(NewAnd);
5834 if (&*CurInstIterator == And)
5835 CurInstIterator = std::next(And->getIterator());
5836 And->eraseFromParent();
5837 ++NumAndUses;
5838 }
5839
5840 ++NumAndsAdded;
5841 return true;
5842}
5843
5844/// Check if V (an operand of a select instruction) is an expensive instruction
5845/// that is only used once.
5846static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) {
5847 auto *I = dyn_cast<Instruction>(V);
5848 // If it's safe to speculatively execute, then it should not have side
5849 // effects; therefore, it's safe to sink and possibly *not* execute.
5850 return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) &&
5851 TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive;
5852}
5853
5854/// Returns true if a SelectInst should be turned into an explicit branch.
5855static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
5856 const TargetLowering *TLI,
5857 SelectInst *SI) {
5858 // If even a predictable select is cheap, then a branch can't be cheaper.
5859 if (!TLI->isPredictableSelectExpensive())
5860 return false;
5861
5862 // FIXME: This should use the same heuristics as IfConversion to determine
5863 // whether a select is better represented as a branch.
5864
5865 // If metadata tells us that the select condition is obviously predictable,
5866 // then we want to replace the select with a branch.
5867 uint64_t TrueWeight, FalseWeight;
5868 if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
5869 uint64_t Max = std::max(TrueWeight, FalseWeight);
5870 uint64_t Sum = TrueWeight + FalseWeight;
5871 if (Sum != 0) {
5872 auto Probability = BranchProbability::getBranchProbability(Max, Sum);
5873 if (Probability > TLI->getPredictableBranchThreshold())
5874 return true;
5875 }
5876 }
5877
5878 CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
5879
5880 // If a branch is predictable, an out-of-order CPU can avoid blocking on its
5881 // comparison condition. If the compare has more than one use, there's
5882 // probably another cmov or setcc around, so it's not worth emitting a branch.
5883 if (!Cmp || !Cmp->hasOneUse())
5884 return false;
5885
5886 // If either operand of the select is expensive and only needed on one side
5887 // of the select, we should form a branch.
5888 if (sinkSelectOperand(TTI, SI->getTrueValue()) ||
5889 sinkSelectOperand(TTI, SI->getFalseValue()))
5890 return true;
5891
5892 return false;
5893}
5894
5895/// If \p isTrue is true, return the true value of \p SI, otherwise return
5896/// false value of \p SI. If the true/false value of \p SI is defined by any
5897/// select instructions in \p Selects, look through the defining select
5898/// instruction until the true/false value is not defined in \p Selects.
5899static Value *getTrueOrFalseValue(
5900 SelectInst *SI, bool isTrue,
5901 const SmallPtrSet<const Instruction *, 2> &Selects) {
5902 Value *V = nullptr;
5903
5904 for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
5905 DefSI = dyn_cast<SelectInst>(V)) {
5906 assert(DefSI->getCondition() == SI->getCondition() &&
5907 "The condition of DefSI does not match with SI");
5908 V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
5909 }
5910
5911 assert(V && "Failed to get select true/false value");
5912 return V;
5913}
5914
5915bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
5916 assert(Shift->isShift() && "Expected a shift");
5917
5918 // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
5919 // general vector shifts, and (3) the shift amount is a select-of-splatted
5920 // values, hoist the shifts before the select:
5921 // shift Op0, (select Cond, TVal, FVal) -->
5922 // select Cond, (shift Op0, TVal), (shift Op0, FVal)
5923 //
5924 // This is inverting a generic IR transform when we know that the cost of a
5925 // general vector shift is more than the cost of 2 shift-by-scalars.
5926 // We can't do this effectively in SDAG because we may not be able to
5927 // determine if the select operands are splats from within a basic block.
5928 Type *Ty = Shift->getType();
5929 if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
5930 return false;
5931 Value *Cond, *TVal, *FVal;
5932 if (!match(Shift->getOperand(1),
5933 m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
5934 return false;
5935 if (!isSplatValue(TVal) || !isSplatValue(FVal))
5936 return false;
5937
5938 IRBuilder<> Builder(Shift);
5939 BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
5940 Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
5941 Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
5942 Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
5943 Shift->replaceAllUsesWith(NewSel);
5944 Shift->eraseFromParent();
5945 return true;
5946}
5947
5948/// If we have a SelectInst that will likely profit from branch prediction,
5949/// turn it into a branch.
5950bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
5951 // If branch conversion isn't desirable, exit early.
5952 if (DisableSelectToBranch || OptSize || !TLI)
5953 return false;
5954
5955 // Find all consecutive select instructions that share the same condition.
5956 SmallVector<SelectInst *, 2> ASI;
5957 ASI.push_back(SI);
5958 for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
5959 It != SI->getParent()->end(); ++It) {
5960 SelectInst *I = dyn_cast<SelectInst>(&*It);
5961 if (I && SI->getCondition() == I->getCondition()) {
5962 ASI.push_back(I);
5963 } else {
5964 break;
5965 }
5966 }
5967
5968 SelectInst *LastSI = ASI.back();
5969 // Increment the current iterator to skip the rest of the select instructions
5970 // because they will be either "not lowered" or "all lowered" to branch.
5971 CurInstIterator = std::next(LastSI->getIterator());
5972
5973 bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
5974
5975 // Can we convert the 'select' to CF ?
5976 if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable))
5977 return false;
5978
5979 TargetLowering::SelectSupportKind SelectKind;
5980 if (VectorCond)
5981 SelectKind = TargetLowering::VectorMaskSelect;
5982 else if (SI->getType()->isVectorTy())
5983 SelectKind = TargetLowering::ScalarCondVectorVal;
5984 else
5985 SelectKind = TargetLowering::ScalarValSelect;
5986
5987 if (TLI->isSelectSupported(SelectKind) &&
5988 !isFormingBranchFromSelectProfitable(TTI, TLI, SI))
5989 return false;
5990
5991 // The DominatorTree needs to be rebuilt by any consumers after this
5992 // transformation. We simply reset here rather than setting the ModifiedDT
5993 // flag to avoid restarting the function walk in runOnFunction for each
5994 // select optimized.
5995 DT.reset();
5996
5997 // Transform a sequence like this:
5998 // start:
5999 // %cmp = cmp uge i32 %a, %b
6000 // %sel = select i1 %cmp, i32 %c, i32 %d
6001 //
6002 // Into:
6003 // start:
6004 // %cmp = cmp uge i32 %a, %b
6005 // br i1 %cmp, label %select.true, label %select.false
6006 // select.true:
6007 // br label %select.end
6008 // select.false:
6009 // br label %select.end
6010 // select.end:
6011 // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
6012 //
6013 // In addition, we may sink instructions that produce %c or %d from
6014 // the entry block into the destination(s) of the new branch.
6015 // If the true or false blocks do not contain a sunken instruction, that
6016 // block and its branch may be optimized away. In that case, one side of the
6017 // first branch will point directly to select.end, and the corresponding PHI
6018 // predecessor block will be the start block.
6019
6020 // First, we split the block containing the select into 2 blocks.
6021 BasicBlock *StartBlock = SI->getParent();
6022 BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
6023 BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
6024
6025 // Delete the unconditional branch that was just created by the split.
6026 StartBlock->getTerminator()->eraseFromParent();
6027
6028 // These are the new basic blocks for the conditional branch.
6029 // At least one will become an actual new basic block.
6030 BasicBlock *TrueBlock = nullptr;
6031 BasicBlock *FalseBlock = nullptr;
6032 BranchInst *TrueBranch = nullptr;
6033 BranchInst *FalseBranch = nullptr;
6034
6035 // Sink expensive instructions into the conditional blocks to avoid executing
6036 // them speculatively.
6037 for (SelectInst *SI : ASI) {
6038 if (sinkSelectOperand(TTI, SI->getTrueValue())) {
6039 if (TrueBlock == nullptr) {
6040 TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
6041 EndBlock->getParent(), EndBlock);
6042 TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
6043 TrueBranch->setDebugLoc(SI->getDebugLoc());
6044 }
6045 auto *TrueInst = cast<Instruction>(SI->getTrueValue());
6046 TrueInst->moveBefore(TrueBranch);
6047 }
6048 if (sinkSelectOperand(TTI, SI->getFalseValue())) {
6049 if (FalseBlock == nullptr) {
6050 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
6051 EndBlock->getParent(), EndBlock);
6052 FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
6053 FalseBranch->setDebugLoc(SI->getDebugLoc());
6054 }
6055 auto *FalseInst = cast<Instruction>(SI->getFalseValue());
6056 FalseInst->moveBefore(FalseBranch);
6057 }
6058 }
6059
6060 // If there was nothing to sink, then arbitrarily choose the 'false' side
6061 // for a new input value to the PHI.
6062 if (TrueBlock == FalseBlock) {
6063 assert(TrueBlock == nullptr &&
6064 "Unexpected basic block transform while optimizing select");
6065
6066 FalseBlock = BasicBlock::Create(SI->getContext(), "select.false",
6067 EndBlock->getParent(), EndBlock);
6068 auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
6069 FalseBranch->setDebugLoc(SI->getDebugLoc());
6070 }
6071
6072 // Insert the real conditional branch based on the original condition.
6073 // If we did not create a new block for one of the 'true' or 'false' paths
6074 // of the condition, it means that side of the branch goes to the end block
6075 // directly and the path originates from the start block from the point of
6076 // view of the new PHI.
6077 BasicBlock *TT, *FT;
6078 if (TrueBlock == nullptr) {
6079 TT = EndBlock;
6080 FT = FalseBlock;
6081 TrueBlock = StartBlock;
6082 } else if (FalseBlock == nullptr) {
6083 TT = TrueBlock;
6084 FT = EndBlock;
6085 FalseBlock = StartBlock;
6086 } else {
6087 TT = TrueBlock;
6088 FT = FalseBlock;
6089 }
6090 IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
6091
6092 SmallPtrSet<const Instruction *, 2> INS;
6093 INS.insert(ASI.begin(), ASI.end());
6094 // Use a reverse iterator because a later select may use the value of an
6095 // earlier select, and we need to propagate the value through the earlier
6096 // select to get the PHI operand.
6097 for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
6098 SelectInst *SI = *It;
6099 // The select itself is replaced with a PHI Node.
6100 PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
6101 PN->takeName(SI);
6102 PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
6103 PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
6104 PN->setDebugLoc(SI->getDebugLoc());
6105
6106 SI->replaceAllUsesWith(PN);
6107 SI->eraseFromParent();
6108 INS.erase(SI);
6109 ++NumSelectsExpanded;
6110 }
6111
6112 // Instruct OptimizeBlock to skip to the next block.
6113 CurInstIterator = StartBlock->end();
6114 return true;
6115}
6116
6117static bool isBroadcastShuffle(ShuffleVectorInst *SVI) {
6118 SmallVector<int, 16> Mask(SVI->getShuffleMask());
6119 int SplatElem = -1;
6120 for (unsigned i = 0; i < Mask.size(); ++i) {
6121 if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem)
6122 return false;
6123 SplatElem = Mask[i];
6124 }
6125
6126 return true;
6127}
6128
6129/// Some targets have expensive vector shifts if the lanes aren't all the same
6130/// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases
6131/// it's often worth sinking a shufflevector splat down to its use so that
6132/// codegen can spot all lanes are identical.
6133bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
6134 BasicBlock *DefBB = SVI->getParent();
6135
6136 // Only do this xform if variable vector shifts are particularly expensive.
6137 if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType()))
6138 return false;
6139
6140 // We only expect better codegen by sinking a shuffle if we can recognise a
6141 // constant splat.
6142 if (!isBroadcastShuffle(SVI))
6143 return false;
6144
6145 // InsertedShuffles - Only insert a shuffle in each block once.
6146 DenseMap<BasicBlock*, Instruction*> InsertedShuffles;
6147
6148 bool MadeChange = false;
6149 for (User *U : SVI->users()) {
6150 Instruction *UI = cast<Instruction>(U);
6151
6152 // Figure out which BB this ext is used in.
6153 BasicBlock *UserBB = UI->getParent();
6154 if (UserBB == DefBB) continue;
6155
6156 // For now only apply this when the splat is used by a shift instruction.
6157 if (!UI->isShift()) continue;
6158
6159 // Everything checks out, sink the shuffle if the user's block doesn't
6160 // already have a copy.
6161 Instruction *&InsertedShuffle = InsertedShuffles[UserBB];
6162
6163 if (!InsertedShuffle) {
6164 BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
6165 assert(InsertPt != UserBB->end());
6166 InsertedShuffle =
6167 new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
6168 SVI->getOperand(2), "", &*InsertPt);
6169 InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
6170 }
6171
6172 UI->replaceUsesOfWith(SVI, InsertedShuffle);
6173 MadeChange = true;
6174 }
6175
6176 // If we removed all uses, nuke the shuffle.
6177 if (SVI->use_empty()) {
6178 SVI->eraseFromParent();
6179 MadeChange = true;
6180 }
6181
6182 return MadeChange;
6183}
6184
6185bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
6186 // If the operands of I can be folded into a target instruction together with
6187 // I, duplicate and sink them.
6188 SmallVector<Use *, 4> OpsToSink;
6189 if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink))
6190 return false;
6191
6192 // OpsToSink can contain multiple uses in a use chain (e.g.
6193 // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
6194 // uses must come first, so we process the ops in reverse order so as to not
6195 // create invalid IR.
6196 BasicBlock *TargetBB = I->getParent();
6197 bool Changed = false;
6198 SmallVector<Use *, 4> ToReplace;
6199 for (Use *U : reverse(OpsToSink)) {
6200 auto *UI = cast<Instruction>(U->get());
6201 if (UI->getParent() == TargetBB || isa<PHINode>(UI))
6202 continue;
6203 ToReplace.push_back(U);
6204 }
6205
6206 SetVector<Instruction *> MaybeDead;
6207 DenseMap<Instruction *, Instruction *> NewInstructions;
6208 Instruction *InsertPoint = I;
6209 for (Use *U : ToReplace) {
6210 auto *UI = cast<Instruction>(U->get());
6211 Instruction *NI = UI->clone();
6212 NewInstructions[UI] = NI;
6213 MaybeDead.insert(UI);
6214 LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
6215 NI->insertBefore(InsertPoint);
6216 InsertPoint = NI;
6217 InsertedInsts.insert(NI);
6218
6219 // Update the use for the new instruction, making sure that we update the
6220 // sunk instruction uses, if it is part of a chain that has already been
6221 // sunk.
6222 Instruction *OldI = cast<Instruction>(U->getUser());
6223 if (NewInstructions.count(OldI))
6224 NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
6225 else
6226 U->set(NI);
6227 Changed = true;
6228 }
6229
6230 // Remove instructions that are dead after sinking.
6231 for (auto *I : MaybeDead) {
6232 if (!I->hasNUsesOrMore(1)) {
6233 LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
6234 I->eraseFromParent();
6235 }
6236 }
6237
6238 return Changed;
6239}
6240
6241bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
6242 if (!TLI || !DL)
6243 return false;
6244
6245 Value *Cond = SI->getCondition();
6246 Type *OldType = Cond->getType();
6247 LLVMContext &Context = Cond->getContext();
6248 MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
6249 unsigned RegWidth = RegType.getSizeInBits();
6250
6251 if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
6252 return false;
6253
6254 // If the register width is greater than the type width, expand the condition
6255 // of the switch instruction and each case constant to the width of the
6256 // register. By widening the type of the switch condition, subsequent
6257 // comparisons (for case comparisons) will not need to be extended to the
6258 // preferred register width, so we will potentially eliminate N-1 extends,
6259 // where N is the number of cases in the switch.
6260 auto *NewType = Type::getIntNTy(Context, RegWidth);
6261
6262 // Zero-extend the switch condition and case constants unless the switch
6263 // condition is a function argument that is already being sign-extended.
6264 // In that case, we can avoid an unnecessary mask/extension by sign-extending
6265 // everything instead.
6266 Instruction::CastOps ExtType = Instruction::ZExt;
6267 if (auto *Arg = dyn_cast<Argument>(Cond))
6268 if (Arg->hasSExtAttr())
6269 ExtType = Instruction::SExt;
6270
6271 auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
6272 ExtInst->insertBefore(SI);
6273 ExtInst->setDebugLoc(SI->getDebugLoc());
6274 SI->setCondition(ExtInst);
6275 for (auto Case : SI->cases()) {
6276 APInt NarrowConst = Case.getCaseValue()->getValue();
6277 APInt WideConst = (ExtType == Instruction::ZExt) ?
6278 NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
6279 Case.setValue(ConstantInt::get(Context, WideConst));
6280 }
6281
6282 return true;
6283}
6284
6285
6286namespace {
6287
6288/// Helper class to promote a scalar operation to a vector one.
6290 /// This class is used to move an extractelement transition downward.
6290/// E.g.,
6291/// a = vector_op <2 x i32>
6292/// b = extractelement <2 x i32> a, i32 0
6293/// c = scalar_op b
6294/// store c
6295///
6296/// =>
6297/// a = vector_op <2 x i32>
6298/// c = vector_op a (equivalent to scalar_op on the related lane)
6299/// * d = extractelement <2 x i32> c, i32 0
6300/// * store d
6301 /// Assuming both the extractelement and the store can be combined, we get rid of the
6302/// transition.
6303class VectorPromoteHelper {
6304 /// DataLayout associated with the current module.
6305 const DataLayout &DL;
6306
6307 /// Used to perform some checks on the legality of vector operations.
6308 const TargetLowering &TLI;
6309
6310 /// Used to estimate the cost of the promoted chain.
6311 const TargetTransformInfo &TTI;
6312
6313 /// The transition being moved downwards.
6314 Instruction *Transition;
6315
6316 /// The sequence of instructions to be promoted.
6317 SmallVector<Instruction *, 4> InstsToBePromoted;
6318
6319 /// Cost of combining a store and an extract.
6320 unsigned StoreExtractCombineCost;
6321
6322 /// Instruction that will be combined with the transition.
6323 Instruction *CombineInst = nullptr;
6324
6325 /// The instruction that represents the current end of the transition.
6326 /// Since we are faking the promotion until we reach the end of the chain
6327 /// of computation, we need a way to get the current end of the transition.
6328 Instruction *getEndOfTransition() const {
6329 if (InstsToBePromoted.empty())
6330 return Transition;
6331 return InstsToBePromoted.back();
6332 }
6333
6334 /// Return the index of the original value in the transition.
6335 /// E.g., for "extractelement <2 x i32> c, i32 1" the original value,
6336 /// c, is at index 0.
6337 unsigned getTransitionOriginalValueIdx() const {
6338 assert(isa<ExtractElementInst>(Transition) &&
6339 "Other kind of transitions are not supported yet");
6340 return 0;
6341 }
6342
6343 /// Return the index of the index in the transition.
6344 /// E.g., for "extractelement <2 x i32> c, i32 0" the index
6345 /// is at index 1.
6346 unsigned getTransitionIdx() const {
6347 assert(isa<ExtractElementInst>(Transition) &&
6348 "Other kind of transitions are not supported yet");
6349 return 1;
6350 }
6351
6352 /// Get the type of the transition.
6353 /// This is the type of the original value.
6354 /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the
6355 /// transition is <2 x i32>.
6356 Type *getTransitionType() const {
6357 return Transition->getOperand(getTransitionOriginalValueIdx())->getType();
6358 }
6359
6360 /// Promote \p ToBePromoted by moving \p Def downward through it.
6361 /// I.e., we have the following sequence:
6362 /// Def = Transition <ty1> a to <ty2>
6363 /// b = ToBePromoted <ty2> Def, ...
6364 /// =>
6365 /// b = ToBePromoted <ty1> a, ...
6366 /// Def = Transition <ty1> ToBePromoted to <ty2>
6367 void promoteImpl(Instruction *ToBePromoted);
6368
6369 /// Check whether or not it is profitable to promote all the
6370 /// instructions enqueued to be promoted.
6371 bool isProfitableToPromote() {
6372 Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx());
6373 unsigned Index = isa<ConstantInt>(ValIdx)
6374 ? cast<ConstantInt>(ValIdx)->getZExtValue()
6375 : -1;
6376 Type *PromotedType = getTransitionType();
6377
6378 StoreInst *ST = cast<StoreInst>(CombineInst);
6379 unsigned AS = ST->getPointerAddressSpace();
6380 unsigned Align = ST->getAlignment();
6381 // Check if this store is supported.
6382 if (!TLI.allowsMisalignedMemoryAccesses(
6383 TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
6384 Align)) {
6385 // If this is not supported, there is no way we can combine
6386 // the extract with the store.
6387 return false;
6388 }
6389
6390 // The scalar chain of computation has to pay for the scalar-to-vector
6391 // transition.
6392 // The vector chain has to account for the combining cost.
6393 uint64_t ScalarCost =
6394 TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
6395 uint64_t VectorCost = StoreExtractCombineCost;
6396 for (const auto &Inst : InstsToBePromoted) {
6397 // Compute the cost.
6398 // By construction, all instructions being promoted are arithmetic ones.
6399 // Moreover, one argument is a constant that can be viewed as a splat
6400 // constant.
6401 Value *Arg0 = Inst->getOperand(0);
6402 bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) ||
6403 isa<ConstantFP>(Arg0);
6404 TargetTransformInfo::OperandValueKind Arg0OVK =
6405 IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
6406 : TargetTransformInfo::OK_AnyValue;
6407 TargetTransformInfo::OperandValueKind Arg1OVK =
6408 !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue
6409 : TargetTransformInfo::OK_AnyValue;
6410 ScalarCost += TTI.getArithmeticInstrCost(
6411 Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK);
6412 VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType,
6413 Arg0OVK, Arg1OVK);
6414 }
6415 LLVM_DEBUG(
6416 dbgs() << "Estimated cost of computation to be promoted:\nScalar: "
6417 << ScalarCost << "\nVector: " << VectorCost << '\n');
6418 return ScalarCost > VectorCost;
6419 }
6420
6421 /// Generate a constant vector with \p Val with the same
6422 /// number of elements as the transition.
6423 /// \p UseSplat defines whether or not \p Val should be replicated
6424 /// across the whole vector.
6425 /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>,
6426 /// otherwise we generate a vector with as many undef as possible:
6427 /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only
6428 /// used at the index of the extract.
6429 Value *getConstantVector(Constant *Val, bool UseSplat) const {
6430 unsigned ExtractIdx = std::numeric_limits<unsigned>::max();
6431 if (!UseSplat) {
6432 // If we cannot determine where the constant must be, we have to
6433 // use a splat constant.
6434 Value *ValExtractIdx = Transition->getOperand(getTransitionIdx());
6435 if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx))
6436 ExtractIdx = CstVal->getSExtValue();
6437 else
6438 UseSplat = true;
6439 }
6440
6441 unsigned End = getTransitionType()->getVectorNumElements();
6442 if (UseSplat)
6443 return ConstantVector::getSplat(End, Val);
6444
6445 SmallVector<Constant *, 4> ConstVec;
6446 UndefValue *UndefVal = UndefValue::get(Val->getType());
6447 for (unsigned Idx = 0; Idx != End; ++Idx) {
6448 if (Idx == ExtractIdx)
6449 ConstVec.push_back(Val);
6450 else
6451 ConstVec.push_back(UndefVal);
6452 }
6453 return ConstantVector::get(ConstVec);
6454 }
6455
6456 /// Check if promoting the operand at \p OperandIdx of \p Use to a vector
6457 /// type can trigger undefined behavior.
6458 static bool canCauseUndefinedBehavior(const Instruction *Use,
6459 unsigned OperandIdx) {
6460 // It is not safe to introduce undef when the operand is on
6461 // the right hand side of a division-like instruction.
6462 if (OperandIdx != 1)
6463 return false;
6464 switch (Use->getOpcode()) {
6465 default:
6466 return false;
6467 case Instruction::SDiv:
6468 case Instruction::UDiv:
6469 case Instruction::SRem:
6470 case Instruction::URem:
6471 return true;
6472 case Instruction::FDiv:
6473 case Instruction::FRem:
6474 return !Use->hasNoNaNs();
6475 }
6476 llvm_unreachable(nullptr);
6477 }
6478
6479public:
6480 VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
6481 const TargetTransformInfo &TTI, Instruction *Transition,
6482 unsigned CombineCost)
6483 : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
6484 StoreExtractCombineCost(CombineCost) {
6485 assert(Transition && "Do not know how to promote null");
6486 }
6487
6488 /// Check if we can promote \p ToBePromoted to \p Type.
6489 bool canPromote(const Instruction *ToBePromoted) const {
6490 // We could support CastInst too.
6491 return isa<BinaryOperator>(ToBePromoted);
6492 }
6493
6494 /// Check if it is profitable to promote \p ToBePromoted
6495 /// by moving the transition down through it.
6496 bool shouldPromote(const Instruction *ToBePromoted) const {
6497 // Promote only if all the operands can be statically expanded.
6498 // Indeed, we do not want to introduce any new kind of transitions.
6499 for (const Use &U : ToBePromoted->operands()) {
6500 const Value *Val = U.get();
6501 if (Val == getEndOfTransition()) {
6502 // If the use is a division and the transition is on the rhs,
6503 // we cannot promote the operation, otherwise we may create a
6504 // division by zero.
6505 if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()))
6506 return false;
6507 continue;
6508 }
6509 if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) &&
6510 !isa<ConstantFP>(Val))
6511 return false;
6512 }
6513 // Check that the resulting operation is legal.
6514 int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode());
6515 if (!ISDOpcode)
6516 return false;
6517 return StressStoreExtract ||
6518 TLI.isOperationLegalOrCustom(
6519 ISDOpcode, TLI.getValueType(DL, getTransitionType(), true));
6520 }
6521
6522 /// Check whether or not \p Use can be combined
6523 /// with the transition.
6524 /// I.e., is it possible to do Use(Transition) => AnotherUse?
6525 bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); }
6526
6527 /// Record \p ToBePromoted as part of the chain to be promoted.
6528 void enqueueForPromotion(Instruction *ToBePromoted) {
6529 InstsToBePromoted.push_back(ToBePromoted);
6530 }
6531
6532 /// Set the instruction that will be combined with the transition.
6533 void recordCombineInstruction(Instruction *ToBeCombined) {
6534 assert(canCombine(ToBeCombined) && "Unsupported instruction to combine");
6535 CombineInst = ToBeCombined;
6536 }
6537
6538 /// Promote all the instructions enqueued for promotion if it is
6539 /// profitable.
6540 /// \return True if the promotion happened, false otherwise.
6541 bool promote() {
6542 // Check if there is something to promote.
6543 // Right now, if we do not have anything to combine with,
6544 // we assume the promotion is not profitable.
6545 if (InstsToBePromoted.empty() || !CombineInst)
6546 return false;
6547
6548 // Check cost.
6549 if (!StressStoreExtract && !isProfitableToPromote())
6550 return false;
6551
6552 // Promote.
6553 for (auto &ToBePromoted : InstsToBePromoted)
6554 promoteImpl(ToBePromoted);
6555 InstsToBePromoted.clear();
6556 return true;
6557 }
6558};
6559
6560} // end anonymous namespace
6561
6562void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) {
6563 // At this point, we know that all the operands of ToBePromoted but Def
6564 // can be statically promoted.
6565 // For Def, we need to use its parameter in ToBePromoted:
6566 // b = ToBePromoted ty1 a
6567 // Def = Transition ty1 b to ty2
6568 // Move the transition down.
6569 // 1. Replace all uses of the promoted operation by the transition.
6570 // = ... b => = ... Def.
6571 assert(ToBePromoted->getType() == Transition->getType() &&
6572 "The type of the result of the transition does not match "
6573 "the final type");
6574 ToBePromoted->replaceAllUsesWith(Transition);
6575 // 2. Update the type of the uses.
6576 // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def.
6577 Type *TransitionTy = getTransitionType();
6578 ToBePromoted->mutateType(TransitionTy);
6579 // 3. Update all the operands of the promoted operation with promoted
6580 // operands.
6581 // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a.
6582 for (Use &U : ToBePromoted->operands()) {
6583 Value *Val = U.get();
6584 Value *NewVal = nullptr;
6585 if (Val == Transition)
6586 NewVal = Transition->getOperand(getTransitionOriginalValueIdx());
6587 else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) ||
6588 isa<ConstantFP>(Val)) {
6589 // Use a splat constant if it is not safe to use undef.
6590 NewVal = getConstantVector(
6591 cast<Constant>(Val),
6592 isa<UndefValue>(Val) ||
6593 canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo()));
6594 } else
6595 llvm_unreachable("Did you modified shouldPromote and forgot to update "
6596 "this?");
6597 ToBePromoted->setOperand(U.getOperandNo(), NewVal);
6598 }
6599 Transition->moveAfter(ToBePromoted);
6600 Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted);
6601}
6602
6603/// Some targets can do store(extractelement) with one instruction.
6604/// Try to push the extractelement towards the stores when the target
6605/// has this feature and this is profitable.
6606bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
6607 unsigned CombineCost = std::numeric_limits<unsigned>::max();
6608 if (DisableStoreExtract || !TLI ||
6609 (!StressStoreExtract &&
6610 !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(),
6611 Inst->getOperand(1), CombineCost)))
6612 return false;
6613
6614 // At this point we know that Inst is a vector to scalar transition.
6615 // Try to move it down the def-use chain, until:
6616 // - We can combine the transition with its single use
6617 // => we got rid of the transition.
6618 // - We escape the current basic block
6619 // => we would need to check that we are moving it at a cheaper place and
6620 // we do not do that for now.
6621 BasicBlock *Parent = Inst->getParent();
6622 LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n');
6623 VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost);
6624 // If the transition has more than one use, assume this is not going to be
6625 // beneficial.
6626 while (Inst->hasOneUse()) {
6627 Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin());
6628 LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n');
6629
6630 if (ToBePromoted->getParent() != Parent) {
6631 LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("
6632 << ToBePromoted->getParent()->getName()
6633 << ") than the transition (" << Parent->getName()
6634 << ").\n");
6635 return false;
6636 }
6637
6638 if (VPH.canCombine(ToBePromoted)) {
6639 LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'
6640 << "will be combined with: " << *ToBePromoted << '\n');
6641 VPH.recordCombineInstruction(ToBePromoted);
6642 bool Changed = VPH.promote();
6643 NumStoreExtractExposed += Changed;
6644 return Changed;
6645 }
6646
6647 LLVM_DEBUG(dbgs() << "Try promoting.\n");
6648 if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted))
6649 return false;
6650
6651 LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n");
6652
6653 VPH.enqueueForPromotion(ToBePromoted);
6654 Inst = ToBePromoted;
6655 }
6656 return false;
6657}
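
As a standalone illustration (not taken from LLVM), source of roughly this shape is what produces the vector-to-scalar transition handled above: lane 0 is extracted, combined with a scalar, and stored. The function name and the use of Clang's vector_size extension are my own, and whether the combine actually fires depends on the target's canCombineStoreAndExtract() answer.

typedef int v4si __attribute__((vector_size(16)));

void storeLane0PlusOne(v4si V, int *P) {
  // extractelement (lane 0) feeding an add feeding a store; the pass tries to
  // promote the add into the vector domain and sink the extract into the store.
  *P = V[0] + 1;
}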
6658
6659/// For the instruction sequence of store below, F and I values
6660/// are bundled together as an i64 value before being stored into memory.
6661/// Sometimes it is more efficient to generate separate stores for F and I,
6662/// which can remove the bitwise instructions or sink them to colder places.
6663///
6664/// (store (or (zext (bitcast F to i32) to i64),
6665/// (shl (zext I to i64), 32)), addr) -->
6666/// (store F, addr) and (store I, addr+4)
6667///
6668/// Similarly, splitting for other merged store can also be beneficial, like:
6669/// For pair of {i32, i32}, i64 store --> two i32 stores.
6670/// For pair of {i32, i16}, i64 store --> two i32 stores.
6671/// For pair of {i16, i16}, i32 store --> two i16 stores.
6672/// For pair of {i16, i8}, i32 store --> two i16 stores.
6673/// For pair of {i8, i8}, i16 store --> two i8 stores.
6674///
6675/// We allow each target to determine specifically which kind of splitting is
6676/// supported.
6677///
6678/// The store patterns are commonly seen from the simple code snippet below
6679/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
6680/// void goo(const std::pair<int, float> &);
6681/// hoo() {
6682/// ...
6683/// goo(std::make_pair(tmp, ftmp));
6684/// ...
6685/// }
6686///
6687/// Although we already have similar splitting in DAG Combine, we duplicate
6688/// it in CodeGenPrepare to catch the case in which pattern is across
6689/// multiple BBs. The logic in DAG Combine is kept to catch case generated
6690/// during code expansion.
6691static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
6692 const TargetLowering &TLI) {
6693 // Handle simple but common cases only.
6694 Type *StoreType = SI.getValueOperand()->getType();
6695 if (!DL.typeSizeEqualsStoreSize(StoreType) ||
6696 DL.getTypeSizeInBits(StoreType) == 0)
6697 return false;
6698
6699 unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
6700 Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
6701 if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
6702 return false;
6703
6704 // Don't split the store if it is volatile.
6705 if (SI.isVolatile())
6706 return false;
6707
6708 // Match the following patterns:
6709 // (store (or (zext LValue to i64),
6710 // (shl (zext HValue to i64), 32)), HalfValBitSize)
6711 // or
6712 // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize)
6713 // (zext LValue to i64),
6714 // Expect both operands of OR and the first operand of SHL have only
6715 // one use.
6716 Value *LValue, *HValue;
6717 if (!match(SI.getValueOperand(),
6718 m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
6719 m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
6720 m_SpecificInt(HalfValBitSize))))))
6721 return false;
6722
6723 // Check that LValue and HValue are integers of size at most HalfValBitSize.
6724 if (!LValue->getType()->isIntegerTy() ||
6725 DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
6726 !HValue->getType()->isIntegerTy() ||
6727 DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
6728 return false;
6729
6730 // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
6731 // as the input of target query.
6732 auto *LBC = dyn_cast<BitCastInst>(LValue);
6733 auto *HBC = dyn_cast<BitCastInst>(HValue);
6734 EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
6735 : EVT::getEVT(LValue->getType());
6736 EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
6737 : EVT::getEVT(HValue->getType());
6738 if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
6739 return false;
6740
6741 // Start to split store.
6742 IRBuilder<> Builder(SI.getContext());
6743 Builder.SetInsertPoint(&SI);
6744
6745 // If LValue/HValue is a bitcast in another BB, create a new one in current
6746 // BB so it may be merged with the split stores by the DAG combiner.
6747 if (LBC && LBC->getParent() != SI.getParent())
6748 LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
6749 if (HBC && HBC->getParent() != SI.getParent())
6750 HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
6751
6752 bool IsLE = SI.getModule()->getDataLayout().isLittleEndian();
6753 auto CreateSplitStore = [&](Value *V, bool Upper) {
6754 V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
6755 Value *Addr = Builder.CreateBitCast(
6756 SI.getOperand(1),
6757 SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
6758 if ((IsLE && Upper) || (!IsLE && !Upper))
6759 Addr = Builder.CreateGEP(
6760 SplitStoreType, Addr,
6761 ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
6762 Builder.CreateAlignedStore(
6763 V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
6764 };
6765
6766 CreateSplitStore(LValue, false);
6767 CreateSplitStore(HValue, true);
6768
6769 // Delete the old store.
6770 SI.eraseFromParent();
6771 return true;
6772}
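
A compilable version of the goo/hoo snippet from the comment above, with parameter names and types filled in by me as an assumption; it only shows where the merged i64 store typically comes from, not the split itself.

#include <utility>

void goo(const std::pair<int, float> &); // callee assumed to be defined elsewhere

void hoo(int tmp, float ftmp) {
  // After SROA of make_pair and inlining, the pair is commonly stored as a
  // single i64 built by zext/shl/or from the int and the bitcast float; that
  // is the pattern splitMergedValStore() can turn back into two 32-bit stores.
  goo(std::make_pair(tmp, ftmp));
}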
6773
6774// Return true if the GEP has two operands, the first operand is of a sequential
6775// type, and the second operand is a constant.
6776static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) {
6777 gep_type_iterator I = gep_type_begin(*GEP);
6778 return GEP->getNumOperands() == 2 &&
6779 I.isSequential() &&
6780 isa<ConstantInt>(GEP->getOperand(1));
6781}
6782
6783// Try unmerging GEPs to reduce liveness interference (register pressure) across
6784// IndirectBr edges. Since IndirectBr edges tend to touch on many blocks,
6785// reducing liveness interference across those edges benefits global register
6786// allocation. Currently handles only certain cases.
6787//
6788// For example, unmerge %GEPI and %UGEPI as below.
6789//
6790// ---------- BEFORE ----------
6791// SrcBlock:
6792// ...
6793// %GEPIOp = ...
6794// ...
6795// %GEPI = gep %GEPIOp, Idx
6796// ...
6797// indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ]
6798// (* %GEPI is alive on the indirectbr edges due to other uses ahead)
6799// (* %GEPIOp is alive on the indirectbr edges only because it's used by
6800// %UGEPI)
6801//
6802// DstB0: ... (there may be a gep similar to %UGEPI to be unmerged)
6803// DstB1: ... (there may be a gep similar to %UGEPI to be unmerged)
6804// ...
6805//
6806// DstBi:
6807// ...
6808// %UGEPI = gep %GEPIOp, UIdx
6809// ...
6810// ---------------------------
6811//
6812// ---------- AFTER ----------
6813// SrcBlock:
6814// ... (same as above)
6815// (* %GEPI is still alive on the indirectbr edges)
6816// (* %GEPIOp is no longer alive on the indirectbr edges as a result of the
6817// unmerging)
6818// ...
6819//
6820// DstBi:
6821// ...
6822// %UGEPI = gep %GEPI, (UIdx-Idx)
6823// ...
6824// ---------------------------
6825//
6826// The register pressure on the IndirectBr edges is reduced because %GEPIOp is
6827// no longer alive on them.
6828//
6829// We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging
6830// of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as
6831// not to disable further simplifications and optimizations as a result of GEP
6832// merging.
6833//
6834// Note this unmerging may increase the length of the data flow critical path
6835// (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff
6836// between the register pressure and the length of data-flow critical
6837// path. Restricting this to the uncommon IndirectBr case would minimize the
6838// impact of potentially longer critical path, if any, and the impact on compile
6839// time.
6840static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
6841 const TargetTransformInfo *TTI) {
6842 BasicBlock *SrcBlock = GEPI->getParent();
6843 // Check that SrcBlock ends with an IndirectBr. If not, give up. The common
6844 // (non-IndirectBr) cases exit early here.
6845 if (!isa<IndirectBrInst>(SrcBlock->getTerminator()))
6846 return false;
6847 // Check that GEPI is a simple gep with a single constant index.
6848 if (!GEPSequentialConstIndexed(GEPI))
6849 return false;
6850 ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1));
6851 // Check that GEPI is a cheap one.
6852 if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType())
6853 > TargetTransformInfo::TCC_Basic)
6854 return false;
6855 Value *GEPIOp = GEPI->getOperand(0);
6856 // Check that GEPIOp is an instruction that's also defined in SrcBlock.
6857 if (!isa<Instruction>(GEPIOp))
6858 return false;
6859 auto *GEPIOpI = cast<Instruction>(GEPIOp);
6860 if (GEPIOpI->getParent() != SrcBlock)
6861 return false;
6862 // Check that GEP is used outside the block, meaning it's alive on the
6863 // IndirectBr edge(s).
6864 if (find_if(GEPI->users(), [&](User *Usr) {
6865 if (auto *I = dyn_cast<Instruction>(Usr)) {
6866 if (I->getParent() != SrcBlock) {
6867 return true;
6868 }
6869 }
6870 return false;
6871 }) == GEPI->users().end())
6872 return false;
6873 // The second elements of the GEP chains to be unmerged.
6874 std::vector<GetElementPtrInst *> UGEPIs;
6875 // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive
6876 // on IndirectBr edges.
6877 for (User *Usr : GEPIOp->users()) {
6878 if (Usr == GEPI) continue;
6879 // Check if Usr is an Instruction. If not, give up.
6880 if (!isa<Instruction>(Usr))
6881 return false;
6882 auto *UI = cast<Instruction>(Usr);
6883 // If Usr is in the same block as GEPIOp, that's fine; skip it.
6884 if (UI->getParent() == SrcBlock)
6885 continue;
6886 // Check if Usr is a GEP. If not, give up.
6887 if (!isa<GetElementPtrInst>(Usr))
6888 return false;
6889 auto *UGEPI = cast<GetElementPtrInst>(Usr);
6890 // Check if UGEPI is a simple gep with a single constant index and GEPIOp is
6891 // the pointer operand to it. If so, record it in the vector. If not, give
6892 // up.
6893 if (!GEPSequentialConstIndexed(UGEPI))
6894 return false;
6895 if (UGEPI->getOperand(0) != GEPIOp)
6896 return false;
6897 if (GEPIIdx->getType() !=
6898 cast<ConstantInt>(UGEPI->getOperand(1))->getType())
6899 return false;
6900 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6901 if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType())
6902 > TargetTransformInfo::TCC_Basic)
6903 return false;
6904 UGEPIs.push_back(UGEPI);
6905 }
6906 if (UGEPIs.size() == 0)
6907 return false;
6908 // Check the materializing cost of (Uidx-Idx).
6909 for (GetElementPtrInst *UGEPI : UGEPIs) {
6910 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6911 APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
6912 unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType());
6913 if (ImmCost > TargetTransformInfo::TCC_Basic)
6914 return false;
6915 }
6916 // Now unmerge between GEPI and UGEPIs.
6917 for (GetElementPtrInst *UGEPI : UGEPIs) {
6918 UGEPI->setOperand(0, GEPI);
6919 ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
6920 Constant *NewUGEPIIdx =
6921 ConstantInt::get(GEPIIdx->getType(),
6922 UGEPIIdx->getValue() - GEPIIdx->getValue());
6923 UGEPI->setOperand(1, NewUGEPIIdx);
6924 // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not
6925 // inbounds to avoid UB.
6926 if (!GEPI->isInBounds()) {
6927 UGEPI->setIsInBounds(false);
6928 }
6929 }
6930 // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not
6931 // alive on IndirectBr edges).
6932 assert(find_if(GEPIOp->users(), [&](User *Usr) {
6933 return cast<Instruction>(Usr)->getParent() != SrcBlock;
6934 }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock");
6935 return true;
6936}
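
A minimal standalone sketch (plain C++, not LLVM IR; names and values are arbitrary) of why the index rewrite above is sound: rebasing %UGEPI on %GEPI only changes the constant, since p + UIdx equals (p + Idx) + (UIdx - Idx).

#include <cassert>

int main() {
  int Buf[16] = {};
  const long Idx = 3, UIdx = 11;          // the two constant indices
  int *GEPI = Buf + Idx;                  // %GEPI  = gep %GEPIOp, Idx
  int *UGEPIBefore = Buf + UIdx;          // %UGEPI = gep %GEPIOp, UIdx   (before)
  int *UGEPIAfter = GEPI + (UIdx - Idx);  // %UGEPI = gep %GEPI, (UIdx-Idx)  (after)
  assert(UGEPIBefore == UGEPIAfter);
  return 0;
}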
6937
6938bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
6939 // Bail out if we inserted the instruction to prevent optimizations from
6940 // stepping on each other's toes.
6941 if (InsertedInsts.count(I))
15
Assuming the condition is false
16
Taking false branch
37
Assuming the condition is false
38
Taking false branch
6942 return false;
6943
6944 // TODO: Move into the switch on opcode below here.
6945 if (PHINode *P = dyn_cast<PHINode>(I)) {
17.1
'P' is null
39.1
'P' is null
17
Assuming 'I' is not a 'PHINode'
18
Taking false branch
39
Assuming 'I' is not a 'PHINode'
40
Taking false branch
6946 // It is possible for very late stage optimizations (such as SimplifyCFG)
6947 // to introduce PHI nodes too late to be cleaned up. If we detect such a
6948 // trivial PHI, go ahead and zap it here.
6949 if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
6950 LargeOffsetGEPMap.erase(P);
6951 P->replaceAllUsesWith(V);
6952 P->eraseFromParent();
6953 ++NumPHIsElim;
6954 return true;
6955 }
6956 return false;
6957 }
6958
6959 if (CastInst *CI = dyn_cast<CastInst>(I)) {
19.1
'CI' is null
41.1
'CI' is null
19
Assuming 'I' is not a 'CastInst'
20
Taking false branch
41
Assuming 'I' is not a 'CastInst'
42
Taking false branch
6960 // If the source of the cast is a constant, then this should have
6961 // already been constant folded. The only reason NOT to constant fold
6962 // it is if something (e.g. LSR) was careful to place the constant
6963 // evaluation in a block other than then one that uses it (e.g. to hoist
6964 // the address of globals out of a loop). If this is the case, we don't
6965 // want to forward-subst the cast.
6966 if (isa<Constant>(CI->getOperand(0)))
6967 return false;
6968
6969 if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL))
6970 return true;
6971
6972 if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
6973 /// Sink a zext or sext into its user blocks if the target type doesn't
6974 /// fit in one register
6975 if (TLI &&
6976 TLI->getTypeAction(CI->getContext(),
6977 TLI->getValueType(*DL, CI->getType())) ==
6978 TargetLowering::TypeExpandInteger) {
6979 return SinkCast(CI);
6980 } else {
6981 bool MadeChange = optimizeExt(I);
6982 return MadeChange | optimizeExtUses(I);
6983 }
6984 }
6985 return false;
6986 }
6987
6988 if (auto *Cmp = dyn_cast<CmpInst>(I))
21.1
'Cmp' is null
43.1
'Cmp' is null
21
Assuming 'I' is not a 'CmpInst'
22
Taking false branch
43
Assuming 'I' is not a 'CmpInst'
44
Taking false branch
6989 if (TLI && optimizeCmp(Cmp, ModifiedDT))
6990 return true;
6991
6992 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
23.1
'LI' is null
45.1
'LI' is non-null
23
Assuming 'I' is not a 'LoadInst'
24
Taking false branch
45
Assuming 'I' is a 'LoadInst'
46
Taking true branch
6993 LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
6994 if (TLI) {
47
Assuming field 'TLI' is non-null
48
Taking true branch
6995 bool Modified = optimizeLoadExt(LI);
49
Calling 'CodeGenPrepare::optimizeLoadExt'
6996 unsigned AS = LI->getPointerAddressSpace();
6997 Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS);
6998 return Modified;
6999 }
7000 return false;
7001 }
7002
7003 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
25.1
'SI' is null
25
Assuming 'I' is not a 'StoreInst'
26
Taking false branch
7004 if (TLI && splitMergedValStore(*SI, *DL, *TLI))
7005 return true;
7006 SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
7007 if (TLI) {
7008 unsigned AS = SI->getPointerAddressSpace();
7009 return optimizeMemoryInst(I, SI->getOperand(1),
7010 SI->getOperand(0)->getType(), AS);
7011 }
7012 return false;
7013 }
7014
7015 if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
27.1
'RMW' is null
27
Assuming 'I' is not a 'AtomicRMWInst'
28
Taking false branch
7016 unsigned AS = RMW->getPointerAddressSpace();
7017 return optimizeMemoryInst(I, RMW->getPointerOperand(),
7018 RMW->getType(), AS);
7019 }
7020
7021 if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
29.1
'CmpX' is null
29
Assuming 'I' is not a 'AtomicCmpXchgInst'
30
Taking false branch
7022 unsigned AS = CmpX->getPointerAddressSpace();
7023 return optimizeMemoryInst(I, CmpX->getPointerOperand(),
7024 CmpX->getCompareOperand()->getType(), AS);
7025 }
7026
7027 BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
31
Assuming 'I' is not a 'BinaryOperator'
7028
7029 if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
31.1
'BinOp' is null
7030 EnableAndCmpSinking && TLI)
7031 return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
7032
7033 // TODO: Move this into the switch on opcode - it handles shifts already.
7034 if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
31.2
'BinOp' is null
7035 BinOp->getOpcode() == Instruction::LShr)) {
7036 ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
7037 if (TLI && CI && TLI->hasExtractBitsInsn())
7038 if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
7039 return true;
7040 }
7041
7042 if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
32.1
'GEPI' is non-null
32
Assuming 'I' is a 'GetElementPtrInst'
33
Taking true branch
7043 if (GEPI->hasAllZeroIndices()) {
34
Assuming the condition is true
35
Taking true branch
7044 /// The GEP operand must be a pointer, so must its result -> BitCast
7045 Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
7046 GEPI->getName(), GEPI);
7047 NC->setDebugLoc(GEPI->getDebugLoc());
7048 GEPI->replaceAllUsesWith(NC);
7049 GEPI->eraseFromParent();
7050 ++NumGEPsElim;
7051 optimizeInst(NC, ModifiedDT);
36
Calling 'CodeGenPrepare::optimizeInst'
7052 return true;
7053 }
7054 if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) {
7055 return true;
7056 }
7057 return false;
7058 }
7059
7060 if (tryToSinkFreeOperands(I))
7061 return true;
7062
7063 switch (I->getOpcode()) {
7064 case Instruction::Shl:
7065 case Instruction::LShr:
7066 case Instruction::AShr:
7067 return optimizeShiftInst(cast<BinaryOperator>(I));
7068 case Instruction::Call:
7069 return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
7070 case Instruction::Select:
7071 return optimizeSelectInst(cast<SelectInst>(I));
7072 case Instruction::ShuffleVector:
7073 return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
7074 case Instruction::Switch:
7075 return optimizeSwitchInst(cast<SwitchInst>(I));
7076 case Instruction::ExtractElement:
7077 return optimizeExtractElementInst(cast<ExtractElementInst>(I));
7078 }
7079
7080 return false;
7081}
7082
7083/// Given an OR instruction, check to see if this is a bitreverse
7084/// idiom. If so, insert the new intrinsic and return true.
7085static bool makeBitReverse(Instruction &I, const DataLayout &DL,
7086 const TargetLowering &TLI) {
7087 if (!I.getType()->isIntegerTy() ||
7088 !TLI.isOperationLegalOrCustom(ISD::BITREVERSE,
7089 TLI.getValueType(DL, I.getType(), true)))
7090 return false;
7091
7092 SmallVector<Instruction*, 4> Insts;
7093 if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts))
7094 return false;
7095 Instruction *LastInst = Insts.back();
7096 I.replaceAllUsesWith(LastInst);
7097 RecursivelyDeleteTriviallyDeadInstructions(&I);
7098 return true;
7099}
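
For context, a classic bit-reversal idiom at the source level (my own example): the and/shift/or chain it lowers to is the kind of pattern recognizeBSwapOrBitReverseIdiom() can collapse into a single intrinsic when ISD::BITREVERSE is legal or custom for the type. Whether this exact function is matched depends on how earlier passes shape the IR.

#include <cstdint>

uint8_t reverseBits8(uint8_t V) {
  V = static_cast<uint8_t>(((V & 0xF0u) >> 4) | ((V & 0x0Fu) << 4)); // swap nibbles
  V = static_cast<uint8_t>(((V & 0xCCu) >> 2) | ((V & 0x33u) << 2)); // swap bit pairs
  V = static_cast<uint8_t>(((V & 0xAAu) >> 1) | ((V & 0x55u) << 1)); // swap adjacent bits
  return V;
}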
7100
7101// In this pass we look for GEP and cast instructions that are used
7102// across basic blocks and rewrite them to improve basic-block-at-a-time
7103// selection.
7104bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
7105 SunkAddrs.clear();
7106 bool MadeChange = false;
7107
7108 CurInstIterator = BB.begin();
7109 while (CurInstIterator != BB.end()) {
13
Loop condition is true. Entering loop body
7110 MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT);
14
Calling 'CodeGenPrepare::optimizeInst'
7111 if (ModifiedDT)
7112 return true;
7113 }
7114
7115 bool MadeBitReverse = true;
7116 while (TLI && MadeBitReverse) {
7117 MadeBitReverse = false;
7118 for (auto &I : reverse(BB)) {
7119 if (makeBitReverse(I, *DL, *TLI)) {
7120 MadeBitReverse = MadeChange = true;
7121 break;
7122 }
7123 }
7124 }
7125 MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
7126
7127 return MadeChange;
7128}
7129
7130// If llvm.dbg.value is far away from the value, then iSel may not be able to
7131// handle it properly. iSel will drop llvm.dbg.value if it cannot find a node
7132// corresponding to the value.
7133bool CodeGenPrepare::placeDbgValues(Function &F) {
7134 bool MadeChange = false;
7135 for (BasicBlock &BB : F) {
7136 Instruction *PrevNonDbgInst = nullptr;
7137 for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
7138 Instruction *Insn = &*BI++;
7139 DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
7140 // Leave dbg.values that refer to an alloca alone. These
7141 // intrinsics describe the address of a variable (= the alloca)
7142 // being taken. They should not be moved next to the alloca
7143 // (and to the beginning of the scope), but rather stay close to
7144 // where said address is used.
7145 if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) {
7146 PrevNonDbgInst = Insn;
7147 continue;
7148 }
7149
7150 Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
7151 if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
7152 // If VI is a phi in a block with an EHPad terminator, we can't insert
7153 // after it.
7154 if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
7155 continue;
7156 LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
7157 << *DVI << ' ' << *VI);
7158 DVI->removeFromParent();
7159 if (isa<PHINode>(VI))
7160 DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
7161 else
7162 DVI->insertAfter(VI);
7163 MadeChange = true;
7164 ++NumDbgValueMoved;
7165 }
7166 }
7167 }
7168 return MadeChange;
7169}
7170
7171/// Scale down both weights to fit into uint32_t.
7172static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
7173 uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
7174 uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
7175 NewTrue = NewTrue / Scale;
7176 NewFalse = NewFalse / Scale;
7177}
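
A worked example of that scaling on weights that both overflow uint32_t (the values are arbitrary; the helper restates the function above only so the snippet runs stand-alone).

#include <cassert>
#include <cstdint>
#include <limits>

static void scaleWeightsSketch(uint64_t &NewTrue, uint64_t &NewFalse) {
  uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;
  uint32_t Scale = (NewMax / std::numeric_limits<uint32_t>::max()) + 1;
  NewTrue = NewTrue / Scale;
  NewFalse = NewFalse / Scale;
}

int main() {
  uint64_t TrueW = 10000000000ULL;  // > UINT32_MAX
  uint64_t FalseW = 5000000000ULL;  // > UINT32_MAX
  scaleWeightsSketch(TrueW, FalseW);                       // Scale == 3 for these inputs
  assert(TrueW <= std::numeric_limits<uint32_t>::max());   // 3333333333
  assert(FalseW <= std::numeric_limits<uint32_t>::max());  // 1666666666
  return 0;
}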
7178
7179/// Some targets prefer to split a conditional branch like:
7180/// \code
7181/// %0 = icmp ne i32 %a, 0
7182/// %1 = icmp ne i32 %b, 0
7183/// %or.cond = or i1 %0, %1
7184/// br i1 %or.cond, label %TrueBB, label %FalseBB
7185/// \endcode
7186/// into multiple branch instructions like:
7187/// \code
7188/// bb1:
7189/// %0 = icmp ne i32 %a, 0
7190/// br i1 %0, label %TrueBB, label %bb2
7191/// bb2:
7192/// %1 = icmp ne i32 %b, 0
7193/// br i1 %1, label %TrueBB, label %FalseBB
7194/// \endcode
7195/// This usually allows instruction selection to do even further optimizations
7196/// and combine the compare with the branch instruction. Currently this is
7197/// applied for targets which have "cheap" jump instructions.
7198///
7199/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
7200///
7201bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
7202 if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
7203 return false;
7204
7205 bool MadeChange = false;
7206 for (auto &BB : F) {
7207 // Does this BB end with the following?
7208 // %cond1 = icmp|fcmp|binary instruction ...
7209 // %cond2 = icmp|fcmp|binary instruction ...
7210 // %cond.or = or|and i1 %cond1, cond2
7211 // br i1 %cond.or label %dest1, label %dest2"
7212 BinaryOperator *LogicOp;
7213 BasicBlock *TBB, *FBB;
7214 if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB)))
7215 continue;
7216
7217 auto *Br1 = cast<BranchInst>(BB.getTerminator());
7218 if (Br1->getMetadata(LLVMContext::MD_unpredictable))
7219 continue;
7220
7221 unsigned Opc;
7222 Value *Cond1, *Cond2;
7223 if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)),
7224 m_OneUse(m_Value(Cond2)))))
7225 Opc = Instruction::And;
7226 else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)),
7227 m_OneUse(m_Value(Cond2)))))
7228 Opc = Instruction::Or;
7229 else
7230 continue;
7231
7232 if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) ||
7233 !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) )
7234 continue;
7235
7236 LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump());
7237
7238 // Create a new BB.
7239 auto TmpBB =
7240 BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split",
7241 BB.getParent(), BB.getNextNode());
7242
7243 // Update original basic block by using the first condition directly by the
7244 // branch instruction and removing the no longer needed and/or instruction.
7245 Br1->setCondition(Cond1);
7246 LogicOp->eraseFromParent();
7247
7248 // Depending on the condition we have to either replace the true or the
7249 // false successor of the original branch instruction.
7250 if (Opc == Instruction::And)
7251 Br1->setSuccessor(0, TmpBB);
7252 else
7253 Br1->setSuccessor(1, TmpBB);
7254
7255 // Fill in the new basic block.
7256 auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB);
7257 if (auto *I = dyn_cast<Instruction>(Cond2)) {
7258 I->removeFromParent();
7259 I->insertBefore(Br2);
7260 }
7261
7262 // Update PHI nodes in both successors. The original BB needs to be
7263 // replaced in one successor's PHI nodes, because the branch comes now from
7264 // the newly generated BB (NewBB). In the other successor we need to add one
7265 // incoming edge to the PHI nodes, because both branch instructions target
7266 // now the same successor. Depending on the original branch condition
7267 // (and/or) we have to swap the successors (TrueDest, FalseDest), so that
7268 // we perform the correct update for the PHI nodes.
7269 // This doesn't change the successor order of the just created branch
7270 // instruction (or any other instruction).
7271 if (Opc == Instruction::Or)
7272 std::swap(TBB, FBB);
7273
7274 // Replace the old BB with the new BB.
7275 TBB->replacePhiUsesWith(&BB, TmpBB);
7276
7277 // Add another incoming edge form the new BB.
7278 for (PHINode &PN : FBB->phis()) {
7279 auto *Val = PN.getIncomingValueForBlock(&BB);
7280 PN.addIncoming(Val, TmpBB);
7281 }
7282
7283 // Update the branch weights (from SelectionDAGBuilder::
7284 // FindMergedConditions).
7285 if (Opc == Instruction::Or) {
7286 // Codegen X | Y as:
7287 // BB1:
7288 // jmp_if_X TBB
7289 // jmp TmpBB
7290 // TmpBB:
7291 // jmp_if_Y TBB
7292 // jmp FBB
7293 //
7294
7295 // We have flexibility in setting Prob for BB1 and Prob for NewBB.
7296 // The requirement is that
7297 // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
7298 // = TrueProb for original BB.
7299 // Assuming the original weights are A and B, one choice is to set BB1's
7300 // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice
7301 // assumes that
7302 // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
7303 // Another choice is to assume TrueProb for BB1 equals to TrueProb for
7304 // TmpBB, but the math is more complicated.
7305 uint64_t TrueWeight, FalseWeight;
7306 if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
7307 uint64_t NewTrueWeight = TrueWeight;
7308 uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight;
7309 scaleWeights(NewTrueWeight, NewFalseWeight);
7310 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
7311 .createBranchWeights(TrueWeight, FalseWeight));
7312
7313 NewTrueWeight = TrueWeight;
7314 NewFalseWeight = 2 * FalseWeight;
7315 scaleWeights(NewTrueWeight, NewFalseWeight);
7316 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
7317 .createBranchWeights(TrueWeight, FalseWeight));
7318 }
7319 } else {
7320 // Codegen X & Y as:
7321 // BB1:
7322 // jmp_if_X TmpBB
7323 // jmp FBB
7324 // TmpBB:
7325 // jmp_if_Y TBB
7326 // jmp FBB
7327 //
7328 // This requires creation of TmpBB after CurBB.
7329
7330 // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
7331 // The requirement is that
7332 // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
7333 // = FalseProb for original BB.
7334 // Assuming the original weights are A and B, one choice is to set BB1's
7335 // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice
7336 // assumes that
7337 // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB.
7338 uint64_t TrueWeight, FalseWeight;
7339 if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) {
7340 uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight;
7341 uint64_t NewFalseWeight = FalseWeight;
7342 scaleWeights(NewTrueWeight, NewFalseWeight);
7343 Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext())
7344 .createBranchWeights(TrueWeight, FalseWeight));
7345
7346 NewTrueWeight = 2 * TrueWeight;
7347 NewFalseWeight = FalseWeight;
7348 scaleWeights(NewTrueWeight, NewFalseWeight);
7349 Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext())
7350 .createBranchWeights(TrueWeight, FalseWeight));
7351 }
7352 }
7353
7354 ModifiedDT = true;
7355 MadeChange = true;
7356
7357 LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
7358 TmpBB->dump());
7359 }
7360 return MadeChange;
7361}
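
For reference, a source-level shape (my own example) that commonly reaches this code: using | instead of || keeps both compares in one block, so the front end emits an "or i1" of two icmps feeding the conditional branch, as in the \code block of the comment above. Whether the split is then applied depends on EnableFastISel and TLI->isJumpExpensive().

void takeBranch(int A, int B, void (&OnTrue)(), void (&OnFalse)()) {
  // Non-short-circuit | yields: %0 = icmp ne A, 0 ; %1 = icmp ne B, 0 ;
  // %or.cond = or i1 %0, %1 ; br i1 %or.cond, ...
  if ((A != 0) | (B != 0))
    OnTrue();
  else
    OnFalse();
}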

/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H
15
16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/AlignOf.h"
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include "llvm/Support/MemAlloc.h"
21#include "llvm/Support/type_traits.h"
22#include "llvm/Support/ErrorHandling.h"
23#include <algorithm>
24#include <cassert>
25#include <cstddef>
26#include <cstdlib>
27#include <cstring>
28#include <initializer_list>
29#include <iterator>
30#include <memory>
31#include <new>
32#include <type_traits>
33#include <utility>
34
35namespace llvm {
36
37/// This is all the non-templated stuff common to all SmallVectors.
38class SmallVectorBase {
39protected:
40 void *BeginX;
41 unsigned Size = 0, Capacity;
42
43 SmallVectorBase() = delete;
44 SmallVectorBase(void *FirstEl, size_t TotalCapacity)
45 : BeginX(FirstEl), Capacity(TotalCapacity) {}
46
47 /// This is an implementation of the grow() method which only works
48 /// on POD-like data types and is out of line to reduce code duplication.
49 void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize);
50
51public:
52 size_t size() const { return Size; }
53 size_t capacity() const { return Capacity; }
54
55 LLVM_NODISCARD bool empty() const { return !Size; }
56
Assuming field 'Size' is 0, which participates in a condition later
57
Returning the value 1, which participates in a condition later
56
57 /// Set the array size to \p N, which the current array must have enough
58 /// capacity for.
59 ///
60 /// This does not construct or destroy any elements in the vector.
61 ///
62 /// Clients can use this in conjunction with capacity() to write past the end
63 /// of the buffer when they know that more elements are available, and only
64 /// update the size later. This avoids the cost of value initializing elements
65 /// which will only be overwritten.
66 void set_size(size_t N) {
67 assert(N <= capacity());
68 Size = N;
69 }
70};
71
72/// Figure out the offset of the first element.
73template <class T, typename = void> struct SmallVectorAlignmentAndSize {
74 AlignedCharArrayUnion<SmallVectorBase> Base;
75 AlignedCharArrayUnion<T> FirstEl;
76};
77
78/// This is the part of SmallVectorTemplateBase which does not depend on whether
79/// the type T is a POD. The extra dummy template argument is used by ArrayRef
80/// to avoid unnecessarily requiring T to be complete.
81template <typename T, typename = void>
82class SmallVectorTemplateCommon : public SmallVectorBase {
83 /// Find the address of the first element. For this pointer math to be valid
84 /// with small-size of 0 for T with lots of alignment, it's important that
85 /// SmallVectorStorage is properly-aligned even for small-size of 0.
86 void *getFirstEl() const {
87 return const_cast<void *>(reinterpret_cast<const void *>(
88 reinterpret_cast<const char *>(this) +
89 offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)));
90 }
91 // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
92
93protected:
94 SmallVectorTemplateCommon(size_t Size)
95 : SmallVectorBase(getFirstEl(), Size) {}
96
97 void grow_pod(size_t MinCapacity, size_t TSize) {
98 SmallVectorBase::grow_pod(getFirstEl(), MinCapacity, TSize);
99 }
100
101 /// Return true if this is a smallvector which has not had dynamic
102 /// memory allocated for it.
103 bool isSmall() const { return BeginX == getFirstEl(); }
104
105 /// Put this vector in a state of being small.
106 void resetToSmall() {
107 BeginX = getFirstEl();
108 Size = Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
109 }
110
111public:
112 using size_type = size_t;
113 using difference_type = ptrdiff_t;
114 using value_type = T;
115 using iterator = T *;
116 using const_iterator = const T *;
117
118 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
119 using reverse_iterator = std::reverse_iterator<iterator>;
120
121 using reference = T &;
122 using const_reference = const T &;
123 using pointer = T *;
124 using const_pointer = const T *;
125
126 // forward iterator creation methods.
127 iterator begin() { return (iterator)this->BeginX; }
128 const_iterator begin() const { return (const_iterator)this->BeginX; }
129 iterator end() { return begin() + size(); }
130 const_iterator end() const { return begin() + size(); }
131
132 // reverse iterator creation methods.
133 reverse_iterator rbegin() { return reverse_iterator(end()); }
134 const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
135 reverse_iterator rend() { return reverse_iterator(begin()); }
136 const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
137
138 size_type size_in_bytes() const { return size() * sizeof(T); }
139 size_type max_size() const { return size_type(-1) / sizeof(T); }
140
141 size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
142
143 /// Return a pointer to the vector's buffer, even if empty().
144 pointer data() { return pointer(begin()); }
145 /// Return a pointer to the vector's buffer, even if empty().
146 const_pointer data() const { return const_pointer(begin()); }
147
148 reference operator[](size_type idx) {
149 assert(idx < size());
150 return begin()[idx];
151 }
152 const_reference operator[](size_type idx) const {
153 assert(idx < size());
154 return begin()[idx];
155 }
156
157 reference front() {
158 assert(!empty());
159 return begin()[0];
160 }
161 const_reference front() const {
162 assert(!empty());
163 return begin()[0];
164 }
165
166 reference back() {
167 assert(!empty());
168 return end()[-1];
169 }
170 const_reference back() const {
171 assert(!empty());
172 return end()[-1];
173 }
174};
175
176/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put method
177/// implementations that are designed to work with non-POD-like T's.
178template <typename T, bool = is_trivially_copyable<T>::value>
179class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
180protected:
181 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
182
183 static void destroy_range(T *S, T *E) {
184 while (S != E) {
185 --E;
186 E->~T();
187 }
188 }
189
190 /// Move the range [I, E) into the uninitialized memory starting with "Dest",
191 /// constructing elements as needed.
192 template<typename It1, typename It2>
193 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
194 std::uninitialized_copy(std::make_move_iterator(I),
195 std::make_move_iterator(E), Dest);
196 }
197
198 /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
199 /// constructing elements as needed.
200 template<typename It1, typename It2>
201 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
202 std::uninitialized_copy(I, E, Dest);
203 }
204
205 /// Grow the allocated memory (without initializing new elements), doubling
206 /// the size of the allocated memory. Guarantees space for at least one more
207 /// element, or MinSize more elements if specified.
208 void grow(size_t MinSize = 0);
209
210public:
211 void push_back(const T &Elt) {
212 if (LLVM_UNLIKELY(this->size() >= this->capacity()))
213 this->grow();
214 ::new ((void*) this->end()) T(Elt);
215 this->set_size(this->size() + 1);
216 }
217
218 void push_back(T &&Elt) {
219 if (LLVM_UNLIKELY(this->size() >= this->capacity()))
220 this->grow();
221 ::new ((void*) this->end()) T(::std::move(Elt));
222 this->set_size(this->size() + 1);
223 }
224
225 void pop_back() {
226 this->set_size(this->size() - 1);
227 this->end()->~T();
228 }
229};
230
231// Define this out-of-line to dissuade the C++ compiler from inlining it.
232template <typename T, bool TriviallyCopyable>
233void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
234 if (MinSize > UINT32_MAX)
235 report_bad_alloc_error("SmallVector capacity overflow during allocation");
236
237 // Always grow, even from zero.
238 size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2));
239 NewCapacity = std::min(std::max(NewCapacity, MinSize), size_t(UINT32_MAX));
240 T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T)));
241
242 // Move the elements over.
243 this->uninitialized_move(this->begin(), this->end(), NewElts);
244
245 // Destroy the original elements.
246 destroy_range(this->begin(), this->end());
247
248 // If this wasn't grown from the inline copy, deallocate the old space.
249 if (!this->isSmall())
250 free(this->begin());
251
252 this->BeginX = NewElts;
253 this->Capacity = NewCapacity;
254}
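
A usage sketch of the growth policy defined just above (my own example; it assumes the LLVM headers are on the include path and that std::string, being non-trivially copyable, takes the grow() path shown here).

#include "llvm/ADT/SmallVector.h"
#include <cassert>
#include <string>

int main() {
  llvm::SmallVector<std::string, 4> V;  // 4 elements of inline storage
  for (int I = 0; I < 5; ++I)
    V.push_back("x");                   // the 5th push_back calls grow()
  assert(V.size() == 5);
  assert(V.capacity() >= 8);            // NextPowerOf2(4 + 2) == 8
  return 0;
}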
255
256/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
257/// method implementations that are designed to work with POD-like T's.
258template <typename T>
259class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
260protected:
261 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
262
263 // No need to do a destroy loop for POD's.
264 static void destroy_range(T *, T *) {}
265
266 /// Move the range [I, E) onto the uninitialized memory
267 /// starting with "Dest", constructing elements into it as needed.
268 template<typename It1, typename It2>
269 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
270 // Just do a copy.
271 uninitialized_copy(I, E, Dest);
272 }
273
274 /// Copy the range [I, E) onto the uninitialized memory
275 /// starting with "Dest", constructing elements into it as needed.
276 template<typename It1, typename It2>
277 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
278 // Arbitrary iterator types; just use the basic implementation.
279 std::uninitialized_copy(I, E, Dest);
280 }
281
282 /// Copy the range [I, E) onto the uninitialized memory
283 /// starting with "Dest", constructing elements into it as needed.
284 template <typename T1, typename T2>
285 static void uninitialized_copy(
286 T1 *I, T1 *E, T2 *Dest,
287 typename std::enable_if<std::is_same<typename std::remove_const<T1>::type,
288 T2>::value>::type * = nullptr) {
289 // Use memcpy for PODs iterated by pointers (which includes SmallVector
290 // iterators): std::uninitialized_copy optimizes to memmove, but we can
291 // use memcpy here. Note that I and E are iterators and thus might be
292 // invalid for memcpy if they are equal.
293 if (I != E)
294 memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
295 }
296
297 /// Double the size of the allocated memory, guaranteeing space for at
298 /// least one more element or MinSize if specified.
299 void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }
300
301public:
302 void push_back(const T &Elt) {
303 if (LLVM_UNLIKELY(this->size() >= this->capacity()))
304 this->grow();
305 memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T));
306 this->set_size(this->size() + 1);
307 }
308
309 void pop_back() { this->set_size(this->size() - 1); }
310};
311
312/// This class consists of common code factored out of the SmallVector class to
313/// reduce code duplication based on the SmallVector 'N' template parameter.
314template <typename T>
315class SmallVectorImpl : public SmallVectorTemplateBase<T> {
316 using SuperClass = SmallVectorTemplateBase<T>;
317
318public:
319 using iterator = typename SuperClass::iterator;
320 using const_iterator = typename SuperClass::const_iterator;
321 using reference = typename SuperClass::reference;
322 using size_type = typename SuperClass::size_type;
323
324protected:
325 // Default ctor - Initialize to empty.
326 explicit SmallVectorImpl(unsigned N)
327 : SmallVectorTemplateBase<T>(N) {}
328
329public:
330 SmallVectorImpl(const SmallVectorImpl &) = delete;
331
332 ~SmallVectorImpl() {
333 // Subclass has already destructed this vector's elements.
334 // If this wasn't grown from the inline copy, deallocate the old space.
335 if (!this->isSmall())
336 free(this->begin());
337 }
338
339 void clear() {
340 this->destroy_range(this->begin(), this->end());
341 this->Size = 0;
342 }
343
344 void resize(size_type N) {
345 if (N < this->size()) {
346 this->destroy_range(this->begin()+N, this->end());
347 this->set_size(N);
348 } else if (N > this->size()) {
349 if (this->capacity() < N)
350 this->grow(N);
351 for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
352 new (&*I) T();
353 this->set_size(N);
354 }
355 }
356
357 void resize(size_type N, const T &NV) {
358 if (N < this->size()) {
359 this->destroy_range(this->begin()+N, this->end());
360 this->set_size(N);
361 } else if (N > this->size()) {
362 if (this->capacity() < N)
363 this->grow(N);
364 std::uninitialized_fill(this->end(), this->begin()+N, NV);
365 this->set_size(N);
366 }
367 }
368
369 void reserve(size_type N) {
370 if (this->capacity() < N)
371 this->grow(N);
372 }
373
374 LLVM_NODISCARD T pop_back_val() {
375 T Result = ::std::move(this->back());
376 this->pop_back();
377 return Result;
378 }
379
380 void swap(SmallVectorImpl &RHS);
381
382 /// Add the specified range to the end of the SmallVector.
383 template <typename in_iter,
384 typename = typename std::enable_if<std::is_convertible<
385 typename std::iterator_traits<in_iter>::iterator_category,
386 std::input_iterator_tag>::value>::type>
387 void append(in_iter in_start, in_iter in_end) {
388 size_type NumInputs = std::distance(in_start, in_end);
389 if (NumInputs > this->capacity() - this->size())
390 this->grow(this->size()+NumInputs);
391
392 this->uninitialized_copy(in_start, in_end, this->end());
393 this->set_size(this->size() + NumInputs);
394 }
395
396 /// Append \p NumInputs copies of \p Elt to the end.
397 void append(size_type NumInputs, const T &Elt) {
398 if (NumInputs > this->capacity() - this->size())
399 this->grow(this->size()+NumInputs);
400
401 std::uninitialized_fill_n(this->end(), NumInputs, Elt);
402 this->set_size(this->size() + NumInputs);
403 }
404
405 void append(std::initializer_list<T> IL) {
406 append(IL.begin(), IL.end());
407 }
408
409 // FIXME: Consider assigning over existing elements, rather than clearing &
410 // re-initializing them - for all assign(...) variants.
411
412 void assign(size_type NumElts, const T &Elt) {
413 clear();
414 if (this->capacity() < NumElts)
415 this->grow(NumElts);
416 this->set_size(NumElts);
417 std::uninitialized_fill(this->begin(), this->end(), Elt);
418 }
419
420 template <typename in_iter,
421 typename = typename std::enable_if<std::is_convertible<
422 typename std::iterator_traits<in_iter>::iterator_category,
423 std::input_iterator_tag>::value>::type>
424 void assign(in_iter in_start, in_iter in_end) {
425 clear();
426 append(in_start, in_end);
427 }
428
429 void assign(std::initializer_list<T> IL) {
430 clear();
431 append(IL);
432 }
433
434 iterator erase(const_iterator CI) {
435 // Just cast away constness because this is a non-const member function.
436 iterator I = const_cast<iterator>(CI);
437
438 assert(I >= this->begin() && "Iterator to erase is out of bounds.");
439 assert(I < this->end() && "Erasing at past-the-end iterator.");
440
441 iterator N = I;
442 // Shift all elts down one.
443 std::move(I+1, this->end(), I);
444 // Drop the last elt.
445 this->pop_back();
446 return(N);
447 }
448
449 iterator erase(const_iterator CS, const_iterator CE) {
450 // Just cast away constness because this is a non-const member function.
451 iterator S = const_cast<iterator>(CS);
452 iterator E = const_cast<iterator>(CE);
453
454 assert(S >= this->begin() && "Range to erase is out of bounds.");
455 assert(S <= E && "Trying to erase invalid range.");
456 assert(E <= this->end() && "Trying to erase past the end.");
457
458 iterator N = S;
459 // Shift all elts down.
460 iterator I = std::move(E, this->end(), S);
461 // Drop the last elts.
462 this->destroy_range(I, this->end());
463 this->set_size(I - this->begin());
464 return(N);
465 }
466
467 iterator insert(iterator I, T &&Elt) {
468 if (I == this->end()) { // Important special case for empty vector.
469 this->push_back(::std::move(Elt));
470 return this->end()-1;
471 }
472
473 assert(I >= this->begin() && "Insertion iterator is out of bounds.");
474 assert(I <= this->end() && "Inserting past the end of the vector.");
475
476 if (this->size() >= this->capacity()) {
477 size_t EltNo = I-this->begin();
478 this->grow();
479 I = this->begin()+EltNo;
480 }
481
482 ::new ((void*) this->end()) T(::std::move(this->back()));
483 // Push everything else over.
484 std::move_backward(I, this->end()-1, this->end());
485 this->set_size(this->size() + 1);
486
487 // If we just moved the element we're inserting, be sure to update
488 // the reference.
489 T *EltPtr = &Elt;
490 if (I <= EltPtr && EltPtr < this->end())
491 ++EltPtr;
492
493 *I = ::std::move(*EltPtr);
494 return I;
495 }
496
497 iterator insert(iterator I, const T &Elt) {
498 if (I == this->end()) { // Important special case for empty vector.
499 this->push_back(Elt);
500 return this->end()-1;
501 }
502
503 assert(I >= this->begin() && "Insertion iterator is out of bounds.");
504 assert(I <= this->end() && "Inserting past the end of the vector.");
505
506 if (this->size() >= this->capacity()) {
507 size_t EltNo = I-this->begin();
508 this->grow();
509 I = this->begin()+EltNo;
510 }
511 ::new ((void*) this->end()) T(std::move(this->back()));
512 // Push everything else over.
513 std::move_backward(I, this->end()-1, this->end());
514 this->set_size(this->size() + 1);
515
516 // If we just moved the element we're inserting, be sure to update
517 // the reference.
518 const T *EltPtr = &Elt;
519 if (I <= EltPtr && EltPtr < this->end())
520 ++EltPtr;
521
522 *I = *EltPtr;
523 return I;
524 }
525
526 iterator insert(iterator I, size_type NumToInsert, const T &Elt) {
527 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
528 size_t InsertElt = I - this->begin();
529
530 if (I == this->end()) { // Important special case for empty vector.
531 append(NumToInsert, Elt);
532 return this->begin()+InsertElt;
533 }
534
535 assert(I >= this->begin() && "Insertion iterator is out of bounds.");
536 assert(I <= this->end() && "Inserting past the end of the vector.");
537
538 // Ensure there is enough space.
539 reserve(this->size() + NumToInsert);
540
541 // Uninvalidate the iterator.
542 I = this->begin()+InsertElt;
543
544 // If there are more elements between the insertion point and the end of the
545 // range than there are being inserted, we can use a simple approach to
546 // insertion. Since we already reserved space, we know that this won't
547 // reallocate the vector.
548 if (size_t(this->end()-I) >= NumToInsert) {
549 T *OldEnd = this->end();
550 append(std::move_iterator<iterator>(this->end() - NumToInsert),
551 std::move_iterator<iterator>(this->end()));
552
553 // Copy the existing elements that get replaced.
554 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
555
556 std::fill_n(I, NumToInsert, Elt);
557 return I;
558 }
559
560 // Otherwise, we're inserting more elements than exist already, and we're
561 // not inserting at the end.
562
563 // Move over the elements that we're about to overwrite.
564 T *OldEnd = this->end();
565 this->set_size(this->size() + NumToInsert);
566 size_t NumOverwritten = OldEnd-I;
567 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
568
569 // Replace the overwritten part.
570 std::fill_n(I, NumOverwritten, Elt);
571
572 // Insert the non-overwritten middle part.
573 std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt);
574 return I;
575 }
576
577 template <typename ItTy,
578 typename = typename std::enable_if<std::is_convertible<
579 typename std::iterator_traits<ItTy>::iterator_category,
580 std::input_iterator_tag>::value>::type>
581 iterator insert(iterator I, ItTy From, ItTy To) {
582 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
583 size_t InsertElt = I - this->begin();
584
585 if (I == this->end()) { // Important special case for empty vector.
586 append(From, To);
587 return this->begin()+InsertElt;
588 }
589
590 assert(I >= this->begin() && "Insertion iterator is out of bounds.");
591 assert(I <= this->end() && "Inserting past the end of the vector.");
592
593 size_t NumToInsert = std::distance(From, To);
594
595 // Ensure there is enough space.
596 reserve(this->size() + NumToInsert);
597
598 // Uninvalidate the iterator.
599 I = this->begin()+InsertElt;
600
601 // If there are more elements between the insertion point and the end of the
602 // range than there are being inserted, we can use a simple approach to
603 // insertion. Since we already reserved space, we know that this won't
604 // reallocate the vector.
605 if (size_t(this->end()-I) >= NumToInsert) {
606 T *OldEnd = this->end();
607 append(std::move_iterator<iterator>(this->end() - NumToInsert),
608 std::move_iterator<iterator>(this->end()));
609
610 // Copy the existing elements that get replaced.
611 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
612
613 std::copy(From, To, I);
614 return I;
615 }
616
617 // Otherwise, we're inserting more elements than exist already, and we're
618 // not inserting at the end.
619
620 // Move over the elements that we're about to overwrite.
621 T *OldEnd = this->end();
622 this->set_size(this->size() + NumToInsert);
623 size_t NumOverwritten = OldEnd-I;
624 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
625
626 // Replace the overwritten part.
627 for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
628 *J = *From;
629 ++J; ++From;
630 }
631
632 // Insert the non-overwritten middle part.
633 this->uninitialized_copy(From, To, OldEnd);
634 return I;
635 }
636
637 void insert(iterator I, std::initializer_list<T> IL) {
638 insert(I, IL.begin(), IL.end());
639 }
640
641 template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
642 if (LLVM_UNLIKELY(this->size() >= this->capacity()))
643 this->grow();
644 ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
645 this->set_size(this->size() + 1);
646 return this->back();
647 }
648
649 SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
650
651 SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
652
653 bool operator==(const SmallVectorImpl &RHS) const {
654 if (this->size() != RHS.size()) return false;
655 return std::equal(this->begin(), this->end(), RHS.begin());
656 }
657 bool operator!=(const SmallVectorImpl &RHS) const {
658 return !(*this == RHS);
659 }
660
661 bool operator<(const SmallVectorImpl &RHS) const {
662 return std::lexicographical_compare(this->begin(), this->end(),
663 RHS.begin(), RHS.end());
664 }
665};
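Because every SmallVector<T, N> (defined further down in this header) derives from SmallVectorImpl<T>, helper APIs are usually written against SmallVectorImpl<T>& so that callers may pick any inline capacity N. A small sketch of that idiom (illustrative only; the helper names are hypothetical):

#include "llvm/ADT/SmallVector.h"

// Accepting SmallVectorImpl<int>& keeps the helper independent of the
// caller's chosen inline capacity N.
static void appendSquares(llvm::SmallVectorImpl<int> &Out, int UpTo) {
  for (int I = 1; I <= UpTo; ++I)
    Out.push_back(I * I);
}

void demo() {
  llvm::SmallVector<int, 4> Small;   // 4 elements of inline storage
  llvm::SmallVector<int, 32> Large;  // 32 elements of inline storage
  appendSquares(Small, 8);           // may spill to the heap
  appendSquares(Large, 8);           // stays in the inline buffer
}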
666
667template <typename T>
668void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
669 if (this == &RHS) return;
670
671 // We can only avoid copying elements if neither vector is small.
672 if (!this->isSmall() && !RHS.isSmall()) {
673 std::swap(this->BeginX, RHS.BeginX);
674 std::swap(this->Size, RHS.Size);
675 std::swap(this->Capacity, RHS.Capacity);
676 return;
677 }
678 if (RHS.size() > this->capacity())
679 this->grow(RHS.size());
680 if (this->size() > RHS.capacity())
681 RHS.grow(this->size());
682
683 // Swap the shared elements.
684 size_t NumShared = this->size();
685 if (NumShared > RHS.size()) NumShared = RHS.size();
686 for (size_type i = 0; i != NumShared; ++i)
687 std::swap((*this)[i], RHS[i]);
688
689 // Copy over the extra elts.
690 if (this->size() > RHS.size()) {
691 size_t EltDiff = this->size() - RHS.size();
692 this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
693 RHS.set_size(RHS.size() + EltDiff);
694 this->destroy_range(this->begin()+NumShared, this->end());
695 this->set_size(NumShared);
696 } else if (RHS.size() > this->size()) {
697 size_t EltDiff = RHS.size() - this->size();
698 this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
699 this->set_size(this->size() + EltDiff);
700 this->destroy_range(RHS.begin()+NumShared, RHS.end());
701 RHS.set_size(NumShared);
702 }
703}
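For reference, a tiny demonstration of the swap path implemented above (illustrative only):

#include "llvm/ADT/SmallVector.h"
#include <utility>

void swapDemo() {
  llvm::SmallVector<int, 4> A = {1, 2, 3};
  llvm::SmallVector<int, 4> B = {7, 8};
  // Both vectors still live in their inline storage, so swap() takes the
  // element-wise path above instead of simply exchanging heap pointers.
  A.swap(B);        // A == {7, 8}, B == {1, 2, 3}
  std::swap(A, B);  // picks up the std::swap overload declared later in this header
}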
704
705template <typename T>
706SmallVectorImpl<T> &SmallVectorImpl<T>::
707 operator=(const SmallVectorImpl<T> &RHS) {
708 // Avoid self-assignment.
709 if (this == &RHS) return *this;
710
711 // If we already have sufficient space, assign the common elements, then
712 // destroy any excess.
713 size_t RHSSize = RHS.size();
714 size_t CurSize = this->size();
715 if (CurSize >= RHSSize) {
716 // Assign common elements.
717 iterator NewEnd;
718 if (RHSSize)
719 NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
720 else
721 NewEnd = this->begin();
722
723 // Destroy excess elements.
724 this->destroy_range(NewEnd, this->end());
725
726 // Trim.
727 this->set_size(RHSSize);
728 return *this;
729 }
730
731 // If we have to grow to have enough elements, destroy the current elements.
732 // This allows us to avoid copying them during the grow.
733 // FIXME: don't do this if they're efficiently moveable.
734 if (this->capacity() < RHSSize) {
735 // Destroy current elements.
736 this->destroy_range(this->begin(), this->end());
737 this->set_size(0);
738 CurSize = 0;
739 this->grow(RHSSize);
740 } else if (CurSize) {
741 // Otherwise, use assignment for the already-constructed elements.
742 std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
743 }
744
745 // Copy construct the new elements in place.
746 this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
747 this->begin()+CurSize);
748
749 // Set end.
750 this->set_size(RHSSize);
751 return *this;
752}
753
754template <typename T>
755SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
756 // Avoid self-assignment.
757 if (this == &RHS) return *this;
758
759 // If the RHS isn't small, clear this vector and then steal its buffer.
760 if (!RHS.isSmall()) {
761 this->destroy_range(this->begin(), this->end());
762 if (!this->isSmall()) free(this->begin());
763 this->BeginX = RHS.BeginX;
764 this->Size = RHS.Size;
765 this->Capacity = RHS.Capacity;
766 RHS.resetToSmall();
767 return *this;
768 }
769
770 // If we already have sufficient space, assign the common elements, then
771 // destroy any excess.
772 size_t RHSSize = RHS.size();
773 size_t CurSize = this->size();
774 if (CurSize >= RHSSize) {
775 // Assign common elements.
776 iterator NewEnd = this->begin();
777 if (RHSSize)
778 NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);
779
780 // Destroy excess elements and trim the bounds.
781 this->destroy_range(NewEnd, this->end());
782 this->set_size(RHSSize);
783
784 // Clear the RHS.
785 RHS.clear();
786
787 return *this;
788 }
789
790 // If we have to grow to have enough elements, destroy the current elements.
791 // This allows us to avoid copying them during the grow.
792 // FIXME: this may not actually make any sense if we can efficiently move
793 // elements.
794 if (this->capacity() < RHSSize) {
795 // Destroy current elements.
796 this->destroy_range(this->begin(), this->end());
797 this->set_size(0);
798 CurSize = 0;
799 this->grow(RHSSize);
800 } else if (CurSize) {
801 // Otherwise, use assignment for the already-constructed elements.
802 std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
803 }
804
805 // Move-construct the new elements in place.
806 this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
807 this->begin()+CurSize);
808
809 // Set end.
810 this->set_size(RHSSize);
811
812 RHS.clear();
813 return *this;
814}
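A short sketch of the buffer-stealing case handled above, where the right-hand side has already spilled to the heap (illustrative only):

#include "llvm/ADT/SmallVector.h"
#include <utility>

void moveDemo() {
  llvm::SmallVector<int, 2> Src;
  for (int I = 0; I < 100; ++I)  // far more than N == 2, so Src becomes heap-backed
    Src.push_back(I);

  llvm::SmallVector<int, 2> Dst;
  // RHS is not small: Dst takes ownership of Src's heap buffer, and Src is
  // reset to its empty inline state.
  Dst = std::move(Src);
}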
815
816/// Storage for the SmallVector elements. This is specialized for the N=0 case
817/// to avoid allocating unnecessary storage.
818template <typename T, unsigned N>
819struct SmallVectorStorage {
820 AlignedCharArrayUnion<T> InlineElts[N];
821};
822
823/// We need the storage to be properly aligned even for small-size of 0 so that
824/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
825/// well-defined.
826template <typename T> struct alignas(alignof(T)) SmallVectorStorage<T, 0> {};
827
828/// This is a 'vector' (really, a variable-sized array), optimized
829/// for the case when the array is small. It contains some number of elements
830/// in-place, which allows it to avoid heap allocation when the actual number of
831/// elements is below that threshold. This allows normal "small" cases to be
832/// fast without losing generality for large inputs.
833///
834/// Note that this does not attempt to be exception safe.
835///
836template <typename T, unsigned N>
837class SmallVector : public SmallVectorImpl<T>, SmallVectorStorage<T, N> {
838public:
839 SmallVector() : SmallVectorImpl<T>(N) {}
840
841 ~SmallVector() {
842 // Destroy the constructed elements in the vector.
843 this->destroy_range(this->begin(), this->end());
844 }
845
846 explicit SmallVector(size_t Size, const T &Value = T())
847 : SmallVectorImpl<T>(N) {
848 this->assign(Size, Value);
849 }
850
851 template <typename ItTy,
852 typename = typename std::enable_if<std::is_convertible<
853 typename std::iterator_traits<ItTy>::iterator_category,
854 std::input_iterator_tag>::value>::type>
855 SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
856 this->append(S, E);
857 }
858
859 template <typename RangeTy>
860 explicit SmallVector(const iterator_range<RangeTy> &R)
861 : SmallVectorImpl<T>(N) {
862 this->append(R.begin(), R.end());
863 }
864
865 SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
866 this->assign(IL);
867 }
868
869 SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
870 if (!RHS.empty())
871 SmallVectorImpl<T>::operator=(RHS);
872 }
873
874 const SmallVector &operator=(const SmallVector &RHS) {
875 SmallVectorImpl<T>::operator=(RHS);
876 return *this;
877 }
878
879 SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
880 if (!RHS.empty())
881 SmallVectorImpl<T>::operator=(::std::move(RHS));
882 }
883
884 SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
885 if (!RHS.empty())
886 SmallVectorImpl<T>::operator=(::std::move(RHS));
887 }
888
889 const SmallVector &operator=(SmallVector &&RHS) {
890 SmallVectorImpl<T>::operator=(::std::move(RHS));
891 return *this;
892 }
893
894 const SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
895 SmallVectorImpl<T>::operator=(::std::move(RHS));
896 return *this;
897 }
898
899 const SmallVector &operator=(std::initializer_list<T> IL) {
900 this->assign(IL);
901 return *this;
902 }
903};
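A brief usage sketch of the container defined above, assuming the usual include path llvm/ADT/SmallVector.h (illustrative only):

#include "llvm/ADT/SmallVector.h"
#include <cassert>

void smallVectorBasics() {
  // Inline capacity N = 8, so no heap allocation is needed yet.
  llvm::SmallVector<int, 8> V = {1, 2, 3};

  V.push_back(4);              // {1, 2, 3, 4}
  V.insert(V.begin() + 1, 9);  // {1, 9, 2, 3, 4}
  V.erase(V.begin());          // {9, 2, 3, 4}
  V.append(3, 0);              // {9, 2, 3, 4, 0, 0, 0}

  assert(V.size() == 7 && V.front() == 9);
}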
904
905template <typename T, unsigned N>
906inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
907 return X.capacity_in_bytes();
908}
909
910} // end namespace llvm
911
912namespace std {
913
914 /// Implement std::swap in terms of SmallVector swap.
915 template<typename T>
916 inline void
917 swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
918 LHS.swap(RHS);
919 }
920
921 /// Implement std::swap in terms of SmallVector swap.
922 template<typename T, unsigned N>
923 inline void
924 swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
925 LHS.swap(RHS);
926 }
927
928} // end namespace std
929
930#endif // LLVM_ADT_SMALLVECTOR_H

/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <string>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34
35class APInt;
36
37inline APInt operator-(APInt);
38
39//===----------------------------------------------------------------------===//
40// APInt Class
41//===----------------------------------------------------------------------===//
42
43/// Class for arbitrary precision integers.
44///
45/// APInt is a functional replacement for common-case unsigned integer types like
46/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
47/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
48/// than 64-bits of precision. APInt provides a variety of arithmetic operators
49/// and methods to manipulate integer values of any bit-width. It supports both
50/// the typical integer arithmetic and comparison operations as well as bitwise
51/// manipulation.
52///
53/// The class has several invariants worth noting:
54/// * All bit, byte, and word positions are zero-based.
55/// * Once the bit width is set, it doesn't change except by the Truncate,
56/// SignExtend, or ZeroExtend operations.
57/// * All binary operators must be on APInt instances of the same bit width.
58/// Attempting to use these operators on instances with different bit
59/// widths will yield an assertion.
60/// * The value is stored canonically as an unsigned value. For operations
61/// where it makes a difference, there are both signed and unsigned variants
62/// of the operation. For example, sdiv and udiv. However, because the bit
63/// widths must be the same, operations such as Mul and Add produce the same
64/// results regardless of whether the values are interpreted as signed or
65/// not.
66/// * In general, the class tries to follow the style of computation that LLVM
67/// uses in its IR. This simplifies its use for LLVM.
68///
69class LLVM_NODISCARD APInt {
70public:
71 typedef uint64_t WordType;
72
73 /// This enum is used to hold the constants we needed for APInt.
74 enum : unsigned {
75 /// Byte size of a word.
76 APINT_WORD_SIZE = sizeof(WordType),
77 /// Bits in a word.
78 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT
79 };
80
81 enum class Rounding {
82 DOWN,
83 TOWARD_ZERO,
84 UP,
85 };
86
87 static const WordType WORDTYPE_MAX = ~WordType(0);
88
89private:
90 /// This union is used to store the integer value. When the
91 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
92 union {
93 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
94 uint64_t *pVal; ///< Used to store the >64 bits integer value.
95 } U;
96
97 unsigned BitWidth; ///< The number of bits in this APInt.
98
99 friend struct DenseMapAPIntKeyInfo;
100
101 friend class APSInt;
102
103 /// Fast internal constructor
104 ///
105 /// This constructor is used only internally for speed of construction of
106 /// temporaries. It is unsafe for general use so it is not public.
107 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
108 U.pVal = val;
109 }
110
111 /// Determine if this APInt just has one word to store value.
112 ///
113 /// \returns true if the number of bits <= 64, false otherwise.
114 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
115
116 /// Determine which word a bit is in.
117 ///
118 /// \returns the word position for the specified bit position.
119 static unsigned whichWord(unsigned bitPosition) {
120 return bitPosition / APINT_BITS_PER_WORD;
121 }
122
123 /// Determine which bit in a word a bit is in.
124 ///
125 /// \returns the bit position in a word for the specified bit position
126 /// in the APInt.
127 static unsigned whichBit(unsigned bitPosition) {
128 return bitPosition % APINT_BITS_PER_WORD;
129 }
130
131 /// Get a single bit mask.
132 ///
133 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
134 /// This method generates and returns a uint64_t (word) mask for a single
135 /// bit at a specific bit position. This is used to mask the bit in the
136 /// corresponding word.
137 static uint64_t maskBit(unsigned bitPosition) {
138 return 1ULL << whichBit(bitPosition);
139 }
140
141 /// Clear unused high order bits
142 ///
143 /// This method is used internally to clear the top "N" bits in the high order
144 /// word that are not used by the APInt. This is needed after the most
145 /// significant word is assigned a value to ensure that those bits are
146 /// zero'd out.
147 APInt &clearUnusedBits() {
148 // Compute how many bits are used in the final word
149 unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
150
151 // Mask out the high bits.
152 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
153 if (isSingleWord())
154 U.VAL &= mask;
155 else
156 U.pVal[getNumWords() - 1] &= mask;
157 return *this;
158 }
159
160 /// Get the word corresponding to a bit position
161 /// \returns the corresponding word for the specified bit position.
162 uint64_t getWord(unsigned bitPosition) const {
163 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
164 }
165
166 /// Utility method to change the bit width of this APInt to new bit width,
167 /// allocating and/or deallocating as necessary. There is no guarantee on the
168 /// value of any bits upon return. Caller should populate the bits after.
169 void reallocate(unsigned NewBitWidth);
170
171 /// Convert a char array into an APInt
172 ///
173 /// \param radix 2, 8, 10, 16, or 36
174 /// Converts a string into a number. The string must be non-empty
175 /// and well-formed as a number of the given base. The bit-width
176 /// must be sufficient to hold the result.
177 ///
178 /// This is used by the constructors that take string arguments.
179 ///
180 /// StringRef::getAsInteger is superficially similar but (1) does
181 /// not assume that the string is well-formed and (2) grows the
182 /// result to hold the input.
183 void fromString(unsigned numBits, StringRef str, uint8_t radix);
184
185 /// An internal division function for dividing APInts.
186 ///
187 /// This is used by the toString method to divide by the radix. It simply
188 /// provides a more convenient form of divide for internal use since KnuthDiv
189 /// has specific constraints on its inputs. If those constraints are not met
190 /// then it provides a simpler form of divide.
191 static void divide(const WordType *LHS, unsigned lhsWords,
192 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
193 WordType *Remainder);
194
195 /// out-of-line slow case for inline constructor
196 void initSlowCase(uint64_t val, bool isSigned);
197
198 /// shared code between two array constructors
199 void initFromArray(ArrayRef<uint64_t> array);
200
201 /// out-of-line slow case for inline copy constructor
202 void initSlowCase(const APInt &that);
203
204 /// out-of-line slow case for shl
205 void shlSlowCase(unsigned ShiftAmt);
206
207 /// out-of-line slow case for lshr.
208 void lshrSlowCase(unsigned ShiftAmt);
209
210 /// out-of-line slow case for ashr.
211 void ashrSlowCase(unsigned ShiftAmt);
212
213 /// out-of-line slow case for operator=
214 void AssignSlowCase(const APInt &RHS);
215
216 /// out-of-line slow case for operator==
217 bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY;
218
219 /// out-of-line slow case for countLeadingZeros
220 unsigned countLeadingZerosSlowCase() const LLVM_READONLY;
221
222 /// out-of-line slow case for countLeadingOnes.
223 unsigned countLeadingOnesSlowCase() const LLVM_READONLY;
224
225 /// out-of-line slow case for countTrailingZeros.
226 unsigned countTrailingZerosSlowCase() const LLVM_READONLY;
227
228 /// out-of-line slow case for countTrailingOnes
229 unsigned countTrailingOnesSlowCase() const LLVM_READONLY;
230
231 /// out-of-line slow case for countPopulation
232 unsigned countPopulationSlowCase() const LLVM_READONLY;
233
234 /// out-of-line slow case for intersects.
235 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY;
236
237 /// out-of-line slow case for isSubsetOf.
238 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY;
239
240 /// out-of-line slow case for setBits.
241 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
242
243 /// out-of-line slow case for flipAllBits.
244 void flipAllBitsSlowCase();
245
246 /// out-of-line slow case for operator&=.
247 void AndAssignSlowCase(const APInt& RHS);
248
249 /// out-of-line slow case for operator|=.
250 void OrAssignSlowCase(const APInt& RHS);
251
252 /// out-of-line slow case for operator^=.
253 void XorAssignSlowCase(const APInt& RHS);
254
255 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
256 /// to, or greater than RHS.
257 int compare(const APInt &RHS) const LLVM_READONLY;
258
259 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
260 /// to, or greater than RHS.
261 int compareSigned(const APInt &RHS) const LLVM_READONLY;
262
263public:
264 /// \name Constructors
265 /// @{
266
267 /// Create a new APInt of numBits width, initialized as val.
268 ///
269 /// If isSigned is true then val is treated as if it were a signed value
270 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
271 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
272 /// the range of val are zero filled).
273 ///
274 /// \param numBits the bit width of the constructed APInt
275 /// \param val the initial value of the APInt
276 /// \param isSigned how to treat signedness of val
277 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
278 : BitWidth(numBits) {
279 assert(BitWidth && "bitwidth too small");
280 if (isSingleWord()) {
281 U.VAL = val;
282 clearUnusedBits();
283 } else {
284 initSlowCase(val, isSigned);
285 }
286 }
287
288 /// Construct an APInt of numBits width, initialized as bigVal[].
289 ///
290 /// Note that bigVal.size() can be smaller or larger than the corresponding
291 /// bit width but any extraneous bits will be dropped.
292 ///
293 /// \param numBits the bit width of the constructed APInt
294 /// \param bigVal a sequence of words to form the initial value of the APInt
295 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
296
297 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
298 /// deprecated because this constructor is prone to ambiguity with the
299 /// APInt(unsigned, uint64_t, bool) constructor.
300 ///
301 /// If this overload is ever deleted, care should be taken to prevent calls
302 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
303 /// constructor.
304 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
305
306 /// Construct an APInt from a string representation.
307 ///
308 /// This constructor interprets the string \p str in the given radix. The
309 /// interpretation stops when the first character that is not suitable for the
310 /// radix is encountered, or the end of the string. Acceptable radix values
311 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
312 /// string to require more bits than numBits.
313 ///
314 /// \param numBits the bit width of the constructed APInt
315 /// \param str the string to be interpreted
316 /// \param radix the radix to use for the conversion
317 APInt(unsigned numBits, StringRef str, uint8_t radix);
318
319 /// Simply makes *this a copy of that.
320 /// Copy Constructor.
321 APInt(const APInt &that) : BitWidth(that.BitWidth) {
322 if (isSingleWord())
323 U.VAL = that.U.VAL;
324 else
325 initSlowCase(that);
326 }
327
328 /// Move Constructor.
329 APInt(APInt &&that) : BitWidth(that.BitWidth) {
330 memcpy(&U, &that.U, sizeof(U));
331 that.BitWidth = 0;
332 }
333
334 /// Destructor.
335 ~APInt() {
336 if (needsCleanup())
337 delete[] U.pVal;
338 }
339
340 /// Default constructor that creates an uninteresting APInt
341 /// representing a 1-bit zero value.
342 ///
343 /// This is useful for object deserialization (pair this with the static
344 /// method Read).
345 explicit APInt() : BitWidth(1) { U.VAL = 0; }
346
347 /// Returns whether this instance allocated memory.
348 bool needsCleanup() const { return !isSingleWord(); }
349
350 /// Used to insert APInt objects, or objects that contain APInt objects, into
351 /// FoldingSets.
352 void Profile(FoldingSetNodeID &id) const;
353
354 /// @}
355 /// \name Value Tests
356 /// @{
357
358 /// Determine sign of this APInt.
359 ///
360 /// This tests the high bit of this APInt to determine if it is set.
361 ///
362 /// \returns true if this APInt is negative, false otherwise
363 bool isNegative() const { return (*this)[BitWidth - 1]; }
364
365 /// Determine if this APInt Value is non-negative (>= 0)
366 ///
367 /// This tests the high bit of the APInt to determine if it is unset.
368 bool isNonNegative() const { return !isNegative(); }
369
370 /// Determine if sign bit of this APInt is set.
371 ///
372 /// This tests the high bit of this APInt to determine if it is set.
373 ///
374 /// \returns true if this APInt has its sign bit set, false otherwise.
375 bool isSignBitSet() const { return (*this)[BitWidth-1]; }
376
377 /// Determine if sign bit of this APInt is clear.
378 ///
379 /// This tests the high bit of this APInt to determine if it is clear.
380 ///
381 /// \returns true if this APInt has its sign bit clear, false otherwise.
382 bool isSignBitClear() const { return !isSignBitSet(); }
383
384 /// Determine if this APInt Value is positive.
385 ///
386 /// This tests if the value of this APInt is positive (> 0). Note
387 /// that 0 is not a positive value.
388 ///
389 /// \returns true if this APInt is positive.
390 bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
391
392 /// Determine if all bits are set
393 ///
394 /// This checks to see if all bits of the APInt are set or not.
395 bool isAllOnesValue() const {
396 if (isSingleWord())
397 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
398 return countTrailingOnesSlowCase() == BitWidth;
399 }
400
401 /// Determine if all bits are clear
402 ///
403 /// This checks to see if all bits of the APInt are clear or
404 /// not.
405 bool isNullValue() const { return !*this; }
406
407 /// Determine if this is a value of 1.
408 ///
409 /// This checks to see if the value of this APInt is one.
410 bool isOneValue() const {
411 if (isSingleWord())
412 return U.VAL == 1;
413 return countLeadingZerosSlowCase() == BitWidth - 1;
414 }
415
416 /// Determine if this is the largest unsigned value.
417 ///
418 /// This checks to see if the value of this APInt is the maximum unsigned
419 /// value for the APInt's bit width.
420 bool isMaxValue() const { return isAllOnesValue(); }
421
422 /// Determine if this is the largest signed value.
423 ///
424 /// This checks to see if the value of this APInt is the maximum signed
425 /// value for the APInt's bit width.
426 bool isMaxSignedValue() const {
427 if (isSingleWord())
428 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
429 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
430 }
431
432 /// Determine if this is the smallest unsigned value.
433 ///
434 /// This checks to see if the value of this APInt is the minimum unsigned
435 /// value for the APInt's bit width.
436 bool isMinValue() const { return isNullValue(); }
437
438 /// Determine if this is the smallest signed value.
439 ///
440 /// This checks to see if the value of this APInt is the minimum signed
441 /// value for the APInt's bit width.
442 bool isMinSignedValue() const {
443 if (isSingleWord())
444 return U.VAL == (WordType(1) << (BitWidth - 1));
445 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
446 }
447
448 /// Check if this APInt has an N-bits unsigned integer value.
449 bool isIntN(unsigned N) const {
450 assert(N && "N == 0 ???");
451 return getActiveBits() <= N;
452 }
453
454 /// Check if this APInt has an N-bits signed integer value.
455 bool isSignedIntN(unsigned N) const {
456 assert(N && "N == 0 ???");
457 return getMinSignedBits() <= N;
458 }
459
460 /// Check if this APInt's value is a power of two greater than zero.
461 ///
462 /// \returns true if the argument APInt value is a power of two > 0.
463 bool isPowerOf2() const {
464 if (isSingleWord())
465 return isPowerOf2_64(U.VAL);
466 return countPopulationSlowCase() == 1;
467 }
468
469 /// Check if the APInt's value is returned by getSignMask.
470 ///
471 /// \returns true if this is the value returned by getSignMask.
472 bool isSignMask() const { return isMinSignedValue(); }
473
474 /// Convert APInt to a boolean value.
475 ///
476 /// This converts the APInt to a boolean value as a test against zero.
477 bool getBoolValue() const { return !!*this; }
478
479 /// If this value is smaller than the specified limit, return it, otherwise
480 /// return the limit value. This causes the value to saturate to the limit.
481 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX) const {
482 return ugt(Limit) ? Limit : getZExtValue();
483 }
484
485 /// Check if the APInt consists of a repeated bit pattern.
486 ///
487 /// e.g. 0x01010101 satisfies isSplat(8).
488 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
489 /// width without remainder.
490 bool isSplat(unsigned SplatSizeInBits) const;
491
492 /// \returns true if this APInt value is a sequence of \param numBits ones
493 /// starting at the least significant bit with the remainder zero.
494 bool isMask(unsigned numBits) const {
495 assert(numBits != 0 && "numBits must be non-zero");
496 assert(numBits <= BitWidth && "numBits out of range");
497 if (isSingleWord())
498 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
499 unsigned Ones = countTrailingOnesSlowCase();
500 return (numBits == Ones) &&
501 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
502 }
503
504 /// \returns true if this APInt is a non-empty sequence of ones starting at
505 /// the least significant bit with the remainder zero.
506 /// Ex. isMask(0x0000FFFFU) == true.
507 bool isMask() const {
508 if (isSingleWord())
509 return isMask_64(U.VAL);
510 unsigned Ones = countTrailingOnesSlowCase();
511 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
512 }
513
514 /// Return true if this APInt value contains a sequence of ones with
515 /// the remainder zero.
516 bool isShiftedMask() const {
517 if (isSingleWord())
518 return isShiftedMask_64(U.VAL);
519 unsigned Ones = countPopulationSlowCase();
520 unsigned LeadZ = countLeadingZerosSlowCase();
521 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
522 }
523
524 /// @}
525 /// \name Value Generators
526 /// @{
527
528 /// Gets maximum unsigned value of APInt for specific bit width.
529 static APInt getMaxValue(unsigned numBits) {
530 return getAllOnesValue(numBits);
531 }
532
533 /// Gets maximum signed value of APInt for a specific bit width.
534 static APInt getSignedMaxValue(unsigned numBits) {
535 APInt API = getAllOnesValue(numBits);
536 API.clearBit(numBits - 1);
537 return API;
538 }
539
540 /// Gets minimum unsigned value of APInt for a specific bit width.
541 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
542
543 /// Gets minimum signed value of APInt for a specific bit width.
544 static APInt getSignedMinValue(unsigned numBits) {
545 APInt API(numBits, 0);
546 API.setBit(numBits - 1);
547 return API;
548 }
549
550 /// Get the SignMask for a specific bit width.
551 ///
552 /// This is just a wrapper function of getSignedMinValue(), and it helps code
553 /// readability when we want to get a SignMask.
554 static APInt getSignMask(unsigned BitWidth) {
555 return getSignedMinValue(BitWidth);
556 }
557
558 /// Get the all-ones value.
559 ///
560 /// \returns the all-ones value for an APInt of the specified bit-width.
561 static APInt getAllOnesValue(unsigned numBits) {
562 return APInt(numBits, WORDTYPE_MAX, true);
563 }
564
565 /// Get the '0' value.
566 ///
567 /// \returns the '0' value for an APInt of the specified bit-width.
568 static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
569
570 /// Compute an APInt containing numBits highbits from this APInt.
571 ///
572 /// Get an APInt with the same BitWidth as this APInt, just zero mask
573 /// the low bits and right shift to the least significant bit.
574 ///
575 /// \returns the high "numBits" bits of this APInt.
576 APInt getHiBits(unsigned numBits) const;
577
578 /// Compute an APInt containing numBits lowbits from this APInt.
579 ///
580 /// Get an APInt with the same BitWidth as this APInt, just zero mask
581 /// the high bits.
582 ///
583 /// \returns the low "numBits" bits of this APInt.
584 APInt getLoBits(unsigned numBits) const;
585
586 /// Return an APInt with exactly one bit set in the result.
587 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
588 APInt Res(numBits, 0);
589 Res.setBit(BitNo);
590 return Res;
591 }
592
593 /// Get a value with a block of bits set.
594 ///
595 /// Constructs an APInt value that has a contiguous range of bits set. The
596 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
597 /// bits will be zero. For example, with parameters (32, 0, 16) you would get
598 /// 0x0000FFFF. If hiBit is less than loBit then the set bits "wrap". For
599 /// example, with parameters (32, 28, 4), you would get 0xF000000F.
600 ///
601 /// \param numBits the intended bit width of the result
602 /// \param loBit the index of the lowest bit set.
603 /// \param hiBit the index of the highest bit set.
604 ///
605 /// \returns An APInt value with the requested bits set.
606 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
607 APInt Res(numBits, 0);
608 Res.setBits(loBit, hiBit);
609 return Res;
610 }
611
612 /// Get a value with upper bits starting at loBit set.
613 ///
614 /// Constructs an APInt value that has a contiguous range of bits set. The
615 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
616 /// bits will be zero. For example, with parameters (32, 12) you would get
617 /// 0xFFFFF000.
618 ///
619 /// \param numBits the intended bit width of the result
620 /// \param loBit the index of the lowest bit to set.
621 ///
622 /// \returns An APInt value with the requested bits set.
623 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
624 APInt Res(numBits, 0);
625 Res.setBitsFrom(loBit);
626 return Res;
627 }
628
629 /// Get a value with high bits set
630 ///
631 /// Constructs an APInt value that has the top hiBitsSet bits set.
632 ///
633 /// \param numBits the bitwidth of the result
634 /// \param hiBitsSet the number of high-order bits set in the result.
635 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
636 APInt Res(numBits, 0);
637 Res.setHighBits(hiBitsSet);
638 return Res;
639 }
640
641 /// Get a value with low bits set
642 ///
643 /// Constructs an APInt value that has the bottom loBitsSet bits set.
644 ///
645 /// \param numBits the bitwidth of the result
646 /// \param loBitsSet the number of low-order bits set in the result.
647 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
648 APInt Res(numBits, 0);
649 Res.setLowBits(loBitsSet);
650 return Res;
651 }
652
653 /// Return a value containing V broadcasted over NewLen bits.
654 static APInt getSplat(unsigned NewLen, const APInt &V);
655
656 /// Determine if two APInts have the same value, after zero-extending
657 /// one of them (if needed!) to ensure that the bit-widths match.
658 static bool isSameValue(const APInt &I1, const APInt &I2) {
659 if (I1.getBitWidth() == I2.getBitWidth())
660 return I1 == I2;
661
662 if (I1.getBitWidth() > I2.getBitWidth())
663 return I1 == I2.zext(I1.getBitWidth());
664
665 return I1.zext(I2.getBitWidth()) == I2;
666 }
667
668 /// Overload to compute a hash_code for an APInt value.
669 friend hash_code hash_value(const APInt &Arg);
670
671 /// This function returns a pointer to the internal storage of the APInt.
672 /// This is useful for writing out the APInt in binary form without any
673 /// conversions.
674 const uint64_t *getRawData() const {
675 if (isSingleWord())
676 return &U.VAL;
677 return &U.pVal[0];
678 }
679
680 /// @}
681 /// \name Unary Operators
682 /// @{
683
684 /// Postfix increment operator.
685 ///
686 /// Increments *this by 1.
687 ///
688 /// \returns a new APInt value representing the original value of *this.
689 const APInt operator++(int) {
690 APInt API(*this);
691 ++(*this);
692 return API;
693 }
694
695 /// Prefix increment operator.
696 ///
697 /// \returns *this incremented by one
698 APInt &operator++();
699
700 /// Postfix decrement operator.
701 ///
702 /// Decrements *this by 1.
703 ///
704 /// \returns a new APInt value representing the original value of *this.
705 const APInt operator--(int) {
706 APInt API(*this);
707 --(*this);
708 return API;
709 }
710
711 /// Prefix decrement operator.
712 ///
713 /// \returns *this decremented by one.
714 APInt &operator--();
715
716 /// Logical negation operator.
717 ///
718 /// Performs logical negation operation on this APInt.
719 ///
720 /// \returns true if *this is zero, false otherwise.
721 bool operator!() const {
722 if (isSingleWord())
723 return U.VAL == 0;
724 return countLeadingZerosSlowCase() == BitWidth;
725 }
726
727 /// @}
728 /// \name Assignment Operators
729 /// @{
730
731 /// Copy assignment operator.
732 ///
733 /// \returns *this after assignment of RHS.
734 APInt &operator=(const APInt &RHS) {
735 // If the bitwidths are the same, we can avoid mucking with memory
736 if (isSingleWord() && RHS.isSingleWord()) {
737 U.VAL = RHS.U.VAL;
738 BitWidth = RHS.BitWidth;
739 return clearUnusedBits();
740 }
741
742 AssignSlowCase(RHS);
743 return *this;
744 }
745
746 /// Move assignment operator.
747 APInt &operator=(APInt &&that) {
748#ifdef _MSC_VER
749 // The MSVC std::shuffle implementation still does self-assignment.
750 if (this == &that)
751 return *this;
752#endif
753 assert(this != &that && "Self-move not supported");
754 if (!isSingleWord())
755 delete[] U.pVal;
756
757 // Use memcpy so that type based alias analysis sees both VAL and pVal
758 // as modified.
759 memcpy(&U, &that.U, sizeof(U));
760
761 BitWidth = that.BitWidth;
762 that.BitWidth = 0;
763
764 return *this;
765 }
766
767 /// Assignment operator.
768 ///
769 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
770 /// the bit width, the excess bits are truncated. If the bit width is larger
771 /// than 64, the value is zero filled in the unspecified high order bits.
772 ///
773 /// \returns *this after assignment of RHS value.
774 APInt &operator=(uint64_t RHS) {
775 if (isSingleWord()) {
776 U.VAL = RHS;
777 clearUnusedBits();
778 } else {
779 U.pVal[0] = RHS;
780 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
781 }
782 return *this;
783 }
784
785 /// Bitwise AND assignment operator.
786 ///
787 /// Performs a bitwise AND operation on this APInt and RHS. The result is
788 /// assigned to *this.
789 ///
790 /// \returns *this after ANDing with RHS.
791 APInt &operator&=(const APInt &RHS) {
792 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
793 if (isSingleWord())
794 U.VAL &= RHS.U.VAL;
795 else
796 AndAssignSlowCase(RHS);
797 return *this;
798 }
799
800 /// Bitwise AND assignment operator.
801 ///
802 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
803 /// logically zero-extended or truncated to match the bit-width of
804 /// the LHS.
805 APInt &operator&=(uint64_t RHS) {
806 if (isSingleWord()) {
807 U.VAL &= RHS;
808 return *this;
809 }
810 U.pVal[0] &= RHS;
811 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
812 return *this;
813 }
814
815 /// Bitwise OR assignment operator.
816 ///
817 /// Performs a bitwise OR operation on this APInt and RHS. The result is
818 /// assigned *this;
819 ///
820 /// \returns *this after ORing with RHS.
821 APInt &operator|=(const APInt &RHS) {
822 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
823 if (isSingleWord())
824 U.VAL |= RHS.U.VAL;
825 else
826 OrAssignSlowCase(RHS);
827 return *this;
828 }
829
830 /// Bitwise OR assignment operator.
831 ///
832 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
833 /// logically zero-extended or truncated to match the bit-width of
834 /// the LHS.
835 APInt &operator|=(uint64_t RHS) {
836 if (isSingleWord()) {
837 U.VAL |= RHS;
838 clearUnusedBits();
839 } else {
840 U.pVal[0] |= RHS;
841 }
842 return *this;
843 }
844
845 /// Bitwise XOR assignment operator.
846 ///
847 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
848 /// assigned to *this.
849 ///
850 /// \returns *this after XORing with RHS.
851 APInt &operator^=(const APInt &RHS) {
852 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
853 if (isSingleWord())
854 U.VAL ^= RHS.U.VAL;
855 else
856 XorAssignSlowCase(RHS);
857 return *this;
858 }
859
860 /// Bitwise XOR assignment operator.
861 ///
862 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
863 /// logically zero-extended or truncated to match the bit-width of
864 /// the LHS.
865 APInt &operator^=(uint64_t RHS) {
866 if (isSingleWord()) {
867 U.VAL ^= RHS;
868 clearUnusedBits();
869 } else {
870 U.pVal[0] ^= RHS;
871 }
872 return *this;
873 }
874
875 /// Multiplication assignment operator.
876 ///
877 /// Multiplies this APInt by RHS and assigns the result to *this.
878 ///
879 /// \returns *this
880 APInt &operator*=(const APInt &RHS);
881 APInt &operator*=(uint64_t RHS);
882
883 /// Addition assignment operator.
884 ///
885 /// Adds RHS to *this and assigns the result to *this.
886 ///
887 /// \returns *this
888 APInt &operator+=(const APInt &RHS);
889 APInt &operator+=(uint64_t RHS);
890
891 /// Subtraction assignment operator.
892 ///
893 /// Subtracts RHS from *this and assigns the result to *this.
894 ///
895 /// \returns *this
896 APInt &operator-=(const APInt &RHS);
897 APInt &operator-=(uint64_t RHS);
898
899 /// Left-shift assignment function.
900 ///
901 /// Shifts *this left by shiftAmt and assigns the result to *this.
902 ///
903 /// \returns *this after shifting left by ShiftAmt
904 APInt &operator<<=(unsigned ShiftAmt) {
905 assert(ShiftAmt <= BitWidth && "Invalid shift amount");
906 if (isSingleWord()) {
907 if (ShiftAmt == BitWidth)
908 U.VAL = 0;
909 else
910 U.VAL <<= ShiftAmt;
911 return clearUnusedBits();
912 }
913 shlSlowCase(ShiftAmt);
914 return *this;
915 }
916
917 /// Left-shift assignment function.
918 ///
919 /// Shifts *this left by shiftAmt and assigns the result to *this.
920 ///
921 /// \returns *this after shifting left by ShiftAmt
922 APInt &operator<<=(const APInt &ShiftAmt);
923
924 /// @}
925 /// \name Binary Operators
926 /// @{
927
928 /// Multiplication operator.
929 ///
930 /// Multiplies this APInt by RHS and returns the result.
931 APInt operator*(const APInt &RHS) const;
932
933 /// Left logical shift operator.
934 ///
935 /// Shifts this APInt left by \p Bits and returns the result.
936 APInt operator<<(unsigned Bits) const { return shl(Bits); }
937
938 /// Left logical shift operator.
939 ///
940 /// Shifts this APInt left by \p Bits and returns the result.
941 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
942
943 /// Arithmetic right-shift function.
944 ///
945 /// Arithmetic right-shift this APInt by shiftAmt.
946 APInt ashr(unsigned ShiftAmt) const {
947 APInt R(*this);
948 R.ashrInPlace(ShiftAmt);
949 return R;
950 }
951
952 /// Arithmetic right-shift this APInt by ShiftAmt in place.
953 void ashrInPlace(unsigned ShiftAmt) {
954     assert(ShiftAmt <= BitWidth && "Invalid shift amount");
955 if (isSingleWord()) {
956 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
957 if (ShiftAmt == BitWidth)
958 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
959 else
960 U.VAL = SExtVAL >> ShiftAmt;
961 clearUnusedBits();
962 return;
963 }
964 ashrSlowCase(ShiftAmt);
965 }
966
967 /// Logical right-shift function.
968 ///
969 /// Logical right-shift this APInt by shiftAmt.
970 APInt lshr(unsigned shiftAmt) const {
971 APInt R(*this);
972 R.lshrInPlace(shiftAmt);
973 return R;
974 }
975
976 /// Logical right-shift this APInt by ShiftAmt in place.
977 void lshrInPlace(unsigned ShiftAmt) {
978     assert(ShiftAmt <= BitWidth && "Invalid shift amount");
979 if (isSingleWord()) {
980 if (ShiftAmt == BitWidth)
981 U.VAL = 0;
982 else
983 U.VAL >>= ShiftAmt;
984 return;
985 }
986 lshrSlowCase(ShiftAmt);
987 }
988
989 /// Left-shift function.
990 ///
991 /// Left-shift this APInt by shiftAmt.
992 APInt shl(unsigned shiftAmt) const {
993 APInt R(*this);
994 R <<= shiftAmt;
995 return R;
996 }
997
998 /// Rotate left by rotateAmt.
999 APInt rotl(unsigned rotateAmt) const;
1000
1001 /// Rotate right by rotateAmt.
1002 APInt rotr(unsigned rotateAmt) const;
1003
1004 /// Arithmetic right-shift function.
1005 ///
1006 /// Arithmetic right-shift this APInt by shiftAmt.
1007 APInt ashr(const APInt &ShiftAmt) const {
1008 APInt R(*this);
1009 R.ashrInPlace(ShiftAmt);
1010 return R;
1011 }
1012
1013 /// Arithmetic right-shift this APInt by shiftAmt in place.
1014 void ashrInPlace(const APInt &shiftAmt);
1015
1016 /// Logical right-shift function.
1017 ///
1018 /// Logical right-shift this APInt by shiftAmt.
1019 APInt lshr(const APInt &ShiftAmt) const {
1020 APInt R(*this);
1021 R.lshrInPlace(ShiftAmt);
1022 return R;
1023 }
1024
1025 /// Logical right-shift this APInt by ShiftAmt in place.
1026 void lshrInPlace(const APInt &ShiftAmt);
1027
1028 /// Left-shift function.
1029 ///
1030 /// Left-shift this APInt by shiftAmt.
1031 APInt shl(const APInt &ShiftAmt) const {
1032 APInt R(*this);
1033 R <<= ShiftAmt;
1034 return R;
1035 }
1036
1037 /// Rotate left by rotateAmt.
1038 APInt rotl(const APInt &rotateAmt) const;
1039
1040 /// Rotate right by rotateAmt.
1041 APInt rotr(const APInt &rotateAmt) const;
1042
1043 /// Unsigned division operation.
1044 ///
1045 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
1046 /// RHS are treated as unsigned quantities for purposes of this division.
1047 ///
1048 /// \returns a new APInt value containing the division result, rounded towards
1049 /// zero.
1050 APInt udiv(const APInt &RHS) const;
1051 APInt udiv(uint64_t RHS) const;
1052
1053 /// Signed division function for APInt.
1054 ///
1055 /// Signed divide this APInt by APInt RHS.
1056 ///
1057 /// The result is rounded towards zero.
1058 APInt sdiv(const APInt &RHS) const;
1059 APInt sdiv(int64_t RHS) const;
1060
1061 /// Unsigned remainder operation.
1062 ///
1063 /// Perform an unsigned remainder operation on this APInt with RHS being the
1064 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
1065 /// of this operation. Note that this is a true remainder operation and not a
1066 /// modulo operation because the sign follows the sign of the dividend which
1067 /// is *this.
1068 ///
1069 /// \returns a new APInt value containing the remainder result
1070 APInt urem(const APInt &RHS) const;
1071 uint64_t urem(uint64_t RHS) const;
1072
1073 /// Function for signed remainder operation.
1074 ///
1075 /// Signed remainder operation on APInt.
1076 APInt srem(const APInt &RHS) const;
1077 int64_t srem(int64_t RHS) const;
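A short sketch of the remainder-sign rule noted above, for the signed case (editor's example; assumes the same include and using-directive as the earlier sketch):

  void sremSignSketch() {
    APInt A(32, -7, /*isSigned=*/true);
    APInt B(32, 3);
    APInt R = A.srem(B); // -7 srem 3 == -1: the remainder takes the dividend's sign.
    (void)R;
  }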
1078
1079 /// Dual division/remainder interface.
1080 ///
1081 /// Sometimes it is convenient to divide two APInt values and obtain both the
1082 /// quotient and remainder. This function does both operations in the same
1083 /// computation making it a little more efficient. The pair of input arguments
1084 /// may overlap with the pair of output arguments. It is safe to call
1085 /// udivrem(X, Y, X, Y), for example.
1086 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1087 APInt &Remainder);
1088 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1089 uint64_t &Remainder);
1090
1091 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1092 APInt &Remainder);
1093 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
1094 int64_t &Remainder);
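A usage sketch of the dual interface above (editor's example, same include assumptions); the outputs may alias the inputs, as documented:

  void udivremSketch() {
    APInt X(32, 100), Y(32, 7);
    APInt Q(32, 0), R(32, 0);
    APInt::udivrem(X, Y, Q, R); // Q == 14, R == 2, computed together.
    APInt::udivrem(X, Y, X, Y); // Also legal: X becomes 14, Y becomes 2.
  }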
1095
1096 // Operations that return overflow indicators.
1097 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
1098 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1099 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1100 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1101 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1102 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1103 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1104 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1105 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
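The *_ov operations return the result and report wrapping through the bool reference; a sketch of the pattern (editor's example, same include assumptions):

  bool signedAddOverflows(const APInt &A, const APInt &B) {
    bool Overflow = false;
    (void)A.sadd_ov(B, Overflow); // A and B must have the same bit width.
    return Overflow;
  }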
1106
1107 // Operations that saturate
1108 APInt sadd_sat(const APInt &RHS) const;
1109 APInt uadd_sat(const APInt &RHS) const;
1110 APInt ssub_sat(const APInt &RHS) const;
1111 APInt usub_sat(const APInt &RHS) const;
1112
1113 /// Array-indexing support.
1114 ///
1115 /// \returns the bit value at bitPosition
1116 bool operator[](unsigned bitPosition) const {
1117     assert(bitPosition < getBitWidth() && "Bit position out of bounds!");
1118 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1119 }
1120
1121 /// @}
1122 /// \name Comparison Operators
1123 /// @{
1124
1125 /// Equality operator.
1126 ///
1127 /// Compares this APInt with RHS for the validity of the equality
1128 /// relationship.
1129 bool operator==(const APInt &RHS) const {
1130     assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths");
1131 if (isSingleWord())
1132 return U.VAL == RHS.U.VAL;
1133 return EqualSlowCase(RHS);
1134 }
1135
1136 /// Equality operator.
1137 ///
1138 /// Compares this APInt with a uint64_t for the validity of the equality
1139 /// relationship.
1140 ///
1141 /// \returns true if *this == Val
1142 bool operator==(uint64_t Val) const {
1143 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1144 }
1145
1146 /// Equality comparison.
1147 ///
1148 /// Compares this APInt with RHS for the validity of the equality
1149 /// relationship.
1150 ///
1151 /// \returns true if *this == Val
1152 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1153
1154 /// Inequality operator.
1155 ///
1156 /// Compares this APInt with RHS for the validity of the inequality
1157 /// relationship.
1158 ///
1159 /// \returns true if *this != Val
1160 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
63. Assuming the condition is false
64. Returning zero, which participates in a condition later
1161
1162 /// Inequality operator.
1163 ///
1164 /// Compares this APInt with a uint64_t for the validity of the inequality
1165 /// relationship.
1166 ///
1167 /// \returns true if *this != Val
1168 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1169
1170 /// Inequality comparison
1171 ///
1172 /// Compares this APInt with RHS for the validity of the inequality
1173 /// relationship.
1174 ///
1175 /// \returns true if *this != Val
1176 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1177
1178 /// Unsigned less than comparison
1179 ///
1180 /// Regards both *this and RHS as unsigned quantities and compares them for
1181 /// the validity of the less-than relationship.
1182 ///
1183 /// \returns true if *this < RHS when both are considered unsigned.
1184 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1185
1186 /// Unsigned less than comparison
1187 ///
1188 /// Regards *this as an unsigned quantity and compares it with RHS for
1189 /// the validity of the less-than relationship.
1190 ///
1191 /// \returns true if *this < RHS when considered unsigned.
1192 bool ult(uint64_t RHS) const {
1193 // Only need to check active bits if not a single word.
1194 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1195 }
1196
1197 /// Signed less than comparison
1198 ///
1199 /// Regards both *this and RHS as signed quantities and compares them for
1200 /// validity of the less-than relationship.
1201 ///
1202 /// \returns true if *this < RHS when both are considered signed.
1203 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1204
1205 /// Signed less than comparison
1206 ///
1207 /// Regards *this as a signed quantity and compares it with RHS for
1208 /// the validity of the less-than relationship.
1209 ///
1210 /// \returns true if *this < RHS when considered signed.
1211 bool slt(int64_t RHS) const {
1212 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1213 : getSExtValue() < RHS;
1214 }
1215
1216 /// Unsigned less or equal comparison
1217 ///
1218 /// Regards both *this and RHS as unsigned quantities and compares them for
1219 /// validity of the less-or-equal relationship.
1220 ///
1221 /// \returns true if *this <= RHS when both are considered unsigned.
1222 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1223
1224 /// Unsigned less or equal comparison
1225 ///
1226 /// Regards *this as an unsigned quantity and compares it with RHS for
1227 /// the validity of the less-or-equal relationship.
1228 ///
1229 /// \returns true if *this <= RHS when considered unsigned.
1230 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1231
1232 /// Signed less or equal comparison
1233 ///
1234 /// Regards both *this and RHS as signed quantities and compares them for
1235 /// validity of the less-or-equal relationship.
1236 ///
1237 /// \returns true if *this <= RHS when both are considered signed.
1238 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1239
1240 /// Signed less or equal comparison
1241 ///
1242 /// Regards *this as a signed quantity and compares it with RHS for the
1243 /// validity of the less-or-equal relationship.
1244 ///
1245 /// \returns true if *this <= RHS when considered signed.
1246 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1247
1248 /// Unsigned greater than comparison
1249 ///
1250 /// Regards both *this and RHS as unsigned quantities and compares them for
1251 /// the validity of the greater-than relationship.
1252 ///
1253 /// \returns true if *this > RHS when both are considered unsigned.
1254 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1255
1256 /// Unsigned greater than comparison
1257 ///
1258 /// Regards *this as an unsigned quantity and compares it with RHS for
1259 /// the validity of the greater-than relationship.
1260 ///
1261 /// \returns true if *this > RHS when considered unsigned.
1262 bool ugt(uint64_t RHS) const {
1263 // Only need to check active bits if not a single word.
1264 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1265 }
1266
1267 /// Signed greater than comparison
1268 ///
1269 /// Regards both *this and RHS as signed quantities and compares them for the
1270 /// validity of the greater-than relationship.
1271 ///
1272 /// \returns true if *this > RHS when both are considered signed.
1273 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1274
1275 /// Signed greater than comparison
1276 ///
1277 /// Regards *this as a signed quantity and compares it with RHS for
1278 /// the validity of the greater-than relationship.
1279 ///
1280 /// \returns true if *this > RHS when considered signed.
1281 bool sgt(int64_t RHS) const {
1282 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1283 : getSExtValue() > RHS;
1284 }
1285
1286 /// Unsigned greater or equal comparison
1287 ///
1288 /// Regards both *this and RHS as unsigned quantities and compares them for
1289 /// validity of the greater-or-equal relationship.
1290 ///
1291 /// \returns true if *this >= RHS when both are considered unsigned.
1292 bool uge(const APInt &RHS) const { return !ult(RHS); }
1293
1294 /// Unsigned greater or equal comparison
1295 ///
1296 /// Regards *this as an unsigned quantity and compares it with RHS for
1297 /// the validity of the greater-or-equal relationship.
1298 ///
1299 /// \returns true if *this >= RHS when considered unsigned.
1300 bool uge(uint64_t RHS) const { return !ult(RHS); }
1301
1302 /// Signed greater or equal comparison
1303 ///
1304 /// Regards both *this and RHS as signed quantities and compares them for
1305 /// validity of the greater-or-equal relationship.
1306 ///
1307 /// \returns true if *this >= RHS when both are considered signed.
1308 bool sge(const APInt &RHS) const { return !slt(RHS); }
1309
1310 /// Signed greater or equal comparison
1311 ///
1312 /// Regards *this as a signed quantity and compares it with RHS for
1313 /// the validity of the greater-or-equal relationship.
1314 ///
1315 /// \returns true if *this >= RHS when considered signed.
1316 bool sge(int64_t RHS) const { return !slt(RHS); }
1317
1318 /// This operation tests if there are any pairs of corresponding bits
1319 /// between this APInt and RHS that are both set.
1320 bool intersects(const APInt &RHS) const {
1321     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1322 if (isSingleWord())
1323 return (U.VAL & RHS.U.VAL) != 0;
1324 return intersectsSlowCase(RHS);
1325 }
1326
1327 /// This operation checks that all bits set in this APInt are also set in RHS.
1328 bool isSubsetOf(const APInt &RHS) const {
1329     assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
1330 if (isSingleWord())
1331 return (U.VAL & ~RHS.U.VAL) == 0;
1332 return isSubsetOfSlowCase(RHS);
1333 }
1334
1335 /// @}
1336 /// \name Resizing Operators
1337 /// @{
1338
1339 /// Truncate to new width.
1340 ///
1341 /// Truncate the APInt to a specified width. It is an error to specify a width
1342 /// that is greater than or equal to the current width.
1343 APInt trunc(unsigned width) const;
1344
1345 /// Sign extend to a new width.
1346 ///
1347 /// This operation sign extends the APInt to a new width. If the high order
1348 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1349 /// It is an error to specify a width that is less than or equal to the
1350 /// current width.
1351 APInt sext(unsigned width) const;
1352
1353 /// Zero extend to a new width.
1354 ///
1355 /// This operation zero extends the APInt to a new width. The high order bits
1356 /// are filled with 0 bits. It is an error to specify a width that is less
1357 /// than or equal to the current width.
1358 APInt zext(unsigned width) const;
1359
1360 /// Sign extend or truncate to width
1361 ///
1362 /// Make this APInt have the bit width given by \p width. The value is sign
1363 /// extended, truncated, or left alone to make it that width.
1364 APInt sextOrTrunc(unsigned width) const;
1365
1366 /// Zero extend or truncate to width
1367 ///
1368 /// Make this APInt have the bit width given by \p width. The value is zero
1369 /// extended, truncated, or left alone to make it that width.
1370 APInt zextOrTrunc(unsigned width) const;
1371
1372 /// Sign extend or truncate to width
1373 ///
1374 /// Make this APInt have the bit width given by \p width. The value is sign
1375 /// extended, or left alone to make it that width.
1376 APInt sextOrSelf(unsigned width) const;
1377
1378 /// Zero extend or truncate to width
1379 ///
1380 /// Make this APInt have the bit width given by \p width. The value is zero
1381 /// extended, or left alone to make it that width.
1382 APInt zextOrSelf(unsigned width) const;
1383
1384 /// @}
1385 /// \name Bit Manipulation Operators
1386 /// @{
1387
1388 /// Set every bit to 1.
1389 void setAllBits() {
1390 if (isSingleWord())
1391 U.VAL = WORDTYPE_MAX;
1392 else
1393 // Set all the bits in all the words.
1394 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1395 // Clear the unused ones
1396 clearUnusedBits();
1397 }
1398
1399 /// Set a given bit to 1.
1400 ///
1401 /// Set the bit at the position given by "bitPosition" to 1.
1402 void setBit(unsigned BitPosition) {
1403     assert(BitPosition < BitWidth && "BitPosition out of range");
1404 WordType Mask = maskBit(BitPosition);
1405 if (isSingleWord())
1406 U.VAL |= Mask;
1407 else
1408 U.pVal[whichWord(BitPosition)] |= Mask;
1409 }
1410
1411 /// Set the sign bit to 1.
1412 void setSignBit() {
1413 setBit(BitWidth - 1);
1414 }
1415
1416 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1417 void setBits(unsigned loBit, unsigned hiBit) {
1418     assert(hiBit <= BitWidth && "hiBit out of range");
1419     assert(loBit <= BitWidth && "loBit out of range");
1420     assert(loBit <= hiBit && "loBit greater than hiBit");
1421 if (loBit == hiBit)
1422 return;
1423 if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
1424 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1425 mask <<= loBit;
1426 if (isSingleWord())
1427 U.VAL |= mask;
1428 else
1429 U.pVal[0] |= mask;
1430 } else {
1431 setBitsSlowCase(loBit, hiBit);
1432 }
1433 }
1434
1435 /// Set the top bits starting from loBit.
1436 void setBitsFrom(unsigned loBit) {
1437 return setBits(loBit, BitWidth);
1438 }
1439
1440 /// Set the bottom loBits bits.
1441 void setLowBits(unsigned loBits) {
1442 return setBits(0, loBits);
1443 }
1444
1445 /// Set the top hiBits bits.
1446 void setHighBits(unsigned hiBits) {
1447 return setBits(BitWidth - hiBits, BitWidth);
1448 }
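The setters above use half-open [loBit, hiBit) ranges; a small sketch (editor's example, same include assumptions):

  void setBitsSketch() {
    APInt V(16, 0);
    V.setBits(4, 8);  // Sets bits 4..7:   V == 0x00F0.
    V.setHighBits(4); // Sets bits 12..15: V == 0xF0F0.
  }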
1449
1450 /// Set every bit to 0.
1451 void clearAllBits() {
1452 if (isSingleWord())
1453 U.VAL = 0;
1454 else
1455 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1456 }
1457
1458 /// Set a given bit to 0.
1459 ///
1460 /// Set the bit at the position given by "bitPosition" to 0.
1461 void clearBit(unsigned BitPosition) {
1462     assert(BitPosition < BitWidth && "BitPosition out of range");
1463 WordType Mask = ~maskBit(BitPosition);
1464 if (isSingleWord())
1465 U.VAL &= Mask;
1466 else
1467 U.pVal[whichWord(BitPosition)] &= Mask;
1468 }
1469
1470 /// Set bottom loBits bits to 0.
1471 void clearLowBits(unsigned loBits) {
1472     assert(loBits <= BitWidth && "More bits than bitwidth");
1473 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1474 *this &= Keep;
1475 }
1476
1477 /// Set the sign bit to 0.
1478 void clearSignBit() {
1479 clearBit(BitWidth - 1);
1480 }
1481
1482 /// Toggle every bit to its opposite value.
1483 void flipAllBits() {
1484 if (isSingleWord()) {
1485 U.VAL ^= WORDTYPE_MAX;
1486 clearUnusedBits();
1487 } else {
1488 flipAllBitsSlowCase();
1489 }
1490 }
1491
1492 /// Toggles a given bit to its opposite value.
1493 ///
1494 /// Toggle the bit at the position given by "bitPosition" to its
1495 /// opposite value.
1496 void flipBit(unsigned bitPosition);
1497
1498 /// Negate this APInt in place.
1499 void negate() {
1500 flipAllBits();
1501 ++(*this);
1502 }
1503
1504 /// Insert the bits from a smaller APInt starting at bitPosition.
1505 void insertBits(const APInt &SubBits, unsigned bitPosition);
1506 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1507
1508 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1509 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1510 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1511
1512 /// @}
1513 /// \name Value Characterization Functions
1514 /// @{
1515
1516 /// Return the number of bits in the APInt.
1517 unsigned getBitWidth() const { return BitWidth; }
1518
1519 /// Get the number of words.
1520 ///
1521 /// Here one word's bit width equals that of uint64_t.
1522 ///
1523 /// \returns the number of words to hold the integer value of this APInt.
1524 unsigned getNumWords() const { return getNumWords(BitWidth); }
1525
1526 /// Get the number of words.
1527 ///
1528 /// *NOTE* Here one word's bit width equals that of uint64_t.
1529 ///
1530 /// \returns the number of words to hold the integer value with a given bit
1531 /// width.
1532 static unsigned getNumWords(unsigned BitWidth) {
1533 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1534 }
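Worked values for the ceiling division above (editor's note): getNumWords(1) == 1, getNumWords(64) == 1, and getNumWords(65) == 2, since each word holds APINT_BITS_PER_WORD == 64 bits.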
1535
1536 /// Compute the number of active bits in the value
1537 ///
1538 /// This function returns the number of active bits which is defined as the
1539 /// bit width minus the number of leading zeros. This is used in several
1540 /// computations to see how "wide" the value is.
1541 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1542
1543 /// Compute the number of active words in the value of this APInt.
1544 ///
1545 /// This is used in conjunction with getActiveData to extract the raw value of
1546 /// the APInt.
1547 unsigned getActiveWords() const {
1548 unsigned numActiveBits = getActiveBits();
1549 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1550 }
1551
1552 /// Get the minimum bit size for this signed APInt
1553 ///
1554 /// Computes the minimum bit width for this APInt while considering it to be a
1555 /// signed (and probably negative) value. If the value is not negative, this
1556 /// function returns the same value as getActiveBits()+1. Otherwise, it
1557 /// returns the smallest bit width that will retain the negative value. For
1558 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1559 /// for -1, this function will always return 1.
1560 unsigned getMinSignedBits() const {
1561 if (isNegative())
1562 return BitWidth - countLeadingOnes() + 1;
1563 return getActiveBits() + 1;
1564 }
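A couple of concrete values for getMinSignedBits (editor's sketch, same include assumptions):

  void minSignedBitsSketch() {
    APInt MinusOne(32, -1, /*isSigned=*/true);
    unsigned A = MinusOne.getMinSignedBits(); // == 1
    APInt Five(32, 5);
    unsigned B = Five.getMinSignedBits();     // == 4: 0b0101 needs a sign bit.
    (void)A; (void)B;
  }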
1565
1566 /// Get zero extended value
1567 ///
1568 /// This method attempts to return the value of this APInt as a zero extended
1569 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1570 /// uint64_t. Otherwise an assertion will result.
1571 uint64_t getZExtValue() const {
1572 if (isSingleWord())
1573 return U.VAL;
1574     assert(getActiveBits() <= 64 && "Too many bits for uint64_t");
1575 return U.pVal[0];
1576 }
1577
1578 /// Get sign extended value
1579 ///
1580 /// This method attempts to return the value of this APInt as a sign extended
1581 /// int64_t. The bit width must be <= 64 or the value must fit within an
1582 /// int64_t. Otherwise an assertion will result.
1583 int64_t getSExtValue() const {
1584 if (isSingleWord())
1585 return SignExtend64(U.VAL, BitWidth);
1586     assert(getMinSignedBits() <= 64 && "Too many bits for int64_t");
1587 return int64_t(U.pVal[0]);
1588 }
1589
1590 /// Get bits required for string value.
1591 ///
1592 /// This method determines how many bits are required to hold the APInt
1593 /// equivalent of the string given by \p str.
1594 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1595
1596 /// The APInt version of the countLeadingZeros functions in
1597 /// MathExtras.h.
1598 ///
1599 /// It counts the number of zeros from the most significant bit to the first
1600 /// one bit.
1601 ///
1602 /// \returns BitWidth if the value is zero, otherwise returns the number of
1603 /// zeros from the most significant bit to the first one bit.
1604 unsigned countLeadingZeros() const {
1605 if (isSingleWord()) {
1606 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1607 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1608 }
1609 return countLeadingZerosSlowCase();
1610 }
1611
1612 /// Count the number of leading one bits.
1613 ///
1614 /// This function is an APInt version of the countLeadingOnes
1615 /// functions in MathExtras.h. It counts the number of ones from the most
1616 /// significant bit to the first zero bit.
1617 ///
1618 /// \returns 0 if the high order bit is not set, otherwise returns the number
1619 /// of 1 bits from the most significant to the least
1620 unsigned countLeadingOnes() const {
1621 if (isSingleWord())
1622 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1623 return countLeadingOnesSlowCase();
1624 }
1625
1626 /// Computes the number of leading bits of this APInt that are equal to its
1627 /// sign bit.
1628 unsigned getNumSignBits() const {
1629 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1630 }
1631
1632 /// Count the number of trailing zero bits.
1633 ///
1634 /// This function is an APInt version of the countTrailingZeros
1635 /// functions in MathExtras.h. It counts the number of zeros from the least
1636 /// significant bit to the first set bit.
1637 ///
1638 /// \returns BitWidth if the value is zero, otherwise returns the number of
1639 /// zeros from the least significant bit to the first one bit.
1640 unsigned countTrailingZeros() const {
1641 if (isSingleWord())
1642 return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
1643 return countTrailingZerosSlowCase();
1644 }
1645
1646 /// Count the number of trailing one bits.
1647 ///
1648 /// This function is an APInt version of the countTrailingOnes
1649 /// functions in MathExtras.h. It counts the number of ones from the least
1650 /// significant bit to the first zero bit.
1651 ///
1652 /// \returns BitWidth if the value is all ones, otherwise returns the number
1653 /// of ones from the least significant bit to the first zero bit.
1654 unsigned countTrailingOnes() const {
1655 if (isSingleWord())
1656 return llvm::countTrailingOnes(U.VAL);
1657 return countTrailingOnesSlowCase();
1658 }
1659
1660 /// Count the number of bits set.
1661 ///
1662 /// This function is an APInt version of the countPopulation functions
1663 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1664 ///
1665 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1666 unsigned countPopulation() const {
1667 if (isSingleWord())
1668 return llvm::countPopulation(U.VAL);
1669 return countPopulationSlowCase();
1670 }
1671
1672 /// @}
1673 /// \name Conversion Functions
1674 /// @{
1675 void print(raw_ostream &OS, bool isSigned) const;
1676
1677 /// Converts an APInt to a string and appends it to Str. Str is commonly a
1678 /// SmallString.
1679 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1680 bool formatAsCLiteral = false) const;
1681
1682 /// Considers the APInt to be unsigned and converts it into a string in the
1683 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1684 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1685 toString(Str, Radix, false, false);
1686 }
1687
1688 /// Considers the APInt to be signed and converts it into a string in the
1689 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1690 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1691 toString(Str, Radix, true, false);
1692 }
1693
1694 /// Return the APInt as a std::string.
1695 ///
1696 /// Note that this is an inefficient method. It is better to pass in a
1697 /// SmallVector/SmallString to the methods above to avoid thrashing the heap
1698 /// for the string.
1699 std::string toString(unsigned Radix, bool Signed) const;
1700
1701 /// \returns a byte-swapped representation of this APInt Value.
1702 APInt byteSwap() const;
1703
1704 /// \returns the value of this APInt with its bit representation
1705 /// reversed.
1706 APInt reverseBits() const;
1707
1708 /// Converts this APInt to a double value.
1709 double roundToDouble(bool isSigned) const;
1710
1711 /// Converts this unsigned APInt to a double value.
1712 double roundToDouble() const { return roundToDouble(false); }
1713
1714 /// Converts this signed APInt to a double value.
1715 double signedRoundToDouble() const { return roundToDouble(true); }
1716
1717 /// Converts APInt bits to a double
1718 ///
1719 /// The conversion does not do a translation from integer to double, it just
1720 /// re-interprets the bits as a double. Note that it is valid to do this on
1721 /// any bit width. Exactly 64 bits will be translated.
1722 double bitsToDouble() const {
1723 return BitsToDouble(getWord(0));
1724 }
1725
1726 /// Converts APInt bits to a float
1727 ///
1728 /// The conversion does not do a translation from integer to float, it just
1729 /// re-interprets the bits as a float. Note that it is valid to do this on
1730 /// any bit width. Exactly 32 bits will be translated.
1731 float bitsToFloat() const {
1732 return BitsToFloat(getWord(0));
1733 }
1734
1735 /// Converts a double to APInt bits.
1736 ///
1737 /// The conversion does not do a translation from double to integer, it just
1738 /// re-interprets the bits of the double.
1739 static APInt doubleToBits(double V) {
1740     return APInt(sizeof(double) * CHAR_BIT, DoubleToBits(V));
1741 }
1742
1743 /// Converts a float to APInt bits.
1744 ///
1745 /// The conversion does not do a translation from float to integer, it just
1746 /// re-interprets the bits of the float.
1747 static APInt floatToBits(float V) {
1748     return APInt(sizeof(float) * CHAR_BIT, FloatToBits(V));
1749 }
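A round-trip sketch for the bit-reinterpretation helpers (editor's example, same include assumptions):

  void bitCastSketch() {
    APInt Bits = APInt::doubleToBits(1.0); // 64-bit APInt holding 0x3FF0000000000000.
    double D = Bits.bitsToDouble();        // Reinterprets the same 64 bits: D == 1.0.
    (void)D;
  }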
1750
1751 /// @}
1752 /// \name Mathematics Operations
1753 /// @{
1754
1755 /// \returns the floor log base 2 of this APInt.
1756 unsigned logBase2() const { return getActiveBits() - 1; }
1757
1758 /// \returns the ceil log base 2 of this APInt.
1759 unsigned ceilLogBase2() const {
1760 APInt temp(*this);
1761 --temp;
1762 return temp.getActiveBits();
1763 }
1764
1765 /// \returns the nearest log base 2 of this APInt. Ties round up.
1766 ///
1767 /// NOTE: When we have a BitWidth of 1, we define:
1768 ///
1769 /// log2(0) = UINT32_MAX
1770 /// log2(1) = 0
1771 ///
1772 /// to get around any mathematical concerns resulting from
1773 /// referencing 2 in a space where 2 does not exist.
1774 unsigned nearestLogBase2() const {
1775 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1776 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1777 // UINT32_MAX.
1778 if (BitWidth == 1)
1779 return U.VAL - 1;
1780
1781 // Handle the zero case.
1782 if (isNullValue())
1783       return UINT32_MAX;
1784
1785 // The non-zero case is handled by computing:
1786 //
1787 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1788 //
1789 // where x[i] is referring to the value of the ith bit of x.
1790 unsigned lg = logBase2();
1791 return lg + unsigned((*this)[lg - 1]);
1792 }
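A worked instance of the formula above (editor's note): for x == 6 (0b110), logBase2(x) == 2 and bit 1 of x is set, so nearestLogBase2(6) == 3, i.e. the tie between 4 and 8 rounds up to 8.

  void nearestLog2Sketch() {
    APInt X(8, 6);
    unsigned Lg = X.nearestLogBase2(); // == 3
    (void)Lg;
  }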
1793
1794 /// \returns the log base 2 of this APInt if it is an exact power of two, -1
1795 /// otherwise
1796 int32_t exactLogBase2() const {
1797 if (!isPowerOf2())
1798 return -1;
1799 return logBase2();
1800 }
1801
1802 /// Compute the square root
1803 APInt sqrt() const;
1804
1805 /// Get the absolute value.
1806 ///
1807 /// If *this is < 0 then return -(*this); otherwise return *this.
1808 APInt abs() const {
1809 if (isNegative())
1810 return -(*this);
1811 return *this;
1812 }
1813
1814 /// \returns the multiplicative inverse for a given modulo.
1815 APInt multiplicativeInverse(const APInt &modulo) const;
1816
1817 /// @}
1818 /// \name Support for division by constant
1819 /// @{
1820
1821 /// Calculate the magic number for signed division by a constant.
1822 struct ms;
1823 ms magic() const;
1824
1825 /// Calculate the magic number for unsigned division by a constant.
1826 struct mu;
1827 mu magicu(unsigned LeadingZeros = 0) const;
1828
1829 /// @}
1830 /// \name Building-block Operations for APInt and APFloat
1831 /// @{
1832
1833 // These building block operations operate on a representation of arbitrary
1834 // precision, two's-complement, bignum integer values. They should be
1835 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1836 // generally a pointer to the base of an array of integer parts, representing
1837 // an unsigned bignum, and a count of how many parts there are.
1838
1839 /// Sets the least significant part of a bignum to the input value, and zeroes
1840 /// out higher parts.
1841 static void tcSet(WordType *, WordType, unsigned);
1842
1843 /// Assign one bignum to another.
1844 static void tcAssign(WordType *, const WordType *, unsigned);
1845
1846 /// Returns true if a bignum is zero, false otherwise.
1847 static bool tcIsZero(const WordType *, unsigned);
1848
1849 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1850 static int tcExtractBit(const WordType *, unsigned bit);
1851
1852 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1853 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1854 /// significant bit of DST. All high bits above srcBITS in DST are
1855 /// zero-filled.
1856 static void tcExtract(WordType *, unsigned dstCount,
1857 const WordType *, unsigned srcBits,
1858 unsigned srcLSB);
1859
1860 /// Set the given bit of a bignum. Zero-based.
1861 static void tcSetBit(WordType *, unsigned bit);
1862
1863 /// Clear the given bit of a bignum. Zero-based.
1864 static void tcClearBit(WordType *, unsigned bit);
1865
1866 /// Returns the bit number of the least or most significant set bit of a
1867 /// number. If the input number has no bits set, -1U is returned.
1868 static unsigned tcLSB(const WordType *, unsigned n);
1869 static unsigned tcMSB(const WordType *parts, unsigned n);
1870
1871 /// Negate a bignum in-place.
1872 static void tcNegate(WordType *, unsigned);
1873
1874 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1875 static WordType tcAdd(WordType *, const WordType *,
1876 WordType carry, unsigned);
1877 /// DST += RHS. Returns the carry flag.
1878 static WordType tcAddPart(WordType *, WordType, unsigned);
1879
1880 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1881 static WordType tcSubtract(WordType *, const WordType *,
1882 WordType carry, unsigned);
1883 /// DST -= RHS. Returns the carry flag.
1884 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1885
1886 /// DST += SRC * MULTIPLIER + PART if add is true
1887 /// DST = SRC * MULTIPLIER + PART if add is false
1888 ///
1889 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1890 /// start at the same point, i.e. DST == SRC.
1891 ///
1892 /// If DSTPARTS == SRCPARTS + 1, no overflow occurs and zero is returned.
1893 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1894 /// result, and if all of the omitted higher parts were zero return zero,
1895 /// otherwise overflow occurred and return one.
1896 static int tcMultiplyPart(WordType *dst, const WordType *src,
1897 WordType multiplier, WordType carry,
1898 unsigned srcParts, unsigned dstParts,
1899 bool add);
1900
1901 /// DST = LHS * RHS, where DST has the same width as the operands and is
1902 /// filled with the least significant parts of the result. Returns one if
1903 /// overflow occurred, otherwise zero. DST must be disjoint from both
1904 /// operands.
1905 static int tcMultiply(WordType *, const WordType *, const WordType *,
1906 unsigned);
1907
1908 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1909 /// operands. No overflow occurs. DST must be disjoint from both operands.
1910 static void tcFullMultiply(WordType *, const WordType *,
1911 const WordType *, unsigned, unsigned);
1912
1913 /// If RHS is zero, LHS and REMAINDER are left unchanged; return one.
1914 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1915 /// REMAINDER to the remainder, return zero. i.e.
1916 ///
1917 /// OLD_LHS = RHS * LHS + REMAINDER
1918 ///
1919 /// SCRATCH is a bignum of the same size as the operands and result for use by
1920 /// the routine; its contents need not be initialized and are destroyed. LHS,
1921 /// REMAINDER and SCRATCH must be distinct.
1922 static int tcDivide(WordType *lhs, const WordType *rhs,
1923 WordType *remainder, WordType *scratch,
1924 unsigned parts);
1925
1926 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1927 /// restrictions on Count.
1928 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1929
1930 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1931 /// restrictions on Count.
1932 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1933
1934 /// The obvious AND, OR and XOR and complement operations.
1935 static void tcAnd(WordType *, const WordType *, unsigned);
1936 static void tcOr(WordType *, const WordType *, unsigned);
1937 static void tcXor(WordType *, const WordType *, unsigned);
1938 static void tcComplement(WordType *, unsigned);
1939
1940 /// Comparison (unsigned) of two bignums.
1941 static int tcCompare(const WordType *, const WordType *, unsigned);
1942
1943 /// Increment a bignum in-place. Return the carry flag.
1944 static WordType tcIncrement(WordType *dst, unsigned parts) {
1945 return tcAddPart(dst, 1, parts);
1946 }
1947
1948 /// Decrement a bignum in-place. Return the borrow flag.
1949 static WordType tcDecrement(WordType *dst, unsigned parts) {
1950 return tcSubtractPart(dst, 1, parts);
1951 }
1952
1953 /// Set the least significant BITS and clear the rest.
1954 static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
1955
1956 /// debug method
1957 void dump() const;
1958
1959 /// @}
1960};
1961
1962/// Magic data for optimising signed division by a constant.
1963struct APInt::ms {
1964 APInt m; ///< magic number
1965 unsigned s; ///< shift amount
1966};
1967
1968/// Magic data for optimising unsigned division by a constant.
1969struct APInt::mu {
1970 APInt m; ///< magic number
1971 bool a; ///< add indicator
1972 unsigned s; ///< shift amount
1973};
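A brief sketch of how the magic-number data above is obtained (editor's example, same include assumptions); the actual strength reduction of the division is performed elsewhere, e.g. in the DAG combiner:

  void magicSketch() {
    APInt Divisor(32, 7);
    APInt::ms SignedMagic = Divisor.magic();    // multiplier SignedMagic.m, shift SignedMagic.s
    APInt::mu UnsignedMagic = Divisor.magicu(); // also carries the add indicator UnsignedMagic.a
    (void)SignedMagic; (void)UnsignedMagic;
  }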
1974
1975inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
1976
1977inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1978
1979/// Unary bitwise complement operator.
1980///
1981/// \returns an APInt that is the bitwise complement of \p v.
1982inline APInt operator~(APInt v) {
1983 v.flipAllBits();
1984 return v;
1985}
1986
1987inline APInt operator&(APInt a, const APInt &b) {
1988 a &= b;
1989 return a;
1990}
1991
1992inline APInt operator&(const APInt &a, APInt &&b) {
1993 b &= a;
1994 return std::move(b);
1995}
1996
1997inline APInt operator&(APInt a, uint64_t RHS) {
1998 a &= RHS;
1999 return a;
2000}
2001
2002inline APInt operator&(uint64_t LHS, APInt b) {
2003 b &= LHS;
2004 return b;
2005}
2006
2007inline APInt operator|(APInt a, const APInt &b) {
2008 a |= b;
2009 return a;
2010}
2011
2012inline APInt operator|(const APInt &a, APInt &&b) {
2013 b |= a;
2014 return std::move(b);
2015}
2016
2017inline APInt operator|(APInt a, uint64_t RHS) {
2018 a |= RHS;
2019 return a;
2020}
2021
2022inline APInt operator|(uint64_t LHS, APInt b) {
2023 b |= LHS;
2024 return b;
2025}
2026
2027inline APInt operator^(APInt a, const APInt &b) {
2028 a ^= b;
2029 return a;
2030}
2031
2032inline APInt operator^(const APInt &a, APInt &&b) {
2033 b ^= a;
2034 return std::move(b);
2035}
2036
2037inline APInt operator^(APInt a, uint64_t RHS) {
2038 a ^= RHS;
2039 return a;
2040}
2041
2042inline APInt operator^(uint64_t LHS, APInt b) {
2043 b ^= LHS;
2044 return b;
2045}
2046
2047inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2048 I.print(OS, true);
2049 return OS;
2050}
2051
2052inline APInt operator-(APInt v) {
2053 v.negate();
2054 return v;
2055}
2056
2057inline APInt operator+(APInt a, const APInt &b) {
2058 a += b;
2059 return a;
2060}
2061
2062inline APInt operator+(const APInt &a, APInt &&b) {
2063 b += a;
2064 return std::move(b);
2065}
2066
2067inline APInt operator+(APInt a, uint64_t RHS) {
2068 a += RHS;
2069 return a;
2070}
2071
2072inline APInt operator+(uint64_t LHS, APInt b) {
2073 b += LHS;
2074 return b;
2075}
2076
2077inline APInt operator-(APInt a, const APInt &b) {
2078 a -= b;
2079 return a;
2080}
2081
2082inline APInt operator-(const APInt &a, APInt &&b) {
2083 b.negate();
2084 b += a;
2085 return std::move(b);
2086}
2087
2088inline APInt operator-(APInt a, uint64_t RHS) {
2089 a -= RHS;
2090 return a;
2091}
2092
2093inline APInt operator-(uint64_t LHS, APInt b) {
2094 b.negate();
2095 b += LHS;
2096 return b;
2097}
2098
2099inline APInt operator*(APInt a, uint64_t RHS) {
2100 a *= RHS;
2101 return a;
2102}
2103
2104inline APInt operator*(uint64_t LHS, APInt b) {
2105 b *= LHS;
2106 return b;
2107}
2108
2109
2110namespace APIntOps {
2111
2112/// Determine the smaller of two APInts considered to be signed.
2113inline const APInt &smin(const APInt &A, const APInt &B) {
2114 return A.slt(B) ? A : B;
2115}
2116
2117/// Determine the larger of two APInts considered to be signed.
2118inline const APInt &smax(const APInt &A, const APInt &B) {
2119 return A.sgt(B) ? A : B;
2120}
2121
2122/// Determine the smaller of two APInts considered to be unsigned.
2123inline const APInt &umin(const APInt &A, const APInt &B) {
2124 return A.ult(B) ? A : B;
2125}
2126
2127/// Determine the larger of two APInts considered to be unsigned.
2128inline const APInt &umax(const APInt &A, const APInt &B) {
2129 return A.ugt(B) ? A : B;
2130}
2131
2132/// Compute GCD of two unsigned APInt values.
2133///
2134/// This function returns the greatest common divisor of the two APInt values
2135/// using Stein's algorithm.
2136///
2137/// \returns the greatest common divisor of A and B.
2138APInt GreatestCommonDivisor(APInt A, APInt B);
2139
2140/// Converts the given APInt to a double value.
2141///
2142/// Treats the APInt as an unsigned value for conversion purposes.
2143inline double RoundAPIntToDouble(const APInt &APIVal) {
2144 return APIVal.roundToDouble();
2145}
2146
2147/// Converts the given APInt to a double value.
2148///
2149/// Treats the APInt as a signed value for conversion purposes.
2150inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2151 return APIVal.signedRoundToDouble();
2152}
2153
2154/// Converts the given APInt to a float value.
2155inline float RoundAPIntToFloat(const APInt &APIVal) {
2156 return float(RoundAPIntToDouble(APIVal));
2157}
2158
2159/// Converts the given APInt to a float value.
2160///
2161/// Treats the APInt as a signed value for conversion purposes.
2162inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2163 return float(APIVal.signedRoundToDouble());
2164}
2165
2167/// Converts the given double value into an APInt.
2168///
2169/// This function converts a double value to an APInt value.
2169APInt RoundDoubleToAPInt(double Double, unsigned width);
2170
2171/// Converts a float value into an APInt.
2172///
2173/// Converts a float value into an APInt value.
2174inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2175 return RoundDoubleToAPInt(double(Float), width);
2176}
2177
2178/// Return A unsigned-divided by B, rounded by the given rounding mode.
2179APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2180
2181/// Return A sign-divided by B, rounded by the given rounding mode.
2182APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2183
2184/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2185/// (e.g. 32 for i32).
2186/// This function finds the smallest number n, such that
2187/// (a) n >= 0 and q(n) = 0, or
2188/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2189/// integers, belong to two different intervals [Rk, Rk+R),
2190/// where R = 2^BW, and k is an integer.
2191/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2192/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2193/// subtraction (treated as addition of negated numbers) would always
2194/// count as an overflow, but here we want to allow values to decrease
2195/// and increase as long as they are within the same interval.
2196/// Specifically, adding of two negative numbers should not cause an
2197/// overflow (as long as the magnitude does not exceed the bit width).
2198/// On the other hand, given a positive number, adding a negative
2199/// number to it can give a negative result, which would cause the
2200/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2201/// treated as a special case of an overflow.
2202///
2203/// This function returns None if after finding k that minimizes the
2204/// positive solution to q(n) = kR, both solutions are contained between
2205/// two consecutive integers.
2206///
2207/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2208/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2209/// virtue of *signed* overflow. This function will *not* find such an n,
2210/// however it may find a value of n satisfying the inequalities due to
2211/// an *unsigned* overflow (if the values are treated as unsigned).
2212/// To find a solution for a signed overflow, treat it as a problem of
2213/// finding an unsigned overflow with a range width of BW-1.
2214///
2215/// The returned value may have a different bit width from the input
2216/// coefficients.
2217Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2218 unsigned RangeWidth);
2219} // End of APIntOps namespace
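A hedged usage sketch for SolveQuadraticEquationWrap with made-up coefficients (editor's example; assumes llvm/ADT/Optional.h in addition to the earlier include):

  void solveWrapSketch() {
    // q(n) = 2n^2 + 3n + 1, evaluated in a 16-bit wrapping range.
    APInt A(16, 2), B(16, 3), C(16, 1);
    if (Optional<APInt> N = APIntOps::SolveQuadraticEquationWrap(A, B, C, /*RangeWidth=*/16)) {
      (void)*N; // Smallest n at which q(n) wraps (or reaches zero), as described above.
    }
  }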
2220
2221// See friend declaration above. This additional declaration is required in
2222// order to compile LLVM with the IBM xlC compiler.
2223hash_code hash_value(const APInt &Arg);
2224
2225/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2226/// with the integer held in IntVal.
2227void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2228
2229/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2230/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2231void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes);
2232
2233} // namespace llvm
2234
2235#endif

/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h

1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file describes how to lower LLVM code to machine code. This has three
11/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
32#include "llvm/CodeGen/DAGCombine.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/RuntimeLibcalls.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/IRBuilder.h"
46#include "llvm/IR/InlineAsm.h"
47#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Type.h"
50#include "llvm/MC/MCRegisterInfo.h"
51#include "llvm/Support/Alignment.h"
52#include "llvm/Support/AtomicOrdering.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MachineValueType.h"
56#include "llvm/Target/TargetMachine.h"
57#include <algorithm>
58#include <cassert>
59#include <climits>
60#include <cstdint>
61#include <iterator>
62#include <map>
63#include <string>
64#include <utility>
65#include <vector>
66
67namespace llvm {
68
69class BranchProbability;
70class CCState;
71class CCValAssign;
72class Constant;
73class FastISel;
74class FunctionLoweringInfo;
75class GlobalValue;
76class GISelKnownBits;
77class IntrinsicInst;
78struct KnownBits;
79class LLVMContext;
80class MachineBasicBlock;
81class MachineFunction;
82class MachineInstr;
83class MachineJumpTableInfo;
84class MachineLoop;
85class MachineRegisterInfo;
86class MCContext;
87class MCExpr;
88class Module;
89class TargetRegisterClass;
90class TargetLibraryInfo;
91class TargetRegisterInfo;
92class Value;
93
94namespace Sched {
95
96 enum Preference {
97 None, // No preference
98 Source, // Follow source order.
99 RegPressure, // Scheduling for lowest register pressure.
100 Hybrid, // Scheduling for both latency and register pressure.
101 ILP, // Scheduling for ILP in low register pressure mode.
102 VLIW // Scheduling for VLIW targets.
103 };
104
105} // end namespace Sched
106
107/// This base class for TargetLowering contains the SelectionDAG-independent
108/// parts that can be used from the rest of CodeGen.
109class TargetLoweringBase {
110public:
111 /// This enum indicates whether operations are valid for a target, and if not,
112 /// what action should be used to make them valid.
113 enum LegalizeAction : uint8_t {
114 Legal, // The target natively supports this operation.
115 Promote, // This operation should be executed in a larger type.
116 Expand, // Try to expand this to other ops, otherwise use a libcall.
117 LibCall, // Don't try to expand this to other ops, always use a libcall.
118 Custom // Use the LowerOperation hook to implement custom lowering.
119 };
120
121 /// This enum indicates whether types are legal for a target, and if not,
122 /// what action should be used to make them valid.
123 enum LegalizeTypeAction : uint8_t {
124 TypeLegal, // The target natively supports this type.
125 TypePromoteInteger, // Replace this integer with a larger one.
126 TypeExpandInteger, // Split this integer into two of half the size.
127 TypeSoftenFloat, // Convert this float to a same size integer type.
128 TypeExpandFloat, // Split this float into two of half the size.
129 TypeScalarizeVector, // Replace this one-element vector with its element.
130 TypeSplitVector, // Split this vector into two of half the size.
131 TypeWidenVector, // This vector should be widened into a larger vector.
132 TypePromoteFloat // Replace this float with a larger one.
133 };
134
135 /// LegalizeKind holds the legalization kind that needs to happen to EVT
136 /// in order to type-legalize it.
137 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
138
139 /// Enum that describes how the target represents true/false values.
140 enum BooleanContent {
141 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
142 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
143 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
144 };
145
146 /// Enum that describes what type of support for selects the target has.
147 enum SelectSupportKind {
148 ScalarValSelect, // The target supports scalar selects (ex: cmov).
149 ScalarCondVectorVal, // The target supports selects with a scalar condition
150 // and vector values (ex: cmov).
151 VectorMaskSelect // The target supports vector selects with a vector
152 // mask (ex: x86 blends).
153 };
154
155 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
156 /// to, if at all. Exists because different targets have different levels of
157 /// support for these atomic instructions, and also have different options
158 /// w.r.t. what they should expand to.
159 enum class AtomicExpansionKind {
160 None, // Don't expand the instruction.
161 LLSC, // Expand the instruction into loadlinked/storeconditional; used
162 // by ARM/AArch64.
163 LLOnly, // Expand the (load) instruction into just a load-linked, which has
164 // greater atomic guarantees than a normal load.
165 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
166 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
167 };
168
169 /// Enum that specifies when a multiplication should be expanded.
170 enum class MulExpansionKind {
171 Always, // Always expand the instruction.
172 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
173 // or custom.
174 };
175
176 class ArgListEntry {
177 public:
178 Value *Val = nullptr;
179 SDValue Node = SDValue();
180 Type *Ty = nullptr;
181 bool IsSExt : 1;
182 bool IsZExt : 1;
183 bool IsInReg : 1;
184 bool IsSRet : 1;
185 bool IsNest : 1;
186 bool IsByVal : 1;
187 bool IsInAlloca : 1;
188 bool IsReturned : 1;
189 bool IsSwiftSelf : 1;
190 bool IsSwiftError : 1;
191 uint16_t Alignment = 0;
192 Type *ByValType = nullptr;
193
194 ArgListEntry()
195 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
196 IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
197 IsSwiftSelf(false), IsSwiftError(false) {}
198
199 void setAttributes(const CallBase *Call, unsigned ArgIdx);
200
201 void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
202 return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
203 }
204 };
205 using ArgListTy = std::vector<ArgListEntry>;
206
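// Editor's note (sketch, not part of the analyzed source): to make the role of
// ArgListEntry concrete, this is how a lowering routine might fill in a single
// argument entry. `CB` is an assumed CallBase pointer.
TargetLoweringBase::ArgListEntry Entry;
Entry.Val = CB->getArgOperand(0);        // first call argument (assumed CB)
Entry.Ty = Entry.Val->getType();
Entry.setAttributes(CB, /*ArgIdx=*/0);   // copy sext/zext/byval/... flags from the call
TargetLoweringBase::ArgListTy Args;
Args.push_back(Entry);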
207 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
208 ArgListTy &Args) const {};
209
210 static ISD::NodeType getExtendForContent(BooleanContent Content) {
211 switch (Content) {
212 case UndefinedBooleanContent:
213 // Extend by adding rubbish bits.
214 return ISD::ANY_EXTEND;
215 case ZeroOrOneBooleanContent:
216 // Extend by adding zero bits.
217 return ISD::ZERO_EXTEND;
218 case ZeroOrNegativeOneBooleanContent:
219 // Extend by copying the sign bit.
220 return ISD::SIGN_EXTEND;
221 }
222 llvm_unreachable("Invalid content kind");
223 }
224
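// Editor's note (sketch, not from the analyzed source): the mapping above in use.
// A target that declares ZeroOrOneBooleanContent has its booleans widened with a
// zero-extension; assumes <cassert> and the ISD node definitions are available.
ISD::NodeType Ext = TargetLoweringBase::getExtendForContent(
    TargetLoweringBase::ZeroOrOneBooleanContent);
assert(Ext == ISD::ZERO_EXTEND && "0/1 booleans are widened by zero-extension");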
225 /// NOTE: The TargetMachine owns TLOF.
226 explicit TargetLoweringBase(const TargetMachine &TM);
227 TargetLoweringBase(const TargetLoweringBase &) = delete;
228 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
229 virtual ~TargetLoweringBase() = default;
230
231protected:
232 /// Initialize all of the actions to default values.
233 void initActions();
234
235public:
236 const TargetMachine &getTargetMachine() const { return TM; }
237
238 virtual bool useSoftFloat() const { return false; }
239
240 /// Return the pointer type for the given address space, defaults to
241 /// the pointer type from the data layout.
242 /// FIXME: The default needs to be removed once all the code is updated.
243 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
244 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
245 }
246
247 /// Return the in-memory pointer type for the given address space, defaults to
248 /// the pointer type from the data layout. FIXME: The default needs to be
249 /// removed once all the code is updated.
250 MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
251 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
252 }
253
254 /// Return the type for frame index, which is determined by
255 /// the alloca address space specified through the data layout.
256 MVT getFrameIndexTy(const DataLayout &DL) const {
257 return getPointerTy(DL, DL.getAllocaAddrSpace());
258 }
259
260 /// Return the type for operands of fence.
261 /// TODO: Let fence operands be of i32 type and remove this.
262 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
263 return getPointerTy(DL);
264 }
265
266 /// EVT is not used in-tree, but is used by out-of-tree targets.
267 /// Documentation for this function would be nice...
268 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
269
270 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
271 bool LegalTypes = true) const;
272
273 /// Returns the type to be used for the index operand of:
274 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
275 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
276 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
277 return getPointerTy(DL);
278 }
279
280 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
281 return true;
282 }
283
284 /// Return true if it is profitable to convert a select of FP constants into
285 /// a constant pool load whose address depends on the select condition. The
286 /// parameter may be used to differentiate a select with FP compare from
287 /// integer compare.
288 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
289 return true;
290 }
291
292 /// Return true if multiple condition registers are available.
293 bool hasMultipleConditionRegisters() const {
294 return HasMultipleConditionRegisters;
295 }
296
297 /// Return true if the target has BitExtract instructions.
298 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
299
300 /// Return the preferred vector type legalization action.
301 virtual TargetLoweringBase::LegalizeTypeAction
302 getPreferredVectorAction(MVT VT) const {
303 // The default action for one element vectors is to scalarize
304 if (VT.getVectorNumElements() == 1)
305 return TypeScalarizeVector;
306 // The default action for an odd-width vector is to widen.
307 if (!VT.isPow2VectorType())
308 return TypeWidenVector;
309 // The default action for other vectors is to promote
310 return TypePromoteInteger;
311 }
312
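// Editor's note (hypothetical target, not from the analyzed source): a target can
// steer these defaults by overriding the hook, e.g. splitting vectors wider than
// its (assumed) 128-bit registers and otherwise deferring to the defaults above.
struct ExampleTLI : TargetLoweringBase {
  explicit ExampleTLI(const TargetMachine &TM) : TargetLoweringBase(TM) {}
  LegalizeTypeAction getPreferredVectorAction(MVT VT) const override {
    if (VT.isVector() && VT.getSizeInBits() > 128)
      return TypeSplitVector;                // assumption: 128-bit vector registers
    return TargetLoweringBase::getPreferredVectorAction(VT);
  }
};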
313 // There are two general methods for expanding a BUILD_VECTOR node:
314 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
315 // them together.
316 // 2. Build the vector on the stack and then load it.
317 // If this function returns true, then method (1) will be used, subject to
318 // the constraint that all of the necessary shuffles are legal (as determined
319 // by isShuffleMaskLegal). If this function returns false, then method (2) is
320 // always used. The vector type, and the number of defined values, are
321 // provided.
322 virtual bool
323 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
324 unsigned DefinedValues) const {
325 return DefinedValues < 3;
326 }
327
328 /// Return true if integer divide is usually cheaper than a sequence of
329 /// several shifts, adds, and multiplies for this target.
330 /// The definition of "cheaper" may depend on whether we're optimizing
331 /// for speed or for size.
332 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
333
334 /// Return true if the target can handle a standalone remainder operation.
335 virtual bool hasStandaloneRem(EVT VT) const {
336 return true;
337 }
338
339 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
340 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
341 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
342 return false;
343 }
344
345 /// Reciprocal estimate status values used by the functions below.
346 enum ReciprocalEstimate : int {
347 Unspecified = -1,
348 Disabled = 0,
349 Enabled = 1
350 };
351
352 /// Return a ReciprocalEstimate enum value for a square root of the given type
353 /// based on the function's attributes. If the operation is not overridden by
354 /// the function's attributes, "Unspecified" is returned and target defaults
355 /// are expected to be used for instruction selection.
356 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
357
358 /// Return a ReciprocalEstimate enum value for a division of the given type
359 /// based on the function's attributes. If the operation is not overridden by
360 /// the function's attributes, "Unspecified" is returned and target defaults
361 /// are expected to be used for instruction selection.
362 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
363
364 /// Return the refinement step count for a square root of the given type based
365 /// on the function's attributes. If the operation is not overridden by
366 /// the function's attributes, "Unspecified" is returned and target defaults
367 /// are expected to be used for instruction selection.
368 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
369
370 /// Return the refinement step count for a division of the given type based
371 /// on the function's attributes. If the operation is not overridden by
372 /// the function's attributes, "Unspecified" is returned and target defaults
373 /// are expected to be used for instruction selection.
374 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
375
376 /// Returns true if the target has indicated that at least one type should be bypassed.
377 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
378
379 /// Returns a map of slow types for division or remainder with their
380 /// corresponding fast types.
381 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
382 return BypassSlowDivWidths;
383 }
384
385 /// Return true if Flow Control is an expensive operation that should be
386 /// avoided.
387 bool isJumpExpensive() const { return JumpIsExpensive; }
388
389 /// Return true if selects are only cheaper than branches if the branch is
390 /// unlikely to be predicted right.
391 bool isPredictableSelectExpensive() const {
392 return PredictableSelectIsExpensive;
393 }
394
395 /// If a branch or a select condition is skewed in one direction by more than
396 /// this factor, it is very likely to be predicted correctly.
397 virtual BranchProbability getPredictableBranchThreshold() const;
398
399 /// Return true if the following transform is beneficial:
400 /// fold (conv (load x)) -> (load (conv*)x)
401 /// On architectures that don't natively support some vector loads
402 /// efficiently, casting the load to a smaller vector of larger types and
403 /// loading is more efficient; however, this can be undone by optimizations in
404 /// the DAG combiner.
405 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
406 const SelectionDAG &DAG,
407 const MachineMemOperand &MMO) const {
408 // Don't do if we could do an indexed load on the original type, but not on
409 // the new one.
410 if (!LoadVT.isSimple() || !BitcastVT.isSimple())
411 return true;
412
413 MVT LoadMVT = LoadVT.getSimpleVT();
414
415 // Don't bother doing this if it's just going to be promoted again later, as
416 // doing so might interfere with other combines.
417 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
418 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
419 return false;
420
421 bool Fast = false;
422 return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
423 MMO, &Fast) && Fast;
424 }
425
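// Editor's note (illustrative only, not from the analyzed source): the fold that
// the hook above guards, written in DAG terms:
//   (v2i64 (bitcast (load v4i32, addr)))  -->  (load v2i64, addr)
// The hook rejects it when the bitcast type cannot be accessed as fast as the
// original load, or when the load would just be promoted back to that type.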
426 /// Return true if the following transform is beneficial:
427 /// (store (y (conv x)), y*)) -> (store x, (x*))
428 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
429 const SelectionDAG &DAG,
430 const MachineMemOperand &MMO) const {
431 // Default to the same logic as loads.
432 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
433 }
434
435 /// Return true if it is expected to be cheaper to do a store of a non-zero
436 /// vector constant with the given size and type for the address space than to
437 /// store the individual scalar element constants.
438 virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
439 unsigned NumElem,
440 unsigned AddrSpace) const {
441 return false;
442 }
443
444 /// Allow store merging for the specified type after legalization in addition
445 /// to before legalization. This may transform stores that do not exist
446 /// earlier (for example, stores created from intrinsics).
447 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
448 return true;
449 }
450
451 /// Returns true if it's reasonable to merge stores to MemVT size.
452 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
453 const SelectionDAG &DAG) const {
454 return true;
455 }
456
457 /// Return true if it is cheap to speculate a call to intrinsic cttz.
458 virtual bool isCheapToSpeculateCttz() const {
459 return false;
460 }
461
462 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
463 virtual bool isCheapToSpeculateCtlz() const {
464 return false;
465 }
466
467 /// Return true if ctlz instruction is fast.
468 virtual bool isCtlzFast() const {
469 return false;
470 }
471
472 /// Return true if it is safe to transform an integer-domain bitwise operation
473 /// into the equivalent floating-point operation. This should be set to true
474 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
475 /// type.
476 virtual bool hasBitPreservingFPLogic(EVT VT) const {
477 return false;
478 }
479
480 /// Return true if it is cheaper to split the store of a merged int val
481 /// from a pair of smaller values into multiple stores.
482 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
483 return false;
484 }
485
486 /// Return true if the target supports combining a
487 /// chain like:
488 /// \code
489 /// %andResult = and %val1, #mask
490 /// %icmpResult = icmp %andResult, 0
491 /// \endcode
492 /// into a single machine instruction of a form like:
493 /// \code
494 /// cc = test %register, #mask
495 /// \endcode
496 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
497 return false;
498 }
499
500 /// Use bitwise logic to make pairs of compares more efficient. For example:
501 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
502 /// This should be true when it takes more than one instruction to lower
503 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
504 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
505 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
506 return false;
507 }
508
509 /// Return the preferred operand type if the target has a quick way to compare
510 /// integer values of the given size. Assume that any legal integer type can
511 /// be compared efficiently. Targets may override this to allow illegal wide
512 /// types to return a vector type if there is support to compare that type.
513 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
514 MVT VT = MVT::getIntegerVT(NumBits);
515 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
516 }
517
518 /// Return true if the target should transform:
519 /// (X & Y) == Y ---> (~X & Y) == 0
520 /// (X & Y) != Y ---> (~X & Y) != 0
521 ///
522 /// This may be profitable if the target has a bitwise and-not operation that
523 /// sets comparison flags. A target may want to limit the transformation based
524 /// on the type of Y or if Y is a constant.
525 ///
526 /// Note that the transform will not occur if Y is known to be a power-of-2
527 /// because a mask and compare of a single bit can be handled by inverting the
528 /// predicate, for example:
529 /// (X & 8) == 8 ---> (X & 8) != 0
530 virtual bool hasAndNotCompare(SDValue Y) const {
531 return false;
532 }
533
534 /// Return true if the target has a bitwise and-not operation:
535 /// X = ~A & B
536 /// This can be used to simplify select or other instructions.
537 virtual bool hasAndNot(SDValue X) const {
538 // If the target has the more complex version of this operation, assume that
539 // it has this operation too.
540 return hasAndNotCompare(X);
541 }
542
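// Editor's note (hypothetical target, not from the analyzed source): a target with
// a flag-setting and-not instruction (BICS-style) could opt into the rewrite
// documented above; restricting it to scalar integer compares is an assumption.
struct AndNotTLI : TargetLoweringBase {
  explicit AndNotTLI(const TargetMachine &TM) : TargetLoweringBase(TM) {}
  bool hasAndNotCompare(SDValue Y) const override {
    return !Y.getValueType().isVector();   // scalar-only, per the assumption above
  }
};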
543 /// Return true if the target has a bit-test instruction:
544 /// (X & (1 << Y)) ==/!= 0
545 /// This knowledge can be used to prevent breaking the pattern,
546 /// or creating it if it could be recognized.
547 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
548
549 /// There are two ways to clear extreme bits (either low or high):
550 /// Mask: x & (-1 << y) (the instcombine canonical form)
551 /// Shifts: x >> y << y
552 /// Return true if the variant with 2 variable shifts is preferred.
553 /// Return false if there is no preference.
554 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
555 // By default, let's assume that no one prefers shifts.
556 return false;
557 }
558
559 /// Return true if it is profitable to fold a pair of shifts into a mask.
560 /// This is usually true on most targets. But some targets, like Thumb1,
561 /// have immediate shift instructions, but no immediate "and" instruction;
562 /// this makes the fold unprofitable.
563 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
564 CombineLevel Level) const {
565 return true;
566 }
567
568 /// Should we transform the IR-optimal check for whether the given truncation
569 /// down into KeptBits would be truncating or not:
570 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
571 /// Into its more traditional form:
572 /// ((%x << C) a>> C) dstcond %x
573 /// Return true if we should transform.
574 /// Return false if there is no preference.
575 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
576 unsigned KeptBits) const {
577 // By default, let's assume that no one prefers shifts.
578 return false;
579 }
580
581 /// Given the pattern
582 /// (X & (C l>>/<< Y)) ==/!= 0
583 /// return true if it should be transformed into:
584 /// ((X <</l>> Y) & C) ==/!= 0
585 /// WARNING: if 'X' is a constant, the fold may deadlock!
586 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
587 /// here because it can end up being not linked in.
588 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
589 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
590 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
591 SelectionDAG &DAG) const {
592 if (hasBitTest(X, Y)) {
593 // One interesting pattern that we'd want to form is 'bit test':
594 // ((1 << Y) & C) ==/!= 0
595 // But we also need to be careful not to try to reverse that fold.
596
597 // Is this '1 << Y' ?
598 if (OldShiftOpcode == ISD::SHL && CC->isOne())
599 return false; // Keep the 'bit test' pattern.
600
601 // Will it be '1 << Y' after the transform ?
602 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
603 return true; // Do form the 'bit test' pattern.
604 }
605
606 // If 'X' is a constant, and we transform, then we will immediately
607 // try to undo the fold, thus causing an endless combine loop.
608 // So by default, let's assume everyone prefers the fold
609 // iff 'X' is not a constant.
610 return !XC;
611 }
612
613 /// These two forms are equivalent:
614 /// sub %y, (xor %x, -1)
615 /// add (add %x, 1), %y
616 /// The variant with two add's is IR-canonical.
617 /// Some targets may prefer one to the other.
618 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
619 // By default, let's assume that everyone prefers the form with two add's.
620 return true;
621 }
622
623 /// Return true if the target wants to use the optimization that
624 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
625 /// promotedInst1(...(promotedInstN(ext(load)))).
626 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
627
628 /// Return true if the target can combine store(extractelement VectorTy,
629 /// Idx).
630 /// \p Cost[out] gives the cost of that transformation when this is true.
631 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
632 unsigned &Cost) const {
633 return false;
634 }
635
636 /// Return true if inserting a scalar into a variable element of an undef
637 /// vector is more efficiently handled by splatting the scalar instead.
638 virtual bool shouldSplatInsEltVarIndex(EVT) const {
639 return false;
640 }
641
642 /// Return true if the target always benefits from combining into FMA for a
643 /// given value type. This must typically return false on targets where FMA
644 /// takes more cycles to execute than FADD.
645 virtual bool enableAggressiveFMAFusion(EVT VT) const {
646 return false;
647 }
648
649 /// Return the ValueType of the result of SETCC operations.
650 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
651 EVT VT) const;
652
653 /// Return the ValueType for comparison libcalls. Comparison libcalls include
654 /// floating point comparison calls, and Ordered/Unordered check calls on
655 /// floating point numbers.
656 virtual
657 MVT::SimpleValueType getCmpLibcallReturnType() const;
658
659 /// For targets without i1 registers, this gives the nature of the high-bits
660 /// of boolean values held in types wider than i1.
661 ///
662 /// "Boolean values" are special true/false values produced by nodes like
663 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
664 /// Not to be confused with general values promoted from i1. Some cpus
665 /// distinguish between vectors of boolean and scalars; the isVec parameter
666 /// selects between the two kinds. For example on X86 a scalar boolean should
667 /// be zero extended from i1, while the elements of a vector of booleans
668 /// should be sign extended from i1.
669 ///
670 /// Some cpus also treat floating point types the same way as they treat
671 /// vectors instead of the way they treat scalars.
672 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
673 if (isVec)
674 return BooleanVectorContents;
675 return isFloat ? BooleanFloatContents : BooleanContents;
676 }
677
678 BooleanContent getBooleanContents(EVT Type) const {
679 return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
680 }
681
682 /// Return target scheduling preference.
683 Sched::Preference getSchedulingPreference() const {
684 return SchedPreferenceInfo;
685 }
686
687 /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
688 /// for different nodes. This function returns the preference (or none) for
689 /// the given node.
690 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
691 return Sched::None;
692 }
693
694 /// Return the register class that should be used for the specified value
695 /// type.
696 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
697 (void)isDivergent;
698 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
699 assert(RC && "This value type is not natively supported!");
700 return RC;
701 }
702
703 /// Allows target to decide about the register class of the
704 /// specific value that is live outside the defining block.
705 /// Returns true if the value needs uniform register class.
706 virtual bool requiresUniformRegister(MachineFunction &MF,
707 const Value *) const {
708 return false;
709 }
710
711 /// Return the 'representative' register class for the specified value
712 /// type.
713 ///
714 /// The 'representative' register class is the largest legal super-reg
715 /// register class for the register class of the value type. For example, on
716 /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep
717 /// register class is GR64 on x86_64.
718 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
719 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
720 return RC;
721 }
722
723 /// Return the cost of the 'representative' register class for the specified
724 /// value type.
725 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
726 return RepRegClassCostForVT[VT.SimpleTy];
727 }
728
729 /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
730 /// instructions, and false if a library call is preferred (e.g for code-size
731 /// reasons).
732 virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
733 return true;
734 }
735
736 /// Return true if the target has native support for the specified value type.
737 /// This means that it has a register that directly holds it without
738 /// promotions or expansions.
739 bool isTypeLegal(EVT VT) const {
740 assert(!VT.isSimple() ||
741 (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
742 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
743 }
744
745 class ValueTypeActionImpl {
746 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
747 /// that indicates how instruction selection should deal with the type.
748 LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
749
750 public:
751 ValueTypeActionImpl() {
752 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
753 TypeLegal);
754 }
755
756 LegalizeTypeAction getTypeAction(MVT VT) const {
757 return ValueTypeActions[VT.SimpleTy];
758 }
759
760 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
761 ValueTypeActions[VT.SimpleTy] = Action;
762 }
763 };
764
765 const ValueTypeActionImpl &getValueTypeActions() const {
766 return ValueTypeActions;
767 }
768
769 /// Return how we should legalize values of this type, either it is already
770 /// legal (return 'Legal') or we need to promote it to a larger type (return
771 /// 'Promote'), or we need to expand it into multiple registers of smaller
772 /// integer type (return 'Expand'). 'Custom' is not an option.
773 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
774 return getTypeConversion(Context, VT).first;
775 }
776 LegalizeTypeAction getTypeAction(MVT VT) const {
777 return ValueTypeActions.getTypeAction(VT);
778 }
779
780 /// For types supported by the target, this is an identity function. For
781 /// types that must be promoted to larger types, this returns the larger type
782 /// to promote to. For integer types that are larger than the largest integer
783 /// register, this contains one step in the expansion to get to the smaller
784 /// register. For illegal floating point types, this returns the integer type
785 /// to transform to.
786 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
787 return getTypeConversion(Context, VT).second;
788 }
789
790 /// For types supported by the target, this is an identity function. For
791 /// types that must be expanded (i.e. integer types that are larger than the
792 /// largest integer register or illegal floating point types), this returns
793 /// the largest legal type it will be expanded to.
794 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
795 assert(!VT.isVector());
796 while (true) {
797 switch (getTypeAction(Context, VT)) {
798 case TypeLegal:
799 return VT;
800 case TypeExpandInteger:
801 VT = getTypeToTransformTo(Context, VT);
802 break;
803 default:
804 llvm_unreachable("Type is not legal nor is it to be expanded!");
805 }
806 }
807 }
808
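// Editor's note (sketch, not from the analyzed source): on a target whose widest
// legal integer is i64, the loop above maps i128 -> i64 in one expansion step.
// `TLI` and `Ctx` are assumed to be an existing TargetLoweringBase reference and
// LLVMContext.
EVT Expanded = TLI.getTypeToExpandTo(Ctx, EVT::getIntegerVT(Ctx, 128));
// Expanded would typically be MVT::i64 on such a target.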
809 /// Vector types are broken down into some number of legal first class types.
810 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
811 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
812 /// turns into 4 EVT::i32 values with both PPC and X86.
813 ///
814 /// This method returns the number of registers needed, and the VT for each
815 /// register. It also returns the VT and quantity of the intermediate values
816 /// before they are promoted/expanded.
817 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
818 EVT &IntermediateVT,
819 unsigned &NumIntermediates,
820 MVT &RegisterVT) const;
821
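// Editor's note (sketch, not from the analyzed source): querying the breakdown of
// v8f32 as described in the comment above. `TLI` and `Ctx` are assumed to exist.
EVT IntermediateVT;
MVT RegisterVT;
unsigned NumIntermediates;
unsigned NumRegs = TLI.getVectorTypeBreakdown(Ctx, MVT::v8f32, IntermediateVT,
                                              NumIntermediates, RegisterVT);
// On an SSE1-class target this would report 2 registers of v4f32.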
822 /// Certain targets such as MIPS require that some types such as vectors are
823 /// always broken down into scalars in some contexts. This occurs even if the
824 /// vector type is legal.
825 virtual unsigned getVectorTypeBreakdownForCallingConv(
826 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
827 unsigned &NumIntermediates, MVT &RegisterVT) const {
828 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
829 RegisterVT);
830 }
831
832 struct IntrinsicInfo {
833 unsigned opc = 0; // target opcode
834 EVT memVT; // memory VT
835
836 // value representing memory location
837 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
838
839 int offset = 0; // offset off of ptrVal
840 uint64_t size = 0; // the size of the memory location
841 // (taken from memVT if zero)
842 MaybeAlign align = Align::None(); // alignment
843
844 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
845 IntrinsicInfo() = default;
846 };
847
848 /// Given an intrinsic, checks whether on the target the intrinsic will need to map
849 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
850 /// true and stores the intrinsic information into the IntrinsicInfo that was
851 /// passed to the function.
852 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
853 MachineFunction &,
854 unsigned /*Intrinsic*/) const {
855 return false;
856 }
857
858 /// Returns true if the target can instruction select the specified FP
859 /// immediate natively. If false, the legalizer will materialize the FP
860 /// immediate as a load from a constant pool.
861 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
862 bool ForCodeSize = false) const {
863 return false;
864 }
865
866 /// Targets can use this to indicate that they only support *some*
867 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
868 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
869 /// legal.
870 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
871 return true;
872 }
873
874 /// Returns true if the operation can trap for the value type.
875 ///
876 /// VT must be a legal type. By default, we optimistically assume most
877 /// operations don't trap except for integer divide and remainder.
878 virtual bool canOpTrap(unsigned Op, EVT VT) const;
879
880 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
881 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
882 /// constant pool entry.
883 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
884 EVT /*VT*/) const {
885 return false;
886 }
887
888 /// Return how this operation should be treated: either it is legal, needs to
889 /// be promoted to a larger size, needs to be expanded to some other code
890 /// sequence, or the target has a custom expander for it.
891 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
892 if (VT.isExtended()) return Expand;
893 // If a target-specific SDNode requires legalization, require the target
894 // to provide custom legalization for it.
895 if (Op >= array_lengthof(OpActions[0])) return Custom;
896 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
897 }
898
899 /// Custom method defined by each target to indicate if an operation which
900 /// may require a scale is supported natively by the target.
901 /// If not, the operation is illegal.
902 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
903 unsigned Scale) const {
904 return false;
905 }
906
907 /// Some fixed point operations may be natively supported by the target but
908 /// only for specific scales. This method allows for checking
909 /// if the width is supported by the target for a given operation that may
910 /// depend on scale.
911 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
912 unsigned Scale) const {
913 auto Action = getOperationAction(Op, VT);
914 if (Action != Legal)
915 return Action;
916
917 // This operation is supported in this type but may only work on specific
918 // scales.
919 bool Supported;
920 switch (Op) {
921 default:
922 llvm_unreachable("Unexpected fixed point operation.");
923 case ISD::SMULFIX:
924 case ISD::SMULFIXSAT:
925 case ISD::UMULFIX:
926 case ISD::UMULFIXSAT:
927 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
928 break;
929 }
930
931 return Supported ? Action : Expand;
932 }
933
934 // If Op is a strict floating-point operation, return the result
935 // of getOperationAction for the equivalent non-strict operation.
936 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
937 unsigned EqOpc;
938 switch (Op) {
939 default: llvm_unreachable("Unexpected FP pseudo-opcode");
940 case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
941 case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
942 case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
943 case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
944 case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
945 case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
946 case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
947 case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
948 case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
949 case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
950 case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
951 case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
952 case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
953 case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
954 case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
955 case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
956 case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
957 case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
958 case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
959 case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
960 case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
961 case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
962 case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
963 case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
964 case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break;
965 case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break;
966 case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
967 case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
968 }
969
970 return getOperationAction(EqOpc, VT);
971 }
972
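// Editor's note (sketch, not from the analyzed source): a strict FP node is
// legalized like its non-strict twin, so this query is equivalent to asking about
// plain ISD::FADD on f64. `TLI` is an assumed TargetLoweringBase reference.
TargetLoweringBase::LegalizeAction A =
    TLI.getStrictFPOperationAction(ISD::STRICT_FADD, MVT::f64);
// Same result as TLI.getOperationAction(ISD::FADD, MVT::f64).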
973 /// Return true if the specified operation is legal on this target or can be
974 /// made legal with custom lowering. This is used to help guide high-level
975 /// lowering decisions.
976 bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
977 return (VT == MVT::Other || isTypeLegal(VT)) &&
978 (getOperationAction(Op, VT) == Legal ||
979 getOperationAction(Op, VT) == Custom);
980 }
981
982 /// Return true if the specified operation is legal on this target or can be
983 /// made legal using promotion. This is used to help guide high-level lowering
984 /// decisions.
985 bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
986 return (VT == MVT::Other || isTypeLegal(VT)) &&
987 (getOperationAction(Op, VT) == Legal ||
988 getOperationAction(Op, VT) == Promote);
989 }
990
991 /// Return true if the specified operation is legal on this target or can be
992 /// made legal with custom lowering or using promotion. This is used to help
993 /// guide high-level lowering decisions.
994 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
995 return (VT == MVT::Other || isTypeLegal(VT)) &&
996 (getOperationAction(Op, VT) == Legal ||
997 getOperationAction(Op, VT) == Custom ||
998 getOperationAction(Op, VT) == Promote);
999 }
1000
1001 /// Return true if the operation uses custom lowering, regardless of whether
1002 /// the type is legal or not.
1003 bool isOperationCustom(unsigned Op, EVT VT) const {
1004 return getOperationAction(Op, VT) == Custom;
1005 }
1006
1007 /// Return true if lowering to a jump table is allowed.
1008 virtual bool areJTsAllowed(const Function *Fn) const {
1009 if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
1010 return false;
1011
1012 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1013 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1014 }
1015
1016 /// Check whether the range [Low,High] fits in a machine word.
1017 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1018 const DataLayout &DL) const {
1019 // FIXME: Using the pointer type doesn't seem ideal.
1020 uint64_t BW = DL.getIndexSizeInBits(0u);
1021 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1022 return Range <= BW;
1023 }
1024
1025 /// Return true if lowering to a jump table is suitable for a set of case
1026 /// clusters which may contain \p NumCases cases covering a range of \p Range values.
1027 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1028 uint64_t Range) const {
1029 // FIXME: This function checks the maximum table size and density, but the
1030 // minimum size is not checked. It would be nice if the minimum size check
1031 // were also combined into this function. Currently, the minimum size check is
1032 // performed in findJumpTable() in SelectionDAGBuilder and
1033 // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
1034 const bool OptForSize = SI->getParent()->getParent()->hasOptSize();
1035 const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
1036 const unsigned MaxJumpTableSize = getMaximumJumpTableSize();
1037
1038 // Check whether the number of cases is small enough and
1039 // the range is dense enough for a jump table.
1040 if ((OptForSize || Range <= MaxJumpTableSize) &&
1041 (NumCases * 100 >= Range * MinDensity)) {
1042 return true;
1043 }
1044 return false;
1045 }
1046
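// Editor's worked example (not taken from the analyzed source; assumes
// MinDensity = 40, no size pressure, and Range well under MaxJumpTableSize):
//   NumCases = 10, Range = 20  ->  10 * 100 = 1000 >= 20 * 40 =  800  -> jump table
//   NumCases = 10, Range = 40  ->  10 * 100 = 1000 <  40 * 40 = 1600  -> no jump table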
1047 /// Return true if lowering to a bit test is suitable for a set of case
1048 /// clusters which contains \p NumDests unique destinations, \p Low and
1049 /// \p High as its lowest and highest case values, and expects \p NumCmps
1050 /// case value comparisons. Check if the number of destinations, comparison
1051 /// metric, and range are all suitable.
1052 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1053 const APInt &Low, const APInt &High,
1054 const DataLayout &DL) const {
1055 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1056 // range of cases both require only one branch to lower. Just looking at the
1057 // number of clusters and destinations should be enough to decide whether to
1058 // build bit tests.
1059
1060 // To lower a range with bit tests, the range must fit the bitwidth of a
1061 // machine word.
1062 if (!rangeFitsInWord(Low, High, DL))
1063 return false;
1064
1065 // Decide whether it's profitable to lower this range with bit tests. Each
1066 // destination requires a bit test and branch, and there is an overall range
1067 // check branch. For a small number of clusters, separate comparisons might
1068 // be cheaper, and for many destinations, splitting the range might be
1069 // better.
1070 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1071 (NumDests == 3 && NumCmps >= 6);
1072 }
1073
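// Editor's worked example (illustrative, assuming a 64-bit index type): Low = 10
// and High = 40 span (40 - 10) + 1 = 31 values, so rangeFitsInWord() holds, and a
// cluster with NumDests = 2 and NumCmps = 5 meets the (NumDests == 2 && NumCmps >= 5)
// threshold above, so it would be lowered with bit tests.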
1074 /// Return true if the specified operation is illegal on this target or
1075 /// unlikely to be made legal with custom lowering. This is used to help guide
1076 /// high-level lowering decisions.
1077 bool isOperationExpand(unsigned Op, EVT VT) const {
1078 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1079 }
1080
1081 /// Return true if the specified operation is legal on this target.
1082 bool isOperationLegal(unsigned Op, EVT VT) const {
1083 return (VT == MVT::Other || isTypeLegal(VT)) &&
1084 getOperationAction(Op, VT) == Legal;
1085 }
1086
1087 /// Return how this load with extension should be treated: either it is legal,
1088 /// needs to be promoted to a larger size, needs to be expanded to some other
1089 /// code sequence, or the target has a custom expander for it.
1090 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1091 EVT MemVT) const {
1092 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1093 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1094 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1095 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
1096 MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
1097 unsigned Shift = 4 * ExtType;
1098 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1099 }
1100
1101 /// Return true if the specified load with extension is legal on this target.
1102 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1103 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
70. Assuming the condition is true
71. Returning the value 1, which participates in a condition later
1104 }
1105
1106 /// Return true if the specified load with extension is legal or custom
1107 /// on this target.
1108 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1109 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1110 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1111 }
1112
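// Editor's note (sketch, not from the analyzed source): asking whether an i8 value
// can be zero-extend-loaded directly into an i32 register. `TLI` is an assumed
// reference; the answer is target-dependent (on x86 it would typically be true).
bool ZExtLoadOK = TLI.isLoadExtLegal(ISD::ZEXTLOAD, MVT::i32, MVT::i8);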
1113 /// Return how this store with truncation should be treated: either it is
1114 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1115 /// other code sequence, or the target has a custom expander for it.
1116 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1117 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1118 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1119 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1120 assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
1121 "Table isn't big enough!");
1122 return TruncStoreActions[ValI][MemI];
1123 }
1124
1125 /// Return true if the specified store with truncation is legal on this
1126 /// target.
1127 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1128 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1129 }
1130
1131 /// Return true if the specified store with truncation has a solution (is legal
1132 /// or custom) on this target.
1133 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1134 return isTypeLegal(ValVT) &&
1135 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1136 getTruncStoreAction(ValVT, MemVT) == Custom);
1137 }
1138
1139 /// Return how the indexed load should be treated: either it is legal, needs
1140 /// to be promoted to a larger size, needs to be expanded to some other code
1141 /// sequence, or the target has a custom expander for it.
1142 LegalizeAction
1143 getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1144 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1145 "Table isn't big enough!");
1146 unsigned Ty = (unsigned)VT.SimpleTy;
1147 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
1148 }
1149
1150 /// Return true if the specified indexed load is legal on this target.
1151 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1152 return VT.isSimple() &&
1153 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1154 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1155 }
1156
1157 /// Return how the indexed store should be treated: either it is legal, needs
1158 /// to be promoted to a larger size, needs to be expanded to some other code
1159 /// sequence, or the target has a custom expander for it.
1160 LegalizeAction
1161 getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1162 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1163 "Table isn't big enough!");
1164 unsigned Ty = (unsigned)VT.SimpleTy;
1165 return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
1166 }
1167
1168 /// Return true if the specified indexed store is legal on this target.
1169 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1170 return VT.isSimple() &&
1171 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1172 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1173 }
1174
1175 /// Return how the condition code should be treated: either it is legal, needs
1176 /// to be expanded to some other code sequence, or the target has a custom
1177 /// expander for it.
1178 LegalizeAction
1179 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1180 assert((unsigned)CC < array_lengthof(CondCodeActions) &&
1181 ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
1182 "Table isn't big enough!");
1183 // See setCondCodeAction for how this is encoded.
1184 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1185 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1186 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1187 assert(Action != Promote && "Can't promote condition code!");
1188 return Action;
1189 }
1190
1191 /// Return true if the specified condition code is legal on this target.
1192 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1193 return getCondCodeAction(CC, VT) == Legal;
1194 }
1195
1196 /// Return true if the specified condition code is legal or custom on this
1197 /// target.
1198 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1199 return getCondCodeAction(CC, VT) == Legal ||
1200 getCondCodeAction(CC, VT) == Custom;
1201 }
1202
1203 /// If the action for this operation is to promote, this method returns the
1204 /// ValueType to promote to.
1205 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1206 assert(getOperationAction(Op, VT) == Promote &&
1207 "This operation isn't promoted!");
1208
1209 // See if this has an explicit type specified.
1210 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1211 MVT::SimpleValueType>::const_iterator PTTI =
1212 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1213 if (PTTI != PromoteToType.end()) return PTTI->second;
1214
1215 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1216 "Cannot autopromote this type, add it with AddPromotedToType.");
1217
1218 MVT NVT = VT;
1219 do {
1220 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1221 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1222 "Didn't find type to promote to!");
1223 } while (!isTypeLegal(NVT) ||
1224 getOperationAction(Op, NVT) == Promote);
1225 return NVT;
1226 }
1227
1228 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1229 /// operations except for the pointer size. If AllowUnknown is true, this
1230 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1231 /// otherwise it will assert.
1232 EVT getValueType(const DataLayout &DL, Type *Ty,
1233 bool AllowUnknown = false) const {
1234 // Lower scalar pointers to native pointer types.
1235 if (auto *PTy = dyn_cast<PointerType>(Ty))
1236 return getPointerTy(DL, PTy->getAddressSpace());
1237
1238 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1239 Type *EltTy = VTy->getElementType();
1240 // Lower vectors of pointers to native pointer types.
1241 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
1242 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1243 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1244 }
1245 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
1246 VTy->getElementCount());
1247 }
1248
1249 return EVT::getEVT(Ty, AllowUnknown);
1250 }
1251
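// Editor's note (sketch, not from the analyzed source): scalar pointers are
// lowered to the target pointer MVT as described above. `TLI`, `DL`, and `Ctx`
// are assumed to exist; on a 64-bit target the result is typically MVT::i64.
Type *PtrTy = Type::getInt8PtrTy(Ctx);   // i8* in address space 0
EVT PtrVT = TLI.getValueType(DL, PtrTy);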
1252 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1253 bool AllowUnknown = false) const {
1254 // Lower scalar pointers to native pointer types.
1255 if (PointerType *PTy = dyn_cast<PointerType>(Ty))
1256 return getPointerMemTy(DL, PTy->getAddressSpace());
1257 else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1258 Type *Elm = VTy->getElementType();
1259 if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
1260 EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
1261 Elm = PointerTy.getTypeForEVT(Ty->getContext());
1262 }
1263 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
1264 VTy->getNumElements());
1265 }
1266
1267 return getValueType(DL, Ty, AllowUnknown);
1268 }
1269
1270
1271 /// Return the MVT corresponding to this LLVM type. See getValueType.
1272 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1273 bool AllowUnknown = false) const {
1274 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1275 }
1276
1277 /// Return the desired alignment for ByVal or InAlloca aggregate function
1278 /// arguments in the caller parameter area. This is the actual alignment, not
1279 /// its logarithm.
1280 virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1281
1282 /// Return the type of registers that this ValueType will eventually require.
1283 MVT getRegisterType(MVT VT) const {
1284 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
1285 return RegisterTypeForVT[VT.SimpleTy];
1286 }
1287
1288 /// Return the type of registers that this ValueType will eventually require.
1289 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1290 if (VT.isSimple()) {
1291 assert((unsigned)VT.getSimpleVT().SimpleTy <
1292 array_lengthof(RegisterTypeForVT));
1293 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
1294 }
1295 if (VT.isVector()) {
1296 EVT VT1;
1297 MVT RegisterVT;
1298 unsigned NumIntermediates;
1299 (void)getVectorTypeBreakdown(Context, VT, VT1,
1300 NumIntermediates, RegisterVT);
1301 return RegisterVT;
1302 }
1303 if (VT.isInteger()) {
1304 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1305 }
1306 llvm_unreachable("Unsupported extended type!");
1307 }
1308
1309 /// Return the number of registers that this ValueType will eventually
1310 /// require.
1311 ///
1312 /// This is one for any types promoted to live in larger registers, but may be
1313 /// more than one for types (like i64) that are split into pieces. For types
1314 /// like i140, which are first promoted then expanded, it is the number of
1315 /// registers needed to hold all the bits of the original type. For an i140
1316 /// on a 32 bit machine this means 5 registers.
1317 unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
1318 if (VT.isSimple()) {
1319 assert((unsigned)VT.getSimpleVT().SimpleTy <
1320 array_lengthof(NumRegistersForVT));
1321 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1322 }
1323 if (VT.isVector()) {
1324 EVT VT1;
1325 MVT VT2;
1326 unsigned NumIntermediates;
1327 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1328 }
1329 if (VT.isInteger()) {
1330 unsigned BitWidth = VT.getSizeInBits();
1331 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1332 return (BitWidth + RegWidth - 1) / RegWidth;
1333 }
1334 llvm_unreachable("Unsupported extended type!");
1335 }
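// Worked example for the i140 case mentioned above: on a 32-bit target,
// getRegisterType yields an i32 register type, so
//   BitWidth = 140, RegWidth = 32, (140 + 32 - 1) / 32 = 5 registers.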
1336
1337 /// Certain combinations of ABIs, Targets and features require that types
1338 /// are legal for some operations and not for other operations.
1339 /// For MIPS all vector types must be passed through the integer register set.
1340 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1341 CallingConv::ID CC, EVT VT) const {
1342 return getRegisterType(Context, VT);
1343 }
1344
1345 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1346 /// this occurs when a vector type is used, as vectors are passed through the
1347 /// integer register set.
1348 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1349 CallingConv::ID CC,
1350 EVT VT) const {
1351 return getNumRegisters(Context, VT);
1352 }
1353
1354 /// Certain targets have context-sensitive alignment requirements, where one
1355 /// type has the alignment requirement of another type.
1356 virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy,
1357 DataLayout DL) const {
1358 return DL.getABITypeAlignment(ArgTy);
1359 }
1360
1361 /// If true, then instruction selection should seek to shrink the FP constant
1362 /// of the specified type to a smaller type in order to save space and / or
1363 /// reduce runtime.
1364 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1365
1366 /// Return true if it is profitable to reduce a load to a smaller type.
1367 /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x
1368 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1369 EVT NewVT) const {
1370 // By default, assume that it is cheaper to extract a subvector from a wide
1371 // vector load rather than creating multiple narrow vector loads.
1372 if (NewVT.isVector() && !Load->hasOneUse())
1373 return false;
1374
1375 return true;
1376 }
1377
1378 /// When splitting a value of the specified type into parts, does the Lo
1379 /// or Hi part come first? This usually follows the endianness, except
1380 /// for ppcf128, where the Hi part always comes first.
1381 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1382 return DL.isBigEndian() || VT == MVT::ppcf128;
1383 }
1384
1385 /// If true, the target has custom DAG combine transformations that it can
1386 /// perform for the specified node.
1387 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1388 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1389 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1390 }
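// Worked example of the bit packing above: for NT == 37 the flag lives in
// TargetDAGCombineArray[37 >> 3] == element 4, bit (37 & 7) == 5.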
1391
1392 unsigned getGatherAllAliasesMaxDepth() const {
1393 return GatherAllAliasesMaxDepth;
1394 }
1395
1396 /// Returns the size of the platform's va_list object.
1397 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1398 return getPointerTy(DL).getSizeInBits();
1399 }
1400
1401 /// Get maximum # of store operations permitted for llvm.memset
1402 ///
1403 /// This function returns the maximum number of store operations permitted
1404 /// to replace a call to llvm.memset. The value is set by the target at the
1405 /// performance threshold for such a replacement. If OptSize is true,
1406 /// return the limit for functions that have OptSize attribute.
1407 unsigned getMaxStoresPerMemset(bool OptSize) const {
1408 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1409 }
1410
1411 /// Get maximum # of store operations permitted for llvm.memcpy
1412 ///
1413 /// This function returns the maximum number of store operations permitted
1414 /// to replace a call to llvm.memcpy. The value is set by the target at the
1415 /// performance threshold for such a replacement. If OptSize is true,
1416 /// return the limit for functions that have OptSize attribute.
1417 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1418 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1419 }
1420
1421 /// \brief Get maximum # of store operations to be glued together
1422 ///
1423 /// This function returns the maximum number of store operations permitted
1424 /// to glue together during lowering of llvm.memcpy. The value is set by
1425 /// the target at the performance threshold for such a replacement.
1426 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1427 return MaxGluedStoresPerMemcpy;
1428 }
1429
1430 /// Get maximum # of load operations permitted for memcmp
1431 ///
1432 /// This function returns the maximum number of load operations permitted
1433 /// to replace a call to memcmp. The value is set by the target at the
1434 /// performance threshold for such a replacement. If OptSize is true,
1435 /// return the limit for functions that have OptSize attribute.
1436 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1437 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1438 }
1439
1440 /// Get maximum # of store operations permitted for llvm.memmove
1441 ///
1442 /// This function returns the maximum number of store operations permitted
1443 /// to replace a call to llvm.memmove. The value is set by the target at the
1444 /// performance threshold for such a replacement. If OptSize is true,
1445 /// return the limit for functions that have OptSize attribute.
1446 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1447 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1448 }
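// Illustrative sketch (hypothetical values, not part of this header): the
// limits above are protected members that a derived TargetLowering
// constructor typically tunes, e.g.
//   MaxStoresPerMemset  = 16; MaxStoresPerMemsetOptSize  = 8;
//   MaxStoresPerMemcpy  = 8;  MaxStoresPerMemcpyOptSize  = 4;
//   MaxStoresPerMemmove = 8;  MaxStoresPerMemmoveOptSize = 4;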
1449
1450 /// Determine if the target supports unaligned memory accesses.
1451 ///
1452 /// This function returns true if the target allows unaligned memory accesses
1453 /// of the specified type in the given address space. If true, it also returns
1454 /// whether the unaligned memory access is "fast" in the last argument by
1455 /// reference. This is used, for example, in situations where an array
1456 /// copy/move/set is converted to a sequence of store operations. Its use
1457 /// helps to ensure that such replacements don't generate code that causes an
1458 /// alignment error (trap) on the target machine.
1459 virtual bool allowsMisalignedMemoryAccesses(
1460 EVT, unsigned AddrSpace = 0, unsigned Align = 1,
1461 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1462 bool * /*Fast*/ = nullptr) const {
1463 return false;
1464 }
1465
1466 /// LLT handling variant.
1467 virtual bool allowsMisalignedMemoryAccesses(
1468 LLT, unsigned AddrSpace = 0, unsigned Align = 1,
1469 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1470 bool * /*Fast*/ = nullptr) const {
1471 return false;
1472 }
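// Illustrative override on a hypothetical target that tolerates misaligned
// scalar accesses but keeps vectors naturally aligned (a sketch, not a
// definitive implementation):
//   bool MyTargetLowering::allowsMisalignedMemoryAccesses(
//       EVT VT, unsigned AddrSpace, unsigned Align,
//       MachineMemOperand::Flags Flags, bool *Fast) const {
//     if (VT.isVector())
//       return false;
//     if (Fast)
//       *Fast = Align >= 4;   // word-aligned accesses are fast
//     return true;
//   }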
1473
1474 /// This function returns true if the memory access is aligned or if the
1475 /// target allows this specific unaligned memory access. If the access is
1476 /// allowed, the optional final parameter returns if the access is also fast
1477 /// (as defined by the target).
1478 bool allowsMemoryAccessForAlignment(
1479 LLVMContext &Context, const DataLayout &DL, EVT VT,
1480 unsigned AddrSpace = 0, unsigned Alignment = 1,
1481 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1482 bool *Fast = nullptr) const;
1483
1484 /// Return true if the memory access of this type is aligned or if the target
1485 /// allows this specific unaligned access for the given MachineMemOperand.
1486 /// If the access is allowed, the optional final parameter returns if the
1487 /// access is also fast (as defined by the target).
1488 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1489 const DataLayout &DL, EVT VT,
1490 const MachineMemOperand &MMO,
1491 bool *Fast = nullptr) const;
1492
1493 /// Return true if the target supports a memory access of this type for the
1494 /// given address space and alignment. If the access is allowed, the optional
1495 /// final parameter returns if the access is also fast (as defined by the
1496 /// target).
1497 virtual bool
1498 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1499 unsigned AddrSpace = 0, unsigned Alignment = 1,
1500 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1501 bool *Fast = nullptr) const;
1502
1503 /// Return true if the target supports a memory access of this type for the
1504 /// given MachineMemOperand. If the access is allowed, the optional
1505 /// final parameter returns if the access is also fast (as defined by the
1506 /// target).
1507 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1508 const MachineMemOperand &MMO,
1509 bool *Fast = nullptr) const;
1510
1511 /// Returns the target specific optimal type for load and store operations as
1512 /// a result of memset, memcpy, and memmove lowering.
1513 ///
1514 /// If DstAlign is zero, it means any destination alignment can satisfy the
1515 /// constraint. Similarly, if SrcAlign is zero it means there isn't a need to
1516 /// check it against the alignment requirement, probably because the
1517 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
1518 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
1519 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
1520 /// does not need to be loaded. It returns EVT::Other if the type should be
1521 /// determined using generic target-independent logic.
1522 virtual EVT
1523 getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
1524 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1525 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1526 const AttributeList & /*FuncAttributes*/) const {
1527 return MVT::Other;
1528 }
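// Illustrative override for a hypothetical target with 128-bit vector stores
// (a sketch under assumed costs, not a definitive implementation):
//   EVT MyTargetLowering::getOptimalMemOpType(
//       uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
//       bool ZeroMemset, bool MemcpyStrSrc,
//       const AttributeList &FuncAttributes) const {
//     if (IsMemset && Size >= 16 && (DstAlign == 0 || DstAlign >= 16))
//       return MVT::v4i32;      // use 128-bit stores for big memsets
//     return MVT::Other;        // otherwise defer to generic logic
//   }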
1529
1530
1531 /// LLT returning variant.
1532 virtual LLT
1533 getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
1534 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1535 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1536 const AttributeList & /*FuncAttributes*/) const {
1537 return LLT();
1538 }
1539
1540 /// Returns true if it's safe to use load / store of the specified type to
1541 /// expand memcpy / memset inline.
1542 ///
1543 /// This is mostly true for all types except for some special cases. For
1544 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1545 /// fstpl which also does type conversion. Note the specified type doesn't
1546 /// have to be legal as the hook is used before type legalization.
1547 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1548
1549 /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp.
1550 bool usesUnderscoreSetJmp() const {
1551 return UseUnderscoreSetJmp;
1552 }
1553
1554 /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp.
1555 bool usesUnderscoreLongJmp() const {
1556 return UseUnderscoreLongJmp;
1557 }
1558
1559 /// Return lower limit for number of blocks in a jump table.
1560 virtual unsigned getMinimumJumpTableEntries() const;
1561
1562 /// Return lower limit of the density in a jump table.
1563 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1564
1565 /// Return upper limit for number of entries in a jump table.
1566 /// Zero if no limit.
1567 unsigned getMaximumJumpTableSize() const;
1568
1569 virtual bool isJumpTableRelative() const {
1570 return TM.isPositionIndependent();
1571 }
1572
1573 /// If a physical register, this specifies the register that
1574 /// llvm.savestack/llvm.restorestack should save and restore.
1575 unsigned getStackPointerRegisterToSaveRestore() const {
1576 return StackPointerRegisterToSaveRestore;
1577 }
1578
1579 /// If a physical register, this returns the register that receives the
1580 /// exception address on entry to an EH pad.
1581 virtual unsigned
1582 getExceptionPointerRegister(const Constant *PersonalityFn) const {
1583 // 0 is guaranteed to be the NoRegister value on all targets
1584 return 0;
1585 }
1586
1587 /// If a physical register, this returns the register that receives the
1588 /// exception typeid on entry to a landing pad.
1589 virtual unsigned
1590 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
1591 // 0 is guaranteed to be the NoRegister value on all targets
1592 return 0;
1593 }
1594
1595 virtual bool needsFixedCatchObjects() const {
1596 report_fatal_error("Funclet EH is not implemented for this target");
1597 }
1598
1599 /// Return the minimum stack alignment of an argument.
1600 Align getMinStackArgumentAlignment() const {
1601 return MinStackArgumentAlignment;
1602 }
1603
1604 /// Return the minimum function alignment.
1605 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
1606
1607 /// Return the preferred function alignment.
1608 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
1609
1610 /// Return the preferred loop alignment.
1611 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
1612 return PrefLoopAlignment;
1613 }
1614
1615 /// Should loops be aligned even when the function is marked OptSize (but not
1616 /// MinSize).
1617 virtual bool alignLoopsWithOptSize() const {
1618 return false;
1619 }
1620
1621 /// If the target has a standard location for the stack protector guard,
1622 /// returns the address of that location. Otherwise, returns nullptr.
1623 /// DEPRECATED: please override useLoadStackGuardNode and customize
1624 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
1625 virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
1626
1627 /// Inserts necessary declarations for SSP (stack protection) purpose.
1628 /// Should be used only when getIRStackGuard returns nullptr.
1629 virtual void insertSSPDeclarations(Module &M) const;
1630
1631 /// Return the variable that's previously inserted by insertSSPDeclarations,
1632 /// if any, otherwise return nullptr. Should be used only when
1633 /// getIRStackGuard returns nullptr.
1634 virtual Value *getSDagStackGuard(const Module &M) const;
1635
1636 /// If this function returns true, stack protection checks should XOR the
1637 /// frame pointer (or whichever pointer is used to address locals) into the
1638 /// stack guard value before checking it. getIRStackGuard must return nullptr
1639 /// if this returns true.
1640 virtual bool useStackGuardXorFP() const { return false; }
1641
1642 /// If the target has a standard stack protection check function that
1643 /// performs validation and error handling, returns the function. Otherwise,
1644 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
1645 /// Should be used only when getIRStackGuard returns nullptr.
1646 virtual Function *getSSPStackGuardCheck(const Module &M) const;
1647
1648protected:
1649 Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
1650 bool UseTLS) const;
1651
1652public:
1653 /// Returns the target-specific address of the unsafe stack pointer.
1654 virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
1655
1656 /// Returns the name of the symbol used to emit stack probes or the empty
1657 /// string if not applicable.
1658 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
1659 return "";
1660 }
1661
1662 /// Returns true if a cast between SrcAS and DestAS is a noop.
1663 virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1664 return false;
1665 }
1666
1667 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
1668 /// are happy to sink it into basic blocks. A cast may be free, but not
1669 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
1670 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1671 return isNoopAddrSpaceCast(SrcAS, DestAS);
1672 }
1673
1674 /// Return true if the pointer arguments to CI should be aligned by aligning
1675 /// the object whose address is being passed. If so then MinSize is set to the
1676 /// minimum size the object must be to be aligned and PrefAlign is set to the
1677 /// preferred alignment.
1678 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
1679 unsigned & /*PrefAlign*/) const {
1680 return false;
1681 }
1682
1683 //===--------------------------------------------------------------------===//
1684 /// \name Helpers for TargetTransformInfo implementations
1685 /// @{
1686
1687 /// Get the ISD node that corresponds to the Instruction class opcode.
1688 int InstructionOpcodeToISD(unsigned Opcode) const;
1689
1690 /// Estimate the cost of type-legalization and the legalized type.
1691 std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
1692 Type *Ty) const;
1693
1694 /// @}
1695
1696 //===--------------------------------------------------------------------===//
1697 /// \name Helpers for atomic expansion.
1698 /// @{
1699
1700 /// Returns the maximum atomic operation size (in bits) supported by
1701 /// the backend. Atomic operations greater than this size (as well
1702 /// as ones that are not naturally aligned), will be expanded by
1703 /// AtomicExpandPass into an __atomic_* library call.
1704 unsigned getMaxAtomicSizeInBitsSupported() const {
1705 return MaxAtomicSizeInBitsSupported;
1706 }
1707
1708 /// Returns the size of the smallest cmpxchg or ll/sc instruction
1709 /// the backend supports. Any smaller operations are widened in
1710 /// AtomicExpandPass.
1711 ///
1712 /// Note that *unlike* operations above the maximum size, atomic ops
1713 /// are still natively supported below the minimum; they just
1714 /// require a more complex expansion.
1715 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
1716
1717 /// Whether the target supports unaligned atomic operations.
1718 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
1719
1720 /// Whether AtomicExpandPass should automatically insert fences and reduce
1721 /// ordering for this atomic. This should be true for most architectures with
1722 /// weak memory ordering. Defaults to false.
1723 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
1724 return false;
1725 }
1726
1727 /// Perform a load-linked operation on Addr, returning a "Value *" with the
1728 /// corresponding pointee type. This may entail some non-trivial operations to
1729 /// truncate or reconstruct types that will be illegal in the backend. See
1730 /// ARMISelLowering for an example implementation.
1731 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
1732 AtomicOrdering Ord) const {
1733 llvm_unreachable("Load linked unimplemented on this target");
1734 }
1735
1736 /// Perform a store-conditional operation to Addr. Return the status of the
1737 /// store. This should be 0 if the store succeeded, non-zero otherwise.
1738 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1739 Value *Addr, AtomicOrdering Ord) const {
1740 llvm_unreachable("Store conditional unimplemented on this target");
1741 }
1742
1743 /// Perform a masked atomicrmw using a target-specific intrinsic. This
1744 /// represents the core LL/SC loop which will be lowered at a late stage by
1745 /// the backend.
1746 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
1747 AtomicRMWInst *AI,
1748 Value *AlignedAddr, Value *Incr,
1749 Value *Mask, Value *ShiftAmt,
1750 AtomicOrdering Ord) const {
1751 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
1752 }
1753
1754 /// Perform a masked cmpxchg using a target-specific intrinsic. This
1755 /// represents the core LL/SC loop which will be lowered at a late stage by
1756 /// the backend.
1757 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
1758 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1759 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1760 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
1761 }
1762
1763 /// Inserts in the IR a target-specific intrinsic specifying a fence.
1764 /// It is called by AtomicExpandPass before expanding an
1765 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
1766 /// if shouldInsertFencesForAtomic returns true.
1767 ///
1768 /// Inst is the original atomic instruction, prior to other expansions that
1769 /// may be performed.
1770 ///
1771 /// This function should either return a nullptr, or a pointer to an IR-level
1772 /// Instruction*. Even complex fence sequences can be represented by a
1773 /// single Instruction* through an intrinsic to be lowered later.
1774 /// Backends should override this method to produce target-specific intrinsic
1775 /// for their fences.
1776 /// FIXME: Please note that the default implementation here in terms of
1777 /// IR-level fences exists for historical/compatibility reasons and is
1778 /// *unsound*! Fences cannot, in general, be used to restore sequential
1779 /// consistency. For example, consider the following:
1780 /// atomic<int> x = y = 0;
1781 /// int r1, r2, r3, r4;
1782 /// Thread 0:
1783 /// x.store(1);
1784 /// Thread 1:
1785 /// y.store(1);
1786 /// Thread 2:
1787 /// r1 = x.load();
1788 /// r2 = y.load();
1789 /// Thread 3:
1790 /// r3 = y.load();
1791 /// r4 = x.load();
1792 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
1793 /// seq_cst. But if they are lowered to monotonic accesses, no amount of
1794 /// IR-level fences can prevent it.
1795 /// @{
1796 virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
1797 AtomicOrdering Ord) const {
1798 if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
1799 return Builder.CreateFence(Ord);
1800 else
1801 return nullptr;
1802 }
1803
1804 virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
1805 Instruction *Inst,
1806 AtomicOrdering Ord) const {
1807 if (isAcquireOrStronger(Ord))
1808 return Builder.CreateFence(Ord);
1809 else
1810 return nullptr;
1811 }
1812 /// @}
1813
1814 // Emits code that executes when the comparison result in the ll/sc
1815 // expansion of a cmpxchg instruction is such that the store-conditional will
1816 // not execute. This makes it possible to balance out the load-linked with
1817 // a dedicated instruction, if desired.
1818 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
1819 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
1820 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1821
1822 /// Returns true if the given (atomic) store should be expanded by the
1823 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
1824 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1825 return false;
1826 }
1827
1828 /// Returns true if arguments should be sign-extended in lib calls.
1829 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
1830 return IsSigned;
1831 }
1832
1833 /// Returns true if arguments should be extended in lib calls.
1834 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
1835 return true;
1836 }
1837
1838 /// Returns how the given (atomic) load should be expanded by the
1839 /// IR-level AtomicExpand pass.
1840 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1841 return AtomicExpansionKind::None;
1842 }
1843
1844 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
1845 /// AtomicExpand pass.
1846 virtual AtomicExpansionKind
1847 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1848 return AtomicExpansionKind::None;
1849 }
1850
1851 /// Returns how the IR-level AtomicExpand pass should expand the given
1852 /// AtomicRMW, if at all. Default is to never expand.
1853 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1854 return RMW->isFloatingPointOperation() ?
1855 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
1856 }
1857
1858 /// On some platforms, an AtomicRMW that never actually modifies the value
1859 /// (such as fetch_add of 0) can be turned into a fence followed by an
1860 /// atomic load. This may sound useless, but it makes it possible for the
1861 /// processor to keep the cacheline shared, dramatically improving
1862 /// performance. And such idempotent RMWs are useful for implementing some
1863 /// kinds of locks, see for example (justification + benchmarks):
1864 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1865 /// This method tries doing that transformation, returning the atomic load if
1866 /// it succeeds, and nullptr otherwise.
1867 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
1868 /// another round of expansion.
1869 virtual LoadInst *
1870 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
1871 return nullptr;
1872 }
1873
1874 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
1875 /// SIGN_EXTEND, or ANY_EXTEND).
1876 virtual ISD::NodeType getExtendForAtomicOps() const {
1877 return ISD::ZERO_EXTEND;
1878 }
1879
1880 /// @}
1881
1882 /// Returns true if we should normalize
1883 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1884 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
1885 /// that it saves us from materializing N0 and N1 in an integer register.
1886 /// Targets that are able to perform and/or on flags should return false here.
1887 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
1888 EVT VT) const {
1889 // If a target has multiple condition registers, then it likely has logical
1890 // operations on those registers.
1891 if (hasMultipleConditionRegisters())
1892 return false;
1893 // Only do the transform if the value won't be split into multiple
1894 // registers.
1895 LegalizeTypeAction Action = getTypeAction(Context, VT);
1896 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
1897 Action != TypeSplitVector;
1898 }
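// The normalization above, written out in IR terms:
//   %c = and i1 %c0, %c1
//   %r = select i1 %c, i32 %x, i32 %y
// becomes
//   %t = select i1 %c1, i32 %x, i32 %y
//   %r = select i1 %c0, i32 %t, i32 %y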
1899
1900 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
1901
1902 /// Return true if a select of constants (select Cond, C1, C2) should be
1903 /// transformed into simple math ops with the condition value. For example:
1904 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
1905 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
1906 return false;
1907 }
1908
1909 /// Return true if it is profitable to transform an integer
1910 /// multiplication-by-constant into simpler operations like shifts and adds.
1911 /// This may be true if the target does not directly support the
1912 /// multiplication operation for the specified type or the sequence of simpler
1913 /// ops is faster than the multiply.
1914 virtual bool decomposeMulByConstant(LLVMContext &Context,
1915 EVT VT, SDValue C) const {
1916 return false;
1917 }
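// Example of the decomposition in question: x * 9 can be rewritten as
//   (x << 3) + x
// which wins on targets where a shift-and-add pair is cheaper than a
// general-purpose multiply.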
1918
1919 /// Return true if it is more correct/profitable to use strict FP_TO_INT
1920 /// conversion operations - canonicalizing the FP source value instead of
1921 /// converting all cases and then selecting based on value.
1922 /// This may be true if the target throws exceptions for out of bounds
1923 /// conversions or has fast FP CMOV.
1924 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1925 bool IsSigned) const {
1926 return false;
1927 }
1928
1929 //===--------------------------------------------------------------------===//
1930 // TargetLowering Configuration Methods - These methods should be invoked by
1931 // the derived class constructor to configure this object for the target.
1932 //
1933protected:
1934 /// Specify how the target extends the result of integer and floating point
1935 /// boolean values from i1 to a wider type. See getBooleanContents.
1936 void setBooleanContents(BooleanContent Ty) {
1937 BooleanContents = Ty;
1938 BooleanFloatContents = Ty;
1939 }
1940
1941 /// Specify how the target extends the result of integer and floating point
1942 /// boolean values from i1 to a wider type. See getBooleanContents.
1943 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
1944 BooleanContents = IntTy;
1945 BooleanFloatContents = FloatTy;
1946 }
1947
1948 /// Specify how the target extends the result of a vector boolean value from a
1949 /// vector of i1 to a wider type. See getBooleanContents.
1950 void setBooleanVectorContents(BooleanContent Ty) {
1951 BooleanVectorContents = Ty;
1952 }
1953
1954 /// Specify the target scheduling preference.
1955 void setSchedulingPreference(Sched::Preference Pref) {
1956 SchedPreferenceInfo = Pref;
1957 }
1958
1959 /// Indicate whether this target prefers to use _setjmp to implement
1960 /// llvm.setjmp or the version without _. Defaults to false.
1961 void setUseUnderscoreSetJmp(bool Val) {
1962 UseUnderscoreSetJmp = Val;
1963 }
1964
1965 /// Indicate whether this target prefers to use _longjmp to implement
1966 /// llvm.longjmp or the version without _. Defaults to false.
1967 void setUseUnderscoreLongJmp(bool Val) {
1968 UseUnderscoreLongJmp = Val;
1969 }
1970
1971 /// Indicate the minimum number of blocks to generate jump tables.
1972 void setMinimumJumpTableEntries(unsigned Val);
1973
1974 /// Indicate the maximum number of entries in jump tables.
1975 /// Set to zero to generate unlimited jump tables.
1976 void setMaximumJumpTableSize(unsigned);
1977
1978 /// If set to a physical register, this specifies the register that
1979 /// llvm.savestack/llvm.restorestack should save and restore.
1980 void setStackPointerRegisterToSaveRestore(unsigned R) {
1981 StackPointerRegisterToSaveRestore = R;
1982 }
1983
1984 /// Tells the code generator that the target has multiple (allocatable)
1985 /// condition registers that can be used to store the results of comparisons
1986 /// for use by selects and conditional branches. With multiple condition
1987 /// registers, the code generator will not aggressively sink comparisons into
1988 /// the blocks of their users.
1989 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
1990 HasMultipleConditionRegisters = hasManyRegs;
1991 }
1992
1993 /// Tells the code generator that the target has BitExtract instructions.
1994 /// The code generator will aggressively sink "shift"s into the blocks of
1995 /// their users if the users will generate "and" instructions which can be
1996 /// combined with "shift" to BitExtract instructions.
1997 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
1998 HasExtractBitsInsn = hasExtractInsn;
1999 }
2000
2001 /// Tells the code generator not to expand logic operations on comparison
2002 /// predicates into separate sequences that increase the amount of flow
2003 /// control.
2004 void setJumpIsExpensive(bool isExpensive = true);
2005
2006 /// Tells the code generator which bitwidths to bypass.
2007 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2008 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2009 }
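// Example usage (e.g. on x86 subtargets with slow 64-bit division):
//   addBypassSlowDiv(64, 32);
// asks the optimizer to emit a runtime check and perform the division in
// 32 bits whenever both operands of a 64-bit udiv/sdiv fit in 32 bits.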
2010
2011 /// Add the specified register class as an available regclass for the
2012 /// specified value type. This indicates the selector can handle values of
2013 /// that class natively.
2014 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2015 assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
2016 RegClassForVT[VT.SimpleTy] = RC;
2017 }
2018
2019 /// Return the largest legal super-reg register class of the register class
2020 /// for the specified type and its associated "cost".
2021 virtual std::pair<const TargetRegisterClass *, uint8_t>
2022 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2023
2024 /// Once all of the register classes are added, this allows us to compute
2025 /// derived properties we expose.
2026 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2027
2028 /// Indicate that the specified operation does not work with the specified
2029 /// type and indicate what to do about it. Note that VT may refer to either
2030 /// the type of a result or that of an operand of Op.
2031 void setOperationAction(unsigned Op, MVT VT,
2032 LegalizeAction Action) {
2033 assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
2034 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2035 }
2036
2037 /// Indicate that the specified load with extension does not work with the
2038 /// specified type and indicate what to do about it.
2039 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2040 LegalizeAction Action) {
2041 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2042 MemVT.isValid() && "Table isn't big enough!");
2043 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2044 unsigned Shift = 4 * ExtType;
2045 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2046 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2047 }
2048
2049 /// Indicate that the specified truncating store does not work with the
2050 /// specified type and indicate what to do about it.
2051 void setTruncStoreAction(MVT ValVT, MVT MemVT,
2052 LegalizeAction Action) {
2053 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2054 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2055 }
2056
2057 /// Indicate that the specified indexed load does or does not work with the
2058 /// specified type and indicate what to do about it.
2059 ///
2060 /// NOTE: All indexed mode loads are initialized to Expand in
2061 /// TargetLowering.cpp
2062 void setIndexedLoadAction(unsigned IdxMode, MVT VT,
2063 LegalizeAction Action) {
2064 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
2065 (unsigned)Action < 0xf && "Table isn't big enough!");
2066 // Load actions are kept in the upper half.
2067 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
2068 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
2069 }
2070
2071 /// Indicate that the specified indexed store does or does not work with the
2072 /// specified type and indicate what to do about it.
2073 ///
2074 /// NOTE: All indexed mode stores are initialized to Expand in
2075 /// TargetLowering.cpp
2076 void setIndexedStoreAction(unsigned IdxMode, MVT VT,
2077 LegalizeAction Action) {
2078 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
2079 (unsigned)Action < 0xf && "Table isn't big enough!");
2081 // Store actions are kept in the lower half.
2081 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
2082 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
2083 }
2084
2085 /// Indicate that the specified condition code is or isn't supported on the
2086 /// target and indicate what to do about it.
2087 void setCondCodeAction(ISD::CondCode CC, MVT VT,
2088 LegalizeAction Action) {
2089 assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
2090 "Table isn't big enough!");
2091 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2092 /// The lower 3 bits of the SimpleTy select the Nth 4-bit group within the
2093 /// 32-bit value, and the upper 29 bits index into the second dimension of the
2094 /// array to select which 32-bit value to use.
2095 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2096 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2097 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2098 }
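// Worked example of the packing above: for VT.SimpleTy == 11,
//   Shift = 4 * (11 & 0x7) = 12,
// so the action occupies bits [12,16) of CondCodeActions[CC][11 >> 3],
// i.e. element 1 of the second dimension.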
2099
2100 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2101 /// to trying a larger integer/fp until it can find one that works. If that
2102 /// default is insufficient, this method can be used by the target to override
2103 /// the default.
2104 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2105 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2106 }
2107
2108 /// Convenience method to set an operation to Promote and specify the type
2109 /// in a single call.
2110 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2111 setOperationAction(Opc, OrigVT, Promote);
2112 AddPromotedToType(Opc, OrigVT, DestVT);
2113 }
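// Illustrative sketch (hypothetical target, not part of this header): the
// configuration hooks above are normally called from the derived
// TargetLowering constructor, roughly like
//   addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);
//   setOperationAction(ISD::SDIV, MVT::i32, Expand);        // no HW divider
//   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
//   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
//   setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
//   computeRegisterProperties(Subtarget.getRegisterInfo());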
2114
2115 /// Targets should invoke this method for each target independent node that
2116 /// they want to provide a custom DAG combiner for by implementing the
2117 /// PerformDAGCombine virtual method.
2118 void setTargetDAGCombine(ISD::NodeType NT) {
2119 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
2120 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
2121 }
2122
2123 /// Set the target's minimum function alignment.
2124 void setMinFunctionAlignment(Align Alignment) {
2125 MinFunctionAlignment = Alignment;
2126 }
2127
2128 /// Set the target's preferred function alignment. This should be set if
2129 /// there is a performance benefit to higher-than-minimum alignment
2130 void setPrefFunctionAlignment(Align Alignment) {
2131 PrefFunctionAlignment = Alignment;
2132 }
2133
2134 /// Set the target's preferred loop alignment. Default alignment is one, it
2135 /// means the target does not care about loop alignment. The target may also
2136 /// override getPrefLoopAlignment to provide per-loop values.
2137 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2138
2139 /// Set the minimum stack alignment of an argument.
2140 void setMinStackArgumentAlignment(Align Alignment) {
2141 MinStackArgumentAlignment = Alignment;
2142 }
2143
2144 /// Set the maximum atomic operation size supported by the
2145 /// backend. Atomic operations greater than this size (as well as
2146 /// ones that are not naturally aligned), will be expanded by
2147 /// AtomicExpandPass into an __atomic_* library call.
2148 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2149 MaxAtomicSizeInBitsSupported = SizeInBits;
2150 }
2151
2152 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2153 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2154 MinCmpXchgSizeInBits = SizeInBits;
2155 }
2156
2157 /// Sets whether unaligned atomic operations are supported.
2158 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2159 SupportsUnalignedAtomics = UnalignedSupported;
2160 }
2161
2162public:
2163 //===--------------------------------------------------------------------===//
2164 // Addressing mode description hooks (used by LSR etc).
2165 //
2166
2167 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2168 /// instructions reading the address. This allows as much computation as
2169 /// possible to be done in the address mode for that operand. This hook lets
2170 /// targets also pass back when this should be done on intrinsics which
2171 /// load/store.
2172 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2173 SmallVectorImpl<Value*> &/*Ops*/,
2174 Type *&/*AccessTy*/) const {
2175 return false;
2176 }
2177
2178 /// This represents an addressing mode of:
2179 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
2180 /// If BaseGV is null, there is no BaseGV.
2181 /// If BaseOffs is zero, there is no base offset.
2182 /// If HasBaseReg is false, there is no base register.
2183 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2184 /// no scale.
2185 struct AddrMode {
2186 GlobalValue *BaseGV = nullptr;
2187 int64_t BaseOffs = 0;
2188 bool HasBaseReg = false;
2189 int64_t Scale = 0;
2190 AddrMode() = default;
2191 };
2192
2193 /// Return true if the addressing mode represented by AM is legal for this
2194 /// target, for a load/store of the specified type.
2195 ///
2196 /// The type may be VoidTy, in which case only return true if the addressing
2197 /// mode is legal for a load/store of any legal type. TODO: Handle
2198 /// pre/postinc as well.
2199 ///
2200 /// If the address space cannot be determined, it will be -1.
2201 ///
2202 /// TODO: Remove default argument
2203 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2204 Type *Ty, unsigned AddrSpace,
2205 Instruction *I = nullptr) const;
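// Illustrative sketch (not part of this header): a caller such as
// CodeGenPrepare fills in an AddrMode and queries it, assuming a
// TargetLowering reference TLI, a DataLayout DL, and an i32 access type:
//   TargetLowering::AddrMode AM;
//   AM.HasBaseReg = true;
//   AM.Scale      = 4;        // base + 4*index + 16
//   AM.BaseOffs   = 16;
//   bool Legal = TLI.isLegalAddressingMode(DL, AM, Int32Ty, /*AddrSpace=*/0);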
2206
2207 /// Return the cost of the scaling factor used in the addressing mode
2208 /// represented by AM for this target, for a load/store of the specified type.
2209 ///
2210 /// If the AM is supported, the return value must be >= 0.
2211 /// If the AM is not supported, it returns a negative value.
2212 /// TODO: Handle pre/postinc as well.
2213 /// TODO: Remove default argument
2214 virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
2215 Type *Ty, unsigned AS = 0) const {
2216 // Default: assume that any scaling factor used in a legal AM is free.
2217 if (isLegalAddressingMode(DL, AM, Ty, AS))
2218 return 0;
2219 return -1;
2220 }
2221
2222 /// Return true if the specified immediate is legal icmp immediate, that is
2223 /// the target has icmp instructions which can compare a register against the
2224 /// immediate without having to materialize the immediate into a register.
2225 virtual bool isLegalICmpImmediate(int64_t) const {
2226 return true;
2227 }
2228
2229 /// Return true if the specified immediate is legal add immediate, that is the
2230 /// target has add instructions which can add a register with the immediate
2231 /// without having to materialize the immediate into a register.
2232 virtual bool isLegalAddImmediate(int64_t) const {
2233 return true;
2234 }
2235
2236 /// Return true if the specified immediate is legal for the value input of a
2237 /// store instruction.
2238 virtual bool isLegalStoreImmediate(int64_t Value) const {
2239 // Default implementation assumes that at least 0 works since it is likely
2240 // that a zero register exists or a zero immediate is allowed.
2241 return Value == 0;
2242 }
2243
2244 /// Return true if it's significantly cheaper to shift a vector by a uniform
2245 /// scalar than by an amount which will vary across each lane. On x86, for
2246 /// example, there is a "psllw" instruction for the former case, but no simple
2247 /// instruction for a general "a << b" operation on vectors.
2248 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2249 return false;
2250 }
2251
2252 /// Returns true if the opcode is a commutative binary operation.
2253 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2254 // FIXME: This should get its info from the td file.
2255 switch (Opcode) {
2256 case ISD::ADD:
2257 case ISD::SMIN:
2258 case ISD::SMAX:
2259 case ISD::UMIN:
2260 case ISD::UMAX:
2261 case ISD::MUL:
2262 case ISD::MULHU:
2263 case ISD::MULHS:
2264 case ISD::SMUL_LOHI:
2265 case ISD::UMUL_LOHI:
2266 case ISD::FADD:
2267 case ISD::FMUL:
2268 case ISD::AND:
2269 case ISD::OR:
2270 case ISD::XOR:
2271 case ISD::SADDO:
2272 case ISD::UADDO:
2273 case ISD::ADDC:
2274 case ISD::ADDE:
2275 case ISD::SADDSAT:
2276 case ISD::UADDSAT:
2277 case ISD::FMINNUM:
2278 case ISD::FMAXNUM:
2279 case ISD::FMINNUM_IEEE:
2280 case ISD::FMAXNUM_IEEE:
2281 case ISD::FMINIMUM:
2282 case ISD::FMAXIMUM:
2283 return true;
2284 default: return false;
2285 }
2286 }
2287
2288 /// Return true if the node is a math/logic binary operator.
2289 virtual bool isBinOp(unsigned Opcode) const {
2290 // A commutative binop must be a binop.
2291 if (isCommutativeBinOp(Opcode))
2292 return true;
2293 // These are non-commutative binops.
2294 switch (Opcode) {
2295 case ISD::SUB:
2296 case ISD::SHL:
2297 case ISD::SRL:
2298 case ISD::SRA:
2299 case ISD::SDIV:
2300 case ISD::UDIV:
2301 case ISD::SREM:
2302 case ISD::UREM:
2303 case ISD::FSUB:
2304 case ISD::FDIV:
2305 case ISD::FREM:
2306 return true;
2307 default:
2308 return false;
2309 }
2310 }
2311
2312 /// Return true if it's free to truncate a value of type FromTy to type
2313 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
2314 /// by referencing its sub-register AX.
2315 /// Targets must return false when FromTy <= ToTy.
2316 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2317 return false;
2318 }
2319
2320 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2321 /// whether a call is in tail position. Typically this means that both results
2322 /// would be assigned to the same register or stack slot, but it could mean
2323 /// the target performs adequate checks of its own before proceeding with the
2324 /// tail call. Targets must return false when FromTy <= ToTy.
2325 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2326 return false;
2327 }
2328
2329 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
2330 return false;
2331 }
2332
2333 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2334
2335 /// Return true if the extension represented by \p I is free.
2336 /// Unlike the is[Z|FP]ExtFree family, which is based on types,
2337 /// this method can use the context provided by \p I to decide
2338 /// whether or not \p I is free.
2339 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2340 /// In other words, if is[Z|FP]ExtFree returns true, then this method
2341 /// returns true as well. The converse is not true.
2342 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2343 /// \pre \p I must be a sign, zero, or fp extension.
2344 bool isExtFree(const Instruction *I) const {
2345 switch (I->getOpcode()) {
2346 case Instruction::FPExt:
2347 if (isFPExtFree(EVT::getEVT(I->getType()),
2348 EVT::getEVT(I->getOperand(0)->getType())))
2349 return true;
2350 break;
2351 case Instruction::ZExt:
2352 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
2353 return true;
2354 break;
2355 case Instruction::SExt:
2356 break;
2357 default:
2358 llvm_unreachable("Instruction is not an extension");
2359 }
2360 return isExtFreeImpl(I);
2361 }
2362
2363 /// Return true if \p Load and \p Ext can form an ExtLoad.
2364 /// For example, in AArch64
2365 /// %L = load i8, i8* %ptr
2366 /// %E = zext i8 %L to i32
2367 /// can be lowered into one load instruction
2368 /// ldrb w0, [x0]
2369 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
2370 const DataLayout &DL) const {
2371 EVT VT = getValueType(DL, Ext->getType());
2372 EVT LoadVT = getValueType(DL, Load->getType());
2373
2374 // If the load has other users and the truncate is not free, the ext
2375 // probably isn't free.
2376 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
2377 !isTruncateFree(Ext->getType(), Load->getType()))
2378 return false;
2379
2380 // Check whether the target supports casts folded into loads.
2381 unsigned LType;
2382 if (isa<ZExtInst>(Ext))
2383 LType = ISD::ZEXTLOAD;
2384 else {
2385 assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
2386 LType = ISD::SEXTLOAD;
2387 }
2388
2389 return isLoadExtLegal(LType, VT, LoadVT);
2390 }
2391
2392 /// Return true if any actual instruction that defines a value of type FromTy
2393 /// implicitly zero-extends the value to ToTy in the result register.
2394 ///
2395 /// The function should return true when it is likely that the truncate can
2396 /// be freely folded with an instruction defining a value of FromTy. If
2397 /// the defining instruction is unknown (because you're looking at a
2398 /// function argument, PHI, etc.) then the target may require an
2399 /// explicit truncate, which is not necessarily free, but this function
2400 /// does not deal with those cases.
2401 /// Targets must return false when FromTy >= ToTy.
2402 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
2403 return false;
2404 }
2405
2406 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
2407 return false;
2408 }
2409
2410 /// Return true if sign-extension from FromTy to ToTy is cheaper than
2411 /// zero-extension.
2412 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
2413 return false;
2414 }
2415
2416 /// Return true if sinking I's operands to the same basic block as I is
2417 /// profitable, e.g. because the operands can be folded into a target
2418 /// instruction during instruction selection. After calling the function
2419 /// \p Ops contains the Uses to sink ordered by dominance (dominating users
2420 /// come first).
2421 virtual bool shouldSinkOperands(Instruction *I,
2422 SmallVectorImpl<Use *> &Ops) const {
2423 return false;
2424 }
2425
2426 /// Return true if the target supplies and combines to a paired load
2427 /// two loaded values of type LoadedType next to each other in memory.
2428 /// RequiredAlignment gives the minimal alignment constraints that must be met
2429 /// to be able to select this paired load.
2430 ///
2431 /// This information is *not* used to generate actual paired loads, but it is
2432 /// used to generate a sequence of loads that is easier to combine into a
2433 /// paired load.
2434 /// For instance, something like this:
2435 /// a = load i64* addr
2436 /// b = trunc i64 a to i32
2437 /// c = lshr i64 a, 32
2438 /// d = trunc i64 c to i32
2439 /// will be optimized into:
2440 /// b = load i32* addr1
2441 /// d = load i32* addr2
2442 /// Where addr1 = addr2 +/- sizeof(i32).
2443 ///
2444 /// In other words, unless the target performs a post-isel load combining,
2445 /// this information should not be provided because it will generate more
2446 /// loads.
2447 virtual bool hasPairedLoad(EVT /*LoadedType*/,
2448 unsigned & /*RequiredAlignment*/) const {
2449 return false;
2450 }
2451
2452 /// Return true if the target has a vector blend instruction.
2453 virtual bool hasVectorBlend() const { return false; }
2454
2455 /// Get the maximum supported factor for interleaved memory accesses.
2456 /// Default to be the minimum interleave factor: 2.
2457 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
2458
2459 /// Lower an interleaved load to target specific intrinsics. Return
2460 /// true on success.
2461 ///
2462 /// \p LI is the vector load instruction.
2463 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
2464 /// \p Indices is the corresponding indices for each shufflevector.
2465 /// \p Factor is the interleave factor.
2466 virtual bool lowerInterleavedLoad(LoadInst *LI,
2467 ArrayRef<ShuffleVectorInst *> Shuffles,
2468 ArrayRef<unsigned> Indices,
2469 unsigned Factor) const {
2470 return false;
2471 }
2472
2473 /// Lower an interleaved store to target specific intrinsics. Return
2474 /// true on success.
2475 ///
2476 /// \p SI is the vector store instruction.
2477 /// \p SVI is the shufflevector to RE-interleave the stored vector.
2478 /// \p Factor is the interleave factor.
2479 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
2480 unsigned Factor) const {
2481 return false;
2482 }
2483
2484 /// Return true if zero-extending the specific node Val to type VT2 is free
2485 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
2486 /// because it's folded such as X86 zero-extending loads).
2487 virtual bool isZExtFree(SDValue Val, EVT VT2) const {
2488 return isZExtFree(Val.getValueType(), VT2);
2489 }
2490
2491 /// Return true if an fpext operation is free (for instance, because
2492 /// single-precision floating-point numbers are implicitly extended to
2493 /// double-precision).
2494 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
2495 assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
2496 "invalid fpext types");
2497 return false;
2498 }
2499
2500 /// Return true if an fpext operation input to an \p Opcode operation is free
2501 /// (for instance, because half-precision floating-point numbers are
2502 /// implicitly extended to float-precision) for an FMA instruction.
2503 virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const {
2504 assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
2505 "invalid fpext types");
2506 return isFPExtFree(DestVT, SrcVT);
2507 }
2508
2509 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
2510 /// extend node) is profitable.
2511 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
2512
2513 /// Return true if an fneg operation is free to the point where it is never
2514 /// worthwhile to replace it with a bitwise operation.
2515 virtual bool isFNegFree(EVT VT) const {
2516 assert(VT.isFloatingPoint());
2517 return false;
2518 }
2519
2520 /// Return true if an fabs operation is free to the point where it is never
2521 /// worthwhile to replace it with a bitwise operation.
2522 virtual bool isFAbsFree(EVT VT) const {
2523 assert(VT.isFloatingPoint());
2524 return false;
2525 }
2526
2527 /// Return true if an FMA operation is faster than a pair of fmul and fadd
2528 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
2529 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
2530 ///
2531 /// NOTE: This may be called before legalization on types for which FMAs are
2532 /// not legal, but should return true if those types will eventually legalize
2533 /// to types that support FMAs. After legalization, it will only be called on
2534 /// types that support FMAs (via Legal or Custom actions)
2535 virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
2536 return false;
2537 }
2538
2539 /// Return true if it's profitable to narrow operations of type VT1 to
2540 /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
2541 /// i32 to i16.
2542 virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const {
2543 return false;
2544 }
2545
2546 /// Return true if it is beneficial to convert a load of a constant to
2547 /// just the constant itself.
2548 /// On some targets it might be more efficient to use a combination of
2549 /// arithmetic instructions to materialize the constant instead of loading it
2550 /// from a constant pool.
2551 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
2552 Type *Ty) const {
2553 return false;
2554 }
2555
2556 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type
2557 /// from this source type with this index. This is needed because
2558 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of
2559 /// the first element, and only the target knows which lowering is cheap.
2560 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2561 unsigned Index) const {
2562 return false;
2563 }
2564
2565 /// Try to convert an extract element of a vector binary operation into an
2566 /// extract element followed by a scalar operation.
2567 virtual bool shouldScalarizeBinop(SDValue VecOp) const {
2568 return false;
2569 }
2570
2571 /// Return true if extraction of a scalar element from the given vector type
2572 /// at the given index is cheap. For example, if scalar operations occur on
2573 /// the same register file as vector operations, then an extract element may
2574 /// be a sub-register rename rather than an actual instruction.
2575 virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
2576 return false;
2577 }
2578
2579 /// Try to convert math with an overflow comparison into the corresponding DAG
2580 /// node operation. Targets may want to override this independently of whether
2581 /// the operation is legal/custom for the given type because it may obscure
2582 /// matching of other patterns.
2583 virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
2584 // TODO: The default logic is inherited from code in CodeGenPrepare.
2585 // The opcode should not make a difference by default?
2586 if (Opcode != ISD::UADDO)
2587 return false;
2588
2589 // Allow the transform as long as we have an integer type that is not
2590 // obviously illegal and unsupported.
2591 if (VT.isVector())
2592 return false;
2593 return VT.isSimple() || !isOperationExpand(Opcode, VT);
2594 }
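// Illustrative sketch, not part of this header: a hypothetical override that
// widens the default policy to unsigned subtraction as well. MyTargetLowering
// is an assumed name; a real target would drive this from its own cost model.
bool MyTargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT) const {
  if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
    return false;
  // Only scalar integer types that are legal on the target are worth forming.
  return VT.isScalarInteger() && isTypeLegal(VT);
}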
2595
2596 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
2597 // even if the vector itself has multiple uses.
2598 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
2599 return false;
2600 }
2601
2602 // Return true if CodeGenPrepare should consider splitting large offset of a
2603 // GEP to make the GEP fit into the addressing mode and can be sunk into the
2604 // same blocks of its users.
2605 virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
2606
2607 //===--------------------------------------------------------------------===//
2608 // Runtime Library hooks
2609 //
2610
2611 /// Rename the default libcall routine name for the specified libcall.
2612 void setLibcallName(RTLIB::Libcall Call, const char *Name) {
2613 LibcallRoutineNames[Call] = Name;
2614 }
2615
2616 /// Get the libcall routine name for the specified libcall.
2617 const char *getLibcallName(RTLIB::Libcall Call) const {
2618 return LibcallRoutineNames[Call];
2619 }
2620
2621 /// Override the default CondCode to be used to test the result of the
2622 /// comparison libcall against zero.
2623 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
2624 CmpLibcallCCs[Call] = CC;
2625 }
2626
2627 /// Get the CondCode that's to be used to test the result of the comparison
2628 /// libcall against zero.
2629 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
2630 return CmpLibcallCCs[Call];
2631 }
2632
2633 /// Set the CallingConv that should be used for the specified libcall.
2634 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
2635 LibcallCallingConvs[Call] = CC;
2636 }
2637
2638 /// Get the CallingConv that should be used for the specified libcall.
2639 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
2640 return LibcallCallingConvs[Call];
2641 }
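// Illustrative sketch, not part of this header: these hooks are normally called
// from a target's TargetLowering constructor. MyTargetLowering and the routine
// name "__mytarget_divdi3" are made up; the RTLIB, ISD and CallingConv
// enumerators are real.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  setLibcallName(RTLIB::SDIV_I64, "__mytarget_divdi3");
  setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::C);
  setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETEQ);
}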
2642
2643 /// Execute target specific actions to finalize target lowering.
2644 /// This is used to set extra flags in MachineFrameInformation and to freeze
2645 /// the set of reserved registers.
2646 /// The default implementation just freezes the set of reserved registers.
2647 virtual void finalizeLowering(MachineFunction &MF) const;
2648
2649private:
2650 const TargetMachine &TM;
2651
2652 /// Tells the code generator that the target has multiple (allocatable)
2653 /// condition registers that can be used to store the results of comparisons
2654 /// for use by selects and conditional branches. With multiple condition
2655 /// registers, the code generator will not aggressively sink comparisons into
2656 /// the blocks of their users.
2657 bool HasMultipleConditionRegisters;
2658
2659 /// Tells the code generator that the target has BitExtract instructions.
2660 /// The code generator will aggressively sink "shift"s into the blocks of
2661 /// their users if the users will generate "and" instructions which can be
2662 /// combined with "shift" to BitExtract instructions.
2663 bool HasExtractBitsInsn;
2664
2665 /// Tells the code generator to bypass slow divide or remainder
2666 /// instructions. For example, BypassSlowDivWidths[32] = 8 tells the code
2667 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
2668 /// div/rem when the operands are positive and less than 256.
2669 DenseMap <unsigned int, unsigned int> BypassSlowDivWidths;
2670
2671 /// Tells the code generator that it shouldn't generate extra flow control
2672 /// instructions and should attempt to combine flow control instructions via
2673 /// predication.
2674 bool JumpIsExpensive;
2675
2676 /// This target prefers to use _setjmp to implement llvm.setjmp.
2677 ///
2678 /// Defaults to false.
2679 bool UseUnderscoreSetJmp;
2680
2681 /// This target prefers to use _longjmp to implement llvm.longjmp.
2682 ///
2683 /// Defaults to false.
2684 bool UseUnderscoreLongJmp;
2685
2686 /// Information about the contents of the high-bits in boolean values held in
2687 /// a type wider than i1. See getBooleanContents.
2688 BooleanContent BooleanContents;
2689
2690 /// Information about the contents of the high-bits in boolean values held in
2691 /// a type wider than i1. See getBooleanContents.
2692 BooleanContent BooleanFloatContents;
2693
2694 /// Information about the contents of the high-bits in boolean vector values
2695 /// when the element type is wider than i1. See getBooleanContents.
2696 BooleanContent BooleanVectorContents;
2697
2698 /// The target scheduling preference: shortest possible total cycles or lowest
2699 /// register usage.
2700 Sched::Preference SchedPreferenceInfo;
2701
2702 /// The minimum alignment that any argument on the stack needs to have.
2703 Align MinStackArgumentAlignment;
2704
2705 /// The minimum function alignment (used when optimizing for size, and to
2706 /// prevent explicitly provided alignment from leading to incorrect code).
2707 Align MinFunctionAlignment;
2708
2709 /// The preferred function alignment (used when alignment unspecified and
2710 /// optimizing for speed).
2711 Align PrefFunctionAlignment;
2712
2713 /// The preferred loop alignment (in log2, not in bytes).
2714 Align PrefLoopAlignment;
2715
2716 /// Size in bits of the maximum atomics size the backend supports.
2717 /// Accesses larger than this will be expanded by AtomicExpandPass.
2718 unsigned MaxAtomicSizeInBitsSupported;
2719
2720 /// Size in bits of the minimum cmpxchg or ll/sc operation the
2721 /// backend supports.
2722 unsigned MinCmpXchgSizeInBits;
2723
2724 /// This indicates if the target supports unaligned atomic operations.
2725 bool SupportsUnalignedAtomics;
2726
2727 /// If set to a physical register, this specifies the register that
2728 /// llvm.savestack/llvm.restorestack should save and restore.
2729 unsigned StackPointerRegisterToSaveRestore;
2730
2731 /// This indicates the default register class to use for each ValueType the
2732 /// target supports natively.
2733 const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
2734 unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE];
2735 MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
2736
2737 /// This indicates the "representative" register class to use for each
2738 /// ValueType the target supports natively. This information is used by the
2739 /// scheduler to track register pressure. By default, the representative
2740 /// register class is the largest legal super-reg register class of the
2741 /// register class of the specified type. e.g. On x86, i8, i16, and i32's
2742 /// representative class would be GR32.
2743 const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE];
2744
2745 /// This indicates the "cost" of the "representative" register class for each
2746 /// ValueType. The cost is used by the scheduler to approximate register
2747 /// pressure.
2748 uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
2749
2750 /// For any value types we are promoting or expanding, this contains the value
2751 /// type that we are changing to. For Expanded types, this contains one step
2752 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
2753 /// (e.g. i64 -> i16). For types natively supported by the system, this holds
2754 /// the same type (e.g. i32 -> i32).
2755 MVT TransformToType[MVT::LAST_VALUETYPE];
2756
2757 /// For each operation and each value type, keep a LegalizeAction that
2758 /// indicates how instruction selection should deal with the operation. Most
2759 /// operations are Legal (aka, supported natively by the target), but
2760 /// operations that are not should be described. Note that operations on
2761 /// non-legal value types are not described here.
2762 LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
2763
2764 /// For each load extension type and each value type, keep a LegalizeAction
2765 /// that indicates how instruction selection should deal with a load of a
2766 /// specific value type and extension type. Uses 4-bits to store the action
2767 /// for each of the 4 load ext types.
2768 uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
2769
2770 /// For each value type pair keep a LegalizeAction that indicates whether a
2771 /// truncating store of a specific value type and truncating type is legal.
2772 LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
2773
2774 /// For each indexed mode and each value type, keep a pair of LegalizeAction
2775 /// that indicates how instruction selection should deal with the load /
2776 /// store.
2777 ///
2778 /// The first dimension is the value_type for the reference. The second
2779 /// dimension represents the various modes for load store.
2780 uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
2781
2782 /// For each condition code (ISD::CondCode) keep a LegalizeAction that
2783 /// indicates how instruction selection should deal with the condition code.
2784 ///
2785 /// Because each CC action takes up 4 bits, we need to have the array size be
2786 /// large enough to fit all of the value types. This can be done by rounding
2787 /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
2788 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
2789
2790 ValueTypeActionImpl ValueTypeActions;
2791
2792private:
2793 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
2794
2795 /// Targets can specify ISD nodes that they would like PerformDAGCombine
2796 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
2797 /// array.
2798 unsigned char
2799 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
2800
2801 /// For operations that must be promoted to a specific type, this holds the
2802 /// destination type. This map should be sparse, so don't hold it as an
2803 /// array.
2804 ///
2805 /// Targets add entries to this map with AddPromotedToType(..), clients access
2806 /// this with getTypeToPromoteTo(..).
2807 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
2808 PromoteToType;
2809
2810 /// Stores the name of each libcall.
2811 const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1];
2812
2813 /// The ISD::CondCode that should be used to test the result of each of the
2814 /// comparison libcall against zero.
2815 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
2816
2817 /// Stores the CallingConv that should be used for each libcall.
2818 CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
2819
2820 /// Set default libcall names and calling conventions.
2821 void InitLibcalls(const Triple &TT);
2822
2823protected:
2824 /// Return true if the extension represented by \p I is free.
2825 /// \pre \p I is a sign, zero, or fp extension and
2826 /// is[Z|FP]ExtFree of the related types is not true.
2827 virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
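// Illustrative sketch, not part of this header: a hypothetical isExtFreeImpl
// override for a target on which sign-extending the result of a 32-bit load
// costs nothing. MyTargetLowering is an assumed name.
bool MyTargetLowering::isExtFreeImpl(const Instruction *I) const {
  if (isa<SExtInst>(I))
    if (const auto *LI = dyn_cast<LoadInst>(I->getOperand(0)))
      return LI->getType()->isIntegerTy(32);
  return false;
}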
2828
2829 /// Depth that GatherAllAliases should continue looking for chain
2830 /// dependencies when trying to find a more preferable chain. As an
2831 /// approximation, this should be more than the number of consecutive stores
2832 /// expected to be merged.
2833 unsigned GatherAllAliasesMaxDepth;
2834
2835 /// \brief Specify maximum number of store instructions per memset call.
2836 ///
2837 /// When lowering \@llvm.memset this field specifies the maximum number of
2838 /// store operations that may be substituted for the call to memset. Targets
2839 /// must set this value based on the cost threshold for that target. Targets
2840 /// should assume that the memset will be done using as many of the largest
2841 /// store operations first, followed by smaller ones, if necessary, per
2842 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
2843 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
2844 /// store. This only applies to setting a constant array of a constant size.
2845 unsigned MaxStoresPerMemset;
2846 /// Likewise for functions with the OptSize attribute.
2847 unsigned MaxStoresPerMemsetOptSize;
2848
2849 /// \brief Specify maximum number of store instructions per memcpy call.
2850 ///
2851 /// When lowering \@llvm.memcpy this field specifies the maximum number of
2852 /// store operations that may be substituted for a call to memcpy. Targets
2853 /// must set this value based on the cost threshold for that target. Targets
2854 /// should assume that the memcpy will be done using as many of the largest
2855 /// store operations first, followed by smaller ones, if necessary, per
2856 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
2857 /// with 32-bit alignment would result in one 4-byte store, one 2-byte store,
2858 /// and one 1-byte store. This only applies to copying a constant array of
2859 /// constant size.
2860 unsigned MaxStoresPerMemcpy;
2861 /// Likewise for functions with the OptSize attribute.
2862 unsigned MaxStoresPerMemcpyOptSize;
2863 /// \brief Specify max number of store instructions to glue in inlined memcpy.
2864 ///
2865 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
2866 /// of store instructions to keep together. This helps in pairing and
2867 /// vectorization later on.
2868 unsigned MaxGluedStoresPerMemcpy = 0;
2869
2870 /// \brief Specify maximum number of load instructions per memcmp call.
2871 ///
2872 /// When lowering \@llvm.memcmp this field specifies the maximum number of
2873 /// pairs of load operations that may be substituted for a call to memcmp.
2874 /// Targets must set this value based on the cost threshold for that target.
2875 /// Targets should assume that the memcmp will be done using as many of the
2876 /// largest load operations first, followed by smaller ones, if necessary, per
2877 /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
2878 /// with 32-bit alignment would result in one 4-byte load, one 2-byte load,
2879 /// and one 1-byte load. This only applies to comparing a constant array of
2880 /// constant size.
2881 unsigned MaxLoadsPerMemcmp;
2882 /// Likewise for functions with the OptSize attribute.
2883 unsigned MaxLoadsPerMemcmpOptSize;
2884
2885 /// \brief Specify maximum number of store instructions per memmove call.
2886 ///
2887 /// When lowering \@llvm.memmove this field specifies the maximum number of
2888 /// store instructions that may be substituted for a call to memmove. Targets
2889 /// must set this value based on the cost threshold for that target. Targets
2890 /// should assume that the memmove will be done using as many of the largest
2891 /// store operations first, followed by smaller ones, if necessary, per
2892 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
2893 /// with 8-bit alignment would result in nine 1-byte stores. This only
2894 /// applies to copying a constant array of constant size.
2895 unsigned MaxStoresPerMemmove;
2896 /// Likewise for functions with the OptSize attribute.
2897 unsigned MaxStoresPerMemmoveOptSize;
2898
2899 /// Tells the code generator that select is more expensive than a branch if
2900 /// the branch is usually predicted right.
2901 bool PredictableSelectIsExpensive;
2902
2903 /// \see enableExtLdPromotion.
2904 bool EnableExtLdPromotion;
2905
2906 /// Return true if the value types that can be represented by the specified
2907 /// register class are all legal.
2908 bool isLegalRC(const TargetRegisterInfo &TRI,
2909 const TargetRegisterClass &RC) const;
2910
2911 /// Replace/modify any TargetFrameIndex operands with a target-dependent
2912 /// sequence of memory operands that is recognized by PrologEpilogInserter.
2913 MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
2914 MachineBasicBlock *MBB) const;
2915
2916 /// Replace/modify the XRay custom event operands with target-dependent
2917 /// details.
2918 MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
2919 MachineBasicBlock *MBB) const;
2920
2921 /// Replace/modify the XRay typed event operands with target-dependent
2922 /// details.
2923 MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
2924 MachineBasicBlock *MBB) const;
2925};
2926
2927/// This class defines information used to lower LLVM code to legal SelectionDAG
2928/// operators that the target instruction selector can accept natively.
2929///
2930/// This class also defines callbacks that targets must implement to lower
2931/// target-specific constructs to SelectionDAG operators.
2932class TargetLowering : public TargetLoweringBase {
2933public:
2934 struct DAGCombinerInfo;
2935 struct MakeLibCallOptions;
2936
2937 TargetLowering(const TargetLowering &) = delete;
2938 TargetLowering &operator=(const TargetLowering &) = delete;
2939
2940 /// NOTE: The TargetMachine owns TLOF.
2941 explicit TargetLowering(const TargetMachine &TM);
2942
2943 bool isPositionIndependent() const;
2944
2945 virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
2946 FunctionLoweringInfo *FLI,
2947 LegacyDivergenceAnalysis *DA) const {
2948 return false;
2949 }
2950
2951 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
2952 return false;
2953 }
2954
2955 /// Returns true, and sets the base pointer, offset pointer, and addressing
2956 /// mode by reference, if the node's address can be legally represented as a
2957 /// pre-indexed load / store address.
2958 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
2959 SDValue &/*Offset*/,
2960 ISD::MemIndexedMode &/*AM*/,
2961 SelectionDAG &/*DAG*/) const {
2962 return false;
2963 }
2964
2965 /// Returns true, and sets the base pointer, offset pointer, and addressing
2966 /// mode by reference, if this node can be combined with a load / store to
2967 /// form a post-indexed load / store.
2968 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
2969 SDValue &/*Base*/,
2970 SDValue &/*Offset*/,
2971 ISD::MemIndexedMode &/*AM*/,
2972 SelectionDAG &/*DAG*/) const {
2973 return false;
2974 }
2975
2976 /// Returns true if the specified base+offset is a legal indexed addressing
2977 /// mode for this target. \p MI is the load or store instruction that is being
2978 /// considered for transformation.
2979 virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
2980 bool IsPre, MachineRegisterInfo &MRI) const {
2981 return false;
2982 }
2983
2984 /// Return the entry encoding for a jump table in the current function. The
2985 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
2986 virtual unsigned getJumpTableEncoding() const;
2987
2988 virtual const MCExpr *
2989 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
2990 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
2991 MCContext &/*Ctx*/) const {
2992 llvm_unreachable("Need to implement this hook if target has custom JTIs")::llvm::llvm_unreachable_internal("Need to implement this hook if target has custom JTIs"
, "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 2992)
;
2993 }
2994
2995 /// Returns relocation base for the given PIC jumptable.
2996 virtual SDValue getPICJumpTableRelocBase(SDValue Table,
2997 SelectionDAG &DAG) const;
2998
2999 /// This returns the relocation base for the given PIC jumptable, the same as
3000 /// getPICJumpTableRelocBase, but as an MCExpr.
3001 virtual const MCExpr *
3002 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3003 unsigned JTI, MCContext &Ctx) const;
3004
3005 /// Return true if folding a constant offset with the given GlobalAddress is
3006 /// legal. It is frequently not legal in PIC relocation models.
3007 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
3008
3009 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
3010 SDValue &Chain) const;
3011
3012 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
3013 SDValue &NewRHS, ISD::CondCode &CCCode,
3014 const SDLoc &DL, const SDValue OldLHS,
3015 const SDValue OldRHS) const;
3016
3017 /// Returns a pair of (return value, chain).
3018 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
3019 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
3020 EVT RetVT, ArrayRef<SDValue> Ops,
3021 MakeLibCallOptions CallOptions,
3022 const SDLoc &dl) const;
3023
3024 /// Check whether parameters to a call that are passed in callee saved
3025 /// registers are the same as from the calling function. This needs to be
3026 /// checked for tail call eligibility.
3027 bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
3028 const uint32_t *CallerPreservedMask,
3029 const SmallVectorImpl<CCValAssign> &ArgLocs,
3030 const SmallVectorImpl<SDValue> &OutVals) const;
3031
3032 //===--------------------------------------------------------------------===//
3033 // TargetLowering Optimization Methods
3034 //
3035
3036 /// A convenience struct that encapsulates a DAG, and two SDValues for
3037 /// returning information from TargetLowering to its clients that want to
3038 /// combine.
3039 struct TargetLoweringOpt {
3040 SelectionDAG &DAG;
3041 bool LegalTys;
3042 bool LegalOps;
3043 SDValue Old;
3044 SDValue New;
3045
3046 explicit TargetLoweringOpt(SelectionDAG &InDAG,
3047 bool LT, bool LO) :
3048 DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
3049
3050 bool LegalTypes() const { return LegalTys; }
3051 bool LegalOperations() const { return LegalOps; }
3052
3053 bool CombineTo(SDValue O, SDValue N) {
3054 Old = O;
3055 New = N;
3056 return true;
3057 }
3058 };
3059
3060 /// Determines the optimal series of memory ops to replace the memset / memcpy.
3061 /// Return true if the number of memory ops is below the threshold (Limit).
3062 /// It returns the types of the sequence of memory ops to perform
3063 /// memset / memcpy by reference.
3064 bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
3065 unsigned Limit, uint64_t Size,
3066 unsigned DstAlign, unsigned SrcAlign,
3067 bool IsMemset,
3068 bool ZeroMemset,
3069 bool MemcpyStrSrc,
3070 bool AllowOverlap,
3071 unsigned DstAS, unsigned SrcAS,
3072 const AttributeList &FuncAttributes) const;
3073
3074 /// Check to see if the specified operand of the specified instruction is a
3075 /// constant integer. If so, check to see if there are any bits set in the
3076 /// constant that are not demanded. If so, shrink the constant and return
3077 /// true.
3078 bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
3079 TargetLoweringOpt &TLO) const;
3080
3081 // Target hook to do target-specific const optimization, which is called by
3082 // ShrinkDemandedConstant. This function should return true if the target
3083 // doesn't want ShrinkDemandedConstant to further optimize the constant.
3084 virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
3085 TargetLoweringOpt &TLO) const {
3086 return false;
3087 }
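// Illustrative sketch, not part of this header: a hypothetical override that
// keeps AND masks which already fit the target's immediate encoding, so
// ShrinkDemandedConstant leaves them alone. MyTargetLowering and
// isLegalAndImmediate are assumed names.
bool MyTargetLowering::targetShrinkDemandedConstant(
    SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
  if (Op.getOpcode() != ISD::AND)
    return false;
  if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
    return isLegalAndImmediate(C->getZExtValue()); // hypothetical helper
  return false;
}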
3088
3089 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This
3090 /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
3091 /// generalized for targets with other types of implicit widening casts.
3092 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
3093 TargetLoweringOpt &TLO) const;
3094
3095 /// Look at Op. At this point, we know that only the DemandedBits bits of the
3096 /// result of Op are ever used downstream. If we can use this information to
3097 /// simplify Op, create a new simplified DAG node and return true, returning
3098 /// the original and new nodes in Old and New. Otherwise, analyze the
3099 /// expression and return a mask of KnownOne and KnownZero bits for the
3100 /// expression (used to simplify the caller). The KnownZero/One bits may only
3101 /// be accurate for those bits in the Demanded masks.
3102 /// \p AssumeSingleUse When this parameter is true, this function will
3103 /// attempt to simplify \p Op even if there are multiple uses.
3104 /// Callers are responsible for correctly updating the DAG based on the
3105 /// results of this function, because simply replacing TLO.Old
3106 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
3107 /// has multiple uses.
3108 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
3109 const APInt &DemandedElts, KnownBits &Known,
3110 TargetLoweringOpt &TLO, unsigned Depth = 0,
3111 bool AssumeSingleUse = false) const;
3112
3113 /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
3114 /// Adds Op back to the worklist upon success.
3115 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
3116 KnownBits &Known, TargetLoweringOpt &TLO,
3117 unsigned Depth = 0,
3118 bool AssumeSingleUse = false) const;
3119
3120 /// Helper wrapper around SimplifyDemandedBits.
3121 /// Adds Op back to the worklist upon success.
3122 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
3123 DAGCombinerInfo &DCI) const;
3124
3125 /// More limited version of SimplifyDemandedBits that can be used to "look
3126 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
3127 /// bitwise ops etc.
3128 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
3129 const APInt &DemandedElts,
3130 SelectionDAG &DAG,
3131 unsigned Depth) const;
3132
3133 /// Look at Vector Op. At this point, we know that only the DemandedElts
3134 /// elements of the result of Op are ever used downstream. If we can use
3135 /// this information to simplify Op, create a new simplified DAG node and
3136 /// return true, storing the original and new nodes in TLO.
3137 /// Otherwise, analyze the expression and return a mask of KnownUndef and
3138 /// KnownZero elements for the expression (used to simplify the caller).
3139 /// The KnownUndef/Zero elements may only be accurate for those bits
3140 /// in the DemandedMask.
3141 /// \p AssumeSingleUse When this parameter is true, this function will
3142 /// attempt to simplify \p Op even if there are multiple uses.
3143 /// Callers are responsible for correctly updating the DAG based on the
3144 /// results of this function, because simply replacing TLO.Old
3145 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
3146 /// has multiple uses.
3147 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
3148 APInt &KnownUndef, APInt &KnownZero,
3149 TargetLoweringOpt &TLO, unsigned Depth = 0,
3150 bool AssumeSingleUse = false) const;
3151
3152 /// Helper wrapper around SimplifyDemandedVectorElts.
3153 /// Adds Op back to the worklist upon success.
3154 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
3155 APInt &KnownUndef, APInt &KnownZero,
3156 DAGCombinerInfo &DCI) const;
3157
3158 /// Determine which of the bits specified in Mask are known to be either zero
3159 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
3160 /// argument allows us to only collect the known bits that are shared by the
3161 /// requested vector elements.
3162 virtual void computeKnownBitsForTargetNode(const SDValue Op,
3163 KnownBits &Known,
3164 const APInt &DemandedElts,
3165 const SelectionDAG &DAG,
3166 unsigned Depth = 0) const;
3167 /// Determine which of the bits specified in Mask are known to be either zero
3168 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
3169 /// argument allows us to only collect the known bits that are shared by the
3170 /// requested vector elements. This is for GISel.
3171 virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
3172 Register R, KnownBits &Known,
3173 const APInt &DemandedElts,
3174 const MachineRegisterInfo &MRI,
3175 unsigned Depth = 0) const;
3176
3177 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
3178 /// Default implementation computes low bits based on alignment
3179 /// information. This should preserve known bits passed into it.
3180 virtual void computeKnownBitsForFrameIndex(const SDValue FIOp,
3181 KnownBits &Known,
3182 const APInt &DemandedElts,
3183 const SelectionDAG &DAG,
3184 unsigned Depth = 0) const;
3185
3186 /// This method can be implemented by targets that want to expose additional
3187 /// information about sign bits to the DAG Combiner. The DemandedElts
3188 /// argument allows us to only collect the minimum sign bits that are shared
3189 /// by the requested vector elements.
3190 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
3191 const APInt &DemandedElts,
3192 const SelectionDAG &DAG,
3193 unsigned Depth = 0) const;
3194
3195 /// Attempt to simplify any target nodes based on the demanded vector
3196 /// elements, returning true on success. Otherwise, analyze the expression and
3197 /// return a mask of KnownUndef and KnownZero elements for the expression
3198 /// (used to simplify the caller). The KnownUndef/Zero elements may only be
3199 /// accurate for those bits in the DemandedMask.
3200 virtual bool SimplifyDemandedVectorEltsForTargetNode(
3201 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
3202 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
3203
3204 /// Attempt to simplify any target nodes based on the demanded bits/elts,
3205 /// returning true on success. Otherwise, analyze the
3206 /// expression and return a mask of KnownOne and KnownZero bits for the
3207 /// expression (used to simplify the caller). The KnownZero/One bits may only
3208 /// be accurate for those bits in the Demanded masks.
3209 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
3210 const APInt &DemandedBits,
3211 const APInt &DemandedElts,
3212 KnownBits &Known,
3213 TargetLoweringOpt &TLO,
3214 unsigned Depth = 0) const;
3215
3216 /// More limited version of SimplifyDemandedBits that can be used to "look
3217 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
3218 /// bitwise ops etc.
3219 virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
3220 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3221 SelectionDAG &DAG, unsigned Depth) const;
3222
3223 /// Tries to build a legal vector shuffle using the provided parameters
3224 /// or equivalent variations. The Mask argument may be modified as the
3225 /// function tries different variations.
3226 /// Returns an empty SDValue if the operation fails.
3227 SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3228 SDValue N1, MutableArrayRef<int> Mask,
3229 SelectionDAG &DAG) const;
3230
3231 /// This method returns the constant pool value that will be loaded by LD.
3232 /// NOTE: You must check for implicit extensions of the constant by LD.
3233 virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
3234
3235 /// If \p SNaN is false, \returns true if \p Op is known to never be any
3236 /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a signaling
3237 /// NaN.
3238 virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
3239 const SelectionDAG &DAG,
3240 bool SNaN = false,
3241 unsigned Depth = 0) const;
3242 struct DAGCombinerInfo {
3243 void *DC; // The DAG Combiner object.
3244 CombineLevel Level;
3245 bool CalledByLegalizer;
3246
3247 public:
3248 SelectionDAG &DAG;
3249
3250 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
3251 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
3252
3253 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
3254 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
3255 bool isAfterLegalizeDAG() const {
3256 return Level == AfterLegalizeDAG;
3257 }
3258 CombineLevel getDAGCombineLevel() { return Level; }
3259 bool isCalledByLegalizer() const { return CalledByLegalizer; }
3260
3261 void AddToWorklist(SDNode *N);
3262 SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
3263 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
3264 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
3265
3266 void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
3267 };
3268
3269 /// Return true if \p N is a constant or constant vector equal to the true value
3270 /// from getBooleanContents().
3271 bool isConstTrueVal(const SDNode *N) const;
3272
3273 /// Return true if \p N is a constant or constant vector equal to the false value
3274 /// from getBooleanContents().
3275 bool isConstFalseVal(const SDNode *N) const;
3276
3277 /// Return if \p N is a True value when extended to \p VT.
3278 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
3279
3280 /// Try to simplify a setcc built with the specified operands and cc. If it is
3281 /// unable to simplify it, return a null SDValue.
3282 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
3283 bool foldBooleans, DAGCombinerInfo &DCI,
3284 const SDLoc &dl) const;
3285
3286 // For targets which wrap address, unwrap for analysis.
3287 virtual SDValue unwrapAddress(SDValue N) const { return N; }
3288
3289 /// Returns true (and the GlobalValue and the offset) if the node is a
3290 /// GlobalAddress + offset.
3291 virtual bool
3292 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
3293
3294 /// This method will be invoked for all target nodes and for any
3295 /// target-independent nodes that the target has registered with invoke it
3296 /// for.
3297 ///
3298 /// The semantics are as follows:
3299 /// Return Value:
3300 /// SDValue.Val == 0 - No change was made
3301 /// SDValue.Val == N - N was replaced, is dead, and is already handled.
3302 /// otherwise - N should be replaced by the returned Operand.
3303 ///
3304 /// In addition, methods provided by DAGCombinerInfo may be used to perform
3305 /// more complex transformations.
3306 ///
3307 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
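// Illustrative sketch, not part of this header: the usual shape of a target's
// PerformDAGCombine override and how DAGCombinerInfo is consulted.
// MyTargetLowering is an assumed name; the (add x, 0) fold is a deliberately
// trivial stand-in for real target-specific combines.
SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  // Many targets defer custom combines until operations have been legalized.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();
  if (N->getOpcode() == ISD::ADD && isNullConstant(N->getOperand(1)))
    return N->getOperand(0); // the returned operand replaces N
  return SDValue();          // no change was made
}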
3308
3309 /// Return true if it is profitable to move this shift by a constant amount
3310 /// through its operand, adjusting any immediate operands as necessary to
3311 /// preserve semantics. This transformation may not be desirable if it
3312 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
3313 /// extraction in AArch64). By default, it returns true.
3314 ///
3315 /// @param N the shift node
3316 /// @param Level the current DAGCombine legalization level.
3317 virtual bool isDesirableToCommuteWithShift(const SDNode *N,
3318 CombineLevel Level) const {
3319 return true;
3320 }
3321
3322 // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
3323 // to a shuffle and a truncate.
3324 // Example of such a combine:
3325 // v4i32 build_vector((extract_elt V, 1),
3326 // (extract_elt V, 3),
3327 // (extract_elt V, 5),
3328 // (extract_elt V, 7))
3329 // -->
3330 // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
3331 virtual bool isDesirableToCombineBuildVectorToShuffleTruncate(
3332 ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
3333 return false;
3334 }
3335
3336 /// Return true if the target has native support for the specified value type
3337 /// and it is 'desirable' to use the type for the given node type. e.g. On x86
3338 /// i16 is legal, but undesirable since i16 instruction encodings are longer
3339 /// and some i16 instructions are slow.
3340 virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
3341 // By default, assume all legal types are desirable.
3342 return isTypeLegal(VT);
3343 }
3344
3345 /// Return true if it is profitable for dag combiner to transform a floating
3346 /// point op of the specified opcode to an equivalent op of an integer
3347 /// type. e.g. f32 load -> i32 load can be profitable on ARM.
3348 virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
3349 EVT /*VT*/) const {
3350 return false;
3351 }
3352
3353 /// This method queries the target whether it is beneficial for dag combiner to
3354 /// promote the specified node. If true, it should return the desired
3355 /// promotion type by reference.
3356 virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
3357 return false;
3358 }
3359
3360 /// Return true if the target supports swifterror attribute. It optimizes
3361 /// loads and stores to reading and writing a specific register.
3362 virtual bool supportSwiftError() const {
3363 return false;
3364 }
3365
3366 /// Return true if the target supports that a subset of CSRs for the given
3367 /// machine function is handled explicitly via copies.
3368 virtual bool supportSplitCSR(MachineFunction *MF) const {
3369 return false;
3370 }
3371
3372 /// Perform necessary initialization to handle a subset of CSRs explicitly
3373 /// via copies. This function is called at the beginning of instruction
3374 /// selection.
3375 virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
3376 llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 3376)
;
3377 }
3378
3379 /// Insert explicit copies in entry and exit blocks. We copy a subset of
3380 /// CSRs to virtual registers in the entry block, and copy them back to
3381 /// physical registers in the exit blocks. This function is called at the end
3382 /// of instruction selection.
3383 virtual void insertCopiesSplitCSR(
3384 MachineBasicBlock *Entry,
3385 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
3386 llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 3386)
;
3387 }
3388
3389 /// Return 1 if we can compute the negated form of the specified expression
3390 /// for the same cost as the expression itself, or 2 if we can compute the
3391 /// negated form more cheaply than the expression itself. Else return 0.
3392 virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
3393 bool LegalOperations, bool ForCodeSize,
3394 unsigned Depth = 0) const;
3395
3396 /// If isNegatibleForFree returns true, return the newly negated expression.
3397 virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
3398 bool LegalOperations, bool ForCodeSize,
3399 unsigned Depth = 0) const;
3400
3401 //===--------------------------------------------------------------------===//
3402 // Lowering methods - These methods must be implemented by targets so that
3403 // the SelectionDAGBuilder code knows how to lower these.
3404 //
3405
3406 /// This hook must be implemented to lower the incoming (formal) arguments,
3407 /// described by the Ins array, into the specified DAG. The implementation
3408 /// should fill in the InVals array with legal-type argument values, and
3409 /// return the resulting token chain value.
3410 virtual SDValue LowerFormalArguments(
3411 SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
3412 const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
3413 SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
3414 llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 3414)
;
3415 }
3416
3417 /// This structure contains all information that is necessary for lowering
3418 /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
3419 /// needs to lower a call, and targets will see this struct in their LowerCall
3420 /// implementation.
3421 struct CallLoweringInfo {
3422 SDValue Chain;
3423 Type *RetTy = nullptr;
3424 bool RetSExt : 1;
3425 bool RetZExt : 1;
3426 bool IsVarArg : 1;
3427 bool IsInReg : 1;
3428 bool DoesNotReturn : 1;
3429 bool IsReturnValueUsed : 1;
3430 bool IsConvergent : 1;
3431 bool IsPatchPoint : 1;
3432
3433 // IsTailCall should be modified by implementations of
3434 // TargetLowering::LowerCall that perform tail call conversions.
3435 bool IsTailCall = false;
3436
3437 // Is Call lowering done post SelectionDAG type legalization.
3438 bool IsPostTypeLegalization = false;
3439
3440 unsigned NumFixedArgs = -1;
3441 CallingConv::ID CallConv = CallingConv::C;
3442 SDValue Callee;
3443 ArgListTy Args;
3444 SelectionDAG &DAG;
3445 SDLoc DL;
3446 ImmutableCallSite CS;
3447 SmallVector<ISD::OutputArg, 32> Outs;
3448 SmallVector<SDValue, 32> OutVals;
3449 SmallVector<ISD::InputArg, 32> Ins;
3450 SmallVector<SDValue, 4> InVals;
3451
3452 CallLoweringInfo(SelectionDAG &DAG)
3453 : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
3454 DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
3455 IsPatchPoint(false), DAG(DAG) {}
3456
3457 CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
3458 DL = dl;
3459 return *this;
3460 }
3461
3462 CallLoweringInfo &setChain(SDValue InChain) {
3463 Chain = InChain;
3464 return *this;
3465 }
3466
3467 // setCallee with target/module-specific attributes
3468 CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType,
3469 SDValue Target, ArgListTy &&ArgsList) {
3470 RetTy = ResultType;
3471 Callee = Target;
3472 CallConv = CC;
3473 NumFixedArgs = ArgsList.size();
3474 Args = std::move(ArgsList);
3475
3476 DAG.getTargetLoweringInfo().markLibCallAttributes(
3477 &(DAG.getMachineFunction()), CC, Args);
3478 return *this;
3479 }
3480
3481 CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType,
3482 SDValue Target, ArgListTy &&ArgsList) {
3483 RetTy = ResultType;
3484 Callee = Target;
3485 CallConv = CC;
3486 NumFixedArgs = ArgsList.size();
3487 Args = std::move(ArgsList);
3488 return *this;
3489 }
3490
3491 CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
3492 SDValue Target, ArgListTy &&ArgsList,
3493 ImmutableCallSite Call) {
3494 RetTy = ResultType;
3495
3496 IsInReg = Call.hasRetAttr(Attribute::InReg);
3497 DoesNotReturn =
3498 Call.doesNotReturn() ||
3499 (!Call.isInvoke() &&
3500 isa<UnreachableInst>(Call.getInstruction()->getNextNode()));
3501 IsVarArg = FTy->isVarArg();
3502 IsReturnValueUsed = !Call.getInstruction()->use_empty();
3503 RetSExt = Call.hasRetAttr(Attribute::SExt);
3504 RetZExt = Call.hasRetAttr(Attribute::ZExt);
3505
3506 Callee = Target;
3507
3508 CallConv = Call.getCallingConv();
3509 NumFixedArgs = FTy->getNumParams();
3510 Args = std::move(ArgsList);
3511
3512 CS = Call;
3513
3514 return *this;
3515 }
3516
3517 CallLoweringInfo &setInRegister(bool Value = true) {
3518 IsInReg = Value;
3519 return *this;
3520 }
3521
3522 CallLoweringInfo &setNoReturn(bool Value = true) {
3523 DoesNotReturn = Value;
3524 return *this;
3525 }
3526
3527 CallLoweringInfo &setVarArg(bool Value = true) {
3528 IsVarArg = Value;
3529 return *this;
3530 }
3531
3532 CallLoweringInfo &setTailCall(bool Value = true) {
3533 IsTailCall = Value;
3534 return *this;
3535 }
3536
3537 CallLoweringInfo &setDiscardResult(bool Value = true) {
3538 IsReturnValueUsed = !Value;
3539 return *this;
3540 }
3541
3542 CallLoweringInfo &setConvergent(bool Value = true) {
3543 IsConvergent = Value;
3544 return *this;
3545 }
3546
3547 CallLoweringInfo &setSExtResult(bool Value = true) {
3548 RetSExt = Value;
3549 return *this;
3550 }
3551
3552 CallLoweringInfo &setZExtResult(bool Value = true) {
3553 RetZExt = Value;
3554 return *this;
3555 }
3556
3557 CallLoweringInfo &setIsPatchPoint(bool Value = true) {
3558 IsPatchPoint = Value;
3559 return *this;
3560 }
3561
3562 CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
3563 IsPostTypeLegalization = Value;
3564 return *this;
3565 }
3566
3567 ArgListTy &getArgs() {
3568 return Args;
3569 }
3570 };
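// Illustrative sketch, not part of this header: how target lowering code
// typically fills in CallLoweringInfo with the chained setters and hands it to
// LowerCallTo. DAG, Chain, dl, RetTy, Callee and Args are assumed to already
// exist in the surrounding TargetLowering member code.
  CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(Chain)
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  // CallResult.first is the return value (if any), CallResult.second the chain.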
3571
3572 /// This structure is used to pass arguments to makeLibCall function.
3573 struct MakeLibCallOptions {
3574 // By passing the pre-softening type list to makeLibCall, the target hook
3575 // shouldExtendTypeInLibCall can see the original types from before softening.
3576 ArrayRef<EVT> OpsVTBeforeSoften;
3577 EVT RetVTBeforeSoften;
3578 bool IsSExt : 1;
3579 bool DoesNotReturn : 1;
3580 bool IsReturnValueUsed : 1;
3581 bool IsPostTypeLegalization : 1;
3582 bool IsSoften : 1;
3583
3584 MakeLibCallOptions()
3585 : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
3586 IsPostTypeLegalization(false), IsSoften(false) {}
3587
3588 MakeLibCallOptions &setSExt(bool Value = true) {
3589 IsSExt = Value;
3590 return *this;
3591 }
3592
3593 MakeLibCallOptions &setNoReturn(bool Value = true) {
3594 DoesNotReturn = Value;
3595 return *this;
3596 }
3597
3598 MakeLibCallOptions &setDiscardResult(bool Value = true) {
3599 IsReturnValueUsed = !Value;
3600 return *this;
3601 }
3602
3603 MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
3604 IsPostTypeLegalization = Value;
3605 return *this;
3606 }
3607
3608 MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
3609 bool Value = true) {
3610 OpsVTBeforeSoften = OpsVT;
3611 RetVTBeforeSoften = RetVT;
3612 IsSoften = Value;
3613 return *this;
3614 }
3615 };
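// Illustrative sketch, not part of this header: building MakeLibCallOptions for
// a soft-float f32 addition via makeLibCall. DAG, dl and the operands LHS/RHS
// are assumed to already exist in the surrounding TargetLowering member code.
  MakeLibCallOptions CallOptions;
  CallOptions.setSExt(true);
  SDValue Ops[2] = {LHS, RHS};
  std::pair<SDValue, SDValue> Res =
      makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl);
  // Res.first is the libcall result, Res.second the output chain.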
3616
3617 /// This function lowers an abstract call to a function into an actual call.
3618 /// This returns a pair of operands. The first element is the return value
3619 /// for the function (if RetTy is not VoidTy). The second element is the
3620 /// outgoing token chain. It calls LowerCall to do the actual lowering.
3621 std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
3622
3623 /// This hook must be implemented to lower calls into the specified
3624 /// DAG. The outgoing arguments to the call are described by the Outs array,
3625 /// and the values to be returned by the call are described by the Ins
3626 /// array. The implementation should fill in the InVals array with legal-type
3627 /// return values from the call, and return the resulting token chain value.
3628 virtual SDValue
3629 LowerCall(CallLoweringInfo &/*CLI*/,
3630 SmallVectorImpl<SDValue> &/*InVals*/) const {
3631 llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 3631)
;
3632 }
3633
3634 /// Target-specific cleanup for formal ByVal parameters.
3635 virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}
3636
3637 /// This hook should be implemented to check whether the return values
3638 /// described by the Outs array can fit into the return registers. If false
3639 /// is returned, an sret-demotion is performed.
3640 virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
3641 MachineFunction &/*MF*/, bool /*isVarArg*/,
3642 const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
3643 LLVMContext &/*Context*/) const
3644 {
3645 // Return true by default to get preexisting behavior.
3646 return true;
3647 }
3648
3649 /// This hook must be implemented to lower outgoing return values, described
3650 /// by the Outs array, into the specified DAG. The implementation should
3651 /// return the resulting token chain value.
3652 virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
3653 bool /*isVarArg*/,
3654 const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
3655 const SmallVectorImpl<SDValue> & /*OutVals*/,
3656 const SDLoc & /*dl*/,
3657 SelectionDAG & /*DAG*/) const {
3658 llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 3658)
;
3659 }
3660
3661 /// Return true if the result of the specified node is used by a return node
3662 /// only. It also computes and returns the input chain for the tail call.
3663 ///
3664 /// This is used to determine whether it is possible to codegen a libcall as
3665 /// tail call at legalization time.
3666 virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
3667 return false;
3668 }
3669
3670 /// Return true if the target may be able to emit the call instruction as a tail
3671 /// call. This is used by optimization passes to determine if it's profitable
3672 /// to duplicate return instructions to enable tailcall optimization.
3673 virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
3674 return false;
3675 }
3676
3677 /// Return the builtin name for the __builtin___clear_cache intrinsic
3678 /// Default is to invoke the clear cache library call
3679 virtual const char * getClearCacheBuiltinName() const {
3680 return "__clear_cache";
3681 }
3682
3683 /// Return the register ID of the name passed in. Used by named register
3684 /// global variables extension. There is no target-independent behaviour
3685 /// so the default action is to bail.
3686 virtual Register getRegisterByName(const char* RegName, EVT VT,
3687 const MachineFunction &MF) const {
3688 report_fatal_error("Named registers not implemented for this target");
3689 }
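// Illustrative override sketch for a hypothetical target (the class and
// register names are assumptions, not part of LLVM): resolve the names
// accepted by llvm.read_register / llvm.write_register and bail on anything
// unknown.
//
//   Register MyTargetLowering::getRegisterByName(const char *RegName, EVT VT,
//                                                const MachineFunction &MF) const {
//     Register Reg = StringSwitch<Register>(RegName)
//                        .Case("sp", MyTarget::SP)
//                        .Default(Register());
//     if (Reg)
//       return Reg;
//     report_fatal_error("Invalid register name global variable");
//   }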
3690
3691 /// Return the type that should be used to zero or sign extend a
3692 /// zeroext/signext integer return value. FIXME: Some C calling conventions
3693 /// require the return type to be promoted, but this is not true all the time,
3694 /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
3695 /// conventions. The frontend should handle this and include all of the
3696 /// necessary information.
3697 virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
3698 ISD::NodeType /*ExtendKind*/) const {
3699 EVT MinVT = getRegisterType(Context, MVT::i32);
3700 return VT.bitsLT(MinVT) ? MinVT : VT;
3701 }
3702
3703 /// For some targets, an LLVM struct type must be broken down into multiple
3704 /// simple types, but the calling convention specifies that the entire struct
3705 /// must be passed in a block of consecutive registers.
3706 virtual bool
3707 functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
3708 bool isVarArg) const {
3709 return false;
3710 }
3711
3712 /// For most targets, an LLVM type must be broken down into multiple
3713 /// smaller types. Usually the halves are ordered according to the endianness
3714 /// but on some platforms that would break. So this method will default to
3715 /// matching the endianness but can be overridden.
3716 virtual bool
3717 shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
3718 return DL.isLittleEndian();
3719 }
3720
3721 /// Returns a 0 terminated array of registers that can be safely used as
3722 /// scratch registers.
3723 virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
3724 return nullptr;
3725 }
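// Illustrative override sketch for a hypothetical target (register names are
// assumptions): note the trailing 0 entry, which is how callers detect the
// end of the array.
//
//   const MCPhysReg *
//   MyTargetLowering::getScratchRegisters(CallingConv::ID) const {
//     static const MCPhysReg ScratchRegs[] = { MyTarget::R12, MyTarget::R13, 0 };
//     return ScratchRegs;
//   }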
3726
3727 /// This callback is used to prepare for a volatile or atomic load.
3728 /// It takes a chain node as input and returns the chain for the load itself.
3729 ///
3730 /// Having a callback like this is necessary for targets like SystemZ,
3731 /// which allows a CPU to reuse the result of a previous load indefinitely,
3732 /// even if a cache-coherent store is performed by another CPU. The default
3733 /// implementation does nothing.
3734 virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
3735 SelectionDAG &DAG) const {
3736 return Chain;
3737 }
3738
3739 /// This callback is used to inspect load/store instructions and add
3740 /// target-specific MachineMemOperand flags to them. The default
3741 /// implementation does nothing.
3742 virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const {
3743 return MachineMemOperand::MONone;
3744 }
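// Illustrative override sketch (the metadata name and the choice of target
// flag are assumptions): attach a target-specific MMO flag to accesses the
// frontend marked with custom metadata.
//
//   MachineMemOperand::Flags
//   MyTargetLowering::getMMOFlags(const Instruction &I) const {
//     if (I.getMetadata("mytarget.uncached"))
//       return MachineMemOperand::MOTargetFlag1;
//     return MachineMemOperand::MONone;
//   }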
3745
3746 /// Should SelectionDAG lower an atomic store of the given kind as a normal
3747 /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
3748 /// eventually migrate all targets to using StoreSDNodes, but porting is
3749 /// being done one target at a time.
3750 virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
3751 assert(SI.isAtomic() && "violated precondition");
3752 return false;
3753 }
3754
3755 /// Should SelectionDAG lower an atomic load of the given kind as a normal
3756 /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
3757 /// eventually migrate all targets to using LoadSDNodes, but porting is
3758 /// being done one target at a time.
3759 virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
3760 assert(LI.isAtomic() && "violated precondition");
3761 return false;
3762 }
3763
3764
3765 /// This callback is invoked by the type legalizer to legalize nodes with an
3766 /// illegal operand type but legal result types. It replaces the
3767 /// LowerOperation callback in the type Legalizer. The reason we cannot do
3768 /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
3769 /// use this callback.
3770 ///
3771 /// TODO: Consider merging with ReplaceNodeResults.
3772 ///
3773 /// The target places new result values for the node in Results (their number
3774 /// and types must exactly match those of the original return values of
3775 /// the node), or leaves Results empty, which indicates that the node is not
3776 /// to be custom lowered after all.
3777 /// The default implementation calls LowerOperation.
3778 virtual void LowerOperationWrapper(SDNode *N,
3779 SmallVectorImpl<SDValue> &Results,
3780 SelectionDAG &DAG) const;
3781
3782 /// This callback is invoked for operations that are unsupported by the
3783 /// target, which are registered to use 'custom' lowering, and whose defined
3784 /// values are all legal. If the target has no operations that require custom
3785 /// lowering, it need not implement this. The default implementation of this
3786 /// aborts.
3787 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
3788
3789 /// This callback is invoked when a node result type is illegal for the
3790 /// target, and the operation was registered to use 'custom' lowering for that
3791 /// result type. The target places new result values for the node in Results
3792 /// (their number and types must exactly match those of the original return
3793 /// values of the node), or leaves Results empty, which indicates that the
3794 /// node is not to be custom lowered after all.
3795 ///
3796 /// If the target has no operations that require custom lowering, it need not
3797 /// implement this. The default implementation aborts.
3798 virtual void ReplaceNodeResults(SDNode * /*N*/,
3799 SmallVectorImpl<SDValue> &/*Results*/,
3800 SelectionDAG &/*DAG*/) const {
3801 llvm_unreachable("ReplaceNodeResults not implemented for this target!")::llvm::llvm_unreachable_internal("ReplaceNodeResults not implemented for this target!"
, "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 3801)
;
3802 }
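// Illustrative override sketch for a hypothetical target: widen an illegal
// CTPOP result, then truncate back so the pushed value matches the node's
// original result type, as the contract above requires.  Leaving Results
// empty cancels the custom lowering.
//
//   void MyTargetLowering::ReplaceNodeResults(SDNode *N,
//                                             SmallVectorImpl<SDValue> &Results,
//                                             SelectionDAG &DAG) const {
//     if (N->getOpcode() != ISD::CTPOP)
//       return; // Results left empty: the node is not custom lowered after all.
//     SDLoc dl(N);
//     SDValue Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, N->getOperand(0));
//     SDValue Pop = DAG.getNode(ISD::CTPOP, dl, MVT::i64, Wide);
//     Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Pop));
//   }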
3803
3804 /// This method returns the name of a target specific DAG node.
3805 virtual const char *getTargetNodeName(unsigned Opcode) const;
3806
3807 /// This method returns a target specific FastISel object, or null if the
3808 /// target does not support "fast" ISel.
3809 virtual FastISel *createFastISel(FunctionLoweringInfo &,
3810 const TargetLibraryInfo *) const {
3811 return nullptr;
3812 }
3813
3814 bool verifyReturnAddressArgumentIsConstant(SDValue Op,
3815 SelectionDAG &DAG) const;
3816
3817 //===--------------------------------------------------------------------===//
3818 // Inline Asm Support hooks
3819 //
3820
3821 /// This hook allows the target to expand an inline asm call to be explicit
3822 /// llvm code if it wants to. This is useful for turning simple inline asms
3823 /// into LLVM intrinsics, which gives the compiler more information about the
3824 /// behavior of the code.
3825 virtual bool ExpandInlineAsm(CallInst *) const {
3826 return false;
3827 }
3828
3829 enum ConstraintType {
3830 C_Register, // Constraint represents specific register(s).
3831 C_RegisterClass, // Constraint represents any of register(s) in class.
3832 C_Memory, // Memory constraint.
3833 C_Immediate, // Requires an immediate.
3834 C_Other, // Something else.
3835 C_Unknown // Unsupported constraint.
3836 };
3837
3838 enum ConstraintWeight {
3839 // Generic weights.
3840 CW_Invalid = -1, // No match.
3841 CW_Okay = 0, // Acceptable.
3842 CW_Good = 1, // Good weight.
3843 CW_Better = 2, // Better weight.
3844 CW_Best = 3, // Best weight.
3845
3846 // Well-known weights.
3847 CW_SpecificReg = CW_Okay, // Specific register operands.
3848 CW_Register = CW_Good, // Register operands.
3849 CW_Memory = CW_Better, // Memory operands.
3850 CW_Constant = CW_Best, // Constant operand.
3851 CW_Default = CW_Okay // Default or don't know type.
3852 };
3853
3854 /// This contains information for each constraint that we are lowering.
3855 struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
3856 /// This contains the actual string for the code, like "m". TargetLowering
3857 /// picks the 'best' code from ConstraintInfo::Codes that most closely
3858 /// matches the operand.
3859 std::string ConstraintCode;
3860
3861 /// Information about the constraint code, e.g. Register, RegisterClass,
3862 /// Memory, Other, Unknown.
3863 TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
3864
3865 /// If this is the result output operand or a clobber, this is null,
3866 /// otherwise it is the incoming operand to the CallInst. This gets
3867 /// modified as the asm is processed.
3868 Value *CallOperandVal = nullptr;
3869
3870 /// The ValueType for the operand value.
3871 MVT ConstraintVT = MVT::Other;
3872
3873 /// Copy constructor for copying from a ConstraintInfo.
3874 AsmOperandInfo(InlineAsm::ConstraintInfo Info)
3875 : InlineAsm::ConstraintInfo(std::move(Info)) {}
3876
3877 /// Return true if this is an input operand that is a matching constraint
3878 /// like "4".
3879 bool isMatchingInputConstraint() const;
3880
3881 /// If this is an input matching constraint, this method returns the output
3882 /// operand it matches.
3883 unsigned getMatchedOperand() const;
3884 };
3885
3886 using AsmOperandInfoVector = std::vector<AsmOperandInfo>;
3887
3888 /// Split up the constraint string from the inline assembly value into the
3889 /// specific constraints and their prefixes, and also tie in the associated
3890 /// operand values. If this returns an empty vector, and if the constraint
3891 /// string itself isn't empty, there was an error parsing.
3892 virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
3893 const TargetRegisterInfo *TRI,
3894 ImmutableCallSite CS) const;
3895
3896 /// Examine constraint type and operand type and determine a weight value.
3897 /// The operand object must already have been set up with the operand type.
3898 virtual ConstraintWeight getMultipleConstraintMatchWeight(
3899 AsmOperandInfo &info, int maIndex) const;
3900
3901 /// Examine constraint string and operand type and determine a weight value.
3902 /// The operand object must already have been set up with the operand type.
3903 virtual ConstraintWeight getSingleConstraintMatchWeight(
3904 AsmOperandInfo &info, const char *constraint) const;
3905
3906 /// Determines the constraint code and constraint type to use for the specific
3907 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
3908 /// If the actual operand being passed in is available, it can be passed in as
3909 /// Op, otherwise an empty SDValue can be passed.
3910 virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
3911 SDValue Op,
3912 SelectionDAG *DAG = nullptr) const;
3913
3914 /// Given a constraint, return the type of constraint it is for this target.
3915 virtual ConstraintType getConstraintType(StringRef Constraint) const;
3916
3917 /// Given a physical register constraint (e.g. {edx}), return the register
3918 /// number and the register class for the register.
3919 ///
3920 /// Given a register class constraint, like 'r', if this corresponds directly
3921 /// to an LLVM register class, return a register of 0 and the register class
3922 /// pointer.
3923 ///
3924 /// This should only be used for C_Register constraints. On error, this
3925 /// returns a register number of 0 and a null register class pointer.
3926 virtual std::pair<unsigned, const TargetRegisterClass *>
3927 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3928 StringRef Constraint, MVT VT) const;
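// Illustrative override sketch (hypothetical target and register class names):
// map the single-letter 'r' class to a GPR class and defer everything else,
// including {regname} constraints, to the generic implementation.
//
//   std::pair<unsigned, const TargetRegisterClass *>
//   MyTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
//                                                  StringRef Constraint,
//                                                  MVT VT) const {
//     if (Constraint.size() == 1 && Constraint[0] == 'r')
//       return std::make_pair(0U, &MyTarget::GPR32RegClass);
//     return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
//   }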
3929
3930 virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const {
3931 if (ConstraintCode == "i")
3932 return InlineAsm::Constraint_i;
3933 else if (ConstraintCode == "m")
3934 return InlineAsm::Constraint_m;
3935 return InlineAsm::Constraint_Unknown;
3936 }
3937
3938 /// Try to replace an X constraint, which matches anything, with another that
3939 /// has more specific requirements based on the type of the corresponding
3940 /// operand. This returns null if there is no replacement to make.
3941 virtual const char *LowerXConstraint(EVT ConstraintVT) const;
3942
3943 /// Lower the specified operand into the Ops vector. If it is invalid, don't
3944 /// add anything to Ops.
3945 virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
3946 std::vector<SDValue> &Ops,
3947 SelectionDAG &DAG) const;
3948
3949 // Lower custom output constraints. If invalid, return SDValue().
3950 virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
3951 SDLoc DL,
3952 const AsmOperandInfo &OpInfo,
3953 SelectionDAG &DAG) const;
3954
3955 //===--------------------------------------------------------------------===//
3956 // Div utility functions
3957 //
3958 SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
3959 SmallVectorImpl<SDNode *> &Created) const;
3960 SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
3961 SmallVectorImpl<SDNode *> &Created) const;
3962
3963 /// Targets may override this function to provide custom SDIV lowering for
3964 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
3965 /// assumes SDIV is expensive and replaces it with a series of other integer
3966 /// operations.
3967 virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
3968 SelectionDAG &DAG,
3969 SmallVectorImpl<SDNode *> &Created) const;
3970
3971 /// Indicate whether this target prefers to combine FDIVs with the same
3972 /// divisor. If the transform should never be done, return zero. If the
3973 /// transform should be done, return the minimum number of divisor uses
3974 /// that must exist.
3975 virtual unsigned combineRepeatedFPDivisors() const {
3976 return 0;
3977 }
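// The transform this hook gates, sketched on scalar values (illustrative):
// with a repeated divisor d and a threshold of 2, the DAG combiner rewrites
//
//   a / d;  b / d
//
// into
//
//   r = 1.0 / d;  a * r;  b * r
//
// trading two divisions for one division plus two multiplies, which only pays
// off when at least the returned number of uses of d exist.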
3978
3979 /// Hooks for building estimates in place of slower divisions and square
3980 /// roots.
3981
3982 /// Return either a square root or its reciprocal estimate value for the input
3983 /// operand.
3984 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
3985 /// 'Enabled' as set by a potential default override attribute.
3986 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
3987 /// refinement iterations required to generate a sufficient (though not
3988 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
3989 /// The boolean UseOneConstNR output is used to select a Newton-Raphson
3990 /// algorithm implementation that uses either one or two constants.
3991 /// The boolean Reciprocal is used to select whether the estimate is for the
3992 /// square root of the input operand or the reciprocal of its square root.
3993 /// A target may choose to implement its own refinement within this function.
3994 /// If that's true, then return '0' as the number of RefinementSteps to avoid
3995 /// any further refinement of the estimate.
3996 /// An empty SDValue return means no estimate sequence can be created.
3997 virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
3998 int Enabled, int &RefinementSteps,
3999 bool &UseOneConstNR, bool Reciprocal) const {
4000 return SDValue();
4001 }
4002
4003 /// Return a reciprocal estimate value for the input operand.
4004 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
4005 /// 'Enabled' as set by a potential default override attribute.
4006 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
4007 /// refinement iterations required to generate a sufficient (though not
4008 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
4009 /// A target may choose to implement its own refinement within this function.
4010 /// If that's true, then return '0' as the number of RefinementSteps to avoid
4011 /// any further refinement of the estimate.
4012 /// An empty SDValue return means no estimate sequence can be created.
4013 virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
4014 int Enabled, int &RefinementSteps) const {
4015 return SDValue();
4016 }
4017
4018 //===--------------------------------------------------------------------===//
4019 // Legalization utility functions
4020 //
4021
4022 /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
4023 /// respectively, each computing an n/2-bit part of the result.
4024 /// \param Result A vector that will be filled with the parts of the result
4025 /// in little-endian order.
4026 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
4027 /// if you want to control how low bits are extracted from the LHS.
4028 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
4029 /// \param RL Low bits of the RHS of the MUL. See LL for meaning
4030 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
4031 /// \returns true if the node has been expanded, false if it has not
4032 bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS,
4033 SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
4034 SelectionDAG &DAG, MulExpansionKind Kind,
4035 SDValue LL = SDValue(), SDValue LH = SDValue(),
4036 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
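// The algebra behind the expansion (illustrative), with k = n/2,
// LHS = LH*2^k + LL and RHS = RH*2^k + RL:
//
//   LHS*RHS = LL*RL + (LL*RH + LH*RL)*2^k + LH*RH*2^(2k)
//
// The low n/2-bit part comes from LL*RL, the middle parts from the cross
// terms plus the carry out of LL*RL, and the top part from LH*RH plus the
// remaining carries, which is what the two- or four-node expansion builds.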
4037
4038 /// Expand a MUL into two nodes. One that computes the high bits of
4039 /// the result and one that computes the low bits.
4040 /// \param HiLoVT The value type to use for the Lo and Hi nodes.
4041 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
4042 /// if you want to control how low bits are extracted from the LHS.
4043 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
4044 /// \param RL Low bits of the RHS of the MUL. See LL for meaning
4045 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
4046 /// \returns true if the node has been expanded, false if it has not.
4047 bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
4048 SelectionDAG &DAG, MulExpansionKind Kind,
4049 SDValue LL = SDValue(), SDValue LH = SDValue(),
4050 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
4051
4052 /// Expand funnel shift.
4053 /// \param N Node to expand
4054 /// \param Result output after conversion
4055 /// \returns True, if the expansion was successful, false otherwise
4056 bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4057
4058 /// Expand rotations.
4059 /// \param N Node to expand
4060 /// \param Result output after conversion
4061 /// \returns True, if the expansion was successful, false otherwise
4062 bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4063
4064 /// Expand float(f32) to SINT(i64) conversion
4065 /// \param N Node to expand
4066 /// \param Result output after conversion
4067 /// \returns True, if the expansion was successful, false otherwise
4068 bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4069
4070 /// Expand float to UINT conversion
4071 /// \param N Node to expand
4072 /// \param Result output after conversion
4073 /// \returns True, if the expansion was successful, false otherwise
4074 bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const;
4075
4076 /// Expand UINT(i64) to double(f64) conversion
4077 /// \param N Node to expand
4078 /// \param Result output after conversion
4079 /// \returns True, if the expansion was successful, false otherwise
4080 bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4081
4082 /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
4083 SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
4084
4085 /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
4086 /// vector nodes can only succeed if all operations are legal/custom.
4087 /// \param N Node to expand
4088 /// \param Result output after conversion
4089 /// \returns True, if the expansion was successful, false otherwise
4090 bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4091
4092 /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
4093 /// vector nodes can only succeed if all operations are legal/custom.
4094 /// \param N Node to expand
4095 /// \param Result output after conversion
4096 /// \returns True, if the expansion was successful, false otherwise
4097 bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4098
4099 /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
4100 /// vector nodes can only succeed if all operations are legal/custom.
4101 /// \param N Node to expand
4102 /// \param Result output after conversion
4103 /// \returns True, if the expansion was successful, false otherwise
4104 bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4105
4106 /// Expand ABS nodes. Expands vector/scalar ABS nodes;
4107 /// vector nodes can only succeed if all operations are legal/custom.
4108 /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
4109 /// \param N Node to expand
4110 /// \param Result output after conversion
4111 /// \returns True, if the expansion was successful, false otherwise
4112 bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
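// Worked example of the expansion above on i32 (illustrative): for x = -5,
// the arithmetic shift reproduces the sign in every bit, giving the mask
// M = 0xFFFFFFFF; ADD(x, M) = -6, and XOR(-6, M) flips the bits back to +5.
// For x = +5 the mask is 0, so both the ADD and the XOR leave x unchanged.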
4113
4114 /// Turn a load of a vector type into loads of the individual elements.
4115 /// \param LD load to expand
4116 /// \returns MERGE_VALUEs of the scalar loads with their chains.
4117 SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const;
4118
4119 /// Turn a store of a vector type into stores of the individual elements.
4120 /// \param ST Store with a vector value type
4121 /// \returns MERGE_VALUEs of the individual store chains.
4122 SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
4123
4124 /// Expands an unaligned load to 2 half-size loads for an integer, and
4125 /// possibly more for vectors.
4126 std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
4127 SelectionDAG &DAG) const;
4128
4129 /// Expands an unaligned store to 2 half-size stores for integer values, and
4130 /// possibly more for vectors.
4131 SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
4132
4133 /// Increments memory address \p Addr according to the type of the value
4134 /// \p DataVT that should be stored. If the data is stored in compressed
4135 /// form, the memory address should be incremented according to the number of
4136 /// the stored elements. This number is equal to the number of '1's bits
4137 /// in the \p Mask.
4138 /// \p DataVT is a vector type. \p Mask is a vector value.
4139 /// \p DataVT and \p Mask have the same number of vector elements.
4140 SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
4141 EVT DataVT, SelectionDAG &DAG,
4142 bool IsCompressedMemory) const;
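// Increment rule (illustrative): for a normal masked access the address
// advances by the full store size of DataVT; for a compressed store it
// advances by popcount(Mask) * (element store size), since only the selected
// lanes occupy memory.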
4143
4144 /// Get a pointer to vector element \p Idx located in memory for a vector of
4145 /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
4146 /// bounds the returned pointer is unspecified, but will be within the vector
4147 /// bounds.
4148 SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
4149 SDValue Index) const;
4150
4151 /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
4152 /// method accepts integers as its arguments.
4153 SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
4154
4155 /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
4156 /// method accepts integers as its arguments.
4157 SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
4158
4159 /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
4160 /// always succeeds and populates the Result and Overflow arguments.
4161 void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
4162 SelectionDAG &DAG) const;
4163
4164 /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
4165 /// always succeeds and populates the Result and Overflow arguments.
4166 void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
4167 SelectionDAG &DAG) const;
4168
4169 /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
4170 /// expansion was successful and populates the Result and Overflow arguments.
4171 bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
4172 SelectionDAG &DAG) const;
4173
4174 /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
4175 /// only the first Count elements of the vector are used.
4176 SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
4177
4178 //===--------------------------------------------------------------------===//
4179 // Instruction Emitting Hooks
4180 //
4181
4182 /// This method should be implemented by targets that mark instructions with
4183 /// the 'usesCustomInserter' flag. These instructions are special in various
4184 /// ways, which require special support to insert. The specified MachineInstr
4185 /// is created but not inserted into any basic blocks, and this method is
4186 /// called to expand it into a sequence of instructions, potentially also
4187 /// creating new basic blocks and control flow.
4188 /// As long as the returned basic block is different (i.e., we created a new
4189 /// one), the custom inserter is free to modify the rest of \p MBB.
4190 virtual MachineBasicBlock *
4191 EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
4192
4193 /// This method should be implemented by targets that mark instructions with
4194 /// the 'hasPostISelHook' flag. These instructions must be adjusted after
4195 /// instruction selection by target hooks. e.g. To fill in optional defs for
4196 /// ARM 's' setting instructions.
4197 virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
4198 SDNode *Node) const;
4199
4200 /// If this function returns true, SelectionDAGBuilder emits a
4201 /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
4202 virtual bool useLoadStackGuardNode() const {
4203 return false;
4204 }
4205
4206 virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
4207 const SDLoc &DL) const {
4208 llvm_unreachable("not implemented for this target")::llvm::llvm_unreachable_internal("not implemented for this target"
, "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h"
, 4208)
;
4209 }
4210
4211 /// Lower TLS global address SDNode for target independent emulated TLS model.
4212 virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
4213 SelectionDAG &DAG) const;
4214
4215 /// Expands target specific indirect branch for the case of JumpTable
4216 /// expansion.
4217 virtual SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, SDValue Addr,
4218 SelectionDAG &DAG) const {
4219 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr);
4220 }
4221
4222 // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
4223 // If we're comparing for equality to zero and isCtlzFast is true, expose the
4224 // fact that this can be implemented as a ctlz/srl pair, so that the dag
4225 // combiner can fold the new nodes.
4226 SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
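// Worked example of the fold above for an i32 operand (illustrative):
// ctlz(0) == 32 == 0b100000, so srl by log2(32) == 5 yields 1; any nonzero x
// has ctlz(x) < 32, so the same shift yields 0.  The truncate then narrows
// that single bit to the i1 result type of the setcc.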
4227
4228private:
4229 SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4230 const SDLoc &DL, DAGCombinerInfo &DCI) const;
4231 SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4232 const SDLoc &DL, DAGCombinerInfo &DCI) const;
4233
4234 SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
4235 SDValue N1, ISD::CondCode Cond,
4236 DAGCombinerInfo &DCI,
4237 const SDLoc &DL) const;
4238
4239 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4240 SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
4241 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4242 DAGCombinerInfo &DCI, const SDLoc &DL) const;
4243
4244 SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4245 SDValue CompTargetNode, ISD::CondCode Cond,
4246 DAGCombinerInfo &DCI, const SDLoc &DL,
4247 SmallVectorImpl<SDNode *> &Created) const;
4248 SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
4249 ISD::CondCode Cond, DAGCombinerInfo &DCI,
4250 const SDLoc &DL) const;
4251
4252 SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
4253 SDValue CompTargetNode, ISD::CondCode Cond,
4254 DAGCombinerInfo &DCI, const SDLoc &DL,
4255 SmallVectorImpl<SDNode *> &Created) const;
4256 SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
4257 ISD::CondCode Cond, DAGCombinerInfo &DCI,
4258 const SDLoc &DL) const;
4259};
4260
4261/// Given an LLVM IR type and return type attributes, compute the return value
4262/// EVTs and flags, and optionally also the offsets, if the return value is
4263/// being lowered to memory.
4264void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
4265 SmallVectorImpl<ISD::OutputArg> &Outs,
4266 const TargetLowering &TLI, const DataLayout &DL);
4267
4268} // end namespace llvm
4269
4270#endif // LLVM_CODEGEN_TARGETLOWERING_H