File: lib/CodeGen/CodeGenPrepare.cpp
Static-analyzer warning: line 5826, column 3 — "Called C++ object pointer is null"
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//
14 | |||||||||||||||||
15 | #include "llvm/ADT/APInt.h" | ||||||||||||||||
16 | #include "llvm/ADT/ArrayRef.h" | ||||||||||||||||
17 | #include "llvm/ADT/DenseMap.h" | ||||||||||||||||
18 | #include "llvm/ADT/MapVector.h" | ||||||||||||||||
19 | #include "llvm/ADT/PointerIntPair.h" | ||||||||||||||||
20 | #include "llvm/ADT/STLExtras.h" | ||||||||||||||||
21 | #include "llvm/ADT/SmallPtrSet.h" | ||||||||||||||||
22 | #include "llvm/ADT/SmallVector.h" | ||||||||||||||||
23 | #include "llvm/ADT/Statistic.h" | ||||||||||||||||
24 | #include "llvm/Analysis/BlockFrequencyInfo.h" | ||||||||||||||||
25 | #include "llvm/Analysis/BranchProbabilityInfo.h" | ||||||||||||||||
26 | #include "llvm/Analysis/ConstantFolding.h" | ||||||||||||||||
27 | #include "llvm/Analysis/InstructionSimplify.h" | ||||||||||||||||
28 | #include "llvm/Analysis/LoopInfo.h" | ||||||||||||||||
29 | #include "llvm/Analysis/MemoryBuiltins.h" | ||||||||||||||||
30 | #include "llvm/Analysis/ProfileSummaryInfo.h" | ||||||||||||||||
31 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||||||||||||||||
32 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||||||||||
33 | #include "llvm/Transforms/Utils/Local.h" | ||||||||||||||||
34 | #include "llvm/Analysis/ValueTracking.h" | ||||||||||||||||
35 | #include "llvm/Analysis/VectorUtils.h" | ||||||||||||||||
36 | #include "llvm/CodeGen/Analysis.h" | ||||||||||||||||
37 | #include "llvm/CodeGen/ISDOpcodes.h" | ||||||||||||||||
38 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||||||||||||||
39 | #include "llvm/CodeGen/TargetLowering.h" | ||||||||||||||||
40 | #include "llvm/CodeGen/TargetPassConfig.h" | ||||||||||||||||
41 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | ||||||||||||||||
42 | #include "llvm/CodeGen/ValueTypes.h" | ||||||||||||||||
43 | #include "llvm/Config/llvm-config.h" | ||||||||||||||||
44 | #include "llvm/IR/Argument.h" | ||||||||||||||||
45 | #include "llvm/IR/Attributes.h" | ||||||||||||||||
46 | #include "llvm/IR/BasicBlock.h" | ||||||||||||||||
47 | #include "llvm/IR/CallSite.h" | ||||||||||||||||
48 | #include "llvm/IR/Constant.h" | ||||||||||||||||
49 | #include "llvm/IR/Constants.h" | ||||||||||||||||
50 | #include "llvm/IR/DataLayout.h" | ||||||||||||||||
51 | #include "llvm/IR/DerivedTypes.h" | ||||||||||||||||
52 | #include "llvm/IR/Dominators.h" | ||||||||||||||||
53 | #include "llvm/IR/Function.h" | ||||||||||||||||
54 | #include "llvm/IR/GetElementPtrTypeIterator.h" | ||||||||||||||||
55 | #include "llvm/IR/GlobalValue.h" | ||||||||||||||||
56 | #include "llvm/IR/GlobalVariable.h" | ||||||||||||||||
57 | #include "llvm/IR/IRBuilder.h" | ||||||||||||||||
58 | #include "llvm/IR/InlineAsm.h" | ||||||||||||||||
59 | #include "llvm/IR/InstrTypes.h" | ||||||||||||||||
60 | #include "llvm/IR/Instruction.h" | ||||||||||||||||
61 | #include "llvm/IR/Instructions.h" | ||||||||||||||||
62 | #include "llvm/IR/IntrinsicInst.h" | ||||||||||||||||
63 | #include "llvm/IR/Intrinsics.h" | ||||||||||||||||
64 | #include "llvm/IR/LLVMContext.h" | ||||||||||||||||
65 | #include "llvm/IR/MDBuilder.h" | ||||||||||||||||
66 | #include "llvm/IR/Module.h" | ||||||||||||||||
67 | #include "llvm/IR/Operator.h" | ||||||||||||||||
68 | #include "llvm/IR/PatternMatch.h" | ||||||||||||||||
69 | #include "llvm/IR/Statepoint.h" | ||||||||||||||||
70 | #include "llvm/IR/Type.h" | ||||||||||||||||
71 | #include "llvm/IR/Use.h" | ||||||||||||||||
72 | #include "llvm/IR/User.h" | ||||||||||||||||
73 | #include "llvm/IR/Value.h" | ||||||||||||||||
74 | #include "llvm/IR/ValueHandle.h" | ||||||||||||||||
75 | #include "llvm/IR/ValueMap.h" | ||||||||||||||||
76 | #include "llvm/Pass.h" | ||||||||||||||||
77 | #include "llvm/Support/BlockFrequency.h" | ||||||||||||||||
78 | #include "llvm/Support/BranchProbability.h" | ||||||||||||||||
79 | #include "llvm/Support/Casting.h" | ||||||||||||||||
80 | #include "llvm/Support/CommandLine.h" | ||||||||||||||||
81 | #include "llvm/Support/Compiler.h" | ||||||||||||||||
82 | #include "llvm/Support/Debug.h" | ||||||||||||||||
83 | #include "llvm/Support/ErrorHandling.h" | ||||||||||||||||
84 | #include "llvm/Support/MachineValueType.h" | ||||||||||||||||
85 | #include "llvm/Support/MathExtras.h" | ||||||||||||||||
86 | #include "llvm/Support/raw_ostream.h" | ||||||||||||||||
87 | #include "llvm/Target/TargetMachine.h" | ||||||||||||||||
88 | #include "llvm/Target/TargetOptions.h" | ||||||||||||||||
89 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | ||||||||||||||||
90 | #include "llvm/Transforms/Utils/BypassSlowDivision.h" | ||||||||||||||||
91 | #include "llvm/Transforms/Utils/SimplifyLibCalls.h" | ||||||||||||||||
92 | #include <algorithm> | ||||||||||||||||
93 | #include <cassert> | ||||||||||||||||
94 | #include <cstdint> | ||||||||||||||||
95 | #include <iterator> | ||||||||||||||||
96 | #include <limits> | ||||||||||||||||
97 | #include <memory> | ||||||||||||||||
98 | #include <utility> | ||||||||||||||||
99 | #include <vector> | ||||||||||||||||
100 | |||||||||||||||||
101 | using namespace llvm; | ||||||||||||||||
102 | using namespace llvm::PatternMatch; | ||||||||||||||||
103 | |||||||||||||||||
// Debug type string used by LLVM_DEBUG and the STATISTIC macros below.
#define DEBUG_TYPE "codegenprepare"
105 | |||||||||||||||||
106 | STATISTIC(NumBlocksElim, "Number of blocks eliminated")static llvm::Statistic NumBlocksElim = {"codegenprepare", "NumBlocksElim" , "Number of blocks eliminated", {0}, {false}}; | ||||||||||||||||
107 | STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated")static llvm::Statistic NumPHIsElim = {"codegenprepare", "NumPHIsElim" , "Number of trivial PHIs eliminated", {0}, {false}}; | ||||||||||||||||
108 | STATISTIC(NumGEPsElim, "Number of GEPs converted to casts")static llvm::Statistic NumGEPsElim = {"codegenprepare", "NumGEPsElim" , "Number of GEPs converted to casts", {0}, {false}}; | ||||||||||||||||
109 | STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "static llvm::Statistic NumCmpUses = {"codegenprepare", "NumCmpUses" , "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps" , {0}, {false}} | ||||||||||||||||
110 | "sunken Cmps")static llvm::Statistic NumCmpUses = {"codegenprepare", "NumCmpUses" , "Number of uses of Cmp expressions replaced with uses of " "sunken Cmps" , {0}, {false}}; | ||||||||||||||||
111 | STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "static llvm::Statistic NumCastUses = {"codegenprepare", "NumCastUses" , "Number of uses of Cast expressions replaced with uses " "of sunken Casts" , {0}, {false}} | ||||||||||||||||
112 | "of sunken Casts")static llvm::Statistic NumCastUses = {"codegenprepare", "NumCastUses" , "Number of uses of Cast expressions replaced with uses " "of sunken Casts" , {0}, {false}}; | ||||||||||||||||
113 | STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "static llvm::Statistic NumMemoryInsts = {"codegenprepare", "NumMemoryInsts" , "Number of memory instructions whose address " "computations were sunk" , {0}, {false}} | ||||||||||||||||
114 | "computations were sunk")static llvm::Statistic NumMemoryInsts = {"codegenprepare", "NumMemoryInsts" , "Number of memory instructions whose address " "computations were sunk" , {0}, {false}}; | ||||||||||||||||
115 | STATISTIC(NumMemoryInstsPhiCreated,static llvm::Statistic NumMemoryInstsPhiCreated = {"codegenprepare" , "NumMemoryInstsPhiCreated", "Number of phis created when address " "computations were sunk to memory instructions", {0}, {false }} | ||||||||||||||||
116 | "Number of phis created when address "static llvm::Statistic NumMemoryInstsPhiCreated = {"codegenprepare" , "NumMemoryInstsPhiCreated", "Number of phis created when address " "computations were sunk to memory instructions", {0}, {false }} | ||||||||||||||||
117 | "computations were sunk to memory instructions")static llvm::Statistic NumMemoryInstsPhiCreated = {"codegenprepare" , "NumMemoryInstsPhiCreated", "Number of phis created when address " "computations were sunk to memory instructions", {0}, {false }}; | ||||||||||||||||
118 | STATISTIC(NumMemoryInstsSelectCreated,static llvm::Statistic NumMemoryInstsSelectCreated = {"codegenprepare" , "NumMemoryInstsSelectCreated", "Number of select created when address " "computations were sunk to memory instructions", {0}, {false }} | ||||||||||||||||
119 | "Number of select created when address "static llvm::Statistic NumMemoryInstsSelectCreated = {"codegenprepare" , "NumMemoryInstsSelectCreated", "Number of select created when address " "computations were sunk to memory instructions", {0}, {false }} | ||||||||||||||||
120 | "computations were sunk to memory instructions")static llvm::Statistic NumMemoryInstsSelectCreated = {"codegenprepare" , "NumMemoryInstsSelectCreated", "Number of select created when address " "computations were sunk to memory instructions", {0}, {false }}; | ||||||||||||||||
121 | STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads")static llvm::Statistic NumExtsMoved = {"codegenprepare", "NumExtsMoved" , "Number of [s|z]ext instructions combined with loads", {0}, {false}}; | ||||||||||||||||
122 | STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized")static llvm::Statistic NumExtUses = {"codegenprepare", "NumExtUses" , "Number of uses of [s|z]ext instructions optimized", {0}, { false}}; | ||||||||||||||||
123 | STATISTIC(NumAndsAdded,static llvm::Statistic NumAndsAdded = {"codegenprepare", "NumAndsAdded" , "Number of and mask instructions added to form ext loads", { 0}, {false}} | ||||||||||||||||
124 | "Number of and mask instructions added to form ext loads")static llvm::Statistic NumAndsAdded = {"codegenprepare", "NumAndsAdded" , "Number of and mask instructions added to form ext loads", { 0}, {false}}; | ||||||||||||||||
125 | STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized")static llvm::Statistic NumAndUses = {"codegenprepare", "NumAndUses" , "Number of uses of and mask instructions optimized", {0}, { false}}; | ||||||||||||||||
126 | STATISTIC(NumRetsDup, "Number of return instructions duplicated")static llvm::Statistic NumRetsDup = {"codegenprepare", "NumRetsDup" , "Number of return instructions duplicated", {0}, {false}}; | ||||||||||||||||
127 | STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved")static llvm::Statistic NumDbgValueMoved = {"codegenprepare", "NumDbgValueMoved" , "Number of debug value instructions moved", {0}, {false}}; | ||||||||||||||||
128 | STATISTIC(NumSelectsExpanded, "Number of selects turned into branches")static llvm::Statistic NumSelectsExpanded = {"codegenprepare" , "NumSelectsExpanded", "Number of selects turned into branches" , {0}, {false}}; | ||||||||||||||||
129 | STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed")static llvm::Statistic NumStoreExtractExposed = {"codegenprepare" , "NumStoreExtractExposed", "Number of store(extractelement) exposed" , {0}, {false}}; | ||||||||||||||||
130 | |||||||||||||||||
131 | static cl::opt<bool> DisableBranchOpts( | ||||||||||||||||
132 | "disable-cgp-branch-opts", cl::Hidden, cl::init(false), | ||||||||||||||||
133 | cl::desc("Disable branch optimizations in CodeGenPrepare")); | ||||||||||||||||
134 | |||||||||||||||||
135 | static cl::opt<bool> | ||||||||||||||||
136 | DisableGCOpts("disable-cgp-gc-opts", cl::Hidden, cl::init(false), | ||||||||||||||||
137 | cl::desc("Disable GC optimizations in CodeGenPrepare")); | ||||||||||||||||
138 | |||||||||||||||||
139 | static cl::opt<bool> DisableSelectToBranch( | ||||||||||||||||
140 | "disable-cgp-select2branch", cl::Hidden, cl::init(false), | ||||||||||||||||
141 | cl::desc("Disable select to branch conversion.")); | ||||||||||||||||
142 | |||||||||||||||||
143 | static cl::opt<bool> AddrSinkUsingGEPs( | ||||||||||||||||
144 | "addr-sink-using-gep", cl::Hidden, cl::init(true), | ||||||||||||||||
145 | cl::desc("Address sinking in CGP using GEPs.")); | ||||||||||||||||
146 | |||||||||||||||||
147 | static cl::opt<bool> EnableAndCmpSinking( | ||||||||||||||||
148 | "enable-andcmp-sinking", cl::Hidden, cl::init(true), | ||||||||||||||||
149 | cl::desc("Enable sinkinig and/cmp into branches.")); | ||||||||||||||||
150 | |||||||||||||||||
151 | static cl::opt<bool> DisableStoreExtract( | ||||||||||||||||
152 | "disable-cgp-store-extract", cl::Hidden, cl::init(false), | ||||||||||||||||
153 | cl::desc("Disable store(extract) optimizations in CodeGenPrepare")); | ||||||||||||||||
154 | |||||||||||||||||
155 | static cl::opt<bool> StressStoreExtract( | ||||||||||||||||
156 | "stress-cgp-store-extract", cl::Hidden, cl::init(false), | ||||||||||||||||
157 | cl::desc("Stress test store(extract) optimizations in CodeGenPrepare")); | ||||||||||||||||
158 | |||||||||||||||||
159 | static cl::opt<bool> DisableExtLdPromotion( | ||||||||||||||||
160 | "disable-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), | ||||||||||||||||
161 | cl::desc("Disable ext(promotable(ld)) -> promoted(ext(ld)) optimization in " | ||||||||||||||||
162 | "CodeGenPrepare")); | ||||||||||||||||
163 | |||||||||||||||||
164 | static cl::opt<bool> StressExtLdPromotion( | ||||||||||||||||
165 | "stress-cgp-ext-ld-promotion", cl::Hidden, cl::init(false), | ||||||||||||||||
166 | cl::desc("Stress test ext(promotable(ld)) -> promoted(ext(ld)) " | ||||||||||||||||
167 | "optimization in CodeGenPrepare")); | ||||||||||||||||
168 | |||||||||||||||||
169 | static cl::opt<bool> DisablePreheaderProtect( | ||||||||||||||||
170 | "disable-preheader-prot", cl::Hidden, cl::init(false), | ||||||||||||||||
171 | cl::desc("Disable protection against removing loop preheaders")); | ||||||||||||||||
172 | |||||||||||||||||
173 | static cl::opt<bool> ProfileGuidedSectionPrefix( | ||||||||||||||||
174 | "profile-guided-section-prefix", cl::Hidden, cl::init(true), cl::ZeroOrMore, | ||||||||||||||||
175 | cl::desc("Use profile info to add section prefix for hot/cold functions")); | ||||||||||||||||
176 | |||||||||||||||||
177 | static cl::opt<unsigned> FreqRatioToSkipMerge( | ||||||||||||||||
178 | "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2), | ||||||||||||||||
179 | cl::desc("Skip merging empty blocks if (frequency of empty block) / " | ||||||||||||||||
180 | "(frequency of destination block) is greater than this ratio")); | ||||||||||||||||
181 | |||||||||||||||||
182 | static cl::opt<bool> ForceSplitStore( | ||||||||||||||||
183 | "force-split-store", cl::Hidden, cl::init(false), | ||||||||||||||||
184 | cl::desc("Force store splitting no matter what the target query says.")); | ||||||||||||||||
185 | |||||||||||||||||
186 | static cl::opt<bool> | ||||||||||||||||
187 | EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden, | ||||||||||||||||
188 | cl::desc("Enable merging of redundant sexts when one is dominating" | ||||||||||||||||
189 | " the other."), cl::init(true)); | ||||||||||||||||
190 | |||||||||||||||||
191 | static cl::opt<bool> DisableComplexAddrModes( | ||||||||||||||||
192 | "disable-complex-addr-modes", cl::Hidden, cl::init(false), | ||||||||||||||||
193 | cl::desc("Disables combining addressing modes with different parts " | ||||||||||||||||
194 | "in optimizeMemoryInst.")); | ||||||||||||||||
195 | |||||||||||||||||
196 | static cl::opt<bool> | ||||||||||||||||
197 | AddrSinkNewPhis("addr-sink-new-phis", cl::Hidden, cl::init(false), | ||||||||||||||||
198 | cl::desc("Allow creation of Phis in Address sinking.")); | ||||||||||||||||
199 | |||||||||||||||||
200 | static cl::opt<bool> | ||||||||||||||||
201 | AddrSinkNewSelects("addr-sink-new-select", cl::Hidden, cl::init(true), | ||||||||||||||||
202 | cl::desc("Allow creation of selects in Address sinking.")); | ||||||||||||||||
203 | |||||||||||||||||
204 | static cl::opt<bool> AddrSinkCombineBaseReg( | ||||||||||||||||
205 | "addr-sink-combine-base-reg", cl::Hidden, cl::init(true), | ||||||||||||||||
206 | cl::desc("Allow combining of BaseReg field in Address sinking.")); | ||||||||||||||||
207 | |||||||||||||||||
208 | static cl::opt<bool> AddrSinkCombineBaseGV( | ||||||||||||||||
209 | "addr-sink-combine-base-gv", cl::Hidden, cl::init(true), | ||||||||||||||||
210 | cl::desc("Allow combining of BaseGV field in Address sinking.")); | ||||||||||||||||
211 | |||||||||||||||||
212 | static cl::opt<bool> AddrSinkCombineBaseOffs( | ||||||||||||||||
213 | "addr-sink-combine-base-offs", cl::Hidden, cl::init(true), | ||||||||||||||||
214 | cl::desc("Allow combining of BaseOffs field in Address sinking.")); | ||||||||||||||||
215 | |||||||||||||||||
216 | static cl::opt<bool> AddrSinkCombineScaledReg( | ||||||||||||||||
217 | "addr-sink-combine-scaled-reg", cl::Hidden, cl::init(true), | ||||||||||||||||
218 | cl::desc("Allow combining of ScaledReg field in Address sinking.")); | ||||||||||||||||
219 | |||||||||||||||||
220 | static cl::opt<bool> | ||||||||||||||||
221 | EnableGEPOffsetSplit("cgp-split-large-offset-gep", cl::Hidden, | ||||||||||||||||
222 | cl::init(true), | ||||||||||||||||
223 | cl::desc("Enable splitting large offset of GEP.")); | ||||||||||||||||
224 | |||||||||||||||||
225 | namespace { | ||||||||||||||||
226 | |||||||||||||||||
/// Which kind(s) of integer extension have been observed for a promoted
/// instruction.
enum ExtType {
  ZeroExtension, // Only zext has been seen so far.
  SignExtension, // Only sext has been seen so far.
  BothExtension  // Both zext and sext were seen (in either order); the
                 // recorded original-type information for the promoted
                 // instruction is therefore no longer meaningful.
};
235 | |||||||||||||||||
// Shorthand aliases used throughout the pass.
using SetOfInstrs = SmallPtrSet<Instruction *, 16>;
// Pairs an instruction's original type with the ExtType seen for it.
using TypeIsSExt = PointerIntPair<Type *, 2, ExtType>;
// Maps a promoted instruction back to its pre-promotion type info.
using InstrToOrigTy = DenseMap<Instruction *, TypeIsSExt>;
using SExts = SmallVector<Instruction *, 16>;
// Maps a value to the sext instructions rooted at it.
using ValueToSExts = DenseMap<Value *, SExts>;

class TypePromotionTransaction;
243 | |||||||||||||||||
/// Function pass that rewrites IR to better suit SelectionDAG's
/// basic-block-at-a-time instruction selection (address-mode sinking,
/// extension promotion, block merging, etc.).
class CodeGenPrepare : public FunctionPass {
  // Target hooks. NOTE(review): TM/SubtargetInfo/TLI/TRI are only
  // initialized when a TargetPassConfig is available (see runOnFunction),
  // so TLI/TM can remain null — callers must null-check before
  // dereferencing. This matches the analyzer's null-pointer report.
  const TargetMachine *TM = nullptr;
  const TargetSubtargetInfo *SubtargetInfo;
  const TargetLowering *TLI = nullptr;
  const TargetRegisterInfo *TRI;
  const TargetTransformInfo *TTI = nullptr;
  const TargetLibraryInfo *TLInfo;
  const LoopInfo *LI;
  // Profile-derived analyses, rebuilt per function in runOnFunction.
  std::unique_ptr<BlockFrequencyInfo> BFI;
  std::unique_ptr<BranchProbabilityInfo> BPI;

  /// As we scan instructions optimizing them, this is the next instruction
  /// to optimize. Transforms that can invalidate this should update it.
  BasicBlock::iterator CurInstIterator;

  /// Keeps track of non-local addresses that have been sunk into a block.
  /// This allows us to avoid inserting duplicate code for blocks with
  /// multiple load/stores of the same address. The usage of WeakTrackingVH
  /// enables SunkAddrs to be treated as a cache whose entries can be
  /// invalidated if a sunken address computation has been erased.
  ValueMap<Value*, WeakTrackingVH> SunkAddrs;

  /// Keeps track of all instructions inserted for the current function.
  SetOfInstrs InsertedInsts;

  /// Keeps track of the type of the related instruction before their
  /// promotion for the current function.
  InstrToOrigTy PromotedInsts;

  /// Keep track of instructions removed during promotion.
  SetOfInstrs RemovedInsts;

  /// Keep track of sext chains based on their initial value.
  DenseMap<Value *, Instruction *> SeenChainsForSExt;

  /// Keep track of GEPs accessing the same data structures such as structs or
  /// arrays that are candidates to be split later because of their large
  /// size.
  MapVector<
      AssertingVH<Value>,
      SmallVector<std::pair<AssertingVH<GetElementPtrInst>, int64_t>, 32>>
      LargeOffsetGEPMap;

  /// Keep track of new GEP base after splitting the GEPs having large offset.
  SmallSet<AssertingVH<Value>, 2> NewGEPBases;

  /// Map serial numbers to Large offset GEPs.
  DenseMap<AssertingVH<GetElementPtrInst>, int> LargeOffsetGEPID;

  /// Keep track of SExt promoted.
  ValueToSExts ValToSExtendedUses;

  /// True if optimizing for size.
  bool OptSize;

  /// DataLayout for the Function being processed.
  const DataLayout *DL = nullptr;

  /// Building the dominator tree can be expensive, so we only build it
  /// lazily and update it when required.
  std::unique_ptr<DominatorTree> DT;

public:
  static char ID; // Pass identification, replacement for typeid

  CodeGenPrepare() : FunctionPass(ID) {
    initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return "CodeGen Prepare"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // FIXME: When we can selectively preserve passes, preserve the domtree.
    AU.addRequired<ProfileSummaryInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
  }

private:
  /// Run \p f, then reset CurInstIterator to the start of \p BB if the
  /// instruction it pointed to was deleted as a side effect of \p f.
  template <typename F>
  void resetIteratorIfInvalidatedWhileCalling(BasicBlock *BB, F f) {
    // Substituting can cause recursive simplifications, which can invalidate
    // our iterator. Use a WeakTrackingVH to hold onto it in case this
    // happens.
    Value *CurValue = &*CurInstIterator;
    WeakTrackingVH IterHandle(CurValue);

    f();

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurValue) {
      CurInstIterator = BB->begin();
      // The sunk-address cache may now hold dangling entries; drop it.
      SunkAddrs.clear();
    }
  }

  // Get the DominatorTree, building if necessary.
  DominatorTree &getDT(Function &F) {
    if (!DT)
      DT = std::make_unique<DominatorTree>(F);
    return *DT;
  }

  bool eliminateFallThrough(Function &F);
  bool eliminateMostlyEmptyBlocks(Function &F);
  BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
  bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
  void eliminateMostlyEmptyBlock(BasicBlock *BB);
  bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
                                     bool isPreheader);
  bool optimizeBlock(BasicBlock &BB, bool &ModifiedDT);
  bool optimizeInst(Instruction *I, bool &ModifiedDT);
  bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                          Type *AccessTy, unsigned AddrSpace);
  bool optimizeInlineAsmInst(CallInst *CS);
  bool optimizeCallInst(CallInst *CI, bool &ModifiedDT);
  bool optimizeExt(Instruction *&I);
  bool optimizeExtUses(Instruction *I);
  bool optimizeLoadExt(LoadInst *Load);
  bool optimizeShiftInst(BinaryOperator *BO);
  bool optimizeSelectInst(SelectInst *SI);
  bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
  bool optimizeSwitchInst(SwitchInst *SI);
  bool optimizeExtractElementInst(Instruction *Inst);
  bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
  bool placeDbgValues(Function &F);
  bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
                    LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
  bool tryToPromoteExts(TypePromotionTransaction &TPT,
                        const SmallVectorImpl<Instruction *> &Exts,
                        SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
                        unsigned CreatedInstsCost = 0);
  bool mergeSExts(Function &F);
  bool splitLargeGEPOffsets();
  bool performAddressTypePromotion(
      Instruction *&Inst,
      bool AllowPromotionWithoutCommonHeader,
      bool HasPromoted, TypePromotionTransaction &TPT,
      SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
  bool splitBranchCondition(Function &F, bool &ModifiedDT);
  bool simplifyOffsetableRelocate(Instruction &I);

  bool tryToSinkFreeOperands(Instruction *I);
  bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
                                   Intrinsic::ID IID);
  bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
  bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
  bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
};
397 | |||||||||||||||||
398 | } // end anonymous namespace | ||||||||||||||||
399 | |||||||||||||||||
400 | char CodeGenPrepare::ID = 0; | ||||||||||||||||
401 | |||||||||||||||||
402 | INITIALIZE_PASS_BEGIN(CodeGenPrepare, DEBUG_TYPE,static void *initializeCodeGenPreparePassOnce(PassRegistry & Registry) { | ||||||||||||||||
403 | "Optimize for code generation", false, false)static void *initializeCodeGenPreparePassOnce(PassRegistry & Registry) { | ||||||||||||||||
404 | INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry); | ||||||||||||||||
405 | INITIALIZE_PASS_END(CodeGenPrepare, DEBUG_TYPE,PassInfo *PI = new PassInfo( "Optimize for code generation", "codegenprepare" , &CodeGenPrepare::ID, PassInfo::NormalCtor_t(callDefaultCtor <CodeGenPrepare>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeCodeGenPreparePassFlag ; void llvm::initializeCodeGenPreparePass(PassRegistry &Registry ) { llvm::call_once(InitializeCodeGenPreparePassFlag, initializeCodeGenPreparePassOnce , std::ref(Registry)); } | ||||||||||||||||
406 | "Optimize for code generation", false, false)PassInfo *PI = new PassInfo( "Optimize for code generation", "codegenprepare" , &CodeGenPrepare::ID, PassInfo::NormalCtor_t(callDefaultCtor <CodeGenPrepare>), false, false); Registry.registerPass (*PI, true); return PI; } static llvm::once_flag InitializeCodeGenPreparePassFlag ; void llvm::initializeCodeGenPreparePass(PassRegistry &Registry ) { llvm::call_once(InitializeCodeGenPreparePassFlag, initializeCodeGenPreparePassOnce , std::ref(Registry)); } | ||||||||||||||||
407 | |||||||||||||||||
408 | FunctionPass *llvm::createCodeGenPreparePass() { return new CodeGenPrepare(); } | ||||||||||||||||
409 | |||||||||||||||||
// Top-level driver: runs the whole CodeGenPrepare sequence over F — slow-div
// bypass, mostly-empty-block elimination, branch-condition splitting,
// indirectbr critical-edge splitting, a fixed-point per-block optimization
// loop, dead-block cleanup, statepoint relocate simplification, and finally
// dbg.value placement. Returns true iff the IR was modified.
bool CodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))

    return false;

  DL = &F.getParent()->getDataLayout();

  bool EverMadeChange = false;
  // Clear per function information.
  InsertedInsts.clear();
  PromotedInsts.clear();

  // Target hooks are only available when running under a TargetPassConfig
  // (e.g. inside llc); otherwise TM/SubtargetInfo/TLI/TRI keep their previous
  // (possibly null) values, so TLI is null-checked before use below.
  if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
    TM = &TPC->getTM<TargetMachine>();
    SubtargetInfo = TM->getSubtargetImpl(F);
    TLI = SubtargetInfo->getTargetLowering();
    TRI = SubtargetInfo->getRegisterInfo();
  }
  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  BPI.reset(new BranchProbabilityInfo(F, *LI));
  BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
  OptSize = F.hasOptSize();

  // Tag hot/cold functions with a section prefix so the linker can group
  // them for better code locality.
  ProfileSummaryInfo *PSI =
      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  if (ProfileGuidedSectionPrefix) {
    if (PSI->isFunctionHotInCallGraph(&F, *BFI))
      F.setSectionPrefix(".hot");
    else if (PSI->isFunctionColdInCallGraph(&F, *BFI))
      F.setSectionPrefix(".unlikely");
  }

  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
  if (!OptSize && !PSI->hasHugeWorkingSetSize() && TLI &&
      TLI->isSlowDivBypassed()) {
    const DenseMap<unsigned int, unsigned int> &BypassWidths =
       TLI->getBypassSlowDivWidths();
    BasicBlock* BB = &*F.begin();
    while (BB != nullptr) {
      // bypassSlowDivision may create new BBs, but we don't want to reapply the
      // optimization to those blocks.
      BasicBlock* Next = BB->getNextNode();
      EverMadeChange |= bypassSlowDivision(BB, BypassWidths);
      BB = Next;
    }
  }

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= eliminateMostlyEmptyBlocks(F);

  bool ModifiedDT = false;
  if (!DisableBranchOpts)
    EverMadeChange |= splitBranchCondition(F, ModifiedDT);

  // Split some critical edges where one of the sources is an indirect branch,
  // to help generate sane code for PHIs involving such edges.
  EverMadeChange |= SplitIndirectBrCriticalEdges(F);

  // Iterate to a fixed point: optimizeBlock can expose new opportunities in
  // blocks already visited, and per-iteration state is reset each round.
  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    DT.reset(); // Edits from the previous round may have stale'd the domtree.
    for (Function::iterator I = F.begin(); I != F.end(); ) {
      BasicBlock *BB = &*I++;
      bool ModifiedDTOnIteration = false;
      MadeChange |= optimizeBlock(*BB, ModifiedDTOnIteration);

      // Restart BB iteration if the dominator tree of the Function was changed
      if (ModifiedDTOnIteration)
        break;
    }
    if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
      MadeChange |= mergeSExts(F);
    if (!LargeOffsetGEPMap.empty())
      MadeChange |= splitLargeGEPOffsets();

    // Really free removed instructions during promotion.
    for (Instruction *I : RemovedInsts)
      I->deleteValue();

    EverMadeChange |= MadeChange;
    SeenChainsForSExt.clear();
    ValToSExtendedUses.clear();
    RemovedInsts.clear();
    LargeOffsetGEPMap.clear();
    LargeOffsetGEPID.clear();
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    // Use a set vector to get deterministic iteration order. The order the
    // blocks are removed may affect whether or not PHI nodes in successors
    // are removed.
    SmallSetVector<BasicBlock*, 8> WorkList;
    for (BasicBlock &BB : F) {
      // Snapshot successors first: ConstantFoldTerminator may rewrite them.
      SmallVector<BasicBlock *, 2> Successors(succ_begin(&BB), succ_end(&BB));
      MadeChange |= ConstantFoldTerminator(&BB, true);
      if (!MadeChange) continue;

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Delete the dead blocks and any of their dead successors.
    MadeChange |= !WorkList.empty();
    while (!WorkList.empty()) {
      BasicBlock *BB = WorkList.pop_back_val();
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));

      DeleteDeadBlock(BB);

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Merge pairs of basic blocks with unconditional branches, connected by
    // a single edge.
    if (EverMadeChange || MadeChange)
      MadeChange |= eliminateFallThrough(F);

    EverMadeChange |= MadeChange;
  }

  if (!DisableGCOpts) {
    // Collect statepoints first so simplifyOffsetableRelocate can mutate the
    // IR without invalidating this iteration.
    SmallVector<Instruction *, 2> Statepoints;
    for (BasicBlock &BB : F)
      for (Instruction &I : BB)
        if (isStatepoint(I))
          Statepoints.push_back(&I);
    for (auto &I : Statepoints)
      EverMadeChange |= simplifyOffsetableRelocate(*I);
  }

  // Do this last to clean up use-before-def scenarios introduced by other
  // preparatory transforms.
  EverMadeChange |= placeDbgValues(F);

  return EverMadeChange;
}
558 | |||||||||||||||||
/// Merge basic blocks which are connected by a single edge, where one of the
/// basic blocks has a single successor pointing to the other basic block,
/// which has a single predecessor.
bool CodeGenPrepare::eliminateFallThrough(Function &F) {
  bool Changed = false;
  // Scan all of the blocks in the function, except for the entry block.
  // Use a temporary array to avoid iterator being invalidated when
  // deleting blocks.
  SmallVector<WeakTrackingVH, 16> Blocks;
  for (auto &Block : llvm::make_range(std::next(F.begin()), F.end()))
    Blocks.push_back(&Block);

  for (auto &Block : Blocks) {
    // A null handle means the block was already deleted by a prior merge.
    auto *BB = cast_or_null<BasicBlock>(Block);
    if (!BB)
      continue;
    // If the destination block has a single pred, then this is a trivial
    // edge, just collapse it.
    BasicBlock *SinglePred = BB->getSinglePredecessor();

    // Don't merge if BB's address is taken.
    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;

    // Only fold across a plain unconditional branch in the predecessor.
    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
    if (Term && !Term->isConditional()) {
      Changed = true;
      LLVM_DEBUG(dbgs() << "To merge:\n" << *BB << "\n\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "To merge:\n" << * BB << "\n\n\n"; } } while (false);

      // Merge BB into SinglePred and delete it.
      MergeBlockIntoPredecessor(BB);
    }
  }
  return Changed;
}
593 | |||||||||||||||||
594 | /// Find a destination block from BB if BB is mergeable empty block. | ||||||||||||||||
595 | BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) { | ||||||||||||||||
596 | // If this block doesn't end with an uncond branch, ignore it. | ||||||||||||||||
597 | BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()); | ||||||||||||||||
598 | if (!BI || !BI->isUnconditional()) | ||||||||||||||||
599 | return nullptr; | ||||||||||||||||
600 | |||||||||||||||||
601 | // If the instruction before the branch (skipping debug info) isn't a phi | ||||||||||||||||
602 | // node, then other stuff is happening here. | ||||||||||||||||
603 | BasicBlock::iterator BBI = BI->getIterator(); | ||||||||||||||||
604 | if (BBI != BB->begin()) { | ||||||||||||||||
605 | --BBI; | ||||||||||||||||
606 | while (isa<DbgInfoIntrinsic>(BBI)) { | ||||||||||||||||
607 | if (BBI == BB->begin()) | ||||||||||||||||
608 | break; | ||||||||||||||||
609 | --BBI; | ||||||||||||||||
610 | } | ||||||||||||||||
611 | if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI)) | ||||||||||||||||
612 | return nullptr; | ||||||||||||||||
613 | } | ||||||||||||||||
614 | |||||||||||||||||
615 | // Do not break infinite loops. | ||||||||||||||||
616 | BasicBlock *DestBB = BI->getSuccessor(0); | ||||||||||||||||
617 | if (DestBB == BB) | ||||||||||||||||
618 | return nullptr; | ||||||||||||||||
619 | |||||||||||||||||
620 | if (!canMergeBlocks(BB, DestBB)) | ||||||||||||||||
621 | DestBB = nullptr; | ||||||||||||||||
622 | |||||||||||||||||
623 | return DestBB; | ||||||||||||||||
624 | } | ||||||||||||||||
625 | |||||||||||||||||
626 | /// Eliminate blocks that contain only PHI nodes, debug info directives, and an | ||||||||||||||||
627 | /// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split | ||||||||||||||||
628 | /// edges in ways that are non-optimal for isel. Start by eliminating these | ||||||||||||||||
629 | /// blocks so we can split them the way we want them. | ||||||||||||||||
630 | bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) { | ||||||||||||||||
631 | SmallPtrSet<BasicBlock *, 16> Preheaders; | ||||||||||||||||
632 | SmallVector<Loop *, 16> LoopList(LI->begin(), LI->end()); | ||||||||||||||||
633 | while (!LoopList.empty()) { | ||||||||||||||||
634 | Loop *L = LoopList.pop_back_val(); | ||||||||||||||||
635 | LoopList.insert(LoopList.end(), L->begin(), L->end()); | ||||||||||||||||
636 | if (BasicBlock *Preheader = L->getLoopPreheader()) | ||||||||||||||||
637 | Preheaders.insert(Preheader); | ||||||||||||||||
638 | } | ||||||||||||||||
639 | |||||||||||||||||
640 | bool MadeChange = false; | ||||||||||||||||
641 | // Copy blocks into a temporary array to avoid iterator invalidation issues | ||||||||||||||||
642 | // as we remove them. | ||||||||||||||||
643 | // Note that this intentionally skips the entry block. | ||||||||||||||||
644 | SmallVector<WeakTrackingVH, 16> Blocks; | ||||||||||||||||
645 | for (auto &Block : llvm::make_range(std::next(F.begin()), F.end())) | ||||||||||||||||
646 | Blocks.push_back(&Block); | ||||||||||||||||
647 | |||||||||||||||||
648 | for (auto &Block : Blocks) { | ||||||||||||||||
649 | BasicBlock *BB = cast_or_null<BasicBlock>(Block); | ||||||||||||||||
650 | if (!BB) | ||||||||||||||||
651 | continue; | ||||||||||||||||
652 | BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB); | ||||||||||||||||
653 | if (!DestBB || | ||||||||||||||||
654 | !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB))) | ||||||||||||||||
655 | continue; | ||||||||||||||||
656 | |||||||||||||||||
657 | eliminateMostlyEmptyBlock(BB); | ||||||||||||||||
658 | MadeChange = true; | ||||||||||||||||
659 | } | ||||||||||||||||
660 | return MadeChange; | ||||||||||||||||
661 | } | ||||||||||||||||
662 | |||||||||||||||||
/// Decide whether merging the mostly-empty block BB into DestBB is likely
/// profitable. isPreheader tells us BB is a loop preheader (computed by the
/// caller from LoopInfo). Returns true to allow the merge, false to keep BB.
bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
                                                   BasicBlock *DestBB,
                                                   bool isPreheader) {
  // Do not delete loop preheaders if doing so would create a critical edge.
  // Loop preheaders can be good locations to spill registers. If the
  // preheader is deleted and we create a critical edge, registers may be
  // spilled in the loop body instead.
  if (!DisablePreheaderProtect && isPreheader &&
      !(BB->getSinglePredecessor() &&
        BB->getSinglePredecessor()->getSingleSuccessor()))
    return false;

  // Skip merging if the block's successor is also a successor to any callbr
  // that leads to this block.
  // FIXME: Is this really needed? Is this a correctness issue?
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
    if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
      for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
        if (DestBB == CBI->getSuccessor(i))
          return false;
  }

  // Try to skip merging if the unique predecessor of BB is terminated by a
  // switch or indirect branch instruction, and BB is used as an incoming block
  // of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
  // add COPY instructions in the predecessor of BB instead of BB (if it is not
  // merged). Note that the critical edge created by merging such blocks wont be
  // split in MachineSink because the jump table is not analyzable. By keeping
  // such empty block (BB), ISel will place COPY instructions in BB, not in the
  // predecessor of BB.
  BasicBlock *Pred = BB->getUniquePredecessor();
  if (!Pred ||
      !(isa<SwitchInst>(Pred->getTerminator()) ||
        isa<IndirectBrInst>(Pred->getTerminator())))
    return true;

  // The heuristic below only applies when BB is truly empty (nothing but
  // PHIs/debug info before the terminator).
  if (BB->getTerminator() != BB->getFirstNonPHIOrDbg())
    return true;

  // We use a simple cost heuristic which determine skipping merging is
  // profitable if the cost of skipping merging is less than the cost of
  // merging : Cost(skipping merging) < Cost(merging BB), where the
  // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
  // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
  // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
  // Freq(Pred) / Freq(BB) > 2.
  // Note that if there are multiple empty blocks sharing the same incoming
  // value for the PHIs in the DestBB, we consider them together. In such
  // case, Cost(merging BB) will be the sum of their frequencies.

  if (!isa<PHINode>(DestBB->begin()))
    return true;

  SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;

  // Find all other incoming blocks from which incoming values of all PHIs in
  // DestBB are the same as the ones from BB.
  for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
       ++PI) {
    BasicBlock *DestBBPred = *PI;
    if (DestBBPred == BB)
      continue;

    if (llvm::all_of(DestBB->phis(), [&](const PHINode &DestPN) {
          return DestPN.getIncomingValueForBlock(BB) ==
                 DestPN.getIncomingValueForBlock(DestBBPred);
        }))
      SameIncomingValueBBs.insert(DestBBPred);
  }

  // See if all BB's incoming values are same as the value from Pred. In this
  // case, no reason to skip merging because COPYs are expected to be place in
  // Pred already.
  if (SameIncomingValueBBs.count(Pred))
    return true;

  BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
  BlockFrequency BBFreq = BFI->getBlockFreq(BB);

  // Blocks equivalent to BB (same dest, same incoming values, same unique
  // pred) would all be merged too, so their frequencies add to BB's cost.
  for (auto SameValueBB : SameIncomingValueBBs)
    if (SameValueBB->getUniquePredecessor() == Pred &&
        DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
      BBFreq += BFI->getBlockFreq(SameValueBB);

  return PredFreq.getFrequency() <=
         BBFreq.getFrequency() * FreqRatioToSkipMerge;
}
750 | |||||||||||||||||
/// Return true if we can merge BB into DestBB if there is a single
/// unconditional branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::canMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex condition (e.g. preheaders),
  // don't mess around with them.
  for (const PHINode &PN : BB->phis()) {
    for (const User *U : PN.users()) {
      const Instruction *UI = cast<Instruction>(U);
      if (UI->getParent() != DestBB || !isa<PHINode>(UI))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (UI->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(UI))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      for (const PHINode &PN : DestBB->phis()) {
        const Value *V1 = PN.getIncomingValueForBlock(Pred);
        const Value *V2 = PN.getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}
816 | |||||||||||||||||
/// Eliminate a basic block that has only phi's and an unconditional branch in
/// it.
void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
  // The caller (via findDestBlockOfMergeableEmptyBlock) guarantees the
  // terminator is an unconditional branch, so this cast cannot fail.
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  LLVM_DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB; } } while (false)
                    << *BB << *DestBB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB; } } while (false);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      assert(SinglePred == BB &&((SinglePred == BB && "Single predecessor not the same as predecessor" ) ? static_cast<void> (0) : __assert_fail ("SinglePred == BB && \"Single predecessor not the same as predecessor\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 831, __PRETTY_FUNCTION__))
             "Single predecessor not the same as predecessor")((SinglePred == BB && "Single predecessor not the same as predecessor" ) ? static_cast<void> (0) : __assert_fail ("SinglePred == BB && \"Single predecessor not the same as predecessor\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 831, __PRETTY_FUNCTION__));
      // Merge DestBB into SinglePred/BB and delete it.
      MergeBlockIntoPredecessor(DestBB);
      // Note: BB(=SinglePred) will not be deleted on this path.
      // DestBB(=its single successor) is the one that was deleted.
      LLVM_DEBUG(dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "AFTER:\n" << *SinglePred << "\n\n\n"; } } while (false);
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  for (PHINode &PN : DestBB->phis()) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN.removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN.addIncoming(InValPhi->getIncomingValue(i),
                       InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN.addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  BB->eraseFromParent();
  ++NumBlocksElim;

  LLVM_DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "AFTER:\n" << *DestBB << "\n\n\n"; } } while (false);
}
876 | |||||||||||||||||
877 | // Computes a map of base pointer relocation instructions to corresponding | ||||||||||||||||
878 | // derived pointer relocation instructions given a vector of all relocate calls | ||||||||||||||||
879 | static void computeBaseDerivedRelocateMap( | ||||||||||||||||
880 | const SmallVectorImpl<GCRelocateInst *> &AllRelocateCalls, | ||||||||||||||||
881 | DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> | ||||||||||||||||
882 | &RelocateInstMap) { | ||||||||||||||||
883 | // Collect information in two maps: one primarily for locating the base object | ||||||||||||||||
884 | // while filling the second map; the second map is the final structure holding | ||||||||||||||||
885 | // a mapping between Base and corresponding Derived relocate calls | ||||||||||||||||
886 | DenseMap<std::pair<unsigned, unsigned>, GCRelocateInst *> RelocateIdxMap; | ||||||||||||||||
887 | for (auto *ThisRelocate : AllRelocateCalls) { | ||||||||||||||||
888 | auto K = std::make_pair(ThisRelocate->getBasePtrIndex(), | ||||||||||||||||
889 | ThisRelocate->getDerivedPtrIndex()); | ||||||||||||||||
890 | RelocateIdxMap.insert(std::make_pair(K, ThisRelocate)); | ||||||||||||||||
891 | } | ||||||||||||||||
892 | for (auto &Item : RelocateIdxMap) { | ||||||||||||||||
893 | std::pair<unsigned, unsigned> Key = Item.first; | ||||||||||||||||
894 | if (Key.first == Key.second) | ||||||||||||||||
895 | // Base relocation: nothing to insert | ||||||||||||||||
896 | continue; | ||||||||||||||||
897 | |||||||||||||||||
898 | GCRelocateInst *I = Item.second; | ||||||||||||||||
899 | auto BaseKey = std::make_pair(Key.first, Key.first); | ||||||||||||||||
900 | |||||||||||||||||
901 | // We're iterating over RelocateIdxMap so we cannot modify it. | ||||||||||||||||
902 | auto MaybeBase = RelocateIdxMap.find(BaseKey); | ||||||||||||||||
903 | if (MaybeBase == RelocateIdxMap.end()) | ||||||||||||||||
904 | // TODO: We might want to insert a new base object relocate and gep off | ||||||||||||||||
905 | // that, if there are enough derived object relocates. | ||||||||||||||||
906 | continue; | ||||||||||||||||
907 | |||||||||||||||||
908 | RelocateInstMap[MaybeBase->second].push_back(I); | ||||||||||||||||
909 | } | ||||||||||||||||
910 | } | ||||||||||||||||
911 | |||||||||||||||||
912 | // Accepts a GEP and extracts the operands into a vector provided they're all | ||||||||||||||||
913 | // small integer constants | ||||||||||||||||
914 | static bool getGEPSmallConstantIntOffsetV(GetElementPtrInst *GEP, | ||||||||||||||||
915 | SmallVectorImpl<Value *> &OffsetV) { | ||||||||||||||||
916 | for (unsigned i = 1; i < GEP->getNumOperands(); i++) { | ||||||||||||||||
917 | // Only accept small constant integer operands | ||||||||||||||||
918 | auto Op = dyn_cast<ConstantInt>(GEP->getOperand(i)); | ||||||||||||||||
919 | if (!Op || Op->getZExtValue() > 20) | ||||||||||||||||
920 | return false; | ||||||||||||||||
921 | } | ||||||||||||||||
922 | |||||||||||||||||
923 | for (unsigned i = 1; i < GEP->getNumOperands(); i++) | ||||||||||||||||
924 | OffsetV.push_back(GEP->getOperand(i)); | ||||||||||||||||
925 | return true; | ||||||||||||||||
926 | } | ||||||||||||||||
927 | |||||||||||||||||
928 | // Takes a RelocatedBase (base pointer relocation instruction) and Targets to | ||||||||||||||||
929 | // replace, computes a replacement, and affects it. | ||||||||||||||||
930 | static bool | ||||||||||||||||
931 | simplifyRelocatesOffABase(GCRelocateInst *RelocatedBase, | ||||||||||||||||
932 | const SmallVectorImpl<GCRelocateInst *> &Targets) { | ||||||||||||||||
933 | bool MadeChange = false; | ||||||||||||||||
934 | // We must ensure the relocation of derived pointer is defined after | ||||||||||||||||
935 | // relocation of base pointer. If we find a relocation corresponding to base | ||||||||||||||||
936 | // defined earlier than relocation of base then we move relocation of base | ||||||||||||||||
937 | // right before found relocation. We consider only relocation in the same | ||||||||||||||||
938 | // basic block as relocation of base. Relocations from other basic block will | ||||||||||||||||
939 | // be skipped by optimization and we do not care about them. | ||||||||||||||||
940 | for (auto R = RelocatedBase->getParent()->getFirstInsertionPt(); | ||||||||||||||||
941 | &*R != RelocatedBase; ++R) | ||||||||||||||||
942 | if (auto RI = dyn_cast<GCRelocateInst>(R)) | ||||||||||||||||
943 | if (RI->getStatepoint() == RelocatedBase->getStatepoint()) | ||||||||||||||||
944 | if (RI->getBasePtrIndex() == RelocatedBase->getBasePtrIndex()) { | ||||||||||||||||
945 | RelocatedBase->moveBefore(RI); | ||||||||||||||||
946 | break; | ||||||||||||||||
947 | } | ||||||||||||||||
948 | |||||||||||||||||
949 | for (GCRelocateInst *ToReplace : Targets) { | ||||||||||||||||
950 | assert(ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() &&((ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex () && "Not relocating a derived object of the original base object" ) ? static_cast<void> (0) : __assert_fail ("ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && \"Not relocating a derived object of the original base object\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 951, __PRETTY_FUNCTION__)) | ||||||||||||||||
951 | "Not relocating a derived object of the original base object")((ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex () && "Not relocating a derived object of the original base object" ) ? static_cast<void> (0) : __assert_fail ("ToReplace->getBasePtrIndex() == RelocatedBase->getBasePtrIndex() && \"Not relocating a derived object of the original base object\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 951, __PRETTY_FUNCTION__)); | ||||||||||||||||
952 | if (ToReplace->getBasePtrIndex() == ToReplace->getDerivedPtrIndex()) { | ||||||||||||||||
953 | // A duplicate relocate call. TODO: coalesce duplicates. | ||||||||||||||||
954 | continue; | ||||||||||||||||
955 | } | ||||||||||||||||
956 | |||||||||||||||||
957 | if (RelocatedBase->getParent() != ToReplace->getParent()) { | ||||||||||||||||
958 | // Base and derived relocates are in different basic blocks. | ||||||||||||||||
959 | // In this case transform is only valid when base dominates derived | ||||||||||||||||
960 | // relocate. However it would be too expensive to check dominance | ||||||||||||||||
961 | // for each such relocate, so we skip the whole transformation. | ||||||||||||||||
962 | continue; | ||||||||||||||||
963 | } | ||||||||||||||||
964 | |||||||||||||||||
965 | Value *Base = ToReplace->getBasePtr(); | ||||||||||||||||
966 | auto Derived = dyn_cast<GetElementPtrInst>(ToReplace->getDerivedPtr()); | ||||||||||||||||
967 | if (!Derived || Derived->getPointerOperand() != Base) | ||||||||||||||||
968 | continue; | ||||||||||||||||
969 | |||||||||||||||||
970 | SmallVector<Value *, 2> OffsetV; | ||||||||||||||||
971 | if (!getGEPSmallConstantIntOffsetV(Derived, OffsetV)) | ||||||||||||||||
972 | continue; | ||||||||||||||||
973 | |||||||||||||||||
974 | // Create a Builder and replace the target callsite with a gep | ||||||||||||||||
975 | assert(RelocatedBase->getNextNode() &&((RelocatedBase->getNextNode() && "Should always have one since it's not a terminator" ) ? static_cast<void> (0) : __assert_fail ("RelocatedBase->getNextNode() && \"Should always have one since it's not a terminator\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 976, __PRETTY_FUNCTION__)) | ||||||||||||||||
976 | "Should always have one since it's not a terminator")((RelocatedBase->getNextNode() && "Should always have one since it's not a terminator" ) ? static_cast<void> (0) : __assert_fail ("RelocatedBase->getNextNode() && \"Should always have one since it's not a terminator\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 976, __PRETTY_FUNCTION__)); | ||||||||||||||||
977 | |||||||||||||||||
978 | // Insert after RelocatedBase | ||||||||||||||||
979 | IRBuilder<> Builder(RelocatedBase->getNextNode()); | ||||||||||||||||
980 | Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc()); | ||||||||||||||||
981 | |||||||||||||||||
982 | // If gc_relocate does not match the actual type, cast it to the right type. | ||||||||||||||||
983 | // In theory, there must be a bitcast after gc_relocate if the type does not | ||||||||||||||||
984 | // match, and we should reuse it to get the derived pointer. But it could be | ||||||||||||||||
985 | // cases like this: | ||||||||||||||||
986 | // bb1: | ||||||||||||||||
987 | // ... | ||||||||||||||||
988 | // %g1 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) | ||||||||||||||||
989 | // br label %merge | ||||||||||||||||
990 | // | ||||||||||||||||
991 | // bb2: | ||||||||||||||||
992 | // ... | ||||||||||||||||
993 | // %g2 = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(...) | ||||||||||||||||
994 | // br label %merge | ||||||||||||||||
995 | // | ||||||||||||||||
996 | // merge: | ||||||||||||||||
997 | // %p1 = phi i8 addrspace(1)* [ %g1, %bb1 ], [ %g2, %bb2 ] | ||||||||||||||||
998 | // %cast = bitcast i8 addrspace(1)* %p1 in to i32 addrspace(1)* | ||||||||||||||||
999 | // | ||||||||||||||||
1000 | // In this case, we can not find the bitcast any more. So we insert a new bitcast | ||||||||||||||||
1001 | // no matter there is already one or not. In this way, we can handle all cases, and | ||||||||||||||||
1002 | // the extra bitcast should be optimized away in later passes. | ||||||||||||||||
1003 | Value *ActualRelocatedBase = RelocatedBase; | ||||||||||||||||
1004 | if (RelocatedBase->getType() != Base->getType()) { | ||||||||||||||||
1005 | ActualRelocatedBase = | ||||||||||||||||
1006 | Builder.CreateBitCast(RelocatedBase, Base->getType()); | ||||||||||||||||
1007 | } | ||||||||||||||||
1008 | Value *Replacement = Builder.CreateGEP( | ||||||||||||||||
1009 | Derived->getSourceElementType(), ActualRelocatedBase, makeArrayRef(OffsetV)); | ||||||||||||||||
1010 | Replacement->takeName(ToReplace); | ||||||||||||||||
1011 | // If the newly generated derived pointer's type does not match the original derived | ||||||||||||||||
1012 | // pointer's type, cast the new derived pointer to match it. Same reasoning as above. | ||||||||||||||||
1013 | Value *ActualReplacement = Replacement; | ||||||||||||||||
1014 | if (Replacement->getType() != ToReplace->getType()) { | ||||||||||||||||
1015 | ActualReplacement = | ||||||||||||||||
1016 | Builder.CreateBitCast(Replacement, ToReplace->getType()); | ||||||||||||||||
1017 | } | ||||||||||||||||
1018 | ToReplace->replaceAllUsesWith(ActualReplacement); | ||||||||||||||||
1019 | ToReplace->eraseFromParent(); | ||||||||||||||||
1020 | |||||||||||||||||
1021 | MadeChange = true; | ||||||||||||||||
1022 | } | ||||||||||||||||
1023 | return MadeChange; | ||||||||||||||||
1024 | } | ||||||||||||||||
1025 | |||||||||||||||||
1026 | // Turns this: | ||||||||||||||||
1027 | // | ||||||||||||||||
1028 | // %base = ... | ||||||||||||||||
1029 | // %ptr = gep %base + 15 | ||||||||||||||||
1030 | // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) | ||||||||||||||||
1031 | // %base' = relocate(%tok, i32 4, i32 4) | ||||||||||||||||
1032 | // %ptr' = relocate(%tok, i32 4, i32 5) | ||||||||||||||||
1033 | // %val = load %ptr' | ||||||||||||||||
1034 | // | ||||||||||||||||
1035 | // into this: | ||||||||||||||||
1036 | // | ||||||||||||||||
1037 | // %base = ... | ||||||||||||||||
1038 | // %ptr = gep %base + 15 | ||||||||||||||||
1039 | // %tok = statepoint (%fun, i32 0, i32 0, i32 0, %base, %ptr) | ||||||||||||||||
1040 | // %base' = gc.relocate(%tok, i32 4, i32 4) | ||||||||||||||||
1041 | // %ptr' = gep %base' + 15 | ||||||||||||||||
1042 | // %val = load %ptr' | ||||||||||||||||
1043 | bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) { | ||||||||||||||||
1044 | bool MadeChange = false; | ||||||||||||||||
1045 | SmallVector<GCRelocateInst *, 2> AllRelocateCalls; | ||||||||||||||||
1046 | |||||||||||||||||
1047 | for (auto *U : I.users()) | ||||||||||||||||
1048 | if (GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U)) | ||||||||||||||||
1049 | // Collect all the relocate calls associated with a statepoint | ||||||||||||||||
1050 | AllRelocateCalls.push_back(Relocate); | ||||||||||||||||
1051 | |||||||||||||||||
1052 | // We need atleast one base pointer relocation + one derived pointer | ||||||||||||||||
1053 | // relocation to mangle | ||||||||||||||||
1054 | if (AllRelocateCalls.size() < 2) | ||||||||||||||||
1055 | return false; | ||||||||||||||||
1056 | |||||||||||||||||
1057 | // RelocateInstMap is a mapping from the base relocate instruction to the | ||||||||||||||||
1058 | // corresponding derived relocate instructions | ||||||||||||||||
1059 | DenseMap<GCRelocateInst *, SmallVector<GCRelocateInst *, 2>> RelocateInstMap; | ||||||||||||||||
1060 | computeBaseDerivedRelocateMap(AllRelocateCalls, RelocateInstMap); | ||||||||||||||||
1061 | if (RelocateInstMap.empty()) | ||||||||||||||||
1062 | return false; | ||||||||||||||||
1063 | |||||||||||||||||
1064 | for (auto &Item : RelocateInstMap) | ||||||||||||||||
1065 | // Item.first is the RelocatedBase to offset against | ||||||||||||||||
1066 | // Item.second is the vector of Targets to replace | ||||||||||||||||
1067 | MadeChange = simplifyRelocatesOffABase(Item.first, Item.second); | ||||||||||||||||
1068 | return MadeChange; | ||||||||||||||||
1069 | } | ||||||||||||||||
1070 | |||||||||||||||||
1071 | /// Sink the specified cast instruction into its user blocks. | ||||||||||||||||
1072 | static bool SinkCast(CastInst *CI) { | ||||||||||||||||
1073 | BasicBlock *DefBB = CI->getParent(); | ||||||||||||||||
1074 | |||||||||||||||||
1075 | /// InsertedCasts - Only insert a cast in each block once. | ||||||||||||||||
1076 | DenseMap<BasicBlock*, CastInst*> InsertedCasts; | ||||||||||||||||
1077 | |||||||||||||||||
1078 | bool MadeChange = false; | ||||||||||||||||
1079 | for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end(); | ||||||||||||||||
1080 | UI != E; ) { | ||||||||||||||||
1081 | Use &TheUse = UI.getUse(); | ||||||||||||||||
1082 | Instruction *User = cast<Instruction>(*UI); | ||||||||||||||||
1083 | |||||||||||||||||
1084 | // Figure out which BB this cast is used in. For PHI's this is the | ||||||||||||||||
1085 | // appropriate predecessor block. | ||||||||||||||||
1086 | BasicBlock *UserBB = User->getParent(); | ||||||||||||||||
1087 | if (PHINode *PN = dyn_cast<PHINode>(User)) { | ||||||||||||||||
1088 | UserBB = PN->getIncomingBlock(TheUse); | ||||||||||||||||
1089 | } | ||||||||||||||||
1090 | |||||||||||||||||
1091 | // Preincrement use iterator so we don't invalidate it. | ||||||||||||||||
1092 | ++UI; | ||||||||||||||||
1093 | |||||||||||||||||
1094 | // The first insertion point of a block containing an EH pad is after the | ||||||||||||||||
1095 | // pad. If the pad is the user, we cannot sink the cast past the pad. | ||||||||||||||||
1096 | if (User->isEHPad()) | ||||||||||||||||
1097 | continue; | ||||||||||||||||
1098 | |||||||||||||||||
1099 | // If the block selected to receive the cast is an EH pad that does not | ||||||||||||||||
1100 | // allow non-PHI instructions before the terminator, we can't sink the | ||||||||||||||||
1101 | // cast. | ||||||||||||||||
1102 | if (UserBB->getTerminator()->isEHPad()) | ||||||||||||||||
1103 | continue; | ||||||||||||||||
1104 | |||||||||||||||||
1105 | // If this user is in the same block as the cast, don't change the cast. | ||||||||||||||||
1106 | if (UserBB == DefBB) continue; | ||||||||||||||||
1107 | |||||||||||||||||
1108 | // If we have already inserted a cast into this block, use it. | ||||||||||||||||
1109 | CastInst *&InsertedCast = InsertedCasts[UserBB]; | ||||||||||||||||
1110 | |||||||||||||||||
1111 | if (!InsertedCast) { | ||||||||||||||||
1112 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | ||||||||||||||||
1113 | assert(InsertPt != UserBB->end())((InsertPt != UserBB->end()) ? static_cast<void> (0) : __assert_fail ("InsertPt != UserBB->end()", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1113, __PRETTY_FUNCTION__)); | ||||||||||||||||
1114 | InsertedCast = CastInst::Create(CI->getOpcode(), CI->getOperand(0), | ||||||||||||||||
1115 | CI->getType(), "", &*InsertPt); | ||||||||||||||||
1116 | InsertedCast->setDebugLoc(CI->getDebugLoc()); | ||||||||||||||||
1117 | } | ||||||||||||||||
1118 | |||||||||||||||||
1119 | // Replace a use of the cast with a use of the new cast. | ||||||||||||||||
1120 | TheUse = InsertedCast; | ||||||||||||||||
1121 | MadeChange = true; | ||||||||||||||||
1122 | ++NumCastUses; | ||||||||||||||||
1123 | } | ||||||||||||||||
1124 | |||||||||||||||||
1125 | // If we removed all uses, nuke the cast. | ||||||||||||||||
1126 | if (CI->use_empty()) { | ||||||||||||||||
1127 | salvageDebugInfo(*CI); | ||||||||||||||||
1128 | CI->eraseFromParent(); | ||||||||||||||||
1129 | MadeChange = true; | ||||||||||||||||
1130 | } | ||||||||||||||||
1131 | |||||||||||||||||
1132 | return MadeChange; | ||||||||||||||||
1133 | } | ||||||||||||||||
1134 | |||||||||||||||||
1135 | /// If the specified cast instruction is a noop copy (e.g. it's casting from | ||||||||||||||||
1136 | /// one pointer type to another, i32->i8 on PPC), sink it into user blocks to | ||||||||||||||||
1137 | /// reduce the number of virtual registers that must be created and coalesced. | ||||||||||||||||
1138 | /// | ||||||||||||||||
1139 | /// Return true if any changes are made. | ||||||||||||||||
1140 | static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI, | ||||||||||||||||
1141 | const DataLayout &DL) { | ||||||||||||||||
1142 | // Sink only "cheap" (or nop) address-space casts. This is a weaker condition | ||||||||||||||||
1143 | // than sinking only nop casts, but is helpful on some platforms. | ||||||||||||||||
1144 | if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) { | ||||||||||||||||
1145 | if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(), | ||||||||||||||||
1146 | ASC->getDestAddressSpace())) | ||||||||||||||||
1147 | return false; | ||||||||||||||||
1148 | } | ||||||||||||||||
1149 | |||||||||||||||||
1150 | // If this is a noop copy, | ||||||||||||||||
1151 | EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType()); | ||||||||||||||||
1152 | EVT DstVT = TLI.getValueType(DL, CI->getType()); | ||||||||||||||||
1153 | |||||||||||||||||
1154 | // This is an fp<->int conversion? | ||||||||||||||||
1155 | if (SrcVT.isInteger() != DstVT.isInteger()) | ||||||||||||||||
1156 | return false; | ||||||||||||||||
1157 | |||||||||||||||||
1158 | // If this is an extension, it will be a zero or sign extension, which | ||||||||||||||||
1159 | // isn't a noop. | ||||||||||||||||
1160 | if (SrcVT.bitsLT(DstVT)) return false; | ||||||||||||||||
1161 | |||||||||||||||||
1162 | // If these values will be promoted, find out what they will be promoted | ||||||||||||||||
1163 | // to. This helps us consider truncates on PPC as noop copies when they | ||||||||||||||||
1164 | // are. | ||||||||||||||||
1165 | if (TLI.getTypeAction(CI->getContext(), SrcVT) == | ||||||||||||||||
1166 | TargetLowering::TypePromoteInteger) | ||||||||||||||||
1167 | SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT); | ||||||||||||||||
1168 | if (TLI.getTypeAction(CI->getContext(), DstVT) == | ||||||||||||||||
1169 | TargetLowering::TypePromoteInteger) | ||||||||||||||||
1170 | DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT); | ||||||||||||||||
1171 | |||||||||||||||||
1172 | // If, after promotion, these are the same types, this is a noop copy. | ||||||||||||||||
1173 | if (SrcVT != DstVT) | ||||||||||||||||
1174 | return false; | ||||||||||||||||
1175 | |||||||||||||||||
1176 | return SinkCast(CI); | ||||||||||||||||
1177 | } | ||||||||||||||||
1178 | |||||||||||||||||
1179 | bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO, | ||||||||||||||||
1180 | CmpInst *Cmp, | ||||||||||||||||
1181 | Intrinsic::ID IID) { | ||||||||||||||||
1182 | if (BO->getParent() != Cmp->getParent()) { | ||||||||||||||||
1183 | // We used to use a dominator tree here to allow multi-block optimization. | ||||||||||||||||
1184 | // But that was problematic because: | ||||||||||||||||
1185 | // 1. It could cause a perf regression by hoisting the math op into the | ||||||||||||||||
1186 | // critical path. | ||||||||||||||||
1187 | // 2. It could cause a perf regression by creating a value that was live | ||||||||||||||||
1188 | // across multiple blocks and increasing register pressure. | ||||||||||||||||
1189 | // 3. Use of a dominator tree could cause large compile-time regression. | ||||||||||||||||
1190 | // This is because we recompute the DT on every change in the main CGP | ||||||||||||||||
1191 | // run-loop. The recomputing is probably unnecessary in many cases, so if | ||||||||||||||||
1192 | // that was fixed, using a DT here would be ok. | ||||||||||||||||
1193 | return false; | ||||||||||||||||
1194 | } | ||||||||||||||||
1195 | |||||||||||||||||
1196 | // We allow matching the canonical IR (add X, C) back to (usubo X, -C). | ||||||||||||||||
1197 | Value *Arg0 = BO->getOperand(0); | ||||||||||||||||
1198 | Value *Arg1 = BO->getOperand(1); | ||||||||||||||||
1199 | if (BO->getOpcode() == Instruction::Add && | ||||||||||||||||
1200 | IID == Intrinsic::usub_with_overflow) { | ||||||||||||||||
1201 | assert(isa<Constant>(Arg1) && "Unexpected input for usubo")((isa<Constant>(Arg1) && "Unexpected input for usubo" ) ? static_cast<void> (0) : __assert_fail ("isa<Constant>(Arg1) && \"Unexpected input for usubo\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1201, __PRETTY_FUNCTION__)); | ||||||||||||||||
1202 | Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1)); | ||||||||||||||||
1203 | } | ||||||||||||||||
1204 | |||||||||||||||||
1205 | // Insert at the first instruction of the pair. | ||||||||||||||||
1206 | Instruction *InsertPt = nullptr; | ||||||||||||||||
1207 | for (Instruction &Iter : *Cmp->getParent()) { | ||||||||||||||||
1208 | if (&Iter == BO || &Iter == Cmp) { | ||||||||||||||||
1209 | InsertPt = &Iter; | ||||||||||||||||
1210 | break; | ||||||||||||||||
1211 | } | ||||||||||||||||
1212 | } | ||||||||||||||||
1213 | assert(InsertPt != nullptr && "Parent block did not contain cmp or binop")((InsertPt != nullptr && "Parent block did not contain cmp or binop" ) ? static_cast<void> (0) : __assert_fail ("InsertPt != nullptr && \"Parent block did not contain cmp or binop\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1213, __PRETTY_FUNCTION__)); | ||||||||||||||||
1214 | |||||||||||||||||
1215 | IRBuilder<> Builder(InsertPt); | ||||||||||||||||
1216 | Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1); | ||||||||||||||||
1217 | Value *Math = Builder.CreateExtractValue(MathOV, 0, "math"); | ||||||||||||||||
1218 | Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov"); | ||||||||||||||||
1219 | BO->replaceAllUsesWith(Math); | ||||||||||||||||
1220 | Cmp->replaceAllUsesWith(OV); | ||||||||||||||||
1221 | BO->eraseFromParent(); | ||||||||||||||||
1222 | Cmp->eraseFromParent(); | ||||||||||||||||
1223 | return true; | ||||||||||||||||
1224 | } | ||||||||||||||||
1225 | |||||||||||||||||
1226 | /// Match special-case patterns that check for unsigned add overflow. | ||||||||||||||||
1227 | static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp, | ||||||||||||||||
1228 | BinaryOperator *&Add) { | ||||||||||||||||
1229 | // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val) | ||||||||||||||||
1230 | // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero) | ||||||||||||||||
1231 | Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1); | ||||||||||||||||
1232 | |||||||||||||||||
1233 | // We are not expecting non-canonical/degenerate code. Just bail out. | ||||||||||||||||
1234 | if (isa<Constant>(A)) | ||||||||||||||||
1235 | return false; | ||||||||||||||||
1236 | |||||||||||||||||
1237 | ICmpInst::Predicate Pred = Cmp->getPredicate(); | ||||||||||||||||
1238 | if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes())) | ||||||||||||||||
1239 | B = ConstantInt::get(B->getType(), 1); | ||||||||||||||||
1240 | else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) | ||||||||||||||||
1241 | B = ConstantInt::get(B->getType(), -1); | ||||||||||||||||
1242 | else | ||||||||||||||||
1243 | return false; | ||||||||||||||||
1244 | |||||||||||||||||
1245 | // Check the users of the variable operand of the compare looking for an add | ||||||||||||||||
1246 | // with the adjusted constant. | ||||||||||||||||
1247 | for (User *U : A->users()) { | ||||||||||||||||
1248 | if (match(U, m_Add(m_Specific(A), m_Specific(B)))) { | ||||||||||||||||
1249 | Add = cast<BinaryOperator>(U); | ||||||||||||||||
1250 | return true; | ||||||||||||||||
1251 | } | ||||||||||||||||
1252 | } | ||||||||||||||||
1253 | return false; | ||||||||||||||||
1254 | } | ||||||||||||||||
1255 | |||||||||||||||||
1256 | /// Try to combine the compare into a call to the llvm.uadd.with.overflow | ||||||||||||||||
1257 | /// intrinsic. Return true if any changes were made. | ||||||||||||||||
1258 | bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp, | ||||||||||||||||
1259 | bool &ModifiedDT) { | ||||||||||||||||
1260 | Value *A, *B; | ||||||||||||||||
1261 | BinaryOperator *Add; | ||||||||||||||||
1262 | if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add)))) | ||||||||||||||||
1263 | if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add)) | ||||||||||||||||
1264 | return false; | ||||||||||||||||
1265 | |||||||||||||||||
1266 | if (!TLI->shouldFormOverflowOp(ISD::UADDO, | ||||||||||||||||
1267 | TLI->getValueType(*DL, Add->getType()))) | ||||||||||||||||
1268 | return false; | ||||||||||||||||
1269 | |||||||||||||||||
1270 | // We don't want to move around uses of condition values this late, so we | ||||||||||||||||
1271 | // check if it is legal to create the call to the intrinsic in the basic | ||||||||||||||||
1272 | // block containing the icmp. | ||||||||||||||||
1273 | if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse()) | ||||||||||||||||
1274 | return false; | ||||||||||||||||
1275 | |||||||||||||||||
1276 | if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow)) | ||||||||||||||||
1277 | return false; | ||||||||||||||||
1278 | |||||||||||||||||
1279 | // Reset callers - do not crash by iterating over a dead instruction. | ||||||||||||||||
1280 | ModifiedDT = true; | ||||||||||||||||
1281 | return true; | ||||||||||||||||
1282 | } | ||||||||||||||||
1283 | |||||||||||||||||
/// Try to combine the compare into a call to the llvm.usub.with.overflow
/// intrinsic. Sets ModifiedDT and returns true if the transform fired.
bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
                                               bool &ModifiedDT) {
  // We are not expecting non-canonical/degenerate code. Just bail out.
  Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
  if (isa<Constant>(A) && isa<Constant>(B))
    return false;

  // Convert (A u> B) to (A u< B) to simplify pattern matching.
  ICmpInst::Predicate Pred = Cmp->getPredicate();
  if (Pred == ICmpInst::ICMP_UGT) {
    std::swap(A, B);
    Pred = ICmpInst::ICMP_ULT;
  }
  // Convert special-case: (A == 0) is the same as (A u< 1).
  if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
    B = ConstantInt::get(B->getType(), 1);
    Pred = ICmpInst::ICMP_ULT;
  }
  // Convert special-case: (A != 0) is the same as (0 u< A).
  if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
    std::swap(A, B);
    Pred = ICmpInst::ICMP_ULT;
  }
  // Only the canonical "A u< B" form is matched below.
  if (Pred != ICmpInst::ICMP_ULT)
    return false;

  // Walk the users of a variable operand of a compare looking for a subtract or
  // add with that same operand. Also match the 2nd operand of the compare to
  // the add/sub, but that may be a negated constant operand of an add.
  Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
  BinaryOperator *Sub = nullptr;
  for (User *U : CmpVariableOperand->users()) {
    // A - B, A u< B --> usubo(A, B)
    if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
      Sub = cast<BinaryOperator>(U);
      break;
    }

    // A + (-C), A u< C (canonicalized form of (sub A, C))
    const APInt *CmpC, *AddC;
    if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
        match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
      Sub = cast<BinaryOperator>(U);
      break;
    }
  }
  // No matching subtract (or add-of-negated-constant) found among the users.
  if (!Sub)
    return false;

  // The target must consider forming usubo profitable for this type.
  if (!TLI->shouldFormOverflowOp(ISD::USUBO,
                                 TLI->getValueType(*DL, Sub->getType())))
    return false;

  if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
    return false;

  // Reset callers - do not crash by iterating over a dead instruction.
  ModifiedDT = true;
  return true;
}
1344 | |||||||||||||||||
1345 | /// Sink the given CmpInst into user blocks to reduce the number of virtual | ||||||||||||||||
1346 | /// registers that must be created and coalesced. This is a clear win except on | ||||||||||||||||
1347 | /// targets with multiple condition code registers (PowerPC), where it might | ||||||||||||||||
1348 | /// lose; some adjustment may be wanted there. | ||||||||||||||||
1349 | /// | ||||||||||||||||
1350 | /// Return true if any changes are made. | ||||||||||||||||
1351 | static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) { | ||||||||||||||||
1352 | if (TLI.hasMultipleConditionRegisters()) | ||||||||||||||||
1353 | return false; | ||||||||||||||||
1354 | |||||||||||||||||
1355 | // Avoid sinking soft-FP comparisons, since this can move them into a loop. | ||||||||||||||||
1356 | if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp)) | ||||||||||||||||
1357 | return false; | ||||||||||||||||
1358 | |||||||||||||||||
1359 | // Only insert a cmp in each block once. | ||||||||||||||||
1360 | DenseMap<BasicBlock*, CmpInst*> InsertedCmps; | ||||||||||||||||
1361 | |||||||||||||||||
1362 | bool MadeChange = false; | ||||||||||||||||
1363 | for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end(); | ||||||||||||||||
1364 | UI != E; ) { | ||||||||||||||||
1365 | Use &TheUse = UI.getUse(); | ||||||||||||||||
1366 | Instruction *User = cast<Instruction>(*UI); | ||||||||||||||||
1367 | |||||||||||||||||
1368 | // Preincrement use iterator so we don't invalidate it. | ||||||||||||||||
1369 | ++UI; | ||||||||||||||||
1370 | |||||||||||||||||
1371 | // Don't bother for PHI nodes. | ||||||||||||||||
1372 | if (isa<PHINode>(User)) | ||||||||||||||||
1373 | continue; | ||||||||||||||||
1374 | |||||||||||||||||
1375 | // Figure out which BB this cmp is used in. | ||||||||||||||||
1376 | BasicBlock *UserBB = User->getParent(); | ||||||||||||||||
1377 | BasicBlock *DefBB = Cmp->getParent(); | ||||||||||||||||
1378 | |||||||||||||||||
1379 | // If this user is in the same block as the cmp, don't change the cmp. | ||||||||||||||||
1380 | if (UserBB == DefBB) continue; | ||||||||||||||||
1381 | |||||||||||||||||
1382 | // If we have already inserted a cmp into this block, use it. | ||||||||||||||||
1383 | CmpInst *&InsertedCmp = InsertedCmps[UserBB]; | ||||||||||||||||
1384 | |||||||||||||||||
1385 | if (!InsertedCmp) { | ||||||||||||||||
1386 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | ||||||||||||||||
1387 | assert(InsertPt != UserBB->end())((InsertPt != UserBB->end()) ? static_cast<void> (0) : __assert_fail ("InsertPt != UserBB->end()", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1387, __PRETTY_FUNCTION__)); | ||||||||||||||||
1388 | InsertedCmp = | ||||||||||||||||
1389 | CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), | ||||||||||||||||
1390 | Cmp->getOperand(0), Cmp->getOperand(1), "", | ||||||||||||||||
1391 | &*InsertPt); | ||||||||||||||||
1392 | // Propagate the debug info. | ||||||||||||||||
1393 | InsertedCmp->setDebugLoc(Cmp->getDebugLoc()); | ||||||||||||||||
1394 | } | ||||||||||||||||
1395 | |||||||||||||||||
1396 | // Replace a use of the cmp with a use of the new cmp. | ||||||||||||||||
1397 | TheUse = InsertedCmp; | ||||||||||||||||
1398 | MadeChange = true; | ||||||||||||||||
1399 | ++NumCmpUses; | ||||||||||||||||
1400 | } | ||||||||||||||||
1401 | |||||||||||||||||
1402 | // If we removed all uses, nuke the cmp. | ||||||||||||||||
1403 | if (Cmp->use_empty()) { | ||||||||||||||||
1404 | Cmp->eraseFromParent(); | ||||||||||||||||
1405 | MadeChange = true; | ||||||||||||||||
1406 | } | ||||||||||||||||
1407 | |||||||||||||||||
1408 | return MadeChange; | ||||||||||||||||
1409 | } | ||||||||||||||||
1410 | |||||||||||||||||
1411 | bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) { | ||||||||||||||||
1412 | if (sinkCmpExpression(Cmp, *TLI)) | ||||||||||||||||
1413 | return true; | ||||||||||||||||
1414 | |||||||||||||||||
1415 | if (combineToUAddWithOverflow(Cmp, ModifiedDT)) | ||||||||||||||||
1416 | return true; | ||||||||||||||||
1417 | |||||||||||||||||
1418 | if (combineToUSubWithOverflow(Cmp, ModifiedDT)) | ||||||||||||||||
1419 | return true; | ||||||||||||||||
1420 | |||||||||||||||||
1421 | return false; | ||||||||||||||||
1422 | } | ||||||||||||||||
1423 | |||||||||||||||||
1424 | /// Duplicate and sink the given 'and' instruction into user blocks where it is | ||||||||||||||||
1425 | /// used in a compare to allow isel to generate better code for targets where | ||||||||||||||||
1426 | /// this operation can be combined. | ||||||||||||||||
1427 | /// | ||||||||||||||||
1428 | /// Return true if any changes are made. | ||||||||||||||||
1429 | static bool sinkAndCmp0Expression(Instruction *AndI, | ||||||||||||||||
1430 | const TargetLowering &TLI, | ||||||||||||||||
1431 | SetOfInstrs &InsertedInsts) { | ||||||||||||||||
1432 | // Double-check that we're not trying to optimize an instruction that was | ||||||||||||||||
1433 | // already optimized by some other part of this pass. | ||||||||||||||||
1434 | assert(!InsertedInsts.count(AndI) &&((!InsertedInsts.count(AndI) && "Attempting to optimize already optimized and instruction" ) ? static_cast<void> (0) : __assert_fail ("!InsertedInsts.count(AndI) && \"Attempting to optimize already optimized and instruction\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1435, __PRETTY_FUNCTION__)) | ||||||||||||||||
1435 | "Attempting to optimize already optimized and instruction")((!InsertedInsts.count(AndI) && "Attempting to optimize already optimized and instruction" ) ? static_cast<void> (0) : __assert_fail ("!InsertedInsts.count(AndI) && \"Attempting to optimize already optimized and instruction\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1435, __PRETTY_FUNCTION__)); | ||||||||||||||||
1436 | (void) InsertedInsts; | ||||||||||||||||
1437 | |||||||||||||||||
1438 | // Nothing to do for single use in same basic block. | ||||||||||||||||
1439 | if (AndI->hasOneUse() && | ||||||||||||||||
1440 | AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent()) | ||||||||||||||||
1441 | return false; | ||||||||||||||||
1442 | |||||||||||||||||
1443 | // Try to avoid cases where sinking/duplicating is likely to increase register | ||||||||||||||||
1444 | // pressure. | ||||||||||||||||
1445 | if (!isa<ConstantInt>(AndI->getOperand(0)) && | ||||||||||||||||
1446 | !isa<ConstantInt>(AndI->getOperand(1)) && | ||||||||||||||||
1447 | AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse()) | ||||||||||||||||
1448 | return false; | ||||||||||||||||
1449 | |||||||||||||||||
1450 | for (auto *U : AndI->users()) { | ||||||||||||||||
1451 | Instruction *User = cast<Instruction>(U); | ||||||||||||||||
1452 | |||||||||||||||||
1453 | // Only sink 'and' feeding icmp with 0. | ||||||||||||||||
1454 | if (!isa<ICmpInst>(User)) | ||||||||||||||||
1455 | return false; | ||||||||||||||||
1456 | |||||||||||||||||
1457 | auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1)); | ||||||||||||||||
1458 | if (!CmpC || !CmpC->isZero()) | ||||||||||||||||
1459 | return false; | ||||||||||||||||
1460 | } | ||||||||||||||||
1461 | |||||||||||||||||
1462 | if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI)) | ||||||||||||||||
1463 | return false; | ||||||||||||||||
1464 | |||||||||||||||||
1465 | LLVM_DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "found 'and' feeding only icmp 0;\n" ; } } while (false); | ||||||||||||||||
1466 | LLVM_DEBUG(AndI->getParent()->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { AndI->getParent()->dump(); } } while (false); | ||||||||||||||||
1467 | |||||||||||||||||
1468 | // Push the 'and' into the same block as the icmp 0. There should only be | ||||||||||||||||
1469 | // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any | ||||||||||||||||
1470 | // others, so we don't need to keep track of which BBs we insert into. | ||||||||||||||||
1471 | for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end(); | ||||||||||||||||
1472 | UI != E; ) { | ||||||||||||||||
1473 | Use &TheUse = UI.getUse(); | ||||||||||||||||
1474 | Instruction *User = cast<Instruction>(*UI); | ||||||||||||||||
1475 | |||||||||||||||||
1476 | // Preincrement use iterator so we don't invalidate it. | ||||||||||||||||
1477 | ++UI; | ||||||||||||||||
1478 | |||||||||||||||||
1479 | LLVM_DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "sinking 'and' use: " << *User << "\n"; } } while (false); | ||||||||||||||||
1480 | |||||||||||||||||
1481 | // Keep the 'and' in the same place if the use is already in the same block. | ||||||||||||||||
1482 | Instruction *InsertPt = | ||||||||||||||||
1483 | User->getParent() == AndI->getParent() ? AndI : User; | ||||||||||||||||
1484 | Instruction *InsertedAnd = | ||||||||||||||||
1485 | BinaryOperator::Create(Instruction::And, AndI->getOperand(0), | ||||||||||||||||
1486 | AndI->getOperand(1), "", InsertPt); | ||||||||||||||||
1487 | // Propagate the debug info. | ||||||||||||||||
1488 | InsertedAnd->setDebugLoc(AndI->getDebugLoc()); | ||||||||||||||||
1489 | |||||||||||||||||
1490 | // Replace a use of the 'and' with a use of the new 'and'. | ||||||||||||||||
1491 | TheUse = InsertedAnd; | ||||||||||||||||
1492 | ++NumAndUses; | ||||||||||||||||
1493 | LLVM_DEBUG(User->getParent()->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { User->getParent()->dump(); } } while (false); | ||||||||||||||||
1494 | } | ||||||||||||||||
1495 | |||||||||||||||||
1496 | // We removed all uses, nuke the and. | ||||||||||||||||
1497 | AndI->eraseFromParent(); | ||||||||||||||||
1498 | return true; | ||||||||||||||||
1499 | } | ||||||||||||||||
1500 | |||||||||||||||||
1501 | /// Check if the candidates could be combined with a shift instruction, which | ||||||||||||||||
1502 | /// includes: | ||||||||||||||||
1503 | /// 1. Truncate instruction | ||||||||||||||||
1504 | /// 2. And instruction and the imm is a mask of the low bits: | ||||||||||||||||
1505 | /// imm & (imm+1) == 0 | ||||||||||||||||
1506 | static bool isExtractBitsCandidateUse(Instruction *User) { | ||||||||||||||||
1507 | if (!isa<TruncInst>(User)) { | ||||||||||||||||
1508 | if (User->getOpcode() != Instruction::And || | ||||||||||||||||
1509 | !isa<ConstantInt>(User->getOperand(1))) | ||||||||||||||||
1510 | return false; | ||||||||||||||||
1511 | |||||||||||||||||
1512 | const APInt &Cimm = cast<ConstantInt>(User->getOperand(1))->getValue(); | ||||||||||||||||
1513 | |||||||||||||||||
1514 | if ((Cimm & (Cimm + 1)).getBoolValue()) | ||||||||||||||||
1515 | return false; | ||||||||||||||||
1516 | } | ||||||||||||||||
1517 | return true; | ||||||||||||||||
1518 | } | ||||||||||||||||
1519 | |||||||||||||||||
1520 | /// Sink both shift and truncate instruction to the use of truncate's BB. | ||||||||||||||||
1521 | static bool | ||||||||||||||||
1522 | SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI, | ||||||||||||||||
1523 | DenseMap<BasicBlock *, BinaryOperator *> &InsertedShifts, | ||||||||||||||||
1524 | const TargetLowering &TLI, const DataLayout &DL) { | ||||||||||||||||
1525 | BasicBlock *UserBB = User->getParent(); | ||||||||||||||||
1526 | DenseMap<BasicBlock *, CastInst *> InsertedTruncs; | ||||||||||||||||
1527 | TruncInst *TruncI = dyn_cast<TruncInst>(User); | ||||||||||||||||
1528 | bool MadeChange = false; | ||||||||||||||||
1529 | |||||||||||||||||
1530 | for (Value::user_iterator TruncUI = TruncI->user_begin(), | ||||||||||||||||
1531 | TruncE = TruncI->user_end(); | ||||||||||||||||
1532 | TruncUI != TruncE;) { | ||||||||||||||||
1533 | |||||||||||||||||
1534 | Use &TruncTheUse = TruncUI.getUse(); | ||||||||||||||||
1535 | Instruction *TruncUser = cast<Instruction>(*TruncUI); | ||||||||||||||||
1536 | // Preincrement use iterator so we don't invalidate it. | ||||||||||||||||
1537 | |||||||||||||||||
1538 | ++TruncUI; | ||||||||||||||||
1539 | |||||||||||||||||
1540 | int ISDOpcode = TLI.InstructionOpcodeToISD(TruncUser->getOpcode()); | ||||||||||||||||
1541 | if (!ISDOpcode) | ||||||||||||||||
1542 | continue; | ||||||||||||||||
1543 | |||||||||||||||||
1544 | // If the use is actually a legal node, there will not be an | ||||||||||||||||
1545 | // implicit truncate. | ||||||||||||||||
1546 | // FIXME: always querying the result type is just an | ||||||||||||||||
1547 | // approximation; some nodes' legality is determined by the | ||||||||||||||||
1548 | // operand or other means. There's no good way to find out though. | ||||||||||||||||
1549 | if (TLI.isOperationLegalOrCustom( | ||||||||||||||||
1550 | ISDOpcode, TLI.getValueType(DL, TruncUser->getType(), true))) | ||||||||||||||||
1551 | continue; | ||||||||||||||||
1552 | |||||||||||||||||
1553 | // Don't bother for PHI nodes. | ||||||||||||||||
1554 | if (isa<PHINode>(TruncUser)) | ||||||||||||||||
1555 | continue; | ||||||||||||||||
1556 | |||||||||||||||||
1557 | BasicBlock *TruncUserBB = TruncUser->getParent(); | ||||||||||||||||
1558 | |||||||||||||||||
1559 | if (UserBB == TruncUserBB) | ||||||||||||||||
1560 | continue; | ||||||||||||||||
1561 | |||||||||||||||||
1562 | BinaryOperator *&InsertedShift = InsertedShifts[TruncUserBB]; | ||||||||||||||||
1563 | CastInst *&InsertedTrunc = InsertedTruncs[TruncUserBB]; | ||||||||||||||||
1564 | |||||||||||||||||
1565 | if (!InsertedShift && !InsertedTrunc) { | ||||||||||||||||
1566 | BasicBlock::iterator InsertPt = TruncUserBB->getFirstInsertionPt(); | ||||||||||||||||
1567 | assert(InsertPt != TruncUserBB->end())((InsertPt != TruncUserBB->end()) ? static_cast<void> (0) : __assert_fail ("InsertPt != TruncUserBB->end()", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1567, __PRETTY_FUNCTION__)); | ||||||||||||||||
1568 | // Sink the shift | ||||||||||||||||
1569 | if (ShiftI->getOpcode() == Instruction::AShr) | ||||||||||||||||
1570 | InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI, | ||||||||||||||||
1571 | "", &*InsertPt); | ||||||||||||||||
1572 | else | ||||||||||||||||
1573 | InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI, | ||||||||||||||||
1574 | "", &*InsertPt); | ||||||||||||||||
1575 | InsertedShift->setDebugLoc(ShiftI->getDebugLoc()); | ||||||||||||||||
1576 | |||||||||||||||||
1577 | // Sink the trunc | ||||||||||||||||
1578 | BasicBlock::iterator TruncInsertPt = TruncUserBB->getFirstInsertionPt(); | ||||||||||||||||
1579 | TruncInsertPt++; | ||||||||||||||||
1580 | assert(TruncInsertPt != TruncUserBB->end())((TruncInsertPt != TruncUserBB->end()) ? static_cast<void > (0) : __assert_fail ("TruncInsertPt != TruncUserBB->end()" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 1580, __PRETTY_FUNCTION__)); | ||||||||||||||||
1581 | |||||||||||||||||
1582 | InsertedTrunc = CastInst::Create(TruncI->getOpcode(), InsertedShift, | ||||||||||||||||
1583 | TruncI->getType(), "", &*TruncInsertPt); | ||||||||||||||||
1584 | InsertedTrunc->setDebugLoc(TruncI->getDebugLoc()); | ||||||||||||||||
1585 | |||||||||||||||||
1586 | MadeChange = true; | ||||||||||||||||
1587 | |||||||||||||||||
1588 | TruncTheUse = InsertedTrunc; | ||||||||||||||||
1589 | } | ||||||||||||||||
1590 | } | ||||||||||||||||
1591 | return MadeChange; | ||||||||||||||||
1592 | } | ||||||||||||||||
1593 | |||||||||||||||||
/// Sink the shift *right* instruction into user blocks if the uses could
/// potentially be combined with this shift instruction and generate BitExtract
/// instruction. It will only be applied if the architecture supports BitExtract
/// instruction. Here is an example:
/// BB1:
///   %x.extract.shift = lshr i64 %arg1, 32
/// BB2:
///   %x.extract.trunc = trunc i64 %x.extract.shift to i16
/// ==>
///
/// BB2:
///   %x.extract.shift.1 = lshr i64 %arg1, 32
///   %x.extract.trunc = trunc i64 %x.extract.shift.1 to i16
///
/// CodeGen will recognize the pattern in BB2 and generate BitExtract
/// instruction.
/// Return true if any changes are made.
static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
                                const TargetLowering &TLI,
                                const DataLayout &DL) {
  BasicBlock *DefBB = ShiftI->getParent();

  /// Only insert instructions in each block once.
  DenseMap<BasicBlock *, BinaryOperator *> InsertedShifts;

  // Whether the shifted type itself is legal; used below to decide whether a
  // trunc user in the defining block still needs the shift sunk to its users.
  bool shiftIsLegal = TLI.isTypeLegal(TLI.getValueType(DL, ShiftI->getType()));

  bool MadeChange = false;
  // Walk all uses of the shift; rewriting a use does not invalidate the
  // iterator because it is advanced before the rewrite.
  for (Value::user_iterator UI = ShiftI->user_begin(), E = ShiftI->user_end();
       UI != E;) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);
    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Only trunc users, or 'and' users with a low-bits mask, can combine
    // with the shift into a bit-extract.
    if (!isExtractBitsCandidateUse(User))
      continue;

    BasicBlock *UserBB = User->getParent();

    if (UserBB == DefBB) {
      // If the shift and truncate instruction are in the same BB. The use of
      // the truncate(TruncUse) may still introduce another truncate if not
      // legal. In this case, we would like to sink both shift and truncate
      // instruction to the BB of TruncUse.
      // for example:
      // BB1:
      // i64 shift.result = lshr i64 opnd, imm
      // trunc.result = trunc shift.result to i16
      //
      // BB2:
      //   ----> We will have an implicit truncate here if the architecture does
      //   not have i16 compare.
      // cmp i16 trunc.result, opnd2
      //
      if (isa<TruncInst>(User) && shiftIsLegal
          // If the type of the truncate is legal, no truncate will be
          // introduced in other basic blocks.
          &&
          (!TLI.isTypeLegal(TLI.getValueType(DL, User->getType()))))
        MadeChange =
            SinkShiftAndTruncate(ShiftI, User, CI, InsertedShifts, TLI, DL);

      continue;
    }
    // If we have already inserted a shift into this block, use it.
    BinaryOperator *&InsertedShift = InsertedShifts[UserBB];

    if (!InsertedShift) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      assert(InsertPt != UserBB->end());

      // Recreate the same kind of shift (arithmetic vs logical) in the
      // user's block.
      if (ShiftI->getOpcode() == Instruction::AShr)
        InsertedShift = BinaryOperator::CreateAShr(ShiftI->getOperand(0), CI,
                                                   "", &*InsertPt);
      else
        InsertedShift = BinaryOperator::CreateLShr(ShiftI->getOperand(0), CI,
                                                   "", &*InsertPt);
      InsertedShift->setDebugLoc(ShiftI->getDebugLoc());

      MadeChange = true;
    }

    // Replace a use of the shift with a use of the new shift.
    TheUse = InsertedShift;
  }

  // If we removed all uses, or there are none, nuke the shift.
  if (ShiftI->use_empty()) {
    salvageDebugInfo(*ShiftI);
    ShiftI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}
1694 | |||||||||||||||||
1695 | /// If counting leading or trailing zeros is an expensive operation and a zero | ||||||||||||||||
1696 | /// input is defined, add a check for zero to avoid calling the intrinsic. | ||||||||||||||||
1697 | /// | ||||||||||||||||
1698 | /// We want to transform: | ||||||||||||||||
1699 | /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 false) | ||||||||||||||||
1700 | /// | ||||||||||||||||
1701 | /// into: | ||||||||||||||||
1702 | /// entry: | ||||||||||||||||
1703 | /// %cmpz = icmp eq i64 %A, 0 | ||||||||||||||||
1704 | /// br i1 %cmpz, label %cond.end, label %cond.false | ||||||||||||||||
1705 | /// cond.false: | ||||||||||||||||
1706 | /// %z = call i64 @llvm.cttz.i64(i64 %A, i1 true) | ||||||||||||||||
1707 | /// br label %cond.end | ||||||||||||||||
1708 | /// cond.end: | ||||||||||||||||
1709 | /// %ctz = phi i64 [ 64, %entry ], [ %z, %cond.false ] | ||||||||||||||||
1710 | /// | ||||||||||||||||
1711 | /// If the transform is performed, return true and set ModifiedDT to true. | ||||||||||||||||
1712 | static bool despeculateCountZeros(IntrinsicInst *CountZeros, | ||||||||||||||||
1713 | const TargetLowering *TLI, | ||||||||||||||||
1714 | const DataLayout *DL, | ||||||||||||||||
1715 | bool &ModifiedDT) { | ||||||||||||||||
1716 | if (!TLI || !DL) | ||||||||||||||||
1717 | return false; | ||||||||||||||||
1718 | |||||||||||||||||
1719 | // If a zero input is undefined, it doesn't make sense to despeculate that. | ||||||||||||||||
1720 | if (match(CountZeros->getOperand(1), m_One())) | ||||||||||||||||
1721 | return false; | ||||||||||||||||
1722 | |||||||||||||||||
1723 | // If it's cheap to speculate, there's nothing to do. | ||||||||||||||||
1724 | auto IntrinsicID = CountZeros->getIntrinsicID(); | ||||||||||||||||
1725 | if ((IntrinsicID == Intrinsic::cttz && TLI->isCheapToSpeculateCttz()) || | ||||||||||||||||
1726 | (IntrinsicID == Intrinsic::ctlz && TLI->isCheapToSpeculateCtlz())) | ||||||||||||||||
1727 | return false; | ||||||||||||||||
1728 | |||||||||||||||||
1729 | // Only handle legal scalar cases. Anything else requires too much work. | ||||||||||||||||
1730 | Type *Ty = CountZeros->getType(); | ||||||||||||||||
1731 | unsigned SizeInBits = Ty->getPrimitiveSizeInBits(); | ||||||||||||||||
1732 | if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits()) | ||||||||||||||||
1733 | return false; | ||||||||||||||||
1734 | |||||||||||||||||
1735 | // The intrinsic will be sunk behind a compare against zero and branch. | ||||||||||||||||
1736 | BasicBlock *StartBlock = CountZeros->getParent(); | ||||||||||||||||
1737 | BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false"); | ||||||||||||||||
1738 | |||||||||||||||||
1739 | // Create another block after the count zero intrinsic. A PHI will be added | ||||||||||||||||
1740 | // in this block to select the result of the intrinsic or the bit-width | ||||||||||||||||
1741 | // constant if the input to the intrinsic is zero. | ||||||||||||||||
1742 | BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(CountZeros)); | ||||||||||||||||
1743 | BasicBlock *EndBlock = CallBlock->splitBasicBlock(SplitPt, "cond.end"); | ||||||||||||||||
1744 | |||||||||||||||||
1745 | // Set up a builder to create a compare, conditional branch, and PHI. | ||||||||||||||||
1746 | IRBuilder<> Builder(CountZeros->getContext()); | ||||||||||||||||
1747 | Builder.SetInsertPoint(StartBlock->getTerminator()); | ||||||||||||||||
1748 | Builder.SetCurrentDebugLocation(CountZeros->getDebugLoc()); | ||||||||||||||||
1749 | |||||||||||||||||
1750 | // Replace the unconditional branch that was created by the first split with | ||||||||||||||||
1751 | // a compare against zero and a conditional branch. | ||||||||||||||||
1752 | Value *Zero = Constant::getNullValue(Ty); | ||||||||||||||||
1753 | Value *Cmp = Builder.CreateICmpEQ(CountZeros->getOperand(0), Zero, "cmpz"); | ||||||||||||||||
1754 | Builder.CreateCondBr(Cmp, EndBlock, CallBlock); | ||||||||||||||||
1755 | StartBlock->getTerminator()->eraseFromParent(); | ||||||||||||||||
1756 | |||||||||||||||||
1757 | // Create a PHI in the end block to select either the output of the intrinsic | ||||||||||||||||
1758 | // or the bit width of the operand. | ||||||||||||||||
1759 | Builder.SetInsertPoint(&EndBlock->front()); | ||||||||||||||||
1760 | PHINode *PN = Builder.CreatePHI(Ty, 2, "ctz"); | ||||||||||||||||
1761 | CountZeros->replaceAllUsesWith(PN); | ||||||||||||||||
1762 | Value *BitWidth = Builder.getInt(APInt(SizeInBits, SizeInBits)); | ||||||||||||||||
1763 | PN->addIncoming(BitWidth, StartBlock); | ||||||||||||||||
1764 | PN->addIncoming(CountZeros, CallBlock); | ||||||||||||||||
1765 | |||||||||||||||||
1766 | // We are explicitly handling the zero case, so we can set the intrinsic's | ||||||||||||||||
1767 | // undefined zero argument to 'true'. This will also prevent reprocessing the | ||||||||||||||||
1768 | // intrinsic; we only despeculate when a zero input is defined. | ||||||||||||||||
1769 | CountZeros->setArgOperand(1, Builder.getTrue()); | ||||||||||||||||
1770 | ModifiedDT = true; | ||||||||||||||||
1771 | return true; | ||||||||||||||||
1772 | } | ||||||||||||||||
1773 | |||||||||||||||||
1774 | bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { | ||||||||||||||||
1775 | BasicBlock *BB = CI->getParent(); | ||||||||||||||||
1776 | |||||||||||||||||
1777 | // Lower inline assembly if we can. | ||||||||||||||||
1778 | // If we found an inline asm expession, and if the target knows how to | ||||||||||||||||
1779 | // lower it to normal LLVM code, do so now. | ||||||||||||||||
1780 | if (TLI && isa<InlineAsm>(CI->getCalledValue())) { | ||||||||||||||||
1781 | if (TLI->ExpandInlineAsm(CI)) { | ||||||||||||||||
1782 | // Avoid invalidating the iterator. | ||||||||||||||||
1783 | CurInstIterator = BB->begin(); | ||||||||||||||||
1784 | // Avoid processing instructions out of order, which could cause | ||||||||||||||||
1785 | // reuse before a value is defined. | ||||||||||||||||
1786 | SunkAddrs.clear(); | ||||||||||||||||
1787 | return true; | ||||||||||||||||
1788 | } | ||||||||||||||||
1789 | // Sink address computing for memory operands into the block. | ||||||||||||||||
1790 | if (optimizeInlineAsmInst(CI)) | ||||||||||||||||
1791 | return true; | ||||||||||||||||
1792 | } | ||||||||||||||||
1793 | |||||||||||||||||
1794 | // Align the pointer arguments to this call if the target thinks it's a good | ||||||||||||||||
1795 | // idea | ||||||||||||||||
1796 | unsigned MinSize, PrefAlign; | ||||||||||||||||
1797 | if (TLI && TLI->shouldAlignPointerArgs(CI, MinSize, PrefAlign)) { | ||||||||||||||||
1798 | for (auto &Arg : CI->arg_operands()) { | ||||||||||||||||
1799 | // We want to align both objects whose address is used directly and | ||||||||||||||||
1800 | // objects whose address is used in casts and GEPs, though it only makes | ||||||||||||||||
1801 | // sense for GEPs if the offset is a multiple of the desired alignment and | ||||||||||||||||
1802 | // if size - offset meets the size threshold. | ||||||||||||||||
1803 | if (!Arg->getType()->isPointerTy()) | ||||||||||||||||
1804 | continue; | ||||||||||||||||
1805 | APInt Offset(DL->getIndexSizeInBits( | ||||||||||||||||
1806 | cast<PointerType>(Arg->getType())->getAddressSpace()), | ||||||||||||||||
1807 | 0); | ||||||||||||||||
1808 | Value *Val = Arg->stripAndAccumulateInBoundsConstantOffsets(*DL, Offset); | ||||||||||||||||
1809 | uint64_t Offset2 = Offset.getLimitedValue(); | ||||||||||||||||
1810 | if ((Offset2 & (PrefAlign-1)) != 0) | ||||||||||||||||
1811 | continue; | ||||||||||||||||
1812 | AllocaInst *AI; | ||||||||||||||||
1813 | if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign && | ||||||||||||||||
1814 | DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) | ||||||||||||||||
1815 | AI->setAlignment(MaybeAlign(PrefAlign)); | ||||||||||||||||
1816 | // Global variables can only be aligned if they are defined in this | ||||||||||||||||
1817 | // object (i.e. they are uniquely initialized in this object), and | ||||||||||||||||
1818 | // over-aligning global variables that have an explicit section is | ||||||||||||||||
1819 | // forbidden. | ||||||||||||||||
1820 | GlobalVariable *GV; | ||||||||||||||||
1821 | if ((GV = dyn_cast<GlobalVariable>(Val)) && GV->canIncreaseAlignment() && | ||||||||||||||||
1822 | GV->getPointerAlignment(*DL) < PrefAlign && | ||||||||||||||||
1823 | DL->getTypeAllocSize(GV->getValueType()) >= | ||||||||||||||||
1824 | MinSize + Offset2) | ||||||||||||||||
1825 | GV->setAlignment(PrefAlign); | ||||||||||||||||
1826 | } | ||||||||||||||||
1827 | // If this is a memcpy (or similar) then we may be able to improve the | ||||||||||||||||
1828 | // alignment | ||||||||||||||||
1829 | if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(CI)) { | ||||||||||||||||
1830 | unsigned DestAlign = getKnownAlignment(MI->getDest(), *DL); | ||||||||||||||||
1831 | if (DestAlign > MI->getDestAlignment()) | ||||||||||||||||
1832 | MI->setDestAlignment(DestAlign); | ||||||||||||||||
1833 | if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { | ||||||||||||||||
1834 | unsigned SrcAlign = getKnownAlignment(MTI->getSource(), *DL); | ||||||||||||||||
1835 | if (SrcAlign > MTI->getSourceAlignment()) | ||||||||||||||||
1836 | MTI->setSourceAlignment(SrcAlign); | ||||||||||||||||
1837 | } | ||||||||||||||||
1838 | } | ||||||||||||||||
1839 | } | ||||||||||||||||
1840 | |||||||||||||||||
1841 | // If we have a cold call site, try to sink addressing computation into the | ||||||||||||||||
1842 | // cold block. This interacts with our handling for loads and stores to | ||||||||||||||||
1843 | // ensure that we can fold all uses of a potential addressing computation | ||||||||||||||||
1844 | // into their uses. TODO: generalize this to work over profiling data | ||||||||||||||||
1845 | if (!OptSize && CI->hasFnAttr(Attribute::Cold)) | ||||||||||||||||
1846 | for (auto &Arg : CI->arg_operands()) { | ||||||||||||||||
1847 | if (!Arg->getType()->isPointerTy()) | ||||||||||||||||
1848 | continue; | ||||||||||||||||
1849 | unsigned AS = Arg->getType()->getPointerAddressSpace(); | ||||||||||||||||
1850 | return optimizeMemoryInst(CI, Arg, Arg->getType(), AS); | ||||||||||||||||
1851 | } | ||||||||||||||||
1852 | |||||||||||||||||
1853 | IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI); | ||||||||||||||||
1854 | if (II) { | ||||||||||||||||
1855 | switch (II->getIntrinsicID()) { | ||||||||||||||||
1856 | default: break; | ||||||||||||||||
1857 | case Intrinsic::experimental_widenable_condition: { | ||||||||||||||||
1858 | // Give up on future widening oppurtunties so that we can fold away dead | ||||||||||||||||
1859 | // paths and merge blocks before going into block-local instruction | ||||||||||||||||
1860 | // selection. | ||||||||||||||||
1861 | if (II->use_empty()) { | ||||||||||||||||
1862 | II->eraseFromParent(); | ||||||||||||||||
1863 | return true; | ||||||||||||||||
1864 | } | ||||||||||||||||
1865 | Constant *RetVal = ConstantInt::getTrue(II->getContext()); | ||||||||||||||||
1866 | resetIteratorIfInvalidatedWhileCalling(BB, [&]() { | ||||||||||||||||
1867 | replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); | ||||||||||||||||
1868 | }); | ||||||||||||||||
1869 | return true; | ||||||||||||||||
1870 | } | ||||||||||||||||
1871 | case Intrinsic::objectsize: { | ||||||||||||||||
1872 | // Lower all uses of llvm.objectsize.* | ||||||||||||||||
1873 | Value *RetVal = | ||||||||||||||||
1874 | lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); | ||||||||||||||||
1875 | |||||||||||||||||
1876 | resetIteratorIfInvalidatedWhileCalling(BB, [&]() { | ||||||||||||||||
1877 | replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); | ||||||||||||||||
1878 | }); | ||||||||||||||||
1879 | return true; | ||||||||||||||||
1880 | } | ||||||||||||||||
1881 | case Intrinsic::is_constant: { | ||||||||||||||||
1882 | // If is_constant hasn't folded away yet, lower it to false now. | ||||||||||||||||
1883 | Constant *RetVal = ConstantInt::get(II->getType(), 0); | ||||||||||||||||
1884 | resetIteratorIfInvalidatedWhileCalling(BB, [&]() { | ||||||||||||||||
1885 | replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); | ||||||||||||||||
1886 | }); | ||||||||||||||||
1887 | return true; | ||||||||||||||||
1888 | } | ||||||||||||||||
1889 | case Intrinsic::aarch64_stlxr: | ||||||||||||||||
1890 | case Intrinsic::aarch64_stxr: { | ||||||||||||||||
1891 | ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); | ||||||||||||||||
1892 | if (!ExtVal || !ExtVal->hasOneUse() || | ||||||||||||||||
1893 | ExtVal->getParent() == CI->getParent()) | ||||||||||||||||
1894 | return false; | ||||||||||||||||
1895 | // Sink a zext feeding stlxr/stxr before it, so it can be folded into it. | ||||||||||||||||
1896 | ExtVal->moveBefore(CI); | ||||||||||||||||
1897 | // Mark this instruction as "inserted by CGP", so that other | ||||||||||||||||
1898 | // optimizations don't touch it. | ||||||||||||||||
1899 | InsertedInsts.insert(ExtVal); | ||||||||||||||||
1900 | return true; | ||||||||||||||||
1901 | } | ||||||||||||||||
1902 | |||||||||||||||||
1903 | case Intrinsic::launder_invariant_group: | ||||||||||||||||
1904 | case Intrinsic::strip_invariant_group: { | ||||||||||||||||
1905 | Value *ArgVal = II->getArgOperand(0); | ||||||||||||||||
1906 | auto it = LargeOffsetGEPMap.find(II); | ||||||||||||||||
1907 | if (it != LargeOffsetGEPMap.end()) { | ||||||||||||||||
1908 | // Merge entries in LargeOffsetGEPMap to reflect the RAUW. | ||||||||||||||||
1909 | // Make sure not to have to deal with iterator invalidation | ||||||||||||||||
1910 | // after possibly adding ArgVal to LargeOffsetGEPMap. | ||||||||||||||||
1911 | auto GEPs = std::move(it->second); | ||||||||||||||||
1912 | LargeOffsetGEPMap[ArgVal].append(GEPs.begin(), GEPs.end()); | ||||||||||||||||
1913 | LargeOffsetGEPMap.erase(II); | ||||||||||||||||
1914 | } | ||||||||||||||||
1915 | |||||||||||||||||
1916 | II->replaceAllUsesWith(ArgVal); | ||||||||||||||||
1917 | II->eraseFromParent(); | ||||||||||||||||
1918 | return true; | ||||||||||||||||
1919 | } | ||||||||||||||||
1920 | case Intrinsic::cttz: | ||||||||||||||||
1921 | case Intrinsic::ctlz: | ||||||||||||||||
1922 | // If counting zeros is expensive, try to avoid it. | ||||||||||||||||
1923 | return despeculateCountZeros(II, TLI, DL, ModifiedDT); | ||||||||||||||||
1924 | } | ||||||||||||||||
1925 | |||||||||||||||||
1926 | if (TLI) { | ||||||||||||||||
1927 | SmallVector<Value*, 2> PtrOps; | ||||||||||||||||
1928 | Type *AccessTy; | ||||||||||||||||
1929 | if (TLI->getAddrModeArguments(II, PtrOps, AccessTy)) | ||||||||||||||||
1930 | while (!PtrOps.empty()) { | ||||||||||||||||
1931 | Value *PtrVal = PtrOps.pop_back_val(); | ||||||||||||||||
1932 | unsigned AS = PtrVal->getType()->getPointerAddressSpace(); | ||||||||||||||||
1933 | if (optimizeMemoryInst(II, PtrVal, AccessTy, AS)) | ||||||||||||||||
1934 | return true; | ||||||||||||||||
1935 | } | ||||||||||||||||
1936 | } | ||||||||||||||||
1937 | } | ||||||||||||||||
1938 | |||||||||||||||||
1939 | // From here on out we're working with named functions. | ||||||||||||||||
1940 | if (!CI->getCalledFunction()) return false; | ||||||||||||||||
1941 | |||||||||||||||||
1942 | // Lower all default uses of _chk calls. This is very similar | ||||||||||||||||
1943 | // to what InstCombineCalls does, but here we are only lowering calls | ||||||||||||||||
1944 | // to fortified library functions (e.g. __memcpy_chk) that have the default | ||||||||||||||||
1945 | // "don't know" as the objectsize. Anything else should be left alone. | ||||||||||||||||
1946 | FortifiedLibCallSimplifier Simplifier(TLInfo, true); | ||||||||||||||||
1947 | if (Value *V = Simplifier.optimizeCall(CI)) { | ||||||||||||||||
1948 | CI->replaceAllUsesWith(V); | ||||||||||||||||
1949 | CI->eraseFromParent(); | ||||||||||||||||
1950 | return true; | ||||||||||||||||
1951 | } | ||||||||||||||||
1952 | |||||||||||||||||
1953 | return false; | ||||||||||||||||
1954 | } | ||||||||||||||||
1955 | |||||||||||||||||
1956 | /// Look for opportunities to duplicate return instructions to the predecessor | ||||||||||||||||
1957 | /// to enable tail call optimizations. The case it is currently looking for is: | ||||||||||||||||
1958 | /// @code | ||||||||||||||||
1959 | /// bb0: | ||||||||||||||||
1960 | /// %tmp0 = tail call i32 @f0() | ||||||||||||||||
1961 | /// br label %return | ||||||||||||||||
1962 | /// bb1: | ||||||||||||||||
1963 | /// %tmp1 = tail call i32 @f1() | ||||||||||||||||
1964 | /// br label %return | ||||||||||||||||
1965 | /// bb2: | ||||||||||||||||
1966 | /// %tmp2 = tail call i32 @f2() | ||||||||||||||||
1967 | /// br label %return | ||||||||||||||||
1968 | /// return: | ||||||||||||||||
1969 | /// %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ] | ||||||||||||||||
1970 | /// ret i32 %retval | ||||||||||||||||
1971 | /// @endcode | ||||||||||||||||
1972 | /// | ||||||||||||||||
1973 | /// => | ||||||||||||||||
1974 | /// | ||||||||||||||||
1975 | /// @code | ||||||||||||||||
1976 | /// bb0: | ||||||||||||||||
1977 | /// %tmp0 = tail call i32 @f0() | ||||||||||||||||
1978 | /// ret i32 %tmp0 | ||||||||||||||||
1979 | /// bb1: | ||||||||||||||||
1980 | /// %tmp1 = tail call i32 @f1() | ||||||||||||||||
1981 | /// ret i32 %tmp1 | ||||||||||||||||
1982 | /// bb2: | ||||||||||||||||
1983 | /// %tmp2 = tail call i32 @f2() | ||||||||||||||||
1984 | /// ret i32 %tmp2 | ||||||||||||||||
1985 | /// @endcode | ||||||||||||||||
bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
  // Without target lowering info we cannot query tail-call viability.
  if (!TLI)
    return false;

  // Only blocks terminated by a plain return are candidates.
  ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
  if (!RetI)
    return false;

  PHINode *PN = nullptr;
  BitCastInst *BCI = nullptr;
  Value *V = RetI->getReturnValue();
  if (V) {
    // Look through a single bitcast of the returned value.
    BCI = dyn_cast<BitCastInst>(V);
    if (BCI)
      V = BCI->getOperand(0);

    // A non-void return must merge per-predecessor call results via a PHI;
    // anything else is not handled.
    PN = dyn_cast<PHINode>(V);
    if (!PN)
      return false;
  }

  // The PHI must live in this return block for the duplication to make sense.
  if (PN && PN->getParent() != BB)
    return false;

  // Make sure there are no instructions between the PHI and return, or that the
  // return is the first instruction in the block.
  if (PN) {
    BasicBlock::iterator BI = BB->begin();
    // Skip over debug and the bitcast.
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
    if (&*BI != RetI)
      return false;
  } else {
    BasicBlock::iterator BI = BB->begin();
    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
    if (&*BI != RetI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.
  const Function *F = BB->getParent();
  SmallVector<BasicBlock*, 4> TailCallBBs;
  if (PN) {
    // Collect predecessors whose incoming value is a one-use call that the
    // target says may be emitted as a tail call.
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      // Look through bitcasts.
      Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
      CallInst *CI = dyn_cast<CallInst>(IncomingVal);
      BasicBlock *PredBB = PN->getIncomingBlock(I);
      // Make sure the phi value is indeed produced by the tail call.
      if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
          TLI->mayBeEmittedAsTailCall(CI) &&
          attributesPermitTailCall(F, CI, RetI, *TLI))
        TailCallBBs.push_back(PredBB);
    }
  } else {
    // Void return: look for predecessors whose last non-debug instruction
    // before the terminator is an unused call that may become a tail call.
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      if (!VisitedBBs.insert(*PI).second)
        continue;

      // Walk backwards from the terminator, skipping debug intrinsics.
      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
      if (RI == RE)
        continue;

      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
          attributesPermitTailCall(F, CI, RetI, *TLI))
        TailCallBBs.push_back(*PI);
    }
  }

  bool Changed = false;
  for (auto const &TailCallBB : TailCallBBs) {
    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into TailCallBB.
    (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
    ModifiedDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}
2081 | |||||||||||||||||
2082 | //===----------------------------------------------------------------------===// | ||||||||||||||||
2083 | // Memory Optimization | ||||||||||||||||
2084 | //===----------------------------------------------------------------------===// | ||||||||||||||||
2085 | |||||||||||||||||
2086 | namespace { | ||||||||||||||||
2087 | |||||||||||||||||
2088 | /// This is an extended version of TargetLowering::AddrMode | ||||||||||||||||
2089 | /// which holds actual Value*'s for register values. | ||||||||||||||||
2090 | struct ExtAddrMode : public TargetLowering::AddrMode { | ||||||||||||||||
2091 | Value *BaseReg = nullptr; | ||||||||||||||||
2092 | Value *ScaledReg = nullptr; | ||||||||||||||||
2093 | Value *OriginalValue = nullptr; | ||||||||||||||||
2094 | bool InBounds = true; | ||||||||||||||||
2095 | |||||||||||||||||
2096 | enum FieldName { | ||||||||||||||||
2097 | NoField = 0x00, | ||||||||||||||||
2098 | BaseRegField = 0x01, | ||||||||||||||||
2099 | BaseGVField = 0x02, | ||||||||||||||||
2100 | BaseOffsField = 0x04, | ||||||||||||||||
2101 | ScaledRegField = 0x08, | ||||||||||||||||
2102 | ScaleField = 0x10, | ||||||||||||||||
2103 | MultipleFields = 0xff | ||||||||||||||||
2104 | }; | ||||||||||||||||
2105 | |||||||||||||||||
2106 | |||||||||||||||||
2107 | ExtAddrMode() = default; | ||||||||||||||||
2108 | |||||||||||||||||
2109 | void print(raw_ostream &OS) const; | ||||||||||||||||
2110 | void dump() const; | ||||||||||||||||
2111 | |||||||||||||||||
2112 | FieldName compare(const ExtAddrMode &other) { | ||||||||||||||||
2113 | // First check that the types are the same on each field, as differing types | ||||||||||||||||
2114 | // is something we can't cope with later on. | ||||||||||||||||
2115 | if (BaseReg && other.BaseReg && | ||||||||||||||||
2116 | BaseReg->getType() != other.BaseReg->getType()) | ||||||||||||||||
2117 | return MultipleFields; | ||||||||||||||||
2118 | if (BaseGV && other.BaseGV && | ||||||||||||||||
2119 | BaseGV->getType() != other.BaseGV->getType()) | ||||||||||||||||
2120 | return MultipleFields; | ||||||||||||||||
2121 | if (ScaledReg && other.ScaledReg && | ||||||||||||||||
2122 | ScaledReg->getType() != other.ScaledReg->getType()) | ||||||||||||||||
2123 | return MultipleFields; | ||||||||||||||||
2124 | |||||||||||||||||
2125 | // Conservatively reject 'inbounds' mismatches. | ||||||||||||||||
2126 | if (InBounds != other.InBounds) | ||||||||||||||||
2127 | return MultipleFields; | ||||||||||||||||
2128 | |||||||||||||||||
2129 | // Check each field to see if it differs. | ||||||||||||||||
2130 | unsigned Result = NoField; | ||||||||||||||||
2131 | if (BaseReg != other.BaseReg) | ||||||||||||||||
2132 | Result |= BaseRegField; | ||||||||||||||||
2133 | if (BaseGV != other.BaseGV) | ||||||||||||||||
2134 | Result |= BaseGVField; | ||||||||||||||||
2135 | if (BaseOffs != other.BaseOffs) | ||||||||||||||||
2136 | Result |= BaseOffsField; | ||||||||||||||||
2137 | if (ScaledReg != other.ScaledReg) | ||||||||||||||||
2138 | Result |= ScaledRegField; | ||||||||||||||||
2139 | // Don't count 0 as being a different scale, because that actually means | ||||||||||||||||
2140 | // unscaled (which will already be counted by having no ScaledReg). | ||||||||||||||||
2141 | if (Scale && other.Scale && Scale != other.Scale) | ||||||||||||||||
2142 | Result |= ScaleField; | ||||||||||||||||
2143 | |||||||||||||||||
2144 | if (countPopulation(Result) > 1) | ||||||||||||||||
2145 | return MultipleFields; | ||||||||||||||||
2146 | else | ||||||||||||||||
2147 | return static_cast<FieldName>(Result); | ||||||||||||||||
2148 | } | ||||||||||||||||
2149 | |||||||||||||||||
2150 | // An AddrMode is trivial if it involves no calculation i.e. it is just a base | ||||||||||||||||
2151 | // with no offset. | ||||||||||||||||
2152 | bool isTrivial() { | ||||||||||||||||
2153 | // An AddrMode is (BaseGV + BaseReg + BaseOffs + ScaleReg * Scale) so it is | ||||||||||||||||
2154 | // trivial if at most one of these terms is nonzero, except that BaseGV and | ||||||||||||||||
2155 | // BaseReg both being zero actually means a null pointer value, which we | ||||||||||||||||
2156 | // consider to be 'non-zero' here. | ||||||||||||||||
2157 | return !BaseOffs && !Scale && !(BaseGV && BaseReg); | ||||||||||||||||
2158 | } | ||||||||||||||||
2159 | |||||||||||||||||
2160 | Value *GetFieldAsValue(FieldName Field, Type *IntPtrTy) { | ||||||||||||||||
2161 | switch (Field) { | ||||||||||||||||
2162 | default: | ||||||||||||||||
2163 | return nullptr; | ||||||||||||||||
2164 | case BaseRegField: | ||||||||||||||||
2165 | return BaseReg; | ||||||||||||||||
2166 | case BaseGVField: | ||||||||||||||||
2167 | return BaseGV; | ||||||||||||||||
2168 | case ScaledRegField: | ||||||||||||||||
2169 | return ScaledReg; | ||||||||||||||||
2170 | case BaseOffsField: | ||||||||||||||||
2171 | return ConstantInt::get(IntPtrTy, BaseOffs); | ||||||||||||||||
2172 | } | ||||||||||||||||
2173 | } | ||||||||||||||||
2174 | |||||||||||||||||
2175 | void SetCombinedField(FieldName Field, Value *V, | ||||||||||||||||
2176 | const SmallVectorImpl<ExtAddrMode> &AddrModes) { | ||||||||||||||||
2177 | switch (Field) { | ||||||||||||||||
2178 | default: | ||||||||||||||||
2179 | llvm_unreachable("Unhandled fields are expected to be rejected earlier")::llvm::llvm_unreachable_internal("Unhandled fields are expected to be rejected earlier" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2179); | ||||||||||||||||
2180 | break; | ||||||||||||||||
2181 | case ExtAddrMode::BaseRegField: | ||||||||||||||||
2182 | BaseReg = V; | ||||||||||||||||
2183 | break; | ||||||||||||||||
2184 | case ExtAddrMode::BaseGVField: | ||||||||||||||||
2185 | // A combined BaseGV is an Instruction, not a GlobalValue, so it goes | ||||||||||||||||
2186 | // in the BaseReg field. | ||||||||||||||||
2187 | assert(BaseReg == nullptr)((BaseReg == nullptr) ? static_cast<void> (0) : __assert_fail ("BaseReg == nullptr", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2187, __PRETTY_FUNCTION__)); | ||||||||||||||||
2188 | BaseReg = V; | ||||||||||||||||
2189 | BaseGV = nullptr; | ||||||||||||||||
2190 | break; | ||||||||||||||||
2191 | case ExtAddrMode::ScaledRegField: | ||||||||||||||||
2192 | ScaledReg = V; | ||||||||||||||||
2193 | // If we have a mix of scaled and unscaled addrmodes then we want scale | ||||||||||||||||
2194 | // to be the scale and not zero. | ||||||||||||||||
2195 | if (!Scale) | ||||||||||||||||
2196 | for (const ExtAddrMode &AM : AddrModes) | ||||||||||||||||
2197 | if (AM.Scale) { | ||||||||||||||||
2198 | Scale = AM.Scale; | ||||||||||||||||
2199 | break; | ||||||||||||||||
2200 | } | ||||||||||||||||
2201 | break; | ||||||||||||||||
2202 | case ExtAddrMode::BaseOffsField: | ||||||||||||||||
2203 | // The offset is no longer a constant, so it goes in ScaledReg with a | ||||||||||||||||
2204 | // scale of 1. | ||||||||||||||||
2205 | assert(ScaledReg == nullptr)((ScaledReg == nullptr) ? static_cast<void> (0) : __assert_fail ("ScaledReg == nullptr", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2205, __PRETTY_FUNCTION__)); | ||||||||||||||||
2206 | ScaledReg = V; | ||||||||||||||||
2207 | Scale = 1; | ||||||||||||||||
2208 | BaseOffs = 0; | ||||||||||||||||
2209 | break; | ||||||||||||||||
2210 | } | ||||||||||||||||
2211 | } | ||||||||||||||||
2212 | }; | ||||||||||||||||
2213 | |||||||||||||||||
2214 | } // end anonymous namespace | ||||||||||||||||
2215 | |||||||||||||||||
#ifndef NDEBUG
// Debug-build-only stream operator so an ExtAddrMode can be written directly
// to a raw_ostream (e.g. inside LLVM_DEBUG output); forwards to print().
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}
#endif
2222 | |||||||||||||||||
2223 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) | ||||||||||||||||
2224 | void ExtAddrMode::print(raw_ostream &OS) const { | ||||||||||||||||
2225 | bool NeedPlus = false; | ||||||||||||||||
2226 | OS << "["; | ||||||||||||||||
2227 | if (InBounds) | ||||||||||||||||
2228 | OS << "inbounds "; | ||||||||||||||||
2229 | if (BaseGV) { | ||||||||||||||||
2230 | OS << (NeedPlus ? " + " : "") | ||||||||||||||||
2231 | << "GV:"; | ||||||||||||||||
2232 | BaseGV->printAsOperand(OS, /*PrintType=*/false); | ||||||||||||||||
2233 | NeedPlus = true; | ||||||||||||||||
2234 | } | ||||||||||||||||
2235 | |||||||||||||||||
2236 | if (BaseOffs) { | ||||||||||||||||
2237 | OS << (NeedPlus ? " + " : "") | ||||||||||||||||
2238 | << BaseOffs; | ||||||||||||||||
2239 | NeedPlus = true; | ||||||||||||||||
2240 | } | ||||||||||||||||
2241 | |||||||||||||||||
2242 | if (BaseReg) { | ||||||||||||||||
2243 | OS << (NeedPlus ? " + " : "") | ||||||||||||||||
2244 | << "Base:"; | ||||||||||||||||
2245 | BaseReg->printAsOperand(OS, /*PrintType=*/false); | ||||||||||||||||
2246 | NeedPlus = true; | ||||||||||||||||
2247 | } | ||||||||||||||||
2248 | if (Scale) { | ||||||||||||||||
2249 | OS << (NeedPlus ? " + " : "") | ||||||||||||||||
2250 | << Scale << "*"; | ||||||||||||||||
2251 | ScaledReg->printAsOperand(OS, /*PrintType=*/false); | ||||||||||||||||
2252 | } | ||||||||||||||||
2253 | |||||||||||||||||
2254 | OS << ']'; | ||||||||||||||||
2255 | } | ||||||||||||||||
2256 | |||||||||||||||||
// Debug helper: print this addressing mode to the debug stream followed by a
// newline. Compiled only when dump methods are enabled (see enclosing #if).
LLVM_DUMP_METHOD__attribute__((noinline)) __attribute__((__used__)) void ExtAddrMode::dump() const {
  print(dbgs());
  dbgs() << '\n';
}
2261 | #endif | ||||||||||||||||
2262 | |||||||||||||||||
2263 | namespace { | ||||||||||||||||
2264 | |||||||||||||||||
2265 | /// This class provides transaction based operation on the IR. | ||||||||||||||||
2266 | /// Every change made through this class is recorded in the internal state and | ||||||||||||||||
2267 | /// can be undone (rollback) until commit is called. | ||||||||||||||||
2268 | class TypePromotionTransaction { | ||||||||||||||||
2269 | /// This represents the common interface of the individual transaction. | ||||||||||||||||
2270 | /// Each class implements the logic for doing one specific modification on | ||||||||||||||||
2271 | /// the IR via the TypePromotionTransaction. | ||||||||||||||||
  class TypePromotionAction {
  protected:
    /// The Instruction modified.
    Instruction *Inst;

  public:
    /// Constructor of the action.
    /// The constructor performs the related action on the IR.
    TypePromotionAction(Instruction *Inst) : Inst(Inst) {}

    /// Virtual destructor: actions are owned and destroyed through this base.
    virtual ~TypePromotionAction() = default;

    /// Undo the modification done by this action.
    /// When this method is called, the IR must be in the same state as it was
    /// before this action was applied.
    /// \pre Undoing the action works if and only if the IR is in the exact same
    /// state as it was directly after this action was applied.
    virtual void undo() = 0;

    /// Advocate every change made by this action.
    /// When the results on the IR of the action are to be kept, it is important
    /// to call this function, otherwise hidden information may be kept forever.
    virtual void commit() {
      // Nothing to be done, this action is not doing anything.
    }
  };
2298 | |||||||||||||||||
2299 | /// Utility to remember the position of an instruction. | ||||||||||||||||
2300 | class InsertionHandler { | ||||||||||||||||
2301 | /// Position of an instruction. | ||||||||||||||||
2302 | /// Either an instruction: | ||||||||||||||||
2303 | /// - Is the first in a basic block: BB is used. | ||||||||||||||||
2304 | /// - Has a previous instruction: PrevInst is used. | ||||||||||||||||
2305 | union { | ||||||||||||||||
2306 | Instruction *PrevInst; | ||||||||||||||||
2307 | BasicBlock *BB; | ||||||||||||||||
2308 | } Point; | ||||||||||||||||
2309 | |||||||||||||||||
2310 | /// Remember whether or not the instruction had a previous instruction. | ||||||||||||||||
2311 | bool HasPrevInstruction; | ||||||||||||||||
2312 | |||||||||||||||||
2313 | public: | ||||||||||||||||
2314 | /// Record the position of \p Inst. | ||||||||||||||||
2315 | InsertionHandler(Instruction *Inst) { | ||||||||||||||||
2316 | BasicBlock::iterator It = Inst->getIterator(); | ||||||||||||||||
2317 | HasPrevInstruction = (It != (Inst->getParent()->begin())); | ||||||||||||||||
2318 | if (HasPrevInstruction) | ||||||||||||||||
2319 | Point.PrevInst = &*--It; | ||||||||||||||||
2320 | else | ||||||||||||||||
2321 | Point.BB = Inst->getParent(); | ||||||||||||||||
2322 | } | ||||||||||||||||
2323 | |||||||||||||||||
2324 | /// Insert \p Inst at the recorded position. | ||||||||||||||||
2325 | void insert(Instruction *Inst) { | ||||||||||||||||
2326 | if (HasPrevInstruction) { | ||||||||||||||||
2327 | if (Inst->getParent()) | ||||||||||||||||
2328 | Inst->removeFromParent(); | ||||||||||||||||
2329 | Inst->insertAfter(Point.PrevInst); | ||||||||||||||||
2330 | } else { | ||||||||||||||||
2331 | Instruction *Position = &*Point.BB->getFirstInsertionPt(); | ||||||||||||||||
2332 | if (Inst->getParent()) | ||||||||||||||||
2333 | Inst->moveBefore(Position); | ||||||||||||||||
2334 | else | ||||||||||||||||
2335 | Inst->insertBefore(Position); | ||||||||||||||||
2336 | } | ||||||||||||||||
2337 | } | ||||||||||||||||
2338 | }; | ||||||||||||||||
2339 | |||||||||||||||||
2340 | /// Move an instruction before another. | ||||||||||||||||
  class InstructionMoveBefore : public TypePromotionAction {
    /// Original position of the instruction.
    InsertionHandler Position;

  public:
    /// Move \p Inst before \p Before.
    InstructionMoveBefore(Instruction *Inst, Instruction *Before)
        : TypePromotionAction(Inst), Position(Inst) {
      LLVM_DEBUG(dbgs() << "Do: move: " << *Inst << "\nbefore: " << *Beforedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: move: " << * Inst << "\nbefore: " << *Before << "\n"; } } while (false)
                        << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: move: " << * Inst << "\nbefore: " << *Before << "\n"; } } while (false);
      Inst->moveBefore(Before);
    }

    /// Move the instruction back to its original position.
    // NOTE(review): assumes the anchor recorded in Position is still valid IR
    // when undo() runs — guaranteed by the transaction's LIFO rollback order.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: moveBefore: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: moveBefore: " << *Inst << "\n"; } } while (false);
      Position.insert(Inst);
    }
  };
2360 | |||||||||||||||||
2361 | /// Set the operand of an instruction with a new value. | ||||||||||||||||
  class OperandSetter : public TypePromotionAction {
    /// Original operand of the instruction.
    Value *Origin;

    /// Index of the modified operand within Inst.
    unsigned Idx;

  public:
    /// Set \p Idx operand of \p Inst with \p NewVal.
    OperandSetter(Instruction *Inst, unsigned Idx, Value *NewVal)
        : TypePromotionAction(Inst), Idx(Idx) {
      LLVM_DEBUG(dbgs() << "Do: setOperand: " << Idx << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: setOperand: " << Idx << "\n" << "for:" << *Inst << "\n" << "with:" << *NewVal << "\n"; } } while ( false)
                        << "for:" << *Inst << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: setOperand: " << Idx << "\n" << "for:" << *Inst << "\n" << "with:" << *NewVal << "\n"; } } while ( false)
                        << "with:" << *NewVal << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: setOperand: " << Idx << "\n" << "for:" << *Inst << "\n" << "with:" << *NewVal << "\n"; } } while ( false);
      // Remember the old operand, then install the replacement.
      Origin = Inst->getOperand(Idx);
      Inst->setOperand(Idx, NewVal);
    }

    /// Restore the original value of the instruction.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: setOperand:" << Idx << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: setOperand:" << Idx << "\n" << "for: " << *Inst << "\n" << "with: " << *Origin << "\n"; } } while ( false)
                        << "for: " << *Inst << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: setOperand:" << Idx << "\n" << "for: " << *Inst << "\n" << "with: " << *Origin << "\n"; } } while ( false)
                        << "with: " << *Origin << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: setOperand:" << Idx << "\n" << "for: " << *Inst << "\n" << "with: " << *Origin << "\n"; } } while ( false);
      Inst->setOperand(Idx, Origin);
    }
  };
2388 | |||||||||||||||||
2389 | /// Hide the operands of an instruction. | ||||||||||||||||
2390 | /// Do as if this instruction was not using any of its operands. | ||||||||||||||||
  class OperandsHider : public TypePromotionAction {
    /// The list of original operands.
    SmallVector<Value *, 4> OriginalValues;

  public:
    /// Remove \p Inst from the uses of the operands of \p Inst.
    /// Each operand is replaced by an undef of the same type, so the
    /// instruction no longer appears in its operands' use lists.
    OperandsHider(Instruction *Inst) : TypePromotionAction(Inst) {
      LLVM_DEBUG(dbgs() << "Do: OperandsHider: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: OperandsHider: " << *Inst << "\n"; } } while (false);
      unsigned NumOpnds = Inst->getNumOperands();
      OriginalValues.reserve(NumOpnds);
      for (unsigned It = 0; It < NumOpnds; ++It) {
        // Save the current operand.
        Value *Val = Inst->getOperand(It);
        OriginalValues.push_back(Val);
        // Set a dummy one.
        // We could use OperandSetter here, but that would imply an overhead
        // that we are not willing to pay.
        Inst->setOperand(It, UndefValue::get(Val->getType()));
      }
    }

    /// Restore the original list of uses.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: OperandsHider: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: OperandsHider: " << *Inst << "\n"; } } while (false);
      for (unsigned It = 0, EndIt = OriginalValues.size(); It != EndIt; ++It)
        Inst->setOperand(It, OriginalValues[It]);
    }
  };
2419 | |||||||||||||||||
2420 | /// Build a truncate instruction. | ||||||||||||||||
  class TruncBuilder : public TypePromotionAction {
    /// The value produced by the builder (not necessarily an Instruction).
    Value *Val;

  public:
    /// Build a truncate instruction of \p Opnd producing a \p Ty
    /// result.
    /// trunc Opnd to Ty.
    TruncBuilder(Instruction *Opnd, Type *Ty) : TypePromotionAction(Opnd) {
      IRBuilder<> Builder(Opnd);
      Val = Builder.CreateTrunc(Opnd, Ty, "promoted");
      LLVM_DEBUG(dbgs() << "Do: TruncBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: TruncBuilder: " << *Val << "\n"; } } while (false);
    }

    /// Get the built value.
    Value *getBuiltValue() { return Val; }

    /// Remove the built instruction.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: TruncBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: TruncBuilder: " << *Val << "\n"; } } while (false);
      // Val may not be an Instruction (the guard below implies the builder can
      // return a non-instruction value); only erase when it is one.
      if (Instruction *IVal = dyn_cast<Instruction>(Val))
        IVal->eraseFromParent();
    }
  };
2444 | |||||||||||||||||
2445 | /// Build a sign extension instruction. | ||||||||||||||||
  class SExtBuilder : public TypePromotionAction {
    /// The value produced by the builder (not necessarily an Instruction).
    Value *Val;

  public:
    /// Build a sign extension instruction of \p Opnd producing a \p Ty
    /// result.
    /// sext Opnd to Ty.
    SExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
        : TypePromotionAction(InsertPt) {
      IRBuilder<> Builder(InsertPt);
      Val = Builder.CreateSExt(Opnd, Ty, "promoted");
      LLVM_DEBUG(dbgs() << "Do: SExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: SExtBuilder: " << *Val << "\n"; } } while (false);
    }

    /// Get the built value.
    Value *getBuiltValue() { return Val; }

    /// Remove the built instruction.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: SExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: SExtBuilder: " << *Val << "\n"; } } while (false);
      // Only erase when the built value is an actual instruction.
      if (Instruction *IVal = dyn_cast<Instruction>(Val))
        IVal->eraseFromParent();
    }
  };
2470 | |||||||||||||||||
2471 | /// Build a zero extension instruction. | ||||||||||||||||
  class ZExtBuilder : public TypePromotionAction {
    /// The value produced by the builder (not necessarily an Instruction).
    Value *Val;

  public:
    /// Build a zero extension instruction of \p Opnd producing a \p Ty
    /// result.
    /// zext Opnd to Ty.
    ZExtBuilder(Instruction *InsertPt, Value *Opnd, Type *Ty)
        : TypePromotionAction(InsertPt) {
      IRBuilder<> Builder(InsertPt);
      Val = Builder.CreateZExt(Opnd, Ty, "promoted");
      LLVM_DEBUG(dbgs() << "Do: ZExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: ZExtBuilder: " << *Val << "\n"; } } while (false);
    }

    /// Get the built value.
    Value *getBuiltValue() { return Val; }

    /// Remove the built instruction.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: ZExtBuilder: " << *Val << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: ZExtBuilder: " << *Val << "\n"; } } while (false);
      // Only erase when the built value is an actual instruction.
      if (Instruction *IVal = dyn_cast<Instruction>(Val))
        IVal->eraseFromParent();
    }
  };
2496 | |||||||||||||||||
2497 | /// Mutate an instruction to another type. | ||||||||||||||||
  class TypeMutator : public TypePromotionAction {
    /// Record the original type.
    Type *OrigTy;

  public:
    /// Mutate the type of \p Inst into \p NewTy.
    TypeMutator(Instruction *Inst, Type *NewTy)
        : TypePromotionAction(Inst), OrigTy(Inst->getType()) {
      LLVM_DEBUG(dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy << "\n"; } } while (false)
                        << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: MutateType: " << *Inst << " with " << *NewTy << "\n"; } } while (false);
      // In-place type change: the instruction keeps its uses and operands.
      Inst->mutateType(NewTy);
    }

    /// Mutate the instruction back to its original type.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTydo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy << "\n"; } } while (false)
                        << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: MutateType: " << *Inst << " with " << *OrigTy << "\n"; } } while (false);
      Inst->mutateType(OrigTy);
    }
  };
2518 | |||||||||||||||||
  /// Replace the uses of an instruction by another instruction.
  class UsesReplacer : public TypePromotionAction {
    /// Helper structure to keep track of the replaced uses.
    struct InstructionAndIdx {
      /// The instruction using the instruction.
      Instruction *Inst;

      /// The index where this instruction is used for Inst.
      unsigned Idx;

      InstructionAndIdx(Instruction *Inst, unsigned Idx)
          : Inst(Inst), Idx(Idx) {}
    };

    /// Keep track of the original uses (pair Instruction, Index).
    SmallVector<InstructionAndIdx, 4> OriginalUses;
    /// Keep track of the debug users.
    SmallVector<DbgValueInst *, 1> DbgValues;

    using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;

  public:
    /// Replace all the use of \p Inst by \p New.
    UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
      LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *Newdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New << "\n"; } } while (false)
                        << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New << "\n"; } } while (false);
      // Record the original uses so that undo() can re-point them at Inst.
      // cast<> asserts if a user is not an Instruction.
      for (Use &U : Inst->uses()) {
        Instruction *UserI = cast<Instruction>(U.getUser());
        OriginalUses.push_back(InstructionAndIdx(UserI, U.getOperandNo()));
      }
      // Record the debug uses separately. They are not in the instruction's
      // use list, but they are replaced by RAUW.
      findDbgValues(DbgValues, Inst);

      // Now, we can replace the uses.
      Inst->replaceAllUsesWith(New);
    }

    /// Reassign the original uses of Inst to Inst.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: UsersReplacer: " << *Inst << "\n"; } } while (false);
      // Restore every recorded (user, operand index) pair to point back at
      // the original instruction.
      for (use_iterator UseIt = OriginalUses.begin(),
                        EndIt = OriginalUses.end();
           UseIt != EndIt; ++UseIt) {
        UseIt->Inst->setOperand(UseIt->Idx, Inst);
      }
      // RAUW has replaced all original uses with references to the new value,
      // including the debug uses. Since we are undoing the replacements,
      // the original debug uses must also be reinstated to maintain the
      // correctness and utility of debug value instructions.
      for (auto *DVI: DbgValues) {
        LLVMContext &Ctx = Inst->getType()->getContext();
        auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
        DVI->setOperand(0, MV);
      }
    }
  };
2577 | |||||||||||||||||
  /// Remove an instruction from the IR.
  class InstructionRemover : public TypePromotionAction {
    /// Original position of the instruction.
    InsertionHandler Inserter;

    /// Helper structure to hide all the link to the instruction. In other
    /// words, this helps to do as if the instruction was removed.
    OperandsHider Hider;

    /// Keep track of the uses replaced, if any. Owned by this action;
    /// deleted in the destructor.
    UsesReplacer *Replacer = nullptr;

    /// Keep track of instructions removed.
    SetOfInstrs &RemovedInsts;

  public:
    /// Remove all reference of \p Inst and optionally replace all its
    /// uses with New.
    /// \p RemovedInsts Keep track of the instructions removed by this Action.
    /// \pre If !Inst->use_empty(), then New != nullptr
    InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
                       Value *New = nullptr)
        : TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
          RemovedInsts(RemovedInsts) {
      if (New)
        Replacer = new UsesReplacer(Inst, New);
      LLVM_DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Do: InstructionRemover: " << *Inst << "\n"; } } while (false);
      RemovedInsts.insert(Inst);
      /// The instructions removed here will be freed after completing
      /// optimizeBlock() for all blocks as we need to keep track of the
      /// removed instructions during promotion.
      Inst->removeFromParent();
    }

    ~InstructionRemover() override { delete Replacer; }

    /// Resurrect the instruction and reassign it to the proper uses if
    /// a new value was provided when building this action.
    void undo() override {
      LLVM_DEBUG(dbgs() << "Undo: InstructionRemover: " << *Inst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Undo: InstructionRemover: " << *Inst << "\n"; } } while (false);
      Inserter.insert(Inst);
      if (Replacer)
        Replacer->undo();
      Hider.undo();
      RemovedInsts.erase(Inst);
    }
  };
2625 | |||||||||||||||||
public:
  /// Restoration point.
  /// The restoration point is a pointer to an action instead of an iterator
  /// because the iterator may be invalidated but not the pointer.
  using ConstRestorationPt = const TypePromotionAction *;

  TypePromotionTransaction(SetOfInstrs &RemovedInsts)
      : RemovedInsts(RemovedInsts) {}

  /// Commit all the changes made in this transaction; once committed they
  /// can no longer be rolled back.
  void commit();

  /// Undo all the changes made after the given point.
  void rollback(ConstRestorationPt Point);

  /// Get the current restoration point.
  ConstRestorationPt getRestorationPoint() const;

  /// \name API for IR modification with state keeping to support rollback.
  /// @{
  /// Same as Instruction::setOperand.
  void setOperand(Instruction *Inst, unsigned Idx, Value *NewVal);

  /// Same as Instruction::eraseFromParent.
  void eraseInstruction(Instruction *Inst, Value *NewVal = nullptr);

  /// Same as Value::replaceAllUsesWith.
  void replaceAllUsesWith(Instruction *Inst, Value *New);

  /// Same as Value::mutateType.
  void mutateType(Instruction *Inst, Type *NewTy);

  /// Same as IRBuilder::createTrunc.
  Value *createTrunc(Instruction *Opnd, Type *Ty);

  /// Same as IRBuilder::createSExt.
  Value *createSExt(Instruction *Inst, Value *Opnd, Type *Ty);

  /// Same as IRBuilder::createZExt.
  Value *createZExt(Instruction *Inst, Value *Opnd, Type *Ty);

  /// Same as Instruction::moveBefore.
  void moveBefore(Instruction *Inst, Instruction *Before);
  /// @}

private:
  /// The ordered list of actions made so far.
  SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;

  using CommitPt = SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator;

  /// The set of instructions removed by actions of this transaction,
  /// supplied by (and shared with) the creator of the transaction.
  SetOfInstrs &RemovedInsts;
};
2679 | |||||||||||||||||
2680 | } // end anonymous namespace | ||||||||||||||||
2681 | |||||||||||||||||
2682 | void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx, | ||||||||||||||||
2683 | Value *NewVal) { | ||||||||||||||||
2684 | Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>( | ||||||||||||||||
2685 | Inst, Idx, NewVal)); | ||||||||||||||||
2686 | } | ||||||||||||||||
2687 | |||||||||||||||||
2688 | void TypePromotionTransaction::eraseInstruction(Instruction *Inst, | ||||||||||||||||
2689 | Value *NewVal) { | ||||||||||||||||
2690 | Actions.push_back( | ||||||||||||||||
2691 | std::make_unique<TypePromotionTransaction::InstructionRemover>( | ||||||||||||||||
2692 | Inst, RemovedInsts, NewVal)); | ||||||||||||||||
2693 | } | ||||||||||||||||
2694 | |||||||||||||||||
2695 | void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst, | ||||||||||||||||
2696 | Value *New) { | ||||||||||||||||
2697 | Actions.push_back( | ||||||||||||||||
2698 | std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); | ||||||||||||||||
2699 | } | ||||||||||||||||
2700 | |||||||||||||||||
2701 | void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) { | ||||||||||||||||
2702 | Actions.push_back( | ||||||||||||||||
2703 | std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); | ||||||||||||||||
2704 | } | ||||||||||||||||
2705 | |||||||||||||||||
2706 | Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, | ||||||||||||||||
2707 | Type *Ty) { | ||||||||||||||||
2708 | std::unique_ptr<TruncBuilder> Ptr(new TruncBuilder(Opnd, Ty)); | ||||||||||||||||
2709 | Value *Val = Ptr->getBuiltValue(); | ||||||||||||||||
2710 | Actions.push_back(std::move(Ptr)); | ||||||||||||||||
2711 | return Val; | ||||||||||||||||
2712 | } | ||||||||||||||||
2713 | |||||||||||||||||
2714 | Value *TypePromotionTransaction::createSExt(Instruction *Inst, | ||||||||||||||||
2715 | Value *Opnd, Type *Ty) { | ||||||||||||||||
2716 | std::unique_ptr<SExtBuilder> Ptr(new SExtBuilder(Inst, Opnd, Ty)); | ||||||||||||||||
2717 | Value *Val = Ptr->getBuiltValue(); | ||||||||||||||||
2718 | Actions.push_back(std::move(Ptr)); | ||||||||||||||||
2719 | return Val; | ||||||||||||||||
2720 | } | ||||||||||||||||
2721 | |||||||||||||||||
2722 | Value *TypePromotionTransaction::createZExt(Instruction *Inst, | ||||||||||||||||
2723 | Value *Opnd, Type *Ty) { | ||||||||||||||||
2724 | std::unique_ptr<ZExtBuilder> Ptr(new ZExtBuilder(Inst, Opnd, Ty)); | ||||||||||||||||
2725 | Value *Val = Ptr->getBuiltValue(); | ||||||||||||||||
2726 | Actions.push_back(std::move(Ptr)); | ||||||||||||||||
2727 | return Val; | ||||||||||||||||
2728 | } | ||||||||||||||||
2729 | |||||||||||||||||
2730 | void TypePromotionTransaction::moveBefore(Instruction *Inst, | ||||||||||||||||
2731 | Instruction *Before) { | ||||||||||||||||
2732 | Actions.push_back( | ||||||||||||||||
2733 | std::make_unique<TypePromotionTransaction::InstructionMoveBefore>( | ||||||||||||||||
2734 | Inst, Before)); | ||||||||||||||||
2735 | } | ||||||||||||||||
2736 | |||||||||||||||||
2737 | TypePromotionTransaction::ConstRestorationPt | ||||||||||||||||
2738 | TypePromotionTransaction::getRestorationPoint() const { | ||||||||||||||||
2739 | return !Actions.empty() ? Actions.back().get() : nullptr; | ||||||||||||||||
2740 | } | ||||||||||||||||
2741 | |||||||||||||||||
2742 | void TypePromotionTransaction::commit() { | ||||||||||||||||
2743 | for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt; | ||||||||||||||||
2744 | ++It) | ||||||||||||||||
2745 | (*It)->commit(); | ||||||||||||||||
2746 | Actions.clear(); | ||||||||||||||||
2747 | } | ||||||||||||||||
2748 | |||||||||||||||||
2749 | void TypePromotionTransaction::rollback( | ||||||||||||||||
2750 | TypePromotionTransaction::ConstRestorationPt Point) { | ||||||||||||||||
2751 | while (!Actions.empty() && Point != Actions.back().get()) { | ||||||||||||||||
2752 | std::unique_ptr<TypePromotionAction> Curr = Actions.pop_back_val(); | ||||||||||||||||
2753 | Curr->undo(); | ||||||||||||||||
2754 | } | ||||||||||||||||
2755 | } | ||||||||||||||||
2756 | |||||||||||||||||
2757 | namespace { | ||||||||||||||||
2758 | |||||||||||||||||
/// A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
  SmallVectorImpl<Instruction*> &AddrModeInsts;
  const TargetLowering &TLI;
  const TargetRegisterInfo &TRI;
  const DataLayout &DL;

  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.
  Type *AccessTy;
  unsigned AddrSpace;
  Instruction *MemoryInst;

  /// This is the addressing mode that we're building up. This is
  /// part of the return value of this addressing mode matching stuff.
  ExtAddrMode &AddrMode;

  /// The instructions inserted by other CodeGenPrepare optimizations.
  const SetOfInstrs &InsertedInsts;

  /// A map from the instructions to their type before promotion.
  InstrToOrigTy &PromotedInsts;

  /// The ongoing transaction where every action should be registered.
  TypePromotionTransaction &TPT;

  // A GEP which has too large offset to be folded into the addressing mode.
  std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP;

  /// This is set to true when we should not do profitability checks.
  /// When true, IsProfitableToFoldIntoAddressingMode always returns true.
  bool IgnoreProfitability;

  // The constructor is private; clients use the static Match() entry point
  // below.
  AddressingModeMatcher(
      SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
      const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
      ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
      InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
      std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP)
      : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
        DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
        MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
        PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) {
    IgnoreProfitability = false;
  }

public:
  /// Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy. This returns a list of involved
  /// instructions in AddrModeInsts.
  /// \p InsertedInsts The instructions inserted by other CodeGenPrepare
  /// optimizations.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  /// \p TPT The ongoing transaction where every action should be registered.
  /// \p LargeOffsetGEP receives a GEP whose offset is too large to fold.
  static ExtAddrMode
  Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
        SmallVectorImpl<Instruction *> &AddrModeInsts,
        const TargetLowering &TLI, const TargetRegisterInfo &TRI,
        const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
        TypePromotionTransaction &TPT,
        std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) {
    ExtAddrMode Result;

    bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
                                         MemoryInst, Result, InsertedInsts,
                                         PromotedInsts, TPT, LargeOffsetGEP)
                       .matchAddr(V, 0);
    (void)Success; assert(Success && "Couldn't select *anything*?")((Success && "Couldn't select *anything*?") ? static_cast <void> (0) : __assert_fail ("Success && \"Couldn't select *anything*?\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2828, __PRETTY_FUNCTION__));
    return Result;
  }

private:
  bool matchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool matchAddr(Value *Addr, unsigned Depth);
  bool matchOperationAddr(User *AddrInst, unsigned Opcode, unsigned Depth,
                          bool *MovedAway = nullptr);
  bool isProfitableToFoldIntoAddressingMode(Instruction *I,
                                            ExtAddrMode &AMBefore,
                                            ExtAddrMode &AMAfter);
  bool valueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
  bool isPromotionProfitable(unsigned NewCost, unsigned OldCost,
                             Value *PromotedOperand) const;
};
2844 | |||||||||||||||||
class PhiNodeSet;

/// An iterator for PhiNodeSet.
class PhiNodeSetIterator {
  /// The set being iterated; non-owning.
  PhiNodeSet * const Set;
  /// Index of the current element in the set's underlying vector.
  size_t CurrentIndex = 0;

public:
  /// The constructor. Start should point to either a valid element, or be equal
  /// to the size of the underlying SmallVector of the PhiNodeSet.
  PhiNodeSetIterator(PhiNodeSet * const Set, size_t Start);
  PHINode * operator*() const;
  PhiNodeSetIterator& operator++();
  bool operator==(const PhiNodeSetIterator &RHS) const;
  bool operator!=(const PhiNodeSetIterator &RHS) const;
};
2861 | |||||||||||||||||
2862 | /// Keeps a set of PHINodes. | ||||||||||||||||
2863 | /// | ||||||||||||||||
2864 | /// This is a minimal set implementation for a specific use case: | ||||||||||||||||
2865 | /// It is very fast when there are very few elements, but also provides good | ||||||||||||||||
2866 | /// performance when there are many. It is similar to SmallPtrSet, but also | ||||||||||||||||
2867 | /// provides iteration by insertion order, which is deterministic and stable | ||||||||||||||||
2868 | /// across runs. It is also similar to SmallSetVector, but provides removing | ||||||||||||||||
2869 | /// elements in O(1) time. This is achieved by not actually removing the element | ||||||||||||||||
2870 | /// from the underlying vector, so comes at the cost of using more memory, but | ||||||||||||||||
2871 | /// that is fine, since PhiNodeSets are used as short lived objects. | ||||||||||||||||
2872 | class PhiNodeSet { | ||||||||||||||||
2873 | friend class PhiNodeSetIterator; | ||||||||||||||||
2874 | |||||||||||||||||
2875 | using MapType = SmallDenseMap<PHINode *, size_t, 32>; | ||||||||||||||||
2876 | using iterator = PhiNodeSetIterator; | ||||||||||||||||
2877 | |||||||||||||||||
2878 | /// Keeps the elements in the order of their insertion in the underlying | ||||||||||||||||
2879 | /// vector. To achieve constant time removal, it never deletes any element. | ||||||||||||||||
2880 | SmallVector<PHINode *, 32> NodeList; | ||||||||||||||||
2881 | |||||||||||||||||
2882 | /// Keeps the elements in the underlying set implementation. This (and not the | ||||||||||||||||
2883 | /// NodeList defined above) is the source of truth on whether an element | ||||||||||||||||
2884 | /// is actually in the collection. | ||||||||||||||||
2885 | MapType NodeMap; | ||||||||||||||||
2886 | |||||||||||||||||
2887 | /// Points to the first valid (not deleted) element when the set is not empty | ||||||||||||||||
2888 | /// and the value is not zero. Equals to the size of the underlying vector | ||||||||||||||||
2889 | /// when the set is empty. When the value is 0, as in the beginning, the | ||||||||||||||||
2890 | /// first element may or may not be valid. | ||||||||||||||||
2891 | size_t FirstValidElement = 0; | ||||||||||||||||
2892 | |||||||||||||||||
2893 | public: | ||||||||||||||||
2894 | /// Inserts a new element to the collection. | ||||||||||||||||
2895 | /// \returns true if the element is actually added, i.e. was not in the | ||||||||||||||||
2896 | /// collection before the operation. | ||||||||||||||||
2897 | bool insert(PHINode *Ptr) { | ||||||||||||||||
2898 | if (NodeMap.insert(std::make_pair(Ptr, NodeList.size())).second) { | ||||||||||||||||
2899 | NodeList.push_back(Ptr); | ||||||||||||||||
2900 | return true; | ||||||||||||||||
2901 | } | ||||||||||||||||
2902 | return false; | ||||||||||||||||
2903 | } | ||||||||||||||||
2904 | |||||||||||||||||
2905 | /// Removes the element from the collection. | ||||||||||||||||
2906 | /// \returns whether the element is actually removed, i.e. was in the | ||||||||||||||||
2907 | /// collection before the operation. | ||||||||||||||||
2908 | bool erase(PHINode *Ptr) { | ||||||||||||||||
2909 | auto it = NodeMap.find(Ptr); | ||||||||||||||||
2910 | if (it != NodeMap.end()) { | ||||||||||||||||
2911 | NodeMap.erase(Ptr); | ||||||||||||||||
2912 | SkipRemovedElements(FirstValidElement); | ||||||||||||||||
2913 | return true; | ||||||||||||||||
2914 | } | ||||||||||||||||
2915 | return false; | ||||||||||||||||
2916 | } | ||||||||||||||||
2917 | |||||||||||||||||
2918 | /// Removes all elements and clears the collection. | ||||||||||||||||
2919 | void clear() { | ||||||||||||||||
2920 | NodeMap.clear(); | ||||||||||||||||
2921 | NodeList.clear(); | ||||||||||||||||
2922 | FirstValidElement = 0; | ||||||||||||||||
2923 | } | ||||||||||||||||
2924 | |||||||||||||||||
2925 | /// \returns an iterator that will iterate the elements in the order of | ||||||||||||||||
2926 | /// insertion. | ||||||||||||||||
2927 | iterator begin() { | ||||||||||||||||
2928 | if (FirstValidElement == 0) | ||||||||||||||||
2929 | SkipRemovedElements(FirstValidElement); | ||||||||||||||||
2930 | return PhiNodeSetIterator(this, FirstValidElement); | ||||||||||||||||
2931 | } | ||||||||||||||||
2932 | |||||||||||||||||
2933 | /// \returns an iterator that points to the end of the collection. | ||||||||||||||||
2934 | iterator end() { return PhiNodeSetIterator(this, NodeList.size()); } | ||||||||||||||||
2935 | |||||||||||||||||
2936 | /// Returns the number of elements in the collection. | ||||||||||||||||
2937 | size_t size() const { | ||||||||||||||||
2938 | return NodeMap.size(); | ||||||||||||||||
2939 | } | ||||||||||||||||
2940 | |||||||||||||||||
2941 | /// \returns 1 if the given element is in the collection, and 0 if otherwise. | ||||||||||||||||
2942 | size_t count(PHINode *Ptr) const { | ||||||||||||||||
2943 | return NodeMap.count(Ptr); | ||||||||||||||||
2944 | } | ||||||||||||||||
2945 | |||||||||||||||||
2946 | private: | ||||||||||||||||
2947 | /// Updates the CurrentIndex so that it will point to a valid element. | ||||||||||||||||
2948 | /// | ||||||||||||||||
2949 | /// If the element of NodeList at CurrentIndex is valid, it does not | ||||||||||||||||
2950 | /// change it. If there are no more valid elements, it updates CurrentIndex | ||||||||||||||||
2951 | /// to point to the end of the NodeList. | ||||||||||||||||
2952 | void SkipRemovedElements(size_t &CurrentIndex) { | ||||||||||||||||
2953 | while (CurrentIndex < NodeList.size()) { | ||||||||||||||||
2954 | auto it = NodeMap.find(NodeList[CurrentIndex]); | ||||||||||||||||
2955 | // If the element has been deleted and added again later, NodeMap will | ||||||||||||||||
2956 | // point to a different index, so CurrentIndex will still be invalid. | ||||||||||||||||
2957 | if (it != NodeMap.end() && it->second == CurrentIndex) | ||||||||||||||||
2958 | break; | ||||||||||||||||
2959 | ++CurrentIndex; | ||||||||||||||||
2960 | } | ||||||||||||||||
2961 | } | ||||||||||||||||
2962 | }; | ||||||||||||||||
2963 | |||||||||||||||||
// See the class declaration: Start must be a valid element index or the size
// of the underlying NodeList (i.e. the end position).
PhiNodeSetIterator::PhiNodeSetIterator(PhiNodeSet *const Set, size_t Start)
    : Set(Set), CurrentIndex(Start) {}
2966 | |||||||||||||||||
// Dereference the iterator; dereferencing a past-the-end iterator is caught
// by the assertion below.
PHINode * PhiNodeSetIterator::operator*() const {
  assert(CurrentIndex < Set->NodeList.size() &&((CurrentIndex < Set->NodeList.size() && "PhiNodeSet access out of range" ) ? static_cast<void> (0) : __assert_fail ("CurrentIndex < Set->NodeList.size() && \"PhiNodeSet access out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2969, __PRETTY_FUNCTION__))
         "PhiNodeSet access out of range")((CurrentIndex < Set->NodeList.size() && "PhiNodeSet access out of range" ) ? static_cast<void> (0) : __assert_fail ("CurrentIndex < Set->NodeList.size() && \"PhiNodeSet access out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2969, __PRETTY_FUNCTION__));
  return Set->NodeList[CurrentIndex];
}
2972 | |||||||||||||||||
// Advance to the next valid (not lazily-deleted) element; incrementing a
// past-the-end iterator is caught by the assertion below.
PhiNodeSetIterator& PhiNodeSetIterator::operator++() {
  assert(CurrentIndex < Set->NodeList.size() &&((CurrentIndex < Set->NodeList.size() && "PhiNodeSet access out of range" ) ? static_cast<void> (0) : __assert_fail ("CurrentIndex < Set->NodeList.size() && \"PhiNodeSet access out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2975, __PRETTY_FUNCTION__))
         "PhiNodeSet access out of range")((CurrentIndex < Set->NodeList.size() && "PhiNodeSet access out of range" ) ? static_cast<void> (0) : __assert_fail ("CurrentIndex < Set->NodeList.size() && \"PhiNodeSet access out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 2975, __PRETTY_FUNCTION__));
  ++CurrentIndex;
  // Skip over elements that were removed from the map but remain in the
  // vector (PhiNodeSet never erases from NodeList).
  Set->SkipRemovedElements(CurrentIndex);
  return *this;
}
2980 | |||||||||||||||||
// Only the index is compared; both iterators are assumed to refer to the same
// PhiNodeSet (the Set pointer is not checked).
bool PhiNodeSetIterator::operator==(const PhiNodeSetIterator &RHS) const {
  return CurrentIndex == RHS.CurrentIndex;
}
2984 | |||||||||||||||||
// Defined in terms of operator==, so it inherits the assumption that both
// iterators refer to the same PhiNodeSet.
bool PhiNodeSetIterator::operator!=(const PhiNodeSetIterator &RHS) const {
  return !((*this) == RHS);
}
2988 | |||||||||||||||||
2989 | /// Keep track of simplification of Phi nodes. | ||||||||||||||||
2990 | /// Accept the set of all phi nodes and erase phi node from this set | ||||||||||||||||
2991 | /// if it is simplified. | ||||||||||||||||
class SimplificationTracker {
  // Maps a replaced value to its replacement. Chains may form when a
  // replacement is itself replaced later; Get() follows them transitively.
  DenseMap<Value *, Value *> Storage;
  const SimplifyQuery &SQ;
  // Tracks newly created Phi nodes. The elements are iterated by insertion
  // order.
  PhiNodeSet AllPhiNodes;
  // Tracks newly created Select nodes.
  SmallPtrSet<SelectInst *, 32> AllSelectNodes;

public:
  SimplificationTracker(const SimplifyQuery &sq)
      : SQ(sq) {}
3004 | |||||||||||||||||
3005 | Value *Get(Value *V) { | ||||||||||||||||
3006 | do { | ||||||||||||||||
3007 | auto SV = Storage.find(V); | ||||||||||||||||
3008 | if (SV == Storage.end()) | ||||||||||||||||
3009 | return V; | ||||||||||||||||
3010 | V = SV->second; | ||||||||||||||||
3011 | } while (true); | ||||||||||||||||
3012 | } | ||||||||||||||||
3013 | |||||||||||||||||
3014 | Value *Simplify(Value *Val) { | ||||||||||||||||
3015 | SmallVector<Value *, 32> WorkList; | ||||||||||||||||
3016 | SmallPtrSet<Value *, 32> Visited; | ||||||||||||||||
3017 | WorkList.push_back(Val); | ||||||||||||||||
3018 | while (!WorkList.empty()) { | ||||||||||||||||
3019 | auto P = WorkList.pop_back_val(); | ||||||||||||||||
3020 | if (!Visited.insert(P).second) | ||||||||||||||||
3021 | continue; | ||||||||||||||||
3022 | if (auto *PI = dyn_cast<Instruction>(P)) | ||||||||||||||||
3023 | if (Value *V = SimplifyInstruction(cast<Instruction>(PI), SQ)) { | ||||||||||||||||
3024 | for (auto *U : PI->users()) | ||||||||||||||||
3025 | WorkList.push_back(cast<Value>(U)); | ||||||||||||||||
3026 | Put(PI, V); | ||||||||||||||||
3027 | PI->replaceAllUsesWith(V); | ||||||||||||||||
3028 | if (auto *PHI = dyn_cast<PHINode>(PI)) | ||||||||||||||||
3029 | AllPhiNodes.erase(PHI); | ||||||||||||||||
3030 | if (auto *Select = dyn_cast<SelectInst>(PI)) | ||||||||||||||||
3031 | AllSelectNodes.erase(Select); | ||||||||||||||||
3032 | PI->eraseFromParent(); | ||||||||||||||||
3033 | } | ||||||||||||||||
3034 | } | ||||||||||||||||
3035 | return Get(Val); | ||||||||||||||||
3036 | } | ||||||||||||||||
3037 | |||||||||||||||||
3038 | void Put(Value *From, Value *To) { | ||||||||||||||||
3039 | Storage.insert({ From, To }); | ||||||||||||||||
3040 | } | ||||||||||||||||
3041 | |||||||||||||||||
3042 | void ReplacePhi(PHINode *From, PHINode *To) { | ||||||||||||||||
3043 | Value* OldReplacement = Get(From); | ||||||||||||||||
3044 | while (OldReplacement != From) { | ||||||||||||||||
3045 | From = To; | ||||||||||||||||
3046 | To = dyn_cast<PHINode>(OldReplacement); | ||||||||||||||||
3047 | OldReplacement = Get(From); | ||||||||||||||||
3048 | } | ||||||||||||||||
3049 | assert(Get(To) == To && "Replacement PHI node is already replaced.")((Get(To) == To && "Replacement PHI node is already replaced." ) ? static_cast<void> (0) : __assert_fail ("Get(To) == To && \"Replacement PHI node is already replaced.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3049, __PRETTY_FUNCTION__)); | ||||||||||||||||
3050 | Put(From, To); | ||||||||||||||||
3051 | From->replaceAllUsesWith(To); | ||||||||||||||||
3052 | AllPhiNodes.erase(From); | ||||||||||||||||
3053 | From->eraseFromParent(); | ||||||||||||||||
3054 | } | ||||||||||||||||
3055 | |||||||||||||||||
3056 | PhiNodeSet& newPhiNodes() { return AllPhiNodes; } | ||||||||||||||||
3057 | |||||||||||||||||
3058 | void insertNewPhi(PHINode *PN) { AllPhiNodes.insert(PN); } | ||||||||||||||||
3059 | |||||||||||||||||
3060 | void insertNewSelect(SelectInst *SI) { AllSelectNodes.insert(SI); } | ||||||||||||||||
3061 | |||||||||||||||||
3062 | unsigned countNewPhiNodes() const { return AllPhiNodes.size(); } | ||||||||||||||||
3063 | |||||||||||||||||
3064 | unsigned countNewSelectNodes() const { return AllSelectNodes.size(); } | ||||||||||||||||
3065 | |||||||||||||||||
3066 | void destroyNewNodes(Type *CommonType) { | ||||||||||||||||
3067 | // For safe erasing, replace the uses with dummy value first. | ||||||||||||||||
3068 | auto Dummy = UndefValue::get(CommonType); | ||||||||||||||||
3069 | for (auto I : AllPhiNodes) { | ||||||||||||||||
3070 | I->replaceAllUsesWith(Dummy); | ||||||||||||||||
3071 | I->eraseFromParent(); | ||||||||||||||||
3072 | } | ||||||||||||||||
3073 | AllPhiNodes.clear(); | ||||||||||||||||
3074 | for (auto I : AllSelectNodes) { | ||||||||||||||||
3075 | I->replaceAllUsesWith(Dummy); | ||||||||||||||||
3076 | I->eraseFromParent(); | ||||||||||||||||
3077 | } | ||||||||||||||||
3078 | AllSelectNodes.clear(); | ||||||||||||||||
3079 | } | ||||||||||||||||
3080 | }; | ||||||||||||||||
3081 | |||||||||||||||||
3082 | /// A helper class for combining addressing modes. | ||||||||||||||||
3083 | class AddressingModeCombiner { | ||||||||||||||||
3084 | typedef DenseMap<Value *, Value *> FoldAddrToValueMapping; | ||||||||||||||||
3085 | typedef std::pair<PHINode *, PHINode *> PHIPair; | ||||||||||||||||
3086 | |||||||||||||||||
3087 | private: | ||||||||||||||||
3088 | /// The addressing modes we've collected. | ||||||||||||||||
3089 | SmallVector<ExtAddrMode, 16> AddrModes; | ||||||||||||||||
3090 | |||||||||||||||||
3091 | /// The field in which the AddrModes differ, when we have more than one. | ||||||||||||||||
3092 | ExtAddrMode::FieldName DifferentField = ExtAddrMode::NoField; | ||||||||||||||||
3093 | |||||||||||||||||
3094 | /// Are the AddrModes that we have all just equal to their original values? | ||||||||||||||||
3095 | bool AllAddrModesTrivial = true; | ||||||||||||||||
3096 | |||||||||||||||||
3097 | /// Common Type for all different fields in addressing modes. | ||||||||||||||||
3098 | Type *CommonType; | ||||||||||||||||
3099 | |||||||||||||||||
3100 | /// SimplifyQuery for simplifyInstruction utility. | ||||||||||||||||
3101 | const SimplifyQuery &SQ; | ||||||||||||||||
3102 | |||||||||||||||||
3103 | /// Original Address. | ||||||||||||||||
3104 | Value *Original; | ||||||||||||||||
3105 | |||||||||||||||||
3106 | public: | ||||||||||||||||
3107 | AddressingModeCombiner(const SimplifyQuery &_SQ, Value *OriginalValue) | ||||||||||||||||
3108 | : CommonType(nullptr), SQ(_SQ), Original(OriginalValue) {} | ||||||||||||||||
3109 | |||||||||||||||||
3110 | /// Get the combined AddrMode | ||||||||||||||||
3111 | const ExtAddrMode &getAddrMode() const { | ||||||||||||||||
3112 | return AddrModes[0]; | ||||||||||||||||
3113 | } | ||||||||||||||||
3114 | |||||||||||||||||
3115 | /// Add a new AddrMode if it's compatible with the AddrModes we already | ||||||||||||||||
3116 | /// have. | ||||||||||||||||
3117 | /// \return True iff we succeeded in doing so. | ||||||||||||||||
3118 | bool addNewAddrMode(ExtAddrMode &NewAddrMode) { | ||||||||||||||||
3119 | // Take note of if we have any non-trivial AddrModes, as we need to detect | ||||||||||||||||
3120 | // when all AddrModes are trivial as then we would introduce a phi or select | ||||||||||||||||
3121 | // which just duplicates what's already there. | ||||||||||||||||
3122 | AllAddrModesTrivial = AllAddrModesTrivial && NewAddrMode.isTrivial(); | ||||||||||||||||
3123 | |||||||||||||||||
3124 | // If this is the first addrmode then everything is fine. | ||||||||||||||||
3125 | if (AddrModes.empty()) { | ||||||||||||||||
3126 | AddrModes.emplace_back(NewAddrMode); | ||||||||||||||||
3127 | return true; | ||||||||||||||||
3128 | } | ||||||||||||||||
3129 | |||||||||||||||||
3130 | // Figure out how different this is from the other address modes, which we | ||||||||||||||||
3131 | // can do just by comparing against the first one given that we only care | ||||||||||||||||
3132 | // about the cumulative difference. | ||||||||||||||||
3133 | ExtAddrMode::FieldName ThisDifferentField = | ||||||||||||||||
3134 | AddrModes[0].compare(NewAddrMode); | ||||||||||||||||
3135 | if (DifferentField == ExtAddrMode::NoField) | ||||||||||||||||
3136 | DifferentField = ThisDifferentField; | ||||||||||||||||
3137 | else if (DifferentField != ThisDifferentField) | ||||||||||||||||
3138 | DifferentField = ExtAddrMode::MultipleFields; | ||||||||||||||||
3139 | |||||||||||||||||
3140 | // If NewAddrMode differs in more than one dimension we cannot handle it. | ||||||||||||||||
3141 | bool CanHandle = DifferentField != ExtAddrMode::MultipleFields; | ||||||||||||||||
3142 | |||||||||||||||||
3143 | // If Scale Field is different then we reject. | ||||||||||||||||
3144 | CanHandle = CanHandle && DifferentField != ExtAddrMode::ScaleField; | ||||||||||||||||
3145 | |||||||||||||||||
3146 | // We also must reject the case when base offset is different and | ||||||||||||||||
3147 | // scale reg is not null, we cannot handle this case due to merge of | ||||||||||||||||
3148 | // different offsets will be used as ScaleReg. | ||||||||||||||||
3149 | CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseOffsField || | ||||||||||||||||
3150 | !NewAddrMode.ScaledReg); | ||||||||||||||||
3151 | |||||||||||||||||
3152 | // We also must reject the case when GV is different and BaseReg installed | ||||||||||||||||
3153 | // due to we want to use base reg as a merge of GV values. | ||||||||||||||||
3154 | CanHandle = CanHandle && (DifferentField != ExtAddrMode::BaseGVField || | ||||||||||||||||
3155 | !NewAddrMode.HasBaseReg); | ||||||||||||||||
3156 | |||||||||||||||||
3157 | // Even if NewAddMode is the same we still need to collect it due to | ||||||||||||||||
3158 | // original value is different. And later we will need all original values | ||||||||||||||||
3159 | // as anchors during finding the common Phi node. | ||||||||||||||||
3160 | if (CanHandle) | ||||||||||||||||
3161 | AddrModes.emplace_back(NewAddrMode); | ||||||||||||||||
3162 | else | ||||||||||||||||
3163 | AddrModes.clear(); | ||||||||||||||||
3164 | |||||||||||||||||
3165 | return CanHandle; | ||||||||||||||||
3166 | } | ||||||||||||||||
3167 | |||||||||||||||||
3168 | /// Combine the addressing modes we've collected into a single | ||||||||||||||||
3169 | /// addressing mode. | ||||||||||||||||
3170 | /// \return True iff we successfully combined them or we only had one so | ||||||||||||||||
3171 | /// didn't need to combine them anyway. | ||||||||||||||||
3172 | bool combineAddrModes() { | ||||||||||||||||
3173 | // If we have no AddrModes then they can't be combined. | ||||||||||||||||
3174 | if (AddrModes.size() == 0) | ||||||||||||||||
3175 | return false; | ||||||||||||||||
3176 | |||||||||||||||||
3177 | // A single AddrMode can trivially be combined. | ||||||||||||||||
3178 | if (AddrModes.size() == 1 || DifferentField == ExtAddrMode::NoField) | ||||||||||||||||
3179 | return true; | ||||||||||||||||
3180 | |||||||||||||||||
3181 | // If the AddrModes we collected are all just equal to the value they are | ||||||||||||||||
3182 | // derived from then combining them wouldn't do anything useful. | ||||||||||||||||
3183 | if (AllAddrModesTrivial) | ||||||||||||||||
3184 | return false; | ||||||||||||||||
3185 | |||||||||||||||||
3186 | if (!addrModeCombiningAllowed()) | ||||||||||||||||
3187 | return false; | ||||||||||||||||
3188 | |||||||||||||||||
3189 | // Build a map between <original value, basic block where we saw it> to | ||||||||||||||||
3190 | // value of base register. | ||||||||||||||||
3191 | // Bail out if there is no common type. | ||||||||||||||||
3192 | FoldAddrToValueMapping Map; | ||||||||||||||||
3193 | if (!initializeMap(Map)) | ||||||||||||||||
3194 | return false; | ||||||||||||||||
3195 | |||||||||||||||||
3196 | Value *CommonValue = findCommon(Map); | ||||||||||||||||
3197 | if (CommonValue) | ||||||||||||||||
3198 | AddrModes[0].SetCombinedField(DifferentField, CommonValue, AddrModes); | ||||||||||||||||
3199 | return CommonValue != nullptr; | ||||||||||||||||
3200 | } | ||||||||||||||||
3201 | |||||||||||||||||
3202 | private: | ||||||||||||||||
3203 | /// Initialize Map with anchor values. For address seen | ||||||||||||||||
3204 | /// we set the value of different field saw in this address. | ||||||||||||||||
3205 | /// At the same time we find a common type for different field we will | ||||||||||||||||
3206 | /// use to create new Phi/Select nodes. Keep it in CommonType field. | ||||||||||||||||
3207 | /// Return false if there is no common type found. | ||||||||||||||||
3208 | bool initializeMap(FoldAddrToValueMapping &Map) { | ||||||||||||||||
3209 | // Keep track of keys where the value is null. We will need to replace it | ||||||||||||||||
3210 | // with constant null when we know the common type. | ||||||||||||||||
3211 | SmallVector<Value *, 2> NullValue; | ||||||||||||||||
3212 | Type *IntPtrTy = SQ.DL.getIntPtrType(AddrModes[0].OriginalValue->getType()); | ||||||||||||||||
3213 | for (auto &AM : AddrModes) { | ||||||||||||||||
3214 | Value *DV = AM.GetFieldAsValue(DifferentField, IntPtrTy); | ||||||||||||||||
3215 | if (DV) { | ||||||||||||||||
3216 | auto *Type = DV->getType(); | ||||||||||||||||
3217 | if (CommonType && CommonType != Type) | ||||||||||||||||
3218 | return false; | ||||||||||||||||
3219 | CommonType = Type; | ||||||||||||||||
3220 | Map[AM.OriginalValue] = DV; | ||||||||||||||||
3221 | } else { | ||||||||||||||||
3222 | NullValue.push_back(AM.OriginalValue); | ||||||||||||||||
3223 | } | ||||||||||||||||
3224 | } | ||||||||||||||||
3225 | assert(CommonType && "At least one non-null value must be!")((CommonType && "At least one non-null value must be!" ) ? static_cast<void> (0) : __assert_fail ("CommonType && \"At least one non-null value must be!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3225, __PRETTY_FUNCTION__)); | ||||||||||||||||
3226 | for (auto *V : NullValue) | ||||||||||||||||
3227 | Map[V] = Constant::getNullValue(CommonType); | ||||||||||||||||
3228 | return true; | ||||||||||||||||
3229 | } | ||||||||||||||||
3230 | |||||||||||||||||
3231 | /// We have mapping between value A and other value B where B was a field in | ||||||||||||||||
3232 | /// addressing mode represented by A. Also we have an original value C | ||||||||||||||||
3233 | /// representing an address we start with. Traversing from C through phi and | ||||||||||||||||
3234 | /// selects we ended up with A's in a map. This utility function tries to find | ||||||||||||||||
3235 | /// a value V which is a field in addressing mode C and traversing through phi | ||||||||||||||||
3236 | /// nodes and selects we will end up in corresponded values B in a map. | ||||||||||||||||
3237 | /// The utility will create a new Phi/Selects if needed. | ||||||||||||||||
3238 | // The simple example looks as follows: | ||||||||||||||||
3239 | // BB1: | ||||||||||||||||
3240 | // p1 = b1 + 40 | ||||||||||||||||
3241 | // br cond BB2, BB3 | ||||||||||||||||
3242 | // BB2: | ||||||||||||||||
3243 | // p2 = b2 + 40 | ||||||||||||||||
3244 | // br BB3 | ||||||||||||||||
3245 | // BB3: | ||||||||||||||||
3246 | // p = phi [p1, BB1], [p2, BB2] | ||||||||||||||||
3247 | // v = load p | ||||||||||||||||
3248 | // Map is | ||||||||||||||||
3249 | // p1 -> b1 | ||||||||||||||||
3250 | // p2 -> b2 | ||||||||||||||||
3251 | // Request is | ||||||||||||||||
3252 | // p -> ? | ||||||||||||||||
3253 | // The function tries to find or build phi [b1, BB1], [b2, BB2] in BB3. | ||||||||||||||||
3254 | Value *findCommon(FoldAddrToValueMapping &Map) { | ||||||||||||||||
3255 | // Tracks the simplification of newly created phi nodes. The reason we use | ||||||||||||||||
3256 | // this mapping is because we will add new created Phi nodes in AddrToBase. | ||||||||||||||||
3257 | // Simplification of Phi nodes is recursive, so some Phi node may | ||||||||||||||||
3258 | // be simplified after we added it to AddrToBase. In reality this | ||||||||||||||||
3259 | // simplification is possible only if original phi/selects were not | ||||||||||||||||
3260 | // simplified yet. | ||||||||||||||||
3261 | // Using this mapping we can find the current value in AddrToBase. | ||||||||||||||||
3262 | SimplificationTracker ST(SQ); | ||||||||||||||||
3263 | |||||||||||||||||
3264 | // First step, DFS to create PHI nodes for all intermediate blocks. | ||||||||||||||||
3265 | // Also fill traverse order for the second step. | ||||||||||||||||
3266 | SmallVector<Value *, 32> TraverseOrder; | ||||||||||||||||
3267 | InsertPlaceholders(Map, TraverseOrder, ST); | ||||||||||||||||
3268 | |||||||||||||||||
3269 | // Second Step, fill new nodes by merged values and simplify if possible. | ||||||||||||||||
3270 | FillPlaceholders(Map, TraverseOrder, ST); | ||||||||||||||||
3271 | |||||||||||||||||
3272 | if (!AddrSinkNewSelects && ST.countNewSelectNodes() > 0) { | ||||||||||||||||
3273 | ST.destroyNewNodes(CommonType); | ||||||||||||||||
3274 | return nullptr; | ||||||||||||||||
3275 | } | ||||||||||||||||
3276 | |||||||||||||||||
3277 | // Now we'd like to match New Phi nodes to existed ones. | ||||||||||||||||
3278 | unsigned PhiNotMatchedCount = 0; | ||||||||||||||||
3279 | if (!MatchPhiSet(ST, AddrSinkNewPhis, PhiNotMatchedCount)) { | ||||||||||||||||
3280 | ST.destroyNewNodes(CommonType); | ||||||||||||||||
3281 | return nullptr; | ||||||||||||||||
3282 | } | ||||||||||||||||
3283 | |||||||||||||||||
3284 | auto *Result = ST.Get(Map.find(Original)->second); | ||||||||||||||||
3285 | if (Result) { | ||||||||||||||||
3286 | NumMemoryInstsPhiCreated += ST.countNewPhiNodes() + PhiNotMatchedCount; | ||||||||||||||||
3287 | NumMemoryInstsSelectCreated += ST.countNewSelectNodes(); | ||||||||||||||||
3288 | } | ||||||||||||||||
3289 | return Result; | ||||||||||||||||
3290 | } | ||||||||||||||||
3291 | |||||||||||||||||
3292 | /// Try to match PHI node to Candidate. | ||||||||||||||||
3293 | /// Matcher tracks the matched Phi nodes. | ||||||||||||||||
3294 | bool MatchPhiNode(PHINode *PHI, PHINode *Candidate, | ||||||||||||||||
3295 | SmallSetVector<PHIPair, 8> &Matcher, | ||||||||||||||||
3296 | PhiNodeSet &PhiNodesToMatch) { | ||||||||||||||||
3297 | SmallVector<PHIPair, 8> WorkList; | ||||||||||||||||
3298 | Matcher.insert({ PHI, Candidate }); | ||||||||||||||||
3299 | SmallSet<PHINode *, 8> MatchedPHIs; | ||||||||||||||||
3300 | MatchedPHIs.insert(PHI); | ||||||||||||||||
3301 | WorkList.push_back({ PHI, Candidate }); | ||||||||||||||||
3302 | SmallSet<PHIPair, 8> Visited; | ||||||||||||||||
3303 | while (!WorkList.empty()) { | ||||||||||||||||
3304 | auto Item = WorkList.pop_back_val(); | ||||||||||||||||
3305 | if (!Visited.insert(Item).second) | ||||||||||||||||
3306 | continue; | ||||||||||||||||
3307 | // We iterate over all incoming values to Phi to compare them. | ||||||||||||||||
3308 | // If values are different and both of them Phi and the first one is a | ||||||||||||||||
3309 | // Phi we added (subject to match) and both of them is in the same basic | ||||||||||||||||
3310 | // block then we can match our pair if values match. So we state that | ||||||||||||||||
3311 | // these values match and add it to work list to verify that. | ||||||||||||||||
3312 | for (auto B : Item.first->blocks()) { | ||||||||||||||||
3313 | Value *FirstValue = Item.first->getIncomingValueForBlock(B); | ||||||||||||||||
3314 | Value *SecondValue = Item.second->getIncomingValueForBlock(B); | ||||||||||||||||
3315 | if (FirstValue == SecondValue) | ||||||||||||||||
3316 | continue; | ||||||||||||||||
3317 | |||||||||||||||||
3318 | PHINode *FirstPhi = dyn_cast<PHINode>(FirstValue); | ||||||||||||||||
3319 | PHINode *SecondPhi = dyn_cast<PHINode>(SecondValue); | ||||||||||||||||
3320 | |||||||||||||||||
3321 | // One of them is not Phi or | ||||||||||||||||
3322 | // The first one is not Phi node from the set we'd like to match or | ||||||||||||||||
3323 | // Phi nodes from different basic blocks then | ||||||||||||||||
3324 | // we will not be able to match. | ||||||||||||||||
3325 | if (!FirstPhi || !SecondPhi || !PhiNodesToMatch.count(FirstPhi) || | ||||||||||||||||
3326 | FirstPhi->getParent() != SecondPhi->getParent()) | ||||||||||||||||
3327 | return false; | ||||||||||||||||
3328 | |||||||||||||||||
3329 | // If we already matched them then continue. | ||||||||||||||||
3330 | if (Matcher.count({ FirstPhi, SecondPhi })) | ||||||||||||||||
3331 | continue; | ||||||||||||||||
3332 | // So the values are different and does not match. So we need them to | ||||||||||||||||
3333 | // match. (But we register no more than one match per PHI node, so that | ||||||||||||||||
3334 | // we won't later try to replace them twice.) | ||||||||||||||||
3335 | if (MatchedPHIs.insert(FirstPhi).second) | ||||||||||||||||
3336 | Matcher.insert({ FirstPhi, SecondPhi }); | ||||||||||||||||
3337 | // But me must check it. | ||||||||||||||||
3338 | WorkList.push_back({ FirstPhi, SecondPhi }); | ||||||||||||||||
3339 | } | ||||||||||||||||
3340 | } | ||||||||||||||||
3341 | return true; | ||||||||||||||||
3342 | } | ||||||||||||||||
3343 | |||||||||||||||||
3344 | /// For the given set of PHI nodes (in the SimplificationTracker) try | ||||||||||||||||
3345 | /// to find their equivalents. | ||||||||||||||||
3346 | /// Returns false if this matching fails and creation of new Phi is disabled. | ||||||||||||||||
3347 | bool MatchPhiSet(SimplificationTracker &ST, bool AllowNewPhiNodes, | ||||||||||||||||
3348 | unsigned &PhiNotMatchedCount) { | ||||||||||||||||
3349 | // Matched and PhiNodesToMatch iterate their elements in a deterministic | ||||||||||||||||
3350 | // order, so the replacements (ReplacePhi) are also done in a deterministic | ||||||||||||||||
3351 | // order. | ||||||||||||||||
3352 | SmallSetVector<PHIPair, 8> Matched; | ||||||||||||||||
3353 | SmallPtrSet<PHINode *, 8> WillNotMatch; | ||||||||||||||||
3354 | PhiNodeSet &PhiNodesToMatch = ST.newPhiNodes(); | ||||||||||||||||
3355 | while (PhiNodesToMatch.size()) { | ||||||||||||||||
3356 | PHINode *PHI = *PhiNodesToMatch.begin(); | ||||||||||||||||
3357 | |||||||||||||||||
3358 | // Add us, if no Phi nodes in the basic block we do not match. | ||||||||||||||||
3359 | WillNotMatch.clear(); | ||||||||||||||||
3360 | WillNotMatch.insert(PHI); | ||||||||||||||||
3361 | |||||||||||||||||
3362 | // Traverse all Phis until we found equivalent or fail to do that. | ||||||||||||||||
3363 | bool IsMatched = false; | ||||||||||||||||
3364 | for (auto &P : PHI->getParent()->phis()) { | ||||||||||||||||
3365 | if (&P == PHI) | ||||||||||||||||
3366 | continue; | ||||||||||||||||
3367 | if ((IsMatched = MatchPhiNode(PHI, &P, Matched, PhiNodesToMatch))) | ||||||||||||||||
3368 | break; | ||||||||||||||||
3369 | // If it does not match, collect all Phi nodes from matcher. | ||||||||||||||||
3370 | // if we end up with no match, them all these Phi nodes will not match | ||||||||||||||||
3371 | // later. | ||||||||||||||||
3372 | for (auto M : Matched) | ||||||||||||||||
3373 | WillNotMatch.insert(M.first); | ||||||||||||||||
3374 | Matched.clear(); | ||||||||||||||||
3375 | } | ||||||||||||||||
3376 | if (IsMatched) { | ||||||||||||||||
3377 | // Replace all matched values and erase them. | ||||||||||||||||
3378 | for (auto MV : Matched) | ||||||||||||||||
3379 | ST.ReplacePhi(MV.first, MV.second); | ||||||||||||||||
3380 | Matched.clear(); | ||||||||||||||||
3381 | continue; | ||||||||||||||||
3382 | } | ||||||||||||||||
3383 | // If we are not allowed to create new nodes then bail out. | ||||||||||||||||
3384 | if (!AllowNewPhiNodes) | ||||||||||||||||
3385 | return false; | ||||||||||||||||
3386 | // Just remove all seen values in matcher. They will not match anything. | ||||||||||||||||
3387 | PhiNotMatchedCount += WillNotMatch.size(); | ||||||||||||||||
3388 | for (auto *P : WillNotMatch) | ||||||||||||||||
3389 | PhiNodesToMatch.erase(P); | ||||||||||||||||
3390 | } | ||||||||||||||||
3391 | return true; | ||||||||||||||||
3392 | } | ||||||||||||||||
3393 | /// Fill the placeholders with values from predecessors and simplify them. | ||||||||||||||||
3394 | void FillPlaceholders(FoldAddrToValueMapping &Map, | ||||||||||||||||
3395 | SmallVectorImpl<Value *> &TraverseOrder, | ||||||||||||||||
3396 | SimplificationTracker &ST) { | ||||||||||||||||
3397 | while (!TraverseOrder.empty()) { | ||||||||||||||||
3398 | Value *Current = TraverseOrder.pop_back_val(); | ||||||||||||||||
3399 | assert(Map.find(Current) != Map.end() && "No node to fill!!!")((Map.find(Current) != Map.end() && "No node to fill!!!" ) ? static_cast<void> (0) : __assert_fail ("Map.find(Current) != Map.end() && \"No node to fill!!!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3399, __PRETTY_FUNCTION__)); | ||||||||||||||||
3400 | Value *V = Map[Current]; | ||||||||||||||||
3401 | |||||||||||||||||
3402 | if (SelectInst *Select = dyn_cast<SelectInst>(V)) { | ||||||||||||||||
3403 | // CurrentValue also must be Select. | ||||||||||||||||
3404 | auto *CurrentSelect = cast<SelectInst>(Current); | ||||||||||||||||
3405 | auto *TrueValue = CurrentSelect->getTrueValue(); | ||||||||||||||||
3406 | assert(Map.find(TrueValue) != Map.end() && "No True Value!")((Map.find(TrueValue) != Map.end() && "No True Value!" ) ? static_cast<void> (0) : __assert_fail ("Map.find(TrueValue) != Map.end() && \"No True Value!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3406, __PRETTY_FUNCTION__)); | ||||||||||||||||
3407 | Select->setTrueValue(ST.Get(Map[TrueValue])); | ||||||||||||||||
3408 | auto *FalseValue = CurrentSelect->getFalseValue(); | ||||||||||||||||
3409 | assert(Map.find(FalseValue) != Map.end() && "No False Value!")((Map.find(FalseValue) != Map.end() && "No False Value!" ) ? static_cast<void> (0) : __assert_fail ("Map.find(FalseValue) != Map.end() && \"No False Value!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3409, __PRETTY_FUNCTION__)); | ||||||||||||||||
3410 | Select->setFalseValue(ST.Get(Map[FalseValue])); | ||||||||||||||||
3411 | } else { | ||||||||||||||||
3412 | // Must be a Phi node then. | ||||||||||||||||
3413 | PHINode *PHI = cast<PHINode>(V); | ||||||||||||||||
3414 | auto *CurrentPhi = dyn_cast<PHINode>(Current); | ||||||||||||||||
3415 | // Fill the Phi node with values from predecessors. | ||||||||||||||||
3416 | for (auto B : predecessors(PHI->getParent())) { | ||||||||||||||||
3417 | Value *PV = CurrentPhi->getIncomingValueForBlock(B); | ||||||||||||||||
3418 | assert(Map.find(PV) != Map.end() && "No predecessor Value!")((Map.find(PV) != Map.end() && "No predecessor Value!" ) ? static_cast<void> (0) : __assert_fail ("Map.find(PV) != Map.end() && \"No predecessor Value!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3418, __PRETTY_FUNCTION__)); | ||||||||||||||||
3419 | PHI->addIncoming(ST.Get(Map[PV]), B); | ||||||||||||||||
3420 | } | ||||||||||||||||
3421 | } | ||||||||||||||||
3422 | Map[Current] = ST.Simplify(V); | ||||||||||||||||
3423 | } | ||||||||||||||||
3424 | } | ||||||||||||||||
3425 | |||||||||||||||||
3426 | /// Starting from original value recursively iterates over def-use chain up to | ||||||||||||||||
3427 | /// known ending values represented in a map. For each traversed phi/select | ||||||||||||||||
3428 | /// inserts a placeholder Phi or Select. | ||||||||||||||||
3429 | /// Reports all new created Phi/Select nodes by adding them to set. | ||||||||||||||||
3430 | /// Also reports and order in what values have been traversed. | ||||||||||||||||
3431 | void InsertPlaceholders(FoldAddrToValueMapping &Map, | ||||||||||||||||
3432 | SmallVectorImpl<Value *> &TraverseOrder, | ||||||||||||||||
3433 | SimplificationTracker &ST) { | ||||||||||||||||
3434 | SmallVector<Value *, 32> Worklist; | ||||||||||||||||
3435 | assert((isa<PHINode>(Original) || isa<SelectInst>(Original)) &&(((isa<PHINode>(Original) || isa<SelectInst>(Original )) && "Address must be a Phi or Select node") ? static_cast <void> (0) : __assert_fail ("(isa<PHINode>(Original) || isa<SelectInst>(Original)) && \"Address must be a Phi or Select node\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3436, __PRETTY_FUNCTION__)) | ||||||||||||||||
3436 | "Address must be a Phi or Select node")(((isa<PHINode>(Original) || isa<SelectInst>(Original )) && "Address must be a Phi or Select node") ? static_cast <void> (0) : __assert_fail ("(isa<PHINode>(Original) || isa<SelectInst>(Original)) && \"Address must be a Phi or Select node\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3436, __PRETTY_FUNCTION__)); | ||||||||||||||||
3437 | auto *Dummy = UndefValue::get(CommonType); | ||||||||||||||||
3438 | Worklist.push_back(Original); | ||||||||||||||||
3439 | while (!Worklist.empty()) { | ||||||||||||||||
3440 | Value *Current = Worklist.pop_back_val(); | ||||||||||||||||
3441 | // if it is already visited or it is an ending value then skip it. | ||||||||||||||||
3442 | if (Map.find(Current) != Map.end()) | ||||||||||||||||
3443 | continue; | ||||||||||||||||
3444 | TraverseOrder.push_back(Current); | ||||||||||||||||
3445 | |||||||||||||||||
3446 | // CurrentValue must be a Phi node or select. All others must be covered | ||||||||||||||||
3447 | // by anchors. | ||||||||||||||||
3448 | if (SelectInst *CurrentSelect = dyn_cast<SelectInst>(Current)) { | ||||||||||||||||
3449 | // Is it OK to get metadata from OrigSelect?! | ||||||||||||||||
3450 | // Create a Select placeholder with dummy value. | ||||||||||||||||
3451 | SelectInst *Select = SelectInst::Create( | ||||||||||||||||
3452 | CurrentSelect->getCondition(), Dummy, Dummy, | ||||||||||||||||
3453 | CurrentSelect->getName(), CurrentSelect, CurrentSelect); | ||||||||||||||||
3454 | Map[Current] = Select; | ||||||||||||||||
3455 | ST.insertNewSelect(Select); | ||||||||||||||||
3456 | // We are interested in True and False values. | ||||||||||||||||
3457 | Worklist.push_back(CurrentSelect->getTrueValue()); | ||||||||||||||||
3458 | Worklist.push_back(CurrentSelect->getFalseValue()); | ||||||||||||||||
3459 | } else { | ||||||||||||||||
3460 | // It must be a Phi node then. | ||||||||||||||||
3461 | PHINode *CurrentPhi = cast<PHINode>(Current); | ||||||||||||||||
3462 | unsigned PredCount = CurrentPhi->getNumIncomingValues(); | ||||||||||||||||
3463 | PHINode *PHI = | ||||||||||||||||
3464 | PHINode::Create(CommonType, PredCount, "sunk_phi", CurrentPhi); | ||||||||||||||||
3465 | Map[Current] = PHI; | ||||||||||||||||
3466 | ST.insertNewPhi(PHI); | ||||||||||||||||
3467 | for (Value *P : CurrentPhi->incoming_values()) | ||||||||||||||||
3468 | Worklist.push_back(P); | ||||||||||||||||
3469 | } | ||||||||||||||||
3470 | } | ||||||||||||||||
3471 | } | ||||||||||||||||
3472 | |||||||||||||||||
3473 | bool addrModeCombiningAllowed() { | ||||||||||||||||
3474 | if (DisableComplexAddrModes) | ||||||||||||||||
3475 | return false; | ||||||||||||||||
3476 | switch (DifferentField) { | ||||||||||||||||
3477 | default: | ||||||||||||||||
3478 | return false; | ||||||||||||||||
3479 | case ExtAddrMode::BaseRegField: | ||||||||||||||||
3480 | return AddrSinkCombineBaseReg; | ||||||||||||||||
3481 | case ExtAddrMode::BaseGVField: | ||||||||||||||||
3482 | return AddrSinkCombineBaseGV; | ||||||||||||||||
3483 | case ExtAddrMode::BaseOffsField: | ||||||||||||||||
3484 | return AddrSinkCombineBaseOffs; | ||||||||||||||||
3485 | case ExtAddrMode::ScaledRegField: | ||||||||||||||||
3486 | return AddrSinkCombineScaledReg; | ||||||||||||||||
3487 | } | ||||||||||||||||
3488 | } | ||||||||||||||||
3489 | }; | ||||||||||||||||
3490 | } // end anonymous namespace | ||||||||||||||||
3491 | |||||||||||||||||
3492 | /// Try adding ScaleReg*Scale to the current addressing mode. | ||||||||||||||||
3493 | /// Return true and update AddrMode if this addr mode is legal for the target, | ||||||||||||||||
3494 | /// false if not. | ||||||||||||||||
3495 | bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale, | ||||||||||||||||
3496 | unsigned Depth) { | ||||||||||||||||
3497 | // If Scale is 1, then this is the same as adding ScaleReg to the addressing | ||||||||||||||||
3498 | // mode. Just process that directly. | ||||||||||||||||
3499 | if (Scale == 1) | ||||||||||||||||
3500 | return matchAddr(ScaleReg, Depth); | ||||||||||||||||
3501 | |||||||||||||||||
3502 | // If the scale is 0, it takes nothing to add this. | ||||||||||||||||
3503 | if (Scale == 0) | ||||||||||||||||
3504 | return true; | ||||||||||||||||
3505 | |||||||||||||||||
3506 | // If we already have a scale of this value, we can add to it, otherwise, we | ||||||||||||||||
3507 | // need an available scale field. | ||||||||||||||||
3508 | if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg) | ||||||||||||||||
3509 | return false; | ||||||||||||||||
3510 | |||||||||||||||||
3511 | ExtAddrMode TestAddrMode = AddrMode; | ||||||||||||||||
3512 | |||||||||||||||||
3513 | // Add scale to turn X*4+X*3 -> X*7. This could also do things like | ||||||||||||||||
3514 | // [A+B + A*7] -> [B+A*8]. | ||||||||||||||||
3515 | TestAddrMode.Scale += Scale; | ||||||||||||||||
3516 | TestAddrMode.ScaledReg = ScaleReg; | ||||||||||||||||
3517 | |||||||||||||||||
3518 | // If the new address isn't legal, bail out. | ||||||||||||||||
3519 | if (!TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) | ||||||||||||||||
3520 | return false; | ||||||||||||||||
3521 | |||||||||||||||||
3522 | // It was legal, so commit it. | ||||||||||||||||
3523 | AddrMode = TestAddrMode; | ||||||||||||||||
3524 | |||||||||||||||||
3525 | // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now | ||||||||||||||||
3526 | // to see if ScaleReg is actually X+C. If so, we can turn this into adding | ||||||||||||||||
3527 | // X*Scale + C*Scale to addr mode. | ||||||||||||||||
3528 | ConstantInt *CI = nullptr; Value *AddLHS = nullptr; | ||||||||||||||||
3529 | if (isa<Instruction>(ScaleReg) && // not a constant expr. | ||||||||||||||||
3530 | match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) { | ||||||||||||||||
3531 | TestAddrMode.InBounds = false; | ||||||||||||||||
3532 | TestAddrMode.ScaledReg = AddLHS; | ||||||||||||||||
3533 | TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale; | ||||||||||||||||
3534 | |||||||||||||||||
3535 | // If this addressing mode is legal, commit it and remember that we folded | ||||||||||||||||
3536 | // this instruction. | ||||||||||||||||
3537 | if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace)) { | ||||||||||||||||
3538 | AddrModeInsts.push_back(cast<Instruction>(ScaleReg)); | ||||||||||||||||
3539 | AddrMode = TestAddrMode; | ||||||||||||||||
3540 | return true; | ||||||||||||||||
3541 | } | ||||||||||||||||
3542 | } | ||||||||||||||||
3543 | |||||||||||||||||
3544 | // Otherwise, not (x+c)*scale, just return what we have. | ||||||||||||||||
3545 | return true; | ||||||||||||||||
3546 | } | ||||||||||||||||
3547 | |||||||||||||||||
3548 | /// This is a little filter, which returns true if an addressing computation | ||||||||||||||||
3549 | /// involving I might be folded into a load/store accessing it. | ||||||||||||||||
3550 | /// This doesn't need to be perfect, but needs to accept at least | ||||||||||||||||
3551 | /// the set of instructions that MatchOperationAddr can. | ||||||||||||||||
3552 | static bool MightBeFoldableInst(Instruction *I) { | ||||||||||||||||
3553 | switch (I->getOpcode()) { | ||||||||||||||||
3554 | case Instruction::BitCast: | ||||||||||||||||
3555 | case Instruction::AddrSpaceCast: | ||||||||||||||||
3556 | // Don't touch identity bitcasts. | ||||||||||||||||
3557 | if (I->getType() == I->getOperand(0)->getType()) | ||||||||||||||||
3558 | return false; | ||||||||||||||||
3559 | return I->getType()->isIntOrPtrTy(); | ||||||||||||||||
3560 | case Instruction::PtrToInt: | ||||||||||||||||
3561 | // PtrToInt is always a noop, as we know that the int type is pointer sized. | ||||||||||||||||
3562 | return true; | ||||||||||||||||
3563 | case Instruction::IntToPtr: | ||||||||||||||||
3564 | // We know the input is intptr_t, so this is foldable. | ||||||||||||||||
3565 | return true; | ||||||||||||||||
3566 | case Instruction::Add: | ||||||||||||||||
3567 | return true; | ||||||||||||||||
3568 | case Instruction::Mul: | ||||||||||||||||
3569 | case Instruction::Shl: | ||||||||||||||||
3570 | // Can only handle X*C and X << C. | ||||||||||||||||
3571 | return isa<ConstantInt>(I->getOperand(1)); | ||||||||||||||||
3572 | case Instruction::GetElementPtr: | ||||||||||||||||
3573 | return true; | ||||||||||||||||
3574 | default: | ||||||||||||||||
3575 | return false; | ||||||||||||||||
3576 | } | ||||||||||||||||
3577 | } | ||||||||||||||||
3578 | |||||||||||||||||
3579 | /// Check whether or not \p Val is a legal instruction for \p TLI. | ||||||||||||||||
3580 | /// \note \p Val is assumed to be the product of some type promotion. | ||||||||||||||||
3581 | /// Therefore if \p Val has an undefined state in \p TLI, this is assumed | ||||||||||||||||
3582 | /// to be legal, as the non-promoted value would have had the same state. | ||||||||||||||||
3583 | static bool isPromotedInstructionLegal(const TargetLowering &TLI, | ||||||||||||||||
3584 | const DataLayout &DL, Value *Val) { | ||||||||||||||||
3585 | Instruction *PromotedInst = dyn_cast<Instruction>(Val); | ||||||||||||||||
3586 | if (!PromotedInst) | ||||||||||||||||
3587 | return false; | ||||||||||||||||
3588 | int ISDOpcode = TLI.InstructionOpcodeToISD(PromotedInst->getOpcode()); | ||||||||||||||||
3589 | // If the ISDOpcode is undefined, it was undefined before the promotion. | ||||||||||||||||
3590 | if (!ISDOpcode) | ||||||||||||||||
3591 | return true; | ||||||||||||||||
3592 | // Otherwise, check if the promoted instruction is legal or not. | ||||||||||||||||
3593 | return TLI.isOperationLegalOrCustom( | ||||||||||||||||
3594 | ISDOpcode, TLI.getValueType(DL, PromotedInst->getType())); | ||||||||||||||||
3595 | } | ||||||||||||||||
3596 | |||||||||||||||||
namespace {

/// Helper class to perform type promotion.
/// Given a sign/zero extension, it decides whether the extension can be
/// hoisted through its operand and, if so, hands back the routine that
/// performs the rewrite (see getAction).
class TypePromotionHelper {
  /// Utility function to add a promoted instruction \p ExtOpnd to
  /// \p PromotedInsts and record the type of extension we have seen.
  static void addPromotedInst(InstrToOrigTy &PromotedInsts,
                              Instruction *ExtOpnd,
                              bool IsSExt) {
    ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
    InstrToOrigTy::iterator It = PromotedInsts.find(ExtOpnd);
    if (It != PromotedInsts.end()) {
      // If the new extension is same as original, the information in
      // PromotedInsts[ExtOpnd] is still correct.
      if (It->second.getInt() == ExtTy)
        return;

      // Now the new extension is different from old extension, we make
      // the type information invalid by setting extension type to
      // BothExtension.
      ExtTy = BothExtension;
    }
    PromotedInsts[ExtOpnd] = TypeIsSExt(ExtOpnd->getType(), ExtTy);
  }

  /// Utility function to query the original type of instruction \p Opnd
  /// with a matched extension type. If the extension doesn't match, we
  /// cannot use the information we had on the original type.
  /// BothExtension doesn't match any extension type.
  static const Type *getOrigType(const InstrToOrigTy &PromotedInsts,
                                 Instruction *Opnd,
                                 bool IsSExt) {
    ExtType ExtTy = IsSExt ? SignExtension : ZeroExtension;
    InstrToOrigTy::const_iterator It = PromotedInsts.find(Opnd);
    // Only a record made with the same extension kind is trustworthy.
    if (It != PromotedInsts.end() && It->second.getInt() == ExtTy)
      return It->second.getPointer();
    return nullptr;
  }

  /// Utility function to check whether or not a sign or zero extension
  /// of \p Inst with \p ConsideredExtType can be moved through \p Inst by
  /// either using the operands of \p Inst or promoting \p Inst.
  /// The type of the extension is defined by \p IsSExt.
  /// In other words, check if:
  /// ext (Ty Inst opnd1 opnd2 ... opndN) to ConsideredExtType.
  /// #1 Promotion applies:
  /// ConsideredExtType Inst (ext opnd1 to ConsideredExtType, ...).
  /// #2 Operand reuses:
  /// ext opnd1 to ConsideredExtType.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  static bool canGetThrough(const Instruction *Inst, Type *ConsideredExtType,
                            const InstrToOrigTy &PromotedInsts, bool IsSExt);

  /// Utility function to determine if \p OpIdx should be promoted when
  /// promoting \p Inst.
  /// The select condition (operand 0) keeps its i1 type and must not be
  /// widened; every other operand is fair game.
  static bool shouldExtOperand(const Instruction *Inst, int OpIdx) {
    return !(isa<SelectInst>(Inst) && OpIdx == 0);
  }

  /// Utility function to promote the operand of \p Ext when this
  /// operand is a promotable trunc or sext or zext.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  /// \p CreatedInstsCost[out] contains the cost of all instructions
  /// created to promote the operand of Ext.
  /// Newly added extensions are inserted in \p Exts.
  /// Newly added truncates are inserted in \p Truncs.
  /// Should never be called directly.
  /// \return The promoted value which is used instead of Ext.
  static Value *promoteOperandForTruncAndAnyExt(
      Instruction *Ext, TypePromotionTransaction &TPT,
      InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
      SmallVectorImpl<Instruction *> *Exts,
      SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI);

  /// Utility function to promote the operand of \p Ext when this
  /// operand is promotable and is not a supported trunc or sext.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  /// \p CreatedInstsCost[out] contains the cost of all the instructions
  /// created to promote the operand of Ext.
  /// Newly added extensions are inserted in \p Exts.
  /// Newly added truncates are inserted in \p Truncs.
  /// Should never be called directly.
  /// \return The promoted value which is used instead of Ext.
  static Value *promoteOperandForOther(Instruction *Ext,
                                       TypePromotionTransaction &TPT,
                                       InstrToOrigTy &PromotedInsts,
                                       unsigned &CreatedInstsCost,
                                       SmallVectorImpl<Instruction *> *Exts,
                                       SmallVectorImpl<Instruction *> *Truncs,
                                       const TargetLowering &TLI, bool IsSExt);

  /// \see promoteOperandForOther.
  static Value *signExtendOperandForOther(
      Instruction *Ext, TypePromotionTransaction &TPT,
      InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
      SmallVectorImpl<Instruction *> *Exts,
      SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
    return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
                                  Exts, Truncs, TLI, true);
  }

  /// \see promoteOperandForOther.
  static Value *zeroExtendOperandForOther(
      Instruction *Ext, TypePromotionTransaction &TPT,
      InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost,
      SmallVectorImpl<Instruction *> *Exts,
      SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) {
    return promoteOperandForOther(Ext, TPT, PromotedInsts, CreatedInstsCost,
                                  Exts, Truncs, TLI, false);
  }

public:
  /// Type for the utility function that promotes the operand of Ext.
  using Action = Value *(*)(Instruction *Ext, TypePromotionTransaction &TPT,
                            InstrToOrigTy &PromotedInsts,
                            unsigned &CreatedInstsCost,
                            SmallVectorImpl<Instruction *> *Exts,
                            SmallVectorImpl<Instruction *> *Truncs,
                            const TargetLowering &TLI);

  /// Given a sign/zero extend instruction \p Ext, return the appropriate
  /// action to promote the operand of \p Ext instead of using Ext.
  /// \return NULL if no promotable action is possible with the current
  /// sign extension.
  /// \p InsertedInsts keeps track of all the instructions inserted by the
  /// other CodeGenPrepare optimizations. This information is important
  /// because we do not want to promote these instructions as CodeGenPrepare
  /// will reinsert them later. Thus creating an infinite loop: create/remove.
  /// \p PromotedInsts maps the instructions to their type before promotion.
  static Action getAction(Instruction *Ext, const SetOfInstrs &InsertedInsts,
                          const TargetLowering &TLI,
                          const InstrToOrigTy &PromotedInsts);
};

} // end anonymous namespace
3732 | |||||||||||||||||
3733 | bool TypePromotionHelper::canGetThrough(const Instruction *Inst, | ||||||||||||||||
3734 | Type *ConsideredExtType, | ||||||||||||||||
3735 | const InstrToOrigTy &PromotedInsts, | ||||||||||||||||
3736 | bool IsSExt) { | ||||||||||||||||
3737 | // The promotion helper does not know how to deal with vector types yet. | ||||||||||||||||
3738 | // To be able to fix that, we would need to fix the places where we | ||||||||||||||||
3739 | // statically extend, e.g., constants and such. | ||||||||||||||||
3740 | if (Inst->getType()->isVectorTy()) | ||||||||||||||||
3741 | return false; | ||||||||||||||||
3742 | |||||||||||||||||
3743 | // We can always get through zext. | ||||||||||||||||
3744 | if (isa<ZExtInst>(Inst)) | ||||||||||||||||
3745 | return true; | ||||||||||||||||
3746 | |||||||||||||||||
3747 | // sext(sext) is ok too. | ||||||||||||||||
3748 | if (IsSExt && isa<SExtInst>(Inst)) | ||||||||||||||||
3749 | return true; | ||||||||||||||||
3750 | |||||||||||||||||
3751 | // We can get through binary operator, if it is legal. In other words, the | ||||||||||||||||
3752 | // binary operator must have a nuw or nsw flag. | ||||||||||||||||
3753 | const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst); | ||||||||||||||||
3754 | if (BinOp && isa<OverflowingBinaryOperator>(BinOp) && | ||||||||||||||||
3755 | ((!IsSExt && BinOp->hasNoUnsignedWrap()) || | ||||||||||||||||
3756 | (IsSExt && BinOp->hasNoSignedWrap()))) | ||||||||||||||||
3757 | return true; | ||||||||||||||||
3758 | |||||||||||||||||
3759 | // ext(and(opnd, cst)) --> and(ext(opnd), ext(cst)) | ||||||||||||||||
3760 | if ((Inst->getOpcode() == Instruction::And || | ||||||||||||||||
3761 | Inst->getOpcode() == Instruction::Or)) | ||||||||||||||||
3762 | return true; | ||||||||||||||||
3763 | |||||||||||||||||
3764 | // ext(xor(opnd, cst)) --> xor(ext(opnd), ext(cst)) | ||||||||||||||||
3765 | if (Inst->getOpcode() == Instruction::Xor) { | ||||||||||||||||
3766 | const ConstantInt *Cst = dyn_cast<ConstantInt>(Inst->getOperand(1)); | ||||||||||||||||
3767 | // Make sure it is not a NOT. | ||||||||||||||||
3768 | if (Cst && !Cst->getValue().isAllOnesValue()) | ||||||||||||||||
3769 | return true; | ||||||||||||||||
3770 | } | ||||||||||||||||
3771 | |||||||||||||||||
3772 | // zext(shrl(opnd, cst)) --> shrl(zext(opnd), zext(cst)) | ||||||||||||||||
3773 | // It may change a poisoned value into a regular value, like | ||||||||||||||||
3774 | // zext i32 (shrl i8 %val, 12) --> shrl i32 (zext i8 %val), 12 | ||||||||||||||||
3775 | // poisoned value regular value | ||||||||||||||||
3776 | // It should be OK since undef covers valid value. | ||||||||||||||||
3777 | if (Inst->getOpcode() == Instruction::LShr && !IsSExt) | ||||||||||||||||
3778 | return true; | ||||||||||||||||
3779 | |||||||||||||||||
3780 | // and(ext(shl(opnd, cst)), cst) --> and(shl(ext(opnd), ext(cst)), cst) | ||||||||||||||||
3781 | // It may change a poisoned value into a regular value, like | ||||||||||||||||
3782 | // zext i32 (shl i8 %val, 12) --> shl i32 (zext i8 %val), 12 | ||||||||||||||||
3783 | // poisoned value regular value | ||||||||||||||||
3784 | // It should be OK since undef covers valid value. | ||||||||||||||||
3785 | if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { | ||||||||||||||||
3786 | const Instruction *ExtInst = | ||||||||||||||||
3787 | dyn_cast<const Instruction>(*Inst->user_begin()); | ||||||||||||||||
3788 | if (ExtInst->hasOneUse()) { | ||||||||||||||||
3789 | const Instruction *AndInst = | ||||||||||||||||
3790 | dyn_cast<const Instruction>(*ExtInst->user_begin()); | ||||||||||||||||
3791 | if (AndInst && AndInst->getOpcode() == Instruction::And) { | ||||||||||||||||
3792 | const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1)); | ||||||||||||||||
3793 | if (Cst && | ||||||||||||||||
3794 | Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth())) | ||||||||||||||||
3795 | return true; | ||||||||||||||||
3796 | } | ||||||||||||||||
3797 | } | ||||||||||||||||
3798 | } | ||||||||||||||||
3799 | |||||||||||||||||
3800 | // Check if we can do the following simplification. | ||||||||||||||||
3801 | // ext(trunc(opnd)) --> ext(opnd) | ||||||||||||||||
3802 | if (!isa<TruncInst>(Inst)) | ||||||||||||||||
3803 | return false; | ||||||||||||||||
3804 | |||||||||||||||||
3805 | Value *OpndVal = Inst->getOperand(0); | ||||||||||||||||
3806 | // Check if we can use this operand in the extension. | ||||||||||||||||
3807 | // If the type is larger than the result type of the extension, we cannot. | ||||||||||||||||
3808 | if (!OpndVal->getType()->isIntegerTy() || | ||||||||||||||||
3809 | OpndVal->getType()->getIntegerBitWidth() > | ||||||||||||||||
3810 | ConsideredExtType->getIntegerBitWidth()) | ||||||||||||||||
3811 | return false; | ||||||||||||||||
3812 | |||||||||||||||||
3813 | // If the operand of the truncate is not an instruction, we will not have | ||||||||||||||||
3814 | // any information on the dropped bits. | ||||||||||||||||
3815 | // (Actually we could for constant but it is not worth the extra logic). | ||||||||||||||||
3816 | Instruction *Opnd = dyn_cast<Instruction>(OpndVal); | ||||||||||||||||
3817 | if (!Opnd) | ||||||||||||||||
3818 | return false; | ||||||||||||||||
3819 | |||||||||||||||||
3820 | // Check if the source of the type is narrow enough. | ||||||||||||||||
3821 | // I.e., check that trunc just drops extended bits of the same kind of | ||||||||||||||||
3822 | // the extension. | ||||||||||||||||
3823 | // #1 get the type of the operand and check the kind of the extended bits. | ||||||||||||||||
3824 | const Type *OpndType = getOrigType(PromotedInsts, Opnd, IsSExt); | ||||||||||||||||
3825 | if (OpndType) | ||||||||||||||||
3826 | ; | ||||||||||||||||
3827 | else if ((IsSExt && isa<SExtInst>(Opnd)) || (!IsSExt && isa<ZExtInst>(Opnd))) | ||||||||||||||||
3828 | OpndType = Opnd->getOperand(0)->getType(); | ||||||||||||||||
3829 | else | ||||||||||||||||
3830 | return false; | ||||||||||||||||
3831 | |||||||||||||||||
3832 | // #2 check that the truncate just drops extended bits. | ||||||||||||||||
3833 | return Inst->getType()->getIntegerBitWidth() >= | ||||||||||||||||
3834 | OpndType->getIntegerBitWidth(); | ||||||||||||||||
3835 | } | ||||||||||||||||
3836 | |||||||||||||||||
3837 | TypePromotionHelper::Action TypePromotionHelper::getAction( | ||||||||||||||||
3838 | Instruction *Ext, const SetOfInstrs &InsertedInsts, | ||||||||||||||||
3839 | const TargetLowering &TLI, const InstrToOrigTy &PromotedInsts) { | ||||||||||||||||
3840 | assert((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) &&(((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && "Unexpected instruction type") ? static_cast<void> (0) : __assert_fail ("(isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && \"Unexpected instruction type\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3841, __PRETTY_FUNCTION__)) | ||||||||||||||||
3841 | "Unexpected instruction type")(((isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && "Unexpected instruction type") ? static_cast<void> (0) : __assert_fail ("(isa<SExtInst>(Ext) || isa<ZExtInst>(Ext)) && \"Unexpected instruction type\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 3841, __PRETTY_FUNCTION__)); | ||||||||||||||||
3842 | Instruction *ExtOpnd = dyn_cast<Instruction>(Ext->getOperand(0)); | ||||||||||||||||
3843 | Type *ExtTy = Ext->getType(); | ||||||||||||||||
3844 | bool IsSExt = isa<SExtInst>(Ext); | ||||||||||||||||
3845 | // If the operand of the extension is not an instruction, we cannot | ||||||||||||||||
3846 | // get through. | ||||||||||||||||
3847 | // If it, check we can get through. | ||||||||||||||||
3848 | if (!ExtOpnd || !canGetThrough(ExtOpnd, ExtTy, PromotedInsts, IsSExt)) | ||||||||||||||||
3849 | return nullptr; | ||||||||||||||||
3850 | |||||||||||||||||
3851 | // Do not promote if the operand has been added by codegenprepare. | ||||||||||||||||
3852 | // Otherwise, it means we are undoing an optimization that is likely to be | ||||||||||||||||
3853 | // redone, thus causing potential infinite loop. | ||||||||||||||||
3854 | if (isa<TruncInst>(ExtOpnd) && InsertedInsts.count(ExtOpnd)) | ||||||||||||||||
3855 | return nullptr; | ||||||||||||||||
3856 | |||||||||||||||||
3857 | // SExt or Trunc instructions. | ||||||||||||||||
3858 | // Return the related handler. | ||||||||||||||||
3859 | if (isa<SExtInst>(ExtOpnd) || isa<TruncInst>(ExtOpnd) || | ||||||||||||||||
3860 | isa<ZExtInst>(ExtOpnd)) | ||||||||||||||||
3861 | return promoteOperandForTruncAndAnyExt; | ||||||||||||||||
3862 | |||||||||||||||||
3863 | // Regular instruction. | ||||||||||||||||
3864 | // Abort early if we will have to insert non-free instructions. | ||||||||||||||||
3865 | if (!ExtOpnd->hasOneUse() && !TLI.isTruncateFree(ExtTy, ExtOpnd->getType())) | ||||||||||||||||
3866 | return nullptr; | ||||||||||||||||
3867 | return IsSExt ? signExtendOperandForOther : zeroExtendOperandForOther; | ||||||||||||||||
3868 | } | ||||||||||||||||
3869 | |||||||||||||||||
3870 | Value *TypePromotionHelper::promoteOperandForTruncAndAnyExt( | ||||||||||||||||
3871 | Instruction *SExt, TypePromotionTransaction &TPT, | ||||||||||||||||
3872 | InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, | ||||||||||||||||
3873 | SmallVectorImpl<Instruction *> *Exts, | ||||||||||||||||
3874 | SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI) { | ||||||||||||||||
3875 | // By construction, the operand of SExt is an instruction. Otherwise we cannot | ||||||||||||||||
3876 | // get through it and this method should not be called. | ||||||||||||||||
3877 | Instruction *SExtOpnd = cast<Instruction>(SExt->getOperand(0)); | ||||||||||||||||
3878 | Value *ExtVal = SExt; | ||||||||||||||||
3879 | bool HasMergedNonFreeExt = false; | ||||||||||||||||
3880 | if (isa<ZExtInst>(SExtOpnd)) { | ||||||||||||||||
3881 | // Replace s|zext(zext(opnd)) | ||||||||||||||||
3882 | // => zext(opnd). | ||||||||||||||||
3883 | HasMergedNonFreeExt = !TLI.isExtFree(SExtOpnd); | ||||||||||||||||
3884 | Value *ZExt = | ||||||||||||||||
3885 | TPT.createZExt(SExt, SExtOpnd->getOperand(0), SExt->getType()); | ||||||||||||||||
3886 | TPT.replaceAllUsesWith(SExt, ZExt); | ||||||||||||||||
3887 | TPT.eraseInstruction(SExt); | ||||||||||||||||
3888 | ExtVal = ZExt; | ||||||||||||||||
3889 | } else { | ||||||||||||||||
3890 | // Replace z|sext(trunc(opnd)) or sext(sext(opnd)) | ||||||||||||||||
3891 | // => z|sext(opnd). | ||||||||||||||||
3892 | TPT.setOperand(SExt, 0, SExtOpnd->getOperand(0)); | ||||||||||||||||
3893 | } | ||||||||||||||||
3894 | CreatedInstsCost = 0; | ||||||||||||||||
3895 | |||||||||||||||||
3896 | // Remove dead code. | ||||||||||||||||
3897 | if (SExtOpnd->use_empty()) | ||||||||||||||||
3898 | TPT.eraseInstruction(SExtOpnd); | ||||||||||||||||
3899 | |||||||||||||||||
3900 | // Check if the extension is still needed. | ||||||||||||||||
3901 | Instruction *ExtInst = dyn_cast<Instruction>(ExtVal); | ||||||||||||||||
3902 | if (!ExtInst || ExtInst->getType() != ExtInst->getOperand(0)->getType()) { | ||||||||||||||||
3903 | if (ExtInst) { | ||||||||||||||||
3904 | if (Exts) | ||||||||||||||||
3905 | Exts->push_back(ExtInst); | ||||||||||||||||
3906 | CreatedInstsCost = !TLI.isExtFree(ExtInst) && !HasMergedNonFreeExt; | ||||||||||||||||
3907 | } | ||||||||||||||||
3908 | return ExtVal; | ||||||||||||||||
3909 | } | ||||||||||||||||
3910 | |||||||||||||||||
3911 | // At this point we have: ext ty opnd to ty. | ||||||||||||||||
3912 | // Reassign the uses of ExtInst to the opnd and remove ExtInst. | ||||||||||||||||
3913 | Value *NextVal = ExtInst->getOperand(0); | ||||||||||||||||
3914 | TPT.eraseInstruction(ExtInst, NextVal); | ||||||||||||||||
3915 | return NextVal; | ||||||||||||||||
3916 | } | ||||||||||||||||
3917 | |||||||||||||||||
3918 | Value *TypePromotionHelper::promoteOperandForOther( | ||||||||||||||||
3919 | Instruction *Ext, TypePromotionTransaction &TPT, | ||||||||||||||||
3920 | InstrToOrigTy &PromotedInsts, unsigned &CreatedInstsCost, | ||||||||||||||||
3921 | SmallVectorImpl<Instruction *> *Exts, | ||||||||||||||||
3922 | SmallVectorImpl<Instruction *> *Truncs, const TargetLowering &TLI, | ||||||||||||||||
3923 | bool IsSExt) { | ||||||||||||||||
3924 | // By construction, the operand of Ext is an instruction. Otherwise we cannot | ||||||||||||||||
3925 | // get through it and this method should not be called. | ||||||||||||||||
3926 | Instruction *ExtOpnd = cast<Instruction>(Ext->getOperand(0)); | ||||||||||||||||
3927 | CreatedInstsCost = 0; | ||||||||||||||||
3928 | if (!ExtOpnd->hasOneUse()) { | ||||||||||||||||
3929 | // ExtOpnd will be promoted. | ||||||||||||||||
3930 | // All its uses, but Ext, will need to use a truncated value of the | ||||||||||||||||
3931 | // promoted version. | ||||||||||||||||
3932 | // Create the truncate now. | ||||||||||||||||
3933 | Value *Trunc = TPT.createTrunc(Ext, ExtOpnd->getType()); | ||||||||||||||||
3934 | if (Instruction *ITrunc = dyn_cast<Instruction>(Trunc)) { | ||||||||||||||||
3935 | // Insert it just after the definition. | ||||||||||||||||
3936 | ITrunc->moveAfter(ExtOpnd); | ||||||||||||||||
3937 | if (Truncs) | ||||||||||||||||
3938 | Truncs->push_back(ITrunc); | ||||||||||||||||
3939 | } | ||||||||||||||||
3940 | |||||||||||||||||
3941 | TPT.replaceAllUsesWith(ExtOpnd, Trunc); | ||||||||||||||||
3942 | // Restore the operand of Ext (which has been replaced by the previous call | ||||||||||||||||
3943 | // to replaceAllUsesWith) to avoid creating a cycle trunc <-> sext. | ||||||||||||||||
3944 | TPT.setOperand(Ext, 0, ExtOpnd); | ||||||||||||||||
3945 | } | ||||||||||||||||
3946 | |||||||||||||||||
3947 | // Get through the Instruction: | ||||||||||||||||
3948 | // 1. Update its type. | ||||||||||||||||
3949 | // 2. Replace the uses of Ext by Inst. | ||||||||||||||||
3950 | // 3. Extend each operand that needs to be extended. | ||||||||||||||||
3951 | |||||||||||||||||
3952 | // Remember the original type of the instruction before promotion. | ||||||||||||||||
3953 | // This is useful to know that the high bits are sign extended bits. | ||||||||||||||||
3954 | addPromotedInst(PromotedInsts, ExtOpnd, IsSExt); | ||||||||||||||||
3955 | // Step #1. | ||||||||||||||||
3956 | TPT.mutateType(ExtOpnd, Ext->getType()); | ||||||||||||||||
3957 | // Step #2. | ||||||||||||||||
3958 | TPT.replaceAllUsesWith(Ext, ExtOpnd); | ||||||||||||||||
3959 | // Step #3. | ||||||||||||||||
3960 | Instruction *ExtForOpnd = Ext; | ||||||||||||||||
3961 | |||||||||||||||||
3962 | LLVM_DEBUG(dbgs() << "Propagate Ext to operands\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Propagate Ext to operands\n" ; } } while (false); | ||||||||||||||||
3963 | for (int OpIdx = 0, EndOpIdx = ExtOpnd->getNumOperands(); OpIdx != EndOpIdx; | ||||||||||||||||
3964 | ++OpIdx) { | ||||||||||||||||
3965 | LLVM_DEBUG(dbgs() << "Operand:\n" << *(ExtOpnd->getOperand(OpIdx)) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Operand:\n" << * (ExtOpnd->getOperand(OpIdx)) << '\n'; } } while (false ); | ||||||||||||||||
3966 | if (ExtOpnd->getOperand(OpIdx)->getType() == Ext->getType() || | ||||||||||||||||
3967 | !shouldExtOperand(ExtOpnd, OpIdx)) { | ||||||||||||||||
3968 | LLVM_DEBUG(dbgs() << "No need to propagate\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "No need to propagate\n" ; } } while (false); | ||||||||||||||||
3969 | continue; | ||||||||||||||||
3970 | } | ||||||||||||||||
3971 | // Check if we can statically extend the operand. | ||||||||||||||||
3972 | Value *Opnd = ExtOpnd->getOperand(OpIdx); | ||||||||||||||||
3973 | if (const ConstantInt *Cst = dyn_cast<ConstantInt>(Opnd)) { | ||||||||||||||||
3974 | LLVM_DEBUG(dbgs() << "Statically extend\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Statically extend\n"; } } while (false); | ||||||||||||||||
3975 | unsigned BitWidth = Ext->getType()->getIntegerBitWidth(); | ||||||||||||||||
3976 | APInt CstVal = IsSExt ? Cst->getValue().sext(BitWidth) | ||||||||||||||||
3977 | : Cst->getValue().zext(BitWidth); | ||||||||||||||||
3978 | TPT.setOperand(ExtOpnd, OpIdx, ConstantInt::get(Ext->getType(), CstVal)); | ||||||||||||||||
3979 | continue; | ||||||||||||||||
3980 | } | ||||||||||||||||
3981 | // UndefValue are typed, so we have to statically sign extend them. | ||||||||||||||||
3982 | if (isa<UndefValue>(Opnd)) { | ||||||||||||||||
3983 | LLVM_DEBUG(dbgs() << "Statically extend\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Statically extend\n"; } } while (false); | ||||||||||||||||
3984 | TPT.setOperand(ExtOpnd, OpIdx, UndefValue::get(Ext->getType())); | ||||||||||||||||
3985 | continue; | ||||||||||||||||
3986 | } | ||||||||||||||||
3987 | |||||||||||||||||
3988 | // Otherwise we have to explicitly sign extend the operand. | ||||||||||||||||
3989 | // Check if Ext was reused to extend an operand. | ||||||||||||||||
3990 | if (!ExtForOpnd) { | ||||||||||||||||
3991 | // If yes, create a new one. | ||||||||||||||||
3992 | LLVM_DEBUG(dbgs() << "More operands to ext\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "More operands to ext\n" ; } } while (false); | ||||||||||||||||
3993 | Value *ValForExtOpnd = IsSExt ? TPT.createSExt(Ext, Opnd, Ext->getType()) | ||||||||||||||||
3994 | : TPT.createZExt(Ext, Opnd, Ext->getType()); | ||||||||||||||||
3995 | if (!isa<Instruction>(ValForExtOpnd)) { | ||||||||||||||||
3996 | TPT.setOperand(ExtOpnd, OpIdx, ValForExtOpnd); | ||||||||||||||||
3997 | continue; | ||||||||||||||||
3998 | } | ||||||||||||||||
3999 | ExtForOpnd = cast<Instruction>(ValForExtOpnd); | ||||||||||||||||
4000 | } | ||||||||||||||||
4001 | if (Exts) | ||||||||||||||||
4002 | Exts->push_back(ExtForOpnd); | ||||||||||||||||
4003 | TPT.setOperand(ExtForOpnd, 0, Opnd); | ||||||||||||||||
4004 | |||||||||||||||||
4005 | // Move the sign extension before the insertion point. | ||||||||||||||||
4006 | TPT.moveBefore(ExtForOpnd, ExtOpnd); | ||||||||||||||||
4007 | TPT.setOperand(ExtOpnd, OpIdx, ExtForOpnd); | ||||||||||||||||
4008 | CreatedInstsCost += !TLI.isExtFree(ExtForOpnd); | ||||||||||||||||
4009 | // If more sext are required, new instructions will have to be created. | ||||||||||||||||
4010 | ExtForOpnd = nullptr; | ||||||||||||||||
4011 | } | ||||||||||||||||
4012 | if (ExtForOpnd == Ext) { | ||||||||||||||||
4013 | LLVM_DEBUG(dbgs() << "Extension is useless now\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Extension is useless now\n" ; } } while (false); | ||||||||||||||||
4014 | TPT.eraseInstruction(Ext); | ||||||||||||||||
4015 | } | ||||||||||||||||
4016 | return ExtOpnd; | ||||||||||||||||
4017 | } | ||||||||||||||||
4018 | |||||||||||||||||
4019 | /// Check whether or not promoting an instruction to a wider type is profitable. | ||||||||||||||||
4020 | /// \p NewCost gives the cost of extension instructions created by the | ||||||||||||||||
4021 | /// promotion. | ||||||||||||||||
4022 | /// \p OldCost gives the cost of extension instructions before the promotion | ||||||||||||||||
4023 | /// plus the number of instructions that have been | ||||||||||||||||
4024 | /// matched in the addressing mode the promotion. | ||||||||||||||||
4025 | /// \p PromotedOperand is the value that has been promoted. | ||||||||||||||||
4026 | /// \return True if the promotion is profitable, false otherwise. | ||||||||||||||||
4027 | bool AddressingModeMatcher::isPromotionProfitable( | ||||||||||||||||
4028 | unsigned NewCost, unsigned OldCost, Value *PromotedOperand) const { | ||||||||||||||||
4029 | LLVM_DEBUG(dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCostdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'; } } while (false) | ||||||||||||||||
4030 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "OldCost: " << OldCost << "\tNewCost: " << NewCost << '\n'; } } while (false); | ||||||||||||||||
4031 | // The cost of the new extensions is greater than the cost of the | ||||||||||||||||
4032 | // old extension plus what we folded. | ||||||||||||||||
4033 | // This is not profitable. | ||||||||||||||||
4034 | if (NewCost > OldCost) | ||||||||||||||||
4035 | return false; | ||||||||||||||||
4036 | if (NewCost < OldCost) | ||||||||||||||||
4037 | return true; | ||||||||||||||||
4038 | // The promotion is neutral but it may help folding the sign extension in | ||||||||||||||||
4039 | // loads for instance. | ||||||||||||||||
4040 | // Check that we did not create an illegal instruction. | ||||||||||||||||
4041 | return isPromotedInstructionLegal(TLI, DL, PromotedOperand); | ||||||||||||||||
4042 | } | ||||||||||||||||
4043 | |||||||||||||||||
/// Given an instruction or constant expr, see if we can fold the operation
/// into the addressing mode. If so, update the addressing mode and return
/// true, otherwise return false without modifying AddrMode.
/// If \p MovedAway is not NULL, it contains the information of whether or
/// not AddrInst has to be folded into the addressing mode on success.
/// If \p MovedAway == true, \p AddrInst will not be part of the addressing
/// because it has been moved away.
/// Thus AddrInst must not be added in the matched instructions.
/// This state can happen when AddrInst is a sext, since it may be moved away.
/// Therefore, AddrInst may not be valid when MovedAway is true and it must
/// not be referenced anymore.
bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth,
                                               bool *MovedAway) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5) return false;

  // By default, all matched instructions stay in place.
  if (MovedAway)
    *MovedAway = false;

  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return matchAddr(AddrInst->getOperand(0), Depth);
  case Instruction::IntToPtr: {
    auto AS = AddrInst->getType()->getPointerAddressSpace();
    auto PtrTy = MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(DL, AddrInst->getOperand(0)->getType()) == PtrTy)
      return matchAddr(AddrInst->getOperand(0), Depth);
    return false;
  }
  case Instruction::BitCast:
    // BitCast is always a noop, and we can handle it as long as it is
    // int->int or pointer->pointer (we don't want int<->fp or something).
    if (AddrInst->getOperand(0)->getType()->isIntOrPtrTy() &&
        // Don't touch identity bitcasts. These were probably put here by LSR,
        // and we don't want to mess around with them. Assume it knows what it
        // is doing.
        AddrInst->getOperand(0)->getType() != AddrInst->getType())
      return matchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::AddrSpaceCast: {
    unsigned SrcAS
      = AddrInst->getOperand(0)->getType()->getPointerAddressSpace();
    unsigned DestAS = AddrInst->getType()->getPointerAddressSpace();
    // Look through the cast only when the target considers it free.
    if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
      return matchAddr(AddrInst->getOperand(0), Depth);
    return false;
  }
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS. If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    // Start a transaction at this point.
    // The LHS may match but not the RHS.
    // Therefore, we need a higher level restoration point to undo partially
    // matched operation.
    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
        TPT.getRestorationPoint();

    // An add mixes two offsets; we can no longer guarantee inbounds.
    AddrMode.InBounds = false;
    if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
        matchAddr(AddrInst->getOperand(0), Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    TPT.rollback(LastKnownGood);

    // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
    if (matchAddr(AddrInst->getOperand(0), Depth+1) &&
        matchAddr(AddrInst->getOperand(1), Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    TPT.rollback(LastKnownGood);
    break;
  }
  //case Instruction::Or:
  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  //break;
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    AddrMode.InBounds = false;
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS || RHS->getBitWidth() > 64)
      return false;
    int64_t Scale = RHS->getSExtValue();
    // Fold the shift amount into a multiplicative scale.
    // NOTE(review): for Scale == 63 this shift overflows into the sign bit
    // (UB pre-C++20) -- presumably unreachable for sane shifts, but worth
    // confirming a guard exists upstream.
    if (Opcode == Instruction::Shl)
      Scale = 1LL << Scale;

    return matchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP. We check it if it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // Struct indices are always constant; accumulate the field offset.
        const StructLayout *SL = DL.getStructLayout(STy);
        unsigned Idx =
            cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          const APInt &CVal = CI->getValue();
          if (CVal.getMinSignedBits() <= 64) {
            ConstantOffset += CVal.getSExtValue() * TypeSize;
            continue;
          }
        }
        if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1)
            return false;

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // A common case is for the GEP to only do a constant offset. In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 ||
          TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
        // Check to see if we can fold the base pointer in too.
        if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
          if (!cast<GEPOperator>(AddrInst)->isInBounds())
            AddrMode.InBounds = false;
          return true;
        }
      } else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
                 TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
                 ConstantOffset > 0) {
        // Record GEPs with non-zero offsets as candidates for splitting in the
        // event that the offset cannot fit into the r+i addressing mode.
        // Simple and common case that only one GEP is used in calculating the
        // address for the memory access.
        Value *Base = AddrInst->getOperand(0);
        auto *BaseI = dyn_cast<Instruction>(Base);
        auto *GEP = cast<GetElementPtrInst>(AddrInst);
        if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
            (BaseI && !isa<CastInst>(BaseI) &&
             !isa<GetElementPtrInst>(BaseI))) {
          // Make sure the parent block allows inserting non-PHI instructions
          // before the terminator.
          BasicBlock *Parent =
              BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
          if (!Parent->getTerminator()->isEHPad())
            LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
        }
      }
      // Undo the speculative offset addition before reporting failure.
      AddrMode.BaseOffs -= ConstantOffset;
      return false;
    }

    // Save the valid addressing mode in case we can't match.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // See if the scale and offset amount is valid for this target.
    AddrMode.BaseOffs += ConstantOffset;
    if (!cast<GEPOperator>(AddrInst)->isInBounds())
      AddrMode.InBounds = false;

    // Match the base operand of the GEP.
    if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
      // If it couldn't be matched, just stuff the value in a register.
      if (AddrMode.HasBaseReg) {
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
    }

    // Match the remaining variable portion of the GEP.
    if (!matchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
                          Depth)) {
      // If it couldn't be matched, try stuffing the base into a register
      // instead of matching it, and retrying the match of the scale.
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
      if (AddrMode.HasBaseReg)
        return false;
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
      AddrMode.BaseOffs += ConstantOffset;
      if (!matchScaledValue(AddrInst->getOperand(VariableOperand),
                            VariableScale, Depth)) {
        // If even that didn't work, bail.
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
    }

    return true;
  }
  case Instruction::SExt:
  case Instruction::ZExt: {
    // Constant expressions are not Instructions; bail on those.
    Instruction *Ext = dyn_cast<Instruction>(AddrInst);
    if (!Ext)
      return false;

    // Try to move this ext out of the way of the addressing mode.
    // Ask for a method for doing so.
    TypePromotionHelper::Action TPH =
        TypePromotionHelper::getAction(Ext, InsertedInsts, TLI, PromotedInsts);
    if (!TPH)
      return false;

    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
        TPT.getRestorationPoint();
    unsigned CreatedInstsCost = 0;
    unsigned ExtCost = !TLI.isExtFree(Ext);
    Value *PromotedOperand =
        TPH(Ext, TPT, PromotedInsts, CreatedInstsCost, nullptr, nullptr, TLI);
    // SExt has been moved away.
    // Thus either it will be rematched later in the recursive calls or it is
    // gone. Anyway, we must not fold it into the addressing mode at this point.
    // E.g.,
    // op = add opnd, 1
    // idx = ext op
    // addr = gep base, idx
    // is now:
    // promotedOpnd = ext opnd            <- no match here
    // op = promoted_add promotedOpnd, 1  <- match (later in recursive calls)
    // addr = gep base, op                <- match
    if (MovedAway)
      *MovedAway = true;

    assert(PromotedOperand &&
           "TypePromotionHelper should have filtered out those cases");

    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    if (!matchAddr(PromotedOperand, Depth) ||
        // The total of the new cost is equal to the cost of the created
        // instructions.
        // The total of the old cost is equal to the cost of the extension plus
        // what we have saved in the addressing mode.
        !isPromotionProfitable(CreatedInstsCost,
                               ExtCost + (AddrModeInsts.size() - OldSize),
                               PromotedOperand)) {
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
      LLVM_DEBUG(dbgs() << "Sign extension does not pay off: rollback\n");
      TPT.rollback(LastKnownGood);
      return false;
    }
    return true;
  }
  }
  // Unhandled opcode: the operation cannot be folded.
  return false;
}
4317 | |||||||||||||||||
/// If we can, try to add the value of 'Addr' into the current addressing mode.
/// If Addr can't be added to AddrMode this returns false and leaves AddrMode
/// unmodified. This assumes that Addr is either a pointer type or intptr_t
/// for the target.
///
bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
  // Start a transaction at this point that we will rollback if the matching
  // fails.
  TypePromotionTransaction::ConstRestorationPt LastKnownGood =
      TPT.getRestorationPoint();
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    // Fold in immediates if legal for the target.
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
      return true;
    // Not legal: undo the speculative offset addition.
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    // If this is a global variable, try to fold it into the addressing mode.
    // Only one global can be folded; skip if the slot is already taken.
    if (!AddrMode.BaseGV) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
        return true;
      AddrMode.BaseGV = nullptr;
    }
  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    // Snapshot state so a failed or unprofitable fold can be undone.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // Check to see if it is possible to fold this operation.
    bool MovedAway = false;
    if (matchOperationAddr(I, I->getOpcode(), Depth, &MovedAway)) {
      // This instruction may have been moved away. If so, there is nothing
      // to check here.
      if (MovedAway)
        return true;
      // Okay, it's possible to fold this.  Check to see if it is actually
      // *profitable* to do so.  We use a simple cost model to avoid increasing
      // register pressure too much.
      if (I->hasOneUse() ||
          isProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
        AddrModeInsts.push_back(I);
        return true;
      }

      // It isn't profitable to do this, roll back.
      //cerr << "NOT FOLDING: " << *I;
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
      TPT.rollback(LastKnownGood);
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    // Constant expressions fold like instructions, but can't move away, so
    // no MovedAway out-parameter is needed.
    if (matchOperationAddr(CE, CE->getOpcode(), Depth))
      return true;
    TPT.rollback(LastKnownGood);
  } else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worse case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = nullptr;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = nullptr;
  }
  // Couldn't match.
  TPT.rollback(LastKnownGood);
  return false;
}
4401 | |||||||||||||||||
4402 | /// Check to see if all uses of OpVal by the specified inline asm call are due | ||||||||||||||||
4403 | /// to memory operands. If so, return true, otherwise return false. | ||||||||||||||||
4404 | static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal, | ||||||||||||||||
4405 | const TargetLowering &TLI, | ||||||||||||||||
4406 | const TargetRegisterInfo &TRI) { | ||||||||||||||||
4407 | const Function *F = CI->getFunction(); | ||||||||||||||||
4408 | TargetLowering::AsmOperandInfoVector TargetConstraints = | ||||||||||||||||
4409 | TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI, | ||||||||||||||||
4410 | ImmutableCallSite(CI)); | ||||||||||||||||
4411 | |||||||||||||||||
4412 | for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { | ||||||||||||||||
4413 | TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; | ||||||||||||||||
4414 | |||||||||||||||||
4415 | // Compute the constraint code and ConstraintType to use. | ||||||||||||||||
4416 | TLI.ComputeConstraintToUse(OpInfo, SDValue()); | ||||||||||||||||
4417 | |||||||||||||||||
4418 | // If this asm operand is our Value*, and if it isn't an indirect memory | ||||||||||||||||
4419 | // operand, we can't fold it! | ||||||||||||||||
4420 | if (OpInfo.CallOperandVal == OpVal && | ||||||||||||||||
4421 | (OpInfo.ConstraintType != TargetLowering::C_Memory || | ||||||||||||||||
4422 | !OpInfo.isIndirect)) | ||||||||||||||||
4423 | return false; | ||||||||||||||||
4424 | } | ||||||||||||||||
4425 | |||||||||||||||||
4426 | return true; | ||||||||||||||||
4427 | } | ||||||||||||||||
4428 | |||||||||||||||||
// Max number of memory uses to look at before aborting the search to conserve
// compile time. (Cap on the use-graph walk performed by FindAllMemoryUses.)
static constexpr int MaxMemoryUsesToScan = 20;
4432 | |||||||||||||||||
4433 | /// Recursively walk all the uses of I until we find a memory use. | ||||||||||||||||
4434 | /// If we find an obviously non-foldable instruction, return true. | ||||||||||||||||
4435 | /// Add the ultimately found memory instructions to MemoryUses. | ||||||||||||||||
4436 | static bool FindAllMemoryUses( | ||||||||||||||||
4437 | Instruction *I, | ||||||||||||||||
4438 | SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses, | ||||||||||||||||
4439 | SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, | ||||||||||||||||
4440 | const TargetRegisterInfo &TRI, int SeenInsts = 0) { | ||||||||||||||||
4441 | // If we already considered this instruction, we're done. | ||||||||||||||||
4442 | if (!ConsideredInsts.insert(I).second) | ||||||||||||||||
4443 | return false; | ||||||||||||||||
4444 | |||||||||||||||||
4445 | // If this is an obviously unfoldable instruction, bail out. | ||||||||||||||||
4446 | if (!MightBeFoldableInst(I)) | ||||||||||||||||
4447 | return true; | ||||||||||||||||
4448 | |||||||||||||||||
4449 | const bool OptSize = I->getFunction()->hasOptSize(); | ||||||||||||||||
4450 | |||||||||||||||||
4451 | // Loop over all the uses, recursively processing them. | ||||||||||||||||
4452 | for (Use &U : I->uses()) { | ||||||||||||||||
4453 | // Conservatively return true if we're seeing a large number or a deep chain | ||||||||||||||||
4454 | // of users. This avoids excessive compilation times in pathological cases. | ||||||||||||||||
4455 | if (SeenInsts++ >= MaxMemoryUsesToScan) | ||||||||||||||||
4456 | return true; | ||||||||||||||||
4457 | |||||||||||||||||
4458 | Instruction *UserI = cast<Instruction>(U.getUser()); | ||||||||||||||||
4459 | if (LoadInst *LI = dyn_cast<LoadInst>(UserI)) { | ||||||||||||||||
4460 | MemoryUses.push_back(std::make_pair(LI, U.getOperandNo())); | ||||||||||||||||
4461 | continue; | ||||||||||||||||
4462 | } | ||||||||||||||||
4463 | |||||||||||||||||
4464 | if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) { | ||||||||||||||||
4465 | unsigned opNo = U.getOperandNo(); | ||||||||||||||||
4466 | if (opNo != StoreInst::getPointerOperandIndex()) | ||||||||||||||||
4467 | return true; // Storing addr, not into addr. | ||||||||||||||||
4468 | MemoryUses.push_back(std::make_pair(SI, opNo)); | ||||||||||||||||
4469 | continue; | ||||||||||||||||
4470 | } | ||||||||||||||||
4471 | |||||||||||||||||
4472 | if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) { | ||||||||||||||||
4473 | unsigned opNo = U.getOperandNo(); | ||||||||||||||||
4474 | if (opNo != AtomicRMWInst::getPointerOperandIndex()) | ||||||||||||||||
4475 | return true; // Storing addr, not into addr. | ||||||||||||||||
4476 | MemoryUses.push_back(std::make_pair(RMW, opNo)); | ||||||||||||||||
4477 | continue; | ||||||||||||||||
4478 | } | ||||||||||||||||
4479 | |||||||||||||||||
4480 | if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) { | ||||||||||||||||
4481 | unsigned opNo = U.getOperandNo(); | ||||||||||||||||
4482 | if (opNo != AtomicCmpXchgInst::getPointerOperandIndex()) | ||||||||||||||||
4483 | return true; // Storing addr, not into addr. | ||||||||||||||||
4484 | MemoryUses.push_back(std::make_pair(CmpX, opNo)); | ||||||||||||||||
4485 | continue; | ||||||||||||||||
4486 | } | ||||||||||||||||
4487 | |||||||||||||||||
4488 | if (CallInst *CI = dyn_cast<CallInst>(UserI)) { | ||||||||||||||||
4489 | // If this is a cold call, we can sink the addressing calculation into | ||||||||||||||||
4490 | // the cold path. See optimizeCallInst | ||||||||||||||||
4491 | if (!OptSize && CI->hasFnAttr(Attribute::Cold)) | ||||||||||||||||
4492 | continue; | ||||||||||||||||
4493 | |||||||||||||||||
4494 | InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); | ||||||||||||||||
4495 | if (!IA) return true; | ||||||||||||||||
4496 | |||||||||||||||||
4497 | // If this is a memory operand, we're cool, otherwise bail out. | ||||||||||||||||
4498 | if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI)) | ||||||||||||||||
4499 | return true; | ||||||||||||||||
4500 | continue; | ||||||||||||||||
4501 | } | ||||||||||||||||
4502 | |||||||||||||||||
4503 | if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, | ||||||||||||||||
4504 | SeenInsts)) | ||||||||||||||||
4505 | return true; | ||||||||||||||||
4506 | } | ||||||||||||||||
4507 | |||||||||||||||||
4508 | return false; | ||||||||||||||||
4509 | } | ||||||||||||||||
4510 | |||||||||||||||||
4511 | /// Return true if Val is already known to be live at the use site that we're | ||||||||||||||||
4512 | /// folding it into. If so, there is no cost to include it in the addressing | ||||||||||||||||
4513 | /// mode. KnownLive1 and KnownLive2 are two values that we know are live at the | ||||||||||||||||
4514 | /// instruction already. | ||||||||||||||||
4515 | bool AddressingModeMatcher::valueAlreadyLiveAtInst(Value *Val,Value *KnownLive1, | ||||||||||||||||
4516 | Value *KnownLive2) { | ||||||||||||||||
4517 | // If Val is either of the known-live values, we know it is live! | ||||||||||||||||
4518 | if (Val == nullptr || Val == KnownLive1 || Val == KnownLive2) | ||||||||||||||||
4519 | return true; | ||||||||||||||||
4520 | |||||||||||||||||
4521 | // All values other than instructions and arguments (e.g. constants) are live. | ||||||||||||||||
4522 | if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true; | ||||||||||||||||
4523 | |||||||||||||||||
4524 | // If Val is a constant sized alloca in the entry block, it is live, this is | ||||||||||||||||
4525 | // true because it is just a reference to the stack/frame pointer, which is | ||||||||||||||||
4526 | // live for the whole function. | ||||||||||||||||
4527 | if (AllocaInst *AI = dyn_cast<AllocaInst>(Val)) | ||||||||||||||||
4528 | if (AI->isStaticAlloca()) | ||||||||||||||||
4529 | return true; | ||||||||||||||||
4530 | |||||||||||||||||
4531 | // Check to see if this value is already used in the memory instruction's | ||||||||||||||||
4532 | // block. If so, it's already live into the block at the very least, so we | ||||||||||||||||
4533 | // can reasonably fold it. | ||||||||||||||||
4534 | return Val->isUsedInBasicBlock(MemoryInst->getParent()); | ||||||||||||||||
4535 | } | ||||||||||||||||
4536 | |||||||||||||||||
/// It is possible for the addressing mode of the machine to fold the specified
/// instruction into a load or store that ultimately uses it.
/// However, the specified instruction has multiple uses.
/// Given this, it may actually increase register pressure to fold it
/// into the load. For example, consider this code:
///
///     X = ...
///     Y = X+1
///     use(Y)   -> nonload/store
///     Z = Y+1
///     load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                     ExtAddrMode &AMAfter) {
  // Recursive profitability queries (see the Matcher constructed below) set
  // this flag so the nested match answers "can it fold?" without re-running
  // this heuristic.
  if (IgnoreProfitability) return true;

  // AMBefore is the addressing mode before this instruction was folded into it,
  // and AMAfter is the addressing mode after the instruction was folded.  Get
  // the set of registers referenced by AMAfter and subtract out those
  // referenced by AMBefore: this is the set of values which folding in this
  // address extends the lifetime of.
  //
  // Note that there are only two potential values being referenced here,
  // BaseReg and ScaleReg (global addresses are always available, as are any
  // folded immediates).
  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;

  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
  // lifetime wasn't extended by adding this instruction.
  if (valueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    BaseReg = nullptr;
  if (valueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    ScaledReg = nullptr;

  // If folding this instruction (and it's subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (!BaseReg && !ScaledReg)
    return true;

  // If all uses of this instruction can have the address mode sunk into them,
  // we can remove the addressing mode and effectively trade one live register
  // for another (at worst.) In this context, folding an addressing mode into
  // the use is just a particularly nice way of sinking it.
  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
    return false;  // Has a non-memory, non-foldable use!

  // Now that we know that all uses of this instruction are part of a chain of
  // computation involving only operations that could theoretically be folded
  // into a memory use, loop over each of these memory operation uses and see
  // if they could  *actually* fold the instruction.  The assumption is that
  // addressing modes are cheap and that duplicating the computation involved
  // many times is worthwhile, even on a fastpath. For sinking candidates
  // (i.e. cold call sites), this serves as a way to prevent excessive code
  // growth since most architectures have some reasonable small and fast way to
  // compute an effective address.  (i.e LEA on x86)
  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
    Instruction *User = MemoryUses[i].first;
    unsigned OpNo = MemoryUses[i].second;

    // Get the access type of this use.  If the use isn't a pointer, we don't
    // know what it accesses.
    Value *Address = User->getOperand(OpNo);
    PointerType *AddrTy = dyn_cast<PointerType>(Address->getType());
    if (!AddrTy)
      return false;
    Type *AddressAccessTy = AddrTy->getElementType();
    unsigned AS = AddrTy->getAddressSpace();

    // Do a match against the root of this address, ignoring profitability. This
    // will tell us if the addressing mode for the memory operation will
    // *actually* cover the shared instruction.
    ExtAddrMode Result;
    // A fresh large-offset-GEP slot for the nested matcher; its findings are
    // discarded along with the rest of the speculative match below.
    std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
                                                                      0);
    // Remember the transaction state so every change the nested match makes
    // can be rolled back — the nested match is purely a query.
    TypePromotionTransaction::ConstRestorationPt LastKnownGood =
        TPT.getRestorationPoint();
    AddressingModeMatcher Matcher(
        MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
    Matcher.IgnoreProfitability = true;
    bool Success = Matcher.matchAddr(Address, 0);
    (void)Success; assert(Success && "Couldn't select *anything*?");

    // The match was to check the profitability, the changes made are not
    // part of the original matcher. Therefore, they should be dropped
    // otherwise the original matcher will not present the right state.
    TPT.rollback(LastKnownGood);

    // If the match didn't cover I, then it won't be shared by it.
    if (!is_contained(MatchedAddrModeInsts, I))
      return false;

    // Reset for the next memory use; MatchedAddrModeInsts is reused across
    // iterations as the nested matcher's output buffer.
    MatchedAddrModeInsts.clear();
  }

  // Every memory use can actually fold I into its addressing mode.
  return true;
}
4647 | |||||||||||||||||
4648 | /// Return true if the specified values are defined in a | ||||||||||||||||
4649 | /// different basic block than BB. | ||||||||||||||||
4650 | static bool IsNonLocalValue(Value *V, BasicBlock *BB) { | ||||||||||||||||
4651 | if (Instruction *I = dyn_cast<Instruction>(V)) | ||||||||||||||||
4652 | return I->getParent() != BB; | ||||||||||||||||
4653 | return false; | ||||||||||||||||
4654 | } | ||||||||||||||||
4655 | |||||||||||||||||
4656 | /// Sink addressing mode computation immediate before MemoryInst if doing so | ||||||||||||||||
4657 | /// can be done without increasing register pressure. The need for the | ||||||||||||||||
4658 | /// register pressure constraint means this can end up being an all or nothing | ||||||||||||||||
4659 | /// decision for all uses of the same addressing computation. | ||||||||||||||||
4660 | /// | ||||||||||||||||
4661 | /// Load and Store Instructions often have addressing modes that can do | ||||||||||||||||
4662 | /// significant amounts of computation. As such, instruction selection will try | ||||||||||||||||
4663 | /// to get the load or store to do as much computation as possible for the | ||||||||||||||||
4664 | /// program. The problem is that isel can only see within a single block. As | ||||||||||||||||
4665 | /// such, we sink as much legal addressing mode work into the block as possible. | ||||||||||||||||
4666 | /// | ||||||||||||||||
4667 | /// This method is used to optimize both load/store and inline asms with memory | ||||||||||||||||
4668 | /// operands. It's also used to sink addressing computations feeding into cold | ||||||||||||||||
4669 | /// call sites into their (cold) basic block. | ||||||||||||||||
4670 | /// | ||||||||||||||||
4671 | /// The motivation for handling sinking into cold blocks is that doing so can | ||||||||||||||||
4672 | /// both enable other address mode sinking (by satisfying the register pressure | ||||||||||||||||
4673 | /// constraint above), and reduce register pressure globally (by removing the | ||||||||||||||||
4674 | /// addressing mode computation from the fast path entirely.). | ||||||||||||||||
4675 | bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, | ||||||||||||||||
4676 | Type *AccessTy, unsigned AddrSpace) { | ||||||||||||||||
4677 | Value *Repl = Addr; | ||||||||||||||||
4678 | |||||||||||||||||
4679 | // Try to collapse single-value PHI nodes. This is necessary to undo | ||||||||||||||||
4680 | // unprofitable PRE transformations. | ||||||||||||||||
4681 | SmallVector<Value*, 8> worklist; | ||||||||||||||||
4682 | SmallPtrSet<Value*, 16> Visited; | ||||||||||||||||
4683 | worklist.push_back(Addr); | ||||||||||||||||
4684 | |||||||||||||||||
4685 | // Use a worklist to iteratively look through PHI and select nodes, and | ||||||||||||||||
4686 | // ensure that the addressing mode obtained from the non-PHI/select roots of | ||||||||||||||||
4687 | // the graph are compatible. | ||||||||||||||||
4688 | bool PhiOrSelectSeen = false; | ||||||||||||||||
4689 | SmallVector<Instruction*, 16> AddrModeInsts; | ||||||||||||||||
4690 | const SimplifyQuery SQ(*DL, TLInfo); | ||||||||||||||||
4691 | AddressingModeCombiner AddrModes(SQ, Addr); | ||||||||||||||||
4692 | TypePromotionTransaction TPT(RemovedInsts); | ||||||||||||||||
4693 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | ||||||||||||||||
4694 | TPT.getRestorationPoint(); | ||||||||||||||||
4695 | while (!worklist.empty()) { | ||||||||||||||||
4696 | Value *V = worklist.back(); | ||||||||||||||||
4697 | worklist.pop_back(); | ||||||||||||||||
4698 | |||||||||||||||||
4699 | // We allow traversing cyclic Phi nodes. | ||||||||||||||||
4700 | // In case of success after this loop we ensure that traversing through | ||||||||||||||||
4701 | // Phi nodes ends up with all cases to compute address of the form | ||||||||||||||||
4702 | // BaseGV + Base + Scale * Index + Offset | ||||||||||||||||
4703 | // where Scale and Offset are constans and BaseGV, Base and Index | ||||||||||||||||
4704 | // are exactly the same Values in all cases. | ||||||||||||||||
4705 | // It means that BaseGV, Scale and Offset dominate our memory instruction | ||||||||||||||||
4706 | // and have the same value as they had in address computation represented | ||||||||||||||||
4707 | // as Phi. So we can safely sink address computation to memory instruction. | ||||||||||||||||
4708 | if (!Visited.insert(V).second) | ||||||||||||||||
4709 | continue; | ||||||||||||||||
4710 | |||||||||||||||||
4711 | // For a PHI node, push all of its incoming values. | ||||||||||||||||
4712 | if (PHINode *P = dyn_cast<PHINode>(V)) { | ||||||||||||||||
4713 | for (Value *IncValue : P->incoming_values()) | ||||||||||||||||
4714 | worklist.push_back(IncValue); | ||||||||||||||||
4715 | PhiOrSelectSeen = true; | ||||||||||||||||
4716 | continue; | ||||||||||||||||
4717 | } | ||||||||||||||||
4718 | // Similar for select. | ||||||||||||||||
4719 | if (SelectInst *SI = dyn_cast<SelectInst>(V)) { | ||||||||||||||||
4720 | worklist.push_back(SI->getFalseValue()); | ||||||||||||||||
4721 | worklist.push_back(SI->getTrueValue()); | ||||||||||||||||
4722 | PhiOrSelectSeen = true; | ||||||||||||||||
4723 | continue; | ||||||||||||||||
4724 | } | ||||||||||||||||
4725 | |||||||||||||||||
4726 | // For non-PHIs, determine the addressing mode being computed. Note that | ||||||||||||||||
4727 | // the result may differ depending on what other uses our candidate | ||||||||||||||||
4728 | // addressing instructions might have. | ||||||||||||||||
4729 | AddrModeInsts.clear(); | ||||||||||||||||
4730 | std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr, | ||||||||||||||||
4731 | 0); | ||||||||||||||||
4732 | ExtAddrMode NewAddrMode = AddressingModeMatcher::Match( | ||||||||||||||||
4733 | V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, | ||||||||||||||||
4734 | InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); | ||||||||||||||||
4735 | |||||||||||||||||
4736 | GetElementPtrInst *GEP = LargeOffsetGEP.first; | ||||||||||||||||
4737 | if (GEP && !NewGEPBases.count(GEP)) { | ||||||||||||||||
4738 | // If splitting the underlying data structure can reduce the offset of a | ||||||||||||||||
4739 | // GEP, collect the GEP. Skip the GEPs that are the new bases of | ||||||||||||||||
4740 | // previously split data structures. | ||||||||||||||||
4741 | LargeOffsetGEPMap[GEP->getPointerOperand()].push_back(LargeOffsetGEP); | ||||||||||||||||
4742 | if (LargeOffsetGEPID.find(GEP) == LargeOffsetGEPID.end()) | ||||||||||||||||
4743 | LargeOffsetGEPID[GEP] = LargeOffsetGEPID.size(); | ||||||||||||||||
4744 | } | ||||||||||||||||
4745 | |||||||||||||||||
4746 | NewAddrMode.OriginalValue = V; | ||||||||||||||||
4747 | if (!AddrModes.addNewAddrMode(NewAddrMode)) | ||||||||||||||||
4748 | break; | ||||||||||||||||
4749 | } | ||||||||||||||||
4750 | |||||||||||||||||
4751 | // Try to combine the AddrModes we've collected. If we couldn't collect any, | ||||||||||||||||
4752 | // or we have multiple but either couldn't combine them or combining them | ||||||||||||||||
4753 | // wouldn't do anything useful, bail out now. | ||||||||||||||||
4754 | if (!AddrModes.combineAddrModes()) { | ||||||||||||||||
4755 | TPT.rollback(LastKnownGood); | ||||||||||||||||
4756 | return false; | ||||||||||||||||
4757 | } | ||||||||||||||||
4758 | TPT.commit(); | ||||||||||||||||
4759 | |||||||||||||||||
4760 | // Get the combined AddrMode (or the only AddrMode, if we only had one). | ||||||||||||||||
4761 | ExtAddrMode AddrMode = AddrModes.getAddrMode(); | ||||||||||||||||
4762 | |||||||||||||||||
4763 | // If all the instructions matched are already in this BB, don't do anything. | ||||||||||||||||
4764 | // If we saw a Phi node then it is not local definitely, and if we saw a select | ||||||||||||||||
4765 | // then we want to push the address calculation past it even if it's already | ||||||||||||||||
4766 | // in this BB. | ||||||||||||||||
4767 | if (!PhiOrSelectSeen && none_of(AddrModeInsts, [&](Value *V) { | ||||||||||||||||
4768 | return IsNonLocalValue(V, MemoryInst->getParent()); | ||||||||||||||||
4769 | })) { | ||||||||||||||||
4770 | LLVM_DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"; } } while (false) | ||||||||||||||||
4771 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n"; } } while (false); | ||||||||||||||||
4772 | return false; | ||||||||||||||||
4773 | } | ||||||||||||||||
4774 | |||||||||||||||||
4775 | // Insert this computation right after this user. Since our caller is | ||||||||||||||||
4776 | // scanning from the top of the BB to the bottom, reuse of the expr are | ||||||||||||||||
4777 | // guaranteed to happen later. | ||||||||||||||||
4778 | IRBuilder<> Builder(MemoryInst); | ||||||||||||||||
4779 | |||||||||||||||||
4780 | // Now that we determined the addressing expression we want to use and know | ||||||||||||||||
4781 | // that we have to sink it into this block. Check to see if we have already | ||||||||||||||||
4782 | // done this for some other load/store instr in this block. If so, reuse | ||||||||||||||||
4783 | // the computation. Before attempting reuse, check if the address is valid | ||||||||||||||||
4784 | // as it may have been erased. | ||||||||||||||||
4785 | |||||||||||||||||
4786 | WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; | ||||||||||||||||
4787 | |||||||||||||||||
4788 | Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; | ||||||||||||||||
4789 | if (SunkAddr) { | ||||||||||||||||
4790 | LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false) | ||||||||||||||||
4791 | << " for " << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false); | ||||||||||||||||
4792 | if (SunkAddr->getType() != Addr->getType()) | ||||||||||||||||
4793 | SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); | ||||||||||||||||
4794 | } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && | ||||||||||||||||
4795 | TM && SubtargetInfo->addrSinkUsingGEPs())) { | ||||||||||||||||
4796 | // By default, we use the GEP-based method when AA is used later. This | ||||||||||||||||
4797 | // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. | ||||||||||||||||
4798 | LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false) | ||||||||||||||||
4799 | << " for " << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false); | ||||||||||||||||
4800 | Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); | ||||||||||||||||
4801 | Value *ResultPtr = nullptr, *ResultIndex = nullptr; | ||||||||||||||||
4802 | |||||||||||||||||
4803 | // First, find the pointer. | ||||||||||||||||
4804 | if (AddrMode.BaseReg && AddrMode.BaseReg->getType()->isPointerTy()) { | ||||||||||||||||
4805 | ResultPtr = AddrMode.BaseReg; | ||||||||||||||||
4806 | AddrMode.BaseReg = nullptr; | ||||||||||||||||
4807 | } | ||||||||||||||||
4808 | |||||||||||||||||
4809 | if (AddrMode.Scale && AddrMode.ScaledReg->getType()->isPointerTy()) { | ||||||||||||||||
4810 | // We can't add more than one pointer together, nor can we scale a | ||||||||||||||||
4811 | // pointer (both of which seem meaningless). | ||||||||||||||||
4812 | if (ResultPtr || AddrMode.Scale != 1) | ||||||||||||||||
4813 | return false; | ||||||||||||||||
4814 | |||||||||||||||||
4815 | ResultPtr = AddrMode.ScaledReg; | ||||||||||||||||
4816 | AddrMode.Scale = 0; | ||||||||||||||||
4817 | } | ||||||||||||||||
4818 | |||||||||||||||||
4819 | // It is only safe to sign extend the BaseReg if we know that the math | ||||||||||||||||
4820 | // required to create it did not overflow before we extend it. Since | ||||||||||||||||
4821 | // the original IR value was tossed in favor of a constant back when | ||||||||||||||||
4822 | // the AddrMode was created we need to bail out gracefully if widths | ||||||||||||||||
4823 | // do not match instead of extending it. | ||||||||||||||||
4824 | // | ||||||||||||||||
4825 | // (See below for code to add the scale.) | ||||||||||||||||
4826 | if (AddrMode.Scale) { | ||||||||||||||||
4827 | Type *ScaledRegTy = AddrMode.ScaledReg->getType(); | ||||||||||||||||
4828 | if (cast<IntegerType>(IntPtrTy)->getBitWidth() > | ||||||||||||||||
4829 | cast<IntegerType>(ScaledRegTy)->getBitWidth()) | ||||||||||||||||
4830 | return false; | ||||||||||||||||
4831 | } | ||||||||||||||||
4832 | |||||||||||||||||
4833 | if (AddrMode.BaseGV) { | ||||||||||||||||
4834 | if (ResultPtr) | ||||||||||||||||
4835 | return false; | ||||||||||||||||
4836 | |||||||||||||||||
4837 | ResultPtr = AddrMode.BaseGV; | ||||||||||||||||
4838 | } | ||||||||||||||||
4839 | |||||||||||||||||
4840 | // If the real base value actually came from an inttoptr, then the matcher | ||||||||||||||||
4841 | // will look through it and provide only the integer value. In that case, | ||||||||||||||||
4842 | // use it here. | ||||||||||||||||
4843 | if (!DL->isNonIntegralPointerType(Addr->getType())) { | ||||||||||||||||
4844 | if (!ResultPtr && AddrMode.BaseReg) { | ||||||||||||||||
4845 | ResultPtr = Builder.CreateIntToPtr(AddrMode.BaseReg, Addr->getType(), | ||||||||||||||||
4846 | "sunkaddr"); | ||||||||||||||||
4847 | AddrMode.BaseReg = nullptr; | ||||||||||||||||
4848 | } else if (!ResultPtr && AddrMode.Scale == 1) { | ||||||||||||||||
4849 | ResultPtr = Builder.CreateIntToPtr(AddrMode.ScaledReg, Addr->getType(), | ||||||||||||||||
4850 | "sunkaddr"); | ||||||||||||||||
4851 | AddrMode.Scale = 0; | ||||||||||||||||
4852 | } | ||||||||||||||||
4853 | } | ||||||||||||||||
4854 | |||||||||||||||||
4855 | if (!ResultPtr && | ||||||||||||||||
4856 | !AddrMode.BaseReg && !AddrMode.Scale && !AddrMode.BaseOffs) { | ||||||||||||||||
4857 | SunkAddr = Constant::getNullValue(Addr->getType()); | ||||||||||||||||
4858 | } else if (!ResultPtr) { | ||||||||||||||||
4859 | return false; | ||||||||||||||||
4860 | } else { | ||||||||||||||||
4861 | Type *I8PtrTy = | ||||||||||||||||
4862 | Builder.getInt8PtrTy(Addr->getType()->getPointerAddressSpace()); | ||||||||||||||||
4863 | Type *I8Ty = Builder.getInt8Ty(); | ||||||||||||||||
4864 | |||||||||||||||||
4865 | // Start with the base register. Do this first so that subsequent address | ||||||||||||||||
4866 | // matching finds it last, which will prevent it from trying to match it | ||||||||||||||||
4867 | // as the scaled value in case it happens to be a mul. That would be | ||||||||||||||||
4868 | // problematic if we've sunk a different mul for the scale, because then | ||||||||||||||||
4869 | // we'd end up sinking both muls. | ||||||||||||||||
4870 | if (AddrMode.BaseReg) { | ||||||||||||||||
4871 | Value *V = AddrMode.BaseReg; | ||||||||||||||||
4872 | if (V->getType() != IntPtrTy) | ||||||||||||||||
4873 | V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); | ||||||||||||||||
4874 | |||||||||||||||||
4875 | ResultIndex = V; | ||||||||||||||||
4876 | } | ||||||||||||||||
4877 | |||||||||||||||||
4878 | // Add the scale value. | ||||||||||||||||
4879 | if (AddrMode.Scale) { | ||||||||||||||||
4880 | Value *V = AddrMode.ScaledReg; | ||||||||||||||||
4881 | if (V->getType() == IntPtrTy) { | ||||||||||||||||
4882 | // done. | ||||||||||||||||
4883 | } else { | ||||||||||||||||
4884 | assert(cast<IntegerType>(IntPtrTy)->getBitWidth() <((cast<IntegerType>(IntPtrTy)->getBitWidth() < cast <IntegerType>(V->getType())->getBitWidth() && "We can't transform if ScaledReg is too narrow") ? static_cast <void> (0) : __assert_fail ("cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth() && \"We can't transform if ScaledReg is too narrow\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 4886, __PRETTY_FUNCTION__)) | ||||||||||||||||
4885 | cast<IntegerType>(V->getType())->getBitWidth() &&((cast<IntegerType>(IntPtrTy)->getBitWidth() < cast <IntegerType>(V->getType())->getBitWidth() && "We can't transform if ScaledReg is too narrow") ? static_cast <void> (0) : __assert_fail ("cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth() && \"We can't transform if ScaledReg is too narrow\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 4886, __PRETTY_FUNCTION__)) | ||||||||||||||||
4886 | "We can't transform if ScaledReg is too narrow")((cast<IntegerType>(IntPtrTy)->getBitWidth() < cast <IntegerType>(V->getType())->getBitWidth() && "We can't transform if ScaledReg is too narrow") ? static_cast <void> (0) : __assert_fail ("cast<IntegerType>(IntPtrTy)->getBitWidth() < cast<IntegerType>(V->getType())->getBitWidth() && \"We can't transform if ScaledReg is too narrow\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 4886, __PRETTY_FUNCTION__)); | ||||||||||||||||
4887 | V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); | ||||||||||||||||
4888 | } | ||||||||||||||||
4889 | |||||||||||||||||
4890 | if (AddrMode.Scale != 1) | ||||||||||||||||
4891 | V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), | ||||||||||||||||
4892 | "sunkaddr"); | ||||||||||||||||
4893 | if (ResultIndex) | ||||||||||||||||
4894 | ResultIndex = Builder.CreateAdd(ResultIndex, V, "sunkaddr"); | ||||||||||||||||
4895 | else | ||||||||||||||||
4896 | ResultIndex = V; | ||||||||||||||||
4897 | } | ||||||||||||||||
4898 | |||||||||||||||||
4899 | // Add in the Base Offset if present. | ||||||||||||||||
4900 | if (AddrMode.BaseOffs) { | ||||||||||||||||
4901 | Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); | ||||||||||||||||
4902 | if (ResultIndex) { | ||||||||||||||||
4903 | // We need to add this separately from the scale above to help with | ||||||||||||||||
4904 | // SDAG consecutive load/store merging. | ||||||||||||||||
4905 | if (ResultPtr->getType() != I8PtrTy) | ||||||||||||||||
4906 | ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); | ||||||||||||||||
4907 | ResultPtr = | ||||||||||||||||
4908 | AddrMode.InBounds | ||||||||||||||||
4909 | ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, | ||||||||||||||||
4910 | "sunkaddr") | ||||||||||||||||
4911 | : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); | ||||||||||||||||
4912 | } | ||||||||||||||||
4913 | |||||||||||||||||
4914 | ResultIndex = V; | ||||||||||||||||
4915 | } | ||||||||||||||||
4916 | |||||||||||||||||
4917 | if (!ResultIndex) { | ||||||||||||||||
4918 | SunkAddr = ResultPtr; | ||||||||||||||||
4919 | } else { | ||||||||||||||||
4920 | if (ResultPtr->getType() != I8PtrTy) | ||||||||||||||||
4921 | ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy); | ||||||||||||||||
4922 | SunkAddr = | ||||||||||||||||
4923 | AddrMode.InBounds | ||||||||||||||||
4924 | ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex, | ||||||||||||||||
4925 | "sunkaddr") | ||||||||||||||||
4926 | : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); | ||||||||||||||||
4927 | } | ||||||||||||||||
4928 | |||||||||||||||||
4929 | if (SunkAddr->getType() != Addr->getType()) | ||||||||||||||||
4930 | SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); | ||||||||||||||||
4931 | } | ||||||||||||||||
4932 | } else { | ||||||||||||||||
4933 | // We'd require a ptrtoint/inttoptr down the line, which we can't do for | ||||||||||||||||
4934 | // non-integral pointers, so in that case bail out now. | ||||||||||||||||
4935 | Type *BaseTy = AddrMode.BaseReg ? AddrMode.BaseReg->getType() : nullptr; | ||||||||||||||||
4936 | Type *ScaleTy = AddrMode.Scale ? AddrMode.ScaledReg->getType() : nullptr; | ||||||||||||||||
4937 | PointerType *BasePtrTy = dyn_cast_or_null<PointerType>(BaseTy); | ||||||||||||||||
4938 | PointerType *ScalePtrTy = dyn_cast_or_null<PointerType>(ScaleTy); | ||||||||||||||||
4939 | if (DL->isNonIntegralPointerType(Addr->getType()) || | ||||||||||||||||
4940 | (BasePtrTy && DL->isNonIntegralPointerType(BasePtrTy)) || | ||||||||||||||||
4941 | (ScalePtrTy && DL->isNonIntegralPointerType(ScalePtrTy)) || | ||||||||||||||||
4942 | (AddrMode.BaseGV && | ||||||||||||||||
4943 | DL->isNonIntegralPointerType(AddrMode.BaseGV->getType()))) | ||||||||||||||||
4944 | return false; | ||||||||||||||||
4945 | |||||||||||||||||
4946 | LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrModedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false) | ||||||||||||||||
4947 | << " for " << *MemoryInst << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"; } } while (false); | ||||||||||||||||
4948 | Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); | ||||||||||||||||
4949 | Value *Result = nullptr; | ||||||||||||||||
4950 | |||||||||||||||||
4951 | // Start with the base register. Do this first so that subsequent address | ||||||||||||||||
4952 | // matching finds it last, which will prevent it from trying to match it | ||||||||||||||||
4953 | // as the scaled value in case it happens to be a mul. That would be | ||||||||||||||||
4954 | // problematic if we've sunk a different mul for the scale, because then | ||||||||||||||||
4955 | // we'd end up sinking both muls. | ||||||||||||||||
4956 | if (AddrMode.BaseReg) { | ||||||||||||||||
4957 | Value *V = AddrMode.BaseReg; | ||||||||||||||||
4958 | if (V->getType()->isPointerTy()) | ||||||||||||||||
4959 | V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); | ||||||||||||||||
4960 | if (V->getType() != IntPtrTy) | ||||||||||||||||
4961 | V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr"); | ||||||||||||||||
4962 | Result = V; | ||||||||||||||||
4963 | } | ||||||||||||||||
4964 | |||||||||||||||||
4965 | // Add the scale value. | ||||||||||||||||
4966 | if (AddrMode.Scale) { | ||||||||||||||||
4967 | Value *V = AddrMode.ScaledReg; | ||||||||||||||||
4968 | if (V->getType() == IntPtrTy) { | ||||||||||||||||
4969 | // done. | ||||||||||||||||
4970 | } else if (V->getType()->isPointerTy()) { | ||||||||||||||||
4971 | V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr"); | ||||||||||||||||
4972 | } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() < | ||||||||||||||||
4973 | cast<IntegerType>(V->getType())->getBitWidth()) { | ||||||||||||||||
4974 | V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr"); | ||||||||||||||||
4975 | } else { | ||||||||||||||||
4976 | // It is only safe to sign extend the BaseReg if we know that the math | ||||||||||||||||
4977 | // required to create it did not overflow before we extend it. Since | ||||||||||||||||
4978 | // the original IR value was tossed in favor of a constant back when | ||||||||||||||||
4979 | // the AddrMode was created we need to bail out gracefully if widths | ||||||||||||||||
4980 | // do not match instead of extending it. | ||||||||||||||||
4981 | Instruction *I = dyn_cast_or_null<Instruction>(Result); | ||||||||||||||||
4982 | if (I && (Result != AddrMode.BaseReg)) | ||||||||||||||||
4983 | I->eraseFromParent(); | ||||||||||||||||
4984 | return false; | ||||||||||||||||
4985 | } | ||||||||||||||||
4986 | if (AddrMode.Scale != 1) | ||||||||||||||||
4987 | V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale), | ||||||||||||||||
4988 | "sunkaddr"); | ||||||||||||||||
4989 | if (Result) | ||||||||||||||||
4990 | Result = Builder.CreateAdd(Result, V, "sunkaddr"); | ||||||||||||||||
4991 | else | ||||||||||||||||
4992 | Result = V; | ||||||||||||||||
4993 | } | ||||||||||||||||
4994 | |||||||||||||||||
4995 | // Add in the BaseGV if present. | ||||||||||||||||
4996 | if (AddrMode.BaseGV) { | ||||||||||||||||
4997 | Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr"); | ||||||||||||||||
4998 | if (Result) | ||||||||||||||||
4999 | Result = Builder.CreateAdd(Result, V, "sunkaddr"); | ||||||||||||||||
5000 | else | ||||||||||||||||
5001 | Result = V; | ||||||||||||||||
5002 | } | ||||||||||||||||
5003 | |||||||||||||||||
5004 | // Add in the Base Offset if present. | ||||||||||||||||
5005 | if (AddrMode.BaseOffs) { | ||||||||||||||||
5006 | Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs); | ||||||||||||||||
5007 | if (Result) | ||||||||||||||||
5008 | Result = Builder.CreateAdd(Result, V, "sunkaddr"); | ||||||||||||||||
5009 | else | ||||||||||||||||
5010 | Result = V; | ||||||||||||||||
5011 | } | ||||||||||||||||
5012 | |||||||||||||||||
5013 | if (!Result) | ||||||||||||||||
5014 | SunkAddr = Constant::getNullValue(Addr->getType()); | ||||||||||||||||
5015 | else | ||||||||||||||||
5016 | SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr"); | ||||||||||||||||
5017 | } | ||||||||||||||||
5018 | |||||||||||||||||
5019 | MemoryInst->replaceUsesOfWith(Repl, SunkAddr); | ||||||||||||||||
5020 | // Store the newly computed address into the cache. In the case we reused a | ||||||||||||||||
5021 | // value, this should be idempotent. | ||||||||||||||||
5022 | SunkAddrs[Addr] = WeakTrackingVH(SunkAddr); | ||||||||||||||||
5023 | |||||||||||||||||
5024 | // If we have no uses, recursively delete the value and all dead instructions | ||||||||||||||||
5025 | // using it. | ||||||||||||||||
5026 | if (Repl->use_empty()) { | ||||||||||||||||
5027 | // This can cause recursive deletion, which can invalidate our iterator. | ||||||||||||||||
5028 | // Use a WeakTrackingVH to hold onto it in case this happens. | ||||||||||||||||
5029 | Value *CurValue = &*CurInstIterator; | ||||||||||||||||
5030 | WeakTrackingVH IterHandle(CurValue); | ||||||||||||||||
5031 | BasicBlock *BB = CurInstIterator->getParent(); | ||||||||||||||||
5032 | |||||||||||||||||
5033 | RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo); | ||||||||||||||||
5034 | |||||||||||||||||
5035 | if (IterHandle != CurValue) { | ||||||||||||||||
5036 | // If the iterator instruction was recursively deleted, start over at the | ||||||||||||||||
5037 | // start of the block. | ||||||||||||||||
5038 | CurInstIterator = BB->begin(); | ||||||||||||||||
5039 | SunkAddrs.clear(); | ||||||||||||||||
5040 | } | ||||||||||||||||
5041 | } | ||||||||||||||||
5042 | ++NumMemoryInsts; | ||||||||||||||||
5043 | return true; | ||||||||||||||||
5044 | } | ||||||||||||||||
5045 | |||||||||||||||||
5046 | /// If there are any memory operands, use OptimizeMemoryInst to sink their | ||||||||||||||||
5047 | /// address computing into the block when possible / profitable. | ||||||||||||||||
5048 | bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { | ||||||||||||||||
5049 | bool MadeChange = false; | ||||||||||||||||
5050 | |||||||||||||||||
5051 | const TargetRegisterInfo *TRI = | ||||||||||||||||
5052 | TM->getSubtargetImpl(*CS->getFunction())->getRegisterInfo(); | ||||||||||||||||
5053 | TargetLowering::AsmOperandInfoVector TargetConstraints = | ||||||||||||||||
5054 | TLI->ParseConstraints(*DL, TRI, CS); | ||||||||||||||||
5055 | unsigned ArgNo = 0; | ||||||||||||||||
5056 | for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) { | ||||||||||||||||
5057 | TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; | ||||||||||||||||
5058 | |||||||||||||||||
5059 | // Compute the constraint code and ConstraintType to use. | ||||||||||||||||
5060 | TLI->ComputeConstraintToUse(OpInfo, SDValue()); | ||||||||||||||||
5061 | |||||||||||||||||
5062 | if (OpInfo.ConstraintType == TargetLowering::C_Memory && | ||||||||||||||||
5063 | OpInfo.isIndirect) { | ||||||||||||||||
5064 | Value *OpVal = CS->getArgOperand(ArgNo++); | ||||||||||||||||
5065 | MadeChange |= optimizeMemoryInst(CS, OpVal, OpVal->getType(), ~0u); | ||||||||||||||||
5066 | } else if (OpInfo.Type == InlineAsm::isInput) | ||||||||||||||||
5067 | ArgNo++; | ||||||||||||||||
5068 | } | ||||||||||||||||
5069 | |||||||||||||||||
5070 | return MadeChange; | ||||||||||||||||
5071 | } | ||||||||||||||||
5072 | |||||||||||||||||
5073 | /// Check if all the uses of \p Val are equivalent (or free) zero or | ||||||||||||||||
5074 | /// sign extensions. | ||||||||||||||||
5075 | static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) { | ||||||||||||||||
5076 | assert(!Val->use_empty() && "Input must have at least one use")((!Val->use_empty() && "Input must have at least one use" ) ? static_cast<void> (0) : __assert_fail ("!Val->use_empty() && \"Input must have at least one use\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5076, __PRETTY_FUNCTION__)); | ||||||||||||||||
5077 | const Instruction *FirstUser = cast<Instruction>(*Val->user_begin()); | ||||||||||||||||
5078 | bool IsSExt = isa<SExtInst>(FirstUser); | ||||||||||||||||
5079 | Type *ExtTy = FirstUser->getType(); | ||||||||||||||||
5080 | for (const User *U : Val->users()) { | ||||||||||||||||
5081 | const Instruction *UI = cast<Instruction>(U); | ||||||||||||||||
5082 | if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI))) | ||||||||||||||||
5083 | return false; | ||||||||||||||||
5084 | Type *CurTy = UI->getType(); | ||||||||||||||||
5085 | // Same input and output types: Same instruction after CSE. | ||||||||||||||||
5086 | if (CurTy == ExtTy) | ||||||||||||||||
5087 | continue; | ||||||||||||||||
5088 | |||||||||||||||||
5089 | // If IsSExt is true, we are in this situation: | ||||||||||||||||
5090 | // a = Val | ||||||||||||||||
5091 | // b = sext ty1 a to ty2 | ||||||||||||||||
5092 | // c = sext ty1 a to ty3 | ||||||||||||||||
5093 | // Assuming ty2 is shorter than ty3, this could be turned into: | ||||||||||||||||
5094 | // a = Val | ||||||||||||||||
5095 | // b = sext ty1 a to ty2 | ||||||||||||||||
5096 | // c = sext ty2 b to ty3 | ||||||||||||||||
5097 | // However, the last sext is not free. | ||||||||||||||||
5098 | if (IsSExt) | ||||||||||||||||
5099 | return false; | ||||||||||||||||
5100 | |||||||||||||||||
5101 | // This is a ZExt, maybe this is free to extend from one type to another. | ||||||||||||||||
5102 | // In that case, we would not account for a different use. | ||||||||||||||||
5103 | Type *NarrowTy; | ||||||||||||||||
5104 | Type *LargeTy; | ||||||||||||||||
5105 | if (ExtTy->getScalarType()->getIntegerBitWidth() > | ||||||||||||||||
5106 | CurTy->getScalarType()->getIntegerBitWidth()) { | ||||||||||||||||
5107 | NarrowTy = CurTy; | ||||||||||||||||
5108 | LargeTy = ExtTy; | ||||||||||||||||
5109 | } else { | ||||||||||||||||
5110 | NarrowTy = ExtTy; | ||||||||||||||||
5111 | LargeTy = CurTy; | ||||||||||||||||
5112 | } | ||||||||||||||||
5113 | |||||||||||||||||
5114 | if (!TLI.isZExtFree(NarrowTy, LargeTy)) | ||||||||||||||||
5115 | return false; | ||||||||||||||||
5116 | } | ||||||||||||||||
5117 | // All uses are the same or can be derived from one another for free. | ||||||||||||||||
5118 | return true; | ||||||||||||||||
5119 | } | ||||||||||||||||
5120 | |||||||||||||||||
5121 | /// Try to speculatively promote extensions in \p Exts and continue | ||||||||||||||||
5122 | /// promoting through newly promoted operands recursively as far as doing so is | ||||||||||||||||
5123 | /// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts. | ||||||||||||||||
5124 | /// When some promotion happened, \p TPT contains the proper state to revert | ||||||||||||||||
5125 | /// them. | ||||||||||||||||
5126 | /// | ||||||||||||||||
5127 | /// \return true if some promotion happened, false otherwise. | ||||||||||||||||
5128 | bool CodeGenPrepare::tryToPromoteExts( | ||||||||||||||||
5129 | TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts, | ||||||||||||||||
5130 | SmallVectorImpl<Instruction *> &ProfitablyMovedExts, | ||||||||||||||||
5131 | unsigned CreatedInstsCost) { | ||||||||||||||||
5132 | bool Promoted = false; | ||||||||||||||||
5133 | |||||||||||||||||
5134 | // Iterate over all the extensions to try to promote them. | ||||||||||||||||
5135 | for (auto I : Exts) { | ||||||||||||||||
5136 | // Early check if we directly have ext(load). | ||||||||||||||||
5137 | if (isa<LoadInst>(I->getOperand(0))) { | ||||||||||||||||
5138 | ProfitablyMovedExts.push_back(I); | ||||||||||||||||
5139 | continue; | ||||||||||||||||
5140 | } | ||||||||||||||||
5141 | |||||||||||||||||
5142 | // Check whether or not we want to do any promotion. The reason we have | ||||||||||||||||
5143 | // this check inside the for loop is to catch the case where an extension | ||||||||||||||||
5144 | // is directly fed by a load because in such case the extension can be moved | ||||||||||||||||
5145 | // up without any promotion on its operands. | ||||||||||||||||
5146 | if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion) | ||||||||||||||||
5147 | return false; | ||||||||||||||||
5148 | |||||||||||||||||
5149 | // Get the action to perform the promotion. | ||||||||||||||||
5150 | TypePromotionHelper::Action TPH = | ||||||||||||||||
5151 | TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts); | ||||||||||||||||
5152 | // Check if we can promote. | ||||||||||||||||
5153 | if (!TPH) { | ||||||||||||||||
5154 | // Save the current extension as we cannot move up through its operand. | ||||||||||||||||
5155 | ProfitablyMovedExts.push_back(I); | ||||||||||||||||
5156 | continue; | ||||||||||||||||
5157 | } | ||||||||||||||||
5158 | |||||||||||||||||
5159 | // Save the current state. | ||||||||||||||||
5160 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | ||||||||||||||||
5161 | TPT.getRestorationPoint(); | ||||||||||||||||
5162 | SmallVector<Instruction *, 4> NewExts; | ||||||||||||||||
5163 | unsigned NewCreatedInstsCost = 0; | ||||||||||||||||
5164 | unsigned ExtCost = !TLI->isExtFree(I); | ||||||||||||||||
5165 | // Promote. | ||||||||||||||||
5166 | Value *PromotedVal = TPH(I, TPT, PromotedInsts, NewCreatedInstsCost, | ||||||||||||||||
5167 | &NewExts, nullptr, *TLI); | ||||||||||||||||
5168 | assert(PromotedVal &&((PromotedVal && "TypePromotionHelper should have filtered out those cases" ) ? static_cast<void> (0) : __assert_fail ("PromotedVal && \"TypePromotionHelper should have filtered out those cases\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5169, __PRETTY_FUNCTION__)) | ||||||||||||||||
5169 | "TypePromotionHelper should have filtered out those cases")((PromotedVal && "TypePromotionHelper should have filtered out those cases" ) ? static_cast<void> (0) : __assert_fail ("PromotedVal && \"TypePromotionHelper should have filtered out those cases\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5169, __PRETTY_FUNCTION__)); | ||||||||||||||||
5170 | |||||||||||||||||
5171 | // We would be able to merge only one extension in a load. | ||||||||||||||||
5172 | // Therefore, if we have more than 1 new extension we heuristically | ||||||||||||||||
5173 | // cut this search path, because it means we degrade the code quality. | ||||||||||||||||
5174 | // With exactly 2, the transformation is neutral, because we will merge | ||||||||||||||||
5175 | // one extension but leave one. However, we optimistically keep going, | ||||||||||||||||
5176 | // because the new extension may be removed too. | ||||||||||||||||
5177 | long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost; | ||||||||||||||||
5178 | // FIXME: It would be possible to propagate a negative value instead of | ||||||||||||||||
5179 | // conservatively ceiling it to 0. | ||||||||||||||||
5180 | TotalCreatedInstsCost = | ||||||||||||||||
5181 | std::max((long long)0, (TotalCreatedInstsCost - ExtCost)); | ||||||||||||||||
5182 | if (!StressExtLdPromotion && | ||||||||||||||||
5183 | (TotalCreatedInstsCost > 1 || | ||||||||||||||||
5184 | !isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) { | ||||||||||||||||
5185 | // This promotion is not profitable, rollback to the previous state, and | ||||||||||||||||
5186 | // save the current extension in ProfitablyMovedExts as the latest | ||||||||||||||||
5187 | // speculative promotion turned out to be unprofitable. | ||||||||||||||||
5188 | TPT.rollback(LastKnownGood); | ||||||||||||||||
5189 | ProfitablyMovedExts.push_back(I); | ||||||||||||||||
5190 | continue; | ||||||||||||||||
5191 | } | ||||||||||||||||
5192 | // Continue promoting NewExts as far as doing so is profitable. | ||||||||||||||||
5193 | SmallVector<Instruction *, 2> NewlyMovedExts; | ||||||||||||||||
5194 | (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost); | ||||||||||||||||
5195 | bool NewPromoted = false; | ||||||||||||||||
5196 | for (auto ExtInst : NewlyMovedExts) { | ||||||||||||||||
5197 | Instruction *MovedExt = cast<Instruction>(ExtInst); | ||||||||||||||||
5198 | Value *ExtOperand = MovedExt->getOperand(0); | ||||||||||||||||
5199 | // If we have reached to a load, we need this extra profitability check | ||||||||||||||||
5200 | // as it could potentially be merged into an ext(load). | ||||||||||||||||
5201 | if (isa<LoadInst>(ExtOperand) && | ||||||||||||||||
5202 | !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost || | ||||||||||||||||
5203 | (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI)))) | ||||||||||||||||
5204 | continue; | ||||||||||||||||
5205 | |||||||||||||||||
5206 | ProfitablyMovedExts.push_back(MovedExt); | ||||||||||||||||
5207 | NewPromoted = true; | ||||||||||||||||
5208 | } | ||||||||||||||||
5209 | |||||||||||||||||
5210 | // If none of speculative promotions for NewExts is profitable, rollback | ||||||||||||||||
5211 | // and save the current extension (I) as the last profitable extension. | ||||||||||||||||
5212 | if (!NewPromoted) { | ||||||||||||||||
5213 | TPT.rollback(LastKnownGood); | ||||||||||||||||
5214 | ProfitablyMovedExts.push_back(I); | ||||||||||||||||
5215 | continue; | ||||||||||||||||
5216 | } | ||||||||||||||||
5217 | // The promotion is profitable. | ||||||||||||||||
5218 | Promoted = true; | ||||||||||||||||
5219 | } | ||||||||||||||||
5220 | return Promoted; | ||||||||||||||||
5221 | } | ||||||||||||||||
5222 | |||||||||||||||||
5223 | /// Merging redundant sexts when one is dominating the other. | ||||||||||||||||
bool CodeGenPrepare::mergeSExts(Function &F) {
  bool Changed = false;
  // ValToSExtendedUses maps a value to the sext instructions that extend it;
  // within each group, CSE sexts that are related by dominance.
  for (auto &Entry : ValToSExtendedUses) {
    SExts &Insts = Entry.second;
    // CurPts holds the surviving representatives for this group: no element
    // of CurPts dominates another.
    SExts CurPts;
    for (Instruction *Inst : Insts) {
      // Skip stale entries: instructions removed earlier in this pass,
      // non-sexts, or sexts whose operand is no longer the key value.
      if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
          Inst->getOperand(0) != Entry.first)
        continue;
      bool inserted = false;
      for (auto &Pt : CurPts) {
        if (getDT(F).dominates(Inst, Pt)) {
          // Inst dominates an existing representative: fold Pt into Inst and
          // make Inst the representative.  Pt is only unlinked (not erased)
          // and remembered in RemovedInsts for later cleanup.
          Pt->replaceAllUsesWith(Inst);
          RemovedInsts.insert(Pt);
          Pt->removeFromParent();
          Pt = Inst;
          inserted = true;
          Changed = true;
          break;
        }
        if (!getDT(F).dominates(Pt, Inst))
          // Give up if we need to merge in a common dominator as the
          // experiments show it is not profitable.
          continue;
        // An existing representative dominates Inst: fold Inst into it.
        Inst->replaceAllUsesWith(Pt);
        RemovedInsts.insert(Inst);
        Inst->removeFromParent();
        inserted = true;
        Changed = true;
        break;
      }
      // No dominance relation with any representative: Inst becomes a new
      // representative of its own.
      if (!inserted)
        CurPts.push_back(Inst);
    }
  }
  return Changed;
}
5261 | |||||||||||||||||
5262 | // Spliting large data structures so that the GEPs accessing them can have | ||||||||||||||||
5263 | // smaller offsets so that they can be sunk to the same blocks as their users. | ||||||||||||||||
5264 | // For example, a large struct starting from %base is splitted into two parts | ||||||||||||||||
5265 | // where the second part starts from %new_base. | ||||||||||||||||
5266 | // | ||||||||||||||||
5267 | // Before: | ||||||||||||||||
5268 | // BB0: | ||||||||||||||||
5269 | // %base = | ||||||||||||||||
5270 | // | ||||||||||||||||
5271 | // BB1: | ||||||||||||||||
5272 | // %gep0 = gep %base, off0 | ||||||||||||||||
5273 | // %gep1 = gep %base, off1 | ||||||||||||||||
5274 | // %gep2 = gep %base, off2 | ||||||||||||||||
5275 | // | ||||||||||||||||
5276 | // BB2: | ||||||||||||||||
5277 | // %load1 = load %gep0 | ||||||||||||||||
5278 | // %load2 = load %gep1 | ||||||||||||||||
5279 | // %load3 = load %gep2 | ||||||||||||||||
5280 | // | ||||||||||||||||
5281 | // After: | ||||||||||||||||
5282 | // BB0: | ||||||||||||||||
5283 | // %base = | ||||||||||||||||
5284 | // %new_base = gep %base, off0 | ||||||||||||||||
5285 | // | ||||||||||||||||
5286 | // BB1: | ||||||||||||||||
5287 | // %new_gep0 = %new_base | ||||||||||||||||
5288 | // %new_gep1 = gep %new_base, off1 - off0 | ||||||||||||||||
5289 | // %new_gep2 = gep %new_base, off2 - off0 | ||||||||||||||||
5290 | // | ||||||||||||||||
5291 | // BB2: | ||||||||||||||||
5292 | // %load1 = load i32, i32* %new_gep0 | ||||||||||||||||
5293 | // %load2 = load i32, i32* %new_gep1 | ||||||||||||||||
5294 | // %load3 = load i32, i32* %new_gep2 | ||||||||||||||||
5295 | // | ||||||||||||||||
5296 | // %new_gep1 and %new_gep2 can be sunk to BB2 now after the splitting because | ||||||||||||||||
5297 | // their offsets are smaller enough to fit into the addressing mode. | ||||||||||||||||
bool CodeGenPrepare::splitLargeGEPOffsets() {
  bool Changed = false;
  // LargeOffsetGEPMap groups GEPs by their base pointer, paired with their
  // accumulated constant offsets.
  for (auto &Entry : LargeOffsetGEPMap) {
    Value *OldBase = Entry.first;
    SmallVectorImpl<std::pair<AssertingVH<GetElementPtrInst>, int64_t>>
        &LargeOffsetGEPs = Entry.second;
    // Order primarily by offset; break ties by LargeOffsetGEPID (insertion
    // order) so the sort is deterministic.
    auto compareGEPOffset =
        [&](const std::pair<GetElementPtrInst *, int64_t> &LHS,
            const std::pair<GetElementPtrInst *, int64_t> &RHS) {
          if (LHS.first == RHS.first)
            return false;
          if (LHS.second != RHS.second)
            return LHS.second < RHS.second;
          return LargeOffsetGEPID[LHS.first] < LargeOffsetGEPID[RHS.first];
        };
    // Sorting all the GEPs of the same data structures based on the offsets.
    llvm::sort(LargeOffsetGEPs, compareGEPOffset);
    // Drop exact duplicates (same GEP recorded twice).
    LargeOffsetGEPs.erase(
        std::unique(LargeOffsetGEPs.begin(), LargeOffsetGEPs.end()),
        LargeOffsetGEPs.end());
    // Skip if all the GEPs have the same offsets.
    if (LargeOffsetGEPs.front().second == LargeOffsetGEPs.back().second)
      continue;
    // The GEP with the smallest offset seeds the first split base.
    GetElementPtrInst *BaseGEP = LargeOffsetGEPs.begin()->first;
    int64_t BaseOffset = LargeOffsetGEPs.begin()->second;
    Value *NewBaseGEP = nullptr;

    auto LargeOffsetGEP = LargeOffsetGEPs.begin();
    while (LargeOffsetGEP != LargeOffsetGEPs.end()) {
      GetElementPtrInst *GEP = LargeOffsetGEP->first;
      int64_t Offset = LargeOffsetGEP->second;
      if (Offset != BaseOffset) {
        TargetLowering::AddrMode AddrMode;
        AddrMode.BaseOffs = Offset - BaseOffset;
        // The result type of the GEP might not be the type of the memory
        // access.
        if (!TLI->isLegalAddressingMode(*DL, AddrMode,
                                        GEP->getResultElementType(),
                                        GEP->getAddressSpace())) {
          // We need to create a new base if the offset to the current base is
          // too large to fit into the addressing mode. So, a very large struct
          // may be splitted into several parts.
          BaseGEP = GEP;
          BaseOffset = Offset;
          NewBaseGEP = nullptr;
        }
      }

      // Generate a new GEP to replace the current one.
      LLVMContext &Ctx = GEP->getContext();
      Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
      Type *I8PtrTy =
          Type::getInt8PtrTy(Ctx, GEP->getType()->getPointerAddressSpace());
      Type *I8Ty = Type::getInt8Ty(Ctx);

      if (!NewBaseGEP) {
        // Create a new base if we don't have one yet. Find the insertion
        // pointer for the new base first.
        BasicBlock::iterator NewBaseInsertPt;
        BasicBlock *NewBaseInsertBB;
        if (auto *BaseI = dyn_cast<Instruction>(OldBase)) {
          // If the base of the struct is an instruction, the new base will be
          // inserted close to it.
          NewBaseInsertBB = BaseI->getParent();
          if (isa<PHINode>(BaseI))
            // PHIs must stay grouped at the block top; insert after them.
            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
          else if (InvokeInst *Invoke = dyn_cast<InvokeInst>(BaseI)) {
            // An invoke terminates its block; split the normal edge so the
            // new base has a place to live on the non-exceptional path.
            NewBaseInsertBB =
                SplitEdge(NewBaseInsertBB, Invoke->getNormalDest());
            NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
          } else
            NewBaseInsertPt = std::next(BaseI->getIterator());
        } else {
          // If the current base is an argument or global value, the new base
          // will be inserted to the entry block.
          NewBaseInsertBB = &BaseGEP->getFunction()->getEntryBlock();
          NewBaseInsertPt = NewBaseInsertBB->getFirstInsertionPt();
        }
        IRBuilder<> NewBaseBuilder(NewBaseInsertBB, NewBaseInsertPt);
        // Create a new base: an i8 GEP of BaseOffset bytes off OldBase.
        Value *BaseIndex = ConstantInt::get(IntPtrTy, BaseOffset);
        NewBaseGEP = OldBase;
        if (NewBaseGEP->getType() != I8PtrTy)
          NewBaseGEP = NewBaseBuilder.CreatePointerCast(NewBaseGEP, I8PtrTy);
        NewBaseGEP =
            NewBaseBuilder.CreateGEP(I8Ty, NewBaseGEP, BaseIndex, "splitgep");
        NewGEPBases.insert(NewBaseGEP);
      }

      IRBuilder<> Builder(GEP);
      Value *NewGEP = NewBaseGEP;
      if (Offset == BaseOffset) {
        // Offset coincides with the new base: only a cast may be needed.
        if (GEP->getType() != I8PtrTy)
          NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
      } else {
        // Calculate the new offset for the new GEP.
        Value *Index = ConstantInt::get(IntPtrTy, Offset - BaseOffset);
        NewGEP = Builder.CreateGEP(I8Ty, NewBaseGEP, Index);

        if (GEP->getType() != I8PtrTy)
          NewGEP = Builder.CreatePointerCast(NewGEP, GEP->getType());
      }
      // Replace the old GEP and drop all bookkeeping for it; erase() returns
      // the iterator to the next entry, keeping the loop valid.
      GEP->replaceAllUsesWith(NewGEP);
      LargeOffsetGEPID.erase(GEP);
      LargeOffsetGEP = LargeOffsetGEPs.erase(LargeOffsetGEP);
      GEP->eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}
5409 | |||||||||||||||||
5410 | /// Return true, if an ext(load) can be formed from an extension in | ||||||||||||||||
5411 | /// \p MovedExts. | ||||||||||||||||
5412 | bool CodeGenPrepare::canFormExtLd( | ||||||||||||||||
5413 | const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI, | ||||||||||||||||
5414 | Instruction *&Inst, bool HasPromoted) { | ||||||||||||||||
5415 | for (auto *MovedExtInst : MovedExts) { | ||||||||||||||||
5416 | if (isa<LoadInst>(MovedExtInst->getOperand(0))) { | ||||||||||||||||
5417 | LI = cast<LoadInst>(MovedExtInst->getOperand(0)); | ||||||||||||||||
5418 | Inst = MovedExtInst; | ||||||||||||||||
5419 | break; | ||||||||||||||||
5420 | } | ||||||||||||||||
5421 | } | ||||||||||||||||
5422 | if (!LI) | ||||||||||||||||
5423 | return false; | ||||||||||||||||
5424 | |||||||||||||||||
5425 | // If they're already in the same block, there's nothing to do. | ||||||||||||||||
5426 | // Make the cheap checks first if we did not promote. | ||||||||||||||||
5427 | // If we promoted, we need to check if it is indeed profitable. | ||||||||||||||||
5428 | if (!HasPromoted && LI->getParent() == Inst->getParent()) | ||||||||||||||||
5429 | return false; | ||||||||||||||||
5430 | |||||||||||||||||
5431 | return TLI->isExtLoad(LI, Inst, *DL); | ||||||||||||||||
5432 | } | ||||||||||||||||
5433 | |||||||||||||||||
5434 | /// Move a zext or sext fed by a load into the same basic block as the load, | ||||||||||||||||
5435 | /// unless conditions are unfavorable. This allows SelectionDAG to fold the | ||||||||||||||||
5436 | /// extend into the load. | ||||||||||||||||
5437 | /// | ||||||||||||||||
5438 | /// E.g., | ||||||||||||||||
5439 | /// \code | ||||||||||||||||
5440 | /// %ld = load i32* %addr | ||||||||||||||||
5441 | /// %add = add nuw i32 %ld, 4 | ||||||||||||||||
5442 | /// %zext = zext i32 %add to i64 | ||||||||||||||||
5443 | // \endcode | ||||||||||||||||
5444 | /// => | ||||||||||||||||
5445 | /// \code | ||||||||||||||||
5446 | /// %ld = load i32* %addr | ||||||||||||||||
5447 | /// %zext = zext i32 %ld to i64 | ||||||||||||||||
5448 | /// %add = add nuw i64 %zext, 4 | ||||||||||||||||
5449 | /// \encode | ||||||||||||||||
5450 | /// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which | ||||||||||||||||
5451 | /// allow us to match zext(load i32*) to i64. | ||||||||||||||||
5452 | /// | ||||||||||||||||
5453 | /// Also, try to promote the computations used to obtain a sign extended | ||||||||||||||||
5454 | /// value used into memory accesses. | ||||||||||||||||
5455 | /// E.g., | ||||||||||||||||
5456 | /// \code | ||||||||||||||||
5457 | /// a = add nsw i32 b, 3 | ||||||||||||||||
5458 | /// d = sext i32 a to i64 | ||||||||||||||||
5459 | /// e = getelementptr ..., i64 d | ||||||||||||||||
5460 | /// \endcode | ||||||||||||||||
5461 | /// => | ||||||||||||||||
5462 | /// \code | ||||||||||||||||
5463 | /// f = sext i32 b to i64 | ||||||||||||||||
5464 | /// a = add nsw i64 f, 3 | ||||||||||||||||
5465 | /// e = getelementptr ..., i64 a | ||||||||||||||||
5466 | /// \endcode | ||||||||||||||||
5467 | /// | ||||||||||||||||
5468 | /// \p Inst[in/out] the extension may be modified during the process if some | ||||||||||||||||
5469 | /// promotions apply. | ||||||||||||||||
5470 | bool CodeGenPrepare::optimizeExt(Instruction *&Inst) { | ||||||||||||||||
5471 | // ExtLoad formation and address type promotion infrastructure requires TLI to | ||||||||||||||||
5472 | // be effective. | ||||||||||||||||
5473 | if (!TLI) | ||||||||||||||||
5474 | return false; | ||||||||||||||||
5475 | |||||||||||||||||
5476 | bool AllowPromotionWithoutCommonHeader = false; | ||||||||||||||||
5477 | /// See if it is an interesting sext operations for the address type | ||||||||||||||||
5478 | /// promotion before trying to promote it, e.g., the ones with the right | ||||||||||||||||
5479 | /// type and used in memory accesses. | ||||||||||||||||
5480 | bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion( | ||||||||||||||||
5481 | *Inst, AllowPromotionWithoutCommonHeader); | ||||||||||||||||
5482 | TypePromotionTransaction TPT(RemovedInsts); | ||||||||||||||||
5483 | TypePromotionTransaction::ConstRestorationPt LastKnownGood = | ||||||||||||||||
5484 | TPT.getRestorationPoint(); | ||||||||||||||||
5485 | SmallVector<Instruction *, 1> Exts; | ||||||||||||||||
5486 | SmallVector<Instruction *, 2> SpeculativelyMovedExts; | ||||||||||||||||
5487 | Exts.push_back(Inst); | ||||||||||||||||
5488 | |||||||||||||||||
5489 | bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts); | ||||||||||||||||
5490 | |||||||||||||||||
5491 | // Look for a load being extended. | ||||||||||||||||
5492 | LoadInst *LI = nullptr; | ||||||||||||||||
5493 | Instruction *ExtFedByLoad; | ||||||||||||||||
5494 | |||||||||||||||||
5495 | // Try to promote a chain of computation if it allows to form an extended | ||||||||||||||||
5496 | // load. | ||||||||||||||||
5497 | if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) { | ||||||||||||||||
5498 | assert(LI && ExtFedByLoad && "Expect a valid load and extension")((LI && ExtFedByLoad && "Expect a valid load and extension" ) ? static_cast<void> (0) : __assert_fail ("LI && ExtFedByLoad && \"Expect a valid load and extension\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5498, __PRETTY_FUNCTION__)); | ||||||||||||||||
5499 | TPT.commit(); | ||||||||||||||||
5500 | // Move the extend into the same block as the load | ||||||||||||||||
5501 | ExtFedByLoad->moveAfter(LI); | ||||||||||||||||
5502 | // CGP does not check if the zext would be speculatively executed when moved | ||||||||||||||||
5503 | // to the same basic block as the load. Preserving its original location | ||||||||||||||||
5504 | // would pessimize the debugging experience, as well as negatively impact | ||||||||||||||||
5505 | // the quality of sample pgo. We don't want to use "line 0" as that has a | ||||||||||||||||
5506 | // size cost in the line-table section and logically the zext can be seen as | ||||||||||||||||
5507 | // part of the load. Therefore we conservatively reuse the same debug | ||||||||||||||||
5508 | // location for the load and the zext. | ||||||||||||||||
5509 | ExtFedByLoad->setDebugLoc(LI->getDebugLoc()); | ||||||||||||||||
5510 | ++NumExtsMoved; | ||||||||||||||||
5511 | Inst = ExtFedByLoad; | ||||||||||||||||
5512 | return true; | ||||||||||||||||
5513 | } | ||||||||||||||||
5514 | |||||||||||||||||
5515 | // Continue promoting SExts if known as considerable depending on targets. | ||||||||||||||||
5516 | if (ATPConsiderable && | ||||||||||||||||
5517 | performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader, | ||||||||||||||||
5518 | HasPromoted, TPT, SpeculativelyMovedExts)) | ||||||||||||||||
5519 | return true; | ||||||||||||||||
5520 | |||||||||||||||||
5521 | TPT.rollback(LastKnownGood); | ||||||||||||||||
5522 | return false; | ||||||||||||||||
5523 | } | ||||||||||||||||
5524 | |||||||||||||||||
5525 | // Perform address type promotion if doing so is profitable. | ||||||||||||||||
5526 | // If AllowPromotionWithoutCommonHeader == false, we should find other sext | ||||||||||||||||
5527 | // instructions that sign extended the same initial value. However, if | ||||||||||||||||
5528 | // AllowPromotionWithoutCommonHeader == true, we expect promoting the | ||||||||||||||||
5529 | // extension is just profitable. | ||||||||||||||||
bool CodeGenPrepare::performAddressTypePromotion(
    Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
    bool HasPromoted, TypePromotionTransaction &TPT,
    SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
  bool Promoted = false;
  // SExts recorded earlier in SeenChainsForSExt that share a chain header with
  // the current one; they get promoted in the second phase below.
  SmallPtrSet<Instruction *, 1> UnhandledExts;
  // True while no chain header of SpeculativelyMovedExts has been seen before.
  bool AllSeenFirst = true;
  for (auto I : SpeculativelyMovedExts) {
    Value *HeadOfChain = I->getOperand(0);
    DenseMap<Value *, Instruction *>::iterator AlreadySeen =
        SeenChainsForSExt.find(HeadOfChain);
    // If there is an unhandled SExt which has the same header, try to promote
    // it as well.
    if (AlreadySeen != SeenChainsForSExt.end()) {
      // A null mapped value means that header was already handled.
      if (AlreadySeen->second != nullptr)
        UnhandledExts.insert(AlreadySeen->second);
      AllSeenFirst = false;
    }
  }

  // Commit the promotion either when another chain with the same header was
  // seen before, or when the target allows promotion without a common header
  // and the chain is a single extension.
  if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
                        SpeculativelyMovedExts.size() == 1)) {
    TPT.commit();
    if (HasPromoted)
      Promoted = true;
    for (auto I : SpeculativelyMovedExts) {
      Value *HeadOfChain = I->getOperand(0);
      // Mark the header as handled and record the promoted extension.
      SeenChainsForSExt[HeadOfChain] = nullptr;
      ValToSExtendedUses[HeadOfChain].push_back(I);
    }
    // Update Inst as the promotion happened; the caller continues from the
    // (new) last extension of the chain.
    Inst = SpeculativelyMovedExts.pop_back_val();
  } else {
    // This is the first chain visited from the header, keep the current chain
    // as unhandled. Defer to promote this until we encounter another SExt
    // chain derived from the same header.
    for (auto I : SpeculativelyMovedExts) {
      Value *HeadOfChain = I->getOperand(0);
      SeenChainsForSExt[HeadOfChain] = Inst;
    }
    return false;
  }

  // Second phase: promote the previously deferred SExts that share a header
  // with the chain just committed, each in its own transaction.
  if (!AllSeenFirst && !UnhandledExts.empty())
    for (auto VisitedSExt : UnhandledExts) {
      // Skip extensions already deleted by earlier transformations.
      if (RemovedInsts.count(VisitedSExt))
        continue;
      TypePromotionTransaction TPT(RemovedInsts);
      SmallVector<Instruction *, 1> Exts;
      SmallVector<Instruction *, 2> Chains;
      Exts.push_back(VisitedSExt);
      bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
      TPT.commit();
      if (HasPromoted)
        Promoted = true;
      for (auto I : Chains) {
        Value *HeadOfChain = I->getOperand(0);
        // Mark this as handled.
        SeenChainsForSExt[HeadOfChain] = nullptr;
        ValToSExtendedUses[HeadOfChain].push_back(I);
      }
    }
  return Promoted;
}
5594 | |||||||||||||||||
5595 | bool CodeGenPrepare::optimizeExtUses(Instruction *I) { | ||||||||||||||||
5596 | BasicBlock *DefBB = I->getParent(); | ||||||||||||||||
5597 | |||||||||||||||||
5598 | // If the result of a {s|z}ext and its source are both live out, rewrite all | ||||||||||||||||
5599 | // other uses of the source with result of extension. | ||||||||||||||||
5600 | Value *Src = I->getOperand(0); | ||||||||||||||||
5601 | if (Src->hasOneUse()) | ||||||||||||||||
5602 | return false; | ||||||||||||||||
5603 | |||||||||||||||||
5604 | // Only do this xform if truncating is free. | ||||||||||||||||
5605 | if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType())) | ||||||||||||||||
5606 | return false; | ||||||||||||||||
5607 | |||||||||||||||||
5608 | // Only safe to perform the optimization if the source is also defined in | ||||||||||||||||
5609 | // this block. | ||||||||||||||||
5610 | if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent()) | ||||||||||||||||
5611 | return false; | ||||||||||||||||
5612 | |||||||||||||||||
5613 | bool DefIsLiveOut = false; | ||||||||||||||||
5614 | for (User *U : I->users()) { | ||||||||||||||||
5615 | Instruction *UI = cast<Instruction>(U); | ||||||||||||||||
5616 | |||||||||||||||||
5617 | // Figure out which BB this ext is used in. | ||||||||||||||||
5618 | BasicBlock *UserBB = UI->getParent(); | ||||||||||||||||
5619 | if (UserBB == DefBB) continue; | ||||||||||||||||
5620 | DefIsLiveOut = true; | ||||||||||||||||
5621 | break; | ||||||||||||||||
5622 | } | ||||||||||||||||
5623 | if (!DefIsLiveOut) | ||||||||||||||||
5624 | return false; | ||||||||||||||||
5625 | |||||||||||||||||
5626 | // Make sure none of the uses are PHI nodes. | ||||||||||||||||
5627 | for (User *U : Src->users()) { | ||||||||||||||||
5628 | Instruction *UI = cast<Instruction>(U); | ||||||||||||||||
5629 | BasicBlock *UserBB = UI->getParent(); | ||||||||||||||||
5630 | if (UserBB == DefBB) continue; | ||||||||||||||||
5631 | // Be conservative. We don't want this xform to end up introducing | ||||||||||||||||
5632 | // reloads just before load / store instructions. | ||||||||||||||||
5633 | if (isa<PHINode>(UI) || isa<LoadInst>(UI) || isa<StoreInst>(UI)) | ||||||||||||||||
5634 | return false; | ||||||||||||||||
5635 | } | ||||||||||||||||
5636 | |||||||||||||||||
5637 | // InsertedTruncs - Only insert one trunc in each block once. | ||||||||||||||||
5638 | DenseMap<BasicBlock*, Instruction*> InsertedTruncs; | ||||||||||||||||
5639 | |||||||||||||||||
5640 | bool MadeChange = false; | ||||||||||||||||
5641 | for (Use &U : Src->uses()) { | ||||||||||||||||
5642 | Instruction *User = cast<Instruction>(U.getUser()); | ||||||||||||||||
5643 | |||||||||||||||||
5644 | // Figure out which BB this ext is used in. | ||||||||||||||||
5645 | BasicBlock *UserBB = User->getParent(); | ||||||||||||||||
5646 | if (UserBB == DefBB) continue; | ||||||||||||||||
5647 | |||||||||||||||||
5648 | // Both src and def are live in this block. Rewrite the use. | ||||||||||||||||
5649 | Instruction *&InsertedTrunc = InsertedTruncs[UserBB]; | ||||||||||||||||
5650 | |||||||||||||||||
5651 | if (!InsertedTrunc) { | ||||||||||||||||
5652 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | ||||||||||||||||
5653 | assert(InsertPt != UserBB->end())((InsertPt != UserBB->end()) ? static_cast<void> (0) : __assert_fail ("InsertPt != UserBB->end()", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5653, __PRETTY_FUNCTION__)); | ||||||||||||||||
5654 | InsertedTrunc = new TruncInst(I, Src->getType(), "", &*InsertPt); | ||||||||||||||||
5655 | InsertedInsts.insert(InsertedTrunc); | ||||||||||||||||
5656 | } | ||||||||||||||||
5657 | |||||||||||||||||
5658 | // Replace a use of the {s|z}ext source with a use of the result. | ||||||||||||||||
5659 | U = InsertedTrunc; | ||||||||||||||||
5660 | ++NumExtUses; | ||||||||||||||||
5661 | MadeChange = true; | ||||||||||||||||
5662 | } | ||||||||||||||||
5663 | |||||||||||||||||
5664 | return MadeChange; | ||||||||||||||||
5665 | } | ||||||||||||||||
5666 | |||||||||||||||||
5667 | // Find loads whose uses only use some of the loaded value's bits. Add an "and" | ||||||||||||||||
5668 | // just after the load if the target can fold this into one extload instruction, | ||||||||||||||||
5669 | // with the hope of eliminating some of the other later "and" instructions using | ||||||||||||||||
5670 | // the loaded value. "and"s that are made trivially redundant by the insertion | ||||||||||||||||
5671 | // of the new "and" are removed by this function, while others (e.g. those whose | ||||||||||||||||
5672 | // path from the load goes through a phi) are left for isel to potentially | ||||||||||||||||
5673 | // remove. | ||||||||||||||||
5674 | // | ||||||||||||||||
5675 | // For example: | ||||||||||||||||
5676 | // | ||||||||||||||||
5677 | // b0: | ||||||||||||||||
5678 | // x = load i32 | ||||||||||||||||
5679 | // ... | ||||||||||||||||
5680 | // b1: | ||||||||||||||||
5681 | // y = and x, 0xff | ||||||||||||||||
5682 | // z = use y | ||||||||||||||||
5683 | // | ||||||||||||||||
5684 | // becomes: | ||||||||||||||||
5685 | // | ||||||||||||||||
5686 | // b0: | ||||||||||||||||
5687 | // x = load i32 | ||||||||||||||||
5688 | // x' = and x, 0xff | ||||||||||||||||
5689 | // ... | ||||||||||||||||
5690 | // b1: | ||||||||||||||||
5691 | // z = use x' | ||||||||||||||||
5692 | // | ||||||||||||||||
5693 | // whereas: | ||||||||||||||||
5694 | // | ||||||||||||||||
5695 | // b0: | ||||||||||||||||
5696 | // x1 = load i32 | ||||||||||||||||
5697 | // ... | ||||||||||||||||
5698 | // b1: | ||||||||||||||||
5699 | // x2 = load i32 | ||||||||||||||||
5700 | // ... | ||||||||||||||||
5701 | // b2: | ||||||||||||||||
5702 | // x = phi x1, x2 | ||||||||||||||||
5703 | // y = and x, 0xff | ||||||||||||||||
5704 | // | ||||||||||||||||
5705 | // becomes (after a call to optimizeLoadExt for each load): | ||||||||||||||||
5706 | // | ||||||||||||||||
5707 | // b0: | ||||||||||||||||
5708 | // x1 = load i32 | ||||||||||||||||
5709 | // x1' = and x1, 0xff | ||||||||||||||||
5710 | // ... | ||||||||||||||||
5711 | // b1: | ||||||||||||||||
5712 | // x2 = load i32 | ||||||||||||||||
5713 | // x2' = and x2, 0xff | ||||||||||||||||
5714 | // ... | ||||||||||||||||
5715 | // b2: | ||||||||||||||||
5716 | // x = phi x1', x2' | ||||||||||||||||
5717 | // y = and x, 0xff | ||||||||||||||||
5718 | bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) { | ||||||||||||||||
5719 | if (!Load->isSimple() || !Load->getType()->isIntOrPtrTy()) | ||||||||||||||||
5720 | return false; | ||||||||||||||||
5721 | |||||||||||||||||
5722 | // Skip loads we've already transformed. | ||||||||||||||||
5723 | if (Load->hasOneUse() && | ||||||||||||||||
5724 | InsertedInsts.count(cast<Instruction>(*Load->user_begin()))) | ||||||||||||||||
5725 | return false; | ||||||||||||||||
5726 | |||||||||||||||||
5727 | // Look at all uses of Load, looking through phis, to determine how many bits | ||||||||||||||||
5728 | // of the loaded value are needed. | ||||||||||||||||
5729 | SmallVector<Instruction *, 8> WorkList; | ||||||||||||||||
5730 | SmallPtrSet<Instruction *, 16> Visited; | ||||||||||||||||
5731 | SmallVector<Instruction *, 8> AndsToMaybeRemove; | ||||||||||||||||
5732 | for (auto *U : Load->users()) | ||||||||||||||||
5733 | WorkList.push_back(cast<Instruction>(U)); | ||||||||||||||||
5734 | |||||||||||||||||
5735 | EVT LoadResultVT = TLI->getValueType(*DL, Load->getType()); | ||||||||||||||||
5736 | unsigned BitWidth = LoadResultVT.getSizeInBits(); | ||||||||||||||||
5737 | APInt DemandBits(BitWidth, 0); | ||||||||||||||||
5738 | APInt WidestAndBits(BitWidth, 0); | ||||||||||||||||
5739 | |||||||||||||||||
5740 | while (!WorkList.empty()) { | ||||||||||||||||
5741 | Instruction *I = WorkList.back(); | ||||||||||||||||
5742 | WorkList.pop_back(); | ||||||||||||||||
5743 | |||||||||||||||||
5744 | // Break use-def graph loops. | ||||||||||||||||
5745 | if (!Visited.insert(I).second) | ||||||||||||||||
5746 | continue; | ||||||||||||||||
5747 | |||||||||||||||||
5748 | // For a PHI node, push all of its users. | ||||||||||||||||
5749 | if (auto *Phi = dyn_cast<PHINode>(I)) { | ||||||||||||||||
5750 | for (auto *U : Phi->users()) | ||||||||||||||||
5751 | WorkList.push_back(cast<Instruction>(U)); | ||||||||||||||||
5752 | continue; | ||||||||||||||||
5753 | } | ||||||||||||||||
5754 | |||||||||||||||||
5755 | switch (I->getOpcode()) { | ||||||||||||||||
5756 | case Instruction::And: { | ||||||||||||||||
5757 | auto *AndC = dyn_cast<ConstantInt>(I->getOperand(1)); | ||||||||||||||||
5758 | if (!AndC) | ||||||||||||||||
5759 | return false; | ||||||||||||||||
5760 | APInt AndBits = AndC->getValue(); | ||||||||||||||||
5761 | DemandBits |= AndBits; | ||||||||||||||||
5762 | // Keep track of the widest and mask we see. | ||||||||||||||||
5763 | if (AndBits.ugt(WidestAndBits)) | ||||||||||||||||
5764 | WidestAndBits = AndBits; | ||||||||||||||||
5765 | if (AndBits == WidestAndBits && I->getOperand(0) == Load) | ||||||||||||||||
5766 | AndsToMaybeRemove.push_back(I); | ||||||||||||||||
5767 | break; | ||||||||||||||||
5768 | } | ||||||||||||||||
5769 | |||||||||||||||||
5770 | case Instruction::Shl: { | ||||||||||||||||
5771 | auto *ShlC = dyn_cast<ConstantInt>(I->getOperand(1)); | ||||||||||||||||
5772 | if (!ShlC) | ||||||||||||||||
5773 | return false; | ||||||||||||||||
5774 | uint64_t ShiftAmt = ShlC->getLimitedValue(BitWidth - 1); | ||||||||||||||||
5775 | DemandBits.setLowBits(BitWidth - ShiftAmt); | ||||||||||||||||
5776 | break; | ||||||||||||||||
5777 | } | ||||||||||||||||
5778 | |||||||||||||||||
5779 | case Instruction::Trunc: { | ||||||||||||||||
5780 | EVT TruncVT = TLI->getValueType(*DL, I->getType()); | ||||||||||||||||
5781 | unsigned TruncBitWidth = TruncVT.getSizeInBits(); | ||||||||||||||||
5782 | DemandBits.setLowBits(TruncBitWidth); | ||||||||||||||||
5783 | break; | ||||||||||||||||
5784 | } | ||||||||||||||||
5785 | |||||||||||||||||
5786 | default: | ||||||||||||||||
5787 | return false; | ||||||||||||||||
5788 | } | ||||||||||||||||
5789 | } | ||||||||||||||||
5790 | |||||||||||||||||
5791 | uint32_t ActiveBits = DemandBits.getActiveBits(); | ||||||||||||||||
5792 | // Avoid hoisting (and (load x) 1) since it is unlikely to be folded by the | ||||||||||||||||
5793 | // target even if isLoadExtLegal says an i1 EXTLOAD is valid. For example, | ||||||||||||||||
5794 | // for the AArch64 target isLoadExtLegal(ZEXTLOAD, i32, i1) returns true, but | ||||||||||||||||
5795 | // (and (load x) 1) is not matched as a single instruction, rather as a LDR | ||||||||||||||||
5796 | // followed by an AND. | ||||||||||||||||
5797 | // TODO: Look into removing this restriction by fixing backends to either | ||||||||||||||||
5798 | // return false for isLoadExtLegal for i1 or have them select this pattern to | ||||||||||||||||
5799 | // a single instruction. | ||||||||||||||||
5800 | // | ||||||||||||||||
5801 | // Also avoid hoisting if we didn't see any ands with the exact DemandBits | ||||||||||||||||
5802 | // mask, since these are the only ands that will be removed by isel. | ||||||||||||||||
5803 | if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) || | ||||||||||||||||
5804 | WidestAndBits != DemandBits) | ||||||||||||||||
5805 | return false; | ||||||||||||||||
5806 | |||||||||||||||||
5807 | LLVMContext &Ctx = Load->getType()->getContext(); | ||||||||||||||||
5808 | Type *TruncTy = Type::getIntNTy(Ctx, ActiveBits); | ||||||||||||||||
5809 | EVT TruncVT = TLI->getValueType(*DL, TruncTy); | ||||||||||||||||
5810 | |||||||||||||||||
5811 | // Reject cases that won't be matched as extloads. | ||||||||||||||||
5812 | if (!LoadResultVT.bitsGT(TruncVT) || !TruncVT.isRound() || | ||||||||||||||||
5813 | !TLI->isLoadExtLegal(ISD::ZEXTLOAD, LoadResultVT, TruncVT)) | ||||||||||||||||
5814 | return false; | ||||||||||||||||
5815 | |||||||||||||||||
5816 | IRBuilder<> Builder(Load->getNextNode()); | ||||||||||||||||
5817 | auto *NewAnd = dyn_cast<Instruction>( | ||||||||||||||||
5818 | Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits))); | ||||||||||||||||
5819 | // Mark this instruction as "inserted by CGP", so that other | ||||||||||||||||
5820 | // optimizations don't touch it. | ||||||||||||||||
5821 | InsertedInsts.insert(NewAnd); | ||||||||||||||||
5822 | |||||||||||||||||
5823 | // Replace all uses of load with new and (except for the use of load in the | ||||||||||||||||
5824 | // new and itself). | ||||||||||||||||
5825 | Load->replaceAllUsesWith(NewAnd); | ||||||||||||||||
5826 | NewAnd->setOperand(0, Load); | ||||||||||||||||
| |||||||||||||||||
5827 | |||||||||||||||||
5828 | // Remove any and instructions that are now redundant. | ||||||||||||||||
5829 | for (auto *And : AndsToMaybeRemove) | ||||||||||||||||
5830 | // Check that the and mask is the same as the one we decided to put on the | ||||||||||||||||
5831 | // new and. | ||||||||||||||||
5832 | if (cast<ConstantInt>(And->getOperand(1))->getValue() == DemandBits) { | ||||||||||||||||
5833 | And->replaceAllUsesWith(NewAnd); | ||||||||||||||||
5834 | if (&*CurInstIterator == And) | ||||||||||||||||
5835 | CurInstIterator = std::next(And->getIterator()); | ||||||||||||||||
5836 | And->eraseFromParent(); | ||||||||||||||||
5837 | ++NumAndUses; | ||||||||||||||||
5838 | } | ||||||||||||||||
5839 | |||||||||||||||||
5840 | ++NumAndsAdded; | ||||||||||||||||
5841 | return true; | ||||||||||||||||
5842 | } | ||||||||||||||||
5843 | |||||||||||||||||
5844 | /// Check if V (an operand of a select instruction) is an expensive instruction | ||||||||||||||||
5845 | /// that is only used once. | ||||||||||||||||
5846 | static bool sinkSelectOperand(const TargetTransformInfo *TTI, Value *V) { | ||||||||||||||||
5847 | auto *I = dyn_cast<Instruction>(V); | ||||||||||||||||
5848 | // If it's safe to speculatively execute, then it should not have side | ||||||||||||||||
5849 | // effects; therefore, it's safe to sink and possibly *not* execute. | ||||||||||||||||
5850 | return I && I->hasOneUse() && isSafeToSpeculativelyExecute(I) && | ||||||||||||||||
5851 | TTI->getUserCost(I) >= TargetTransformInfo::TCC_Expensive; | ||||||||||||||||
5852 | } | ||||||||||||||||
5853 | |||||||||||||||||
5854 | /// Returns true if a SelectInst should be turned into an explicit branch. | ||||||||||||||||
5855 | static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI, | ||||||||||||||||
5856 | const TargetLowering *TLI, | ||||||||||||||||
5857 | SelectInst *SI) { | ||||||||||||||||
5858 | // If even a predictable select is cheap, then a branch can't be cheaper. | ||||||||||||||||
5859 | if (!TLI->isPredictableSelectExpensive()) | ||||||||||||||||
5860 | return false; | ||||||||||||||||
5861 | |||||||||||||||||
5862 | // FIXME: This should use the same heuristics as IfConversion to determine | ||||||||||||||||
5863 | // whether a select is better represented as a branch. | ||||||||||||||||
5864 | |||||||||||||||||
5865 | // If metadata tells us that the select condition is obviously predictable, | ||||||||||||||||
5866 | // then we want to replace the select with a branch. | ||||||||||||||||
5867 | uint64_t TrueWeight, FalseWeight; | ||||||||||||||||
5868 | if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { | ||||||||||||||||
5869 | uint64_t Max = std::max(TrueWeight, FalseWeight); | ||||||||||||||||
5870 | uint64_t Sum = TrueWeight + FalseWeight; | ||||||||||||||||
5871 | if (Sum != 0) { | ||||||||||||||||
5872 | auto Probability = BranchProbability::getBranchProbability(Max, Sum); | ||||||||||||||||
5873 | if (Probability > TLI->getPredictableBranchThreshold()) | ||||||||||||||||
5874 | return true; | ||||||||||||||||
5875 | } | ||||||||||||||||
5876 | } | ||||||||||||||||
5877 | |||||||||||||||||
5878 | CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition()); | ||||||||||||||||
5879 | |||||||||||||||||
5880 | // If a branch is predictable, an out-of-order CPU can avoid blocking on its | ||||||||||||||||
5881 | // comparison condition. If the compare has more than one use, there's | ||||||||||||||||
5882 | // probably another cmov or setcc around, so it's not worth emitting a branch. | ||||||||||||||||
5883 | if (!Cmp || !Cmp->hasOneUse()) | ||||||||||||||||
5884 | return false; | ||||||||||||||||
5885 | |||||||||||||||||
5886 | // If either operand of the select is expensive and only needed on one side | ||||||||||||||||
5887 | // of the select, we should form a branch. | ||||||||||||||||
5888 | if (sinkSelectOperand(TTI, SI->getTrueValue()) || | ||||||||||||||||
5889 | sinkSelectOperand(TTI, SI->getFalseValue())) | ||||||||||||||||
5890 | return true; | ||||||||||||||||
5891 | |||||||||||||||||
5892 | return false; | ||||||||||||||||
5893 | } | ||||||||||||||||
5894 | |||||||||||||||||
5895 | /// If \p isTrue is true, return the true value of \p SI, otherwise return | ||||||||||||||||
5896 | /// false value of \p SI. If the true/false value of \p SI is defined by any | ||||||||||||||||
5897 | /// select instructions in \p Selects, look through the defining select | ||||||||||||||||
5898 | /// instruction until the true/false value is not defined in \p Selects. | ||||||||||||||||
5899 | static Value *getTrueOrFalseValue( | ||||||||||||||||
5900 | SelectInst *SI, bool isTrue, | ||||||||||||||||
5901 | const SmallPtrSet<const Instruction *, 2> &Selects) { | ||||||||||||||||
5902 | Value *V = nullptr; | ||||||||||||||||
5903 | |||||||||||||||||
5904 | for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI); | ||||||||||||||||
5905 | DefSI = dyn_cast<SelectInst>(V)) { | ||||||||||||||||
5906 | assert(DefSI->getCondition() == SI->getCondition() &&((DefSI->getCondition() == SI->getCondition() && "The condition of DefSI does not match with SI") ? static_cast <void> (0) : __assert_fail ("DefSI->getCondition() == SI->getCondition() && \"The condition of DefSI does not match with SI\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5907, __PRETTY_FUNCTION__)) | ||||||||||||||||
5907 | "The condition of DefSI does not match with SI")((DefSI->getCondition() == SI->getCondition() && "The condition of DefSI does not match with SI") ? static_cast <void> (0) : __assert_fail ("DefSI->getCondition() == SI->getCondition() && \"The condition of DefSI does not match with SI\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5907, __PRETTY_FUNCTION__)); | ||||||||||||||||
5908 | V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); | ||||||||||||||||
5909 | } | ||||||||||||||||
5910 | |||||||||||||||||
5911 | assert(V && "Failed to get select true/false value")((V && "Failed to get select true/false value") ? static_cast <void> (0) : __assert_fail ("V && \"Failed to get select true/false value\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5911, __PRETTY_FUNCTION__)); | ||||||||||||||||
5912 | return V; | ||||||||||||||||
5913 | } | ||||||||||||||||
5914 | |||||||||||||||||
5915 | bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) { | ||||||||||||||||
5916 | assert(Shift->isShift() && "Expected a shift")((Shift->isShift() && "Expected a shift") ? static_cast <void> (0) : __assert_fail ("Shift->isShift() && \"Expected a shift\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 5916, __PRETTY_FUNCTION__)); | ||||||||||||||||
5917 | |||||||||||||||||
5918 | // If this is (1) a vector shift, (2) shifts by scalars are cheaper than | ||||||||||||||||
5919 | // general vector shifts, and (3) the shift amount is a select-of-splatted | ||||||||||||||||
5920 | // values, hoist the shifts before the select: | ||||||||||||||||
5921 | // shift Op0, (select Cond, TVal, FVal) --> | ||||||||||||||||
5922 | // select Cond, (shift Op0, TVal), (shift Op0, FVal) | ||||||||||||||||
5923 | // | ||||||||||||||||
5924 | // This is inverting a generic IR transform when we know that the cost of a | ||||||||||||||||
5925 | // general vector shift is more than the cost of 2 shift-by-scalars. | ||||||||||||||||
5926 | // We can't do this effectively in SDAG because we may not be able to | ||||||||||||||||
5927 | // determine if the select operands are splats from within a basic block. | ||||||||||||||||
5928 | Type *Ty = Shift->getType(); | ||||||||||||||||
5929 | if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty)) | ||||||||||||||||
5930 | return false; | ||||||||||||||||
5931 | Value *Cond, *TVal, *FVal; | ||||||||||||||||
5932 | if (!match(Shift->getOperand(1), | ||||||||||||||||
5933 | m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal))))) | ||||||||||||||||
5934 | return false; | ||||||||||||||||
5935 | if (!isSplatValue(TVal) || !isSplatValue(FVal)) | ||||||||||||||||
5936 | return false; | ||||||||||||||||
5937 | |||||||||||||||||
5938 | IRBuilder<> Builder(Shift); | ||||||||||||||||
5939 | BinaryOperator::BinaryOps Opcode = Shift->getOpcode(); | ||||||||||||||||
5940 | Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal); | ||||||||||||||||
5941 | Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal); | ||||||||||||||||
5942 | Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal); | ||||||||||||||||
5943 | Shift->replaceAllUsesWith(NewSel); | ||||||||||||||||
5944 | Shift->eraseFromParent(); | ||||||||||||||||
5945 | return true; | ||||||||||||||||
5946 | } | ||||||||||||||||
5947 | |||||||||||||||||
5948 | /// If we have a SelectInst that will likely profit from branch prediction, | ||||||||||||||||
5949 | /// turn it into a branch. | ||||||||||||||||
5950 | bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) { | ||||||||||||||||
5951 | // If branch conversion isn't desirable, exit early. | ||||||||||||||||
5952 | if (DisableSelectToBranch || OptSize || !TLI) | ||||||||||||||||
5953 | return false; | ||||||||||||||||
5954 | |||||||||||||||||
5955 | // Find all consecutive select instructions that share the same condition. | ||||||||||||||||
5956 | SmallVector<SelectInst *, 2> ASI; | ||||||||||||||||
5957 | ASI.push_back(SI); | ||||||||||||||||
5958 | for (BasicBlock::iterator It = ++BasicBlock::iterator(SI); | ||||||||||||||||
5959 | It != SI->getParent()->end(); ++It) { | ||||||||||||||||
5960 | SelectInst *I = dyn_cast<SelectInst>(&*It); | ||||||||||||||||
5961 | if (I && SI->getCondition() == I->getCondition()) { | ||||||||||||||||
5962 | ASI.push_back(I); | ||||||||||||||||
5963 | } else { | ||||||||||||||||
5964 | break; | ||||||||||||||||
5965 | } | ||||||||||||||||
5966 | } | ||||||||||||||||
5967 | |||||||||||||||||
5968 | SelectInst *LastSI = ASI.back(); | ||||||||||||||||
5969 | // Increment the current iterator to skip all the rest of select instructions | ||||||||||||||||
5970 | // because they will be either "not lowered" or "all lowered" to branch. | ||||||||||||||||
5971 | CurInstIterator = std::next(LastSI->getIterator()); | ||||||||||||||||
5972 | |||||||||||||||||
5973 | bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1); | ||||||||||||||||
5974 | |||||||||||||||||
5975 | // Can we convert the 'select' to CF ? | ||||||||||||||||
5976 | if (VectorCond || SI->getMetadata(LLVMContext::MD_unpredictable)) | ||||||||||||||||
5977 | return false; | ||||||||||||||||
5978 | |||||||||||||||||
5979 | TargetLowering::SelectSupportKind SelectKind; | ||||||||||||||||
5980 | if (VectorCond) | ||||||||||||||||
5981 | SelectKind = TargetLowering::VectorMaskSelect; | ||||||||||||||||
5982 | else if (SI->getType()->isVectorTy()) | ||||||||||||||||
5983 | SelectKind = TargetLowering::ScalarCondVectorVal; | ||||||||||||||||
5984 | else | ||||||||||||||||
5985 | SelectKind = TargetLowering::ScalarValSelect; | ||||||||||||||||
5986 | |||||||||||||||||
5987 | if (TLI->isSelectSupported(SelectKind) && | ||||||||||||||||
5988 | !isFormingBranchFromSelectProfitable(TTI, TLI, SI)) | ||||||||||||||||
5989 | return false; | ||||||||||||||||
5990 | |||||||||||||||||
5991 | // The DominatorTree needs to be rebuilt by any consumers after this | ||||||||||||||||
5992 | // transformation. We simply reset here rather than setting the ModifiedDT | ||||||||||||||||
5993 | // flag to avoid restarting the function walk in runOnFunction for each | ||||||||||||||||
5994 | // select optimized. | ||||||||||||||||
5995 | DT.reset(); | ||||||||||||||||
5996 | |||||||||||||||||
5997 | // Transform a sequence like this: | ||||||||||||||||
5998 | // start: | ||||||||||||||||
5999 | // %cmp = cmp uge i32 %a, %b | ||||||||||||||||
6000 | // %sel = select i1 %cmp, i32 %c, i32 %d | ||||||||||||||||
6001 | // | ||||||||||||||||
6002 | // Into: | ||||||||||||||||
6003 | // start: | ||||||||||||||||
6004 | // %cmp = cmp uge i32 %a, %b | ||||||||||||||||
6005 | // br i1 %cmp, label %select.true, label %select.false | ||||||||||||||||
6006 | // select.true: | ||||||||||||||||
6007 | // br label %select.end | ||||||||||||||||
6008 | // select.false: | ||||||||||||||||
6009 | // br label %select.end | ||||||||||||||||
6010 | // select.end: | ||||||||||||||||
6011 | // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] | ||||||||||||||||
6012 | // | ||||||||||||||||
6013 | // In addition, we may sink instructions that produce %c or %d from | ||||||||||||||||
6014 | // the entry block into the destination(s) of the new branch. | ||||||||||||||||
6015 | // If the true or false blocks do not contain a sunken instruction, that | ||||||||||||||||
6016 | // block and its branch may be optimized away. In that case, one side of the | ||||||||||||||||
6017 | // first branch will point directly to select.end, and the corresponding PHI | ||||||||||||||||
6018 | // predecessor block will be the start block. | ||||||||||||||||
6019 | |||||||||||||||||
6020 | // First, we split the block containing the select into 2 blocks. | ||||||||||||||||
6021 | BasicBlock *StartBlock = SI->getParent(); | ||||||||||||||||
6022 | BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI)); | ||||||||||||||||
6023 | BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); | ||||||||||||||||
6024 | |||||||||||||||||
6025 | // Delete the unconditional branch that was just created by the split. | ||||||||||||||||
6026 | StartBlock->getTerminator()->eraseFromParent(); | ||||||||||||||||
6027 | |||||||||||||||||
6028 | // These are the new basic blocks for the conditional branch. | ||||||||||||||||
6029 | // At least one will become an actual new basic block. | ||||||||||||||||
6030 | BasicBlock *TrueBlock = nullptr; | ||||||||||||||||
6031 | BasicBlock *FalseBlock = nullptr; | ||||||||||||||||
6032 | BranchInst *TrueBranch = nullptr; | ||||||||||||||||
6033 | BranchInst *FalseBranch = nullptr; | ||||||||||||||||
6034 | |||||||||||||||||
6035 | // Sink expensive instructions into the conditional blocks to avoid executing | ||||||||||||||||
6036 | // them speculatively. | ||||||||||||||||
6037 | for (SelectInst *SI : ASI) { | ||||||||||||||||
6038 | if (sinkSelectOperand(TTI, SI->getTrueValue())) { | ||||||||||||||||
6039 | if (TrueBlock == nullptr) { | ||||||||||||||||
6040 | TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink", | ||||||||||||||||
6041 | EndBlock->getParent(), EndBlock); | ||||||||||||||||
6042 | TrueBranch = BranchInst::Create(EndBlock, TrueBlock); | ||||||||||||||||
6043 | TrueBranch->setDebugLoc(SI->getDebugLoc()); | ||||||||||||||||
6044 | } | ||||||||||||||||
6045 | auto *TrueInst = cast<Instruction>(SI->getTrueValue()); | ||||||||||||||||
6046 | TrueInst->moveBefore(TrueBranch); | ||||||||||||||||
6047 | } | ||||||||||||||||
6048 | if (sinkSelectOperand(TTI, SI->getFalseValue())) { | ||||||||||||||||
6049 | if (FalseBlock == nullptr) { | ||||||||||||||||
6050 | FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink", | ||||||||||||||||
6051 | EndBlock->getParent(), EndBlock); | ||||||||||||||||
6052 | FalseBranch = BranchInst::Create(EndBlock, FalseBlock); | ||||||||||||||||
6053 | FalseBranch->setDebugLoc(SI->getDebugLoc()); | ||||||||||||||||
6054 | } | ||||||||||||||||
6055 | auto *FalseInst = cast<Instruction>(SI->getFalseValue()); | ||||||||||||||||
6056 | FalseInst->moveBefore(FalseBranch); | ||||||||||||||||
6057 | } | ||||||||||||||||
6058 | } | ||||||||||||||||
6059 | |||||||||||||||||
6060 | // If there was nothing to sink, then arbitrarily choose the 'false' side | ||||||||||||||||
6061 | // for a new input value to the PHI. | ||||||||||||||||
6062 | if (TrueBlock == FalseBlock) { | ||||||||||||||||
6063 | assert(TrueBlock == nullptr &&((TrueBlock == nullptr && "Unexpected basic block transform while optimizing select" ) ? static_cast<void> (0) : __assert_fail ("TrueBlock == nullptr && \"Unexpected basic block transform while optimizing select\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6064, __PRETTY_FUNCTION__)) | ||||||||||||||||
6064 | "Unexpected basic block transform while optimizing select")((TrueBlock == nullptr && "Unexpected basic block transform while optimizing select" ) ? static_cast<void> (0) : __assert_fail ("TrueBlock == nullptr && \"Unexpected basic block transform while optimizing select\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6064, __PRETTY_FUNCTION__)); | ||||||||||||||||
6065 | |||||||||||||||||
6066 | FalseBlock = BasicBlock::Create(SI->getContext(), "select.false", | ||||||||||||||||
6067 | EndBlock->getParent(), EndBlock); | ||||||||||||||||
6068 | auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); | ||||||||||||||||
6069 | FalseBranch->setDebugLoc(SI->getDebugLoc()); | ||||||||||||||||
6070 | } | ||||||||||||||||
6071 | |||||||||||||||||
6072 | // Insert the real conditional branch based on the original condition. | ||||||||||||||||
6073 | // If we did not create a new block for one of the 'true' or 'false' paths | ||||||||||||||||
6074 | // of the condition, it means that side of the branch goes to the end block | ||||||||||||||||
6075 | // directly and the path originates from the start block from the point of | ||||||||||||||||
6076 | // view of the new PHI. | ||||||||||||||||
6077 | BasicBlock *TT, *FT; | ||||||||||||||||
6078 | if (TrueBlock == nullptr) { | ||||||||||||||||
6079 | TT = EndBlock; | ||||||||||||||||
6080 | FT = FalseBlock; | ||||||||||||||||
6081 | TrueBlock = StartBlock; | ||||||||||||||||
6082 | } else if (FalseBlock == nullptr) { | ||||||||||||||||
6083 | TT = TrueBlock; | ||||||||||||||||
6084 | FT = EndBlock; | ||||||||||||||||
6085 | FalseBlock = StartBlock; | ||||||||||||||||
6086 | } else { | ||||||||||||||||
6087 | TT = TrueBlock; | ||||||||||||||||
6088 | FT = FalseBlock; | ||||||||||||||||
6089 | } | ||||||||||||||||
6090 | IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI); | ||||||||||||||||
6091 | |||||||||||||||||
6092 | SmallPtrSet<const Instruction *, 2> INS; | ||||||||||||||||
6093 | INS.insert(ASI.begin(), ASI.end()); | ||||||||||||||||
6094 | // Use reverse iterator because later select may use the value of the | ||||||||||||||||
6095 | // earlier select, and we need to propagate value through earlier select | ||||||||||||||||
6096 | // to get the PHI operand. | ||||||||||||||||
6097 | for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { | ||||||||||||||||
6098 | SelectInst *SI = *It; | ||||||||||||||||
6099 | // The select itself is replaced with a PHI Node. | ||||||||||||||||
6100 | PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front()); | ||||||||||||||||
6101 | PN->takeName(SI); | ||||||||||||||||
6102 | PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock); | ||||||||||||||||
6103 | PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock); | ||||||||||||||||
6104 | PN->setDebugLoc(SI->getDebugLoc()); | ||||||||||||||||
6105 | |||||||||||||||||
6106 | SI->replaceAllUsesWith(PN); | ||||||||||||||||
6107 | SI->eraseFromParent(); | ||||||||||||||||
6108 | INS.erase(SI); | ||||||||||||||||
6109 | ++NumSelectsExpanded; | ||||||||||||||||
6110 | } | ||||||||||||||||
6111 | |||||||||||||||||
6112 | // Instruct OptimizeBlock to skip to the next block. | ||||||||||||||||
6113 | CurInstIterator = StartBlock->end(); | ||||||||||||||||
6114 | return true; | ||||||||||||||||
6115 | } | ||||||||||||||||
6116 | |||||||||||||||||
6117 | static bool isBroadcastShuffle(ShuffleVectorInst *SVI) { | ||||||||||||||||
6118 | SmallVector<int, 16> Mask(SVI->getShuffleMask()); | ||||||||||||||||
6119 | int SplatElem = -1; | ||||||||||||||||
6120 | for (unsigned i = 0; i < Mask.size(); ++i) { | ||||||||||||||||
6121 | if (SplatElem != -1 && Mask[i] != -1 && Mask[i] != SplatElem) | ||||||||||||||||
6122 | return false; | ||||||||||||||||
6123 | SplatElem = Mask[i]; | ||||||||||||||||
6124 | } | ||||||||||||||||
6125 | |||||||||||||||||
6126 | return true; | ||||||||||||||||
6127 | } | ||||||||||||||||
6128 | |||||||||||||||||
6129 | /// Some targets have expensive vector shifts if the lanes aren't all the same | ||||||||||||||||
6130 | /// (e.g. x86 only introduced "vpsllvd" and friends with AVX2). In these cases | ||||||||||||||||
6131 | /// it's often worth sinking a shufflevector splat down to its use so that | ||||||||||||||||
6132 | /// codegen can spot all lanes are identical. | ||||||||||||||||
6133 | bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) { | ||||||||||||||||
6134 | BasicBlock *DefBB = SVI->getParent(); | ||||||||||||||||
6135 | |||||||||||||||||
6136 | // Only do this xform if variable vector shifts are particularly expensive. | ||||||||||||||||
6137 | if (!TLI || !TLI->isVectorShiftByScalarCheap(SVI->getType())) | ||||||||||||||||
6138 | return false; | ||||||||||||||||
6139 | |||||||||||||||||
6140 | // We only expect better codegen by sinking a shuffle if we can recognise a | ||||||||||||||||
6141 | // constant splat. | ||||||||||||||||
6142 | if (!isBroadcastShuffle(SVI)) | ||||||||||||||||
6143 | return false; | ||||||||||||||||
6144 | |||||||||||||||||
6145 | // InsertedShuffles - Only insert a shuffle in each block once. | ||||||||||||||||
6146 | DenseMap<BasicBlock*, Instruction*> InsertedShuffles; | ||||||||||||||||
6147 | |||||||||||||||||
6148 | bool MadeChange = false; | ||||||||||||||||
6149 | for (User *U : SVI->users()) { | ||||||||||||||||
6150 | Instruction *UI = cast<Instruction>(U); | ||||||||||||||||
6151 | |||||||||||||||||
6152 | // Figure out which BB this ext is used in. | ||||||||||||||||
6153 | BasicBlock *UserBB = UI->getParent(); | ||||||||||||||||
6154 | if (UserBB == DefBB) continue; | ||||||||||||||||
6155 | |||||||||||||||||
6156 | // For now only apply this when the splat is used by a shift instruction. | ||||||||||||||||
6157 | if (!UI->isShift()) continue; | ||||||||||||||||
6158 | |||||||||||||||||
6159 | // Everything checks out, sink the shuffle if the user's block doesn't | ||||||||||||||||
6160 | // already have a copy. | ||||||||||||||||
6161 | Instruction *&InsertedShuffle = InsertedShuffles[UserBB]; | ||||||||||||||||
6162 | |||||||||||||||||
6163 | if (!InsertedShuffle) { | ||||||||||||||||
6164 | BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt(); | ||||||||||||||||
6165 | assert(InsertPt != UserBB->end())((InsertPt != UserBB->end()) ? static_cast<void> (0) : __assert_fail ("InsertPt != UserBB->end()", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6165, __PRETTY_FUNCTION__)); | ||||||||||||||||
6166 | InsertedShuffle = | ||||||||||||||||
6167 | new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1), | ||||||||||||||||
6168 | SVI->getOperand(2), "", &*InsertPt); | ||||||||||||||||
6169 | InsertedShuffle->setDebugLoc(SVI->getDebugLoc()); | ||||||||||||||||
6170 | } | ||||||||||||||||
6171 | |||||||||||||||||
6172 | UI->replaceUsesOfWith(SVI, InsertedShuffle); | ||||||||||||||||
6173 | MadeChange = true; | ||||||||||||||||
6174 | } | ||||||||||||||||
6175 | |||||||||||||||||
6176 | // If we removed all uses, nuke the shuffle. | ||||||||||||||||
6177 | if (SVI->use_empty()) { | ||||||||||||||||
6178 | SVI->eraseFromParent(); | ||||||||||||||||
6179 | MadeChange = true; | ||||||||||||||||
6180 | } | ||||||||||||||||
6181 | |||||||||||||||||
6182 | return MadeChange; | ||||||||||||||||
6183 | } | ||||||||||||||||
6184 | |||||||||||||||||
6185 | bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) { | ||||||||||||||||
6186 | // If the operands of I can be folded into a target instruction together with | ||||||||||||||||
6187 | // I, duplicate and sink them. | ||||||||||||||||
6188 | SmallVector<Use *, 4> OpsToSink; | ||||||||||||||||
6189 | if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink)) | ||||||||||||||||
6190 | return false; | ||||||||||||||||
6191 | |||||||||||||||||
6192 | // OpsToSink can contain multiple uses in a use chain (e.g. | ||||||||||||||||
6193 | // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating | ||||||||||||||||
6194 | // uses must come first, so we process the ops in reverse order so as to not | ||||||||||||||||
6195 | // create invalid IR. | ||||||||||||||||
6196 | BasicBlock *TargetBB = I->getParent(); | ||||||||||||||||
6197 | bool Changed = false; | ||||||||||||||||
6198 | SmallVector<Use *, 4> ToReplace; | ||||||||||||||||
6199 | for (Use *U : reverse(OpsToSink)) { | ||||||||||||||||
6200 | auto *UI = cast<Instruction>(U->get()); | ||||||||||||||||
6201 | if (UI->getParent() == TargetBB || isa<PHINode>(UI)) | ||||||||||||||||
6202 | continue; | ||||||||||||||||
6203 | ToReplace.push_back(U); | ||||||||||||||||
6204 | } | ||||||||||||||||
6205 | |||||||||||||||||
6206 | SetVector<Instruction *> MaybeDead; | ||||||||||||||||
6207 | DenseMap<Instruction *, Instruction *> NewInstructions; | ||||||||||||||||
6208 | Instruction *InsertPoint = I; | ||||||||||||||||
6209 | for (Use *U : ToReplace) { | ||||||||||||||||
6210 | auto *UI = cast<Instruction>(U->get()); | ||||||||||||||||
6211 | Instruction *NI = UI->clone(); | ||||||||||||||||
6212 | NewInstructions[UI] = NI; | ||||||||||||||||
6213 | MaybeDead.insert(UI); | ||||||||||||||||
6214 | LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Sinking " << *UI << " to user " << *I << "\n"; } } while (false ); | ||||||||||||||||
6215 | NI->insertBefore(InsertPoint); | ||||||||||||||||
6216 | InsertPoint = NI; | ||||||||||||||||
6217 | InsertedInsts.insert(NI); | ||||||||||||||||
6218 | |||||||||||||||||
6219 | // Update the use for the new instruction, making sure that we update the | ||||||||||||||||
6220 | // sunk instruction uses, if it is part of a chain that has already been | ||||||||||||||||
6221 | // sunk. | ||||||||||||||||
6222 | Instruction *OldI = cast<Instruction>(U->getUser()); | ||||||||||||||||
6223 | if (NewInstructions.count(OldI)) | ||||||||||||||||
6224 | NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); | ||||||||||||||||
6225 | else | ||||||||||||||||
6226 | U->set(NI); | ||||||||||||||||
6227 | Changed = true; | ||||||||||||||||
6228 | } | ||||||||||||||||
6229 | |||||||||||||||||
6230 | // Remove instructions that are dead after sinking. | ||||||||||||||||
6231 | for (auto *I : MaybeDead) { | ||||||||||||||||
6232 | if (!I->hasNUsesOrMore(1)) { | ||||||||||||||||
6233 | LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Removing dead instruction: " << *I << "\n"; } } while (false); | ||||||||||||||||
6234 | I->eraseFromParent(); | ||||||||||||||||
6235 | } | ||||||||||||||||
6236 | } | ||||||||||||||||
6237 | |||||||||||||||||
6238 | return Changed; | ||||||||||||||||
6239 | } | ||||||||||||||||
6240 | |||||||||||||||||
6241 | bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) { | ||||||||||||||||
6242 | if (!TLI || !DL) | ||||||||||||||||
6243 | return false; | ||||||||||||||||
6244 | |||||||||||||||||
6245 | Value *Cond = SI->getCondition(); | ||||||||||||||||
6246 | Type *OldType = Cond->getType(); | ||||||||||||||||
6247 | LLVMContext &Context = Cond->getContext(); | ||||||||||||||||
6248 | MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType)); | ||||||||||||||||
6249 | unsigned RegWidth = RegType.getSizeInBits(); | ||||||||||||||||
6250 | |||||||||||||||||
6251 | if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth()) | ||||||||||||||||
6252 | return false; | ||||||||||||||||
6253 | |||||||||||||||||
6254 | // If the register width is greater than the type width, expand the condition | ||||||||||||||||
6255 | // of the switch instruction and each case constant to the width of the | ||||||||||||||||
6256 | // register. By widening the type of the switch condition, subsequent | ||||||||||||||||
6257 | // comparisons (for case comparisons) will not need to be extended to the | ||||||||||||||||
6258 | // preferred register width, so we will potentially eliminate N-1 extends, | ||||||||||||||||
6259 | // where N is the number of cases in the switch. | ||||||||||||||||
6260 | auto *NewType = Type::getIntNTy(Context, RegWidth); | ||||||||||||||||
6261 | |||||||||||||||||
6262 | // Zero-extend the switch condition and case constants unless the switch | ||||||||||||||||
6263 | // condition is a function argument that is already being sign-extended. | ||||||||||||||||
6264 | // In that case, we can avoid an unnecessary mask/extension by sign-extending | ||||||||||||||||
6265 | // everything instead. | ||||||||||||||||
6266 | Instruction::CastOps ExtType = Instruction::ZExt; | ||||||||||||||||
6267 | if (auto *Arg = dyn_cast<Argument>(Cond)) | ||||||||||||||||
6268 | if (Arg->hasSExtAttr()) | ||||||||||||||||
6269 | ExtType = Instruction::SExt; | ||||||||||||||||
6270 | |||||||||||||||||
6271 | auto *ExtInst = CastInst::Create(ExtType, Cond, NewType); | ||||||||||||||||
6272 | ExtInst->insertBefore(SI); | ||||||||||||||||
6273 | ExtInst->setDebugLoc(SI->getDebugLoc()); | ||||||||||||||||
6274 | SI->setCondition(ExtInst); | ||||||||||||||||
6275 | for (auto Case : SI->cases()) { | ||||||||||||||||
6276 | APInt NarrowConst = Case.getCaseValue()->getValue(); | ||||||||||||||||
6277 | APInt WideConst = (ExtType == Instruction::ZExt) ? | ||||||||||||||||
6278 | NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth); | ||||||||||||||||
6279 | Case.setValue(ConstantInt::get(Context, WideConst)); | ||||||||||||||||
6280 | } | ||||||||||||||||
6281 | |||||||||||||||||
6282 | return true; | ||||||||||||||||
6283 | } | ||||||||||||||||
6284 | |||||||||||||||||
6285 | |||||||||||||||||
6286 | namespace { | ||||||||||||||||
6287 | |||||||||||||||||
6288 | /// Helper class to promote a scalar operation to a vector one. | ||||||||||||||||
6289 | /// This class is used to move downward extractelement transition. | ||||||||||||||||
6290 | /// E.g., | ||||||||||||||||
6291 | /// a = vector_op <2 x i32> | ||||||||||||||||
6292 | /// b = extractelement <2 x i32> a, i32 0 | ||||||||||||||||
6293 | /// c = scalar_op b | ||||||||||||||||
6294 | /// store c | ||||||||||||||||
6295 | /// | ||||||||||||||||
6296 | /// => | ||||||||||||||||
6297 | /// a = vector_op <2 x i32> | ||||||||||||||||
6298 | /// c = vector_op a (equivalent to scalar_op on the related lane) | ||||||||||||||||
6299 | /// * d = extractelement <2 x i32> c, i32 0 | ||||||||||||||||
6300 | /// * store d | ||||||||||||||||
6301 | /// Assuming both extractelement and store can be combine, we get rid of the | ||||||||||||||||
6302 | /// transition. | ||||||||||||||||
6303 | class VectorPromoteHelper { | ||||||||||||||||
6304 | /// DataLayout associated with the current module. | ||||||||||||||||
6305 | const DataLayout &DL; | ||||||||||||||||
6306 | |||||||||||||||||
6307 | /// Used to perform some checks on the legality of vector operations. | ||||||||||||||||
6308 | const TargetLowering &TLI; | ||||||||||||||||
6309 | |||||||||||||||||
6310 | /// Used to estimated the cost of the promoted chain. | ||||||||||||||||
6311 | const TargetTransformInfo &TTI; | ||||||||||||||||
6312 | |||||||||||||||||
6313 | /// The transition being moved downwards. | ||||||||||||||||
6314 | Instruction *Transition; | ||||||||||||||||
6315 | |||||||||||||||||
6316 | /// The sequence of instructions to be promoted. | ||||||||||||||||
6317 | SmallVector<Instruction *, 4> InstsToBePromoted; | ||||||||||||||||
6318 | |||||||||||||||||
6319 | /// Cost of combining a store and an extract. | ||||||||||||||||
6320 | unsigned StoreExtractCombineCost; | ||||||||||||||||
6321 | |||||||||||||||||
6322 | /// Instruction that will be combined with the transition. | ||||||||||||||||
6323 | Instruction *CombineInst = nullptr; | ||||||||||||||||
6324 | |||||||||||||||||
6325 | /// The instruction that represents the current end of the transition. | ||||||||||||||||
6326 | /// Since we are faking the promotion until we reach the end of the chain | ||||||||||||||||
6327 | /// of computation, we need a way to get the current end of the transition. | ||||||||||||||||
6328 | Instruction *getEndOfTransition() const { | ||||||||||||||||
6329 | if (InstsToBePromoted.empty()) | ||||||||||||||||
6330 | return Transition; | ||||||||||||||||
6331 | return InstsToBePromoted.back(); | ||||||||||||||||
6332 | } | ||||||||||||||||
6333 | |||||||||||||||||
6334 | /// Return the index of the original value in the transition. | ||||||||||||||||
6335 | /// E.g., for "extractelement <2 x i32> c, i32 1" the original value, | ||||||||||||||||
6336 | /// c, is at index 0. | ||||||||||||||||
6337 | unsigned getTransitionOriginalValueIdx() const { | ||||||||||||||||
6338 | assert(isa<ExtractElementInst>(Transition) &&((isa<ExtractElementInst>(Transition) && "Other kind of transitions are not supported yet" ) ? static_cast<void> (0) : __assert_fail ("isa<ExtractElementInst>(Transition) && \"Other kind of transitions are not supported yet\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6339, __PRETTY_FUNCTION__)) | ||||||||||||||||
6339 | "Other kind of transitions are not supported yet")((isa<ExtractElementInst>(Transition) && "Other kind of transitions are not supported yet" ) ? static_cast<void> (0) : __assert_fail ("isa<ExtractElementInst>(Transition) && \"Other kind of transitions are not supported yet\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6339, __PRETTY_FUNCTION__)); | ||||||||||||||||
6340 | return 0; | ||||||||||||||||
6341 | } | ||||||||||||||||
6342 | |||||||||||||||||
6343 | /// Return the index of the index in the transition. | ||||||||||||||||
6344 | /// E.g., for "extractelement <2 x i32> c, i32 0" the index | ||||||||||||||||
6345 | /// is at index 1. | ||||||||||||||||
6346 | unsigned getTransitionIdx() const { | ||||||||||||||||
6347 | assert(isa<ExtractElementInst>(Transition) &&((isa<ExtractElementInst>(Transition) && "Other kind of transitions are not supported yet" ) ? static_cast<void> (0) : __assert_fail ("isa<ExtractElementInst>(Transition) && \"Other kind of transitions are not supported yet\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6348, __PRETTY_FUNCTION__)) | ||||||||||||||||
6348 | "Other kind of transitions are not supported yet")((isa<ExtractElementInst>(Transition) && "Other kind of transitions are not supported yet" ) ? static_cast<void> (0) : __assert_fail ("isa<ExtractElementInst>(Transition) && \"Other kind of transitions are not supported yet\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6348, __PRETTY_FUNCTION__)); | ||||||||||||||||
6349 | return 1; | ||||||||||||||||
6350 | } | ||||||||||||||||
6351 | |||||||||||||||||
6352 | /// Get the type of the transition. | ||||||||||||||||
6353 | /// This is the type of the original value. | ||||||||||||||||
6354 | /// E.g., for "extractelement <2 x i32> c, i32 1" the type of the | ||||||||||||||||
6355 | /// transition is <2 x i32>. | ||||||||||||||||
6356 | Type *getTransitionType() const { | ||||||||||||||||
6357 | return Transition->getOperand(getTransitionOriginalValueIdx())->getType(); | ||||||||||||||||
6358 | } | ||||||||||||||||
6359 | |||||||||||||||||
6360 | /// Promote \p ToBePromoted by moving \p Def downward through. | ||||||||||||||||
6361 | /// I.e., we have the following sequence: | ||||||||||||||||
6362 | /// Def = Transition <ty1> a to <ty2> | ||||||||||||||||
6363 | /// b = ToBePromoted <ty2> Def, ... | ||||||||||||||||
6364 | /// => | ||||||||||||||||
6365 | /// b = ToBePromoted <ty1> a, ... | ||||||||||||||||
6366 | /// Def = Transition <ty1> ToBePromoted to <ty2> | ||||||||||||||||
6367 | void promoteImpl(Instruction *ToBePromoted); | ||||||||||||||||
6368 | |||||||||||||||||
6369 | /// Check whether or not it is profitable to promote all the | ||||||||||||||||
6370 | /// instructions enqueued to be promoted. | ||||||||||||||||
6371 | bool isProfitableToPromote() { | ||||||||||||||||
6372 | Value *ValIdx = Transition->getOperand(getTransitionOriginalValueIdx()); | ||||||||||||||||
6373 | unsigned Index = isa<ConstantInt>(ValIdx) | ||||||||||||||||
6374 | ? cast<ConstantInt>(ValIdx)->getZExtValue() | ||||||||||||||||
6375 | : -1; | ||||||||||||||||
6376 | Type *PromotedType = getTransitionType(); | ||||||||||||||||
6377 | |||||||||||||||||
6378 | StoreInst *ST = cast<StoreInst>(CombineInst); | ||||||||||||||||
6379 | unsigned AS = ST->getPointerAddressSpace(); | ||||||||||||||||
6380 | unsigned Align = ST->getAlignment(); | ||||||||||||||||
6381 | // Check if this store is supported. | ||||||||||||||||
6382 | if (!TLI.allowsMisalignedMemoryAccesses( | ||||||||||||||||
6383 | TLI.getValueType(DL, ST->getValueOperand()->getType()), AS, | ||||||||||||||||
6384 | Align)) { | ||||||||||||||||
6385 | // If this is not supported, there is no way we can combine | ||||||||||||||||
6386 | // the extract with the store. | ||||||||||||||||
6387 | return false; | ||||||||||||||||
6388 | } | ||||||||||||||||
6389 | |||||||||||||||||
6390 | // The scalar chain of computation has to pay for the transition | ||||||||||||||||
6391 | // scalar to vector. | ||||||||||||||||
6392 | // The vector chain has to account for the combining cost. | ||||||||||||||||
6393 | uint64_t ScalarCost = | ||||||||||||||||
6394 | TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index); | ||||||||||||||||
6395 | uint64_t VectorCost = StoreExtractCombineCost; | ||||||||||||||||
6396 | for (const auto &Inst : InstsToBePromoted) { | ||||||||||||||||
6397 | // Compute the cost. | ||||||||||||||||
6398 | // By construction, all instructions being promoted are arithmetic ones. | ||||||||||||||||
6399 | // Moreover, one argument is a constant that can be viewed as a splat | ||||||||||||||||
6400 | // constant. | ||||||||||||||||
6401 | Value *Arg0 = Inst->getOperand(0); | ||||||||||||||||
6402 | bool IsArg0Constant = isa<UndefValue>(Arg0) || isa<ConstantInt>(Arg0) || | ||||||||||||||||
6403 | isa<ConstantFP>(Arg0); | ||||||||||||||||
6404 | TargetTransformInfo::OperandValueKind Arg0OVK = | ||||||||||||||||
6405 | IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue | ||||||||||||||||
6406 | : TargetTransformInfo::OK_AnyValue; | ||||||||||||||||
6407 | TargetTransformInfo::OperandValueKind Arg1OVK = | ||||||||||||||||
6408 | !IsArg0Constant ? TargetTransformInfo::OK_UniformConstantValue | ||||||||||||||||
6409 | : TargetTransformInfo::OK_AnyValue; | ||||||||||||||||
6410 | ScalarCost += TTI.getArithmeticInstrCost( | ||||||||||||||||
6411 | Inst->getOpcode(), Inst->getType(), Arg0OVK, Arg1OVK); | ||||||||||||||||
6412 | VectorCost += TTI.getArithmeticInstrCost(Inst->getOpcode(), PromotedType, | ||||||||||||||||
6413 | Arg0OVK, Arg1OVK); | ||||||||||||||||
6414 | } | ||||||||||||||||
6415 | LLVM_DEBUG(do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Estimated cost of computation to be promoted:\nScalar: " << ScalarCost << "\nVector: " << VectorCost << '\n'; } } while (false) | ||||||||||||||||
6416 | dbgs() << "Estimated cost of computation to be promoted:\nScalar: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Estimated cost of computation to be promoted:\nScalar: " << ScalarCost << "\nVector: " << VectorCost << '\n'; } } while (false) | ||||||||||||||||
6417 | << ScalarCost << "\nVector: " << VectorCost << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Estimated cost of computation to be promoted:\nScalar: " << ScalarCost << "\nVector: " << VectorCost << '\n'; } } while (false); | ||||||||||||||||
6418 | return ScalarCost > VectorCost; | ||||||||||||||||
6419 | } | ||||||||||||||||
6420 | |||||||||||||||||
6421 | /// Generate a constant vector with \p Val with the same | ||||||||||||||||
6422 | /// number of elements as the transition. | ||||||||||||||||
6423 | /// \p UseSplat defines whether or not \p Val should be replicated | ||||||||||||||||
6424 | /// across the whole vector. | ||||||||||||||||
6425 | /// In other words, if UseSplat == true, we generate <Val, Val, ..., Val>, | ||||||||||||||||
6426 | /// otherwise we generate a vector with as many undef as possible: | ||||||||||||||||
6427 | /// <undef, ..., undef, Val, undef, ..., undef> where \p Val is only | ||||||||||||||||
6428 | /// used at the index of the extract. | ||||||||||||||||
6429 | Value *getConstantVector(Constant *Val, bool UseSplat) const { | ||||||||||||||||
6430 | unsigned ExtractIdx = std::numeric_limits<unsigned>::max(); | ||||||||||||||||
6431 | if (!UseSplat) { | ||||||||||||||||
6432 | // If we cannot determine where the constant must be, we have to | ||||||||||||||||
6433 | // use a splat constant. | ||||||||||||||||
6434 | Value *ValExtractIdx = Transition->getOperand(getTransitionIdx()); | ||||||||||||||||
6435 | if (ConstantInt *CstVal = dyn_cast<ConstantInt>(ValExtractIdx)) | ||||||||||||||||
6436 | ExtractIdx = CstVal->getSExtValue(); | ||||||||||||||||
6437 | else | ||||||||||||||||
6438 | UseSplat = true; | ||||||||||||||||
6439 | } | ||||||||||||||||
6440 | |||||||||||||||||
6441 | unsigned End = getTransitionType()->getVectorNumElements(); | ||||||||||||||||
6442 | if (UseSplat) | ||||||||||||||||
6443 | return ConstantVector::getSplat(End, Val); | ||||||||||||||||
6444 | |||||||||||||||||
6445 | SmallVector<Constant *, 4> ConstVec; | ||||||||||||||||
6446 | UndefValue *UndefVal = UndefValue::get(Val->getType()); | ||||||||||||||||
6447 | for (unsigned Idx = 0; Idx != End; ++Idx) { | ||||||||||||||||
6448 | if (Idx == ExtractIdx) | ||||||||||||||||
6449 | ConstVec.push_back(Val); | ||||||||||||||||
6450 | else | ||||||||||||||||
6451 | ConstVec.push_back(UndefVal); | ||||||||||||||||
6452 | } | ||||||||||||||||
6453 | return ConstantVector::get(ConstVec); | ||||||||||||||||
6454 | } | ||||||||||||||||
6455 | |||||||||||||||||
6456 | /// Check if promoting to a vector type an operand at \p OperandIdx | ||||||||||||||||
6457 | /// in \p Use can trigger undefined behavior. | ||||||||||||||||
6458 | static bool canCauseUndefinedBehavior(const Instruction *Use, | ||||||||||||||||
6459 | unsigned OperandIdx) { | ||||||||||||||||
6460 | // This is not safe to introduce undef when the operand is on | ||||||||||||||||
6461 | // the right hand side of a division-like instruction. | ||||||||||||||||
6462 | if (OperandIdx != 1) | ||||||||||||||||
6463 | return false; | ||||||||||||||||
6464 | switch (Use->getOpcode()) { | ||||||||||||||||
6465 | default: | ||||||||||||||||
6466 | return false; | ||||||||||||||||
6467 | case Instruction::SDiv: | ||||||||||||||||
6468 | case Instruction::UDiv: | ||||||||||||||||
6469 | case Instruction::SRem: | ||||||||||||||||
6470 | case Instruction::URem: | ||||||||||||||||
6471 | return true; | ||||||||||||||||
6472 | case Instruction::FDiv: | ||||||||||||||||
6473 | case Instruction::FRem: | ||||||||||||||||
6474 | return !Use->hasNoNaNs(); | ||||||||||||||||
6475 | } | ||||||||||||||||
6476 | llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6476); | ||||||||||||||||
6477 | } | ||||||||||||||||
6478 | |||||||||||||||||
6479 | public: | ||||||||||||||||
  /// Build a helper for promoting the chain that ends at \p Transition.
  ///
  /// \param DL DataLayout of the current module.
  /// \param TLI Used to check the legality of vector operations.
  /// \param TTI Used to estimate the cost of the promoted chain.
  /// \param Transition The extractelement being moved downward; must be
  ///        non-null (asserted below).
  /// \param CombineCost Cost of combining a store and an extract.
  VectorPromoteHelper(const DataLayout &DL, const TargetLowering &TLI,
                      const TargetTransformInfo &TTI, Instruction *Transition,
                      unsigned CombineCost)
      : DL(DL), TLI(TLI), TTI(TTI), Transition(Transition),
        StoreExtractCombineCost(CombineCost) {
    assert(Transition && "Do not know how to promote null");
  }
6487 | |||||||||||||||||
6488 | /// Check if we can promote \p ToBePromoted to \p Type. | ||||||||||||||||
6489 | bool canPromote(const Instruction *ToBePromoted) const { | ||||||||||||||||
6490 | // We could support CastInst too. | ||||||||||||||||
6491 | return isa<BinaryOperator>(ToBePromoted); | ||||||||||||||||
6492 | } | ||||||||||||||||
6493 | |||||||||||||||||
6494 | /// Check if it is profitable to promote \p ToBePromoted | ||||||||||||||||
6495 | /// by moving downward the transition through. | ||||||||||||||||
6496 | bool shouldPromote(const Instruction *ToBePromoted) const { | ||||||||||||||||
6497 | // Promote only if all the operands can be statically expanded. | ||||||||||||||||
6498 | // Indeed, we do not want to introduce any new kind of transitions. | ||||||||||||||||
6499 | for (const Use &U : ToBePromoted->operands()) { | ||||||||||||||||
6500 | const Value *Val = U.get(); | ||||||||||||||||
6501 | if (Val == getEndOfTransition()) { | ||||||||||||||||
6502 | // If the use is a division and the transition is on the rhs, | ||||||||||||||||
6503 | // we cannot promote the operation, otherwise we may create a | ||||||||||||||||
6504 | // division by zero. | ||||||||||||||||
6505 | if (canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())) | ||||||||||||||||
6506 | return false; | ||||||||||||||||
6507 | continue; | ||||||||||||||||
6508 | } | ||||||||||||||||
6509 | if (!isa<ConstantInt>(Val) && !isa<UndefValue>(Val) && | ||||||||||||||||
6510 | !isa<ConstantFP>(Val)) | ||||||||||||||||
6511 | return false; | ||||||||||||||||
6512 | } | ||||||||||||||||
6513 | // Check that the resulting operation is legal. | ||||||||||||||||
6514 | int ISDOpcode = TLI.InstructionOpcodeToISD(ToBePromoted->getOpcode()); | ||||||||||||||||
6515 | if (!ISDOpcode) | ||||||||||||||||
6516 | return false; | ||||||||||||||||
6517 | return StressStoreExtract || | ||||||||||||||||
6518 | TLI.isOperationLegalOrCustom( | ||||||||||||||||
6519 | ISDOpcode, TLI.getValueType(DL, getTransitionType(), true)); | ||||||||||||||||
6520 | } | ||||||||||||||||
6521 | |||||||||||||||||
6522 | /// Check whether or not \p Use can be combined | ||||||||||||||||
6523 | /// with the transition. | ||||||||||||||||
6524 | /// I.e., is it possible to do Use(Transition) => AnotherUse? | ||||||||||||||||
6525 | bool canCombine(const Instruction *Use) { return isa<StoreInst>(Use); } | ||||||||||||||||
6526 | |||||||||||||||||
6527 | /// Record \p ToBePromoted as part of the chain to be promoted. | ||||||||||||||||
6528 | void enqueueForPromotion(Instruction *ToBePromoted) { | ||||||||||||||||
6529 | InstsToBePromoted.push_back(ToBePromoted); | ||||||||||||||||
6530 | } | ||||||||||||||||
6531 | |||||||||||||||||
6532 | /// Set the instruction that will be combined with the transition. | ||||||||||||||||
6533 | void recordCombineInstruction(Instruction *ToBeCombined) { | ||||||||||||||||
6534 | assert(canCombine(ToBeCombined) && "Unsupported instruction to combine")((canCombine(ToBeCombined) && "Unsupported instruction to combine" ) ? static_cast<void> (0) : __assert_fail ("canCombine(ToBeCombined) && \"Unsupported instruction to combine\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6534, __PRETTY_FUNCTION__)); | ||||||||||||||||
6535 | CombineInst = ToBeCombined; | ||||||||||||||||
6536 | } | ||||||||||||||||
6537 | |||||||||||||||||
6538 | /// Promote all the instructions enqueued for promotion if it is | ||||||||||||||||
6539 | /// is profitable. | ||||||||||||||||
6540 | /// \return True if the promotion happened, false otherwise. | ||||||||||||||||
6541 | bool promote() { | ||||||||||||||||
6542 | // Check if there is something to promote. | ||||||||||||||||
6543 | // Right now, if we do not have anything to combine with, | ||||||||||||||||
6544 | // we assume the promotion is not profitable. | ||||||||||||||||
6545 | if (InstsToBePromoted.empty() || !CombineInst) | ||||||||||||||||
6546 | return false; | ||||||||||||||||
6547 | |||||||||||||||||
6548 | // Check cost. | ||||||||||||||||
6549 | if (!StressStoreExtract && !isProfitableToPromote()) | ||||||||||||||||
6550 | return false; | ||||||||||||||||
6551 | |||||||||||||||||
6552 | // Promote. | ||||||||||||||||
6553 | for (auto &ToBePromoted : InstsToBePromoted) | ||||||||||||||||
6554 | promoteImpl(ToBePromoted); | ||||||||||||||||
6555 | InstsToBePromoted.clear(); | ||||||||||||||||
6556 | return true; | ||||||||||||||||
6557 | } | ||||||||||||||||
6558 | }; | ||||||||||||||||
6559 | |||||||||||||||||
6560 | } // end anonymous namespace | ||||||||||||||||
6561 | |||||||||||||||||
6562 | void VectorPromoteHelper::promoteImpl(Instruction *ToBePromoted) { | ||||||||||||||||
6563 | // At this point, we know that all the operands of ToBePromoted but Def | ||||||||||||||||
6564 | // can be statically promoted. | ||||||||||||||||
6565 | // For Def, we need to use its parameter in ToBePromoted: | ||||||||||||||||
6566 | // b = ToBePromoted ty1 a | ||||||||||||||||
6567 | // Def = Transition ty1 b to ty2 | ||||||||||||||||
6568 | // Move the transition down. | ||||||||||||||||
6569 | // 1. Replace all uses of the promoted operation by the transition. | ||||||||||||||||
6570 | // = ... b => = ... Def. | ||||||||||||||||
6571 | assert(ToBePromoted->getType() == Transition->getType() &&((ToBePromoted->getType() == Transition->getType() && "The type of the result of the transition does not match " "the final type" ) ? static_cast<void> (0) : __assert_fail ("ToBePromoted->getType() == Transition->getType() && \"The type of the result of the transition does not match \" \"the final type\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6573, __PRETTY_FUNCTION__)) | ||||||||||||||||
6572 | "The type of the result of the transition does not match "((ToBePromoted->getType() == Transition->getType() && "The type of the result of the transition does not match " "the final type" ) ? static_cast<void> (0) : __assert_fail ("ToBePromoted->getType() == Transition->getType() && \"The type of the result of the transition does not match \" \"the final type\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6573, __PRETTY_FUNCTION__)) | ||||||||||||||||
6573 | "the final type")((ToBePromoted->getType() == Transition->getType() && "The type of the result of the transition does not match " "the final type" ) ? static_cast<void> (0) : __assert_fail ("ToBePromoted->getType() == Transition->getType() && \"The type of the result of the transition does not match \" \"the final type\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6573, __PRETTY_FUNCTION__)); | ||||||||||||||||
6574 | ToBePromoted->replaceAllUsesWith(Transition); | ||||||||||||||||
6575 | // 2. Update the type of the uses. | ||||||||||||||||
6576 | // b = ToBePromoted ty2 Def => b = ToBePromoted ty1 Def. | ||||||||||||||||
6577 | Type *TransitionTy = getTransitionType(); | ||||||||||||||||
6578 | ToBePromoted->mutateType(TransitionTy); | ||||||||||||||||
6579 | // 3. Update all the operands of the promoted operation with promoted | ||||||||||||||||
6580 | // operands. | ||||||||||||||||
6581 | // b = ToBePromoted ty1 Def => b = ToBePromoted ty1 a. | ||||||||||||||||
6582 | for (Use &U : ToBePromoted->operands()) { | ||||||||||||||||
6583 | Value *Val = U.get(); | ||||||||||||||||
6584 | Value *NewVal = nullptr; | ||||||||||||||||
6585 | if (Val == Transition) | ||||||||||||||||
6586 | NewVal = Transition->getOperand(getTransitionOriginalValueIdx()); | ||||||||||||||||
6587 | else if (isa<UndefValue>(Val) || isa<ConstantInt>(Val) || | ||||||||||||||||
6588 | isa<ConstantFP>(Val)) { | ||||||||||||||||
6589 | // Use a splat constant if it is not safe to use undef. | ||||||||||||||||
6590 | NewVal = getConstantVector( | ||||||||||||||||
6591 | cast<Constant>(Val), | ||||||||||||||||
6592 | isa<UndefValue>(Val) || | ||||||||||||||||
6593 | canCauseUndefinedBehavior(ToBePromoted, U.getOperandNo())); | ||||||||||||||||
6594 | } else | ||||||||||||||||
6595 | llvm_unreachable("Did you modified shouldPromote and forgot to update "::llvm::llvm_unreachable_internal("Did you modified shouldPromote and forgot to update " "this?", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6596) | ||||||||||||||||
6596 | "this?")::llvm::llvm_unreachable_internal("Did you modified shouldPromote and forgot to update " "this?", "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6596); | ||||||||||||||||
6597 | ToBePromoted->setOperand(U.getOperandNo(), NewVal); | ||||||||||||||||
6598 | } | ||||||||||||||||
6599 | Transition->moveAfter(ToBePromoted); | ||||||||||||||||
6600 | Transition->setOperand(getTransitionOriginalValueIdx(), ToBePromoted); | ||||||||||||||||
6601 | } | ||||||||||||||||
6602 | |||||||||||||||||
6603 | /// Some targets can do store(extractelement) with one instruction. | ||||||||||||||||
6604 | /// Try to push the extractelement towards the stores when the target | ||||||||||||||||
6605 | /// has this feature and this is profitable. | ||||||||||||||||
6606 | bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) { | ||||||||||||||||
6607 | unsigned CombineCost = std::numeric_limits<unsigned>::max(); | ||||||||||||||||
6608 | if (DisableStoreExtract || !TLI || | ||||||||||||||||
6609 | (!StressStoreExtract && | ||||||||||||||||
6610 | !TLI->canCombineStoreAndExtract(Inst->getOperand(0)->getType(), | ||||||||||||||||
6611 | Inst->getOperand(1), CombineCost))) | ||||||||||||||||
6612 | return false; | ||||||||||||||||
6613 | |||||||||||||||||
6614 | // At this point we know that Inst is a vector to scalar transition. | ||||||||||||||||
6615 | // Try to move it down the def-use chain, until: | ||||||||||||||||
6616 | // - We can combine the transition with its single use | ||||||||||||||||
6617 | // => we got rid of the transition. | ||||||||||||||||
6618 | // - We escape the current basic block | ||||||||||||||||
6619 | // => we would need to check that we are moving it at a cheaper place and | ||||||||||||||||
6620 | // we do not do that for now. | ||||||||||||||||
6621 | BasicBlock *Parent = Inst->getParent(); | ||||||||||||||||
6622 | LLVM_DEBUG(dbgs() << "Found an interesting transition: " << *Inst << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Found an interesting transition: " << *Inst << '\n'; } } while (false); | ||||||||||||||||
6623 | VectorPromoteHelper VPH(*DL, *TLI, *TTI, Inst, CombineCost); | ||||||||||||||||
6624 | // If the transition has more than one use, assume this is not going to be | ||||||||||||||||
6625 | // beneficial. | ||||||||||||||||
6626 | while (Inst->hasOneUse()) { | ||||||||||||||||
6627 | Instruction *ToBePromoted = cast<Instruction>(*Inst->user_begin()); | ||||||||||||||||
6628 | LLVM_DEBUG(dbgs() << "Use: " << *ToBePromoted << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Use: " << *ToBePromoted << '\n'; } } while (false); | ||||||||||||||||
6629 | |||||||||||||||||
6630 | if (ToBePromoted->getParent() != Parent) { | ||||||||||||||||
6631 | LLVM_DEBUG(dbgs() << "Instruction to promote is in a different block ("do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Instruction to promote is in a different block (" << ToBePromoted->getParent()->getName() << ") than the transition (" << Parent->getName() << ").\n"; } } while (false) | ||||||||||||||||
6632 | << ToBePromoted->getParent()->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Instruction to promote is in a different block (" << ToBePromoted->getParent()->getName() << ") than the transition (" << Parent->getName() << ").\n"; } } while (false) | ||||||||||||||||
6633 | << ") than the transition (" << Parent->getName()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Instruction to promote is in a different block (" << ToBePromoted->getParent()->getName() << ") than the transition (" << Parent->getName() << ").\n"; } } while (false) | ||||||||||||||||
6634 | << ").\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Instruction to promote is in a different block (" << ToBePromoted->getParent()->getName() << ") than the transition (" << Parent->getName() << ").\n"; } } while (false); | ||||||||||||||||
6635 | return false; | ||||||||||||||||
6636 | } | ||||||||||||||||
6637 | |||||||||||||||||
6638 | if (VPH.canCombine(ToBePromoted)) { | ||||||||||||||||
6639 | LLVM_DEBUG(dbgs() << "Assume " << *Inst << '\n'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Assume " << *Inst << '\n' << "will be combined with: " << *ToBePromoted << '\n'; } } while (false) | ||||||||||||||||
6640 | << "will be combined with: " << *ToBePromoted << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Assume " << *Inst << '\n' << "will be combined with: " << *ToBePromoted << '\n'; } } while (false); | ||||||||||||||||
6641 | VPH.recordCombineInstruction(ToBePromoted); | ||||||||||||||||
6642 | bool Changed = VPH.promote(); | ||||||||||||||||
6643 | NumStoreExtractExposed += Changed; | ||||||||||||||||
6644 | return Changed; | ||||||||||||||||
6645 | } | ||||||||||||||||
6646 | |||||||||||||||||
6647 | LLVM_DEBUG(dbgs() << "Try promoting.\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Try promoting.\n"; } } while (false); | ||||||||||||||||
6648 | if (!VPH.canPromote(ToBePromoted) || !VPH.shouldPromote(ToBePromoted)) | ||||||||||||||||
6649 | return false; | ||||||||||||||||
6650 | |||||||||||||||||
6651 | LLVM_DEBUG(dbgs() << "Promoting is possible... Enqueue for promotion!\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Promoting is possible... Enqueue for promotion!\n" ; } } while (false); | ||||||||||||||||
6652 | |||||||||||||||||
6653 | VPH.enqueueForPromotion(ToBePromoted); | ||||||||||||||||
6654 | Inst = ToBePromoted; | ||||||||||||||||
6655 | } | ||||||||||||||||
6656 | return false; | ||||||||||||||||
6657 | } | ||||||||||||||||
6658 | |||||||||||||||||
6659 | /// For the instruction sequence of store below, F and I values | ||||||||||||||||
6660 | /// are bundled together as an i64 value before being stored into memory. | ||||||||||||||||
6661 | /// Sometimes it is more efficient to generate separate stores for F and I, | ||||||||||||||||
6662 | /// which can remove the bitwise instructions or sink them to colder places. | ||||||||||||||||
6663 | /// | ||||||||||||||||
6664 | /// (store (or (zext (bitcast F to i32) to i64), | ||||||||||||||||
6665 | /// (shl (zext I to i64), 32)), addr) --> | ||||||||||||||||
6666 | /// (store F, addr) and (store I, addr+4) | ||||||||||||||||
6667 | /// | ||||||||||||||||
6668 | /// Similarly, splitting for other merged stores can also be beneficial, like: | ||||||||||||||||
6669 | /// For pair of {i32, i32}, i64 store --> two i32 stores. | ||||||||||||||||
6670 | /// For pair of {i32, i16}, i64 store --> two i32 stores. | ||||||||||||||||
6671 | /// For pair of {i16, i16}, i32 store --> two i16 stores. | ||||||||||||||||
6672 | /// For pair of {i16, i8}, i32 store --> two i16 stores. | ||||||||||||||||
6673 | /// For pair of {i8, i8}, i16 store --> two i8 stores. | ||||||||||||||||
6674 | /// | ||||||||||||||||
6675 | /// We allow each target to determine specifically which kind of splitting is | ||||||||||||||||
6676 | /// supported. | ||||||||||||||||
6677 | /// | ||||||||||||||||
6678 | /// The store patterns are commonly seen from the simple code snippet below | ||||||||||||||||
6679 | /// if only std::make_pair(...) is sroa transformed before inlined into hoo. | ||||||||||||||||
6680 | /// void goo(const std::pair<int, float> &); | ||||||||||||||||
6681 | /// hoo() { | ||||||||||||||||
6682 | /// ... | ||||||||||||||||
6683 | /// goo(std::make_pair(tmp, ftmp)); | ||||||||||||||||
6684 | /// ... | ||||||||||||||||
6685 | /// } | ||||||||||||||||
6686 | /// | ||||||||||||||||
6687 | /// Although we already have similar splitting in DAG Combine, we duplicate | ||||||||||||||||
6688 | /// it in CodeGenPrepare to catch the case in which pattern is across | ||||||||||||||||
6689 | /// multiple BBs. The logic in DAG Combine is kept to catch case generated | ||||||||||||||||
6690 | /// during code expansion. | ||||||||||||||||
6691 | static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL, | ||||||||||||||||
6692 | const TargetLowering &TLI) { | ||||||||||||||||
6693 | // Handle simple but common cases only. | ||||||||||||||||
6694 | Type *StoreType = SI.getValueOperand()->getType(); | ||||||||||||||||
6695 | if (!DL.typeSizeEqualsStoreSize(StoreType) || | ||||||||||||||||
6696 | DL.getTypeSizeInBits(StoreType) == 0) | ||||||||||||||||
6697 | return false; | ||||||||||||||||
6698 | |||||||||||||||||
6699 | unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2; | ||||||||||||||||
6700 | Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize); | ||||||||||||||||
6701 | if (!DL.typeSizeEqualsStoreSize(SplitStoreType)) | ||||||||||||||||
6702 | return false; | ||||||||||||||||
6703 | |||||||||||||||||
6704 | // Don't split the store if it is volatile. | ||||||||||||||||
6705 | if (SI.isVolatile()) | ||||||||||||||||
6706 | return false; | ||||||||||||||||
6707 | |||||||||||||||||
6708 | // Match the following patterns: | ||||||||||||||||
6709 | // (store (or (zext LValue to i64), | ||||||||||||||||
6710 | // (shl (zext HValue to i64), 32)), HalfValBitSize) | ||||||||||||||||
6711 | // or | ||||||||||||||||
6712 | // (store (or (shl (zext HValue to i64), 32)), HalfValBitSize) | ||||||||||||||||
6713 | // (zext LValue to i64), | ||||||||||||||||
6714 | // Expect both operands of OR and the first operand of SHL have only | ||||||||||||||||
6715 | // one use. | ||||||||||||||||
6716 | Value *LValue, *HValue; | ||||||||||||||||
6717 | if (!match(SI.getValueOperand(), | ||||||||||||||||
6718 | m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))), | ||||||||||||||||
6719 | m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))), | ||||||||||||||||
6720 | m_SpecificInt(HalfValBitSize)))))) | ||||||||||||||||
6721 | return false; | ||||||||||||||||
6722 | |||||||||||||||||
6723 | // Check LValue and HValue are int with size less or equal than 32. | ||||||||||||||||
6724 | if (!LValue->getType()->isIntegerTy() || | ||||||||||||||||
6725 | DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize || | ||||||||||||||||
6726 | !HValue->getType()->isIntegerTy() || | ||||||||||||||||
6727 | DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize) | ||||||||||||||||
6728 | return false; | ||||||||||||||||
6729 | |||||||||||||||||
6730 | // If LValue/HValue is a bitcast instruction, use the EVT before bitcast | ||||||||||||||||
6731 | // as the input of target query. | ||||||||||||||||
6732 | auto *LBC = dyn_cast<BitCastInst>(LValue); | ||||||||||||||||
6733 | auto *HBC = dyn_cast<BitCastInst>(HValue); | ||||||||||||||||
6734 | EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType()) | ||||||||||||||||
6735 | : EVT::getEVT(LValue->getType()); | ||||||||||||||||
6736 | EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType()) | ||||||||||||||||
6737 | : EVT::getEVT(HValue->getType()); | ||||||||||||||||
6738 | if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy)) | ||||||||||||||||
6739 | return false; | ||||||||||||||||
6740 | |||||||||||||||||
6741 | // Start to split store. | ||||||||||||||||
6742 | IRBuilder<> Builder(SI.getContext()); | ||||||||||||||||
6743 | Builder.SetInsertPoint(&SI); | ||||||||||||||||
6744 | |||||||||||||||||
6745 | // If LValue/HValue is a bitcast in another BB, create a new one in current | ||||||||||||||||
6746 | // BB so it may be merged with the splitted stores by dag combiner. | ||||||||||||||||
6747 | if (LBC && LBC->getParent() != SI.getParent()) | ||||||||||||||||
6748 | LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType()); | ||||||||||||||||
6749 | if (HBC && HBC->getParent() != SI.getParent()) | ||||||||||||||||
6750 | HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType()); | ||||||||||||||||
6751 | |||||||||||||||||
6752 | bool IsLE = SI.getModule()->getDataLayout().isLittleEndian(); | ||||||||||||||||
6753 | auto CreateSplitStore = [&](Value *V, bool Upper) { | ||||||||||||||||
6754 | V = Builder.CreateZExtOrBitCast(V, SplitStoreType); | ||||||||||||||||
6755 | Value *Addr = Builder.CreateBitCast( | ||||||||||||||||
6756 | SI.getOperand(1), | ||||||||||||||||
6757 | SplitStoreType->getPointerTo(SI.getPointerAddressSpace())); | ||||||||||||||||
6758 | if ((IsLE && Upper) || (!IsLE && !Upper)) | ||||||||||||||||
6759 | Addr = Builder.CreateGEP( | ||||||||||||||||
6760 | SplitStoreType, Addr, | ||||||||||||||||
6761 | ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1)); | ||||||||||||||||
6762 | Builder.CreateAlignedStore( | ||||||||||||||||
6763 | V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment()); | ||||||||||||||||
6764 | }; | ||||||||||||||||
6765 | |||||||||||||||||
6766 | CreateSplitStore(LValue, false); | ||||||||||||||||
6767 | CreateSplitStore(HValue, true); | ||||||||||||||||
6768 | |||||||||||||||||
6769 | // Delete the old store. | ||||||||||||||||
6770 | SI.eraseFromParent(); | ||||||||||||||||
6771 | return true; | ||||||||||||||||
6772 | } | ||||||||||||||||
6773 | |||||||||||||||||
6774 | // Return true if the GEP has two operands, the first operand is of a sequential | ||||||||||||||||
6775 | // type, and the second operand is a constant. | ||||||||||||||||
6776 | static bool GEPSequentialConstIndexed(GetElementPtrInst *GEP) { | ||||||||||||||||
6777 | gep_type_iterator I = gep_type_begin(*GEP); | ||||||||||||||||
6778 | return GEP->getNumOperands() == 2 && | ||||||||||||||||
6779 | I.isSequential() && | ||||||||||||||||
6780 | isa<ConstantInt>(GEP->getOperand(1)); | ||||||||||||||||
6781 | } | ||||||||||||||||
6782 | |||||||||||||||||
6783 | // Try unmerging GEPs to reduce liveness interference (register pressure) across | ||||||||||||||||
6784 | // IndirectBr edges. Since IndirectBr edges tend to touch on many blocks, | ||||||||||||||||
6785 | // reducing liveness interference across those edges benefits global register | ||||||||||||||||
6786 | // allocation. Currently handles only certain cases. | ||||||||||||||||
6787 | // | ||||||||||||||||
6788 | // For example, unmerge %GEPI and %UGEPI as below. | ||||||||||||||||
6789 | // | ||||||||||||||||
6790 | // ---------- BEFORE ---------- | ||||||||||||||||
6791 | // SrcBlock: | ||||||||||||||||
6792 | // ... | ||||||||||||||||
6793 | // %GEPIOp = ... | ||||||||||||||||
6794 | // ... | ||||||||||||||||
6795 | // %GEPI = gep %GEPIOp, Idx | ||||||||||||||||
6796 | // ... | ||||||||||||||||
6797 | // indirectbr ... [ label %DstB0, label %DstB1, ... label %DstBi ... ] | ||||||||||||||||
6798 | // (* %GEPI is alive on the indirectbr edges due to other uses ahead) | ||||||||||||||||
6799 | // (* %GEPIOp is alive on the indirectbr edges only because it is used by | ||||||||||||||||
6800 | // %UGEPI) | ||||||||||||||||
6801 | // | ||||||||||||||||
6802 | // DstB0: ... (there may be a gep similar to %UGEPI to be unmerged) | ||||||||||||||||
6803 | // DstB1: ... (there may be a gep similar to %UGEPI to be unmerged) | ||||||||||||||||
6804 | // ... | ||||||||||||||||
6805 | // | ||||||||||||||||
6806 | // DstBi: | ||||||||||||||||
6807 | // ... | ||||||||||||||||
6808 | // %UGEPI = gep %GEPIOp, UIdx | ||||||||||||||||
6809 | // ... | ||||||||||||||||
6810 | // --------------------------- | ||||||||||||||||
6811 | // | ||||||||||||||||
6812 | // ---------- AFTER ---------- | ||||||||||||||||
6813 | // SrcBlock: | ||||||||||||||||
6814 | // ... (same as above) | ||||||||||||||||
6815 | // (* %GEPI is still alive on the indirectbr edges) | ||||||||||||||||
6816 | // (* %GEPIOp is no longer alive on the indirectbr edges as a result of the | ||||||||||||||||
6817 | // unmerging) | ||||||||||||||||
6818 | // ... | ||||||||||||||||
6819 | // | ||||||||||||||||
6820 | // DstBi: | ||||||||||||||||
6821 | // ... | ||||||||||||||||
6822 | // %UGEPI = gep %GEPI, (UIdx-Idx) | ||||||||||||||||
6823 | // ... | ||||||||||||||||
6824 | // --------------------------- | ||||||||||||||||
6825 | // | ||||||||||||||||
6826 | // The register pressure on the IndirectBr edges is reduced because %GEPIOp is | ||||||||||||||||
6827 | // no longer alive on them. | ||||||||||||||||
6828 | // | ||||||||||||||||
6829 | // We try to unmerge GEPs here in CodeGenPrepare, as opposed to limiting merging | ||||||||||||||||
6830 | // of GEPs in the first place in InstCombiner::visitGetElementPtrInst() so as | ||||||||||||||||
6831 | // not to disable further simplifications and optimizations as a result of GEP | ||||||||||||||||
6832 | // merging. | ||||||||||||||||
6833 | // | ||||||||||||||||
6834 | // Note this unmerging may increase the length of the data flow critical path | ||||||||||||||||
6835 | // (the path from %GEPIOp to %UGEPI would go through %GEPI), which is a tradeoff | ||||||||||||||||
6836 | // between the register pressure and the length of data-flow critical | ||||||||||||||||
6837 | // path. Restricting this to the uncommon IndirectBr case would minimize the | ||||||||||||||||
6838 | // impact of potentially longer critical path, if any, and the impact on compile | ||||||||||||||||
6839 | // time. | ||||||||||||||||
6840 | static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI, | ||||||||||||||||
6841 | const TargetTransformInfo *TTI) { | ||||||||||||||||
6842 | BasicBlock *SrcBlock = GEPI->getParent(); | ||||||||||||||||
6843 | // Check that SrcBlock ends with an IndirectBr. If not, give up. The common | ||||||||||||||||
6844 | // (non-IndirectBr) cases exit early here. | ||||||||||||||||
6845 | if (!isa<IndirectBrInst>(SrcBlock->getTerminator())) | ||||||||||||||||
6846 | return false; | ||||||||||||||||
6847 | // Check that GEPI is a simple gep with a single constant index. | ||||||||||||||||
6848 | if (!GEPSequentialConstIndexed(GEPI)) | ||||||||||||||||
6849 | return false; | ||||||||||||||||
6850 | ConstantInt *GEPIIdx = cast<ConstantInt>(GEPI->getOperand(1)); | ||||||||||||||||
6851 | // Check that GEPI is a cheap one. | ||||||||||||||||
6852 | if (TTI->getIntImmCost(GEPIIdx->getValue(), GEPIIdx->getType()) | ||||||||||||||||
6853 | > TargetTransformInfo::TCC_Basic) | ||||||||||||||||
6854 | return false; | ||||||||||||||||
6855 | Value *GEPIOp = GEPI->getOperand(0); | ||||||||||||||||
6856 | // Check that GEPIOp is an instruction that's also defined in SrcBlock. | ||||||||||||||||
6857 | if (!isa<Instruction>(GEPIOp)) | ||||||||||||||||
6858 | return false; | ||||||||||||||||
6859 | auto *GEPIOpI = cast<Instruction>(GEPIOp); | ||||||||||||||||
6860 | if (GEPIOpI->getParent() != SrcBlock) | ||||||||||||||||
6861 | return false; | ||||||||||||||||
6862 | // Check that GEP is used outside the block, meaning it's alive on the | ||||||||||||||||
6863 | // IndirectBr edge(s). | ||||||||||||||||
6864 | if (find_if(GEPI->users(), [&](User *Usr) { | ||||||||||||||||
6865 | if (auto *I = dyn_cast<Instruction>(Usr)) { | ||||||||||||||||
6866 | if (I->getParent() != SrcBlock) { | ||||||||||||||||
6867 | return true; | ||||||||||||||||
6868 | } | ||||||||||||||||
6869 | } | ||||||||||||||||
6870 | return false; | ||||||||||||||||
6871 | }) == GEPI->users().end()) | ||||||||||||||||
6872 | return false; | ||||||||||||||||
6873 | // The second elements of the GEP chains to be unmerged. | ||||||||||||||||
6874 | std::vector<GetElementPtrInst *> UGEPIs; | ||||||||||||||||
6875 | // Check each user of GEPIOp to check if unmerging would make GEPIOp not alive | ||||||||||||||||
6876 | // on IndirectBr edges. | ||||||||||||||||
6877 | for (User *Usr : GEPIOp->users()) { | ||||||||||||||||
6878 | if (Usr == GEPI) continue; | ||||||||||||||||
6879 | // Check if Usr is an Instruction. If not, give up. | ||||||||||||||||
6880 | if (!isa<Instruction>(Usr)) | ||||||||||||||||
6881 | return false; | ||||||||||||||||
6882 | auto *UI = cast<Instruction>(Usr); | ||||||||||||||||
6883 | // Check if Usr in the same block as GEPIOp, which is fine, skip. | ||||||||||||||||
6884 | if (UI->getParent() == SrcBlock) | ||||||||||||||||
6885 | continue; | ||||||||||||||||
6886 | // Check if Usr is a GEP. If not, give up. | ||||||||||||||||
6887 | if (!isa<GetElementPtrInst>(Usr)) | ||||||||||||||||
6888 | return false; | ||||||||||||||||
6889 | auto *UGEPI = cast<GetElementPtrInst>(Usr); | ||||||||||||||||
6890 | // Check if UGEPI is a simple gep with a single constant index and GEPIOp is | ||||||||||||||||
6891 | // the pointer operand to it. If so, record it in the vector. If not, give | ||||||||||||||||
6892 | // up. | ||||||||||||||||
6893 | if (!GEPSequentialConstIndexed(UGEPI)) | ||||||||||||||||
6894 | return false; | ||||||||||||||||
6895 | if (UGEPI->getOperand(0) != GEPIOp) | ||||||||||||||||
6896 | return false; | ||||||||||||||||
6897 | if (GEPIIdx->getType() != | ||||||||||||||||
6898 | cast<ConstantInt>(UGEPI->getOperand(1))->getType()) | ||||||||||||||||
6899 | return false; | ||||||||||||||||
6900 | ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); | ||||||||||||||||
6901 | if (TTI->getIntImmCost(UGEPIIdx->getValue(), UGEPIIdx->getType()) | ||||||||||||||||
6902 | > TargetTransformInfo::TCC_Basic) | ||||||||||||||||
6903 | return false; | ||||||||||||||||
6904 | UGEPIs.push_back(UGEPI); | ||||||||||||||||
6905 | } | ||||||||||||||||
6906 | if (UGEPIs.size() == 0) | ||||||||||||||||
6907 | return false; | ||||||||||||||||
6908 | // Check the materializing cost of (Uidx-Idx). | ||||||||||||||||
6909 | for (GetElementPtrInst *UGEPI : UGEPIs) { | ||||||||||||||||
6910 | ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); | ||||||||||||||||
6911 | APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue(); | ||||||||||||||||
6912 | unsigned ImmCost = TTI->getIntImmCost(NewIdx, GEPIIdx->getType()); | ||||||||||||||||
6913 | if (ImmCost > TargetTransformInfo::TCC_Basic) | ||||||||||||||||
6914 | return false; | ||||||||||||||||
6915 | } | ||||||||||||||||
6916 | // Now unmerge between GEPI and UGEPIs. | ||||||||||||||||
6917 | for (GetElementPtrInst *UGEPI : UGEPIs) { | ||||||||||||||||
6918 | UGEPI->setOperand(0, GEPI); | ||||||||||||||||
6919 | ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1)); | ||||||||||||||||
6920 | Constant *NewUGEPIIdx = | ||||||||||||||||
6921 | ConstantInt::get(GEPIIdx->getType(), | ||||||||||||||||
6922 | UGEPIIdx->getValue() - GEPIIdx->getValue()); | ||||||||||||||||
6923 | UGEPI->setOperand(1, NewUGEPIIdx); | ||||||||||||||||
6924 | // If GEPI is not inbounds but UGEPI is inbounds, change UGEPI to not | ||||||||||||||||
6925 | // inbounds to avoid UB. | ||||||||||||||||
6926 | if (!GEPI->isInBounds()) { | ||||||||||||||||
6927 | UGEPI->setIsInBounds(false); | ||||||||||||||||
6928 | } | ||||||||||||||||
6929 | } | ||||||||||||||||
6930 | // After unmerging, verify that GEPIOp is actually only used in SrcBlock (not | ||||||||||||||||
6931 | // alive on IndirectBr edges). | ||||||||||||||||
6932 | assert(find_if(GEPIOp->users(), [&](User *Usr) {((find_if(GEPIOp->users(), [&](User *Usr) { return cast <Instruction>(Usr)->getParent() != SrcBlock; }) == GEPIOp ->users().end() && "GEPIOp is used outside SrcBlock" ) ? static_cast<void> (0) : __assert_fail ("find_if(GEPIOp->users(), [&](User *Usr) { return cast<Instruction>(Usr)->getParent() != SrcBlock; }) == GEPIOp->users().end() && \"GEPIOp is used outside SrcBlock\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6934, __PRETTY_FUNCTION__)) | ||||||||||||||||
6933 | return cast<Instruction>(Usr)->getParent() != SrcBlock;((find_if(GEPIOp->users(), [&](User *Usr) { return cast <Instruction>(Usr)->getParent() != SrcBlock; }) == GEPIOp ->users().end() && "GEPIOp is used outside SrcBlock" ) ? static_cast<void> (0) : __assert_fail ("find_if(GEPIOp->users(), [&](User *Usr) { return cast<Instruction>(Usr)->getParent() != SrcBlock; }) == GEPIOp->users().end() && \"GEPIOp is used outside SrcBlock\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6934, __PRETTY_FUNCTION__)) | ||||||||||||||||
6934 | }) == GEPIOp->users().end() && "GEPIOp is used outside SrcBlock")((find_if(GEPIOp->users(), [&](User *Usr) { return cast <Instruction>(Usr)->getParent() != SrcBlock; }) == GEPIOp ->users().end() && "GEPIOp is used outside SrcBlock" ) ? static_cast<void> (0) : __assert_fail ("find_if(GEPIOp->users(), [&](User *Usr) { return cast<Instruction>(Usr)->getParent() != SrcBlock; }) == GEPIOp->users().end() && \"GEPIOp is used outside SrcBlock\"" , "/build/llvm-toolchain-snapshot-10~svn373517/lib/CodeGen/CodeGenPrepare.cpp" , 6934, __PRETTY_FUNCTION__)); | ||||||||||||||||
6935 | return true; | ||||||||||||||||
6936 | } | ||||||||||||||||
6937 | |||||||||||||||||
6938 | bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) { | ||||||||||||||||
6939 | // Bail out if we inserted the instruction to prevent optimizations from | ||||||||||||||||
6940 | // stepping on each other's toes. | ||||||||||||||||
6941 | if (InsertedInsts.count(I)) | ||||||||||||||||
6942 | return false; | ||||||||||||||||
6943 | |||||||||||||||||
6944 | // TODO: Move into the switch on opcode below here. | ||||||||||||||||
6945 | if (PHINode *P
| ||||||||||||||||
6946 | // It is possible for very late stage optimizations (such as SimplifyCFG) | ||||||||||||||||
6947 | // to introduce PHI nodes too late to be cleaned up. If we detect such a | ||||||||||||||||
6948 | // trivial PHI, go ahead and zap it here. | ||||||||||||||||
6949 | if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) { | ||||||||||||||||
6950 | LargeOffsetGEPMap.erase(P); | ||||||||||||||||
6951 | P->replaceAllUsesWith(V); | ||||||||||||||||
6952 | P->eraseFromParent(); | ||||||||||||||||
6953 | ++NumPHIsElim; | ||||||||||||||||
6954 | return true; | ||||||||||||||||
6955 | } | ||||||||||||||||
6956 | return false; | ||||||||||||||||
6957 | } | ||||||||||||||||
6958 | |||||||||||||||||
6959 | if (CastInst *CI
| ||||||||||||||||
6960 | // If the source of the cast is a constant, then this should have | ||||||||||||||||
6961 | // already been constant folded. The only reason NOT to constant fold | ||||||||||||||||
6962 | // it is if something (e.g. LSR) was careful to place the constant | ||||||||||||||||
6963 | // evaluation in a block other than then one that uses it (e.g. to hoist | ||||||||||||||||
6964 | // the address of globals out of a loop). If this is the case, we don't | ||||||||||||||||
6965 | // want to forward-subst the cast. | ||||||||||||||||
6966 | if (isa<Constant>(CI->getOperand(0))) | ||||||||||||||||
6967 | return false; | ||||||||||||||||
6968 | |||||||||||||||||
6969 | if (TLI && OptimizeNoopCopyExpression(CI, *TLI, *DL)) | ||||||||||||||||
6970 | return true; | ||||||||||||||||
6971 | |||||||||||||||||
6972 | if (isa<ZExtInst>(I) || isa<SExtInst>(I)) { | ||||||||||||||||
6973 | /// Sink a zext or sext into its user blocks if the target type doesn't | ||||||||||||||||
6974 | /// fit in one register | ||||||||||||||||
6975 | if (TLI && | ||||||||||||||||
6976 | TLI->getTypeAction(CI->getContext(), | ||||||||||||||||
6977 | TLI->getValueType(*DL, CI->getType())) == | ||||||||||||||||
6978 | TargetLowering::TypeExpandInteger) { | ||||||||||||||||
6979 | return SinkCast(CI); | ||||||||||||||||
6980 | } else { | ||||||||||||||||
6981 | bool MadeChange = optimizeExt(I); | ||||||||||||||||
6982 | return MadeChange | optimizeExtUses(I); | ||||||||||||||||
6983 | } | ||||||||||||||||
6984 | } | ||||||||||||||||
6985 | return false; | ||||||||||||||||
6986 | } | ||||||||||||||||
6987 | |||||||||||||||||
6988 | if (auto *Cmp
| ||||||||||||||||
6989 | if (TLI && optimizeCmp(Cmp, ModifiedDT)) | ||||||||||||||||
6990 | return true; | ||||||||||||||||
6991 | |||||||||||||||||
6992 | if (LoadInst *LI
| ||||||||||||||||
6993 | LI->setMetadata(LLVMContext::MD_invariant_group, nullptr); | ||||||||||||||||
6994 | if (TLI) { | ||||||||||||||||
6995 | bool Modified = optimizeLoadExt(LI); | ||||||||||||||||
6996 | unsigned AS = LI->getPointerAddressSpace(); | ||||||||||||||||
6997 | Modified |= optimizeMemoryInst(I, I->getOperand(0), LI->getType(), AS); | ||||||||||||||||
6998 | return Modified; | ||||||||||||||||
6999 | } | ||||||||||||||||
7000 | return false; | ||||||||||||||||
7001 | } | ||||||||||||||||
7002 | |||||||||||||||||
7003 | if (StoreInst *SI
| ||||||||||||||||
7004 | if (TLI && splitMergedValStore(*SI, *DL, *TLI)) | ||||||||||||||||
7005 | return true; | ||||||||||||||||
7006 | SI->setMetadata(LLVMContext::MD_invariant_group, nullptr); | ||||||||||||||||
7007 | if (TLI) { | ||||||||||||||||
7008 | unsigned AS = SI->getPointerAddressSpace(); | ||||||||||||||||
7009 | return optimizeMemoryInst(I, SI->getOperand(1), | ||||||||||||||||
7010 | SI->getOperand(0)->getType(), AS); | ||||||||||||||||
7011 | } | ||||||||||||||||
7012 | return false; | ||||||||||||||||
7013 | } | ||||||||||||||||
7014 | |||||||||||||||||
7015 | if (AtomicRMWInst *RMW
| ||||||||||||||||
7016 | unsigned AS = RMW->getPointerAddressSpace(); | ||||||||||||||||
7017 | return optimizeMemoryInst(I, RMW->getPointerOperand(), | ||||||||||||||||
7018 | RMW->getType(), AS); | ||||||||||||||||
7019 | } | ||||||||||||||||
7020 | |||||||||||||||||
7021 | if (AtomicCmpXchgInst *CmpX
| ||||||||||||||||
7022 | unsigned AS = CmpX->getPointerAddressSpace(); | ||||||||||||||||
7023 | return optimizeMemoryInst(I, CmpX->getPointerOperand(), | ||||||||||||||||
7024 | CmpX->getCompareOperand()->getType(), AS); | ||||||||||||||||
7025 | } | ||||||||||||||||
7026 | |||||||||||||||||
7027 | BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I); | ||||||||||||||||
7028 | |||||||||||||||||
7029 | if (BinOp
| ||||||||||||||||
7030 | EnableAndCmpSinking && TLI) | ||||||||||||||||
7031 | return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts); | ||||||||||||||||
7032 | |||||||||||||||||
7033 | // TODO: Move this into the switch on opcode - it handles shifts already. | ||||||||||||||||
7034 | if (BinOp
| ||||||||||||||||
7035 | BinOp->getOpcode() == Instruction::LShr)) { | ||||||||||||||||
7036 | ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1)); | ||||||||||||||||
7037 | if (TLI && CI && TLI->hasExtractBitsInsn()) | ||||||||||||||||
7038 | if (OptimizeExtractBits(BinOp, CI, *TLI, *DL)) | ||||||||||||||||
7039 | return true; | ||||||||||||||||
7040 | } | ||||||||||||||||
7041 | |||||||||||||||||
7042 | if (GetElementPtrInst *GEPI
| ||||||||||||||||
7043 | if (GEPI->hasAllZeroIndices()) { | ||||||||||||||||
7044 | /// The GEP operand must be a pointer, so must its result -> BitCast | ||||||||||||||||
7045 | Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(), | ||||||||||||||||
7046 | GEPI->getName(), GEPI); | ||||||||||||||||
7047 | NC->setDebugLoc(GEPI->getDebugLoc()); | ||||||||||||||||
7048 | GEPI->replaceAllUsesWith(NC); | ||||||||||||||||
7049 | GEPI->eraseFromParent(); | ||||||||||||||||
7050 | ++NumGEPsElim; | ||||||||||||||||
7051 | optimizeInst(NC, ModifiedDT); | ||||||||||||||||
7052 | return true; | ||||||||||||||||
7053 | } | ||||||||||||||||
7054 | if (tryUnmergingGEPsAcrossIndirectBr(GEPI, TTI)) { | ||||||||||||||||
7055 | return true; | ||||||||||||||||
7056 | } | ||||||||||||||||
7057 | return false; | ||||||||||||||||
7058 | } | ||||||||||||||||
7059 | |||||||||||||||||
7060 | if (tryToSinkFreeOperands(I)) | ||||||||||||||||
7061 | return true; | ||||||||||||||||
7062 | |||||||||||||||||
7063 | switch (I->getOpcode()) { | ||||||||||||||||
7064 | case Instruction::Shl: | ||||||||||||||||
7065 | case Instruction::LShr: | ||||||||||||||||
7066 | case Instruction::AShr: | ||||||||||||||||
7067 | return optimizeShiftInst(cast<BinaryOperator>(I)); | ||||||||||||||||
7068 | case Instruction::Call: | ||||||||||||||||
7069 | return optimizeCallInst(cast<CallInst>(I), ModifiedDT); | ||||||||||||||||
7070 | case Instruction::Select: | ||||||||||||||||
7071 | return optimizeSelectInst(cast<SelectInst>(I)); | ||||||||||||||||
7072 | case Instruction::ShuffleVector: | ||||||||||||||||
7073 | return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I)); | ||||||||||||||||
7074 | case Instruction::Switch: | ||||||||||||||||
7075 | return optimizeSwitchInst(cast<SwitchInst>(I)); | ||||||||||||||||
7076 | case Instruction::ExtractElement: | ||||||||||||||||
7077 | return optimizeExtractElementInst(cast<ExtractElementInst>(I)); | ||||||||||||||||
7078 | } | ||||||||||||||||
7079 | |||||||||||||||||
7080 | return false; | ||||||||||||||||
7081 | } | ||||||||||||||||
7082 | |||||||||||||||||
7083 | /// Given an OR instruction, check to see if this is a bitreverse | ||||||||||||||||
7084 | /// idiom. If so, insert the new intrinsic and return true. | ||||||||||||||||
7085 | static bool makeBitReverse(Instruction &I, const DataLayout &DL, | ||||||||||||||||
7086 | const TargetLowering &TLI) { | ||||||||||||||||
7087 | if (!I.getType()->isIntegerTy() || | ||||||||||||||||
7088 | !TLI.isOperationLegalOrCustom(ISD::BITREVERSE, | ||||||||||||||||
7089 | TLI.getValueType(DL, I.getType(), true))) | ||||||||||||||||
7090 | return false; | ||||||||||||||||
7091 | |||||||||||||||||
7092 | SmallVector<Instruction*, 4> Insts; | ||||||||||||||||
7093 | if (!recognizeBSwapOrBitReverseIdiom(&I, false, true, Insts)) | ||||||||||||||||
7094 | return false; | ||||||||||||||||
7095 | Instruction *LastInst = Insts.back(); | ||||||||||||||||
7096 | I.replaceAllUsesWith(LastInst); | ||||||||||||||||
7097 | RecursivelyDeleteTriviallyDeadInstructions(&I); | ||||||||||||||||
7098 | return true; | ||||||||||||||||
7099 | } | ||||||||||||||||
7100 | |||||||||||||||||
7101 | // In this pass we look for GEP and cast instructions that are used | ||||||||||||||||
7102 | // across basic blocks and rewrite them to improve basic-block-at-a-time | ||||||||||||||||
7103 | // selection. | ||||||||||||||||
7104 | bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) { | ||||||||||||||||
7105 | SunkAddrs.clear(); | ||||||||||||||||
7106 | bool MadeChange = false; | ||||||||||||||||
7107 | |||||||||||||||||
7108 | CurInstIterator = BB.begin(); | ||||||||||||||||
7109 | while (CurInstIterator != BB.end()) { | ||||||||||||||||
7110 | MadeChange |= optimizeInst(&*CurInstIterator++, ModifiedDT); | ||||||||||||||||
7111 | if (ModifiedDT) | ||||||||||||||||
7112 | return true; | ||||||||||||||||
7113 | } | ||||||||||||||||
7114 | |||||||||||||||||
7115 | bool MadeBitReverse = true; | ||||||||||||||||
7116 | while (TLI && MadeBitReverse) { | ||||||||||||||||
7117 | MadeBitReverse = false; | ||||||||||||||||
7118 | for (auto &I : reverse(BB)) { | ||||||||||||||||
7119 | if (makeBitReverse(I, *DL, *TLI)) { | ||||||||||||||||
7120 | MadeBitReverse = MadeChange = true; | ||||||||||||||||
7121 | break; | ||||||||||||||||
7122 | } | ||||||||||||||||
7123 | } | ||||||||||||||||
7124 | } | ||||||||||||||||
7125 | MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT); | ||||||||||||||||
7126 | |||||||||||||||||
7127 | return MadeChange; | ||||||||||||||||
7128 | } | ||||||||||||||||
7129 | |||||||||||||||||
7130 | // llvm.dbg.value is far away from the value then iSel may not be able | ||||||||||||||||
7131 | // handle it properly. iSel will drop llvm.dbg.value if it can not | ||||||||||||||||
7132 | // find a node corresponding to the value. | ||||||||||||||||
7133 | bool CodeGenPrepare::placeDbgValues(Function &F) { | ||||||||||||||||
7134 | bool MadeChange = false; | ||||||||||||||||
7135 | for (BasicBlock &BB : F) { | ||||||||||||||||
7136 | Instruction *PrevNonDbgInst = nullptr; | ||||||||||||||||
7137 | for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) { | ||||||||||||||||
7138 | Instruction *Insn = &*BI++; | ||||||||||||||||
7139 | DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); | ||||||||||||||||
7140 | // Leave dbg.values that refer to an alloca alone. These | ||||||||||||||||
7141 | // intrinsics describe the address of a variable (= the alloca) | ||||||||||||||||
7142 | // being taken. They should not be moved next to the alloca | ||||||||||||||||
7143 | // (and to the beginning of the scope), but rather stay close to | ||||||||||||||||
7144 | // where said address is used. | ||||||||||||||||
7145 | if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { | ||||||||||||||||
7146 | PrevNonDbgInst = Insn; | ||||||||||||||||
7147 | continue; | ||||||||||||||||
7148 | } | ||||||||||||||||
7149 | |||||||||||||||||
7150 | Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); | ||||||||||||||||
7151 | if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { | ||||||||||||||||
7152 | // If VI is a phi in a block with an EHPad terminator, we can't insert | ||||||||||||||||
7153 | // after it. | ||||||||||||||||
7154 | if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) | ||||||||||||||||
7155 | continue; | ||||||||||||||||
7156 | LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI; } } while (false) | ||||||||||||||||
7157 | << *DVI << ' ' << *VI)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI; } } while (false); | ||||||||||||||||
7158 | DVI->removeFromParent(); | ||||||||||||||||
7159 | if (isa<PHINode>(VI)) | ||||||||||||||||
7160 | DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); | ||||||||||||||||
7161 | else | ||||||||||||||||
7162 | DVI->insertAfter(VI); | ||||||||||||||||
7163 | MadeChange = true; | ||||||||||||||||
7164 | ++NumDbgValueMoved; | ||||||||||||||||
7165 | } | ||||||||||||||||
7166 | } | ||||||||||||||||
7167 | } | ||||||||||||||||
7168 | return MadeChange; | ||||||||||||||||
7169 | } | ||||||||||||||||
7170 | |||||||||||||||||
/// Scale down both weights by a common divisor so that each fits into a
/// uint32_t. The relative proportion of the two weights is preserved up to
/// integer-division rounding.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
  // Choose the divisor from the larger weight; same uint32_t width for the
  // divisor as the original to keep identical overflow/truncation behavior.
  uint64_t Larger = NewTrue > NewFalse ? NewTrue : NewFalse;
  uint32_t Scale =
      static_cast<uint32_t>(Larger / std::numeric_limits<uint32_t>::max() + 1);
  NewTrue /= Scale;
  NewFalse /= Scale;
}
7178 | |||||||||||||||||
7179 | /// Some targets prefer to split a conditional branch like: | ||||||||||||||||
7180 | /// \code | ||||||||||||||||
7181 | /// %0 = icmp ne i32 %a, 0 | ||||||||||||||||
7182 | /// %1 = icmp ne i32 %b, 0 | ||||||||||||||||
7183 | /// %or.cond = or i1 %0, %1 | ||||||||||||||||
7184 | /// br i1 %or.cond, label %TrueBB, label %FalseBB | ||||||||||||||||
7185 | /// \endcode | ||||||||||||||||
7186 | /// into multiple branch instructions like: | ||||||||||||||||
7187 | /// \code | ||||||||||||||||
7188 | /// bb1: | ||||||||||||||||
7189 | /// %0 = icmp ne i32 %a, 0 | ||||||||||||||||
7190 | /// br i1 %0, label %TrueBB, label %bb2 | ||||||||||||||||
7191 | /// bb2: | ||||||||||||||||
7192 | /// %1 = icmp ne i32 %b, 0 | ||||||||||||||||
7193 | /// br i1 %1, label %TrueBB, label %FalseBB | ||||||||||||||||
7194 | /// \endcode | ||||||||||||||||
7195 | /// This usually allows instruction selection to do even further optimizations | ||||||||||||||||
7196 | /// and combine the compare with the branch instruction. Currently this is | ||||||||||||||||
7197 | /// applied for targets which have "cheap" jump instructions. | ||||||||||||||||
7198 | /// | ||||||||||||||||
7199 | /// FIXME: Remove the (equivalent?) implementation in SelectionDAG. | ||||||||||||||||
7200 | /// | ||||||||||||||||
7201 | bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) { | ||||||||||||||||
7202 | if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive()) | ||||||||||||||||
7203 | return false; | ||||||||||||||||
7204 | |||||||||||||||||
7205 | bool MadeChange = false; | ||||||||||||||||
7206 | for (auto &BB : F) { | ||||||||||||||||
7207 | // Does this BB end with the following? | ||||||||||||||||
7208 | // %cond1 = icmp|fcmp|binary instruction ... | ||||||||||||||||
7209 | // %cond2 = icmp|fcmp|binary instruction ... | ||||||||||||||||
7210 | // %cond.or = or|and i1 %cond1, cond2 | ||||||||||||||||
7211 | // br i1 %cond.or label %dest1, label %dest2" | ||||||||||||||||
7212 | BinaryOperator *LogicOp; | ||||||||||||||||
7213 | BasicBlock *TBB, *FBB; | ||||||||||||||||
7214 | if (!match(BB.getTerminator(), m_Br(m_OneUse(m_BinOp(LogicOp)), TBB, FBB))) | ||||||||||||||||
7215 | continue; | ||||||||||||||||
7216 | |||||||||||||||||
7217 | auto *Br1 = cast<BranchInst>(BB.getTerminator()); | ||||||||||||||||
7218 | if (Br1->getMetadata(LLVMContext::MD_unpredictable)) | ||||||||||||||||
7219 | continue; | ||||||||||||||||
7220 | |||||||||||||||||
7221 | unsigned Opc; | ||||||||||||||||
7222 | Value *Cond1, *Cond2; | ||||||||||||||||
7223 | if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), | ||||||||||||||||
7224 | m_OneUse(m_Value(Cond2))))) | ||||||||||||||||
7225 | Opc = Instruction::And; | ||||||||||||||||
7226 | else if (match(LogicOp, m_Or(m_OneUse(m_Value(Cond1)), | ||||||||||||||||
7227 | m_OneUse(m_Value(Cond2))))) | ||||||||||||||||
7228 | Opc = Instruction::Or; | ||||||||||||||||
7229 | else | ||||||||||||||||
7230 | continue; | ||||||||||||||||
7231 | |||||||||||||||||
7232 | if (!match(Cond1, m_CombineOr(m_Cmp(), m_BinOp())) || | ||||||||||||||||
7233 | !match(Cond2, m_CombineOr(m_Cmp(), m_BinOp())) ) | ||||||||||||||||
7234 | continue; | ||||||||||||||||
7235 | |||||||||||||||||
7236 | LLVM_DEBUG(dbgs() << "Before branch condition splitting\n"; BB.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "Before branch condition splitting\n" ; BB.dump(); } } while (false); | ||||||||||||||||
7237 | |||||||||||||||||
7238 | // Create a new BB. | ||||||||||||||||
7239 | auto TmpBB = | ||||||||||||||||
7240 | BasicBlock::Create(BB.getContext(), BB.getName() + ".cond.split", | ||||||||||||||||
7241 | BB.getParent(), BB.getNextNode()); | ||||||||||||||||
7242 | |||||||||||||||||
7243 | // Update original basic block by using the first condition directly by the | ||||||||||||||||
7244 | // branch instruction and removing the no longer needed and/or instruction. | ||||||||||||||||
7245 | Br1->setCondition(Cond1); | ||||||||||||||||
7246 | LogicOp->eraseFromParent(); | ||||||||||||||||
7247 | |||||||||||||||||
7248 | // Depending on the condition we have to either replace the true or the | ||||||||||||||||
7249 | // false successor of the original branch instruction. | ||||||||||||||||
7250 | if (Opc == Instruction::And) | ||||||||||||||||
7251 | Br1->setSuccessor(0, TmpBB); | ||||||||||||||||
7252 | else | ||||||||||||||||
7253 | Br1->setSuccessor(1, TmpBB); | ||||||||||||||||
7254 | |||||||||||||||||
7255 | // Fill in the new basic block. | ||||||||||||||||
7256 | auto *Br2 = IRBuilder<>(TmpBB).CreateCondBr(Cond2, TBB, FBB); | ||||||||||||||||
7257 | if (auto *I = dyn_cast<Instruction>(Cond2)) { | ||||||||||||||||
7258 | I->removeFromParent(); | ||||||||||||||||
7259 | I->insertBefore(Br2); | ||||||||||||||||
7260 | } | ||||||||||||||||
7261 | |||||||||||||||||
7262 | // Update PHI nodes in both successors. The original BB needs to be | ||||||||||||||||
7263 | // replaced in one successor's PHI nodes, because the branch comes now from | ||||||||||||||||
7264 | // the newly generated BB (NewBB). In the other successor we need to add one | ||||||||||||||||
7265 | // incoming edge to the PHI nodes, because both branch instructions target | ||||||||||||||||
7266 | // now the same successor. Depending on the original branch condition | ||||||||||||||||
7267 | // (and/or) we have to swap the successors (TrueDest, FalseDest), so that | ||||||||||||||||
7268 | // we perform the correct update for the PHI nodes. | ||||||||||||||||
7269 | // This doesn't change the successor order of the just created branch | ||||||||||||||||
7270 | // instruction (or any other instruction). | ||||||||||||||||
7271 | if (Opc == Instruction::Or) | ||||||||||||||||
7272 | std::swap(TBB, FBB); | ||||||||||||||||
7273 | |||||||||||||||||
7274 | // Replace the old BB with the new BB. | ||||||||||||||||
7275 | TBB->replacePhiUsesWith(&BB, TmpBB); | ||||||||||||||||
7276 | |||||||||||||||||
7277 | // Add another incoming edge form the new BB. | ||||||||||||||||
7278 | for (PHINode &PN : FBB->phis()) { | ||||||||||||||||
7279 | auto *Val = PN.getIncomingValueForBlock(&BB); | ||||||||||||||||
7280 | PN.addIncoming(Val, TmpBB); | ||||||||||||||||
7281 | } | ||||||||||||||||
7282 | |||||||||||||||||
7283 | // Update the branch weights (from SelectionDAGBuilder:: | ||||||||||||||||
7284 | // FindMergedConditions). | ||||||||||||||||
7285 | if (Opc == Instruction::Or) { | ||||||||||||||||
7286 | // Codegen X | Y as: | ||||||||||||||||
7287 | // BB1: | ||||||||||||||||
7288 | // jmp_if_X TBB | ||||||||||||||||
7289 | // jmp TmpBB | ||||||||||||||||
7290 | // TmpBB: | ||||||||||||||||
7291 | // jmp_if_Y TBB | ||||||||||||||||
7292 | // jmp FBB | ||||||||||||||||
7293 | // | ||||||||||||||||
7294 | |||||||||||||||||
7295 | // We have flexibility in setting Prob for BB1 and Prob for NewBB. | ||||||||||||||||
7296 | // The requirement is that | ||||||||||||||||
7297 | // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB) | ||||||||||||||||
7298 | // = TrueProb for original BB. | ||||||||||||||||
7299 | // Assuming the original weights are A and B, one choice is to set BB1's | ||||||||||||||||
7300 | // weights to A and A+2B, and set TmpBB's weights to A and 2B. This choice | ||||||||||||||||
7301 | // assumes that | ||||||||||||||||
7302 | // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB. | ||||||||||||||||
7303 | // Another choice is to assume TrueProb for BB1 equals to TrueProb for | ||||||||||||||||
7304 | // TmpBB, but the math is more complicated. | ||||||||||||||||
7305 | uint64_t TrueWeight, FalseWeight; | ||||||||||||||||
7306 | if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { | ||||||||||||||||
7307 | uint64_t NewTrueWeight = TrueWeight; | ||||||||||||||||
7308 | uint64_t NewFalseWeight = TrueWeight + 2 * FalseWeight; | ||||||||||||||||
7309 | scaleWeights(NewTrueWeight, NewFalseWeight); | ||||||||||||||||
7310 | Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) | ||||||||||||||||
7311 | .createBranchWeights(TrueWeight, FalseWeight)); | ||||||||||||||||
7312 | |||||||||||||||||
7313 | NewTrueWeight = TrueWeight; | ||||||||||||||||
7314 | NewFalseWeight = 2 * FalseWeight; | ||||||||||||||||
7315 | scaleWeights(NewTrueWeight, NewFalseWeight); | ||||||||||||||||
7316 | Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) | ||||||||||||||||
7317 | .createBranchWeights(TrueWeight, FalseWeight)); | ||||||||||||||||
7318 | } | ||||||||||||||||
7319 | } else { | ||||||||||||||||
7320 | // Codegen X & Y as: | ||||||||||||||||
7321 | // BB1: | ||||||||||||||||
7322 | // jmp_if_X TmpBB | ||||||||||||||||
7323 | // jmp FBB | ||||||||||||||||
7324 | // TmpBB: | ||||||||||||||||
7325 | // jmp_if_Y TBB | ||||||||||||||||
7326 | // jmp FBB | ||||||||||||||||
7327 | // | ||||||||||||||||
7328 | // This requires creation of TmpBB after CurBB. | ||||||||||||||||
7329 | |||||||||||||||||
7330 | // We have flexibility in setting Prob for BB1 and Prob for TmpBB. | ||||||||||||||||
7331 | // The requirement is that | ||||||||||||||||
7332 | // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB) | ||||||||||||||||
7333 | // = FalseProb for original BB. | ||||||||||||||||
7334 | // Assuming the original weights are A and B, one choice is to set BB1's | ||||||||||||||||
7335 | // weights to 2A+B and B, and set TmpBB's weights to 2A and B. This choice | ||||||||||||||||
7336 | // assumes that | ||||||||||||||||
7337 | // FalseProb for BB1 == TrueProb for BB1 * FalseProb for TmpBB. | ||||||||||||||||
7338 | uint64_t TrueWeight, FalseWeight; | ||||||||||||||||
7339 | if (Br1->extractProfMetadata(TrueWeight, FalseWeight)) { | ||||||||||||||||
7340 | uint64_t NewTrueWeight = 2 * TrueWeight + FalseWeight; | ||||||||||||||||
7341 | uint64_t NewFalseWeight = FalseWeight; | ||||||||||||||||
7342 | scaleWeights(NewTrueWeight, NewFalseWeight); | ||||||||||||||||
7343 | Br1->setMetadata(LLVMContext::MD_prof, MDBuilder(Br1->getContext()) | ||||||||||||||||
7344 | .createBranchWeights(TrueWeight, FalseWeight)); | ||||||||||||||||
7345 | |||||||||||||||||
7346 | NewTrueWeight = 2 * TrueWeight; | ||||||||||||||||
7347 | NewFalseWeight = FalseWeight; | ||||||||||||||||
7348 | scaleWeights(NewTrueWeight, NewFalseWeight); | ||||||||||||||||
7349 | Br2->setMetadata(LLVMContext::MD_prof, MDBuilder(Br2->getContext()) | ||||||||||||||||
7350 | .createBranchWeights(TrueWeight, FalseWeight)); | ||||||||||||||||
7351 | } | ||||||||||||||||
7352 | } | ||||||||||||||||
7353 | |||||||||||||||||
7354 | ModifiedDT = true; | ||||||||||||||||
7355 | MadeChange = true; | ||||||||||||||||
7356 | |||||||||||||||||
7357 | LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "After branch condition splitting\n" ; BB.dump(); TmpBB->dump(); } } while (false) | ||||||||||||||||
7358 | TmpBB->dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("codegenprepare")) { dbgs() << "After branch condition splitting\n" ; BB.dump(); TmpBB->dump(); } } while (false); | ||||||||||||||||
7359 | } | ||||||||||||||||
7360 | return MadeChange; | ||||||||||||||||
7361 | } |
1 | //===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the SmallVector class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_SMALLVECTOR_H |
14 | #define LLVM_ADT_SMALLVECTOR_H |
15 | |
16 | #include "llvm/ADT/iterator_range.h" |
17 | #include "llvm/Support/AlignOf.h" |
18 | #include "llvm/Support/Compiler.h" |
19 | #include "llvm/Support/MathExtras.h" |
20 | #include "llvm/Support/MemAlloc.h" |
21 | #include "llvm/Support/type_traits.h" |
22 | #include "llvm/Support/ErrorHandling.h" |
23 | #include <algorithm> |
24 | #include <cassert> |
25 | #include <cstddef> |
26 | #include <cstdlib> |
27 | #include <cstring> |
28 | #include <initializer_list> |
29 | #include <iterator> |
30 | #include <memory> |
31 | #include <new> |
32 | #include <type_traits> |
33 | #include <utility> |
34 | |
35 | namespace llvm { |
36 | |
37 | /// This is all the non-templated stuff common to all SmallVectors. |
38 | class SmallVectorBase { |
39 | protected: |
40 | void *BeginX; |
41 | unsigned Size = 0, Capacity; |
42 | |
43 | SmallVectorBase() = delete; |
44 | SmallVectorBase(void *FirstEl, size_t TotalCapacity) |
45 | : BeginX(FirstEl), Capacity(TotalCapacity) {} |
46 | |
47 | /// This is an implementation of the grow() method which only works |
48 | /// on POD-like data types and is out of line to reduce code duplication. |
49 | void grow_pod(void *FirstEl, size_t MinCapacity, size_t TSize); |
50 | |
51 | public: |
52 | size_t size() const { return Size; } |
53 | size_t capacity() const { return Capacity; } |
54 | |
55 | LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; } |
56 | |
57 | /// Set the array size to \p N, which the current array must have enough |
58 | /// capacity for. |
59 | /// |
60 | /// This does not construct or destroy any elements in the vector. |
61 | /// |
62 | /// Clients can use this in conjunction with capacity() to write past the end |
63 | /// of the buffer when they know that more elements are available, and only |
64 | /// update the size later. This avoids the cost of value initializing elements |
65 | /// which will only be overwritten. |
66 | void set_size(size_t N) { |
67 | assert(N <= capacity())((N <= capacity()) ? static_cast<void> (0) : __assert_fail ("N <= capacity()", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 67, __PRETTY_FUNCTION__)); |
68 | Size = N; |
69 | } |
70 | }; |
71 | |
/// Figure out the offset of the first element.
///
/// Layout mirror of a SmallVector: a header-sized slot followed by one
/// element-sized slot. Used via offsetof(..., FirstEl) to locate the first
/// inline element relative to the vector object; never instantiated as data.
template <class T, typename = void> struct SmallVectorAlignmentAndSize {
  AlignedCharArrayUnion<SmallVectorBase> Base; // stand-in for the vector header
  AlignedCharArrayUnion<T> FirstEl;            // stand-in for the first inline element
};
77 | |
78 | /// This is the part of SmallVectorTemplateBase which does not depend on whether |
79 | /// the type T is a POD. The extra dummy template argument is used by ArrayRef |
80 | /// to avoid unnecessarily requiring T to be complete. |
template <typename T, typename = void>
class SmallVectorTemplateCommon : public SmallVectorBase {
  /// Find the address of the first element. For this pointer math to be valid
  /// with small-size of 0 for T with lots of alignment, it's important that
  /// SmallVectorStorage is properly-aligned even for small-size of 0.
  void *getFirstEl() const {
    return const_cast<void *>(reinterpret_cast<const void *>(
        reinterpret_cast<const char *>(this) +
        offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl )));
  }
  // Space after 'FirstEl' is clobbered, do not add any instance vars after it.

protected:
  /// Start out pointing at the inline buffer with the given capacity.
  SmallVectorTemplateCommon(size_t Size)
      : SmallVectorBase(getFirstEl(), Size) {}

  /// Reallocate for trivially-copyable element types. Forwards the inline
  /// buffer's address so the base can tell whether the current storage is
  /// heap-allocated (and therefore needs freeing) or not.
  void grow_pod(size_t MinCapacity, size_t TSize) {
    SmallVectorBase::grow_pod(getFirstEl(), MinCapacity, TSize);
  }

  /// Return true if this is a smallvector which has not had dynamic
  /// memory allocated for it.
  bool isSmall() const { return BeginX == getFirstEl(); }

  /// Put this vector in a state of being small.
  void resetToSmall() {
    BeginX = getFirstEl();
    Size = Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
  }

public:
  using size_type = size_t;
  using difference_type = ptrdiff_t;
  using value_type = T;
  using iterator = T *;
  using const_iterator = const T *;

  using const_reverse_iterator = std::reverse_iterator<const_iterator>;
  using reverse_iterator = std::reverse_iterator<iterator>;

  using reference = T &;
  using const_reference = const T &;
  using pointer = T *;
  using const_pointer = const T *;

  // forward iterator creation methods.
  iterator begin() { return (iterator)this->BeginX; }
  const_iterator begin() const { return (const_iterator)this->BeginX; }
  iterator end() { return begin() + size(); }
  const_iterator end() const { return begin() + size(); }

  // reverse iterator creation methods.
  reverse_iterator rbegin() { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
  reverse_iterator rend() { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin());}

  /// Number of elements times the element size, in bytes.
  size_type size_in_bytes() const { return size() * sizeof(T); }
  /// Largest element count representable by the size type.
  size_type max_size() const { return size_type(-1) / sizeof(T); }

  size_t capacity_in_bytes() const { return capacity() * sizeof(T); }

  /// Return a pointer to the vector's buffer, even if empty().
  pointer data() { return pointer(begin()); }
  /// Return a pointer to the vector's buffer, even if empty().
  const_pointer data() const { return const_pointer(begin()); }

  reference operator[](size_type idx) {
    assert(idx < size())((idx < size()) ? static_cast<void> (0) : __assert_fail ("idx < size()", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 149, __PRETTY_FUNCTION__));
    return begin()[idx];
  }
  const_reference operator[](size_type idx) const {
    assert(idx < size())((idx < size()) ? static_cast<void> (0) : __assert_fail ("idx < size()", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 153, __PRETTY_FUNCTION__));
    return begin()[idx];
  }

  reference front() {
    assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 158, __PRETTY_FUNCTION__));
    return begin()[0];
  }
  const_reference front() const {
    assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 162, __PRETTY_FUNCTION__));
    return begin()[0];
  }

  reference back() {
    assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 167, __PRETTY_FUNCTION__));
    return end()[-1];
  }
  const_reference back() const {
    assert(!empty())((!empty()) ? static_cast<void> (0) : __assert_fail ("!empty()" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 171, __PRETTY_FUNCTION__));
    return end()[-1];
  }
};
175 | |
/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put method
/// implementations that are designed to work with non-POD-like T's. Elements
/// are constructed/destroyed individually and moved with their move ctor.
template <typename T, bool = is_trivially_copyable<T>::value>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
protected:
  SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}

  /// Run the destructor over [S, E), back to front (reverse of construction
  /// order).
  static void destroy_range(T *S, T *E) {
    while (S != E) {
      --E;
      E->~T();
    }
  }

  /// Move the range [I, E) into the uninitialized memory starting with "Dest",
  /// constructing elements as needed.
  template<typename It1, typename It2>
  static void uninitialized_move(It1 I, It1 E, It2 Dest) {
    std::uninitialized_copy(std::make_move_iterator(I),
                            std::make_move_iterator(E), Dest);
  }

  /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
  /// constructing elements as needed.
  template<typename It1, typename It2>
  static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
    std::uninitialized_copy(I, E, Dest);
  }

  /// Grow the allocated memory (without initializing new elements), doubling
  /// the size of the allocated memory. Guarantees space for at least one more
  /// element, or MinSize more elements if specified.
  void grow(size_t MinSize = 0);

public:
  /// Append a copy of \p Elt. May reallocate, invalidating all iterators.
  void push_back(const T &Elt) {
    if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false))
      this->grow();
    ::new ((void*) this->end()) T(Elt);
    this->set_size(this->size() + 1);
  }

  /// Append by moving \p Elt. May reallocate, invalidating all iterators.
  void push_back(T &&Elt) {
    if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false))
      this->grow();
    ::new ((void*) this->end()) T(::std::move(Elt));
    this->set_size(this->size() + 1);
  }

  /// Remove the last element, running its destructor.
  void pop_back() {
    this->set_size(this->size() - 1);
    this->end()->~T();
  }
};
230 | |
// Define this out-of-line to dissuade the C++ compiler from inlining it.
template <typename T, bool TriviallyCopyable>
void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
  // NOTE(review): capacity is capped at UINT32_MAX — presumably it is stored
  // in a 32-bit field in SmallVectorBase (not visible here); confirm.
  if (MinSize > UINT32_MAX(4294967295U))
    report_bad_alloc_error("SmallVector capacity overflow during allocation");

  // Always grow, even from zero.
  size_t NewCapacity = size_t(NextPowerOf2(this->capacity() + 2));
  NewCapacity = std::min(std::max(NewCapacity, MinSize), size_t(UINT32_MAX(4294967295U)));
  T *NewElts = static_cast<T*>(llvm::safe_malloc(NewCapacity*sizeof(T)));

  // Move the elements over.
  this->uninitialized_move(this->begin(), this->end(), NewElts);

  // Destroy the original elements.
  destroy_range(this->begin(), this->end());

  // If this wasn't grown from the inline copy, deallocate the old space.
  if (!this->isSmall())
    free(this->begin());

  this->BeginX = NewElts;
  this->Capacity = NewCapacity;
}
255 | |
/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
/// method implementations that are designed to work with POD-like T's. All
/// element moves/copies degenerate to memcpy and destruction is a no-op.
template <typename T>
class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
protected:
  SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}

  // No need to do a destroy loop for POD's.
  static void destroy_range(T *, T *) {}

  /// Move the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  template<typename It1, typename It2>
  static void uninitialized_move(It1 I, It1 E, It2 Dest) {
    // Just do a copy.
    uninitialized_copy(I, E, Dest);
  }

  /// Copy the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  template<typename It1, typename It2>
  static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
    // Arbitrary iterator types; just use the basic implementation.
    std::uninitialized_copy(I, E, Dest);
  }

  /// Copy the range [I, E) onto the uninitialized memory
  /// starting with "Dest", constructing elements into it as needed.
  /// Overload selected (via enable_if) when both ranges are raw pointers to
  /// the same element type, which permits a straight memcpy.
  template <typename T1, typename T2>
  static void uninitialized_copy(
      T1 *I, T1 *E, T2 *Dest,
      typename std::enable_if<std::is_same<typename std::remove_const<T1>::type,
                                           T2>::value>::type * = nullptr) {
    // Use memcpy for PODs iterated by pointers (which includes SmallVector
    // iterators): std::uninitialized_copy optimizes to memmove, but we can
    // use memcpy here. Note that I and E are iterators and thus might be
    // invalid for memcpy if they are equal.
    if (I != E)
      memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
  }

  /// Double the size of the allocated memory, guaranteeing space for at
  /// least one more element or MinSize if specified.
  void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }

public:
  /// Append a copy of \p Elt via memcpy. May reallocate.
  void push_back(const T &Elt) {
    if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false))
      this->grow();
    memcpy(reinterpret_cast<void *>(this->end()), &Elt, sizeof(T));
    this->set_size(this->size() + 1);
  }

  /// Remove the last element; trivial destructor means no work beyond the
  /// size bump.
  void pop_back() { this->set_size(this->size() - 1); }
};
311 | |
/// This class consists of common code factored out of the SmallVector class to
/// reduce code duplication based on the SmallVector 'N' template parameter.
template <typename T>
class SmallVectorImpl : public SmallVectorTemplateBase<T> {
  using SuperClass = SmallVectorTemplateBase<T>;

public:
  using iterator = typename SuperClass::iterator;
  using const_iterator = typename SuperClass::const_iterator;
  using reference = typename SuperClass::reference;
  using size_type = typename SuperClass::size_type;

protected:
  // Default ctor - Initialize to empty.
  explicit SmallVectorImpl(unsigned N)
    : SmallVectorTemplateBase<T>(N) {}

public:
  SmallVectorImpl(const SmallVectorImpl &) = delete;

  ~SmallVectorImpl() {
    // Subclass has already destructed this vector's elements.
    // If this wasn't grown from the inline copy, deallocate the old space.
    if (!this->isSmall())
      free(this->begin());
  }

  /// Destroy all elements and set the size to zero. Capacity is unchanged.
  void clear() {
    this->destroy_range(this->begin(), this->end());
    this->Size = 0;
  }

  /// Resize to \p N elements, value-initializing any new elements.
  void resize(size_type N) {
    if (N < this->size()) {
      this->destroy_range(this->begin()+N, this->end());
      this->set_size(N);
    } else if (N > this->size()) {
      if (this->capacity() < N)
        this->grow(N);
      for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
        new (&*I) T();
      this->set_size(N);
    }
  }

  /// Resize to \p N elements, filling new slots with copies of \p NV.
  /// NOTE(review): if NV aliases an element of this vector, grow(N) above
  /// invalidates it before the fill — callers must pass a non-aliasing
  /// value; confirm against upstream fixes.
  void resize(size_type N, const T &NV) {
    if (N < this->size()) {
      this->destroy_range(this->begin()+N, this->end());
      this->set_size(N);
    } else if (N > this->size()) {
      if (this->capacity() < N)
        this->grow(N);
      std::uninitialized_fill(this->end(), this->begin()+N, NV);
      this->set_size(N);
    }
  }

  /// Ensure capacity for at least \p N elements; never shrinks.
  void reserve(size_type N) {
    if (this->capacity() < N)
      this->grow(N);
  }

  /// Remove and return the last element.
  LLVM_NODISCARD[[clang::warn_unused_result]] T pop_back_val() {
    T Result = ::std::move(this->back());
    this->pop_back();
    return Result;
  }

  void swap(SmallVectorImpl &RHS);

  /// Add the specified range to the end of the SmallVector.
  /// NOTE(review): the input range must not alias this vector's storage —
  /// a reallocation in grow() would invalidate it; confirm.
  template <typename in_iter,
            typename = typename std::enable_if<std::is_convertible<
                typename std::iterator_traits<in_iter>::iterator_category,
                std::input_iterator_tag>::value>::type>
  void append(in_iter in_start, in_iter in_end) {
    size_type NumInputs = std::distance(in_start, in_end);
    if (NumInputs > this->capacity() - this->size())
      this->grow(this->size()+NumInputs);

    this->uninitialized_copy(in_start, in_end, this->end());
    this->set_size(this->size() + NumInputs);
  }

  /// Append \p NumInputs copies of \p Elt to the end.
  void append(size_type NumInputs, const T &Elt) {
    if (NumInputs > this->capacity() - this->size())
      this->grow(this->size()+NumInputs);

    std::uninitialized_fill_n(this->end(), NumInputs, Elt);
    this->set_size(this->size() + NumInputs);
  }

  /// Append all elements of the initializer list.
  void append(std::initializer_list<T> IL) {
    append(IL.begin(), IL.end());
  }

  // FIXME: Consider assigning over existing elements, rather than clearing &
  // re-initializing them - for all assign(...) variants.

  /// Replace the contents with \p NumElts copies of \p Elt.
  /// NOTE(review): same aliasing hazard as resize(N, NV) — Elt must not
  /// refer into this vector; confirm.
  void assign(size_type NumElts, const T &Elt) {
    clear();
    if (this->capacity() < NumElts)
      this->grow(NumElts);
    this->set_size(NumElts);
    std::uninitialized_fill(this->begin(), this->end(), Elt);
  }

  /// Replace the contents with a copy of the range [in_start, in_end).
  template <typename in_iter,
            typename = typename std::enable_if<std::is_convertible<
                typename std::iterator_traits<in_iter>::iterator_category,
                std::input_iterator_tag>::value>::type>
  void assign(in_iter in_start, in_iter in_end) {
    clear();
    append(in_start, in_end);
  }

  /// Replace the contents with the initializer list's elements.
  void assign(std::initializer_list<T> IL) {
    clear();
    append(IL);
  }

  /// Erase the element at \p CI, shifting later elements down one.
  /// Returns an iterator to the element that now occupies the slot.
  iterator erase(const_iterator CI) {
    // Just cast away constness because this is a non-const member function.
    iterator I = const_cast<iterator>(CI);

    assert(I >= this->begin() && "Iterator to erase is out of bounds.")((I >= this->begin() && "Iterator to erase is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Iterator to erase is out of bounds.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 438, __PRETTY_FUNCTION__));
    assert(I < this->end() && "Erasing at past-the-end iterator.")((I < this->end() && "Erasing at past-the-end iterator." ) ? static_cast<void> (0) : __assert_fail ("I < this->end() && \"Erasing at past-the-end iterator.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 439, __PRETTY_FUNCTION__));

    iterator N = I;
    // Shift all elts down one.
    std::move(I+1, this->end(), I);
    // Drop the last elt.
    this->pop_back();
    return(N);
  }

  /// Erase the range [CS, CE), shifting later elements down.
  iterator erase(const_iterator CS, const_iterator CE) {
    // Just cast away constness because this is a non-const member function.
    iterator S = const_cast<iterator>(CS);
    iterator E = const_cast<iterator>(CE);

    assert(S >= this->begin() && "Range to erase is out of bounds.")((S >= this->begin() && "Range to erase is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("S >= this->begin() && \"Range to erase is out of bounds.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 454, __PRETTY_FUNCTION__));
    assert(S <= E && "Trying to erase invalid range.")((S <= E && "Trying to erase invalid range.") ? static_cast <void> (0) : __assert_fail ("S <= E && \"Trying to erase invalid range.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 455, __PRETTY_FUNCTION__));
    assert(E <= this->end() && "Trying to erase past the end.")((E <= this->end() && "Trying to erase past the end." ) ? static_cast<void> (0) : __assert_fail ("E <= this->end() && \"Trying to erase past the end.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 456, __PRETTY_FUNCTION__));

    iterator N = S;
    // Shift all elts down.
    iterator I = std::move(E, this->end(), S);
    // Drop the last elts.
    this->destroy_range(I, this->end());
    this->set_size(I - this->begin());
    return(N);
  }

  /// Insert \p Elt (by move) before \p I. Handles the case where Elt refers
  /// to an element of this vector.
  iterator insert(iterator I, T &&Elt) {
    if (I == this->end()) {  // Important special case for empty vector.
      this->push_back(::std::move(Elt));
      return this->end()-1;
    }

    assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 473, __PRETTY_FUNCTION__));
    assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 474, __PRETTY_FUNCTION__));

    if (this->size() >= this->capacity()) {
      size_t EltNo = I-this->begin();
      this->grow();
      I = this->begin()+EltNo;
    }

    ::new ((void*) this->end()) T(::std::move(this->back()));
    // Push everything else over.
    std::move_backward(I, this->end()-1, this->end());
    this->set_size(this->size() + 1);

    // If we just moved the element we're inserting, be sure to update
    // the reference.
    T *EltPtr = &Elt;
    if (I <= EltPtr && EltPtr < this->end())
      ++EltPtr;

    *I = ::std::move(*EltPtr);
    return I;
  }

  /// Insert a copy of \p Elt before \p I. Handles the case where Elt refers
  /// to an element of this vector.
  iterator insert(iterator I, const T &Elt) {
    if (I == this->end()) {  // Important special case for empty vector.
      this->push_back(Elt);
      return this->end()-1;
    }

    assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 503, __PRETTY_FUNCTION__));
    assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 504, __PRETTY_FUNCTION__));

    if (this->size() >= this->capacity()) {
      size_t EltNo = I-this->begin();
      this->grow();
      I = this->begin()+EltNo;
    }
    ::new ((void*) this->end()) T(std::move(this->back()));
    // Push everything else over.
    std::move_backward(I, this->end()-1, this->end());
    this->set_size(this->size() + 1);

    // If we just moved the element we're inserting, be sure to update
    // the reference.
    const T *EltPtr = &Elt;
    if (I <= EltPtr && EltPtr < this->end())
      ++EltPtr;

    *I = *EltPtr;
    return I;
  }

  /// Insert \p NumToInsert copies of \p Elt before \p I.
  /// Returns an iterator to the first inserted element.
  iterator insert(iterator I, size_type NumToInsert, const T &Elt) {
    // Convert iterator to elt# to avoid invalidating iterator when we reserve()
    size_t InsertElt = I - this->begin();

    if (I == this->end()) {  // Important special case for empty vector.
      append(NumToInsert, Elt);
      return this->begin()+InsertElt;
    }

    assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 535, __PRETTY_FUNCTION__));
    assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 536, __PRETTY_FUNCTION__));

    // Ensure there is enough space.
    reserve(this->size() + NumToInsert);

    // Uninvalidate the iterator.
    I = this->begin()+InsertElt;

    // If there are more elements between the insertion point and the end of the
    // range than there are being inserted, we can use a simple approach to
    // insertion.  Since we already reserved space, we know that this won't
    // reallocate the vector.
    if (size_t(this->end()-I) >= NumToInsert) {
      T *OldEnd = this->end();
      append(std::move_iterator<iterator>(this->end() - NumToInsert),
             std::move_iterator<iterator>(this->end()));

      // Copy the existing elements that get replaced.
      std::move_backward(I, OldEnd-NumToInsert, OldEnd);

      std::fill_n(I, NumToInsert, Elt);
      return I;
    }

    // Otherwise, we're inserting more elements than exist already, and we're
    // not inserting at the end.

    // Move over the elements that we're about to overwrite.
    T *OldEnd = this->end();
    this->set_size(this->size() + NumToInsert);
    size_t NumOverwritten = OldEnd-I;
    this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

    // Replace the overwritten part.
    std::fill_n(I, NumOverwritten, Elt);

    // Insert the non-overwritten middle part.
    std::uninitialized_fill_n(OldEnd, NumToInsert-NumOverwritten, Elt);
    return I;
  }

  /// Insert the range [From, To) before \p I.
  /// Returns an iterator to the first inserted element.
  template <typename ItTy,
            typename = typename std::enable_if<std::is_convertible<
                typename std::iterator_traits<ItTy>::iterator_category,
                std::input_iterator_tag>::value>::type>
  iterator insert(iterator I, ItTy From, ItTy To) {
    // Convert iterator to elt# to avoid invalidating iterator when we reserve()
    size_t InsertElt = I - this->begin();

    if (I == this->end()) {  // Important special case for empty vector.
      append(From, To);
      return this->begin()+InsertElt;
    }

    assert(I >= this->begin() && "Insertion iterator is out of bounds.")((I >= this->begin() && "Insertion iterator is out of bounds." ) ? static_cast<void> (0) : __assert_fail ("I >= this->begin() && \"Insertion iterator is out of bounds.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 590, __PRETTY_FUNCTION__));
    assert(I <= this->end() && "Inserting past the end of the vector.")((I <= this->end() && "Inserting past the end of the vector." ) ? static_cast<void> (0) : __assert_fail ("I <= this->end() && \"Inserting past the end of the vector.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/SmallVector.h" , 591, __PRETTY_FUNCTION__));

    size_t NumToInsert = std::distance(From, To);

    // Ensure there is enough space.
    reserve(this->size() + NumToInsert);

    // Uninvalidate the iterator.
    I = this->begin()+InsertElt;

    // If there are more elements between the insertion point and the end of the
    // range than there are being inserted, we can use a simple approach to
    // insertion.  Since we already reserved space, we know that this won't
    // reallocate the vector.
    if (size_t(this->end()-I) >= NumToInsert) {
      T *OldEnd = this->end();
      append(std::move_iterator<iterator>(this->end() - NumToInsert),
             std::move_iterator<iterator>(this->end()));

      // Copy the existing elements that get replaced.
      std::move_backward(I, OldEnd-NumToInsert, OldEnd);

      std::copy(From, To, I);
      return I;
    }

    // Otherwise, we're inserting more elements than exist already, and we're
    // not inserting at the end.

    // Move over the elements that we're about to overwrite.
    T *OldEnd = this->end();
    this->set_size(this->size() + NumToInsert);
    size_t NumOverwritten = OldEnd-I;
    this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

    // Replace the overwritten part.
    for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
      *J = *From;
      ++J; ++From;
    }

    // Insert the non-overwritten middle part.
    this->uninitialized_copy(From, To, OldEnd);
    return I;
  }

  /// Insert the initializer list's elements before \p I.
  void insert(iterator I, std::initializer_list<T> IL) {
    insert(I, IL.begin(), IL.end());
  }

  /// Construct a new element in place at the end and return a reference to
  /// it. May reallocate, invalidating all iterators.
  template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
    if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity ()), false))
      this->grow();
    ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
    this->set_size(this->size() + 1);
    return this->back();
  }

  SmallVectorImpl &operator=(const SmallVectorImpl &RHS);

  SmallVectorImpl &operator=(SmallVectorImpl &&RHS);

  /// Element-wise equality.
  bool operator==(const SmallVectorImpl &RHS) const {
    if (this->size() != RHS.size()) return false;
    return std::equal(this->begin(), this->end(), RHS.begin());
  }
  bool operator!=(const SmallVectorImpl &RHS) const {
    return !(*this == RHS);
  }

  /// Lexicographic less-than comparison.
  bool operator<(const SmallVectorImpl &RHS) const {
    return std::lexicographical_compare(this->begin(), this->end(),
                                        RHS.begin(), RHS.end());
  }
};
666 | |
667 | template <typename T> |
668 | void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) { |
669 | if (this == &RHS) return; |
670 | |
671 | // We can only avoid copying elements if neither vector is small. |
672 | if (!this->isSmall() && !RHS.isSmall()) { |
673 | std::swap(this->BeginX, RHS.BeginX); |
674 | std::swap(this->Size, RHS.Size); |
675 | std::swap(this->Capacity, RHS.Capacity); |
676 | return; |
677 | } |
678 | if (RHS.size() > this->capacity()) |
679 | this->grow(RHS.size()); |
680 | if (this->size() > RHS.capacity()) |
681 | RHS.grow(this->size()); |
682 | |
683 | // Swap the shared elements. |
684 | size_t NumShared = this->size(); |
685 | if (NumShared > RHS.size()) NumShared = RHS.size(); |
686 | for (size_type i = 0; i != NumShared; ++i) |
687 | std::swap((*this)[i], RHS[i]); |
688 | |
689 | // Copy over the extra elts. |
690 | if (this->size() > RHS.size()) { |
691 | size_t EltDiff = this->size() - RHS.size(); |
692 | this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); |
693 | RHS.set_size(RHS.size() + EltDiff); |
694 | this->destroy_range(this->begin()+NumShared, this->end()); |
695 | this->set_size(NumShared); |
696 | } else if (RHS.size() > this->size()) { |
697 | size_t EltDiff = RHS.size() - this->size(); |
698 | this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); |
699 | this->set_size(this->size() + EltDiff); |
700 | this->destroy_range(RHS.begin()+NumShared, RHS.end()); |
701 | RHS.set_size(NumShared); |
702 | } |
703 | } |
704 | |
705 | template <typename T> |
706 | SmallVectorImpl<T> &SmallVectorImpl<T>:: |
707 | operator=(const SmallVectorImpl<T> &RHS) { |
708 | // Avoid self-assignment. |
709 | if (this == &RHS) return *this; |
710 | |
711 | // If we already have sufficient space, assign the common elements, then |
712 | // destroy any excess. |
713 | size_t RHSSize = RHS.size(); |
714 | size_t CurSize = this->size(); |
715 | if (CurSize >= RHSSize) { |
716 | // Assign common elements. |
717 | iterator NewEnd; |
718 | if (RHSSize) |
719 | NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin()); |
720 | else |
721 | NewEnd = this->begin(); |
722 | |
723 | // Destroy excess elements. |
724 | this->destroy_range(NewEnd, this->end()); |
725 | |
726 | // Trim. |
727 | this->set_size(RHSSize); |
728 | return *this; |
729 | } |
730 | |
731 | // If we have to grow to have enough elements, destroy the current elements. |
732 | // This allows us to avoid copying them during the grow. |
733 | // FIXME: don't do this if they're efficiently moveable. |
734 | if (this->capacity() < RHSSize) { |
735 | // Destroy current elements. |
736 | this->destroy_range(this->begin(), this->end()); |
737 | this->set_size(0); |
738 | CurSize = 0; |
739 | this->grow(RHSSize); |
740 | } else if (CurSize) { |
741 | // Otherwise, use assignment for the already-constructed elements. |
742 | std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin()); |
743 | } |
744 | |
745 | // Copy construct the new elements in place. |
746 | this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(), |
747 | this->begin()+CurSize); |
748 | |
749 | // Set end. |
750 | this->set_size(RHSSize); |
751 | return *this; |
752 | } |
753 | |
/// Move-assignment: take over \p RHS's contents, stealing its heap buffer
/// when possible, otherwise moving elements one by one.
template <typename T>
SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
  // Avoid self-assignment.
  if (this == &RHS) return *this;

  // If the RHS isn't small, clear this vector and then steal its buffer.
  if (!RHS.isSmall()) {
    this->destroy_range(this->begin(), this->end());
    if (!this->isSmall()) free(this->begin());
    this->BeginX = RHS.BeginX;
    this->Size = RHS.Size;
    this->Capacity = RHS.Capacity;
    // Leave RHS pointing at its own inline buffer, empty.
    RHS.resetToSmall();
    return *this;
  }

  // If we already have sufficient space, assign the common elements, then
  // destroy any excess.
  size_t RHSSize = RHS.size();
  size_t CurSize = this->size();
  if (CurSize >= RHSSize) {
    // Assign common elements.
    iterator NewEnd = this->begin();
    if (RHSSize)
      NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);

    // Destroy excess elements and trim the bounds.
    this->destroy_range(NewEnd, this->end());
    this->set_size(RHSSize);

    // Clear the RHS.
    RHS.clear();

    return *this;
  }

  // If we have to grow to have enough elements, destroy the current elements.
  // This allows us to avoid copying them during the grow.
  // FIXME: this may not actually make any sense if we can efficiently move
  // elements.
  if (this->capacity() < RHSSize) {
    // Destroy current elements.
    this->destroy_range(this->begin(), this->end());
    this->set_size(0);
    CurSize = 0;
    this->grow(RHSSize);
  } else if (CurSize) {
    // Otherwise, use assignment for the already-constructed elements.
    std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
  }

  // Move-construct the new elements in place.
  this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
                           this->begin()+CurSize);

  // Set end.
  this->set_size(RHSSize);

  RHS.clear();
  return *this;
}
815 | |
/// Storage for the SmallVector elements.  This is specialized for the N=0 case
/// to avoid allocating unnecessary storage.
template <typename T, unsigned N>
struct SmallVectorStorage {
  // Raw, suitably-aligned space for N elements; the vector constructs
  // elements into it lazily, never this struct itself.
  AlignedCharArrayUnion<T> InlineElts[N];
};
822 | |
/// We need the storage to be properly aligned even for small-size of 0 so that
/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
/// well-defined. (An empty struct still has an alignment; alignas(T) keeps
/// the offsetof-based address computation valid for the N=0 case.)
template <typename T> struct alignas(alignof(T)) SmallVectorStorage<T, 0> {};
827 | |
/// This is a 'vector' (really, a variable-sized array), optimized
/// for the case when the array is small. It contains some number of elements
/// in-place, which allows it to avoid heap allocation when the actual number of
/// elements is below that threshold. This allows normal "small" cases to be
/// fast without losing generality for large inputs.
///
/// Note that this does not attempt to be exception safe.
///
template <typename T, unsigned N>
class SmallVector : public SmallVectorImpl<T>, SmallVectorStorage<T, N> {
public:
  // Construct an empty vector; the inline capacity N is passed to the base.
  SmallVector() : SmallVectorImpl<T>(N) {}

  ~SmallVector() {
    // Destroy the constructed elements in the vector.
    // NOTE(review): storage deallocation appears to be the responsibility of
    // the SmallVectorImpl base -- confirm in SmallVectorImpl's destructor.
    this->destroy_range(this->begin(), this->end());
  }

  /// Construct with \p Size copies of \p Value.
  explicit SmallVector(size_t Size, const T &Value = T())
    : SmallVectorImpl<T>(N) {
    this->assign(Size, Value);
  }

  /// Construct from an iterator range [S, E). The enable_if default argument
  /// restricts this overload to types whose iterator_category converts to
  /// std::input_iterator_tag (i.e. real iterators, not arbitrary pairs).
  template <typename ItTy,
            typename = typename std::enable_if<std::is_convertible<
                typename std::iterator_traits<ItTy>::iterator_category,
                std::input_iterator_tag>::value>::type>
  SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
    this->append(S, E);
  }

  /// Construct from an iterator_range.
  template <typename RangeTy>
  explicit SmallVector(const iterator_range<RangeTy> &R)
      : SmallVectorImpl<T>(N) {
    this->append(R.begin(), R.end());
  }

  /// Construct from a braced initializer list.
  SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
    this->assign(IL);
  }

  SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
    // Skip the base-class copy machinery entirely for an empty source.
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(RHS);
  }

  // Copy assignment: element-wise copy is implemented once in the base.
  const SmallVector &operator=(const SmallVector &RHS) {
    SmallVectorImpl<T>::operator=(RHS);
    return *this;
  }

  SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(::std::move(RHS));
  }

  // Taking SmallVectorImpl<T>&& allows moving from a SmallVector with a
  // different inline size M != N.
  SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
    if (!RHS.empty())
      SmallVectorImpl<T>::operator=(::std::move(RHS));
  }

  const SmallVector &operator=(SmallVector &&RHS) {
    SmallVectorImpl<T>::operator=(::std::move(RHS));
    return *this;
  }

  const SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
    SmallVectorImpl<T>::operator=(::std::move(RHS));
    return *this;
  }

  const SmallVector &operator=(std::initializer_list<T> IL) {
    this->assign(IL);
    return *this;
  }
};
904 | |
905 | template <typename T, unsigned N> |
906 | inline size_t capacity_in_bytes(const SmallVector<T, N> &X) { |
907 | return X.capacity_in_bytes(); |
908 | } |
909 | |
910 | } // end namespace llvm |
911 | |
912 | namespace std { |
913 | |
914 | /// Implement std::swap in terms of SmallVector swap. |
915 | template<typename T> |
916 | inline void |
917 | swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) { |
918 | LHS.swap(RHS); |
919 | } |
920 | |
921 | /// Implement std::swap in terms of SmallVector swap. |
922 | template<typename T, unsigned N> |
923 | inline void |
924 | swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) { |
925 | LHS.swap(RHS); |
926 | } |
927 | |
928 | } // end namespace std |
929 | |
930 | #endif // LLVM_ADT_SMALLVECTOR_H |
1 | //===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
10 | /// This file implements a class to represent arbitrary precision |
11 | /// integral constant values and operations on them. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | #ifndef LLVM_ADT_APINT_H |
16 | #define LLVM_ADT_APINT_H |
17 | |
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <climits>
#include <cstdint>
#include <cstring>
#include <string>
24 | |
25 | namespace llvm { |
26 | class FoldingSetNodeID; |
27 | class StringRef; |
28 | class hash_code; |
29 | class raw_ostream; |
30 | |
31 | template <typename T> class SmallVectorImpl; |
32 | template <typename T> class ArrayRef; |
33 | template <typename T> class Optional; |
34 | |
35 | class APInt; |
36 | |
37 | inline APInt operator-(APInt); |
38 | |
39 | //===----------------------------------------------------------------------===// |
40 | // APInt Class |
41 | //===----------------------------------------------------------------------===// |
42 | |
43 | /// Class for arbitrary precision integers. |
44 | /// |
45 | /// APInt is a functional replacement for common case unsigned integer type like |
46 | /// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width |
47 | /// integer sizes and large integer value types such as 3-bits, 15-bits, or more |
48 | /// than 64-bits of precision. APInt provides a variety of arithmetic operators |
49 | /// and methods to manipulate integer values of any bit-width. It supports both |
50 | /// the typical integer arithmetic and comparison operations as well as bitwise |
51 | /// manipulation. |
52 | /// |
53 | /// The class has several invariants worth noting: |
54 | /// * All bit, byte, and word positions are zero-based. |
55 | /// * Once the bit width is set, it doesn't change except by the Truncate, |
56 | /// SignExtend, or ZeroExtend operations. |
57 | /// * All binary operators must be on APInt instances of the same bit width. |
58 | /// Attempting to use these operators on instances with different bit |
59 | /// widths will yield an assertion. |
60 | /// * The value is stored canonically as an unsigned value. For operations |
61 | /// where it makes a difference, there are both signed and unsigned variants |
62 | /// of the operation. For example, sdiv and udiv. However, because the bit |
63 | /// widths must be the same, operations such as Mul and Add produce the same |
64 | /// results regardless of whether the values are interpreted as signed or |
65 | /// not. |
66 | /// * In general, the class tries to follow the style of computation that LLVM |
67 | /// uses in its IR. This simplifies its use for LLVM. |
68 | /// |
69 | class LLVM_NODISCARD[[clang::warn_unused_result]] APInt { |
70 | public: |
71 | typedef uint64_t WordType; |
72 | |
73 | /// This enum is used to hold the constants we needed for APInt. |
74 | enum : unsigned { |
75 | /// Byte size of a word. |
76 | APINT_WORD_SIZE = sizeof(WordType), |
77 | /// Bits in a word. |
78 | APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8 |
79 | }; |
80 | |
  // Rounding direction selector (DOWN = toward -inf, UP = toward +inf).
  // NOTE(review): consumers are outside this view -- confirm usage sites.
  enum class Rounding {
    DOWN,
    TOWARD_ZERO,
    UP,
  };
86 | |
87 | static const WordType WORDTYPE_MAX = ~WordType(0); |
88 | |
89 | private: |
90 | /// This union is used to store the integer value. When the |
91 | /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. |
92 | union { |
93 | uint64_t VAL; ///< Used to store the <= 64 bits integer value. |
94 | uint64_t *pVal; ///< Used to store the >64 bits integer value. |
95 | } U; |
96 | |
97 | unsigned BitWidth; ///< The number of bits in this APInt. |
98 | |
99 | friend struct DenseMapAPIntKeyInfo; |
100 | |
101 | friend class APSInt; |
102 | |
  /// Fast internal constructor
  ///
  /// This constructor is used only internally for speed of construction of
  /// temporaries. It is unsafe for general use so it is not public.
  ///
  /// NOTE(review): adopts \p val without copying; the destructor will
  /// delete[] it when bits > 64, so \p val must be a heap array -- confirm
  /// at call sites.
  APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
    U.pVal = val;
  }
110 | |
111 | /// Determine if this APInt just has one word to store value. |
112 | /// |
113 | /// \returns true if the number of bits <= 64, false otherwise. |
114 | bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; } |
115 | |
116 | /// Determine which word a bit is in. |
117 | /// |
118 | /// \returns the word position for the specified bit position. |
119 | static unsigned whichWord(unsigned bitPosition) { |
120 | return bitPosition / APINT_BITS_PER_WORD; |
121 | } |
122 | |
123 | /// Determine which bit in a word a bit is in. |
124 | /// |
125 | /// \returns the bit position in a word for the specified bit position |
126 | /// in the APInt. |
127 | static unsigned whichBit(unsigned bitPosition) { |
128 | return bitPosition % APINT_BITS_PER_WORD; |
129 | } |
130 | |
131 | /// Get a single bit mask. |
132 | /// |
133 | /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set |
134 | /// This method generates and returns a uint64_t (word) mask for a single |
135 | /// bit at a specific bit position. This is used to mask the bit in the |
136 | /// corresponding word. |
137 | static uint64_t maskBit(unsigned bitPosition) { |
138 | return 1ULL << whichBit(bitPosition); |
139 | } |
140 | |
  /// Clear unused high order bits
  ///
  /// This method is used internally to clear the top "N" bits in the high order
  /// word that are not used by the APInt. This is needed after the most
  /// significant word is assigned a value to ensure that those bits are
  /// zero'd out.
  ///
  /// \returns *this to allow chaining.
  APInt &clearUnusedBits() {
    // Compute how many bits are used in the final word
    // (result is in 1..APINT_BITS_PER_WORD; relies on BitWidth != 0).
    unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;

    // Mask out the high bits. WordBits >= 1, so the shift count is at most
    // APINT_BITS_PER_WORD - 1 and never undefined.
    uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
    if (isSingleWord())
      U.VAL &= mask;
    else
      U.pVal[getNumWords() - 1] &= mask;
    return *this;
  }
159 | |
160 | /// Get the word corresponding to a bit position |
161 | /// \returns the corresponding word for the specified bit position. |
162 | uint64_t getWord(unsigned bitPosition) const { |
163 | return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; |
164 | } |
165 | |
166 | /// Utility method to change the bit width of this APInt to new bit width, |
167 | /// allocating and/or deallocating as necessary. There is no guarantee on the |
168 | /// value of any bits upon return. Caller should populate the bits after. |
169 | void reallocate(unsigned NewBitWidth); |
170 | |
171 | /// Convert a char array into an APInt |
172 | /// |
173 | /// \param radix 2, 8, 10, 16, or 36 |
174 | /// Converts a string into a number. The string must be non-empty |
175 | /// and well-formed as a number of the given base. The bit-width |
176 | /// must be sufficient to hold the result. |
177 | /// |
178 | /// This is used by the constructors that take string arguments. |
179 | /// |
180 | /// StringRef::getAsInteger is superficially similar but (1) does |
181 | /// not assume that the string is well-formed and (2) grows the |
182 | /// result to hold the input. |
183 | void fromString(unsigned numBits, StringRef str, uint8_t radix); |
184 | |
185 | /// An internal division function for dividing APInts. |
186 | /// |
187 | /// This is used by the toString method to divide by the radix. It simply |
188 | /// provides a more convenient form of divide for internal use since KnuthDiv |
189 | /// has specific constraints on its inputs. If those constraints are not met |
190 | /// then it provides a simpler form of divide. |
191 | static void divide(const WordType *LHS, unsigned lhsWords, |
192 | const WordType *RHS, unsigned rhsWords, WordType *Quotient, |
193 | WordType *Remainder); |
194 | |
195 | /// out-of-line slow case for inline constructor |
196 | void initSlowCase(uint64_t val, bool isSigned); |
197 | |
198 | /// shared code between two array constructors |
199 | void initFromArray(ArrayRef<uint64_t> array); |
200 | |
201 | /// out-of-line slow case for inline copy constructor |
202 | void initSlowCase(const APInt &that); |
203 | |
204 | /// out-of-line slow case for shl |
205 | void shlSlowCase(unsigned ShiftAmt); |
206 | |
207 | /// out-of-line slow case for lshr. |
208 | void lshrSlowCase(unsigned ShiftAmt); |
209 | |
210 | /// out-of-line slow case for ashr. |
211 | void ashrSlowCase(unsigned ShiftAmt); |
212 | |
213 | /// out-of-line slow case for operator= |
214 | void AssignSlowCase(const APInt &RHS); |
215 | |
216 | /// out-of-line slow case for operator== |
217 | bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); |
218 | |
219 | /// out-of-line slow case for countLeadingZeros |
220 | unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__)); |
221 | |
222 | /// out-of-line slow case for countLeadingOnes. |
223 | unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__)); |
224 | |
225 | /// out-of-line slow case for countTrailingZeros. |
226 | unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__)); |
227 | |
228 | /// out-of-line slow case for countTrailingOnes |
229 | unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__)); |
230 | |
231 | /// out-of-line slow case for countPopulation |
232 | unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__)); |
233 | |
234 | /// out-of-line slow case for intersects. |
235 | bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); |
236 | |
237 | /// out-of-line slow case for isSubsetOf. |
238 | bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); |
239 | |
240 | /// out-of-line slow case for setBits. |
241 | void setBitsSlowCase(unsigned loBit, unsigned hiBit); |
242 | |
243 | /// out-of-line slow case for flipAllBits. |
244 | void flipAllBitsSlowCase(); |
245 | |
246 | /// out-of-line slow case for operator&=. |
247 | void AndAssignSlowCase(const APInt& RHS); |
248 | |
249 | /// out-of-line slow case for operator|=. |
250 | void OrAssignSlowCase(const APInt& RHS); |
251 | |
252 | /// out-of-line slow case for operator^=. |
253 | void XorAssignSlowCase(const APInt& RHS); |
254 | |
255 | /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal |
256 | /// to, or greater than RHS. |
257 | int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); |
258 | |
259 | /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal |
260 | /// to, or greater than RHS. |
261 | int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); |
262 | |
263 | public: |
264 | /// \name Constructors |
265 | /// @{ |
266 | |
267 | /// Create a new APInt of numBits width, initialized as val. |
268 | /// |
269 | /// If isSigned is true then val is treated as if it were a signed value |
270 | /// (i.e. as an int64_t) and the appropriate sign extension to the bit width |
271 | /// will be done. Otherwise, no sign extension occurs (high order bits beyond |
272 | /// the range of val are zero filled). |
273 | /// |
274 | /// \param numBits the bit width of the constructed APInt |
275 | /// \param val the initial value of the APInt |
276 | /// \param isSigned how to treat signedness of val |
277 | APInt(unsigned numBits, uint64_t val, bool isSigned = false) |
278 | : BitWidth(numBits) { |
279 | assert(BitWidth && "bitwidth too small")((BitWidth && "bitwidth too small") ? static_cast< void> (0) : __assert_fail ("BitWidth && \"bitwidth too small\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 279, __PRETTY_FUNCTION__)); |
280 | if (isSingleWord()) { |
281 | U.VAL = val; |
282 | clearUnusedBits(); |
283 | } else { |
284 | initSlowCase(val, isSigned); |
285 | } |
286 | } |
287 | |
288 | /// Construct an APInt of numBits width, initialized as bigVal[]. |
289 | /// |
290 | /// Note that bigVal.size() can be smaller or larger than the corresponding |
291 | /// bit width but any extraneous bits will be dropped. |
292 | /// |
293 | /// \param numBits the bit width of the constructed APInt |
294 | /// \param bigVal a sequence of words to form the initial value of the APInt |
295 | APInt(unsigned numBits, ArrayRef<uint64_t> bigVal); |
296 | |
297 | /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but |
298 | /// deprecated because this constructor is prone to ambiguity with the |
299 | /// APInt(unsigned, uint64_t, bool) constructor. |
300 | /// |
301 | /// If this overload is ever deleted, care should be taken to prevent calls |
302 | /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool) |
303 | /// constructor. |
304 | APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]); |
305 | |
306 | /// Construct an APInt from a string representation. |
307 | /// |
308 | /// This constructor interprets the string \p str in the given radix. The |
309 | /// interpretation stops when the first character that is not suitable for the |
310 | /// radix is encountered, or the end of the string. Acceptable radix values |
311 | /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the |
312 | /// string to require more bits than numBits. |
313 | /// |
314 | /// \param numBits the bit width of the constructed APInt |
315 | /// \param str the string to be interpreted |
316 | /// \param radix the radix to use for the conversion |
317 | APInt(unsigned numBits, StringRef str, uint8_t radix); |
318 | |
319 | /// Simply makes *this a copy of that. |
320 | /// Copy Constructor. |
321 | APInt(const APInt &that) : BitWidth(that.BitWidth) { |
322 | if (isSingleWord()) |
323 | U.VAL = that.U.VAL; |
324 | else |
325 | initSlowCase(that); |
326 | } |
327 | |
  /// Move Constructor.
  APInt(APInt &&that) : BitWidth(that.BitWidth) {
    // Steal the representation wholesale (inline word or heap pointer alike).
    memcpy(&U, &that.U, sizeof(U));
    // BitWidth == 0 makes 'that' report isSingleWord(), so needsCleanup() is
    // false and its destructor will not free the (now stolen) heap buffer.
    that.BitWidth = 0;
  }
333 | |
334 | /// Destructor. |
335 | ~APInt() { |
336 | if (needsCleanup()) |
337 | delete[] U.pVal; |
338 | } |
339 | |
  /// Default constructor that creates an uninteresting APInt
  /// representing a 1-bit zero value.
  ///
  /// This is useful for object deserialization (pair this with the static
  /// method Read).
  // 1-bit width is always single-word, so only U.VAL needs initializing.
  explicit APInt() : BitWidth(1) { U.VAL = 0; }
346 | |
  /// Returns whether this instance allocated memory.
  // Exactly the multi-word case: >64 bits implies U.pVal points to a heap array.
  bool needsCleanup() const { return !isSingleWord(); }
349 | |
350 | /// Used to insert APInt objects, or objects that contain APInt objects, into |
351 | /// FoldingSets. |
352 | void Profile(FoldingSetNodeID &id) const; |
353 | |
354 | /// @} |
355 | /// \name Value Tests |
356 | /// @{ |
357 | |
  /// Determine sign of this APInt.
  ///
  /// This tests the high bit of this APInt to determine if it is set.
  ///
  /// \returns true if this APInt is negative, false otherwise
  // Reads the top bit (index BitWidth - 1) via operator[].
  bool isNegative() const { return (*this)[BitWidth - 1]; }
364 | |
  /// Determine if this APInt Value is non-negative (>= 0)
  ///
  /// This tests the high bit of the APInt to determine if it is unset.
  // Simple logical complement of isNegative().
  bool isNonNegative() const { return !isNegative(); }
369 | |
  /// Determine if sign bit of this APInt is set.
  ///
  /// This tests the high bit of this APInt to determine if it is set.
  ///
  /// \returns true if this APInt has its sign bit set, false otherwise.
  // Identical test to isNegative(): reads bit BitWidth - 1.
  bool isSignBitSet() const { return (*this)[BitWidth-1]; }
376 | |
  /// Determine if sign bit of this APInt is clear.
  ///
  /// This tests the high bit of this APInt to determine if it is clear.
  ///
  /// \returns true if this APInt has its sign bit clear, false otherwise.
  // Logical complement of isSignBitSet().
  bool isSignBitClear() const { return !isSignBitSet(); }
383 | |
  /// Determine if this APInt Value is positive.
  ///
  /// This tests if the value of this APInt is positive (> 0). Note
  /// that 0 is not a positive value.
  ///
  /// \returns true if this APInt is positive.
  // Positive == sign bit clear AND value nonzero.
  bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
391 | |
  /// Determine if all bits are set
  ///
  /// This checks to see if the value has all bits of the APInt are set or not.
  bool isAllOnesValue() const {
    if (isSingleWord())
      // Compare against an all-ones value truncated to BitWidth bits.
      return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
    // All bits set <=> every one of the BitWidth bits is a trailing one.
    return countTrailingOnesSlowCase() == BitWidth;
  }
400 | |
  /// Determine if all bits are clear
  ///
  /// This checks to see if the value has all bits of the APInt are clear or
  /// not.
  // Delegates to operator!, the all-zero test.
  bool isNullValue() const { return !*this; }
406 | |
  /// Determine if this is a value of 1.
  ///
  /// This checks to see if the value of this APInt is one.
  bool isOneValue() const {
    if (isSingleWord())
      return U.VAL == 1;
    // Value 1 <=> only the lowest bit set <=> BitWidth - 1 leading zeros
    // (a zero value would have BitWidth leading zeros instead).
    return countLeadingZerosSlowCase() == BitWidth - 1;
  }
415 | |
  /// Determine if this is the largest unsigned value.
  ///
  /// This checks to see if the value of this APInt is the maximum unsigned
  /// value for the APInt's bit width.
  // Unsigned max is simply all bits set.
  bool isMaxValue() const { return isAllOnesValue(); }
421 | |
  /// Determine if this is the largest signed value.
  ///
  /// This checks to see if the value of this APInt is the maximum signed
  /// value for the APInt's bit width.
  bool isMaxSignedValue() const {
    if (isSingleWord())
      // Signed max: 0 followed by all ones, i.e. 2^(BitWidth-1) - 1.
      return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
    // Multi-word: sign bit clear and the low BitWidth - 1 bits all ones.
    return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
  }
431 | |
  /// Determine if this is the smallest unsigned value.
  ///
  /// This checks to see if the value of this APInt is the minimum unsigned
  /// value for the APInt's bit width.
  // Unsigned min is zero.
  bool isMinValue() const { return isNullValue(); }
437 | |
  /// Determine if this is the smallest signed value.
  ///
  /// This checks to see if the value of this APInt is the minimum signed
  /// value for the APInt's bit width.
  bool isMinSignedValue() const {
    if (isSingleWord())
      // Signed min: only the sign bit set, i.e. -2^(BitWidth-1).
      return U.VAL == (WordType(1) << (BitWidth - 1));
    // Multi-word: sign bit set and every lower bit clear.
    return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
  }
447 | |
448 | /// Check if this APInt has an N-bits unsigned integer value. |
449 | bool isIntN(unsigned N) const { |
450 | assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail ("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 450, __PRETTY_FUNCTION__)); |
451 | return getActiveBits() <= N; |
452 | } |
453 | |
454 | /// Check if this APInt has an N-bits signed integer value. |
455 | bool isSignedIntN(unsigned N) const { |
456 | assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail ("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 456, __PRETTY_FUNCTION__)); |
457 | return getMinSignedBits() <= N; |
458 | } |
459 | |
  /// Check if this APInt's value is a power of two greater than zero.
  ///
  /// \returns true if the argument APInt value is a power of two > 0.
  bool isPowerOf2() const {
    if (isSingleWord())
      // Delegate to the MathExtras helper for the 64-bit case.
      return isPowerOf2_64(U.VAL);
    // Power of two <=> exactly one bit set.
    return countPopulationSlowCase() == 1;
  }
468 | |
  /// Check if the APInt's value is returned by getSignMask.
  ///
  /// \returns true if this is the value returned by getSignMask.
  // The sign mask is exactly the minimum signed value (only the top bit set).
  bool isSignMask() const { return isMinSignedValue(); }
473 | |
  /// Convert APInt to a boolean value.
  ///
  /// This converts the APInt to a boolean value as a test against zero.
  // Double negation of operator! yields "is nonzero".
  bool getBoolValue() const { return !!*this; }
478 | |
479 | /// If this value is smaller than the specified limit, return it, otherwise |
480 | /// return the limit value. This causes the value to saturate to the limit. |
481 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const { |
482 | return ugt(Limit) ? Limit : getZExtValue(); |
483 | } |
484 | |
485 | /// Check if the APInt consists of a repeated bit pattern. |
486 | /// |
487 | /// e.g. 0x01010101 satisfies isSplat(8). |
488 | /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit |
489 | /// width without remainder. |
490 | bool isSplat(unsigned SplatSizeInBits) const; |
491 | |
492 | /// \returns true if this APInt value is a sequence of \param numBits ones |
493 | /// starting at the least significant bit with the remainder zero. |
494 | bool isMask(unsigned numBits) const { |
495 | assert(numBits != 0 && "numBits must be non-zero")((numBits != 0 && "numBits must be non-zero") ? static_cast <void> (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 495, __PRETTY_FUNCTION__)); |
496 | assert(numBits <= BitWidth && "numBits out of range")((numBits <= BitWidth && "numBits out of range") ? static_cast<void> (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 496, __PRETTY_FUNCTION__)); |
497 | if (isSingleWord()) |
498 | return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits)); |
499 | unsigned Ones = countTrailingOnesSlowCase(); |
500 | return (numBits == Ones) && |
501 | ((Ones + countLeadingZerosSlowCase()) == BitWidth); |
502 | } |
503 | |
  /// \returns true if this APInt is a non-empty sequence of ones starting at
  /// the least significant bit with the remainder zero.
  /// Ex. isMask(0x0000FFFFU) == true.
  bool isMask() const {
    if (isSingleWord())
      return isMask_64(U.VAL);
    unsigned Ones = countTrailingOnesSlowCase();
    // Non-empty run of trailing ones, with only zeros above it.
    return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
  }
513 | |
  /// Return true if this APInt value contains a sequence of ones with
  /// the remainder zero.
  bool isShiftedMask() const {
    if (isSingleWord())
      return isShiftedMask_64(U.VAL);
    unsigned Ones = countPopulationSlowCase();
    unsigned LeadZ = countLeadingZerosSlowCase();
    // A single contiguous run: population + zeros on both sides spans
    // the entire width.
    return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
  }
523 | |
524 | /// @} |
525 | /// \name Value Generators |
526 | /// @{ |
527 | |
  /// Gets maximum unsigned value of APInt for specific bit width.
  // Unsigned max is all bits set.
  static APInt getMaxValue(unsigned numBits) {
    return getAllOnesValue(numBits);
  }
532 | |
  /// Gets maximum signed value of APInt for a specific bit width.
  static APInt getSignedMaxValue(unsigned numBits) {
    // All ones with the sign bit cleared: 0111...1 == 2^(numBits-1) - 1.
    APInt API = getAllOnesValue(numBits);
    API.clearBit(numBits - 1);
    return API;
  }
539 | |
  /// Gets minimum unsigned value of APInt for a specific bit width.
  // Unsigned min is zero.
  static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
542 | |
  /// Gets minimum signed value of APInt for a specific bit width.
  static APInt getSignedMinValue(unsigned numBits) {
    // Only the sign bit set: 1000...0 == -2^(numBits-1).
    APInt API(numBits, 0);
    API.setBit(numBits - 1);
    return API;
  }
549 | |
  /// Get the SignMask for a specific bit width.
  ///
  /// This is just a wrapper function of getSignedMinValue(), and it helps code
  /// readability when we want to get a SignMask.
  static APInt getSignMask(unsigned BitWidth) {
    return getSignedMinValue(BitWidth);
  }
557 | |
  /// Get the all-ones value.
  ///
  /// \returns the all-ones value for an APInt of the specified bit-width.
  static APInt getAllOnesValue(unsigned numBits) {
    // isSigned=true sign-extends the all-ones word across every word.
    return APInt(numBits, WORDTYPE_MAX, true);
  }
564 | |
  /// Get the '0' value.
  ///
  /// \returns the '0' value for an APInt of the specified bit-width.
  static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
569 | |
570 | /// Compute an APInt containing numBits highbits from this APInt. |
571 | /// |
572 | /// Get an APInt with the same BitWidth as this APInt, just zero mask |
573 | /// the low bits and right shift to the least significant bit. |
574 | /// |
575 | /// \returns the high "numBits" bits of this APInt. |
576 | APInt getHiBits(unsigned numBits) const; |
577 | |
578 | /// Compute an APInt containing numBits lowbits from this APInt. |
579 | /// |
580 | /// Get an APInt with the same BitWidth as this APInt, just zero mask |
581 | /// the high bits. |
582 | /// |
583 | /// \returns the low "numBits" bits of this APInt. |
584 | APInt getLoBits(unsigned numBits) const; |
585 | |
  /// Return an APInt with exactly one bit set in the result.
  ///
  /// \param numBits the bit width of the result
  /// \param BitNo the index of the single bit to set
  static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
    APInt Res(numBits, 0);
    Res.setBit(BitNo);
    return Res;
  }
592 | |
  /// Get a value with a block of bits set.
  ///
  /// Constructs an APInt value that has a contiguous range of bits set. The
  /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
  /// bits will be zero. For example, with parameters(32, 0, 16) you would get
  /// 0x0000FFFF. If hiBit is less than loBit then the set bits "wrap". For
  /// example, with parameters (32, 28, 4), you would get 0xF000000F.
  ///
  /// \param numBits the intended bit width of the result
  /// \param loBit the index of the lowest bit set.
  /// \param hiBit the index of the highest bit set.
  ///
  /// \returns An APInt value with the requested bits set.
  static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
    // Start from zero and delegate the (possibly wrapping) range to setBits.
    APInt Res(numBits, 0);
    Res.setBits(loBit, hiBit);
    return Res;
  }
611 | |
  /// Get a value with upper bits starting at loBit set.
  ///
  /// Constructs an APInt value that has a contiguous range of bits set. The
  /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
  /// bits will be zero. For example, with parameters(32, 12) you would get
  /// 0xFFFFF000.
  ///
  /// \param numBits the intended bit width of the result
  /// \param loBit the index of the lowest bit to set.
  ///
  /// \returns An APInt value with the requested bits set.
  static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
    APInt Res(numBits, 0);
    Res.setBitsFrom(loBit);
    return Res;
  }
628 | |
  /// Get a value with high bits set
  ///
  /// Constructs an APInt value that has the top hiBitsSet bits set.
  ///
  /// \param numBits the bitwidth of the result
  /// \param hiBitsSet the number of high-order bits set in the result.
  static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
    APInt Res(numBits, 0);
    Res.setHighBits(hiBitsSet);
    return Res;
  }
640 | |
  /// Get a value with low bits set
  ///
  /// Constructs an APInt value that has the bottom loBitsSet bits set.
  ///
  /// \param numBits the bitwidth of the result
  /// \param loBitsSet the number of low-order bits set in the result.
  static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
    APInt Res(numBits, 0);
    Res.setLowBits(loBitsSet);
    return Res;
  }
652 | |
653 | /// Return a value containing V broadcasted over NewLen bits. |
654 | static APInt getSplat(unsigned NewLen, const APInt &V); |
655 | |
656 | /// Determine if two APInts have the same value, after zero-extending |
657 | /// one of them (if needed!) to ensure that the bit-widths match. |
658 | static bool isSameValue(const APInt &I1, const APInt &I2) { |
659 | if (I1.getBitWidth() == I2.getBitWidth()) |
660 | return I1 == I2; |
661 | |
662 | if (I1.getBitWidth() > I2.getBitWidth()) |
663 | return I1 == I2.zext(I1.getBitWidth()); |
664 | |
665 | return I1.zext(I2.getBitWidth()) == I2; |
666 | } |
667 | |
668 | /// Overload to compute a hash_code for an APInt value. |
669 | friend hash_code hash_value(const APInt &Arg); |
670 | |
671 | /// This function returns a pointer to the internal storage of the APInt. |
672 | /// This is useful for writing out the APInt in binary form without any |
673 | /// conversions. |
674 | const uint64_t *getRawData() const { |
675 | if (isSingleWord()) |
676 | return &U.VAL; |
677 | return &U.pVal[0]; |
678 | } |
679 | |
680 | /// @} |
681 | /// \name Unary Operators |
682 | /// @{ |
683 | |
684 | /// Postfix increment operator. |
685 | /// |
686 | /// Increments *this by 1. |
687 | /// |
688 | /// \returns a new APInt value representing the original value of *this. |
689 | const APInt operator++(int) { |
690 | APInt API(*this); |
691 | ++(*this); |
692 | return API; |
693 | } |
694 | |
695 | /// Prefix increment operator. |
696 | /// |
697 | /// \returns *this incremented by one |
698 | APInt &operator++(); |
699 | |
700 | /// Postfix decrement operator. |
701 | /// |
702 | /// Decrements *this by 1. |
703 | /// |
704 | /// \returns a new APInt value representing the original value of *this. |
705 | const APInt operator--(int) { |
706 | APInt API(*this); |
707 | --(*this); |
708 | return API; |
709 | } |
710 | |
711 | /// Prefix decrement operator. |
712 | /// |
713 | /// \returns *this decremented by one. |
714 | APInt &operator--(); |
715 | |
716 | /// Logical negation operator. |
717 | /// |
718 | /// Performs logical negation operation on this APInt. |
719 | /// |
720 | /// \returns true if *this is zero, false otherwise. |
721 | bool operator!() const { |
722 | if (isSingleWord()) |
723 | return U.VAL == 0; |
724 | return countLeadingZerosSlowCase() == BitWidth; |
725 | } |
726 | |
727 | /// @} |
728 | /// \name Assignment Operators |
729 | /// @{ |
730 | |
731 | /// Copy assignment operator. |
732 | /// |
733 | /// \returns *this after assignment of RHS. |
734 | APInt &operator=(const APInt &RHS) { |
735 | // If the bitwidths are the same, we can avoid mucking with memory |
736 | if (isSingleWord() && RHS.isSingleWord()) { |
737 | U.VAL = RHS.U.VAL; |
738 | BitWidth = RHS.BitWidth; |
739 | return clearUnusedBits(); |
740 | } |
741 | |
742 | AssignSlowCase(RHS); |
743 | return *this; |
744 | } |
745 | |
746 | /// Move assignment operator. |
747 | APInt &operator=(APInt &&that) { |
748 | #ifdef _MSC_VER |
749 | // The MSVC std::shuffle implementation still does self-assignment. |
750 | if (this == &that) |
751 | return *this; |
752 | #endif |
753 | assert(this != &that && "Self-move not supported")((this != &that && "Self-move not supported") ? static_cast <void> (0) : __assert_fail ("this != &that && \"Self-move not supported\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 753, __PRETTY_FUNCTION__)); |
754 | if (!isSingleWord()) |
755 | delete[] U.pVal; |
756 | |
757 | // Use memcpy so that type based alias analysis sees both VAL and pVal |
758 | // as modified. |
759 | memcpy(&U, &that.U, sizeof(U)); |
760 | |
761 | BitWidth = that.BitWidth; |
762 | that.BitWidth = 0; |
763 | |
764 | return *this; |
765 | } |
766 | |
767 | /// Assignment operator. |
768 | /// |
769 | /// The RHS value is assigned to *this. If the significant bits in RHS exceed |
770 | /// the bit width, the excess bits are truncated. If the bit width is larger |
771 | /// than 64, the value is zero filled in the unspecified high order bits. |
772 | /// |
773 | /// \returns *this after assignment of RHS value. |
774 | APInt &operator=(uint64_t RHS) { |
775 | if (isSingleWord()) { |
776 | U.VAL = RHS; |
777 | clearUnusedBits(); |
778 | } else { |
779 | U.pVal[0] = RHS; |
780 | memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); |
781 | } |
782 | return *this; |
783 | } |
784 | |
785 | /// Bitwise AND assignment operator. |
786 | /// |
787 | /// Performs a bitwise AND operation on this APInt and RHS. The result is |
788 | /// assigned to *this. |
789 | /// |
790 | /// \returns *this after ANDing with RHS. |
791 | APInt &operator&=(const APInt &RHS) { |
792 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same" ) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 792, __PRETTY_FUNCTION__)); |
793 | if (isSingleWord()) |
794 | U.VAL &= RHS.U.VAL; |
795 | else |
796 | AndAssignSlowCase(RHS); |
797 | return *this; |
798 | } |
799 | |
800 | /// Bitwise AND assignment operator. |
801 | /// |
802 | /// Performs a bitwise AND operation on this APInt and RHS. RHS is |
803 | /// logically zero-extended or truncated to match the bit-width of |
804 | /// the LHS. |
805 | APInt &operator&=(uint64_t RHS) { |
806 | if (isSingleWord()) { |
807 | U.VAL &= RHS; |
808 | return *this; |
809 | } |
810 | U.pVal[0] &= RHS; |
811 | memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); |
812 | return *this; |
813 | } |
814 | |
815 | /// Bitwise OR assignment operator. |
816 | /// |
817 | /// Performs a bitwise OR operation on this APInt and RHS. The result is |
818 | /// assigned *this; |
819 | /// |
820 | /// \returns *this after ORing with RHS. |
821 | APInt &operator|=(const APInt &RHS) { |
822 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same" ) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 822, __PRETTY_FUNCTION__)); |
823 | if (isSingleWord()) |
824 | U.VAL |= RHS.U.VAL; |
825 | else |
826 | OrAssignSlowCase(RHS); |
827 | return *this; |
828 | } |
829 | |
830 | /// Bitwise OR assignment operator. |
831 | /// |
832 | /// Performs a bitwise OR operation on this APInt and RHS. RHS is |
833 | /// logically zero-extended or truncated to match the bit-width of |
834 | /// the LHS. |
835 | APInt &operator|=(uint64_t RHS) { |
836 | if (isSingleWord()) { |
837 | U.VAL |= RHS; |
838 | clearUnusedBits(); |
839 | } else { |
840 | U.pVal[0] |= RHS; |
841 | } |
842 | return *this; |
843 | } |
844 | |
845 | /// Bitwise XOR assignment operator. |
846 | /// |
847 | /// Performs a bitwise XOR operation on this APInt and RHS. The result is |
848 | /// assigned to *this. |
849 | /// |
850 | /// \returns *this after XORing with RHS. |
851 | APInt &operator^=(const APInt &RHS) { |
852 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same" ) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 852, __PRETTY_FUNCTION__)); |
853 | if (isSingleWord()) |
854 | U.VAL ^= RHS.U.VAL; |
855 | else |
856 | XorAssignSlowCase(RHS); |
857 | return *this; |
858 | } |
859 | |
860 | /// Bitwise XOR assignment operator. |
861 | /// |
862 | /// Performs a bitwise XOR operation on this APInt and RHS. RHS is |
863 | /// logically zero-extended or truncated to match the bit-width of |
864 | /// the LHS. |
865 | APInt &operator^=(uint64_t RHS) { |
866 | if (isSingleWord()) { |
867 | U.VAL ^= RHS; |
868 | clearUnusedBits(); |
869 | } else { |
870 | U.pVal[0] ^= RHS; |
871 | } |
872 | return *this; |
873 | } |
874 | |
875 | /// Multiplication assignment operator. |
876 | /// |
877 | /// Multiplies this APInt by RHS and assigns the result to *this. |
878 | /// |
879 | /// \returns *this |
880 | APInt &operator*=(const APInt &RHS); |
881 | APInt &operator*=(uint64_t RHS); |
882 | |
883 | /// Addition assignment operator. |
884 | /// |
885 | /// Adds RHS to *this and assigns the result to *this. |
886 | /// |
887 | /// \returns *this |
888 | APInt &operator+=(const APInt &RHS); |
889 | APInt &operator+=(uint64_t RHS); |
890 | |
891 | /// Subtraction assignment operator. |
892 | /// |
893 | /// Subtracts RHS from *this and assigns the result to *this. |
894 | /// |
895 | /// \returns *this |
896 | APInt &operator-=(const APInt &RHS); |
897 | APInt &operator-=(uint64_t RHS); |
898 | |
899 | /// Left-shift assignment function. |
900 | /// |
901 | /// Shifts *this left by shiftAmt and assigns the result to *this. |
902 | /// |
903 | /// \returns *this after shifting left by ShiftAmt |
904 | APInt &operator<<=(unsigned ShiftAmt) { |
905 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ? static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 905, __PRETTY_FUNCTION__)); |
906 | if (isSingleWord()) { |
907 | if (ShiftAmt == BitWidth) |
908 | U.VAL = 0; |
909 | else |
910 | U.VAL <<= ShiftAmt; |
911 | return clearUnusedBits(); |
912 | } |
913 | shlSlowCase(ShiftAmt); |
914 | return *this; |
915 | } |
916 | |
917 | /// Left-shift assignment function. |
918 | /// |
919 | /// Shifts *this left by shiftAmt and assigns the result to *this. |
920 | /// |
921 | /// \returns *this after shifting left by ShiftAmt |
922 | APInt &operator<<=(const APInt &ShiftAmt); |
923 | |
924 | /// @} |
925 | /// \name Binary Operators |
926 | /// @{ |
927 | |
928 | /// Multiplication operator. |
929 | /// |
930 | /// Multiplies this APInt by RHS and returns the result. |
931 | APInt operator*(const APInt &RHS) const; |
932 | |
933 | /// Left logical shift operator. |
934 | /// |
935 | /// Shifts this APInt left by \p Bits and returns the result. |
936 | APInt operator<<(unsigned Bits) const { return shl(Bits); } |
937 | |
938 | /// Left logical shift operator. |
939 | /// |
940 | /// Shifts this APInt left by \p Bits and returns the result. |
941 | APInt operator<<(const APInt &Bits) const { return shl(Bits); } |
942 | |
943 | /// Arithmetic right-shift function. |
944 | /// |
945 | /// Arithmetic right-shift this APInt by shiftAmt. |
946 | APInt ashr(unsigned ShiftAmt) const { |
947 | APInt R(*this); |
948 | R.ashrInPlace(ShiftAmt); |
949 | return R; |
950 | } |
951 | |
952 | /// Arithmetic right-shift this APInt by ShiftAmt in place. |
953 | void ashrInPlace(unsigned ShiftAmt) { |
954 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ? static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 954, __PRETTY_FUNCTION__)); |
955 | if (isSingleWord()) { |
956 | int64_t SExtVAL = SignExtend64(U.VAL, BitWidth); |
957 | if (ShiftAmt == BitWidth) |
958 | U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. |
959 | else |
960 | U.VAL = SExtVAL >> ShiftAmt; |
961 | clearUnusedBits(); |
962 | return; |
963 | } |
964 | ashrSlowCase(ShiftAmt); |
965 | } |
966 | |
967 | /// Logical right-shift function. |
968 | /// |
969 | /// Logical right-shift this APInt by shiftAmt. |
970 | APInt lshr(unsigned shiftAmt) const { |
971 | APInt R(*this); |
972 | R.lshrInPlace(shiftAmt); |
973 | return R; |
974 | } |
975 | |
976 | /// Logical right-shift this APInt by ShiftAmt in place. |
977 | void lshrInPlace(unsigned ShiftAmt) { |
978 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ? static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 978, __PRETTY_FUNCTION__)); |
979 | if (isSingleWord()) { |
980 | if (ShiftAmt == BitWidth) |
981 | U.VAL = 0; |
982 | else |
983 | U.VAL >>= ShiftAmt; |
984 | return; |
985 | } |
986 | lshrSlowCase(ShiftAmt); |
987 | } |
988 | |
989 | /// Left-shift function. |
990 | /// |
991 | /// Left-shift this APInt by shiftAmt. |
992 | APInt shl(unsigned shiftAmt) const { |
993 | APInt R(*this); |
994 | R <<= shiftAmt; |
995 | return R; |
996 | } |
997 | |
998 | /// Rotate left by rotateAmt. |
999 | APInt rotl(unsigned rotateAmt) const; |
1000 | |
1001 | /// Rotate right by rotateAmt. |
1002 | APInt rotr(unsigned rotateAmt) const; |
1003 | |
1004 | /// Arithmetic right-shift function. |
1005 | /// |
1006 | /// Arithmetic right-shift this APInt by shiftAmt. |
1007 | APInt ashr(const APInt &ShiftAmt) const { |
1008 | APInt R(*this); |
1009 | R.ashrInPlace(ShiftAmt); |
1010 | return R; |
1011 | } |
1012 | |
1013 | /// Arithmetic right-shift this APInt by shiftAmt in place. |
1014 | void ashrInPlace(const APInt &shiftAmt); |
1015 | |
1016 | /// Logical right-shift function. |
1017 | /// |
1018 | /// Logical right-shift this APInt by shiftAmt. |
1019 | APInt lshr(const APInt &ShiftAmt) const { |
1020 | APInt R(*this); |
1021 | R.lshrInPlace(ShiftAmt); |
1022 | return R; |
1023 | } |
1024 | |
1025 | /// Logical right-shift this APInt by ShiftAmt in place. |
1026 | void lshrInPlace(const APInt &ShiftAmt); |
1027 | |
1028 | /// Left-shift function. |
1029 | /// |
1030 | /// Left-shift this APInt by shiftAmt. |
1031 | APInt shl(const APInt &ShiftAmt) const { |
1032 | APInt R(*this); |
1033 | R <<= ShiftAmt; |
1034 | return R; |
1035 | } |
1036 | |
1037 | /// Rotate left by rotateAmt. |
1038 | APInt rotl(const APInt &rotateAmt) const; |
1039 | |
1040 | /// Rotate right by rotateAmt. |
1041 | APInt rotr(const APInt &rotateAmt) const; |
1042 | |
1043 | /// Unsigned division operation. |
1044 | /// |
1045 | /// Perform an unsigned divide operation on this APInt by RHS. Both this and |
1046 | /// RHS are treated as unsigned quantities for purposes of this division. |
1047 | /// |
1048 | /// \returns a new APInt value containing the division result, rounded towards |
1049 | /// zero. |
1050 | APInt udiv(const APInt &RHS) const; |
1051 | APInt udiv(uint64_t RHS) const; |
1052 | |
1053 | /// Signed division function for APInt. |
1054 | /// |
1055 | /// Signed divide this APInt by APInt RHS. |
1056 | /// |
1057 | /// The result is rounded towards zero. |
1058 | APInt sdiv(const APInt &RHS) const; |
1059 | APInt sdiv(int64_t RHS) const; |
1060 | |
1061 | /// Unsigned remainder operation. |
1062 | /// |
1063 | /// Perform an unsigned remainder operation on this APInt with RHS being the |
1064 | /// divisor. Both this and RHS are treated as unsigned quantities for purposes |
1065 | /// of this operation. Note that this is a true remainder operation and not a |
1066 | /// modulo operation because the sign follows the sign of the dividend which |
1067 | /// is *this. |
1068 | /// |
1069 | /// \returns a new APInt value containing the remainder result |
1070 | APInt urem(const APInt &RHS) const; |
1071 | uint64_t urem(uint64_t RHS) const; |
1072 | |
1073 | /// Function for signed remainder operation. |
1074 | /// |
1075 | /// Signed remainder operation on APInt. |
1076 | APInt srem(const APInt &RHS) const; |
1077 | int64_t srem(int64_t RHS) const; |
1078 | |
1079 | /// Dual division/remainder interface. |
1080 | /// |
1081 | /// Sometimes it is convenient to divide two APInt values and obtain both the |
1082 | /// quotient and remainder. This function does both operations in the same |
1083 | /// computation making it a little more efficient. The pair of input arguments |
1084 | /// may overlap with the pair of output arguments. It is safe to call |
1085 | /// udivrem(X, Y, X, Y), for example. |
1086 | static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, |
1087 | APInt &Remainder); |
1088 | static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, |
1089 | uint64_t &Remainder); |
1090 | |
1091 | static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, |
1092 | APInt &Remainder); |
1093 | static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient, |
1094 | int64_t &Remainder); |
1095 | |
1096 | // Operations that return overflow indicators. |
1097 | APInt sadd_ov(const APInt &RHS, bool &Overflow) const; |
1098 | APInt uadd_ov(const APInt &RHS, bool &Overflow) const; |
1099 | APInt ssub_ov(const APInt &RHS, bool &Overflow) const; |
1100 | APInt usub_ov(const APInt &RHS, bool &Overflow) const; |
1101 | APInt sdiv_ov(const APInt &RHS, bool &Overflow) const; |
1102 | APInt smul_ov(const APInt &RHS, bool &Overflow) const; |
1103 | APInt umul_ov(const APInt &RHS, bool &Overflow) const; |
1104 | APInt sshl_ov(const APInt &Amt, bool &Overflow) const; |
1105 | APInt ushl_ov(const APInt &Amt, bool &Overflow) const; |
1106 | |
1107 | // Operations that saturate |
1108 | APInt sadd_sat(const APInt &RHS) const; |
1109 | APInt uadd_sat(const APInt &RHS) const; |
1110 | APInt ssub_sat(const APInt &RHS) const; |
1111 | APInt usub_sat(const APInt &RHS) const; |
1112 | |
1113 | /// Array-indexing support. |
1114 | /// |
1115 | /// \returns the bit value at bitPosition |
1116 | bool operator[](unsigned bitPosition) const { |
1117 | assert(bitPosition < getBitWidth() && "Bit position out of bounds!")((bitPosition < getBitWidth() && "Bit position out of bounds!" ) ? static_cast<void> (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1117, __PRETTY_FUNCTION__)); |
1118 | return (maskBit(bitPosition) & getWord(bitPosition)) != 0; |
1119 | } |
1120 | |
1121 | /// @} |
1122 | /// \name Comparison Operators |
1123 | /// @{ |
1124 | |
1125 | /// Equality operator. |
1126 | /// |
1127 | /// Compares this APInt with RHS for the validity of the equality |
1128 | /// relationship. |
1129 | bool operator==(const APInt &RHS) const { |
1130 | assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")((BitWidth == RHS.BitWidth && "Comparison requires equal bit widths" ) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1130, __PRETTY_FUNCTION__)); |
1131 | if (isSingleWord()) |
1132 | return U.VAL == RHS.U.VAL; |
1133 | return EqualSlowCase(RHS); |
1134 | } |
1135 | |
1136 | /// Equality operator. |
1137 | /// |
1138 | /// Compares this APInt with a uint64_t for the validity of the equality |
1139 | /// relationship. |
1140 | /// |
1141 | /// \returns true if *this == Val |
1142 | bool operator==(uint64_t Val) const { |
1143 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val; |
1144 | } |
1145 | |
1146 | /// Equality comparison. |
1147 | /// |
1148 | /// Compares this APInt with RHS for the validity of the equality |
1149 | /// relationship. |
1150 | /// |
1151 | /// \returns true if *this == Val |
1152 | bool eq(const APInt &RHS) const { return (*this) == RHS; } |
1153 | |
1154 | /// Inequality operator. |
1155 | /// |
1156 | /// Compares this APInt with RHS for the validity of the inequality |
1157 | /// relationship. |
1158 | /// |
1159 | /// \returns true if *this != Val |
1160 | bool operator!=(const APInt &RHS) const { return !((*this) == RHS); } |
1161 | |
1162 | /// Inequality operator. |
1163 | /// |
1164 | /// Compares this APInt with a uint64_t for the validity of the inequality |
1165 | /// relationship. |
1166 | /// |
1167 | /// \returns true if *this != Val |
1168 | bool operator!=(uint64_t Val) const { return !((*this) == Val); } |
1169 | |
1170 | /// Inequality comparison |
1171 | /// |
1172 | /// Compares this APInt with RHS for the validity of the inequality |
1173 | /// relationship. |
1174 | /// |
1175 | /// \returns true if *this != Val |
1176 | bool ne(const APInt &RHS) const { return !((*this) == RHS); } |
1177 | |
1178 | /// Unsigned less than comparison |
1179 | /// |
1180 | /// Regards both *this and RHS as unsigned quantities and compares them for |
1181 | /// the validity of the less-than relationship. |
1182 | /// |
1183 | /// \returns true if *this < RHS when both are considered unsigned. |
1184 | bool ult(const APInt &RHS) const { return compare(RHS) < 0; } |
1185 | |
1186 | /// Unsigned less than comparison |
1187 | /// |
1188 | /// Regards both *this as an unsigned quantity and compares it with RHS for |
1189 | /// the validity of the less-than relationship. |
1190 | /// |
1191 | /// \returns true if *this < RHS when considered unsigned. |
1192 | bool ult(uint64_t RHS) const { |
1193 | // Only need to check active bits if not a single word. |
1194 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS; |
1195 | } |
1196 | |
1197 | /// Signed less than comparison |
1198 | /// |
1199 | /// Regards both *this and RHS as signed quantities and compares them for |
1200 | /// validity of the less-than relationship. |
1201 | /// |
1202 | /// \returns true if *this < RHS when both are considered signed. |
1203 | bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } |
1204 | |
1205 | /// Signed less than comparison |
1206 | /// |
1207 | /// Regards both *this as a signed quantity and compares it with RHS for |
1208 | /// the validity of the less-than relationship. |
1209 | /// |
1210 | /// \returns true if *this < RHS when considered signed. |
1211 | bool slt(int64_t RHS) const { |
1212 | return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative() |
1213 | : getSExtValue() < RHS; |
1214 | } |
1215 | |
1216 | /// Unsigned less or equal comparison |
1217 | /// |
1218 | /// Regards both *this and RHS as unsigned quantities and compares them for |
1219 | /// validity of the less-or-equal relationship. |
1220 | /// |
1221 | /// \returns true if *this <= RHS when both are considered unsigned. |
1222 | bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } |
1223 | |
1224 | /// Unsigned less or equal comparison |
1225 | /// |
1226 | /// Regards both *this as an unsigned quantity and compares it with RHS for |
1227 | /// the validity of the less-or-equal relationship. |
1228 | /// |
1229 | /// \returns true if *this <= RHS when considered unsigned. |
1230 | bool ule(uint64_t RHS) const { return !ugt(RHS); } |
1231 | |
1232 | /// Signed less or equal comparison |
1233 | /// |
1234 | /// Regards both *this and RHS as signed quantities and compares them for |
1235 | /// validity of the less-or-equal relationship. |
1236 | /// |
1237 | /// \returns true if *this <= RHS when both are considered signed. |
1238 | bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } |
1239 | |
1240 | /// Signed less or equal comparison |
1241 | /// |
1242 | /// Regards both *this as a signed quantity and compares it with RHS for the |
1243 | /// validity of the less-or-equal relationship. |
1244 | /// |
1245 | /// \returns true if *this <= RHS when considered signed. |
1246 | bool sle(uint64_t RHS) const { return !sgt(RHS); } |
1247 | |
1248 | /// Unsigned greather than comparison |
1249 | /// |
1250 | /// Regards both *this and RHS as unsigned quantities and compares them for |
1251 | /// the validity of the greater-than relationship. |
1252 | /// |
1253 | /// \returns true if *this > RHS when both are considered unsigned. |
1254 | bool ugt(const APInt &RHS) const { return !ule(RHS); } |
1255 | |
1256 | /// Unsigned greater than comparison |
1257 | /// |
1258 | /// Regards both *this as an unsigned quantity and compares it with RHS for |
1259 | /// the validity of the greater-than relationship. |
1260 | /// |
1261 | /// \returns true if *this > RHS when considered unsigned. |
1262 | bool ugt(uint64_t RHS) const { |
1263 | // Only need to check active bits if not a single word. |
1264 | return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS; |
1265 | } |
1266 | |
1267 | /// Signed greather than comparison |
1268 | /// |
1269 | /// Regards both *this and RHS as signed quantities and compares them for the |
1270 | /// validity of the greater-than relationship. |
1271 | /// |
1272 | /// \returns true if *this > RHS when both are considered signed. |
1273 | bool sgt(const APInt &RHS) const { return !sle(RHS); } |
1274 | |
1275 | /// Signed greater than comparison |
1276 | /// |
1277 | /// Regards both *this as a signed quantity and compares it with RHS for |
1278 | /// the validity of the greater-than relationship. |
1279 | /// |
1280 | /// \returns true if *this > RHS when considered signed. |
1281 | bool sgt(int64_t RHS) const { |
1282 | return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative() |
1283 | : getSExtValue() > RHS; |
1284 | } |
1285 | |
1286 | /// Unsigned greater or equal comparison |
1287 | /// |
1288 | /// Regards both *this and RHS as unsigned quantities and compares them for |
1289 | /// validity of the greater-or-equal relationship. |
1290 | /// |
1291 | /// \returns true if *this >= RHS when both are considered unsigned. |
1292 | bool uge(const APInt &RHS) const { return !ult(RHS); } |
1293 | |
1294 | /// Unsigned greater or equal comparison |
1295 | /// |
1296 | /// Regards both *this as an unsigned quantity and compares it with RHS for |
1297 | /// the validity of the greater-or-equal relationship. |
1298 | /// |
1299 | /// \returns true if *this >= RHS when considered unsigned. |
1300 | bool uge(uint64_t RHS) const { return !ult(RHS); } |
1301 | |
1302 | /// Signed greater or equal comparison |
1303 | /// |
1304 | /// Regards both *this and RHS as signed quantities and compares them for |
1305 | /// validity of the greater-or-equal relationship. |
1306 | /// |
1307 | /// \returns true if *this >= RHS when both are considered signed. |
1308 | bool sge(const APInt &RHS) const { return !slt(RHS); } |
1309 | |
1310 | /// Signed greater or equal comparison |
1311 | /// |
1312 | /// Regards both *this as a signed quantity and compares it with RHS for |
1313 | /// the validity of the greater-or-equal relationship. |
1314 | /// |
1315 | /// \returns true if *this >= RHS when considered signed. |
1316 | bool sge(int64_t RHS) const { return !slt(RHS); } |
1317 | |
1318 | /// This operation tests if there are any pairs of corresponding bits |
1319 | /// between this APInt and RHS that are both set. |
1320 | bool intersects(const APInt &RHS) const { |
1321 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same" ) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1321, __PRETTY_FUNCTION__)); |
1322 | if (isSingleWord()) |
1323 | return (U.VAL & RHS.U.VAL) != 0; |
1324 | return intersectsSlowCase(RHS); |
1325 | } |
1326 | |
1327 | /// This operation checks that all bits set in this APInt are also set in RHS. |
1328 | bool isSubsetOf(const APInt &RHS) const { |
1329 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same" ) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1329, __PRETTY_FUNCTION__)); |
1330 | if (isSingleWord()) |
1331 | return (U.VAL & ~RHS.U.VAL) == 0; |
1332 | return isSubsetOfSlowCase(RHS); |
1333 | } |
1334 | |
1335 | /// @} |
1336 | /// \name Resizing Operators |
1337 | /// @{ |
1338 | |
1339 | /// Truncate to new width. |
1340 | /// |
1341 | /// Truncate the APInt to a specified width. It is an error to specify a width |
1342 | /// that is greater than or equal to the current width. |
1343 | APInt trunc(unsigned width) const; |
1344 | |
1345 | /// Sign extend to a new width. |
1346 | /// |
1347 | /// This operation sign extends the APInt to a new width. If the high order |
1348 | /// bit is set, the fill on the left will be done with 1 bits, otherwise zero. |
1349 | /// It is an error to specify a width that is less than or equal to the |
1350 | /// current width. |
1351 | APInt sext(unsigned width) const; |
1352 | |
1353 | /// Zero extend to a new width. |
1354 | /// |
1355 | /// This operation zero extends the APInt to a new width. The high order bits |
1356 | /// are filled with 0 bits. It is an error to specify a width that is less |
1357 | /// than or equal to the current width. |
1358 | APInt zext(unsigned width) const; |
1359 | |
1360 | /// Sign extend or truncate to width |
1361 | /// |
1362 | /// Make this APInt have the bit width given by \p width. The value is sign |
1363 | /// extended, truncated, or left alone to make it that width. |
1364 | APInt sextOrTrunc(unsigned width) const; |
1365 | |
1366 | /// Zero extend or truncate to width |
1367 | /// |
1368 | /// Make this APInt have the bit width given by \p width. The value is zero |
1369 | /// extended, truncated, or left alone to make it that width. |
1370 | APInt zextOrTrunc(unsigned width) const; |
1371 | |
1372 | /// Sign extend or truncate to width |
1373 | /// |
1374 | /// Make this APInt have the bit width given by \p width. The value is sign |
1375 | /// extended, or left alone to make it that width. |
1376 | APInt sextOrSelf(unsigned width) const; |
1377 | |
1378 | /// Zero extend or truncate to width |
1379 | /// |
1380 | /// Make this APInt have the bit width given by \p width. The value is zero |
1381 | /// extended, or left alone to make it that width. |
1382 | APInt zextOrSelf(unsigned width) const; |
1383 | |
1384 | /// @} |
1385 | /// \name Bit Manipulation Operators |
1386 | /// @{ |
1387 | |
1388 | /// Set every bit to 1. |
1389 | void setAllBits() { |
1390 | if (isSingleWord()) |
1391 | U.VAL = WORDTYPE_MAX; |
1392 | else |
1393 | // Set all the bits in all the words. |
1394 | memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE); |
1395 | // Clear the unused ones |
1396 | clearUnusedBits(); |
1397 | } |
1398 | |
1399 | /// Set a given bit to 1. |
1400 | /// |
1401 | /// Set the given bit to 1 whose position is given as "bitPosition". |
1402 | void setBit(unsigned BitPosition) { |
1403 | assert(BitPosition < BitWidth && "BitPosition out of range")((BitPosition < BitWidth && "BitPosition out of range" ) ? static_cast<void> (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1403, __PRETTY_FUNCTION__)); |
1404 | WordType Mask = maskBit(BitPosition); |
1405 | if (isSingleWord()) |
1406 | U.VAL |= Mask; |
1407 | else |
1408 | U.pVal[whichWord(BitPosition)] |= Mask; |
1409 | } |
1410 | |
1411 | /// Set the sign bit to 1. |
1412 | void setSignBit() { |
1413 | setBit(BitWidth - 1); |
1414 | } |
1415 | |
1416 | /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. |
1417 | void setBits(unsigned loBit, unsigned hiBit) { |
1418 | assert(hiBit <= BitWidth && "hiBit out of range")((hiBit <= BitWidth && "hiBit out of range") ? static_cast <void> (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1418, __PRETTY_FUNCTION__)); |
1419 | assert(loBit <= BitWidth && "loBit out of range")((loBit <= BitWidth && "loBit out of range") ? static_cast <void> (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1419, __PRETTY_FUNCTION__)); |
1420 | assert(loBit <= hiBit && "loBit greater than hiBit")((loBit <= hiBit && "loBit greater than hiBit") ? static_cast <void> (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1420, __PRETTY_FUNCTION__)); |
1421 | if (loBit == hiBit) |
1422 | return; |
1423 | if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) { |
1424 | uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); |
1425 | mask <<= loBit; |
1426 | if (isSingleWord()) |
1427 | U.VAL |= mask; |
1428 | else |
1429 | U.pVal[0] |= mask; |
1430 | } else { |
1431 | setBitsSlowCase(loBit, hiBit); |
1432 | } |
1433 | } |
1434 | |
1435 | /// Set the top bits starting from loBit. |
1436 | void setBitsFrom(unsigned loBit) { |
1437 | return setBits(loBit, BitWidth); |
1438 | } |
1439 | |
1440 | /// Set the bottom loBits bits. |
1441 | void setLowBits(unsigned loBits) { |
1442 | return setBits(0, loBits); |
1443 | } |
1444 | |
1445 | /// Set the top hiBits bits. |
1446 | void setHighBits(unsigned hiBits) { |
1447 | return setBits(BitWidth - hiBits, BitWidth); |
1448 | } |
1449 | |
1450 | /// Set every bit to 0. |
1451 | void clearAllBits() { |
1452 | if (isSingleWord()) |
1453 | U.VAL = 0; |
1454 | else |
1455 | memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE); |
1456 | } |
1457 | |
1458 | /// Set a given bit to 0. |
1459 | /// |
1460 | /// Set the given bit to 0 whose position is given as "bitPosition". |
1461 | void clearBit(unsigned BitPosition) { |
1462 | assert(BitPosition < BitWidth && "BitPosition out of range")((BitPosition < BitWidth && "BitPosition out of range" ) ? static_cast<void> (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1462, __PRETTY_FUNCTION__)); |
1463 | WordType Mask = ~maskBit(BitPosition); |
1464 | if (isSingleWord()) |
1465 | U.VAL &= Mask; |
1466 | else |
1467 | U.pVal[whichWord(BitPosition)] &= Mask; |
1468 | } |
1469 | |
1470 | /// Set bottom loBits bits to 0. |
1471 | void clearLowBits(unsigned loBits) { |
1472 | assert(loBits <= BitWidth && "More bits than bitwidth")((loBits <= BitWidth && "More bits than bitwidth") ? static_cast<void> (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1472, __PRETTY_FUNCTION__)); |
1473 | APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits); |
1474 | *this &= Keep; |
1475 | } |
1476 | |
1477 | /// Set the sign bit to 0. |
1478 | void clearSignBit() { |
1479 | clearBit(BitWidth - 1); |
1480 | } |
1481 | |
1482 | /// Toggle every bit to its opposite value. |
1483 | void flipAllBits() { |
1484 | if (isSingleWord()) { |
1485 | U.VAL ^= WORDTYPE_MAX; |
1486 | clearUnusedBits(); |
1487 | } else { |
1488 | flipAllBitsSlowCase(); |
1489 | } |
1490 | } |
1491 | |
1492 | /// Toggles a given bit to its opposite value. |
1493 | /// |
1494 | /// Toggle a given bit to its opposite value whose position is given |
1495 | /// as "bitPosition". |
1496 | void flipBit(unsigned bitPosition); |
1497 | |
/// Negate this APInt in place.
///
/// Two's-complement negation: flip every bit, then add one.
void negate() {
  flipAllBits();
  ++(*this);
}
1503 | |
1504 | /// Insert the bits from a smaller APInt starting at bitPosition. |
1505 | void insertBits(const APInt &SubBits, unsigned bitPosition); |
1506 | void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits); |
1507 | |
1508 | /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). |
1509 | APInt extractBits(unsigned numBits, unsigned bitPosition) const; |
1510 | uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const; |
1511 | |
1512 | /// @} |
1513 | /// \name Value Characterization Functions |
1514 | /// @{ |
1515 | |
/// Return the number of bits in the APInt.
///
/// \returns the fixed bit width this value carries.
unsigned getBitWidth() const { return BitWidth; }
1518 | |
/// Get the number of words.
///
/// Here one word's bitwidth equals to that of uint64_t.
///
/// \returns the number of words to hold the integer value of this APInt.
/// Delegates to the static overload with this value's own bit width.
unsigned getNumWords() const { return getNumWords(BitWidth); }
1525 | |
1526 | /// Get the number of words. |
1527 | /// |
1528 | /// *NOTE* Here one word's bitwidth equals to that of uint64_t. |
1529 | /// |
1530 | /// \returns the number of words to hold the integer value with a given bit |
1531 | /// width. |
1532 | static unsigned getNumWords(unsigned BitWidth) { |
1533 | return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; |
1534 | } |
1535 | |
/// Compute the number of active bits in the value
///
/// This function returns the number of active bits which is defined as the
/// bit width minus the number of leading zeros. This is used in several
/// computations to see how "wide" the value is. (Returns 0 for a zero value,
/// since countLeadingZeros() then equals BitWidth.)
unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1542 | |
1543 | /// Compute the number of active words in the value of this APInt. |
1544 | /// |
1545 | /// This is used in conjunction with getActiveData to extract the raw value of |
1546 | /// the APInt. |
1547 | unsigned getActiveWords() const { |
1548 | unsigned numActiveBits = getActiveBits(); |
1549 | return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1; |
1550 | } |
1551 | |
1552 | /// Get the minimum bit size for this signed APInt |
1553 | /// |
1554 | /// Computes the minimum bit width for this APInt while considering it to be a |
1555 | /// signed (and probably negative) value. If the value is not negative, this |
1556 | /// function returns the same value as getActiveBits()+1. Otherwise, it |
1557 | /// returns the smallest bit width that will retain the negative value. For |
1558 | /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so |
1559 | /// for -1, this function will always return 1. |
1560 | unsigned getMinSignedBits() const { |
1561 | if (isNegative()) |
1562 | return BitWidth - countLeadingOnes() + 1; |
1563 | return getActiveBits() + 1; |
1564 | } |
1565 | |
1566 | /// Get zero extended value |
1567 | /// |
1568 | /// This method attempts to return the value of this APInt as a zero extended |
1569 | /// uint64_t. The bitwidth must be <= 64 or the value must fit within a |
1570 | /// uint64_t. Otherwise an assertion will result. |
1571 | uint64_t getZExtValue() const { |
1572 | if (isSingleWord()) |
1573 | return U.VAL; |
1574 | assert(getActiveBits() <= 64 && "Too many bits for uint64_t")((getActiveBits() <= 64 && "Too many bits for uint64_t" ) ? static_cast<void> (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1574, __PRETTY_FUNCTION__)); |
1575 | return U.pVal[0]; |
1576 | } |
1577 | |
1578 | /// Get sign extended value |
1579 | /// |
1580 | /// This method attempts to return the value of this APInt as a sign extended |
1581 | /// int64_t. The bit width must be <= 64 or the value must fit within an |
1582 | /// int64_t. Otherwise an assertion will result. |
1583 | int64_t getSExtValue() const { |
1584 | if (isSingleWord()) |
1585 | return SignExtend64(U.VAL, BitWidth); |
1586 | assert(getMinSignedBits() <= 64 && "Too many bits for int64_t")((getMinSignedBits() <= 64 && "Too many bits for int64_t" ) ? static_cast<void> (0) : __assert_fail ("getMinSignedBits() <= 64 && \"Too many bits for int64_t\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/ADT/APInt.h" , 1586, __PRETTY_FUNCTION__)); |
1587 | return int64_t(U.pVal[0]); |
1588 | } |
1589 | |
1590 | /// Get bits required for string value. |
1591 | /// |
1592 | /// This method determines how many bits are required to hold the APInt |
1593 | /// equivalent of the string given by \p str. |
1594 | static unsigned getBitsNeeded(StringRef str, uint8_t radix); |
1595 | |
1596 | /// The APInt version of the countLeadingZeros functions in |
1597 | /// MathExtras.h. |
1598 | /// |
1599 | /// It counts the number of zeros from the most significant bit to the first |
1600 | /// one bit. |
1601 | /// |
1602 | /// \returns BitWidth if the value is zero, otherwise returns the number of |
1603 | /// zeros from the most significant bit to the first one bits. |
1604 | unsigned countLeadingZeros() const { |
1605 | if (isSingleWord()) { |
1606 | unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth; |
1607 | return llvm::countLeadingZeros(U.VAL) - unusedBits; |
1608 | } |
1609 | return countLeadingZerosSlowCase(); |
1610 | } |
1611 | |
1612 | /// Count the number of leading one bits. |
1613 | /// |
1614 | /// This function is an APInt version of the countLeadingOnes |
1615 | /// functions in MathExtras.h. It counts the number of ones from the most |
1616 | /// significant bit to the first zero bit. |
1617 | /// |
1618 | /// \returns 0 if the high order bit is not set, otherwise returns the number |
1619 | /// of 1 bits from the most significant to the least |
1620 | unsigned countLeadingOnes() const { |
1621 | if (isSingleWord()) |
1622 | return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); |
1623 | return countLeadingOnesSlowCase(); |
1624 | } |
1625 | |
1626 | /// Computes the number of leading bits of this APInt that are equal to its |
1627 | /// sign bit. |
1628 | unsigned getNumSignBits() const { |
1629 | return isNegative() ? countLeadingOnes() : countLeadingZeros(); |
1630 | } |
1631 | |
1632 | /// Count the number of trailing zero bits. |
1633 | /// |
1634 | /// This function is an APInt version of the countTrailingZeros |
1635 | /// functions in MathExtras.h. It counts the number of zeros from the least |
1636 | /// significant bit to the first set bit. |
1637 | /// |
1638 | /// \returns BitWidth if the value is zero, otherwise returns the number of |
1639 | /// zeros from the least significant bit to the first one bit. |
1640 | unsigned countTrailingZeros() const { |
1641 | if (isSingleWord()) |
1642 | return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth); |
1643 | return countTrailingZerosSlowCase(); |
1644 | } |
1645 | |
1646 | /// Count the number of trailing one bits. |
1647 | /// |
1648 | /// This function is an APInt version of the countTrailingOnes |
1649 | /// functions in MathExtras.h. It counts the number of ones from the least |
1650 | /// significant bit to the first zero bit. |
1651 | /// |
1652 | /// \returns BitWidth if the value is all ones, otherwise returns the number |
1653 | /// of ones from the least significant bit to the first zero bit. |
1654 | unsigned countTrailingOnes() const { |
1655 | if (isSingleWord()) |
1656 | return llvm::countTrailingOnes(U.VAL); |
1657 | return countTrailingOnesSlowCase(); |
1658 | } |
1659 | |
1660 | /// Count the number of bits set. |
1661 | /// |
1662 | /// This function is an APInt version of the countPopulation functions |
1663 | /// in MathExtras.h. It counts the number of 1 bits in the APInt value. |
1664 | /// |
1665 | /// \returns 0 if the value is zero, otherwise returns the number of set bits. |
1666 | unsigned countPopulation() const { |
1667 | if (isSingleWord()) |
1668 | return llvm::countPopulation(U.VAL); |
1669 | return countPopulationSlowCase(); |
1670 | } |
1671 | |
1672 | /// @} |
1673 | /// \name Conversion Functions |
1674 | /// @{ |
1675 | void print(raw_ostream &OS, bool isSigned) const; |
1676 | |
1677 | /// Converts an APInt to a string and append it to Str. Str is commonly a |
1678 | /// SmallString. |
1679 | void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed, |
1680 | bool formatAsCLiteral = false) const; |
1681 | |
1682 | /// Considers the APInt to be unsigned and converts it into a string in the |
1683 | /// radix given. The radix can be 2, 8, 10 16, or 36. |
1684 | void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { |
1685 | toString(Str, Radix, false, false); |
1686 | } |
1687 | |
1688 | /// Considers the APInt to be signed and converts it into a string in the |
1689 | /// radix given. The radix can be 2, 8, 10, 16, or 36. |
1690 | void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { |
1691 | toString(Str, Radix, true, false); |
1692 | } |
1693 | |
1694 | /// Return the APInt as a std::string. |
1695 | /// |
1696 | /// Note that this is an inefficient method. It is better to pass in a |
1697 | /// SmallVector/SmallString to the methods above to avoid thrashing the heap |
1698 | /// for the string. |
1699 | std::string toString(unsigned Radix, bool Signed) const; |
1700 | |
1701 | /// \returns a byte-swapped representation of this APInt Value. |
1702 | APInt byteSwap() const; |
1703 | |
1704 | /// \returns the value with the bit representation reversed of this APInt |
1705 | /// Value. |
1706 | APInt reverseBits() const; |
1707 | |
1708 | /// Converts this APInt to a double value. |
1709 | double roundToDouble(bool isSigned) const; |
1710 | |
/// Converts this unsigned APInt to a double value.
///
/// Convenience overload: forwards to roundToDouble(/*isSigned=*/false).
double roundToDouble() const { return roundToDouble(false); }
1713 | |
/// Converts this signed APInt to a double value.
///
/// Convenience wrapper: forwards to roundToDouble(/*isSigned=*/true).
double signedRoundToDouble() const { return roundToDouble(true); }
1716 | |
1717 | /// Converts APInt bits to a double |
1718 | /// |
1719 | /// The conversion does not do a translation from integer to double, it just |
1720 | /// re-interprets the bits as a double. Note that it is valid to do this on |
1721 | /// any bit width. Exactly 64 bits will be translated. |
1722 | double bitsToDouble() const { |
1723 | return BitsToDouble(getWord(0)); |
1724 | } |
1725 | |
1726 | /// Converts APInt bits to a double |
1727 | /// |
1728 | /// The conversion does not do a translation from integer to float, it just |
1729 | /// re-interprets the bits as a float. Note that it is valid to do this on |
1730 | /// any bit width. Exactly 32 bits will be translated. |
1731 | float bitsToFloat() const { |
1732 | return BitsToFloat(getWord(0)); |
1733 | } |
1734 | |
1735 | /// Converts a double to APInt bits. |
1736 | /// |
1737 | /// The conversion does not do a translation from double to integer, it just |
1738 | /// re-interprets the bits of the double. |
1739 | static APInt doubleToBits(double V) { |
1740 | return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V)); |
1741 | } |
1742 | |
1743 | /// Converts a float to APInt bits. |
1744 | /// |
1745 | /// The conversion does not do a translation from float to integer, it just |
1746 | /// re-interprets the bits of the float. |
1747 | static APInt floatToBits(float V) { |
1748 | return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V)); |
1749 | } |
1750 | |
1751 | /// @} |
1752 | /// \name Mathematics Operations |
1753 | /// @{ |
1754 | |
/// \returns the floor log base 2 of this APInt.
///
/// NOTE: for a zero value getActiveBits() is 0, so this expression wraps;
/// callers are expected to pass a non-zero value.
unsigned logBase2() const { return getActiveBits() - 1; }
1757 | |
1758 | /// \returns the ceil log base 2 of this APInt. |
1759 | unsigned ceilLogBase2() const { |
1760 | APInt temp(*this); |
1761 | --temp; |
1762 | return temp.getActiveBits(); |
1763 | } |
1764 | |
1765 | /// \returns the nearest log base 2 of this APInt. Ties round up. |
1766 | /// |
1767 | /// NOTE: When we have a BitWidth of 1, we define: |
1768 | /// |
1769 | /// log2(0) = UINT32_MAX |
1770 | /// log2(1) = 0 |
1771 | /// |
1772 | /// to get around any mathematical concerns resulting from |
1773 | /// referencing 2 in a space where 2 does no exist. |
1774 | unsigned nearestLogBase2() const { |
1775 | // Special case when we have a bitwidth of 1. If VAL is 1, then we |
1776 | // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to |
1777 | // UINT32_MAX. |
1778 | if (BitWidth == 1) |
1779 | return U.VAL - 1; |
1780 | |
1781 | // Handle the zero case. |
1782 | if (isNullValue()) |
1783 | return UINT32_MAX(4294967295U); |
1784 | |
1785 | // The non-zero case is handled by computing: |
1786 | // |
1787 | // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1]. |
1788 | // |
1789 | // where x[i] is referring to the value of the ith bit of x. |
1790 | unsigned lg = logBase2(); |
1791 | return lg + unsigned((*this)[lg - 1]); |
1792 | } |
1793 | |
1794 | /// \returns the log base 2 of this APInt if its an exact power of two, -1 |
1795 | /// otherwise |
1796 | int32_t exactLogBase2() const { |
1797 | if (!isPowerOf2()) |
1798 | return -1; |
1799 | return logBase2(); |
1800 | } |
1801 | |
1802 | /// Compute the square root |
1803 | APInt sqrt() const; |
1804 | |
1805 | /// Get the absolute value; |
1806 | /// |
1807 | /// If *this is < 0 then return -(*this), otherwise *this; |
1808 | APInt abs() const { |
1809 | if (isNegative()) |
1810 | return -(*this); |
1811 | return *this; |
1812 | } |
1813 | |
1814 | /// \returns the multiplicative inverse for a given modulo. |
1815 | APInt multiplicativeInverse(const APInt &modulo) const; |
1816 | |
1817 | /// @} |
1818 | /// \name Support for division by constant |
1819 | /// @{ |
1820 | |
1821 | /// Calculate the magic number for signed division by a constant. |
1822 | struct ms; |
1823 | ms magic() const; |
1824 | |
1825 | /// Calculate the magic number for unsigned division by a constant. |
1826 | struct mu; |
1827 | mu magicu(unsigned LeadingZeros = 0) const; |
1828 | |
1829 | /// @} |
1830 | /// \name Building-block Operations for APInt and APFloat |
1831 | /// @{ |
1832 | |
1833 | // These building block operations operate on a representation of arbitrary |
1834 | // precision, two's-complement, bignum integer values. They should be |
1835 | // sufficient to implement APInt and APFloat bignum requirements. Inputs are |
1836 | // generally a pointer to the base of an array of integer parts, representing |
1837 | // an unsigned bignum, and a count of how many parts there are. |
1838 | |
1839 | /// Sets the least significant part of a bignum to the input value, and zeroes |
1840 | /// out higher parts. |
1841 | static void tcSet(WordType *, WordType, unsigned); |
1842 | |
1843 | /// Assign one bignum to another. |
1844 | static void tcAssign(WordType *, const WordType *, unsigned); |
1845 | |
1846 | /// Returns true if a bignum is zero, false otherwise. |
1847 | static bool tcIsZero(const WordType *, unsigned); |
1848 | |
1849 | /// Extract the given bit of a bignum; returns 0 or 1. Zero-based. |
1850 | static int tcExtractBit(const WordType *, unsigned bit); |
1851 | |
1852 | /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to |
1853 | /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least |
1854 | /// significant bit of DST. All high bits above srcBITS in DST are |
1855 | /// zero-filled. |
1856 | static void tcExtract(WordType *, unsigned dstCount, |
1857 | const WordType *, unsigned srcBits, |
1858 | unsigned srcLSB); |
1859 | |
1860 | /// Set the given bit of a bignum. Zero-based. |
1861 | static void tcSetBit(WordType *, unsigned bit); |
1862 | |
1863 | /// Clear the given bit of a bignum. Zero-based. |
1864 | static void tcClearBit(WordType *, unsigned bit); |
1865 | |
1866 | /// Returns the bit number of the least or most significant set bit of a |
1867 | /// number. If the input number has no bits set -1U is returned. |
1868 | static unsigned tcLSB(const WordType *, unsigned n); |
1869 | static unsigned tcMSB(const WordType *parts, unsigned n); |
1870 | |
1871 | /// Negate a bignum in-place. |
1872 | static void tcNegate(WordType *, unsigned); |
1873 | |
1874 | /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. |
1875 | static WordType tcAdd(WordType *, const WordType *, |
1876 | WordType carry, unsigned); |
1877 | /// DST += RHS. Returns the carry flag. |
1878 | static WordType tcAddPart(WordType *, WordType, unsigned); |
1879 | |
1880 | /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. |
1881 | static WordType tcSubtract(WordType *, const WordType *, |
1882 | WordType carry, unsigned); |
1883 | /// DST -= RHS. Returns the carry flag. |
1884 | static WordType tcSubtractPart(WordType *, WordType, unsigned); |
1885 | |
1886 | /// DST += SRC * MULTIPLIER + PART if add is true |
1887 | /// DST = SRC * MULTIPLIER + PART if add is false |
1888 | /// |
1889 | /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must |
1890 | /// start at the same point, i.e. DST == SRC. |
1891 | /// |
1892 | /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned. |
1893 | /// Otherwise DST is filled with the least significant DSTPARTS parts of the |
1894 | /// result, and if all of the omitted higher parts were zero return zero, |
1895 | /// otherwise overflow occurred and return one. |
1896 | static int tcMultiplyPart(WordType *dst, const WordType *src, |
1897 | WordType multiplier, WordType carry, |
1898 | unsigned srcParts, unsigned dstParts, |
1899 | bool add); |
1900 | |
1901 | /// DST = LHS * RHS, where DST has the same width as the operands and is |
1902 | /// filled with the least significant parts of the result. Returns one if |
1903 | /// overflow occurred, otherwise zero. DST must be disjoint from both |
1904 | /// operands. |
1905 | static int tcMultiply(WordType *, const WordType *, const WordType *, |
1906 | unsigned); |
1907 | |
1908 | /// DST = LHS * RHS, where DST has width the sum of the widths of the |
1909 | /// operands. No overflow occurs. DST must be disjoint from both operands. |
1910 | static void tcFullMultiply(WordType *, const WordType *, |
1911 | const WordType *, unsigned, unsigned); |
1912 | |
1913 | /// If RHS is zero LHS and REMAINDER are left unchanged, return one. |
1914 | /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set |
1915 | /// REMAINDER to the remainder, return zero. i.e. |
1916 | /// |
1917 | /// OLD_LHS = RHS * LHS + REMAINDER |
1918 | /// |
1919 | /// SCRATCH is a bignum of the same size as the operands and result for use by |
1920 | /// the routine; its contents need not be initialized and are destroyed. LHS, |
1921 | /// REMAINDER and SCRATCH must be distinct. |
1922 | static int tcDivide(WordType *lhs, const WordType *rhs, |
1923 | WordType *remainder, WordType *scratch, |
1924 | unsigned parts); |
1925 | |
1926 | /// Shift a bignum left Count bits. Shifted in bits are zero. There are no |
1927 | /// restrictions on Count. |
1928 | static void tcShiftLeft(WordType *, unsigned Words, unsigned Count); |
1929 | |
1930 | /// Shift a bignum right Count bits. Shifted in bits are zero. There are no |
1931 | /// restrictions on Count. |
1932 | static void tcShiftRight(WordType *, unsigned Words, unsigned Count); |
1933 | |
1934 | /// The obvious AND, OR and XOR and complement operations. |
1935 | static void tcAnd(WordType *, const WordType *, unsigned); |
1936 | static void tcOr(WordType *, const WordType *, unsigned); |
1937 | static void tcXor(WordType *, const WordType *, unsigned); |
1938 | static void tcComplement(WordType *, unsigned); |
1939 | |
1940 | /// Comparison (unsigned) of two bignums. |
1941 | static int tcCompare(const WordType *, const WordType *, unsigned); |
1942 | |
/// Increment a bignum in-place. Return the carry flag.
///
/// Thin wrapper: adds the single part value 1 via tcAddPart.
static WordType tcIncrement(WordType *dst, unsigned parts) {
  return tcAddPart(dst, 1, parts);
}
1947 | |
/// Decrement a bignum in-place. Return the borrow flag.
///
/// Thin wrapper: subtracts the single part value 1 via tcSubtractPart.
static WordType tcDecrement(WordType *dst, unsigned parts) {
  return tcSubtractPart(dst, 1, parts);
}
1952 | |
1953 | /// Set the least significant BITS and clear the rest. |
1954 | static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits); |
1955 | |
1956 | /// debug method |
1957 | void dump() const; |
1958 | |
1959 | /// @} |
1960 | }; |
1961 | |
/// Magic data for optimising signed division by a constant.
/// Produced by APInt::magic(); consumed by targets that lower signed
/// division into a multiply by \c m followed by a shift by \c s.
struct APInt::ms {
  APInt m;    ///< magic number
  unsigned s; ///< shift amount
};
1967 | |
/// Magic data for optimising unsigned division by a constant.
/// Produced by APInt::magicu(); the \c a flag signals that an extra
/// add step is required in the lowered sequence.
struct APInt::mu {
  APInt m;    ///< magic number
  bool a;     ///< add indicator
  unsigned s; ///< shift amount
};
1974 | |
/// Equality with a uint64_t on the left; forwards to the APInt-side
/// comparison with operands swapped (equality is symmetric).
inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }

/// Inequality with a uint64_t on the left; forwards likewise.
inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1978 | |
/// Unary bitwise complement operator.
///
/// \returns an APInt that is the bitwise complement of \p v.
inline APInt operator~(APInt v) {
  // \p v is a by-value copy, so it can be flipped in place and returned.
  v.flipAllBits();
  return v;
}
1986 | |
/// Bitwise AND; the LHS is taken by value and reused as the result.
inline APInt operator&(APInt a, const APInt &b) {
  a &= b;
  return a;
}

/// Bitwise AND reusing an rvalue RHS's storage for the result (AND is
/// commutative, so accumulating into \p b is equivalent).
inline APInt operator&(const APInt &a, APInt &&b) {
  b &= a;
  return std::move(b);
}

/// Bitwise AND with a uint64_t on the right.
inline APInt operator&(APInt a, uint64_t RHS) {
  a &= RHS;
  return a;
}

/// Bitwise AND with a uint64_t on the left (commutative).
inline APInt operator&(uint64_t LHS, APInt b) {
  b &= LHS;
  return b;
}
2006 | |
/// Bitwise OR; the LHS is taken by value and reused as the result.
inline APInt operator|(APInt a, const APInt &b) {
  a |= b;
  return a;
}

/// Bitwise OR reusing an rvalue RHS's storage for the result (OR is
/// commutative, so accumulating into \p b is equivalent).
inline APInt operator|(const APInt &a, APInt &&b) {
  b |= a;
  return std::move(b);
}

/// Bitwise OR with a uint64_t on the right.
inline APInt operator|(APInt a, uint64_t RHS) {
  a |= RHS;
  return a;
}

/// Bitwise OR with a uint64_t on the left (commutative).
inline APInt operator|(uint64_t LHS, APInt b) {
  b |= LHS;
  return b;
}
2026 | |
/// Bitwise XOR; the LHS is taken by value and reused as the result.
inline APInt operator^(APInt a, const APInt &b) {
  a ^= b;
  return a;
}

/// Bitwise XOR reusing an rvalue RHS's storage for the result (XOR is
/// commutative, so accumulating into \p b is equivalent).
inline APInt operator^(const APInt &a, APInt &&b) {
  b ^= a;
  return std::move(b);
}

/// Bitwise XOR with a uint64_t on the right.
inline APInt operator^(APInt a, uint64_t RHS) {
  a ^= RHS;
  return a;
}

/// Bitwise XOR with a uint64_t on the left (commutative).
inline APInt operator^(uint64_t LHS, APInt b) {
  b ^= LHS;
  return b;
}
2046 | |
/// Stream insertion: prints \p I as a signed value (isSigned == true).
inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
  I.print(OS, true);
  return OS;
}
2051 | |
/// Unary negation operator.
///
/// \returns the two's-complement negation of the by-value copy of \p v.
inline APInt operator-(APInt v) {
  v.negate();
  return v;
}
2056 | |
/// Addition; the LHS is taken by value and reused as the result.
inline APInt operator+(APInt a, const APInt &b) {
  a += b;
  return a;
}

/// Addition reusing an rvalue RHS's storage for the result (addition is
/// commutative, so accumulating into \p b is equivalent).
inline APInt operator+(const APInt &a, APInt &&b) {
  b += a;
  return std::move(b);
}

/// Addition of a uint64_t on the right.
inline APInt operator+(APInt a, uint64_t RHS) {
  a += RHS;
  return a;
}

/// Addition of a uint64_t on the left (commutative).
inline APInt operator+(uint64_t LHS, APInt b) {
  b += LHS;
  return b;
}
2076 | |
/// Subtraction; the LHS is taken by value and reused as the result.
inline APInt operator-(APInt a, const APInt &b) {
  a -= b;
  return a;
}

/// Subtraction reusing an rvalue RHS's storage: a - b == (-b) + a.
inline APInt operator-(const APInt &a, APInt &&b) {
  b.negate();
  b += a;
  return std::move(b);
}

/// Subtraction of a uint64_t on the right.
inline APInt operator-(APInt a, uint64_t RHS) {
  a -= RHS;
  return a;
}

/// uint64_t minus APInt, computed as (-b) + LHS.
inline APInt operator-(uint64_t LHS, APInt b) {
  b.negate();
  b += LHS;
  return b;
}
2098 | |
/// Multiplication by a uint64_t on the right; the APInt is taken by value
/// and reused as the result.
inline APInt operator*(APInt a, uint64_t RHS) {
  a *= RHS;
  return a;
}

/// Multiplication by a uint64_t on the left (commutative).
inline APInt operator*(uint64_t LHS, APInt b) {
  b *= LHS;
  return b;
}
2108 | |
2109 | |
2110 | namespace APIntOps { |
2111 | |
2112 | /// Determine the smaller of two APInts considered to be signed. |
2113 | inline const APInt &smin(const APInt &A, const APInt &B) { |
2114 | return A.slt(B) ? A : B; |
2115 | } |
2116 | |
2117 | /// Determine the larger of two APInts considered to be signed. |
2118 | inline const APInt &smax(const APInt &A, const APInt &B) { |
2119 | return A.sgt(B) ? A : B; |
2120 | } |
2121 | |
/// Determine the smaller of two APInts considered to be unsigned.
inline const APInt &umin(const APInt &A, const APInt &B) {
  return A.ult(B) ? A : B;
}
2126 | |
2127 | /// Determine the larger of two APInts considered to be unsigned. |
2128 | inline const APInt &umax(const APInt &A, const APInt &B) { |
2129 | return A.ugt(B) ? A : B; |
2130 | } |
2131 | |
2132 | /// Compute GCD of two unsigned APInt values. |
2133 | /// |
2134 | /// This function returns the greatest common divisor of the two APInt values |
2135 | /// using Stein's algorithm. |
2136 | /// |
2137 | /// \returns the greatest common divisor of A and B. |
2138 | APInt GreatestCommonDivisor(APInt A, APInt B); |
2139 | |
/// Converts the given APInt to a double value.
///
/// Treats the APInt as an unsigned value for conversion purposes.
/// Thin wrapper around APInt::roundToDouble().
inline double RoundAPIntToDouble(const APInt &APIVal) {
  return APIVal.roundToDouble();
}
2146 | |
/// Converts the given APInt to a double value.
///
/// Treats the APInt as a signed value for conversion purposes.
/// Thin wrapper around APInt::signedRoundToDouble().
inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
  return APIVal.signedRoundToDouble();
}
2153 | |
/// Converts the given APInt to a float value.
///
/// Treats the APInt as an unsigned value for conversion purposes.
inline float RoundAPIntToFloat(const APInt &APIVal) {
  return float(RoundAPIntToDouble(APIVal));
}
2158 | |
/// Converts the given APInt to a float value.
///
/// Treats the APInt as a signed value for conversion purposes.
inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
  return float(APIVal.signedRoundToDouble());
}
2165 | |
2166 | /// Converts the given double value into a APInt. |
2167 | /// |
2168 | /// This function convert a double value to an APInt value. |
2169 | APInt RoundDoubleToAPInt(double Double, unsigned width); |
2170 | |
2171 | /// Converts a float value into a APInt. |
2172 | /// |
2173 | /// Converts a float value into an APInt value. |
2174 | inline APInt RoundFloatToAPInt(float Float, unsigned width) { |
2175 | return RoundDoubleToAPInt(double(Float), width); |
2176 | } |
2177 | |
2178 | /// Return A unsign-divided by B, rounded by the given rounding mode. |
2179 | APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM); |
2180 | |
2181 | /// Return A sign-divided by B, rounded by the given rounding mode. |
2182 | APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM); |
2183 | |
2184 | /// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range |
2185 | /// (e.g. 32 for i32). |
2186 | /// This function finds the smallest number n, such that |
2187 | /// (a) n >= 0 and q(n) = 0, or |
2188 | /// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all |
2189 | /// integers, belong to two different intervals [Rk, Rk+R), |
2190 | /// where R = 2^BW, and k is an integer. |
2191 | /// The idea here is to find when q(n) "overflows" 2^BW, while at the |
2192 | /// same time "allowing" subtraction. In unsigned modulo arithmetic a |
2193 | /// subtraction (treated as addition of negated numbers) would always |
2194 | /// count as an overflow, but here we want to allow values to decrease |
2195 | /// and increase as long as they are within the same interval. |
2196 | /// Specifically, adding of two negative numbers should not cause an |
/// overflow (as long as the magnitude does not exceed the bit width).
2198 | /// On the other hand, given a positive number, adding a negative |
2199 | /// number to it can give a negative result, which would cause the |
2200 | /// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is |
2201 | /// treated as a special case of an overflow. |
2202 | /// |
2203 | /// This function returns None if after finding k that minimizes the |
2204 | /// positive solution to q(n) = kR, both solutions are contained between |
2205 | /// two consecutive integers. |
2206 | /// |
2207 | /// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation |
2208 | /// in arithmetic modulo 2^BW, and treating the values as signed) by the |
2209 | /// virtue of *signed* overflow. This function will *not* find such an n, |
2210 | /// however it may find a value of n satisfying the inequalities due to |
2211 | /// an *unsigned* overflow (if the values are treated as unsigned). |
2212 | /// To find a solution for a signed overflow, treat it as a problem of |
/// finding an unsigned overflow with a range width of BW-1.
2214 | /// |
2215 | /// The returned value may have a different bit width from the input |
2216 | /// coefficients. |
2217 | Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, |
2218 | unsigned RangeWidth); |
2219 | } // End of APIntOps namespace |
2220 | |
2221 | // See friend declaration above. This additional declaration is required in |
2222 | // order to compile LLVM with IBM xlC compiler. |
2223 | hash_code hash_value(const APInt &Arg); |
2224 | |
2225 | /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst |
2226 | /// with the integer held in IntVal. |
2227 | void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes); |
2228 | |
2229 | /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting |
2230 | /// from Src into IntVal, which is assumed to be wide enough and to hold zero. |
2231 | void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes); |
2232 | |
2233 | } // namespace llvm |
2234 | |
2235 | #endif |
1 | //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | /// |
9 | /// \file |
/// This file describes how to lower LLVM code to machine code.  This has three
/// main components:
12 | /// |
13 | /// 1. Which ValueTypes are natively supported by the target. |
14 | /// 2. Which operations are supported for supported ValueTypes. |
15 | /// 3. Cost thresholds for alternative implementations of certain operations. |
16 | /// |
17 | /// In addition it has a few other components, like information about FP |
18 | /// immediates. |
19 | /// |
20 | //===----------------------------------------------------------------------===// |
21 | |
22 | #ifndef LLVM_CODEGEN_TARGETLOWERING_H |
23 | #define LLVM_CODEGEN_TARGETLOWERING_H |
24 | |
25 | #include "llvm/ADT/APInt.h" |
26 | #include "llvm/ADT/ArrayRef.h" |
27 | #include "llvm/ADT/DenseMap.h" |
28 | #include "llvm/ADT/STLExtras.h" |
29 | #include "llvm/ADT/SmallVector.h" |
30 | #include "llvm/ADT/StringRef.h" |
31 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" |
32 | #include "llvm/CodeGen/DAGCombine.h" |
33 | #include "llvm/CodeGen/ISDOpcodes.h" |
34 | #include "llvm/CodeGen/RuntimeLibcalls.h" |
35 | #include "llvm/CodeGen/SelectionDAG.h" |
36 | #include "llvm/CodeGen/SelectionDAGNodes.h" |
37 | #include "llvm/CodeGen/TargetCallingConv.h" |
38 | #include "llvm/CodeGen/ValueTypes.h" |
39 | #include "llvm/IR/Attributes.h" |
40 | #include "llvm/IR/CallSite.h" |
41 | #include "llvm/IR/CallingConv.h" |
42 | #include "llvm/IR/DataLayout.h" |
43 | #include "llvm/IR/DerivedTypes.h" |
44 | #include "llvm/IR/Function.h" |
45 | #include "llvm/IR/IRBuilder.h" |
46 | #include "llvm/IR/InlineAsm.h" |
47 | #include "llvm/IR/Instruction.h" |
48 | #include "llvm/IR/Instructions.h" |
49 | #include "llvm/IR/Type.h" |
50 | #include "llvm/MC/MCRegisterInfo.h" |
51 | #include "llvm/Support/Alignment.h" |
52 | #include "llvm/Support/AtomicOrdering.h" |
53 | #include "llvm/Support/Casting.h" |
54 | #include "llvm/Support/ErrorHandling.h" |
55 | #include "llvm/Support/MachineValueType.h" |
56 | #include "llvm/Target/TargetMachine.h" |
57 | #include <algorithm> |
58 | #include <cassert> |
59 | #include <climits> |
60 | #include <cstdint> |
61 | #include <iterator> |
62 | #include <map> |
63 | #include <string> |
64 | #include <utility> |
65 | #include <vector> |
66 | |
67 | namespace llvm { |
68 | |
69 | class BranchProbability; |
70 | class CCState; |
71 | class CCValAssign; |
72 | class Constant; |
73 | class FastISel; |
74 | class FunctionLoweringInfo; |
75 | class GlobalValue; |
76 | class GISelKnownBits; |
77 | class IntrinsicInst; |
78 | struct KnownBits; |
79 | class LLVMContext; |
80 | class MachineBasicBlock; |
81 | class MachineFunction; |
82 | class MachineInstr; |
83 | class MachineJumpTableInfo; |
84 | class MachineLoop; |
85 | class MachineRegisterInfo; |
86 | class MCContext; |
87 | class MCExpr; |
88 | class Module; |
89 | class TargetRegisterClass; |
90 | class TargetLibraryInfo; |
91 | class TargetRegisterInfo; |
92 | class Value; |
93 | |
namespace Sched {

/// Scheduling heuristic preference that a target (or an individual node, via
/// TargetLoweringBase::getSchedulingPreference) can express to the
/// SelectionDAG scheduler.
enum Preference {
  None,        // No preference
  Source,      // Follow source order.
  RegPressure, // Scheduling for lowest register pressure.
  Hybrid,      // Scheduling for both latency and register pressure.
  ILP,         // Scheduling for ILP in low register pressure mode.
  VLIW         // Scheduling for VLIW targets.
};

} // end namespace Sched
106 | |
107 | /// This base class for TargetLowering contains the SelectionDAG-independent |
108 | /// parts that can be used from the rest of CodeGen. |
109 | class TargetLoweringBase { |
110 | public: |
  /// This enum indicates whether operations are valid for a target, and if not,
  /// what action should be used to make them valid.
  /// The explicit uint8_t underlying type keeps each stored action to one byte.
  enum LegalizeAction : uint8_t {
    Legal,   // The target natively supports this operation.
    Promote, // This operation should be executed in a larger type.
    Expand,  // Try to expand this to other ops, otherwise use a libcall.
    LibCall, // Don't try to expand this to other ops, always use a libcall.
    Custom   // Use the LowerOperation hook to implement custom lowering.
  };
120 | |
  /// This enum indicates whether types are legal for a target, and if not,
  /// what action should be used to make them valid.
  enum LegalizeTypeAction : uint8_t {
    TypeLegal,           // The target natively supports this type.
    TypePromoteInteger,  // Replace this integer with a larger one.
    TypeExpandInteger,   // Split this integer into two of half the size.
    TypeSoftenFloat,     // Convert this float to a same size integer type.
    TypeExpandFloat,     // Split this float into two of half the size.
    TypeScalarizeVector, // Replace this one-element vector with its element.
    TypeSplitVector,     // Split this vector into two of half the size.
    TypeWidenVector,     // This vector should be widened into a larger vector.
    TypePromoteFloat     // Replace this float with a larger one.
  };
134 | |
135 | /// LegalizeKind holds the legalization kind that needs to happen to EVT |
136 | /// in order to type-legalize it. |
137 | using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; |
138 | |
  /// Enum that describes how the target represents true/false values.
  /// See getExtendForContent for the matching ISD extension opcode.
  enum BooleanContent {
    UndefinedBooleanContent,    // Only bit 0 counts, the rest can hold garbage.
    ZeroOrOneBooleanContent,        // All bits zero except for bit 0.
    ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
  };
145 | |
  /// Enum that describes what type of support for selects the target has.
  /// Queried through isSelectSupported.
  enum SelectSupportKind {
    ScalarValSelect,     // The target supports scalar selects (ex: cmov).
    ScalarCondVectorVal, // The target supports selects with a scalar condition
                         // and vector values (ex: cmov).
    VectorMaskSelect     // The target supports vector selects with a vector
                         // mask (ex: x86 blends).
  };
154 | |
  /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
  /// to, if at all. Exists because different targets have different levels of
  /// support for these atomic instructions, and also have different options
  /// w.r.t. what they should expand to.
  enum class AtomicExpansionKind {
    None,    // Don't expand the instruction.
    LLSC,    // Expand the instruction into loadlinked/storeconditional; used
             // by ARM/AArch64.
    LLOnly,  // Expand the (load) instruction into just a load-linked, which has
             // greater atomic guarantees than a normal load.
    CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
    MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
  };
168 | |
  /// Enum that specifies when a multiplication should be expanded into
  /// alternative instruction sequences.
  enum class MulExpansionKind {
    Always,            // Always expand the instruction.
    OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
                       // or custom.
  };
175 | |
  /// Describes one argument of a call being lowered: the IR value, its type,
  /// and the ABI attribute flags that apply to it.
  class ArgListEntry {
  public:
    Value *Val = nullptr;     // Original IR argument value.
    SDValue Node = SDValue(); // Lowered DAG node for this argument, if any.
    Type *Ty = nullptr;       // IR type of the argument.
    bool IsSExt : 1;
    bool IsZExt : 1;
    bool IsInReg : 1;
    bool IsSRet : 1;
    bool IsNest : 1;
    bool IsByVal : 1;
    bool IsInAlloca : 1;
    bool IsReturned : 1;
    bool IsSwiftSelf : 1;
    bool IsSwiftError : 1;
    uint16_t Alignment = 0;    // Explicit alignment; defaults to 0.
    Type *ByValType = nullptr; // Pointee type when IsByVal is set.

    // Bit-fields cannot use in-class initializers (pre-C++20), so all
    // attribute flags are zeroed in the constructor instead.
    ArgListEntry()
        : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
          IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
          IsSwiftSelf(false), IsSwiftError(false) {}

    /// Populate the flags above from the attributes of argument \p ArgIdx of
    /// \p Call.
    void setAttributes(const CallBase *Call, unsigned ArgIdx);

    /// Convenience overload taking an ImmutableCallSite.
    void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
      return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
    }
  };
205 | using ArgListTy = std::vector<ArgListEntry>; |
206 | |
207 | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, |
208 | ArgListTy &Args) const {}; |
209 | |
210 | static ISD::NodeType getExtendForContent(BooleanContent Content) { |
211 | switch (Content) { |
212 | case UndefinedBooleanContent: |
213 | // Extend by adding rubbish bits. |
214 | return ISD::ANY_EXTEND; |
215 | case ZeroOrOneBooleanContent: |
216 | // Extend by adding zero bits. |
217 | return ISD::ZERO_EXTEND; |
218 | case ZeroOrNegativeOneBooleanContent: |
219 | // Extend by copying the sign bit. |
220 | return ISD::SIGN_EXTEND; |
221 | } |
222 | llvm_unreachable("Invalid content kind")::llvm::llvm_unreachable_internal("Invalid content kind", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 222); |
223 | } |
224 | |
225 | /// NOTE: The TargetMachine owns TLOF. |
226 | explicit TargetLoweringBase(const TargetMachine &TM); |
227 | TargetLoweringBase(const TargetLoweringBase &) = delete; |
228 | TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; |
229 | virtual ~TargetLoweringBase() = default; |
230 | |
231 | protected: |
232 | /// Initialize all of the actions to default values. |
233 | void initActions(); |
234 | |
235 | public: |
  /// Return the TargetMachine this lowering object was constructed with.
  const TargetMachine &getTargetMachine() const { return TM; }
237 | |
  /// Return true if the target uses soft-float lowering. Defaults to false;
  /// targets override as needed.
  virtual bool useSoftFloat() const { return false; }
239 | |
240 | /// Return the pointer type for the given address space, defaults to |
241 | /// the pointer type from the data layout. |
242 | /// FIXME: The default needs to be removed once all the code is updated. |
243 | virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { |
244 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); |
245 | } |
246 | |
  /// Return the in-memory pointer type for the given address space, defaults to
  /// the pointer type from the data layout. Unlike getPointerTy this is not
  /// virtual. FIXME: The default needs to be
  /// removed once all the code is updated.
  MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
  }
253 | |
254 | /// Return the type for frame index, which is determined by |
255 | /// the alloca address space specified through the data layout. |
256 | MVT getFrameIndexTy(const DataLayout &DL) const { |
257 | return getPointerTy(DL, DL.getAllocaAddrSpace()); |
258 | } |
259 | |
  /// Return the type for operands of fence.
  /// Defaults to the default-address-space pointer type.
  /// TODO: Let fence operands be of i32 type and remove this.
  virtual MVT getFenceOperandTy(const DataLayout &DL) const {
    return getPointerTy(DL);
  }
265 | |
266 | /// EVT is not used in-tree, but is used by out-of-tree target. |
267 | /// A documentation for this function would be nice... |
268 | virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; |
269 | |
270 | EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, |
271 | bool LegalTypes = true) const; |
272 | |
  /// Returns the type to be used for the index operand of:
  /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
  /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR.
  /// Defaults to the default-address-space pointer type.
  virtual MVT getVectorIdxTy(const DataLayout &DL) const {
    return getPointerTy(DL);
  }
279 | |
  /// Return true if the target supports the given kind of select.
  /// The base implementation accepts every SelectSupportKind.
  virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
    return true;
  }
283 | |
  /// Return true if it is profitable to convert a select of FP constants into
  /// a constant pool load whose address depends on the select condition. The
  /// parameter may be used to differentiate a select with FP compare from
  /// integer compare. Defaults to true.
  virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
    return true;
  }
291 | |
  /// Return true if multiple condition registers are available.
  /// Backed by the HasMultipleConditionRegisters flag set by derived targets.
  bool hasMultipleConditionRegisters() const {
    return HasMultipleConditionRegisters;
  }
296 | |
  /// Return true if the target has BitExtract instructions.
  /// Backed by the HasExtractBitsInsn flag set by derived targets.
  bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
299 | |
300 | /// Return the preferred vector type legalization action. |
301 | virtual TargetLoweringBase::LegalizeTypeAction |
302 | getPreferredVectorAction(MVT VT) const { |
303 | // The default action for one element vectors is to scalarize |
304 | if (VT.getVectorNumElements() == 1) |
305 | return TypeScalarizeVector; |
306 | // The default action for an odd-width vector is to widen. |
307 | if (!VT.isPow2VectorType()) |
308 | return TypeWidenVector; |
309 | // The default action for other vectors is to promote |
310 | return TypePromoteInteger; |
311 | } |
312 | |
313 | // There are two general methods for expanding a BUILD_VECTOR node: |
314 | // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle |
315 | // them together. |
316 | // 2. Build the vector on the stack and then load it. |
317 | // If this function returns true, then method (1) will be used, subject to |
318 | // the constraint that all of the necessary shuffles are legal (as determined |
319 | // by isShuffleMaskLegal). If this function returns false, then method (2) is |
320 | // always used. The vector type, and the number of defined values, are |
321 | // provided. |
322 | virtual bool |
323 | shouldExpandBuildVectorWithShuffles(EVT /* VT */, |
324 | unsigned DefinedValues) const { |
325 | return DefinedValues < 3; |
326 | } |
327 | |
  /// Return true if integer divide is usually cheaper than a sequence of
  /// several shifts, adds, and multiplies for this target.
  /// The definition of "cheaper" may depend on whether we're optimizing
  /// for speed or for size. Defaults to false.
  virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
333 | |
  /// Return true if the target can handle a standalone remainder operation.
  /// Defaults to true.
  virtual bool hasStandaloneRem(EVT VT) const {
    return true;
  }
338 | |
  /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
  /// \p X and \p DAG are available for target-specific heuristics.
  virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
    // Default behavior is to replace SQRT(X) with X*RSQRT(X).
    return false;
  }
344 | |
  /// Reciprocal estimate status values used by the functions below.
  /// Unspecified means no function attribute overrides the target default.
  enum ReciprocalEstimate : int {
    Unspecified = -1,
    Disabled = 0,
    Enabled = 1
  };
351 | |
352 | /// Return a ReciprocalEstimate enum value for a square root of the given type |
353 | /// based on the function's attributes. If the operation is not overridden by |
354 | /// the function's attributes, "Unspecified" is returned and target defaults |
355 | /// are expected to be used for instruction selection. |
356 | int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; |
357 | |
358 | /// Return a ReciprocalEstimate enum value for a division of the given type |
359 | /// based on the function's attributes. If the operation is not overridden by |
360 | /// the function's attributes, "Unspecified" is returned and target defaults |
361 | /// are expected to be used for instruction selection. |
362 | int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; |
363 | |
364 | /// Return the refinement step count for a square root of the given type based |
365 | /// on the function's attributes. If the operation is not overridden by |
366 | /// the function's attributes, "Unspecified" is returned and target defaults |
367 | /// are expected to be used for instruction selection. |
368 | int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; |
369 | |
370 | /// Return the refinement step count for a division of the given type based |
371 | /// on the function's attributes. If the operation is not overridden by |
372 | /// the function's attributes, "Unspecified" is returned and target defaults |
373 | /// are expected to be used for instruction selection. |
374 | int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; |
375 | |
  /// Returns true if target has indicated at least one type should be bypassed.
  /// Equivalent to the BypassSlowDivWidths map being non-empty.
  bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
378 | |
  /// Returns map of slow types for division or remainder with corresponding
  /// fast types. The returned reference is owned by this object.
  const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
    return BypassSlowDivWidths;
  }
384 | |
  /// Return true if Flow Control is an expensive operation that should be
  /// avoided. Backed by the JumpIsExpensive flag set by derived targets.
  bool isJumpExpensive() const { return JumpIsExpensive; }
388 | |
  /// Return true if selects are only cheaper than branches if the branch is
  /// unlikely to be predicted right. Backed by the
  /// PredictableSelectIsExpensive flag set by derived targets.
  bool isPredictableSelectExpensive() const {
    return PredictableSelectIsExpensive;
  }
394 | |
395 | /// If a branch or a select condition is skewed in one direction by more than |
396 | /// this factor, it is very likely to be predicted correctly. |
397 | virtual BranchProbability getPredictableBranchThreshold() const; |
398 | |
399 | /// Return true if the following transform is beneficial: |
400 | /// fold (conv (load x)) -> (load (conv*)x) |
401 | /// On architectures that don't natively support some vector loads |
402 | /// efficiently, casting the load to a smaller vector of larger types and |
403 | /// loading is more efficient, however, this can be undone by optimizations in |
404 | /// dag combiner. |
405 | virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
406 | const SelectionDAG &DAG, |
407 | const MachineMemOperand &MMO) const { |
408 | // Don't do if we could do an indexed load on the original type, but not on |
409 | // the new one. |
410 | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) |
411 | return true; |
412 | |
413 | MVT LoadMVT = LoadVT.getSimpleVT(); |
414 | |
415 | // Don't bother doing this if it's just going to be promoted again later, as |
416 | // doing so might interfere with other combines. |
417 | if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && |
418 | getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) |
419 | return false; |
420 | |
421 | bool Fast = false; |
422 | return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT, |
423 | MMO, &Fast) && Fast; |
424 | } |
425 | |
  /// Return true if the following transform is beneficial:
  /// (store (y (conv x)), y*)) -> (store x, (x*))
  /// Defaults to the load heuristic, isLoadBitCastBeneficial.
  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
                                        const SelectionDAG &DAG,
                                        const MachineMemOperand &MMO) const {
    // Default to the same logic as loads.
    return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
  }
434 | |
  /// Return true if it is expected to be cheaper to do a store of a non-zero
  /// vector constant with the given size and type for the address space than to
  /// store the individual scalar element constants. Defaults to false.
  virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
                                            unsigned NumElem,
                                            unsigned AddrSpace) const {
    return false;
  }
443 | |
  /// Allow store merging for the specified type after legalization in addition
  /// to before legalization. This may transform stores that do not exist
  /// earlier (for example, stores created from intrinsics). Defaults to true.
  virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
    return true;
  }
450 | |
  /// Returns if it's reasonable to merge stores to MemVT size.
  /// Defaults to true.
  virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
                                const SelectionDAG &DAG) const {
    return true;
  }
456 | |
  /// Return true if it is cheap to speculate a call to intrinsic cttz.
  /// Defaults to false; targets with a cheap cttz override this.
  virtual bool isCheapToSpeculateCttz() const {
    return false;
  }
461 | |
  /// Return true if it is cheap to speculate a call to intrinsic ctlz.
  /// Defaults to false; targets with a cheap ctlz override this.
  virtual bool isCheapToSpeculateCtlz() const {
    return false;
  }
466 | |
  /// Return true if ctlz instruction is fast.
  /// Defaults to false.
  virtual bool isCtlzFast() const {
    return false;
  }
471 | |
  /// Return true if it is safe to transform an integer-domain bitwise operation
  /// into the equivalent floating-point operation. This should be set to true
  /// if the target has IEEE-754-compliant fabs/fneg operations for the input
  /// type. Defaults to false.
  virtual bool hasBitPreservingFPLogic(EVT VT) const {
    return false;
  }
479 | |
  /// Return true if it is cheaper to split the store of a merged int val
  /// from a pair of smaller values into multiple stores. Defaults to false.
  virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
    return false;
  }
485 | |
  /// Return if the target supports combining a
  /// chain like:
  /// \code
  ///   %andResult = and %val1, #mask
  ///   %icmpResult = icmp %andResult, 0
  /// \endcode
  /// into a single machine instruction of a form like:
  /// \code
  ///   cc = test %register, #mask
  /// \endcode
  /// Defaults to false.
  virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
    return false;
  }
499 | |
  /// Use bitwise logic to make pairs of compares more efficient. For example:
  /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
  /// This should be true when it takes more than one instruction to lower
  /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
  /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
  /// Defaults to false.
  virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
    return false;
  }
508 | |
509 | /// Return the preferred operand type if the target has a quick way to compare |
510 | /// integer values of the given size. Assume that any legal integer type can |
511 | /// be compared efficiently. Targets may override this to allow illegal wide |
512 | /// types to return a vector type if there is support to compare that type. |
513 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { |
514 | MVT VT = MVT::getIntegerVT(NumBits); |
515 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; |
516 | } |
517 | |
  /// Return true if the target should transform:
  /// (X & Y) == Y ---> (~X & Y) == 0
  /// (X & Y) != Y ---> (~X & Y) != 0
  ///
  /// This may be profitable if the target has a bitwise and-not operation that
  /// sets comparison flags. A target may want to limit the transformation based
  /// on the type of Y or if Y is a constant.
  ///
  /// Note that the transform will not occur if Y is known to be a power-of-2
  /// because a mask and compare of a single bit can be handled by inverting the
  /// predicate, for example:
  /// (X & 8) == 8 ---> (X & 8) != 0
  ///
  /// Defaults to false.
  virtual bool hasAndNotCompare(SDValue Y) const {
    return false;
  }
533 | |
  /// Return true if the target has a bitwise and-not operation:
  /// X = ~A & B
  /// This can be used to simplify select or other instructions.
  /// Defaults to hasAndNotCompare(X).
  virtual bool hasAndNot(SDValue X) const {
    // If the target has the more complex version of this operation, assume that
    // it has this operation too.
    return hasAndNotCompare(X);
  }
542 | |
  /// Return true if the target has a bit-test instruction:
  ///   (X & (1 << Y)) ==/!= 0
  /// This knowledge can be used to prevent breaking the pattern,
  /// or creating it if it could be recognized. Defaults to false.
  virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
548 | |
  /// There are two ways to clear extreme bits (either low or high):
  /// Mask:    x &  (-1 << y)  (the instcombine canonical form)
  /// Shifts:  x >> y << y
  /// Return true if the variant with 2 variable shifts is preferred.
  /// Return false if there is no preference (the default).
  virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
    // By default, let's assume that no one prefers shifts.
    return false;
  }
558 | |
  /// Return true if it is profitable to fold a pair of shifts into a mask.
  /// This is usually true on most targets. But some targets, like Thumb1,
  /// have immediate shift instructions, but no immediate "and" instruction;
  /// this makes the fold unprofitable. Defaults to true.
  virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                                 CombineLevel Level) const {
    return true;
  }
567 | |
  /// Should we transform the IR-optimal check for whether given truncation
  /// down into KeptBits would be truncating or not:
  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
  /// Into its more traditional form:
  ///   ((%x << C) a>> C) dstcond %x
  /// Return true if we should transform.
  /// Return false if there is no preference (the default).
  virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
                                                    unsigned KeptBits) const {
    // By default, let's assume that no one prefers shifts.
    return false;
  }
580 | |
581 | /// Given the pattern |
582 | /// (X & (C l>>/<< Y)) ==/!= 0 |
583 | /// return true if it should be transformed into: |
584 | /// ((X <</l>> Y) & C) ==/!= 0 |
585 | /// WARNING: if 'X' is a constant, the fold may deadlock! |
586 | /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() |
587 | /// here because it can end up being not linked in. |
588 | virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
589 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
590 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
591 | SelectionDAG &DAG) const { |
592 | if (hasBitTest(X, Y)) { |
593 | // One interesting pattern that we'd want to form is 'bit test': |
594 | // ((1 << Y) & C) ==/!= 0 |
595 | // But we also need to be careful not to try to reverse that fold. |
596 | |
597 | // Is this '1 << Y' ? |
598 | if (OldShiftOpcode == ISD::SHL && CC->isOne()) |
599 | return false; // Keep the 'bit test' pattern. |
600 | |
601 | // Will it be '1 << Y' after the transform ? |
602 | if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) |
603 | return true; // Do form the 'bit test' pattern. |
604 | } |
605 | |
606 | // If 'X' is a constant, and we transform, then we will immediately |
607 | // try to undo the fold, thus causing endless combine loop. |
608 | // So by default, let's assume everyone prefers the fold |
609 | // iff 'X' is not a constant. |
610 | return !XC; |
611 | } |
612 | |
  /// These two forms are equivalent:
  ///   sub %y, (xor %x, -1)
  ///   add (add %x, 1), %y
  /// The variant with two add's is IR-canonical.
  /// Some targets may prefer one to the other. Defaults to true (the
  /// IR-canonical two-add form).
  virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
    // By default, let's assume that everyone prefers the form with two add's.
    return true;
  }
622 | |
  /// Return true if the target wants to use the optimization that
  /// turns ext(promotableInst1(...(promotableInstN(load)))) into
  /// promotedInst1(...(promotedInstN(ext(load)))).
  /// Backed by the EnableExtLdPromotion flag set by derived targets.
  bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
627 | |
  /// Return true if the target can combine store(extractelement VectorTy,
  /// Idx).
  /// \p Cost[out] gives the cost of that transformation when this is true.
  /// Defaults to false (no combining); \p Cost is left untouched then.
  virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                         unsigned &Cost) const {
    return false;
  }
635 | |
  /// Return true if inserting a scalar into a variable element of an undef
  /// vector is more efficiently handled by splatting the scalar instead.
  /// Defaults to false.
  virtual bool shouldSplatInsEltVarIndex(EVT) const {
    return false;
  }
641 | |
  /// Return true if the target always benefits from combining into FMA for a
  /// given value type. This must typically return false on targets where FMA
  /// takes more cycles to execute than FADD. Defaults to false.
  virtual bool enableAggressiveFMAFusion(EVT VT) const {
    return false;
  }
648 | |
649 | /// Return the ValueType of the result of SETCC operations. |
650 | virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, |
651 | EVT VT) const; |
652 | |
/// Return the ValueType for comparison libcalls. Comparison libcalls include
/// floating point comparison calls, and Ordered/Unordered check calls on
/// floating point numbers.
656 | virtual |
657 | MVT::SimpleValueType getCmpLibcallReturnType() const; |
658 | |
659 | /// For targets without i1 registers, this gives the nature of the high-bits |
660 | /// of boolean values held in types wider than i1. |
661 | /// |
662 | /// "Boolean values" are special true/false values produced by nodes like |
663 | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. |
664 | /// Not to be confused with general values promoted from i1. Some cpus |
665 | /// distinguish between vectors of boolean and scalars; the isVec parameter |
666 | /// selects between the two kinds. For example on X86 a scalar boolean should |
667 | /// be zero extended from i1, while the elements of a vector of booleans |
668 | /// should be sign extended from i1. |
669 | /// |
670 | /// Some cpus also treat floating point types the same way as they treat |
671 | /// vectors instead of the way they treat scalars. |
672 | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { |
673 | if (isVec) |
674 | return BooleanVectorContents; |
675 | return isFloat ? BooleanFloatContents : BooleanContents; |
676 | } |
677 | |
  /// Convenience overload that dispatches on whether \p Type is a vector or a
  /// floating-point type.
  BooleanContent getBooleanContents(EVT Type) const {
    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
  }
681 | |
  /// Return target scheduling preference.
  /// Backed by the SchedPreferenceInfo flag set by derived targets.
  Sched::Preference getSchedulingPreference() const {
    return SchedPreferenceInfo;
  }
686 | |
  /// Some scheduler, e.g. hybrid, can switch to different scheduling heuristics
  /// for different nodes. This function returns the preference (or none) for
  /// the given node. Defaults to Sched::None.
  virtual Sched::Preference getSchedulingPreference(SDNode *) const {
    return Sched::None;
  }
693 | |
694 | /// Return the register class that should be used for the specified value |
695 | /// type. |
696 | virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const { |
697 | (void)isDivergent; |
698 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; |
699 | assert(RC && "This value type is not natively supported!")((RC && "This value type is not natively supported!") ? static_cast<void> (0) : __assert_fail ("RC && \"This value type is not natively supported!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 699, __PRETTY_FUNCTION__)); |
700 | return RC; |
701 | } |
702 | |
703 | /// Allows target to decide about the register class of the |
704 | /// specific value that is live outside the defining block. |
705 | /// Returns true if the value needs uniform register class. |
706 | virtual bool requiresUniformRegister(MachineFunction &MF, |
707 | const Value *) const { |
708 | return false; |
709 | } |
710 | |
711 | /// Return the 'representative' register class for the specified value |
712 | /// type. |
713 | /// |
714 | /// The 'representative' register class is the largest legal super-reg |
715 | /// register class for the register class of the value type. For example, on |
716 | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep |
717 | /// register class is GR64 on x86_64. |
718 | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { |
719 | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; |
720 | return RC; |
721 | } |
722 | |
723 | /// Return the cost of the 'representative' register class for the specified |
724 | /// value type. |
725 | virtual uint8_t getRepRegClassCostFor(MVT VT) const { |
726 | return RepRegClassCostForVT[VT.SimpleTy]; |
727 | } |
728 | |
729 | /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS |
730 | /// instructions, and false if a library call is preferred (e.g for code-size |
731 | /// reasons). |
732 | virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { |
733 | return true; |
734 | } |
735 | |
736 | /// Return true if the target has native support for the specified value type. |
737 | /// This means that it has a register that directly holds it without |
738 | /// promotions or expansions. |
739 | bool isTypeLegal(EVT VT) const { |
740 | assert(!VT.isSimple() ||((!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof (RegClassForVT)) ? static_cast<void> (0) : __assert_fail ("!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 741, __PRETTY_FUNCTION__)) |
741 | (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT))((!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof (RegClassForVT)) ? static_cast<void> (0) : __assert_fail ("!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 741, __PRETTY_FUNCTION__)); |
742 | return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; |
743 | } |
744 | |
745 | class ValueTypeActionImpl { |
746 | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum |
747 | /// that indicates how instruction selection should deal with the type. |
748 | LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; |
749 | |
750 | public: |
751 | ValueTypeActionImpl() { |
752 | std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), |
753 | TypeLegal); |
754 | } |
755 | |
756 | LegalizeTypeAction getTypeAction(MVT VT) const { |
757 | return ValueTypeActions[VT.SimpleTy]; |
758 | } |
759 | |
760 | void setTypeAction(MVT VT, LegalizeTypeAction Action) { |
761 | ValueTypeActions[VT.SimpleTy] = Action; |
762 | } |
763 | }; |
764 | |
765 | const ValueTypeActionImpl &getValueTypeActions() const { |
766 | return ValueTypeActions; |
767 | } |
768 | |
769 | /// Return how we should legalize values of this type, either it is already |
770 | /// legal (return 'Legal') or we need to promote it to a larger type (return |
771 | /// 'Promote'), or we need to expand it into multiple registers of smaller |
772 | /// integer type (return 'Expand'). 'Custom' is not an option. |
773 | LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { |
774 | return getTypeConversion(Context, VT).first; |
775 | } |
776 | LegalizeTypeAction getTypeAction(MVT VT) const { |
777 | return ValueTypeActions.getTypeAction(VT); |
778 | } |
779 | |
780 | /// For types supported by the target, this is an identity function. For |
781 | /// types that must be promoted to larger types, this returns the larger type |
782 | /// to promote to. For integer types that are larger than the largest integer |
783 | /// register, this contains one step in the expansion to get to the smaller |
784 | /// register. For illegal floating point types, this returns the integer type |
785 | /// to transform to. |
786 | EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { |
787 | return getTypeConversion(Context, VT).second; |
788 | } |
789 | |
790 | /// For types supported by the target, this is an identity function. For |
791 | /// types that must be expanded (i.e. integer types that are larger than the |
792 | /// largest integer register or illegal floating point types), this returns |
793 | /// the largest legal type it will be expanded to. |
794 | EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { |
795 | assert(!VT.isVector())((!VT.isVector()) ? static_cast<void> (0) : __assert_fail ("!VT.isVector()", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 795, __PRETTY_FUNCTION__)); |
796 | while (true) { |
797 | switch (getTypeAction(Context, VT)) { |
798 | case TypeLegal: |
799 | return VT; |
800 | case TypeExpandInteger: |
801 | VT = getTypeToTransformTo(Context, VT); |
802 | break; |
803 | default: |
804 | llvm_unreachable("Type is not legal nor is it to be expanded!")::llvm::llvm_unreachable_internal("Type is not legal nor is it to be expanded!" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 804); |
805 | } |
806 | } |
807 | } |
808 | |
809 | /// Vector types are broken down into some number of legal first class types. |
810 | /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 |
811 | /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 |
812 | /// turns into 4 EVT::i32 values with both PPC and X86. |
813 | /// |
814 | /// This method returns the number of registers needed, and the VT for each |
815 | /// register. It also returns the VT and quantity of the intermediate values |
816 | /// before they are promoted/expanded. |
817 | unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, |
818 | EVT &IntermediateVT, |
819 | unsigned &NumIntermediates, |
820 | MVT &RegisterVT) const; |
821 | |
822 | /// Certain targets such as MIPS require that some types such as vectors are |
823 | /// always broken down into scalars in some contexts. This occurs even if the |
824 | /// vector type is legal. |
825 | virtual unsigned getVectorTypeBreakdownForCallingConv( |
826 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
827 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
828 | return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, |
829 | RegisterVT); |
830 | } |
831 | |
832 | struct IntrinsicInfo { |
833 | unsigned opc = 0; // target opcode |
834 | EVT memVT; // memory VT |
835 | |
836 | // value representing memory location |
837 | PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; |
838 | |
839 | int offset = 0; // offset off of ptrVal |
840 | uint64_t size = 0; // the size of the memory location |
841 | // (taken from memVT if zero) |
842 | MaybeAlign align = Align::None(); // alignment |
843 | |
844 | MachineMemOperand::Flags flags = MachineMemOperand::MONone; |
845 | IntrinsicInfo() = default; |
846 | }; |
847 | |
848 | /// Given an intrinsic, checks if on the target the intrinsic will need to map |
849 | /// to a MemIntrinsicNode (touches memory). If this is the case, it returns |
850 | /// true and store the intrinsic information into the IntrinsicInfo that was |
851 | /// passed to the function. |
852 | virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, |
853 | MachineFunction &, |
854 | unsigned /*Intrinsic*/) const { |
855 | return false; |
856 | } |
857 | |
858 | /// Returns true if the target can instruction select the specified FP |
859 | /// immediate natively. If false, the legalizer will materialize the FP |
860 | /// immediate as a load from a constant pool. |
861 | virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/, |
862 | bool ForCodeSize = false) const { |
863 | return false; |
864 | } |
865 | |
866 | /// Targets can use this to indicate that they only support *some* |
867 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a |
868 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be |
869 | /// legal. |
870 | virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { |
871 | return true; |
872 | } |
873 | |
874 | /// Returns true if the operation can trap for the value type. |
875 | /// |
876 | /// VT must be a legal type. By default, we optimistically assume most |
877 | /// operations don't trap except for integer divide and remainder. |
878 | virtual bool canOpTrap(unsigned Op, EVT VT) const; |
879 | |
880 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there |
881 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a |
882 | /// constant pool entry. |
883 | virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, |
884 | EVT /*VT*/) const { |
885 | return false; |
886 | } |
887 | |
888 | /// Return how this operation should be treated: either it is legal, needs to |
889 | /// be promoted to a larger size, needs to be expanded to some other code |
890 | /// sequence, or the target has a custom expander for it. |
891 | LegalizeAction getOperationAction(unsigned Op, EVT VT) const { |
892 | if (VT.isExtended()) return Expand; |
893 | // If a target-specific SDNode requires legalization, require the target |
894 | // to provide custom legalization for it. |
895 | if (Op >= array_lengthof(OpActions[0])) return Custom; |
896 | return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; |
897 | } |
898 | |
899 | /// Custom method defined by each target to indicate if an operation which |
900 | /// may require a scale is supported natively by the target. |
901 | /// If not, the operation is illegal. |
902 | virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, |
903 | unsigned Scale) const { |
904 | return false; |
905 | } |
906 | |
907 | /// Some fixed point operations may be natively supported by the target but |
908 | /// only for specific scales. This method allows for checking |
909 | /// if the width is supported by the target for a given operation that may |
910 | /// depend on scale. |
911 | LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, |
912 | unsigned Scale) const { |
913 | auto Action = getOperationAction(Op, VT); |
914 | if (Action != Legal) |
915 | return Action; |
916 | |
917 | // This operation is supported in this type but may only work on specific |
918 | // scales. |
919 | bool Supported; |
920 | switch (Op) { |
921 | default: |
922 | llvm_unreachable("Unexpected fixed point operation.")::llvm::llvm_unreachable_internal("Unexpected fixed point operation." , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 922); |
923 | case ISD::SMULFIX: |
924 | case ISD::SMULFIXSAT: |
925 | case ISD::UMULFIX: |
926 | case ISD::UMULFIXSAT: |
927 | Supported = isSupportedFixedPointOperation(Op, VT, Scale); |
928 | break; |
929 | } |
930 | |
931 | return Supported ? Action : Expand; |
932 | } |
933 | |
934 | // If Op is a strict floating-point operation, return the result |
935 | // of getOperationAction for the equivalent non-strict operation. |
936 | LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { |
937 | unsigned EqOpc; |
938 | switch (Op) { |
939 | default: llvm_unreachable("Unexpected FP pseudo-opcode")::llvm::llvm_unreachable_internal("Unexpected FP pseudo-opcode" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 939); |
940 | case ISD::STRICT_FADD: EqOpc = ISD::FADD; break; |
941 | case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break; |
942 | case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break; |
943 | case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break; |
944 | case ISD::STRICT_FREM: EqOpc = ISD::FREM; break; |
945 | case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break; |
946 | case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break; |
947 | case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break; |
948 | case ISD::STRICT_FMA: EqOpc = ISD::FMA; break; |
949 | case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break; |
950 | case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break; |
951 | case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break; |
952 | case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break; |
953 | case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break; |
954 | case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break; |
955 | case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break; |
956 | case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break; |
957 | case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break; |
958 | case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break; |
959 | case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break; |
960 | case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break; |
961 | case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break; |
962 | case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break; |
963 | case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break; |
964 | case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break; |
965 | case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break; |
966 | case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break; |
967 | case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break; |
968 | } |
969 | |
970 | return getOperationAction(EqOpc, VT); |
971 | } |
972 | |
973 | /// Return true if the specified operation is legal on this target or can be |
974 | /// made legal with custom lowering. This is used to help guide high-level |
975 | /// lowering decisions. |
976 | bool isOperationLegalOrCustom(unsigned Op, EVT VT) const { |
977 | return (VT == MVT::Other || isTypeLegal(VT)) && |
978 | (getOperationAction(Op, VT) == Legal || |
979 | getOperationAction(Op, VT) == Custom); |
980 | } |
981 | |
982 | /// Return true if the specified operation is legal on this target or can be |
983 | /// made legal using promotion. This is used to help guide high-level lowering |
984 | /// decisions. |
985 | bool isOperationLegalOrPromote(unsigned Op, EVT VT) const { |
986 | return (VT == MVT::Other || isTypeLegal(VT)) && |
987 | (getOperationAction(Op, VT) == Legal || |
988 | getOperationAction(Op, VT) == Promote); |
989 | } |
990 | |
991 | /// Return true if the specified operation is legal on this target or can be |
992 | /// made legal with custom lowering or using promotion. This is used to help |
993 | /// guide high-level lowering decisions. |
994 | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const { |
995 | return (VT == MVT::Other || isTypeLegal(VT)) && |
996 | (getOperationAction(Op, VT) == Legal || |
997 | getOperationAction(Op, VT) == Custom || |
998 | getOperationAction(Op, VT) == Promote); |
999 | } |
1000 | |
1001 | /// Return true if the operation uses custom lowering, regardless of whether |
1002 | /// the type is legal or not. |
1003 | bool isOperationCustom(unsigned Op, EVT VT) const { |
1004 | return getOperationAction(Op, VT) == Custom; |
1005 | } |
1006 | |
1007 | /// Return true if lowering to a jump table is allowed. |
1008 | virtual bool areJTsAllowed(const Function *Fn) const { |
1009 | if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") |
1010 | return false; |
1011 | |
1012 | return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
1013 | isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
1014 | } |
1015 | |
1016 | /// Check whether the range [Low,High] fits in a machine word. |
1017 | bool rangeFitsInWord(const APInt &Low, const APInt &High, |
1018 | const DataLayout &DL) const { |
1019 | // FIXME: Using the pointer type doesn't seem ideal. |
1020 | uint64_t BW = DL.getIndexSizeInBits(0u); |
1021 | uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX(18446744073709551615UL) - 1) + 1; |
1022 | return Range <= BW; |
1023 | } |
1024 | |
1025 | /// Return true if lowering to a jump table is suitable for a set of case |
1026 | /// clusters which may contain \p NumCases cases, \p Range range of values. |
1027 | virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, |
1028 | uint64_t Range) const { |
1029 | // FIXME: This function check the maximum table size and density, but the |
1030 | // minimum size is not checked. It would be nice if the minimum size is |
1031 | // also combined within this function. Currently, the minimum size check is |
1032 | // performed in findJumpTable() in SelectionDAGBuiler and |
1033 | // getEstimatedNumberOfCaseClusters() in BasicTTIImpl. |
1034 | const bool OptForSize = SI->getParent()->getParent()->hasOptSize(); |
1035 | const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize); |
1036 | const unsigned MaxJumpTableSize = getMaximumJumpTableSize(); |
1037 | |
1038 | // Check whether the number of cases is small enough and |
1039 | // the range is dense enough for a jump table. |
1040 | if ((OptForSize || Range <= MaxJumpTableSize) && |
1041 | (NumCases * 100 >= Range * MinDensity)) { |
1042 | return true; |
1043 | } |
1044 | return false; |
1045 | } |
1046 | |
1047 | /// Return true if lowering to a bit test is suitable for a set of case |
1048 | /// clusters which contains \p NumDests unique destinations, \p Low and |
1049 | /// \p High as its lowest and highest case values, and expects \p NumCmps |
1050 | /// case value comparisons. Check if the number of destinations, comparison |
1051 | /// metric, and range are all suitable. |
1052 | bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, |
1053 | const APInt &Low, const APInt &High, |
1054 | const DataLayout &DL) const { |
1055 | // FIXME: I don't think NumCmps is the correct metric: a single case and a |
1056 | // range of cases both require only one branch to lower. Just looking at the |
1057 | // number of clusters and destinations should be enough to decide whether to |
1058 | // build bit tests. |
1059 | |
1060 | // To lower a range with bit tests, the range must fit the bitwidth of a |
1061 | // machine word. |
1062 | if (!rangeFitsInWord(Low, High, DL)) |
1063 | return false; |
1064 | |
1065 | // Decide whether it's profitable to lower this range with bit tests. Each |
1066 | // destination requires a bit test and branch, and there is an overall range |
1067 | // check branch. For a small number of clusters, separate comparisons might |
1068 | // be cheaper, and for many destinations, splitting the range might be |
1069 | // better. |
1070 | return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || |
1071 | (NumDests == 3 && NumCmps >= 6); |
1072 | } |
1073 | |
1074 | /// Return true if the specified operation is illegal on this target or |
1075 | /// unlikely to be made legal with custom lowering. This is used to help guide |
1076 | /// high-level lowering decisions. |
1077 | bool isOperationExpand(unsigned Op, EVT VT) const { |
1078 | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); |
1079 | } |
1080 | |
1081 | /// Return true if the specified operation is legal on this target. |
1082 | bool isOperationLegal(unsigned Op, EVT VT) const { |
1083 | return (VT == MVT::Other || isTypeLegal(VT)) && |
1084 | getOperationAction(Op, VT) == Legal; |
1085 | } |
1086 | |
1087 | /// Return how this load with extension should be treated: either it is legal, |
1088 | /// needs to be promoted to a larger size, needs to be expanded to some other |
1089 | /// code sequence, or the target has a custom expander for it. |
1090 | LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, |
1091 | EVT MemVT) const { |
1092 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1093 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
1094 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
1095 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&((ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT ::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1096, __PRETTY_FUNCTION__)) |
1096 | MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!")((ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT ::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1096, __PRETTY_FUNCTION__)); |
1097 | unsigned Shift = 4 * ExtType; |
1098 | return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); |
1099 | } |
1100 | |
1101 | /// Return true if the specified load with extension is legal on this target. |
1102 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1103 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
1104 | } |
1105 | |
1106 | /// Return true if the specified load with extension is legal or custom |
1107 | /// on this target. |
1108 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
1109 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
1110 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
1111 | } |
1112 | |
1113 | /// Return how this store with truncation should be treated: either it is |
1114 | /// legal, needs to be promoted to a larger size, needs to be expanded to some |
1115 | /// other code sequence, or the target has a custom expander for it. |
1116 | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { |
1117 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
1118 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
1119 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
1120 | assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&((ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1121, __PRETTY_FUNCTION__)) |
1121 | "Table isn't big enough!")((ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1121, __PRETTY_FUNCTION__)); |
1122 | return TruncStoreActions[ValI][MemI]; |
1123 | } |
1124 | |
1125 | /// Return true if the specified store with truncation is legal on this |
1126 | /// target. |
1127 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { |
1128 | return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; |
1129 | } |
1130 | |
1131 | /// Return true if the specified store with truncation has solution on this |
1132 | /// target. |
1133 | bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { |
1134 | return isTypeLegal(ValVT) && |
1135 | (getTruncStoreAction(ValVT, MemVT) == Legal || |
1136 | getTruncStoreAction(ValVT, MemVT) == Custom); |
1137 | } |
1138 | |
1139 | /// Return how the indexed load should be treated: either it is legal, needs |
1140 | /// to be promoted to a larger size, needs to be expanded to some other code |
1141 | /// sequence, or the target has a custom expander for it. |
1142 | LegalizeAction |
1143 | getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
1144 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&((IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1145, __PRETTY_FUNCTION__)) |
1145 | "Table isn't big enough!")((IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1145, __PRETTY_FUNCTION__)); |
1146 | unsigned Ty = (unsigned)VT.SimpleTy; |
1147 | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); |
1148 | } |
1149 | |
1150 | /// Return true if the specified indexed load is legal on this target. |
1151 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
1152 | return VT.isSimple() && |
1153 | (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || |
1154 | getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); |
1155 | } |
1156 | |
1157 | /// Return how the indexed store should be treated: either it is legal, needs |
1158 | /// to be promoted to a larger size, needs to be expanded to some other code |
1159 | /// sequence, or the target has a custom expander for it. |
1160 | LegalizeAction |
1161 | getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
1162 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&((IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1163, __PRETTY_FUNCTION__)) |
1163 | "Table isn't big enough!")((IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1163, __PRETTY_FUNCTION__)); |
1164 | unsigned Ty = (unsigned)VT.SimpleTy; |
1165 | return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); |
1166 | } |
1167 | |
1168 | /// Return true if the specified indexed load is legal on this target. |
1169 | bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { |
1170 | return VT.isSimple() && |
1171 | (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || |
1172 | getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); |
1173 | } |
1174 | |
1175 | /// Return how the condition code should be treated: either it is legal, needs |
1176 | /// to be expanded to some other code sequence, or the target has a custom |
1177 | /// expander for it. |
1178 | LegalizeAction |
1179 | getCondCodeAction(ISD::CondCode CC, MVT VT) const { |
1180 | assert((unsigned)CC < array_lengthof(CondCodeActions) &&(((unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions [0]) && "Table isn't big enough!") ? static_cast<void > (0) : __assert_fail ("(unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1182, __PRETTY_FUNCTION__)) |
1181 | ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&(((unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions [0]) && "Table isn't big enough!") ? static_cast<void > (0) : __assert_fail ("(unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1182, __PRETTY_FUNCTION__)) |
1182 | "Table isn't big enough!")(((unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions [0]) && "Table isn't big enough!") ? static_cast<void > (0) : __assert_fail ("(unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1182, __PRETTY_FUNCTION__)); |
1183 | // See setCondCodeAction for how this is encoded. |
1184 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
1185 | uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; |
1186 | LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); |
1187 | assert(Action != Promote && "Can't promote condition code!")((Action != Promote && "Can't promote condition code!" ) ? static_cast<void> (0) : __assert_fail ("Action != Promote && \"Can't promote condition code!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1187, __PRETTY_FUNCTION__)); |
1188 | return Action; |
1189 | } |
1190 | |
1191 | /// Return true if the specified condition code is legal on this target. |
1192 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
1193 | return getCondCodeAction(CC, VT) == Legal; |
1194 | } |
1195 | |
1196 | /// Return true if the specified condition code is legal or custom on this |
1197 | /// target. |
1198 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { |
1199 | return getCondCodeAction(CC, VT) == Legal || |
1200 | getCondCodeAction(CC, VT) == Custom; |
1201 | } |
1202 | |
1203 | /// If the action for this operation is to promote, this method returns the |
1204 | /// ValueType to promote to. |
1205 | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { |
1206 | assert(getOperationAction(Op, VT) == Promote &&((getOperationAction(Op, VT) == Promote && "This operation isn't promoted!" ) ? static_cast<void> (0) : __assert_fail ("getOperationAction(Op, VT) == Promote && \"This operation isn't promoted!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1207, __PRETTY_FUNCTION__)) |
1207 | "This operation isn't promoted!")((getOperationAction(Op, VT) == Promote && "This operation isn't promoted!" ) ? static_cast<void> (0) : __assert_fail ("getOperationAction(Op, VT) == Promote && \"This operation isn't promoted!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1207, __PRETTY_FUNCTION__)); |
1208 | |
1209 | // See if this has an explicit type specified. |
1210 | std::map<std::pair<unsigned, MVT::SimpleValueType>, |
1211 | MVT::SimpleValueType>::const_iterator PTTI = |
1212 | PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); |
1213 | if (PTTI != PromoteToType.end()) return PTTI->second; |
1214 | |
1215 | assert((VT.isInteger() || VT.isFloatingPoint()) &&(((VT.isInteger() || VT.isFloatingPoint()) && "Cannot autopromote this type, add it with AddPromotedToType." ) ? static_cast<void> (0) : __assert_fail ("(VT.isInteger() || VT.isFloatingPoint()) && \"Cannot autopromote this type, add it with AddPromotedToType.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1216, __PRETTY_FUNCTION__)) |
1216 | "Cannot autopromote this type, add it with AddPromotedToType.")(((VT.isInteger() || VT.isFloatingPoint()) && "Cannot autopromote this type, add it with AddPromotedToType." ) ? static_cast<void> (0) : __assert_fail ("(VT.isInteger() || VT.isFloatingPoint()) && \"Cannot autopromote this type, add it with AddPromotedToType.\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1216, __PRETTY_FUNCTION__)); |
1217 | |
1218 | MVT NVT = VT; |
1219 | do { |
1220 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); |
1221 | assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&((NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && "Didn't find type to promote to!") ? static_cast< void> (0) : __assert_fail ("NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && \"Didn't find type to promote to!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1222, __PRETTY_FUNCTION__)) |
1222 | "Didn't find type to promote to!")((NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && "Didn't find type to promote to!") ? static_cast< void> (0) : __assert_fail ("NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && \"Didn't find type to promote to!\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1222, __PRETTY_FUNCTION__)); |
1223 | } while (!isTypeLegal(NVT) || |
1224 | getOperationAction(Op, NVT) == Promote); |
1225 | return NVT; |
1226 | } |
1227 | |
1228 | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM |
1229 | /// operations except for the pointer size. If AllowUnknown is true, this |
1230 | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), |
1231 | /// otherwise it will assert. |
1232 | EVT getValueType(const DataLayout &DL, Type *Ty, |
1233 | bool AllowUnknown = false) const { |
1234 | // Lower scalar pointers to native pointer types. |
1235 | if (auto *PTy = dyn_cast<PointerType>(Ty)) |
1236 | return getPointerTy(DL, PTy->getAddressSpace()); |
1237 | |
1238 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { |
1239 | Type *EltTy = VTy->getElementType(); |
1240 | // Lower vectors of pointers to native pointer types. |
1241 | if (auto *PTy = dyn_cast<PointerType>(EltTy)) { |
1242 | EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace())); |
1243 | EltTy = PointerTy.getTypeForEVT(Ty->getContext()); |
1244 | } |
1245 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), |
1246 | VTy->getElementCount()); |
1247 | } |
1248 | |
1249 | return EVT::getEVT(Ty, AllowUnknown); |
1250 | } |
1251 | |
1252 | EVT getMemValueType(const DataLayout &DL, Type *Ty, |
1253 | bool AllowUnknown = false) const { |
1254 | // Lower scalar pointers to native pointer types. |
1255 | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) |
1256 | return getPointerMemTy(DL, PTy->getAddressSpace()); |
1257 | else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
1258 | Type *Elm = VTy->getElementType(); |
1259 | if (PointerType *PT = dyn_cast<PointerType>(Elm)) { |
1260 | EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace())); |
1261 | Elm = PointerTy.getTypeForEVT(Ty->getContext()); |
1262 | } |
1263 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), |
1264 | VTy->getNumElements()); |
1265 | } |
1266 | |
1267 | return getValueType(DL, Ty, AllowUnknown); |
1268 | } |
1269 | |
1270 | |
1271 | /// Return the MVT corresponding to this LLVM type. See getValueType. |
1272 | MVT getSimpleValueType(const DataLayout &DL, Type *Ty, |
1273 | bool AllowUnknown = false) const { |
1274 | return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); |
1275 | } |
1276 | |
1277 | /// Return the desired alignment for ByVal or InAlloca aggregate function |
1278 | /// arguments in the caller parameter area. This is the actual alignment, not |
1279 | /// its logarithm. |
1280 | virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; |
1281 | |
1282 | /// Return the type of registers that this ValueType will eventually require. |
1283 | MVT getRegisterType(MVT VT) const { |
1284 | assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT))(((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1284, __PRETTY_FUNCTION__)); |
1285 | return RegisterTypeForVT[VT.SimpleTy]; |
1286 | } |
1287 | |
1288 | /// Return the type of registers that this ValueType will eventually require. |
1289 | MVT getRegisterType(LLVMContext &Context, EVT VT) const { |
1290 | if (VT.isSimple()) { |
1291 | assert((unsigned)VT.getSimpleVT().SimpleTy <(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1292, __PRETTY_FUNCTION__)) |
1292 | array_lengthof(RegisterTypeForVT))(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1292, __PRETTY_FUNCTION__)); |
1293 | return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; |
1294 | } |
1295 | if (VT.isVector()) { |
1296 | EVT VT1; |
1297 | MVT RegisterVT; |
1298 | unsigned NumIntermediates; |
1299 | (void)getVectorTypeBreakdown(Context, VT, VT1, |
1300 | NumIntermediates, RegisterVT); |
1301 | return RegisterVT; |
1302 | } |
1303 | if (VT.isInteger()) { |
1304 | return getRegisterType(Context, getTypeToTransformTo(Context, VT)); |
1305 | } |
1306 | llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1306); |
1307 | } |
1308 | |
1309 | /// Return the number of registers that this ValueType will eventually |
1310 | /// require. |
1311 | /// |
1312 | /// This is one for any types promoted to live in larger registers, but may be |
1313 | /// more than one for types (like i64) that are split into pieces. For types |
1314 | /// like i140, which are first promoted then expanded, it is the number of |
1315 | /// registers needed to hold all the bits of the original type. For an i140 |
1316 | /// on a 32 bit machine this means 5 registers. |
1317 | unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { |
1318 | if (VT.isSimple()) { |
1319 | assert((unsigned)VT.getSimpleVT().SimpleTy <(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1320, __PRETTY_FUNCTION__)) |
1320 | array_lengthof(NumRegistersForVT))(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1320, __PRETTY_FUNCTION__)); |
1321 | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; |
1322 | } |
1323 | if (VT.isVector()) { |
1324 | EVT VT1; |
1325 | MVT VT2; |
1326 | unsigned NumIntermediates; |
1327 | return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); |
1328 | } |
1329 | if (VT.isInteger()) { |
1330 | unsigned BitWidth = VT.getSizeInBits(); |
1331 | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); |
1332 | return (BitWidth + RegWidth - 1) / RegWidth; |
1333 | } |
1334 | llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1334); |
1335 | } |
1336 | |
1337 | /// Certain combinations of ABIs, Targets and features require that types |
1338 | /// are legal for some operations and not for other operations. |
1339 | /// For MIPS all vector types must be passed through the integer register set. |
1340 | virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, |
1341 | CallingConv::ID CC, EVT VT) const { |
1342 | return getRegisterType(Context, VT); |
1343 | } |
1344 | |
1345 | /// Certain targets require unusual breakdowns of certain types. For MIPS, |
1346 | /// this occurs when a vector type is used, as vector are passed through the |
1347 | /// integer register set. |
1348 | virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
1349 | CallingConv::ID CC, |
1350 | EVT VT) const { |
1351 | return getNumRegisters(Context, VT); |
1352 | } |
1353 | |
1354 | /// Certain targets have context-sensitive alignment requirements, where one |
1355 | /// type has the alignment requirement of another type. |
1356 | virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, |
1357 | DataLayout DL) const { |
1358 | return DL.getABITypeAlignment(ArgTy); |
1359 | } |
1360 | |
1361 | /// If true, then instruction selection should seek to shrink the FP constant |
1362 | /// of the specified type to a smaller type in order to save space and / or |
1363 | /// reduce runtime. |
1364 | virtual bool ShouldShrinkFPConstant(EVT) const { return true; } |
1365 | |
1366 | /// Return true if it is profitable to reduce a load to a smaller type. |
1367 | /// Example: (i16 (trunc (i32 (load x))) -> i16 load x |
1368 | virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
1369 | EVT NewVT) const { |
1370 | // By default, assume that it is cheaper to extract a subvector from a wide |
1371 | // vector load rather than creating multiple narrow vector loads. |
1372 | if (NewVT.isVector() && !Load->hasOneUse()) |
1373 | return false; |
1374 | |
1375 | return true; |
1376 | } |
1377 | |
1378 | /// When splitting a value of the specified type into parts, does the Lo |
1379 | /// or Hi part come first? This usually follows the endianness, except |
1380 | /// for ppcf128, where the Hi part always comes first. |
1381 | bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { |
1382 | return DL.isBigEndian() || VT == MVT::ppcf128; |
1383 | } |
1384 | |
1385 | /// If true, the target has custom DAG combine transformations that it can |
1386 | /// perform for the specified node. |
1387 | bool hasTargetDAGCombine(ISD::NodeType NT) const { |
1388 | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray))((unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray )) ? static_cast<void> (0) : __assert_fail ("unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1388, __PRETTY_FUNCTION__)); |
1389 | return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); |
1390 | } |
1391 | |
1392 | unsigned getGatherAllAliasesMaxDepth() const { |
1393 | return GatherAllAliasesMaxDepth; |
1394 | } |
1395 | |
1396 | /// Returns the size of the platform's va_list object. |
1397 | virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { |
1398 | return getPointerTy(DL).getSizeInBits(); |
1399 | } |
1400 | |
1401 | /// Get maximum # of store operations permitted for llvm.memset |
1402 | /// |
1403 | /// This function returns the maximum number of store operations permitted |
1404 | /// to replace a call to llvm.memset. The value is set by the target at the |
1405 | /// performance threshold for such a replacement. If OptSize is true, |
1406 | /// return the limit for functions that have OptSize attribute. |
1407 | unsigned getMaxStoresPerMemset(bool OptSize) const { |
1408 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; |
1409 | } |
1410 | |
1411 | /// Get maximum # of store operations permitted for llvm.memcpy |
1412 | /// |
1413 | /// This function returns the maximum number of store operations permitted |
1414 | /// to replace a call to llvm.memcpy. The value is set by the target at the |
1415 | /// performance threshold for such a replacement. If OptSize is true, |
1416 | /// return the limit for functions that have OptSize attribute. |
1417 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { |
1418 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; |
1419 | } |
1420 | |
1421 | /// \brief Get maximum # of store operations to be glued together |
1422 | /// |
1423 | /// This function returns the maximum number of store operations permitted |
1424 | /// to glue together during lowering of llvm.memcpy. The value is set by |
1425 | /// the target at the performance threshold for such a replacement. |
1426 | virtual unsigned getMaxGluedStoresPerMemcpy() const { |
1427 | return MaxGluedStoresPerMemcpy; |
1428 | } |
1429 | |
1430 | /// Get maximum # of load operations permitted for memcmp |
1431 | /// |
1432 | /// This function returns the maximum number of load operations permitted |
1433 | /// to replace a call to memcmp. The value is set by the target at the |
1434 | /// performance threshold for such a replacement. If OptSize is true, |
1435 | /// return the limit for functions that have OptSize attribute. |
1436 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { |
1437 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; |
1438 | } |
1439 | |
1440 | /// Get maximum # of store operations permitted for llvm.memmove |
1441 | /// |
1442 | /// This function returns the maximum number of store operations permitted |
1443 | /// to replace a call to llvm.memmove. The value is set by the target at the |
1444 | /// performance threshold for such a replacement. If OptSize is true, |
1445 | /// return the limit for functions that have OptSize attribute. |
1446 | unsigned getMaxStoresPerMemmove(bool OptSize) const { |
1447 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; |
1448 | } |
1449 | |
1450 | /// Determine if the target supports unaligned memory accesses. |
1451 | /// |
1452 | /// This function returns true if the target allows unaligned memory accesses |
1453 | /// of the specified type in the given address space. If true, it also returns |
1454 | /// whether the unaligned memory access is "fast" in the last argument by |
1455 | /// reference. This is used, for example, in situations where an array |
1456 | /// copy/move/set is converted to a sequence of store operations. Its use |
1457 | /// helps to ensure that such replacements don't generate code that causes an |
1458 | /// alignment error (trap) on the target machine. |
1459 | virtual bool allowsMisalignedMemoryAccesses( |
1460 | EVT, unsigned AddrSpace = 0, unsigned Align = 1, |
1461 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1462 | bool * /*Fast*/ = nullptr) const { |
1463 | return false; |
1464 | } |
1465 | |
1466 | /// LLT handling variant. |
1467 | virtual bool allowsMisalignedMemoryAccesses( |
1468 | LLT, unsigned AddrSpace = 0, unsigned Align = 1, |
1469 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1470 | bool * /*Fast*/ = nullptr) const { |
1471 | return false; |
1472 | } |
1473 | |
1474 | /// This function returns true if the memory access is aligned or if the |
1475 | /// target allows this specific unaligned memory access. If the access is |
1476 | /// allowed, the optional final parameter returns if the access is also fast |
1477 | /// (as defined by the target). |
1478 | bool allowsMemoryAccessForAlignment( |
1479 | LLVMContext &Context, const DataLayout &DL, EVT VT, |
1480 | unsigned AddrSpace = 0, unsigned Alignment = 1, |
1481 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1482 | bool *Fast = nullptr) const; |
1483 | |
1484 | /// Return true if the memory access of this type is aligned or if the target |
1485 | /// allows this specific unaligned access for the given MachineMemOperand. |
1486 | /// If the access is allowed, the optional final parameter returns if the |
1487 | /// access is also fast (as defined by the target). |
1488 | bool allowsMemoryAccessForAlignment(LLVMContext &Context, |
1489 | const DataLayout &DL, EVT VT, |
1490 | const MachineMemOperand &MMO, |
1491 | bool *Fast = nullptr) const; |
1492 | |
1493 | /// Return true if the target supports a memory access of this type for the |
1494 | /// given address space and alignment. If the access is allowed, the optional |
1495 | /// final parameter returns if the access is also fast (as defined by the |
1496 | /// target). |
1497 | virtual bool |
1498 | allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1499 | unsigned AddrSpace = 0, unsigned Alignment = 1, |
1500 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
1501 | bool *Fast = nullptr) const; |
1502 | |
1503 | /// Return true if the target supports a memory access of this type for the |
1504 | /// given MachineMemOperand. If the access is allowed, the optional |
1505 | /// final parameter returns if the access is also fast (as defined by the |
1506 | /// target). |
1507 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
1508 | const MachineMemOperand &MMO, |
1509 | bool *Fast = nullptr) const; |
1510 | |
1511 | /// Returns the target specific optimal type for load and store operations as |
1512 | /// a result of memset, memcpy, and memmove lowering. |
1513 | /// |
1514 | /// If DstAlign is zero, it means the destination can satisfy any alignment |
1515 | /// constraint. Similarly, if SrcAlign is zero it means there isn't |
1516 | /// a need to check it against alignment requirement, probably because the |
1517 | /// source does not need to be loaded. If 'IsMemset' is true, that means it's |
1518 | /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of |
1519 | /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it |
1520 | /// does not need to be loaded. It returns EVT::Other if the type should be |
1521 | /// determined using generic target-independent logic. |
1522 | virtual EVT |
1523 | getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/, |
1524 | unsigned /*SrcAlign*/, bool /*IsMemset*/, |
1525 | bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, |
1526 | const AttributeList & /*FuncAttributes*/) const { |
1527 | return MVT::Other; |
1528 | } |
1529 | |
1530 | |
1531 | /// LLT returning variant. |
1532 | virtual LLT |
1533 | getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/, |
1534 | unsigned /*SrcAlign*/, bool /*IsMemset*/, |
1535 | bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, |
1536 | const AttributeList & /*FuncAttributes*/) const { |
1537 | return LLT(); |
1538 | } |
1539 | |
1540 | /// Returns true if it's safe to use load / store of the specified type to |
1541 | /// expand memcpy / memset inline. |
1542 | /// |
1543 | /// This is mostly true for all types except for some special cases. For |
1544 | /// example, on X86 targets without SSE2 f64 load / store are done with fldl / |
1545 | /// fstpl which also does type conversion. Note the specified type doesn't |
1546 | /// have to be legal as the hook is used before type legalization. |
1547 | virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } |
1548 | |
1549 | /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp. |
1550 | bool usesUnderscoreSetJmp() const { |
1551 | return UseUnderscoreSetJmp; |
1552 | } |
1553 | |
1554 | /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp. |
1555 | bool usesUnderscoreLongJmp() const { |
1556 | return UseUnderscoreLongJmp; |
1557 | } |
1558 | |
1559 | /// Return lower limit for number of blocks in a jump table. |
1560 | virtual unsigned getMinimumJumpTableEntries() const; |
1561 | |
1562 | /// Return lower limit of the density in a jump table. |
1563 | unsigned getMinimumJumpTableDensity(bool OptForSize) const; |
1564 | |
1565 | /// Return upper limit for number of entries in a jump table. |
1566 | /// Zero if no limit. |
1567 | unsigned getMaximumJumpTableSize() const; |
1568 | |
1569 | virtual bool isJumpTableRelative() const { |
1570 | return TM.isPositionIndependent(); |
1571 | } |
1572 | |
1573 | /// If a physical register, this specifies the register that |
1574 | /// llvm.savestack/llvm.restorestack should save and restore. |
1575 | unsigned getStackPointerRegisterToSaveRestore() const { |
1576 | return StackPointerRegisterToSaveRestore; |
1577 | } |
1578 | |
1579 | /// If a physical register, this returns the register that receives the |
1580 | /// exception address on entry to an EH pad. |
1581 | virtual unsigned |
1582 | getExceptionPointerRegister(const Constant *PersonalityFn) const { |
1583 | // 0 is guaranteed to be the NoRegister value on all targets |
1584 | return 0; |
1585 | } |
1586 | |
1587 | /// If a physical register, this returns the register that receives the |
1588 | /// exception typeid on entry to a landing pad. |
1589 | virtual unsigned |
1590 | getExceptionSelectorRegister(const Constant *PersonalityFn) const { |
1591 | // 0 is guaranteed to be the NoRegister value on all targets |
1592 | return 0; |
1593 | } |
1594 | |
1595 | virtual bool needsFixedCatchObjects() const { |
1596 | report_fatal_error("Funclet EH is not implemented for this target"); |
1597 | } |
1598 | |
1599 | /// Return the minimum stack alignment of an argument. |
1600 | Align getMinStackArgumentAlignment() const { |
1601 | return MinStackArgumentAlignment; |
1602 | } |
1603 | |
1604 | /// Return the minimum function alignment. |
1605 | Align getMinFunctionAlignment() const { return MinFunctionAlignment; } |
1606 | |
1607 | /// Return the preferred function alignment. |
1608 | Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } |
1609 | |
1610 | /// Return the preferred loop alignment. |
1611 | virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const { |
1612 | return PrefLoopAlignment; |
1613 | } |
1614 | |
1615 | /// Should loops be aligned even when the function is marked OptSize (but not |
1616 | /// MinSize). |
1617 | virtual bool alignLoopsWithOptSize() const { |
1618 | return false; |
1619 | } |
1620 | |
1621 | /// If the target has a standard location for the stack protector guard, |
1622 | /// returns the address of that location. Otherwise, returns nullptr. |
1623 | /// DEPRECATED: please override useLoadStackGuardNode and customize |
1624 | /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). |
1625 | virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; |
1626 | |
1627 | /// Inserts necessary declarations for SSP (stack protection) purpose. |
1628 | /// Should be used only when getIRStackGuard returns nullptr. |
1629 | virtual void insertSSPDeclarations(Module &M) const; |
1630 | |
1631 | /// Return the variable that's previously inserted by insertSSPDeclarations, |
1632 | /// if any, otherwise return nullptr. Should be used only when |
1633 | /// getIRStackGuard returns nullptr. |
1634 | virtual Value *getSDagStackGuard(const Module &M) const; |
1635 | |
1636 | /// If this function returns true, stack protection checks should XOR the |
1637 | /// frame pointer (or whichever pointer is used to address locals) into the |
1638 | /// stack guard value before checking it. getIRStackGuard must return nullptr |
1639 | /// if this returns true. |
1640 | virtual bool useStackGuardXorFP() const { return false; } |
1641 | |
1642 | /// If the target has a standard stack protection check function that |
1643 | /// performs validation and error handling, returns the function. Otherwise, |
1644 | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. |
1645 | /// Should be used only when getIRStackGuard returns nullptr. |
1646 | virtual Function *getSSPStackGuardCheck(const Module &M) const; |
1647 | |
1648 | protected: |
1649 | Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, |
1650 | bool UseTLS) const; |
1651 | |
1652 | public: |
1653 | /// Returns the target-specific address of the unsafe stack pointer. |
1654 | virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; |
1655 | |
1656 | /// Returns the name of the symbol used to emit stack probes or the empty |
1657 | /// string if not applicable. |
1658 | virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { |
1659 | return ""; |
1660 | } |
1661 | |
1662 | /// Returns true if a cast between SrcAS and DestAS is a noop. |
1663 | virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
1664 | return false; |
1665 | } |
1666 | |
1667 | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we |
1668 | /// are happy to sink it into basic blocks. A cast may be free, but not |
1669 | /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. |
1670 | virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
1671 | return isNoopAddrSpaceCast(SrcAS, DestAS); |
1672 | } |
1673 | |
1674 | /// Return true if the pointer arguments to CI should be aligned by aligning |
1675 | /// the object whose address is being passed. If so then MinSize is set to the |
1676 | /// minimum size the object must be to be aligned and PrefAlign is set to the |
1677 | /// preferred alignment. |
1678 | virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, |
1679 | unsigned & /*PrefAlign*/) const { |
1680 | return false; |
1681 | } |
1682 | |
1683 | //===--------------------------------------------------------------------===// |
1684 | /// \name Helpers for TargetTransformInfo implementations |
1685 | /// @{ |
1686 | |
1687 | /// Get the ISD node that corresponds to the Instruction class opcode. |
1688 | int InstructionOpcodeToISD(unsigned Opcode) const; |
1689 | |
1690 | /// Estimate the cost of type-legalization and the legalized type. |
1691 | std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, |
1692 | Type *Ty) const; |
1693 | |
1694 | /// @} |
1695 | |
1696 | //===--------------------------------------------------------------------===// |
1697 | /// \name Helpers for atomic expansion. |
1698 | /// @{ |
1699 | |
1700 | /// Returns the maximum atomic operation size (in bits) supported by |
1701 | /// the backend. Atomic operations greater than this size (as well |
1702 | /// as ones that are not naturally aligned), will be expanded by |
1703 | /// AtomicExpandPass into an __atomic_* library call. |
1704 | unsigned getMaxAtomicSizeInBitsSupported() const { |
1705 | return MaxAtomicSizeInBitsSupported; |
1706 | } |
1707 | |
1708 | /// Returns the size of the smallest cmpxchg or ll/sc instruction |
1709 | /// the backend supports. Any smaller operations are widened in |
1710 | /// AtomicExpandPass. |
1711 | /// |
1712 | /// Note that *unlike* operations above the maximum size, atomic ops |
1713 | /// are still natively supported below the minimum; they just |
1714 | /// require a more complex expansion. |
1715 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
1716 | |
1717 | /// Whether the target supports unaligned atomic operations. |
1718 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } |
1719 | |
1720 | /// Whether AtomicExpandPass should automatically insert fences and reduce |
1721 | /// ordering for this atomic. This should be true for most architectures with |
1722 | /// weak memory ordering. Defaults to false. |
1723 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
1724 | return false; |
1725 | } |
1726 | |
1727 | /// Perform a load-linked operation on Addr, returning a "Value *" with the |
1728 | /// corresponding pointee type. This may entail some non-trivial operations to |
1729 | /// truncate or reconstruct types that will be illegal in the backend. See |
1730 | /// ARMISelLowering for an example implementation. |
1731 | virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, |
1732 | AtomicOrdering Ord) const { |
1733 | llvm_unreachable("Load linked unimplemented on this target")::llvm::llvm_unreachable_internal("Load linked unimplemented on this target" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1733); |
1734 | } |
1735 | |
1736 | /// Perform a store-conditional operation to Addr. Return the status of the |
1737 | /// store. This should be 0 if the store succeeded, non-zero otherwise. |
1738 | virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, |
1739 | Value *Addr, AtomicOrdering Ord) const { |
1740 | llvm_unreachable("Store conditional unimplemented on this target")::llvm::llvm_unreachable_internal("Store conditional unimplemented on this target" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1740); |
1741 | } |
1742 | |
1743 | /// Perform a masked atomicrmw using a target-specific intrinsic. This |
1744 | /// represents the core LL/SC loop which will be lowered at a late stage by |
1745 | /// the backend. |
1746 | virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder, |
1747 | AtomicRMWInst *AI, |
1748 | Value *AlignedAddr, Value *Incr, |
1749 | Value *Mask, Value *ShiftAmt, |
1750 | AtomicOrdering Ord) const { |
1751 | llvm_unreachable("Masked atomicrmw expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked atomicrmw expansion unimplemented on this target" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1751); |
1752 | } |
1753 | |
1754 | /// Perform a masked cmpxchg using a target-specific intrinsic. This |
1755 | /// represents the core LL/SC loop which will be lowered at a late stage by |
1756 | /// the backend. |
1757 | virtual Value *emitMaskedAtomicCmpXchgIntrinsic( |
1758 | IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
1759 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
1760 | llvm_unreachable("Masked cmpxchg expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked cmpxchg expansion unimplemented on this target" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 1760); |
1761 | } |
1762 | |
  /// Inserts in the IR a target-specific intrinsic specifying a fence.
  /// It is called by AtomicExpandPass before expanding an
  /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
  /// if shouldInsertFencesForAtomic returns true.
  ///
  /// Inst is the original atomic instruction, prior to other expansions that
  /// may be performed.
  ///
  /// This function should either return a nullptr, or a pointer to an IR-level
  /// Instruction*. Even complex fence sequences can be represented by a
  /// single Instruction* through an intrinsic to be lowered later.
  /// Backends should override this method to produce target-specific intrinsic
  /// for their fences.
  /// FIXME: Please note that the default implementation here in terms of
  /// IR-level fences exists for historical/compatibility reasons and is
  /// *unsound* ! Fences cannot, in general, be used to restore sequential
  /// consistency. For example, consider the following example:
  /// atomic<int> x = y = 0;
  /// int r1, r2, r3, r4;
  /// Thread 0:
  ///   x.store(1);
  /// Thread 1:
  ///   y.store(1);
  /// Thread 2:
  ///   r1 = x.load();
  ///   r2 = y.load();
  /// Thread 3:
  ///   r3 = y.load();
  ///   r4 = x.load();
  /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
  /// seq_cst. But if they are lowered to monotonic accesses, no amount of
  /// IR-level fences can prevent it.
  /// @{
  virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
                                        AtomicOrdering Ord) const {
    // Only an operation that both has release (or stronger) semantics and
    // actually performs an atomic store needs a leading fence; everything
    // else gets none by default.
    if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
      return Builder.CreateFence(Ord);
    else
      return nullptr;
  }
1803 | |
  /// Counterpart of emitLeadingFence: inserted *after* the atomic operation.
  /// Default: an acquire (or stronger) ordering gets a trailing IR fence;
  /// weaker orderings get none. See the unsoundness FIXME on the fence hooks.
  virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
                                         Instruction *Inst,
                                         AtomicOrdering Ord) const {
    if (isAcquireOrStronger(Ord))
      return Builder.CreateFence(Ord);
    else
      return nullptr;
  }
1812 | /// @} |
1813 | |
  // Emits code that executes when the comparison result in the ll/sc
  // expansion of a cmpxchg instruction is such that the store-conditional will
  // not execute. This makes it possible to balance out the load-linked with
  // a dedicated instruction, if desired.
  // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
  // be unnecessarily held, except if clrex, inserted by this hook, is executed.
  // Default: emit nothing; targets with an exclusive monitor override this.
  virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1821 | |
  /// Returns true if the given (atomic) store should be expanded by the
  /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
  /// Default: no expansion.
  virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
    return false;
  }
1827 | |
  /// Returns true if arguments should be sign-extended in lib calls.
  /// Default: follow the signedness of the argument itself.
  virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
    return IsSigned;
  }
1832 | |
  /// Returns true if arguments should be extended in lib calls.
  /// Default: always extend.
  virtual bool shouldExtendTypeInLibCall(EVT Type) const {
    return true;
  }
1837 | |
  /// Returns how the given (atomic) load should be expanded by the
  /// IR-level AtomicExpand pass. Default: no expansion.
  virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
    return AtomicExpansionKind::None;
  }
1843 | |
  /// Returns how the given atomic cmpxchg should be expanded by the IR-level
  /// AtomicExpand pass. Default: no expansion.
  virtual AtomicExpansionKind
  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
    return AtomicExpansionKind::None;
  }
1850 | |
  /// Returns how the IR-level AtomicExpand pass should expand the given
  /// AtomicRMW, if at all. Default is to never expand, except that
  /// floating-point RMW operations are expanded to a cmpxchg loop, since
  /// most targets have no native FP atomic-RMW instructions.
  virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
    return RMW->isFloatingPointOperation() ?
      AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
  }
1857 | |
  /// On some platforms, an AtomicRMW that never actually modifies the value
  /// (such as fetch_add of 0) can be turned into a fence followed by an
  /// atomic load. This may sound useless, but it makes it possible for the
  /// processor to keep the cacheline shared, dramatically improving
  /// performance. And such idempotent RMWs are useful for implementing some
  /// kinds of locks, see for example (justification + benchmarks):
  /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
  /// This method tries doing that transformation, returning the atomic load if
  /// it succeeds, and nullptr otherwise.
  /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
  /// another round of expansion.
  /// Default: transformation not supported; return nullptr.
  virtual LoadInst *
  lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
    return nullptr;
  }
1873 | |
  /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
  /// SIGN_EXTEND, or ANY_EXTEND). Default: zero-extension.
  virtual ISD::NodeType getExtendForAtomicOps() const {
    return ISD::ZERO_EXTEND;
  }
1879 | |
1880 | /// @} |
1881 | |
  /// Returns true if we should normalize
  /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
  /// select(N0|N1, X, Y) => select(N0, select(N1, X, Y, Y)) if it is likely
  /// that it saves us from materializing N0 and N1 in an integer register.
  /// Targets that are able to perform and/or on flags should return false here.
  virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
                                               EVT VT) const {
    // If a target has multiple condition registers, then it likely has logical
    // operations on those registers.
    if (hasMultipleConditionRegisters())
      return false;
    // Only do the transform if the value won't be split into multiple
    // registers.
    LegalizeTypeAction Action = getTypeAction(Context, VT);
    return Action != TypeExpandInteger && Action != TypeExpandFloat &&
      Action != TypeSplitVector;
  }
1899 | |
1900 | virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } |
1901 | |
  /// Return true if a select of constants (select Cond, C1, C2) should be
  /// transformed into simple math ops with the condition value. For example:
  /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
  /// Default: keep the select.
  virtual bool convertSelectOfConstantsToMath(EVT VT) const {
    return false;
  }
1908 | |
  /// Return true if it is profitable to transform an integer
  /// multiplication-by-constant into simpler operations like shifts and adds.
  /// This may be true if the target does not directly support the
  /// multiplication operation for the specified type or the sequence of simpler
  /// ops is faster than the multiply. Default: keep the multiply.
  virtual bool decomposeMulByConstant(LLVMContext &Context,
                                      EVT VT, SDValue C) const {
    return false;
  }
1918 | |
  /// Return true if it is more correct/profitable to use strict FP_TO_INT
  /// conversion operations - canonicalizing the FP source value instead of
  /// converting all cases and then selecting based on value.
  /// This may be true if the target throws exceptions for out of bounds
  /// conversions or has fast FP CMOV. Default: non-strict conversion.
  virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                        bool IsSigned) const {
    return false;
  }
1928 | |
1929 | //===--------------------------------------------------------------------===// |
1930 | // TargetLowering Configuration Methods - These methods should be invoked by |
1931 | // the derived class constructor to configure this object for the target. |
1932 | // |
1933 | protected: |
  /// Specify how the target extends the result of integer and floating point
  /// boolean values from i1 to a wider type. See getBooleanContents.
  /// This single-argument form sets the same content kind for both integer
  /// and floating-point booleans.
  void setBooleanContents(BooleanContent Ty) {
    BooleanContents = Ty;
    BooleanFloatContents = Ty;
  }
1940 | |
  /// Specify how the target extends the result of integer and floating point
  /// boolean values from i1 to a wider type. See getBooleanContents.
  /// This form sets the integer and floating-point content kinds separately.
  void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
    BooleanContents = IntTy;
    BooleanFloatContents = FloatTy;
  }
1947 | |
  /// Specify how the target extends the result of a vector boolean value from a
  /// vector of i1 to a wider type. See getBooleanContents.
  void setBooleanVectorContents(BooleanContent Ty) {
    BooleanVectorContents = Ty;
  }
1953 | |
  /// Specify the target scheduling preference.
  void setSchedulingPreference(Sched::Preference Pref) {
    SchedPreferenceInfo = Pref;
  }
1958 | |
  /// Indicate whether this target prefers to use _setjmp to implement
  /// llvm.setjmp or the version without _. Defaults to false.
  void setUseUnderscoreSetJmp(bool Val) {
    UseUnderscoreSetJmp = Val;
  }
1964 | |
  /// Indicate whether this target prefers to use _longjmp to implement
  /// llvm.longjmp or the version without _. Defaults to false.
  void setUseUnderscoreLongJmp(bool Val) {
    UseUnderscoreLongJmp = Val;
  }
1970 | |
1971 | /// Indicate the minimum number of blocks to generate jump tables. |
1972 | void setMinimumJumpTableEntries(unsigned Val); |
1973 | |
1974 | /// Indicate the maximum number of entries in jump tables. |
1975 | /// Set to zero to generate unlimited jump tables. |
1976 | void setMaximumJumpTableSize(unsigned); |
1977 | |
  /// If set to a physical register, this specifies the register that
  /// llvm.savestack/llvm.restorestack should save and restore.
  void setStackPointerRegisterToSaveRestore(unsigned R) {
    StackPointerRegisterToSaveRestore = R;
  }
1983 | |
  /// Tells the code generator that the target has multiple (allocatable)
  /// condition registers that can be used to store the results of comparisons
  /// for use by selects and conditional branches. With multiple condition
  /// registers, the code generator will not aggressively sink comparisons into
  /// the blocks of their users.
  void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
    HasMultipleConditionRegisters = hasManyRegs;
  }
1992 | |
  /// Tells the code generator that the target has BitExtract instructions.
  /// The code generator will aggressively sink "shift"s into the blocks of
  /// their users if the users will generate "and" instructions which can be
  /// combined with "shift" to BitExtract instructions.
  void setHasExtractBitsInsn(bool hasExtractInsn = true) {
    HasExtractBitsInsn = hasExtractInsn;
  }
2000 | |
2001 | /// Tells the code generator not to expand logic operations on comparison |
2002 | /// predicates into separate sequences that increase the amount of flow |
2003 | /// control. |
2004 | void setJumpIsExpensive(bool isExpensive = true); |
2005 | |
  /// Tells the code generator which bitwidths to bypass: a division of
  /// SlowBitWidth may be done in FastBitWidth instead when the operands fit.
  void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
    BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
  }
2010 | |
  /// Add the specified register class as an available regclass for the
  /// specified value type. This indicates the selector can handle values of
  /// that class natively.
  void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
    assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
    RegClassForVT[VT.SimpleTy] = RC;
  }
2018 | |
2019 | /// Return the largest legal super-reg register class of the register class |
2020 | /// for the specified type and its associated "cost". |
2021 | virtual std::pair<const TargetRegisterClass *, uint8_t> |
2022 | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; |
2023 | |
2024 | /// Once all of the register classes are added, this allows us to compute |
2025 | /// derived properties we expose. |
2026 | void computeRegisterProperties(const TargetRegisterInfo *TRI); |
2027 | |
  /// Indicate that the specified operation does not work with the specified
  /// type and indicate what to do about it. Note that VT may refer to either
  /// the type of a result or that of an operand of Op.
  void setOperationAction(unsigned Op, MVT VT,
                          LegalizeAction Action) {
    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
    OpActions[(unsigned)VT.SimpleTy][Op] = Action;
  }
2036 | |
  /// Indicate that the specified load with extension does not work with the
  /// specified type and indicate what to do about it.
  void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
                        LegalizeAction Action) {
    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
           MemVT.isValid() && "Table isn't big enough!");
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
    // Each (ValVT, MemVT) entry packs one 4-bit action per extension type
    // into a 16-bit field; clear the old nibble, then store the new one.
    unsigned Shift = 4 * ExtType;
    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
    LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
  }
2048 | |
  /// Indicate that the specified truncating store does not work with the
  /// specified type and indicate what to do about it.
  void setTruncStoreAction(MVT ValVT, MVT MemVT,
                           LegalizeAction Action) {
    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
    TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
  }
2056 | |
  /// Indicate that the specified indexed load does or does not work with the
  /// specified type and indicate what to do about it.
  ///
  /// NOTE: All indexed mode loads are initialized to Expand in
  /// TargetLowering.cpp
  void setIndexedLoadAction(unsigned IdxMode, MVT VT,
                            LegalizeAction Action) {
    assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
           (unsigned)Action < 0xf && "Table isn't big enough!");
    // Load action are kept in the upper half (bits 4-7) of the per-mode byte;
    // the lower half holds the store action (see setIndexedStoreAction).
    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
  }
2070 | |
  /// Indicate that the specified indexed store does or does not work with the
  /// specified type and indicate what to do about it.
  ///
  /// NOTE: All indexed mode stores are initialized to Expand in
  /// TargetLowering.cpp
  void setIndexedStoreAction(unsigned IdxMode, MVT VT,
                             LegalizeAction Action) {
    assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
           (unsigned)Action < 0xf && "Table isn't big enough!");
    // Store action are kept in the lower half (bits 0-3) of the per-mode
    // byte; the upper half holds the load action (see setIndexedLoadAction).
    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
    IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
  }
2084 | |
  /// Indicate that the specified condition code is or isn't supported on the
  /// target and indicate what to do about it.
  void setCondCodeAction(ISD::CondCode CC, MVT VT,
                         LegalizeAction Action) {
    assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
           "Table isn't big enough!");
    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
    /// The lower 3 bits of the SimpleTy index into Nth 4bit set from the 32-bit
    /// value and the upper 29 bits index into the second dimension of the array
    /// to select what 32-bit value to use.
    uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
    CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
    CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
  }
2099 | |
  /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
  /// to trying a larger integer/fp until it can find one that works. If that
  /// default is insufficient, this method can be used by the target to override
  /// the default.
  void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
    PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
  }
2107 | |
  /// Convenience method to set an operation to Promote and specify the type
  /// in a single call.
  void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
    setOperationAction(Opc, OrigVT, Promote);
    AddPromotedToType(Opc, OrigVT, DestVT);
  }
2114 | |
  /// Targets should invoke this method for each target independent node that
  /// they want to provide a custom DAG combiner for by implementing the
  /// PerformDAGCombine virtual method.
  void setTargetDAGCombine(ISD::NodeType NT) {
    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
    // One bit per node type, packed eight to a byte.
    TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
  }
2122 | |
  /// Set the target's minimum function alignment.
  void setMinFunctionAlignment(Align Alignment) {
    MinFunctionAlignment = Alignment;
  }
2127 | |
  /// Set the target's preferred function alignment. This should be set if
  /// there is a performance benefit to higher-than-minimum alignment.
  void setPrefFunctionAlignment(Align Alignment) {
    PrefFunctionAlignment = Alignment;
  }
2133 | |
  /// Set the target's preferred loop alignment. Default alignment is one, it
  /// means the target does not care about loop alignment. The target may also
  /// override getPrefLoopAlignment to provide per-loop values.
  void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2138 | |
  /// Set the minimum stack alignment of an argument.
  void setMinStackArgumentAlignment(Align Alignment) {
    MinStackArgumentAlignment = Alignment;
  }
2143 | |
  /// Set the maximum atomic operation size supported by the
  /// backend. Atomic operations greater than this size (as well as
  /// ones that are not naturally aligned), will be expanded by
  /// AtomicExpandPass into an __atomic_* library call.
  void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
    MaxAtomicSizeInBitsSupported = SizeInBits;
  }
2151 | |
  /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
  void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
    MinCmpXchgSizeInBits = SizeInBits;
  }
2156 | |
  /// Sets whether unaligned atomic operations are supported.
  void setSupportsUnalignedAtomics(bool UnalignedSupported) {
    SupportsUnalignedAtomics = UnalignedSupported;
  }
2161 | |
2162 | public: |
2163 | //===--------------------------------------------------------------------===// |
2164 | // Addressing mode description hooks (used by LSR etc). |
2165 | // |
2166 | |
  /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
  /// instructions reading the address. This allows as much computation as
  /// possible to be done in the address mode for that operand. This hook lets
  /// targets also pass back when this should be done on intrinsics which
  /// load/store. Default: no intrinsics are handled.
  virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
                                    SmallVectorImpl<Value*> &/*Ops*/,
                                    Type *&/*AccessTy*/) const {
    return false;
  }
2177 | |
  /// This represents an addressing mode of:
  ///    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  /// If BaseGV is null, there is no BaseGV.
  /// If BaseOffs is zero, there is no base offset.
  /// If HasBaseReg is false, there is no base register.
  /// If Scale is zero, there is no ScaleReg.  Scale of 1 indicates a reg with
  /// no scale.
  struct AddrMode {
    GlobalValue *BaseGV = nullptr;  // Optional global base; null if absent.
    int64_t BaseOffs = 0;           // Constant displacement added to the address.
    bool HasBaseReg = false;        // Whether an (unspecified) base register is used.
    int64_t Scale = 0;              // Multiplier applied to the scaled register.
    AddrMode() = default;
  };
2192 | |
2193 | /// Return true if the addressing mode represented by AM is legal for this |
2194 | /// target, for a load/store of the specified type. |
2195 | /// |
2196 | /// The type may be VoidTy, in which case only return true if the addressing |
2197 | /// mode is legal for a load/store of any legal type. TODO: Handle |
2198 | /// pre/postinc as well. |
2199 | /// |
2200 | /// If the address space cannot be determined, it will be -1. |
2201 | /// |
2202 | /// TODO: Remove default argument |
2203 | virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
2204 | Type *Ty, unsigned AddrSpace, |
2205 | Instruction *I = nullptr) const; |
2206 | |
  /// Return the cost of the scaling factor used in the addressing mode
  /// represented by AM for this target, for a load/store of the specified type.
  ///
  /// If the AM is supported, the return value must be >= 0.
  /// If the AM is not supported, it returns a negative value.
  /// TODO: Handle pre/postinc as well.
  /// TODO: Remove default argument
  virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
                                   Type *Ty, unsigned AS = 0) const {
    // Default: assume that any scaling factor used in a legal AM is free.
    if (isLegalAddressingMode(DL, AM, Ty, AS))
      return 0;
    return -1;
  }
2221 | |
  /// Return true if the specified immediate is legal icmp immediate, that is
  /// the target has icmp instructions which can compare a register against the
  /// immediate without having to materialize the immediate into a register.
  /// Default: all immediates are legal.
  virtual bool isLegalICmpImmediate(int64_t) const {
    return true;
  }
2228 | |
  /// Return true if the specified immediate is legal add immediate, that is the
  /// target has add instructions which can add a register with the immediate
  /// without having to materialize the immediate into a register.
  /// Default: all immediates are legal.
  virtual bool isLegalAddImmediate(int64_t) const {
    return true;
  }
2235 | |
  /// Return true if the specified immediate is legal for the value input of a
  /// store instruction.
  virtual bool isLegalStoreImmediate(int64_t Value) const {
    // Default implementation assumes that at least 0 works since it is likely
    // that a zero register exists or a zero immediate is allowed.
    return Value == 0;
  }
2243 | |
  /// Return true if it's significantly cheaper to shift a vector by a uniform
  /// scalar than by an amount which will vary across each lane. On x86, for
  /// example, there is a "psllw" instruction for the former case, but no simple
  /// instruction for a general "a << b" operation on vectors.
  /// Default: no such preference.
  virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
    return false;
  }
2251 | |
  /// Returns true if the opcode is a commutative binary operation.
  virtual bool isCommutativeBinOp(unsigned Opcode) const {
    // FIXME: This should get its info from the td file.
    switch (Opcode) {
    case ISD::ADD:
    case ISD::SMIN:
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
    case ISD::MUL:
    case ISD::MULHU:
    case ISD::MULHS:
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:
    case ISD::FADD:
    case ISD::FMUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::SADDO:
    case ISD::UADDO:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::SADDSAT:
    case ISD::UADDSAT:
    case ISD::FMINNUM:
    case ISD::FMAXNUM:
    case ISD::FMINNUM_IEEE:
    case ISD::FMAXNUM_IEEE:
    case ISD::FMINIMUM:
    case ISD::FMAXIMUM:
      return true;
    default: return false;
    }
  }
2287 | |
  /// Return true if the node is a math/logic binary operator.
  virtual bool isBinOp(unsigned Opcode) const {
    // A commutative binop must be a binop.
    if (isCommutativeBinOp(Opcode))
      return true;
    // These are non-commutative binops.
    switch (Opcode) {
    case ISD::SUB:
    case ISD::SHL:
    case ISD::SRL:
    case ISD::SRA:
    case ISD::SDIV:
    case ISD::UDIV:
    case ISD::SREM:
    case ISD::UREM:
    case ISD::FSUB:
    case ISD::FDIV:
    case ISD::FREM:
      return true;
    default:
      return false;
    }
  }
2311 | |
  /// Return true if it's free to truncate a value of type FromTy to type
  /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
  /// by referencing its sub-register AX.
  /// Targets must return false when FromTy <= ToTy.
  virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
    return false;
  }
2319 | |
  /// Return true if a truncation from FromTy to ToTy is permitted when deciding
  /// whether a call is in tail position. Typically this means that both results
  /// would be assigned to the same register or stack slot, but it could mean
  /// the target performs adequate checks of its own before proceeding with the
  /// tail call.  Targets must return false when FromTy <= ToTy.
  virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
    return false;
  }
2328 | |
  /// EVT variant of isTruncateFree; same contract as the Type* overload above.
  virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
    return false;
  }
2332 | |
2333 | virtual bool isProfitableToHoist(Instruction *I) const { return true; } |
2334 | |
  /// Return true if the extension represented by \p I is free.
  /// Unlike the is[Z|FP]ExtFree family which is based on types,
  /// this method can use the context provided by \p I to decide
  /// whether or not \p I is free.
  /// This method extends the behavior of the is[Z|FP]ExtFree family.
  /// In other words, if is[Z|FP]Free returns true, then this method
  /// returns true as well. The converse is not true.
  /// The target can perform the adequate checks by overriding isExtFreeImpl.
  /// \pre \p I must be a sign, zero, or fp extension.
  bool isExtFree(const Instruction *I) const {
    // First consult the type-based hooks; if they already say the extension
    // is free, there is no need for the context-sensitive check.
    switch (I->getOpcode()) {
    case Instruction::FPExt:
      if (isFPExtFree(EVT::getEVT(I->getType()),
                      EVT::getEVT(I->getOperand(0)->getType())))
        return true;
      break;
    case Instruction::ZExt:
      if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
        return true;
      break;
    case Instruction::SExt:
      break;
    default:
      llvm_unreachable("Instruction is not an extension");
    }
    // Fall back to the target's context-sensitive check.
    return isExtFreeImpl(I);
  }
2362 | |
  /// Return true if \p Load and \p Ext can form an ExtLoad.
  /// For example, in AArch64
  ///   %L = load i8, i8* %ptr
  ///   %E = zext i8 %L to i32
  /// can be lowered into one load instruction
  ///   ldrb w0, [x0]
  bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
                 const DataLayout &DL) const {
    EVT VT = getValueType(DL, Ext->getType());
    EVT LoadVT = getValueType(DL, Load->getType());

    // If the load has other users and the truncate is not free, the ext
    // probably isn't free.
    if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
        !isTruncateFree(Ext->getType(), Load->getType()))
      return false;

    // Check whether the target supports casts folded into loads.
    unsigned LType;
    if (isa<ZExtInst>(Ext))
      LType = ISD::ZEXTLOAD;
    else {
      assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
      LType = ISD::SEXTLOAD;
    }

    return isLoadExtLegal(LType, VT, LoadVT);
  }
2391 | |
2392 | /// Return true if any actual instruction that defines a value of type FromTy |
2393 | /// implicitly zero-extends the value to ToTy in the result register. |
2394 | /// |
2395 | /// The function should return true when it is likely that the truncate can |
2396 | /// be freely folded with an instruction defining a value of FromTy. If |
2397 | /// the defining instruction is unknown (because you're looking at a |
2398 | /// function argument, PHI, etc.) then the target may require an |
2399 | /// explicit truncate, which is not necessarily free, but this function |
2400 | /// does not deal with those cases. |
2401 | /// Targets must return false when FromTy >= ToTy. |
  // Conservative default: without target knowledge, assume the zext costs an
  // instruction.
  virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
    return false;
  }
2405 | |
  /// EVT-based variant of isZExtFree(Type*, Type*); same conservative
  /// "not free" default.
  virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
    return false;
  }
2409 | |
2410 | /// Return true if sign-extension from FromTy to ToTy is cheaper than |
2411 | /// zero-extension. |
  virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
    return false; // Default: zero-extension is assumed no worse than sext.
  }
2415 | |
2416 | /// Return true if sinking I's operands to the same basic block as I is |
2417 | /// profitable, e.g. because the operands can be folded into a target |
2418 | /// instruction during instruction selection. After calling the function |
2419 | /// \p Ops contains the Uses to sink ordered by dominance (dominating users |
2420 | /// come first). |
  virtual bool shouldSinkOperands(Instruction *I,
                                  SmallVectorImpl<Use *> &Ops) const {
    return false; // Default: sinking operands is never profitable.
  }
2425 | |
2426 | /// Return true if the target supplies and combines to a paired load |
2427 | /// two loaded values of type LoadedType next to each other in memory. |
2428 | /// RequiredAlignment gives the minimal alignment constraints that must be met |
2429 | /// to be able to select this paired load. |
2430 | /// |
2431 | /// This information is *not* used to generate actual paired loads, but it is |
2432 | /// used to generate a sequence of loads that is easier to combine into a |
2433 | /// paired load. |
2434 | /// For instance, something like this: |
2435 | /// a = load i64* addr |
2436 | /// b = trunc i64 a to i32 |
2437 | /// c = lshr i64 a, 32 |
2438 | /// d = trunc i64 c to i32 |
2439 | /// will be optimized into: |
2440 | /// b = load i32* addr1 |
2441 | /// d = load i32* addr2 |
2442 | /// Where addr1 = addr2 +/- sizeof(i32). |
2443 | /// |
2444 | /// In other words, unless the target performs a post-isel load combining, |
2445 | /// this information should not be provided because it will generate more |
2446 | /// loads. |
  // Default: the target does no post-isel paired-load combining, so do not
  // encourage splitting loads.
  virtual bool hasPairedLoad(EVT /*LoadedType*/,
                             unsigned & /*RequiredAlignment*/) const {
    return false;
  }
2451 | |
2452 | /// Return true if the target has a vector blend instruction. |
  virtual bool hasVectorBlend() const { return false; } // Default: none.
2454 | |
2455 | /// Get the maximum supported factor for interleaved memory accesses. |
2456 | /// Default to be the minimum interleave factor: 2. |
  virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } // Minimum possible factor.
2458 | |
2459 | /// Lower an interleaved load to target specific intrinsics. Return |
2460 | /// true on success. |
2461 | /// |
2462 | /// \p LI is the vector load instruction. |
2463 | /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. |
2464 | /// \p Indices is the corresponding indices for each shufflevector. |
2465 | /// \p Factor is the interleave factor. |
  // Default: no target-specific lowering; caller keeps the generic
  // load + shufflevector sequence.
  virtual bool lowerInterleavedLoad(LoadInst *LI,
                                    ArrayRef<ShuffleVectorInst *> Shuffles,
                                    ArrayRef<unsigned> Indices,
                                    unsigned Factor) const {
    return false;
  }
2472 | |
2473 | /// Lower an interleaved store to target specific intrinsics. Return |
2474 | /// true on success. |
2475 | /// |
2476 | /// \p SI is the vector store instruction. |
2477 | /// \p SVI is the shufflevector to RE-interleave the stored vector. |
2478 | /// \p Factor is the interleave factor. |
  // Default: no target-specific lowering; caller keeps the generic
  // shufflevector + store sequence.
  virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                                     unsigned Factor) const {
    return false;
  }
2483 | |
2484 | /// Return true if zero-extending the specific node Val to type VT2 is free |
2485 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
2486 | /// because it's folded such as X86 zero-extending loads). |
2487 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
2488 | return isZExtFree(Val.getValueType(), VT2); |
2489 | } |
2490 | |
2491 | /// Return true if an fpext operation is free (for instance, because |
2492 | /// single-precision floating-point numbers are implicitly extended to |
2493 | /// double-precision). |
2494 | virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { |
2495 | assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&((SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && "invalid fpext types") ? static_cast<void> ( 0) : __assert_fail ("SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && \"invalid fpext types\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 2496, __PRETTY_FUNCTION__)) |
2496 | "invalid fpext types")((SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && "invalid fpext types") ? static_cast<void> ( 0) : __assert_fail ("SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && \"invalid fpext types\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 2496, __PRETTY_FUNCTION__)); |
2497 | return false; |
2498 | } |
2499 | |
2500 | /// Return true if an fpext operation input to an \p Opcode operation is free |
2501 | /// (for instance, because half-precision floating-point numbers are |
2502 | /// implicitly extended to float-precision) for an FMA instruction. |
2503 | virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const { |
2504 | assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&((DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && "invalid fpext types") ? static_cast<void> ( 0) : __assert_fail ("DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && \"invalid fpext types\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 2505, __PRETTY_FUNCTION__)) |
2505 | "invalid fpext types")((DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && "invalid fpext types") ? static_cast<void> ( 0) : __assert_fail ("DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && \"invalid fpext types\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 2505, __PRETTY_FUNCTION__)); |
2506 | return isFPExtFree(DestVT, SrcVT); |
2507 | } |
2508 | |
2509 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
2510 | /// extend node) is profitable. |
  virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } // Default: not profitable.
2512 | |
2513 | /// Return true if an fneg operation is free to the point where it is never |
2514 | /// worthwhile to replace it with a bitwise operation. |
  virtual bool isFNegFree(EVT VT) const {
    assert(VT.isFloatingPoint());
    return false; // Default: fneg costs an instruction.
  }
2519 | |
2520 | /// Return true if an fabs operation is free to the point where it is never |
2521 | /// worthwhile to replace it with a bitwise operation. |
  virtual bool isFAbsFree(EVT VT) const {
    assert(VT.isFloatingPoint());
    return false; // Default: fabs costs an instruction.
  }
2526 | |
2527 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
2528 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
2529 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. |
2530 | /// |
2531 | /// NOTE: This may be called before legalization on types for which FMAs are |
2532 | /// not legal, but should return true if those types will eventually legalize |
2533 | /// to types that support FMAs. After legalization, it will only be called on |
2534 | /// types that support FMAs (via Legal or Custom actions) |
  virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
    return false; // Default: expand fmuladd to fmul + fadd.
  }
2538 | |
2539 | /// Return true if it's profitable to narrow operations of type VT1 to |
2540 | /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from |
2541 | /// i32 to i16. |
  virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const {
    return false; // Default: keep the wider operation.
  }
2545 | |
2546 | /// Return true if it is beneficial to convert a load of a constant to |
2547 | /// just the constant itself. |
2548 | /// On some targets it might be more efficient to use a combination of |
2549 | /// arithmetic instructions to materialize the constant instead of loading it |
2550 | /// from a constant pool. |
  // Default: keep the constant-pool load rather than materializing the
  // immediate with arithmetic.
  virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                 Type *Ty) const {
    return false;
  }
2555 | |
2556 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type |
2557 | /// from this source type with this index. This is needed because |
2558 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of |
2559 | /// the first element, and only the target knows which lowering is cheap. |
  // Default: assume EXTRACT_SUBVECTOR is not cheap for any index.
  virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                       unsigned Index) const {
    return false;
  }
2564 | |
2565 | /// Try to convert an extract element of a vector binary operation into an |
2566 | /// extract element followed by a scalar operation. |
  virtual bool shouldScalarizeBinop(SDValue VecOp) const {
    return false; // Default: do not scalarize the vector binop.
  }
2570 | |
2571 | /// Return true if extraction of a scalar element from the given vector type |
2572 | /// at the given index is cheap. For example, if scalar operations occur on |
2573 | /// the same register file as vector operations, then an extract element may |
2574 | /// be a sub-register rename rather than an actual instruction. |
  virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
    return false; // Default: element extraction is a real instruction.
  }
2578 | |
2579 | /// Try to convert math with an overflow comparison into the corresponding DAG |
2580 | /// node operation. Targets may want to override this independently of whether |
2581 | /// the operation is legal/custom for the given type because it may obscure |
2582 | /// matching of other patterns. |
2583 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const { |
2584 | // TODO: The default logic is inherited from code in CodeGenPrepare. |
2585 | // The opcode should not make a difference by default? |
2586 | if (Opcode != ISD::UADDO) |
2587 | return false; |
2588 | |
2589 | // Allow the transform as long as we have an integer type that is not |
2590 | // obviously illegal and unsupported. |
2591 | if (VT.isVector()) |
2592 | return false; |
2593 | return VT.isSimple() || !isOperationExpand(Opcode, VT); |
2594 | } |
2595 | |
2596 | // Return true if it is profitable to use a scalar input to a BUILD_VECTOR |
2597 | // even if the vector itself has multiple uses. |
  virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
    return false; // Default: no preference for scalar sources.
  }
2601 | |
2602 | // Return true if CodeGenPrepare should consider splitting large offset of a |
2603 | // GEP to make the GEP fit into the addressing mode and can be sunk into the |
2604 | // same blocks of its users. |
  virtual bool shouldConsiderGEPOffsetSplit() const { return false; } // Default: no GEP offset splitting.
2606 | |
2607 | //===--------------------------------------------------------------------===// |
2608 | // Runtime Library hooks |
2609 | // |
2610 | |
2611 | /// Rename the default libcall routine name for the specified libcall. |
  /// Rename the default libcall routine name for the specified libcall.
  /// \p Name must outlive this object (it is stored, not copied).
  void setLibcallName(RTLIB::Libcall Call, const char *Name) {
    LibcallRoutineNames[Call] = Name;
  }
2615 | |
2616 | /// Get the libcall routine name for the specified libcall. |
  /// Get the libcall routine name for the specified libcall (may be null if
  /// the libcall is unavailable on this target).
  const char *getLibcallName(RTLIB::Libcall Call) const {
    return LibcallRoutineNames[Call];
  }
2620 | |
2621 | /// Override the default CondCode to be used to test the result of the |
2622 | /// comparison libcall against zero. |
  /// Override the default CondCode to be used to test the result of the
  /// comparison libcall against zero.
  void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
    CmpLibcallCCs[Call] = CC;
  }
2626 | |
2627 | /// Get the CondCode that's to be used to test the result of the comparison |
2628 | /// libcall against zero. |
  /// Get the CondCode that's to be used to test the result of the comparison
  /// libcall against zero.
  ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
    return CmpLibcallCCs[Call];
  }
2632 | |
2633 | /// Set the CallingConv that should be used for the specified libcall. |
  /// Set the CallingConv that should be used for the specified libcall.
  void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
    LibcallCallingConvs[Call] = CC;
  }
2637 | |
2638 | /// Get the CallingConv that should be used for the specified libcall. |
  /// Get the CallingConv that should be used for the specified libcall.
  CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
    return LibcallCallingConvs[Call];
  }
2642 | |
2643 | /// Execute target specific actions to finalize target lowering. |
2644 | /// This is used to set extra flags in MachineFrameInformation and freezing |
2645 | /// the set of reserved registers. |
2646 | /// The default implementation just freezes the set of reserved registers. |
2647 | virtual void finalizeLowering(MachineFunction &MF) const; |
2648 | |
2649 | private: |
2650 | const TargetMachine &TM; |
2651 | |
2652 | /// Tells the code generator that the target has multiple (allocatable) |
2653 | /// condition registers that can be used to store the results of comparisons |
2654 | /// for use by selects and conditional branches. With multiple condition |
2655 | /// registers, the code generator will not aggressively sink comparisons into |
2656 | /// the blocks of their users. |
2657 | bool HasMultipleConditionRegisters; |
2658 | |
2659 | /// Tells the code generator that the target has BitExtract instructions. |
2660 | /// The code generator will aggressively sink "shift"s into the blocks of |
2661 | /// their users if the users will generate "and" instructions which can be |
2662 | /// combined with "shift" to BitExtract instructions. |
2663 | bool HasExtractBitsInsn; |
2664 | |
2665 | /// Tells the code generator to bypass slow divide or remainder |
2666 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code |
2667 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
2668 | /// div/rem when the operands are positive and less than 256. |
2669 | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; |
2670 | |
2671 | /// Tells the code generator that it shouldn't generate extra flow control |
2672 | /// instructions and should attempt to combine flow control instructions via |
2673 | /// predication. |
2674 | bool JumpIsExpensive; |
2675 | |
2676 | /// This target prefers to use _setjmp to implement llvm.setjmp. |
2677 | /// |
2678 | /// Defaults to false. |
2679 | bool UseUnderscoreSetJmp; |
2680 | |
2681 | /// This target prefers to use _longjmp to implement llvm.longjmp. |
2682 | /// |
2683 | /// Defaults to false. |
2684 | bool UseUnderscoreLongJmp; |
2685 | |
2686 | /// Information about the contents of the high-bits in boolean values held in |
2687 | /// a type wider than i1. See getBooleanContents. |
2688 | BooleanContent BooleanContents; |
2689 | |
2690 | /// Information about the contents of the high-bits in boolean values held in |
2691 | /// a type wider than i1. See getBooleanContents. |
2692 | BooleanContent BooleanFloatContents; |
2693 | |
2694 | /// Information about the contents of the high-bits in boolean vector values |
2695 | /// when the element type is wider than i1. See getBooleanContents. |
2696 | BooleanContent BooleanVectorContents; |
2697 | |
2698 | /// The target scheduling preference: shortest possible total cycles or lowest |
2699 | /// register usage. |
2700 | Sched::Preference SchedPreferenceInfo; |
2701 | |
2702 | /// The minimum alignment that any argument on the stack needs to have. |
2703 | Align MinStackArgumentAlignment; |
2704 | |
2705 | /// The minimum function alignment (used when optimizing for size, and to |
2706 | /// prevent explicitly provided alignment from leading to incorrect code). |
2707 | Align MinFunctionAlignment; |
2708 | |
2709 | /// The preferred function alignment (used when alignment unspecified and |
2710 | /// optimizing for speed). |
2711 | Align PrefFunctionAlignment; |
2712 | |
/// The preferred loop alignment (in log2, not in bytes).
2714 | Align PrefLoopAlignment; |
2715 | |
2716 | /// Size in bits of the maximum atomics size the backend supports. |
2717 | /// Accesses larger than this will be expanded by AtomicExpandPass. |
2718 | unsigned MaxAtomicSizeInBitsSupported; |
2719 | |
2720 | /// Size in bits of the minimum cmpxchg or ll/sc operation the |
2721 | /// backend supports. |
2722 | unsigned MinCmpXchgSizeInBits; |
2723 | |
2724 | /// This indicates if the target supports unaligned atomic operations. |
2725 | bool SupportsUnalignedAtomics; |
2726 | |
2727 | /// If set to a physical register, this specifies the register that |
2728 | /// llvm.savestack/llvm.restorestack should save and restore. |
2729 | unsigned StackPointerRegisterToSaveRestore; |
2730 | |
2731 | /// This indicates the default register class to use for each ValueType the |
2732 | /// target supports natively. |
2733 | const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; |
2734 | unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; |
2735 | MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; |
2736 | |
2737 | /// This indicates the "representative" register class to use for each |
2738 | /// ValueType the target supports natively. This information is used by the |
2739 | /// scheduler to track register pressure. By default, the representative |
2740 | /// register class is the largest legal super-reg register class of the |
2741 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
2742 | /// representative class would be GR32. |
2743 | const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; |
2744 | |
2745 | /// This indicates the "cost" of the "representative" register class for each |
2746 | /// ValueType. The cost is used by the scheduler to approximate register |
2747 | /// pressure. |
2748 | uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE]; |
2749 | |
2750 | /// For any value types we are promoting or expanding, this contains the value |
2751 | /// type that we are changing to. For Expanded types, this contains one step |
2752 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
2753 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
2754 | /// the same type (e.g. i32 -> i32). |
2755 | MVT TransformToType[MVT::LAST_VALUETYPE]; |
2756 | |
2757 | /// For each operation and each value type, keep a LegalizeAction that |
2758 | /// indicates how instruction selection should deal with the operation. Most |
2759 | /// operations are Legal (aka, supported natively by the target), but |
2760 | /// operations that are not should be described. Note that operations on |
2761 | /// non-legal value types are not described here. |
2762 | LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; |
2763 | |
2764 | /// For each load extension type and each value type, keep a LegalizeAction |
2765 | /// that indicates how instruction selection should deal with a load of a |
2766 | /// specific value type and extension type. Uses 4-bits to store the action |
2767 | /// for each of the 4 load ext types. |
2768 | uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
2769 | |
2770 | /// For each value type pair keep a LegalizeAction that indicates whether a |
2771 | /// truncating store of a specific value type and truncating type is legal. |
2772 | LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
2773 | |
2774 | /// For each indexed mode and each value type, keep a pair of LegalizeAction |
2775 | /// that indicates how instruction selection should deal with the load / |
2776 | /// store. |
2777 | /// |
2778 | /// The first dimension is the value_type for the reference. The second |
2779 | /// dimension represents the various modes for load store. |
2780 | uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; |
2781 | |
2782 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
2783 | /// indicates how instruction selection should deal with the condition code. |
2784 | /// |
2785 | /// Because each CC action takes up 4 bits, we need to have the array size be |
2786 | /// large enough to fit all of the value types. This can be done by rounding |
2787 | /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. |
2788 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; |
2789 | |
2790 | ValueTypeActionImpl ValueTypeActions; |
2791 | |
2792 | private: |
2793 | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; |
2794 | |
2795 | /// Targets can specify ISD nodes that they would like PerformDAGCombine |
2796 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
2797 | /// array. |
2798 | unsigned char |
2799 | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT8-1)/CHAR_BIT8]; |
2800 | |
2801 | /// For operations that must be promoted to a specific type, this holds the |
2802 | /// destination type. This map should be sparse, so don't hold it as an |
2803 | /// array. |
2804 | /// |
2805 | /// Targets add entries to this map with AddPromotedToType(..), clients access |
2806 | /// this with getTypeToPromoteTo(..). |
2807 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
2808 | PromoteToType; |
2809 | |
2810 | /// Stores the name each libcall. |
2811 | const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; |
2812 | |
2813 | /// The ISD::CondCode that should be used to test the result of each of the |
2814 | /// comparison libcall against zero. |
2815 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
2816 | |
2817 | /// Stores the CallingConv that should be used for each libcall. |
2818 | CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; |
2819 | |
2820 | /// Set default libcall names and calling conventions. |
2821 | void InitLibcalls(const Triple &TT); |
2822 | |
2823 | protected: |
2824 | /// Return true if the extension represented by \p I is free. |
2825 | /// \pre \p I is a sign, zero, or fp extension and |
2826 | /// is[Z|FP]ExtFree of the related types is not true. |
  virtual bool isExtFreeImpl(const Instruction *I) const { return false; } // Default: ext is not free.
2828 | |
/// Depth that GatherAllAliases should continue looking for chain
2830 | /// dependencies when trying to find a more preferable chain. As an |
2831 | /// approximation, this should be more than the number of consecutive stores |
2832 | /// expected to be merged. |
2833 | unsigned GatherAllAliasesMaxDepth; |
2834 | |
2835 | /// \brief Specify maximum number of store instructions per memset call. |
2836 | /// |
2837 | /// When lowering \@llvm.memset this field specifies the maximum number of |
2838 | /// store operations that may be substituted for the call to memset. Targets |
2839 | /// must set this value based on the cost threshold for that target. Targets |
2840 | /// should assume that the memset will be done using as many of the largest |
2841 | /// store operations first, followed by smaller ones, if necessary, per |
2842 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
2843 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
2844 | /// store. This only applies to setting a constant array of a constant size. |
2845 | unsigned MaxStoresPerMemset; |
2846 | /// Likewise for functions with the OptSize attribute. |
2847 | unsigned MaxStoresPerMemsetOptSize; |
2848 | |
2849 | /// \brief Specify maximum number of store instructions per memcpy call. |
2850 | /// |
2851 | /// When lowering \@llvm.memcpy this field specifies the maximum number of |
2852 | /// store operations that may be substituted for a call to memcpy. Targets |
2853 | /// must set this value based on the cost threshold for that target. Targets |
2854 | /// should assume that the memcpy will be done using as many of the largest |
2855 | /// store operations first, followed by smaller ones, if necessary, per |
2856 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
2857 | /// with 32-bit alignment would result in one 4-byte store, a one 2-byte store |
2858 | /// and one 1-byte store. This only applies to copying a constant array of |
2859 | /// constant size. |
2860 | unsigned MaxStoresPerMemcpy; |
2861 | /// Likewise for functions with the OptSize attribute. |
2862 | unsigned MaxStoresPerMemcpyOptSize; |
2863 | /// \brief Specify max number of store instructions to glue in inlined memcpy. |
2864 | /// |
2865 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number |
2866 | /// of store instructions to keep together. This helps in pairing and |
2867 | // vectorization later on. |
2868 | unsigned MaxGluedStoresPerMemcpy = 0; |
2869 | |
2870 | /// \brief Specify maximum number of load instructions per memcmp call. |
2871 | /// |
2872 | /// When lowering \@llvm.memcmp this field specifies the maximum number of |
2873 | /// pairs of load operations that may be substituted for a call to memcmp. |
2874 | /// Targets must set this value based on the cost threshold for that target. |
2875 | /// Targets should assume that the memcmp will be done using as many of the |
2876 | /// largest load operations first, followed by smaller ones, if necessary, per |
2877 | /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine |
2878 | /// with 32-bit alignment would result in one 4-byte load, a one 2-byte load |
2879 | /// and one 1-byte load. This only applies to copying a constant array of |
2880 | /// constant size. |
2881 | unsigned MaxLoadsPerMemcmp; |
2882 | /// Likewise for functions with the OptSize attribute. |
2883 | unsigned MaxLoadsPerMemcmpOptSize; |
2884 | |
2885 | /// \brief Specify maximum number of store instructions per memmove call. |
2886 | /// |
2887 | /// When lowering \@llvm.memmove this field specifies the maximum number of |
2888 | /// store instructions that may be substituted for a call to memmove. Targets |
2889 | /// must set this value based on the cost threshold for that target. Targets |
2890 | /// should assume that the memmove will be done using as many of the largest |
2891 | /// store operations first, followed by smaller ones, if necessary, per |
2892 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
2893 | /// with 8-bit alignment would result in nine 1-byte stores. This only |
2894 | /// applies to copying a constant array of constant size. |
2895 | unsigned MaxStoresPerMemmove; |
2896 | /// Likewise for functions with the OptSize attribute. |
2897 | unsigned MaxStoresPerMemmoveOptSize; |
2898 | |
2899 | /// Tells the code generator that select is more expensive than a branch if |
2900 | /// the branch is usually predicted right. |
2901 | bool PredictableSelectIsExpensive; |
2902 | |
2903 | /// \see enableExtLdPromotion. |
2904 | bool EnableExtLdPromotion; |
2905 | |
2906 | /// Return true if the value types that can be represented by the specified |
2907 | /// register class are all legal. |
2908 | bool isLegalRC(const TargetRegisterInfo &TRI, |
2909 | const TargetRegisterClass &RC) const; |
2910 | |
/// Replace/modify any TargetFrameIndex operands with a target-dependent
2912 | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
2913 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
2914 | MachineBasicBlock *MBB) const; |
2915 | |
2916 | /// Replace/modify the XRay custom event operands with target-dependent |
2917 | /// details. |
2918 | MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, |
2919 | MachineBasicBlock *MBB) const; |
2920 | |
2921 | /// Replace/modify the XRay typed event operands with target-dependent |
2922 | /// details. |
2923 | MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, |
2924 | MachineBasicBlock *MBB) const; |
2925 | }; |
2926 | |
2927 | /// This class defines information used to lower LLVM code to legal SelectionDAG |
2928 | /// operators that the target instruction selector can accept natively. |
2929 | /// |
2930 | /// This class also defines callbacks that targets must implement to lower |
2931 | /// target-specific constructs to SelectionDAG operators. |
2932 | class TargetLowering : public TargetLoweringBase { |
2933 | public: |
2934 | struct DAGCombinerInfo; |
2935 | struct MakeLibCallOptions; |
2936 | |
2937 | TargetLowering(const TargetLowering &) = delete; |
2938 | TargetLowering &operator=(const TargetLowering &) = delete; |
2939 | |
2940 | /// NOTE: The TargetMachine owns TLOF. |
2941 | explicit TargetLowering(const TargetMachine &TM); |
2942 | |
2943 | bool isPositionIndependent() const; |
2944 | |
  // Default: no node is considered a source of divergence.
  virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
                                          FunctionLoweringInfo *FLI,
                                          LegacyDivergenceAnalysis *DA) const {
    return false;
  }
2950 | |
  virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
    return false; // Default: no node is guaranteed uniform.
  }
2954 | |
2955 | /// Returns true by value, base pointer and offset pointer and addressing mode |
2956 | /// by reference if the node's address can be legally represented as |
2957 | /// pre-indexed load / store address. |
  // Default: target supports no pre-indexed addressing; out-params untouched.
  virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
                                         SDValue &/*Offset*/,
                                         ISD::MemIndexedMode &/*AM*/,
                                         SelectionDAG &/*DAG*/) const {
    return false;
  }
2964 | |
2965 | /// Returns true by value, base pointer and offset pointer and addressing mode |
2966 | /// by reference if this node can be combined with a load / store to form a |
2967 | /// post-indexed load / store. |
  // Default: target supports no post-indexed addressing; out-params untouched.
  virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
                                          SDValue &/*Base*/,
                                          SDValue &/*Offset*/,
                                          ISD::MemIndexedMode &/*AM*/,
                                          SelectionDAG &/*DAG*/) const {
    return false;
  }
2975 | |
2976 | /// Returns true if the specified base+offset is a legal indexed addressing |
2977 | /// mode for this target. \p MI is the load or store instruction that is being |
2978 | /// considered for transformation. |
  // Default (GlobalISel path): indexed addressing is not legal.
  virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
                               bool IsPre, MachineRegisterInfo &MRI) const {
    return false;
  }
2983 | |
2984 | /// Return the entry encoding for a jump table in the current function. The |
2985 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
2986 | virtual unsigned getJumpTableEncoding() const; |
2987 | |
2988 | virtual const MCExpr * |
2989 | LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, |
2990 | const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, |
2991 | MCContext &/*Ctx*/) const { |
2992 | llvm_unreachable("Need to implement this hook if target has custom JTIs")::llvm::llvm_unreachable_internal("Need to implement this hook if target has custom JTIs" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 2992); |
2993 | } |
2994 | |
  /// Returns relocation base for the given PIC jumptable.
  virtual SDValue getPICJumpTableRelocBase(SDValue Table,
                                           SelectionDAG &DAG) const;

  /// This returns the relocation base for the given PIC jumptable, the same as
  /// getPICJumpTableRelocBase, but as an MCExpr.
  virtual const MCExpr *
  getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                               unsigned JTI, MCContext &Ctx) const;

  /// Return true if folding a constant offset with the given GlobalAddress is
  /// legal. It is frequently not legal in PIC relocation models.
  virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;

  /// NOTE(review): undocumented in the original; presumably checks whether
  /// \p Node is in a position eligible for tail-call lowering and computes
  /// the input \p Chain — confirm against the out-of-line definition.
  bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                            SDValue &Chain) const;

  /// NOTE(review): undocumented in the original; presumably rewrites the
  /// operands/condition of a floating-point SETCC for softening — confirm
  /// against the out-of-line definition.
  void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
                           SDValue &NewRHS, ISD::CondCode &CCCode,
                           const SDLoc &DL, const SDValue OldLHS,
                           const SDValue OldRHS) const;

  /// Returns a pair of (return value, chain).
  /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
  std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
                                          EVT RetVT, ArrayRef<SDValue> Ops,
                                          MakeLibCallOptions CallOptions,
                                          const SDLoc &dl) const;

  /// Check whether parameters to a call that are passed in callee saved
  /// registers are the same as from the calling function. This needs to be
  /// checked for tail call eligibility.
  bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
                            const uint32_t *CallerPreservedMask,
                            const SmallVectorImpl<CCValAssign> &ArgLocs,
                            const SmallVectorImpl<SDValue> &OutVals) const;

  //===--------------------------------------------------------------------===//
  // TargetLowering Optimization Methods
  //
3036 | /// A convenience struct that encapsulates a DAG, and two SDValues for |
3037 | /// returning information from TargetLowering to its clients that want to |
3038 | /// combine. |
3039 | struct TargetLoweringOpt { |
3040 | SelectionDAG &DAG; |
3041 | bool LegalTys; |
3042 | bool LegalOps; |
3043 | SDValue Old; |
3044 | SDValue New; |
3045 | |
3046 | explicit TargetLoweringOpt(SelectionDAG &InDAG, |
3047 | bool LT, bool LO) : |
3048 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
3049 | |
3050 | bool LegalTypes() const { return LegalTys; } |
3051 | bool LegalOperations() const { return LegalOps; } |
3052 | |
3053 | bool CombineTo(SDValue O, SDValue N) { |
3054 | Old = O; |
3055 | New = N; |
3056 | return true; |
3057 | } |
3058 | }; |
3059 | |
  /// Determines the optimal series of memory ops to replace the memset / memcpy.
  /// Return true if the number of memory ops is below the threshold (Limit).
  /// It returns the types of the sequence of memory ops to perform
  /// memset / memcpy by reference.
  bool findOptimalMemOpLowering(std::vector<EVT> &MemOps,
                                unsigned Limit, uint64_t Size,
                                unsigned DstAlign, unsigned SrcAlign,
                                bool IsMemset,
                                bool ZeroMemset,
                                bool MemcpyStrSrc,
                                bool AllowOverlap,
                                unsigned DstAS, unsigned SrcAS,
                                const AttributeList &FuncAttributes) const;

  /// Check to see if the specified operand of the specified instruction is a
  /// constant integer. If so, check to see if there are any bits set in the
  /// constant that are not demanded. If so, shrink the constant and return
  /// true.
  bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                              TargetLoweringOpt &TLO) const;

  // Target hook to do target-specific const optimization, which is called by
  // ShrinkDemandedConstant. This function should return true if the target
  // doesn't want ShrinkDemandedConstant to further optimize the constant.
  virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                            TargetLoweringOpt &TLO) const {
    return false;
  }

  /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This
  /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
  /// generalized for targets with other types of implicit widening casts.
  bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
                        TargetLoweringOpt &TLO) const;

  /// Look at Op. At this point, we know that only the DemandedBits bits of the
  /// result of Op are ever used downstream. If we can use this information to
  /// simplify Op, create a new simplified DAG node and return true, returning
  /// the original and new nodes in Old and New. Otherwise, analyze the
  /// expression and return a mask of KnownOne and KnownZero bits for the
  /// expression (used to simplify the caller). The KnownZero/One bits may only
  /// be accurate for those bits in the Demanded masks.
  /// \p AssumeSingleUse When this parameter is true, this function will
  /// attempt to simplify \p Op even if there are multiple uses.
  /// Callers are responsible for correctly updating the DAG based on the
  /// results of this function, because simply replacing TLO.Old
  /// with TLO.New will be incorrect when this parameter is true and TLO.Old
  /// has multiple uses.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            const APInt &DemandedElts, KnownBits &Known,
                            TargetLoweringOpt &TLO, unsigned Depth = 0,
                            bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            KnownBits &Known, TargetLoweringOpt &TLO,
                            unsigned Depth = 0,
                            bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedBits.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
                            DAGCombinerInfo &DCI) const;

  /// More limited version of SimplifyDemandedBits that can be used to "look
  /// through" ops that don't contribute to the DemandedBits/DemandedElts -
  /// bitwise ops etc.
  SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          const APInt &DemandedElts,
                                          SelectionDAG &DAG,
                                          unsigned Depth) const;

  /// Look at Vector Op. At this point, we know that only the DemandedElts
  /// elements of the result of Op are ever used downstream. If we can use
  /// this information to simplify Op, create a new simplified DAG node and
  /// return true, storing the original and new nodes in TLO.
  /// Otherwise, analyze the expression and return a mask of KnownUndef and
  /// KnownZero elements for the expression (used to simplify the caller).
  /// The KnownUndef/Zero elements may only be accurate for those bits
  /// in the DemandedMask.
  /// \p AssumeSingleUse When this parameter is true, this function will
  /// attempt to simplify \p Op even if there are multiple uses.
  /// Callers are responsible for correctly updating the DAG based on the
  /// results of this function, because simply replacing TLO.Old
  /// with TLO.New will be incorrect when this parameter is true and TLO.Old
  /// has multiple uses.
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
                                  APInt &KnownUndef, APInt &KnownZero,
                                  TargetLoweringOpt &TLO, unsigned Depth = 0,
                                  bool AssumeSingleUse = false) const;

  /// Helper wrapper around SimplifyDemandedVectorElts.
  /// Adds Op back to the worklist upon success.
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                  APInt &KnownUndef, APInt &KnownZero,
                                  DAGCombinerInfo &DCI) const;

  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
  /// argument allows us to only collect the known bits that are shared by the
  /// requested vector elements.
  virtual void computeKnownBitsForTargetNode(const SDValue Op,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth = 0) const;
  /// Determine which of the bits specified in Mask are known to be either zero
  /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
  /// argument allows us to only collect the known bits that are shared by the
  /// requested vector elements. This is for GISel.
  virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
                                              Register R, KnownBits &Known,
                                              const APInt &DemandedElts,
                                              const MachineRegisterInfo &MRI,
                                              unsigned Depth = 0) const;

  /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
  /// Default implementation computes low bits based on alignment
  /// information. This should preserve known bits passed into it.
  virtual void computeKnownBitsForFrameIndex(const SDValue FIOp,
                                             KnownBits &Known,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth = 0) const;

  /// This method can be implemented by targets that want to expose additional
  /// information about sign bits to the DAG Combiner. The DemandedElts
  /// argument allows us to only collect the minimum sign bits that are shared
  /// by the requested vector elements.
  virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth = 0) const;

  /// Attempt to simplify any target nodes based on the demanded vector
  /// elements, returning true on success. Otherwise, analyze the expression and
  /// return a mask of KnownUndef and KnownZero elements for the expression
  /// (used to simplify the caller). The KnownUndef/Zero elements may only be
  /// accurate for those bits in the DemandedMask.
  virtual bool SimplifyDemandedVectorEltsForTargetNode(
      SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
      APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;

  /// Attempt to simplify any target nodes based on the demanded bits/elts,
  /// returning true on success. Otherwise, analyze the
  /// expression and return a mask of KnownOne and KnownZero bits for the
  /// expression (used to simplify the caller). The KnownZero/One bits may only
  /// be accurate for those bits in the Demanded masks.
  virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                                 const APInt &DemandedBits,
                                                 const APInt &DemandedElts,
                                                 KnownBits &Known,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth = 0) const;

  /// More limited version of SimplifyDemandedBits that can be used to "look
  /// through" ops that don't contribute to the DemandedBits/DemandedElts -
  /// bitwise ops etc.
  virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
      SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
      SelectionDAG &DAG, unsigned Depth) const;

  /// Tries to build a legal vector shuffle using the provided parameters
  /// or equivalent variations. The Mask argument may be modified as the
  /// function tries different variations.
  /// Returns an empty SDValue if the operation fails.
  SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
                                  SDValue N1, MutableArrayRef<int> Mask,
                                  SelectionDAG &DAG) const;

  /// This method returns the constant pool value that will be loaded by LD.
  /// NOTE: You must check for implicit extensions of the constant by LD.
  virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;

  /// If \p SNaN is false, \returns true if \p Op is known to never be any
  /// NaN. If \p SNaN is true, returns if \p Op is known to never be a signaling
  /// NaN.
  virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
                                            const SelectionDAG &DAG,
                                            bool SNaN = false,
                                            unsigned Depth = 0) const;
  /// Handle given to targets during DAG combining; carries the (opaque)
  /// DAGCombiner object and the current combine level.
  struct DAGCombinerInfo {
    void *DC; // The DAG Combiner object.
    CombineLevel Level;       // Current combine/legalization stage.
    bool CalledByLegalizer;   // True when the legalizer drives the combine.

  public:
    SelectionDAG &DAG;

    DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
        : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}

    // Convenience queries over Level.
    bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
    bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
    bool isAfterLegalizeDAG() const {
      return Level == AfterLegalizeDAG;
    }
    CombineLevel getDAGCombineLevel() { return Level; }
    bool isCalledByLegalizer() const { return CalledByLegalizer; }

    // Declared here; bodies are not part of this header excerpt.
    void AddToWorklist(SDNode *N);
    SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);

    void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
  };
3268 | |
  /// Return if the N is a constant or constant vector equal to the true value
  /// from getBooleanContents().
  bool isConstTrueVal(const SDNode *N) const;

  /// Return if the N is a constant or constant vector equal to the false value
  /// from getBooleanContents().
  bool isConstFalseVal(const SDNode *N) const;

  /// Return if \p N is a True value when extended to \p VT.
  bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;

  /// Try to simplify a setcc built with the specified operands and cc. If it is
  /// unable to simplify it, return a null SDValue.
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        bool foldBooleans, DAGCombinerInfo &DCI,
                        const SDLoc &dl) const;

  // For targets which wrap address, unwrap for analysis.
  // The default is the identity: return the value unchanged.
  virtual SDValue unwrapAddress(SDValue N) const { return N; }

  /// Returns true (and the GlobalValue and the offset) if the node is a
  /// GlobalAddress + offset.
  virtual bool
  isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;

  /// This method will be invoked for all target nodes and for any
  /// target-independent nodes that the target has registered with invoke it
  /// for.
  ///
  /// The semantics are as follows:
  /// Return Value:
  ///   SDValue.Val == 0 - No change was made
  ///   SDValue.Val == N - N was replaced, is dead, and is already handled.
  ///   otherwise        - N should be replaced by the returned Operand.
  ///
  /// In addition, methods provided by DAGCombinerInfo may be used to perform
  /// more complex transformations.
  ///
  virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;

  /// Return true if it is profitable to move this shift by a constant amount
  /// through its operand, adjusting any immediate operands as necessary to
  /// preserve semantics. This transformation may not be desirable if it
  /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
  /// extraction in AArch64). By default, it returns true.
  ///
  /// @param N the shift node
  /// @param Level the current DAGCombine legalization level.
  virtual bool isDesirableToCommuteWithShift(const SDNode *N,
                                             CombineLevel Level) const {
    return true;
  }

  // Return true if it is profitable to combine a BUILD_VECTOR with a
  // stride-pattern to a shuffle and a truncate.
  // Example of such a combine:
  // v4i32 build_vector((extract_elt V, 1),
  //                    (extract_elt V, 3),
  //                    (extract_elt V, 5),
  //                    (extract_elt V, 7))
  //  -->
  // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64)
  virtual bool isDesirableToCombineBuildVectorToShuffleTruncate(
      ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
    return false;
  }

  /// Return true if the target has native support for the specified value type
  /// and it is 'desirable' to use the type for the given node type. e.g. On x86
  /// i16 is legal, but undesirable since i16 instruction encodings are longer
  /// and some i16 instructions are slow.
  virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
    // By default, assume all legal types are desirable.
    return isTypeLegal(VT);
  }

  /// Return true if it is profitable for dag combiner to transform a floating
  /// point op of specified opcode to an equivalent op of an integer
  /// type. e.g. f32 load -> i32 load can be profitable on ARM.
  virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
                                                 EVT /*VT*/) const {
    return false;
  }

  /// This method query the target whether it is beneficial for dag combiner to
  /// promote the specified node. If true, it should return the desired
  /// promotion type by reference.
  virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
    return false;
  }

  /// Return true if the target supports swifterror attribute. It optimizes
  /// loads and stores to reading and writing a specific register.
  virtual bool supportSwiftError() const {
    return false;
  }

  /// Return true if the target supports that a subset of CSRs for the given
  /// machine function is handled explicitly via copies.
  virtual bool supportSplitCSR(MachineFunction *MF) const {
    return false;
  }
3371 | |
3372 | /// Perform necessary initialization to handle a subset of CSRs explicitly |
3373 | /// via copies. This function is called at the beginning of instruction |
3374 | /// selection. |
3375 | virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { |
3376 | llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3376); |
3377 | } |
3378 | |
3379 | /// Insert explicit copies in entry and exit blocks. We copy a subset of |
3380 | /// CSRs to virtual registers in the entry block, and copy them back to |
3381 | /// physical registers in the exit blocks. This function is called at the end |
3382 | /// of instruction selection. |
3383 | virtual void insertCopiesSplitCSR( |
3384 | MachineBasicBlock *Entry, |
3385 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { |
3386 | llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3386); |
3387 | } |
3388 | |
  /// Return 1 if we can compute the negated form of the specified expression
  /// for the same cost as the expression itself, or 2 if we can compute the
  /// negated form more cheaply than the expression itself. Else return 0.
  virtual char isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
                                  bool LegalOperations, bool ForCodeSize,
                                  unsigned Depth = 0) const;

  /// If isNegatibleForFree returns true, return the newly negated expression.
  virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                       bool LegalOperations, bool ForCodeSize,
                                       unsigned Depth = 0) const;

  //===--------------------------------------------------------------------===//
  // Lowering methods - These methods must be implemented by targets so that
  // the SelectionDAGBuilder code knows how to lower these.
  //
3405 | |
3406 | /// This hook must be implemented to lower the incoming (formal) arguments, |
3407 | /// described by the Ins array, into the specified DAG. The implementation |
3408 | /// should fill in the InVals array with legal-type argument values, and |
3409 | /// return the resulting token chain value. |
3410 | virtual SDValue LowerFormalArguments( |
3411 | SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, |
3412 | const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, |
3413 | SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { |
3414 | llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3414); |
3415 | } |
3416 | |
3417 | /// This structure contains all information that is necessary for lowering |
3418 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder |
3419 | /// needs to lower a call, and targets will see this struct in their LowerCall |
3420 | /// implementation. |
3421 | struct CallLoweringInfo { |
3422 | SDValue Chain; |
3423 | Type *RetTy = nullptr; |
3424 | bool RetSExt : 1; |
3425 | bool RetZExt : 1; |
3426 | bool IsVarArg : 1; |
3427 | bool IsInReg : 1; |
3428 | bool DoesNotReturn : 1; |
3429 | bool IsReturnValueUsed : 1; |
3430 | bool IsConvergent : 1; |
3431 | bool IsPatchPoint : 1; |
3432 | |
3433 | // IsTailCall should be modified by implementations of |
3434 | // TargetLowering::LowerCall that perform tail call conversions. |
3435 | bool IsTailCall = false; |
3436 | |
3437 | // Is Call lowering done post SelectionDAG type legalization. |
3438 | bool IsPostTypeLegalization = false; |
3439 | |
3440 | unsigned NumFixedArgs = -1; |
3441 | CallingConv::ID CallConv = CallingConv::C; |
3442 | SDValue Callee; |
3443 | ArgListTy Args; |
3444 | SelectionDAG &DAG; |
3445 | SDLoc DL; |
3446 | ImmutableCallSite CS; |
3447 | SmallVector<ISD::OutputArg, 32> Outs; |
3448 | SmallVector<SDValue, 32> OutVals; |
3449 | SmallVector<ISD::InputArg, 32> Ins; |
3450 | SmallVector<SDValue, 4> InVals; |
3451 | |
3452 | CallLoweringInfo(SelectionDAG &DAG) |
3453 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), |
3454 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), |
3455 | IsPatchPoint(false), DAG(DAG) {} |
3456 | |
3457 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { |
3458 | DL = dl; |
3459 | return *this; |
3460 | } |
3461 | |
3462 | CallLoweringInfo &setChain(SDValue InChain) { |
3463 | Chain = InChain; |
3464 | return *this; |
3465 | } |
3466 | |
3467 | // setCallee with target/module-specific attributes |
3468 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, |
3469 | SDValue Target, ArgListTy &&ArgsList) { |
3470 | RetTy = ResultType; |
3471 | Callee = Target; |
3472 | CallConv = CC; |
3473 | NumFixedArgs = ArgsList.size(); |
3474 | Args = std::move(ArgsList); |
3475 | |
3476 | DAG.getTargetLoweringInfo().markLibCallAttributes( |
3477 | &(DAG.getMachineFunction()), CC, Args); |
3478 | return *this; |
3479 | } |
3480 | |
3481 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, |
3482 | SDValue Target, ArgListTy &&ArgsList) { |
3483 | RetTy = ResultType; |
3484 | Callee = Target; |
3485 | CallConv = CC; |
3486 | NumFixedArgs = ArgsList.size(); |
3487 | Args = std::move(ArgsList); |
3488 | return *this; |
3489 | } |
3490 | |
3491 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, |
3492 | SDValue Target, ArgListTy &&ArgsList, |
3493 | ImmutableCallSite Call) { |
3494 | RetTy = ResultType; |
3495 | |
3496 | IsInReg = Call.hasRetAttr(Attribute::InReg); |
3497 | DoesNotReturn = |
3498 | Call.doesNotReturn() || |
3499 | (!Call.isInvoke() && |
3500 | isa<UnreachableInst>(Call.getInstruction()->getNextNode())); |
3501 | IsVarArg = FTy->isVarArg(); |
3502 | IsReturnValueUsed = !Call.getInstruction()->use_empty(); |
3503 | RetSExt = Call.hasRetAttr(Attribute::SExt); |
3504 | RetZExt = Call.hasRetAttr(Attribute::ZExt); |
3505 | |
3506 | Callee = Target; |
3507 | |
3508 | CallConv = Call.getCallingConv(); |
3509 | NumFixedArgs = FTy->getNumParams(); |
3510 | Args = std::move(ArgsList); |
3511 | |
3512 | CS = Call; |
3513 | |
3514 | return *this; |
3515 | } |
3516 | |
3517 | CallLoweringInfo &setInRegister(bool Value = true) { |
3518 | IsInReg = Value; |
3519 | return *this; |
3520 | } |
3521 | |
3522 | CallLoweringInfo &setNoReturn(bool Value = true) { |
3523 | DoesNotReturn = Value; |
3524 | return *this; |
3525 | } |
3526 | |
3527 | CallLoweringInfo &setVarArg(bool Value = true) { |
3528 | IsVarArg = Value; |
3529 | return *this; |
3530 | } |
3531 | |
3532 | CallLoweringInfo &setTailCall(bool Value = true) { |
3533 | IsTailCall = Value; |
3534 | return *this; |
3535 | } |
3536 | |
3537 | CallLoweringInfo &setDiscardResult(bool Value = true) { |
3538 | IsReturnValueUsed = !Value; |
3539 | return *this; |
3540 | } |
3541 | |
3542 | CallLoweringInfo &setConvergent(bool Value = true) { |
3543 | IsConvergent = Value; |
3544 | return *this; |
3545 | } |
3546 | |
3547 | CallLoweringInfo &setSExtResult(bool Value = true) { |
3548 | RetSExt = Value; |
3549 | return *this; |
3550 | } |
3551 | |
3552 | CallLoweringInfo &setZExtResult(bool Value = true) { |
3553 | RetZExt = Value; |
3554 | return *this; |
3555 | } |
3556 | |
3557 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { |
3558 | IsPatchPoint = Value; |
3559 | return *this; |
3560 | } |
3561 | |
3562 | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { |
3563 | IsPostTypeLegalization = Value; |
3564 | return *this; |
3565 | } |
3566 | |
3567 | ArgListTy &getArgs() { |
3568 | return Args; |
3569 | } |
3570 | }; |
3571 | |
  /// This structure is used to pass arguments to makeLibCall function.
  struct MakeLibCallOptions {
    // By passing type list before soften to makeLibCall, the target hook
    // shouldExtendTypeInLibCall can get the original type before soften.
    ArrayRef<EVT> OpsVTBeforeSoften;
    EVT RetVTBeforeSoften;
    bool IsSExt : 1;
    bool DoesNotReturn : 1;
    bool IsReturnValueUsed : 1;
    bool IsPostTypeLegalization : 1;
    bool IsSoften : 1;

    MakeLibCallOptions()
        : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
          IsPostTypeLegalization(false), IsSoften(false) {}

    MakeLibCallOptions &setSExt(bool Value = true) {
      IsSExt = Value;
      return *this;
    }

    MakeLibCallOptions &setNoReturn(bool Value = true) {
      DoesNotReturn = Value;
      return *this;
    }

    // Note the inversion: discarding the result clears IsReturnValueUsed.
    MakeLibCallOptions &setDiscardResult(bool Value = true) {
      IsReturnValueUsed = !Value;
      return *this;
    }

    MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
      IsPostTypeLegalization = Value;
      return *this;
    }

    /// Record the pre-softening operand/result types (see comment on
    /// OpsVTBeforeSoften above) and mark this call as softened.
    MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
                                                bool Value = true) {
      OpsVTBeforeSoften = OpsVT;
      RetVTBeforeSoften = RetVT;
      IsSoften = Value;
      return *this;
    }
  };
3616 | |
  /// This function lowers an abstract call to a function into an actual call.
  /// This returns a pair of operands. The first element is the return value
  /// for the function (if RetTy is not VoidTy). The second element is the
  /// outgoing token chain. It calls LowerCall to do the actual lowering.
  std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
3622 | |
3623 | /// This hook must be implemented to lower calls into the specified |
3624 | /// DAG. The outgoing arguments to the call are described by the Outs array, |
3625 | /// and the values to be returned by the call are described by the Ins |
3626 | /// array. The implementation should fill in the InVals array with legal-type |
3627 | /// return values from the call, and return the resulting token chain value. |
3628 | virtual SDValue |
3629 | LowerCall(CallLoweringInfo &/*CLI*/, |
3630 | SmallVectorImpl<SDValue> &/*InVals*/) const { |
3631 | llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3631); |
3632 | } |
3633 | |
  /// Target-specific cleanup for formal ByVal parameters. Default: no-op.
  virtual void HandleByVal(CCState *, unsigned &, unsigned) const {}

  /// This hook should be implemented to check whether the return values
  /// described by the Outs array can fit into the return registers. If false
  /// is returned, an sret-demotion is performed.
  virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
                              MachineFunction &/*MF*/, bool /*isVarArg*/,
                              const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
                              LLVMContext &/*Context*/) const
  {
    // Return true by default to get preexisting behavior.
    return true;
  }
3648 | |
3649 | /// This hook must be implemented to lower outgoing return values, described |
3650 | /// by the Outs array, into the specified DAG. The implementation should |
3651 | /// return the resulting token chain value. |
3652 | virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, |
3653 | bool /*isVarArg*/, |
3654 | const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, |
3655 | const SmallVectorImpl<SDValue> & /*OutVals*/, |
3656 | const SDLoc & /*dl*/, |
3657 | SelectionDAG & /*DAG*/) const { |
3658 | llvm_unreachable("Not Implemented")::llvm::llvm_unreachable_internal("Not Implemented", "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3658); |
3659 | } |
3660 | |
3661 | /// Return true if result of the specified node is used by a return node |
3662 | /// only. It also compute and return the input chain for the tail call. |
3663 | /// |
3664 | /// This is used to determine whether it is possible to codegen a libcall as |
3665 | /// tail call at legalization time. |
3666 | virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
3667 | return false; // Conservative default: prevents emitting libcalls as tail calls at legalization.
3668 | }
3669 | |
3670 | /// Return true if the target may be able to emit the call instruction as a tail
3671 | /// call. This is used by optimization passes to determine if it's profitable |
3672 | /// to duplicate return instructions to enable tailcall optimization. |
3673 | virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
3674 | return false; // Conservative default: duplicating returns for tail-call formation is assumed unprofitable.
3675 | }
3676 | |
3677 | /// Return the builtin name for the __builtin___clear_cache intrinsic |
3678 | /// Default is to invoke the clear cache library call |
3679 | virtual const char * getClearCacheBuiltinName() const {
3680 | return "__clear_cache"; // Default: route __builtin___clear_cache to the library call of this name.
3681 | }
3682 | |
3683 | /// Return the register ID of the name passed in. Used by named register |
3684 | /// global variables extension. There is no target-independent behaviour |
3685 | /// so the default action is to bail. |
3686 | virtual Register getRegisterByName(const char* RegName, EVT VT,
3687 | const MachineFunction &MF) const {
3688 | report_fatal_error("Named registers not implemented for this target"); // No target-independent name->register mapping exists, so the default bails fatally.
3689 | }
3690 | |
3691 | /// Return the type that should be used to zero or sign extend a |
3692 | /// zeroext/signext integer return value. FIXME: Some C calling conventions |
3693 | /// require the return type to be promoted, but this is not true all the time, |
3694 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling |
3695 | /// conventions. The frontend should handle this and include all of the |
3696 | /// necessary information. |
3697 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
3698 | ISD::NodeType /*ExtendKind*/) const {
3699 | EVT MinVT = getRegisterType(Context, MVT::i32); // The register type this target uses for i32 — the minimum promotion width.
3700 | return VT.bitsLT(MinVT) ? MinVT : VT; // Promote sub-i32-register values; wider types pass through unchanged.
3701 | }
3702 | |
3703 | /// For some targets, an LLVM struct type must be broken down into multiple |
3704 | /// simple types, but the calling convention specifies that the entire struct |
3705 | /// must be passed in a block of consecutive registers. |
3706 | virtual bool
3707 | functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
3708 | bool isVarArg) const {
3709 | return false; // Default: no calling convention requires a consecutive-register block for struct arguments.
3710 | }
3711 | |
3712 | /// For most targets, an LLVM type must be broken down into multiple |
3713 | /// smaller types. Usually the halves are ordered according to the endianness |
3714 | /// but for some platform that would break. So this method will default to |
3715 | /// matching the endianness but can be overridden. |
3716 | virtual bool
3717 | shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
3718 | return DL.isLittleEndian(); // Default: order the split halves to match the target's data-layout endianness.
3719 | }
3720 | |
3721 | /// Returns a 0 terminated array of registers that can be safely used as |
3722 | /// scratch registers. |
3723 | virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
3724 | return nullptr; // Default: no registers are guaranteed safe to use as scratch.
3725 | }
3726 | |
3727 | /// This callback is used to prepare for a volatile or atomic load. |
3728 | /// It takes a chain node as input and returns the chain for the load itself. |
3729 | /// |
3730 | /// Having a callback like this is necessary for targets like SystemZ, |
3731 | /// which allows a CPU to reuse the result of a previous load indefinitely, |
3732 | /// even if a cache-coherent store is performed by another CPU. The default |
3733 | /// implementation does nothing. |
3734 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
3735 | SelectionDAG &DAG) const {
3736 | return Chain; // Default no-op: the incoming chain is used for the load unchanged.
3737 | }
3738 | |
3739 | /// This callback is used to inspect load/store instructions and add |
3740 | /// target-specific MachineMemOperand flags to them. The default |
3741 | /// implementation does nothing. |
3742 | virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const {
3743 | return MachineMemOperand::MONone; // Default: add no target-specific memory-operand flags.
3744 | }
3745 | |
3746 | /// Should SelectionDAG lower an atomic store of the given kind as a normal |
3747 | /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to |
3748 | /// eventually migrate all targets to using StoreSDNodes, but porting is
3749 | /// being done target at a time. |
3750 | virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
3751 | assert(SI.isAtomic() && "violated precondition")((SI.isAtomic() && "violated precondition") ? static_cast <void> (0) : __assert_fail ("SI.isAtomic() && \"violated precondition\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3751, __PRETTY_FUNCTION__)); // Precondition: caller only queries this for atomic stores. (Inline assert expansion from the listing.)
3752 | return false; // Default: keep atomic stores as AtomicSDNodes; targets opt in to StoreSDNode lowering.
3753 | }
3754 | |
3755 | /// Should SelectionDAG lower an atomic load of the given kind as a normal |
3756 | /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to |
3757 | /// eventually migrate all targets to using LoadSDNodes, but porting is
3758 | /// being done target at a time. |
3759 | virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
3760 | assert(LI.isAtomic() && "violated precondition")((LI.isAtomic() && "violated precondition") ? static_cast <void> (0) : __assert_fail ("LI.isAtomic() && \"violated precondition\"" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3760, __PRETTY_FUNCTION__)); // Precondition: caller only queries this for atomic loads. (Inline assert expansion from the listing.)
3761 | return false; // Default: keep atomic loads as AtomicSDNodes; targets opt in to LoadSDNode lowering.
3762 | }
3763 | |
3764 | |
3765 | /// This callback is invoked by the type legalizer to legalize nodes with an |
3766 | /// illegal operand type but legal result types. It replaces the |
3767 | /// LowerOperation callback in the type Legalizer. The reason we can not do |
3768 | /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to |
3769 | /// use this callback. |
3770 | /// |
3771 | /// TODO: Consider merging with ReplaceNodeResults. |
3772 | /// |
3773 | /// The target places new result values for the node in Results (their number |
3774 | /// and types must exactly match those of the original return values of |
3775 | /// the node), or leaves Results empty, which indicates that the node is not |
3776 | /// to be custom lowered after all. |
3777 | /// The default implementation calls LowerOperation. |
3778 | virtual void LowerOperationWrapper(SDNode *N, |
3779 | SmallVectorImpl<SDValue> &Results, |
3780 | SelectionDAG &DAG) const; |
3781 | |
3782 | /// This callback is invoked for operations that are unsupported by the |
3783 | /// target, which are registered to use 'custom' lowering, and whose defined |
3784 | /// values are all legal. If the target has no operations that require custom |
3785 | /// lowering, it need not implement this. The default implementation of this |
3786 | /// aborts. |
3787 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; |
3788 | |
3789 | /// This callback is invoked when a node result type is illegal for the |
3790 | /// target, and the operation was registered to use 'custom' lowering for that |
3791 | /// result type. The target places new result values for the node in Results |
3792 | /// (their number and types must exactly match those of the original return |
3793 | /// values of the node), or leaves Results empty, which indicates that the |
3794 | /// node is not to be custom lowered after all. |
3795 | /// |
3796 | /// If the target has no operations that require custom lowering, it need not |
3797 | /// implement this. The default implementation aborts. |
3798 | virtual void ReplaceNodeResults(SDNode * /*N*/,
3799 | SmallVectorImpl<SDValue> &/*Results*/,
3800 | SelectionDAG &/*DAG*/) const {
3801 | llvm_unreachable("ReplaceNodeResults not implemented for this target!")::llvm::llvm_unreachable_internal("ReplaceNodeResults not implemented for this target!" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 3801); // Default aborts: only targets that register 'custom' result-type legalization override this. (Inline macro expansion.)
3802 | }
3803 | |
3804 | /// This method returns the name of a target specific DAG node. |
3805 | virtual const char *getTargetNodeName(unsigned Opcode) const; |
3806 | |
3807 | /// This method returns a target specific FastISel object, or null if the |
3808 | /// target does not support "fast" ISel. |
3809 | virtual FastISel *createFastISel(FunctionLoweringInfo &,
3810 | const TargetLibraryInfo *) const {
3811 | return nullptr; // Default: this target provides no "fast" instruction selector.
3812 | }
3813 | |
3814 | bool verifyReturnAddressArgumentIsConstant(SDValue Op, |
3815 | SelectionDAG &DAG) const; |
3816 | |
3817 | //===--------------------------------------------------------------------===// |
3818 | // Inline Asm Support hooks |
3819 | // |
3820 | |
3821 | /// This hook allows the target to expand an inline asm call to be explicit |
3822 | /// llvm code if it wants to. This is useful for turning simple inline asms |
3823 | /// into LLVM intrinsics, which gives the compiler more information about the |
3824 | /// behavior of the code. |
3825 | virtual bool ExpandInlineAsm(CallInst *) const {
3826 | return false; // Default: never rewrite an inline-asm call into explicit LLVM IR.
3827 | }
3828 | |
3829 | enum ConstraintType { // Classification assigned to an inline-asm constraint string (see AsmOperandInfo::ConstraintType).
3830 | C_Register, // Constraint represents specific register(s).
3831 | C_RegisterClass, // Constraint represents any of register(s) in class.
3832 | C_Memory, // Memory constraint.
3833 | C_Immediate, // Requires an immediate.
3834 | C_Other, // Something else.
3835 | C_Unknown // Unsupported constraint.
3836 | };
3837 | |
3838 | enum ConstraintWeight { // Match-quality values returned by the get*ConstraintMatchWeight hooks below.
3839 | // Generic weights.
3840 | CW_Invalid = -1, // No match.
3841 | CW_Okay = 0, // Acceptable.
3842 | CW_Good = 1, // Good weight.
3843 | CW_Better = 2, // Better weight.
3844 | CW_Best = 3, // Best weight.
3845 |
3846 | // Well-known weights.
3847 | CW_SpecificReg = CW_Okay, // Specific register operands.
3848 | CW_Register = CW_Good, // Register operands.
3849 | CW_Memory = CW_Better, // Memory operands.
3850 | CW_Constant = CW_Best, // Constant operand.
3851 | CW_Default = CW_Okay // Default or don't know type.
3852 | };
3853 | |
3854 | /// This contains information for each constraint that we are lowering. |
3855 | struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
3856 | /// This contains the actual string for the code, like "m". TargetLowering
3857 | /// picks the 'best' code from ConstraintInfo::Codes that most closely
3858 | /// matches the operand.
3859 | std::string ConstraintCode;
3860 |
3861 | /// Information about the constraint code, e.g. Register, RegisterClass,
3862 | /// Memory, Other, Unknown.
3863 | TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
3864 |
3865 | /// If this is the result output operand or a clobber, this is null,
3866 | /// otherwise it is the incoming operand to the CallInst. This gets
3867 | /// modified as the asm is processed.
3868 | Value *CallOperandVal = nullptr;
3869 |
3870 | /// The ValueType for the operand value.
3871 | MVT ConstraintVT = MVT::Other;
3872 |
3873 | /// Copy constructor for copying from a ConstraintInfo.
3874 | AsmOperandInfo(InlineAsm::ConstraintInfo Info) // Takes Info by value and moves it into the base. Non-explicit, presumably to allow implicit conversion from parsed ConstraintInfos — TODO confirm.
3875 | : InlineAsm::ConstraintInfo(std::move(Info)) {}
3876 |
3877 | /// Return true if this is an input operand that is a matching constraint
3878 | /// like "4".
3879 | bool isMatchingInputConstraint() const;
3880 |
3881 | /// If this is an input matching constraint, this method returns the output
3882 | /// operand it matches.
3883 | unsigned getMatchedOperand() const;
3884 | };
3885 | |
3886 | using AsmOperandInfoVector = std::vector<AsmOperandInfo>; |
3887 | |
3888 | /// Split up the constraint string from the inline assembly value into the |
3889 | /// specific constraints and their prefixes, and also tie in the associated |
3890 | /// operand values. If this returns an empty vector, and if the constraint |
3891 | /// string itself isn't empty, there was an error parsing. |
3892 | virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, |
3893 | const TargetRegisterInfo *TRI, |
3894 | ImmutableCallSite CS) const; |
3895 | |
3896 | /// Examine constraint type and operand type and determine a weight value. |
3897 | /// The operand object must already have been set up with the operand type. |
3898 | virtual ConstraintWeight getMultipleConstraintMatchWeight( |
3899 | AsmOperandInfo &info, int maIndex) const; |
3900 | |
3901 | /// Examine constraint string and operand type and determine a weight value. |
3902 | /// The operand object must already have been set up with the operand type. |
3903 | virtual ConstraintWeight getSingleConstraintMatchWeight( |
3904 | AsmOperandInfo &info, const char *constraint) const; |
3905 | |
3906 | /// Determines the constraint code and constraint type to use for the specific |
3907 | /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. |
3908 | /// If the actual operand being passed in is available, it can be passed in as |
3909 | /// Op, otherwise an empty SDValue can be passed. |
3910 | virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, |
3911 | SDValue Op, |
3912 | SelectionDAG *DAG = nullptr) const; |
3913 | |
3914 | /// Given a constraint, return the type of constraint it is for this target. |
3915 | virtual ConstraintType getConstraintType(StringRef Constraint) const; |
3916 | |
3917 | /// Given a physical register constraint (e.g. {edx}), return the register |
3918 | /// number and the register class for the register. |
3919 | /// |
3920 | /// Given a register class constraint, like 'r', if this corresponds directly |
3921 | /// to an LLVM register class, return a register of 0 and the register class |
3922 | /// pointer. |
3923 | /// |
3924 | /// This should only be used for C_Register constraints. On error, this |
3925 | /// returns a register number of 0 and a null register class pointer. |
3926 | virtual std::pair<unsigned, const TargetRegisterClass *> |
3927 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
3928 | StringRef Constraint, MVT VT) const; |
3929 | |
3930 | virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const {
3931 | if (ConstraintCode == "i") // "i" and "m" are the only target-independent constraint codes handled here.
3932 | return InlineAsm::Constraint_i;
3933 | else if (ConstraintCode == "m")
3934 | return InlineAsm::Constraint_m;
3935 | return InlineAsm::Constraint_Unknown; // Targets override to map their own memory-constraint codes.
3936 | }
3937 | |
3938 | /// Try to replace an X constraint, which matches anything, with another that |
3939 | /// has more specific requirements based on the type of the corresponding |
3940 | /// operand. This returns null if there is no replacement to make. |
3941 | virtual const char *LowerXConstraint(EVT ConstraintVT) const; |
3942 | |
3943 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
3944 | /// add anything to Ops. |
3945 | virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, |
3946 | std::vector<SDValue> &Ops, |
3947 | SelectionDAG &DAG) const; |
3948 | |
3949 | // Lower custom output constraints. If invalid, return SDValue(). |
3950 | virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
3951 | SDLoc DL, |
3952 | const AsmOperandInfo &OpInfo, |
3953 | SelectionDAG &DAG) const; |
3954 | |
3955 | //===--------------------------------------------------------------------===// |
3956 | // Div utility functions |
3957 | // |
3958 | SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
3959 | SmallVectorImpl<SDNode *> &Created) const; |
3960 | SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
3961 | SmallVectorImpl<SDNode *> &Created) const; |
3962 | |
3963 | /// Targets may override this function to provide custom SDIV lowering for |
3964 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM |
3965 | /// assumes SDIV is expensive and replaces it with a series of other integer |
3966 | /// operations. |
3967 | virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
3968 | SelectionDAG &DAG, |
3969 | SmallVectorImpl<SDNode *> &Created) const; |
3970 | |
3971 | /// Indicate whether this target prefers to combine FDIVs with the same |
3972 | /// divisor. If the transform should never be done, return zero. If the |
3973 | /// transform should be done, return the minimum number of divisor uses |
3974 | /// that must exist. |
3975 | virtual unsigned combineRepeatedFPDivisors() const {
3976 | return 0; // 0 => never combine FDIVs sharing a divisor on this target.
3977 | }
3978 | |
3979 | /// Hooks for building estimates in place of slower divisions and square |
3980 | /// roots. |
3981 | |
3982 | /// Return either a square root or its reciprocal estimate value for the input |
3983 | /// operand. |
3984 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
3985 | /// 'Enabled' as set by a potential default override attribute. |
3986 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
3987 | /// refinement iterations required to generate a sufficient (though not |
3988 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
3989 | /// The boolean UseOneConstNR output is used to select a Newton-Raphson |
3990 | /// algorithm implementation that uses either one or two constants. |
3991 | /// The boolean Reciprocal is used to select whether the estimate is for the |
3992 | /// square root of the input operand or the reciprocal of its square root. |
3993 | /// A target may choose to implement its own refinement within this function. |
3994 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
3995 | /// any further refinement of the estimate. |
3996 | /// An empty SDValue return means no estimate sequence can be created. |
3997 | virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
3998 | int Enabled, int &RefinementSteps,
3999 | bool &UseOneConstNR, bool Reciprocal) const {
4000 | return SDValue(); // Empty SDValue: no square-root estimate sequence is available by default.
4001 | }
4002 | |
4003 | /// Return a reciprocal estimate value for the input operand. |
4004 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
4005 | /// 'Enabled' as set by a potential default override attribute. |
4006 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
4007 | /// refinement iterations required to generate a sufficient (though not |
4008 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
4009 | /// A target may choose to implement its own refinement within this function. |
4010 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
4011 | /// any further refinement of the estimate. |
4012 | /// An empty SDValue return means no estimate sequence can be created. |
4013 | virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
4014 | int Enabled, int &RefinementSteps) const {
4015 | return SDValue(); // Empty SDValue: no reciprocal estimate sequence is available by default.
4016 | }
4017 | |
4018 | //===--------------------------------------------------------------------===// |
4019 | // Legalization utility functions |
4020 | // |
4021 | |
4022 | /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, |
4023 | /// respectively, each computing an n/2-bit part of the result. |
4024 | /// \param Result A vector that will be filled with the parts of the result |
4025 | /// in little-endian order. |
4026 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
4027 | /// if you want to control how low bits are extracted from the LHS. |
4028 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
4029 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning |
4030 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
4031 | /// \returns true if the node has been expanded, false if it has not |
4032 | bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, |
4033 | SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, |
4034 | SelectionDAG &DAG, MulExpansionKind Kind, |
4035 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
4036 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
4037 | |
4038 | /// Expand a MUL into two nodes. One that computes the high bits of |
4039 | /// the result and one that computes the low bits. |
4040 | /// \param HiLoVT The value type to use for the Lo and Hi nodes. |
4041 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
4042 | /// if you want to control how low bits are extracted from the LHS. |
4043 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
4044 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning |
4045 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
4046 | /// \returns true if the node has been expanded. false if it has not |
4047 | bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, |
4048 | SelectionDAG &DAG, MulExpansionKind Kind, |
4049 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
4050 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
4051 | |
4052 | /// Expand funnel shift. |
4053 | /// \param N Node to expand |
4054 | /// \param Result output after conversion |
4055 | /// \returns True, if the expansion was successful, false otherwise |
4056 | bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4057 | |
4058 | /// Expand rotations. |
4059 | /// \param N Node to expand |
4060 | /// \param Result output after conversion |
4061 | /// \returns True, if the expansion was successful, false otherwise |
4062 | bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4063 | |
4064 | /// Expand float(f32) to SINT(i64) conversion |
4065 | /// \param N Node to expand |
4066 | /// \param Result output after conversion |
4067 | /// \returns True, if the expansion was successful, false otherwise |
4068 | bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4069 | |
4070 | /// Expand float to UINT conversion |
4071 | /// \param N Node to expand |
4072 | /// \param Result output after conversion |
4073 | /// \returns True, if the expansion was successful, false otherwise |
4074 | bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const; |
4075 | |
4076 | /// Expand UINT(i64) to double(f64) conversion |
4077 | /// \param N Node to expand |
4078 | /// \param Result output after conversion |
4079 | /// \returns True, if the expansion was successful, false otherwise |
4080 | bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4081 | |
4082 | /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. |
4083 | SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; |
4084 | |
4085 | /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, |
4086 | /// vector nodes can only succeed if all operations are legal/custom. |
4087 | /// \param N Node to expand |
4088 | /// \param Result output after conversion |
4089 | /// \returns True, if the expansion was successful, false otherwise |
4090 | bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4091 | |
4092 | /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, |
4093 | /// vector nodes can only succeed if all operations are legal/custom. |
4094 | /// \param N Node to expand |
4095 | /// \param Result output after conversion |
4096 | /// \returns True, if the expansion was successful, false otherwise |
4097 | bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4098 | |
4099 | /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, |
4100 | /// vector nodes can only succeed if all operations are legal/custom. |
4101 | /// \param N Node to expand |
4102 | /// \param Result output after conversion |
4103 | /// \returns True, if the expansion was successful, false otherwise |
4104 | bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4105 | |
4106 | /// Expand ABS nodes. Expands vector/scalar ABS nodes, |
4107 | /// vector nodes can only succeed if all operations are legal/custom. |
4108 | /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size)) |
4109 | /// \param N Node to expand |
4110 | /// \param Result output after conversion |
4111 | /// \returns True, if the expansion was successful, false otherwise |
4112 | bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
4113 | |
4114 | /// Turn load of vector type into a load of the individual elements. |
4115 | /// \param LD load to expand |
4116 | /// \returns MERGE_VALUEs of the scalar loads with their chains. |
4117 | SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const; |
4118 | |
4119 | // Turn a store of a vector type into stores of the individual elements. |
4120 | /// \param ST Store with a vector value type |
4121 | /// \returns MERGE_VALUs of the individual store chains. |
4122 | SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
4123 | |
4124 | /// Expands an unaligned load to 2 half-size loads for an integer, and |
4125 | /// possibly more for vectors. |
4126 | std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, |
4127 | SelectionDAG &DAG) const; |
4128 | |
4129 | /// Expands an unaligned store to 2 half-size stores for integer values, and |
4130 | /// possibly more for vectors. |
4131 | SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
4132 | |
4133 | /// Increments memory address \p Addr according to the type of the value |
4134 | /// \p DataVT that should be stored. If the data is stored in compressed |
4135 | /// form, the memory address should be incremented according to the number of |
4136 | /// the stored elements. This number is equal to the number of '1's bits |
4137 | /// in the \p Mask. |
4138 | /// \p DataVT is a vector type. \p Mask is a vector value. |
4139 | /// \p DataVT and \p Mask have the same number of vector elements. |
4140 | SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, |
4141 | EVT DataVT, SelectionDAG &DAG, |
4142 | bool IsCompressedMemory) const; |
4143 | |
4144 | /// Get a pointer to vector element \p Idx located in memory for a vector of |
4145 | /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of |
4146 | /// bounds the returned pointer is unspecified, but will be within the vector |
4147 | /// bounds. |
4148 | SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, |
4149 | SDValue Index) const; |
4150 | |
4151 | /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This |
4152 | /// method accepts integers as its arguments. |
4153 | SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; |
4154 | |
4155 | /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This |
4156 | /// method accepts integers as its arguments. |
4157 | SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; |
4158 | |
4159 | /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion |
4160 | /// always succeeds and populates the Result and Overflow arguments.
4161 | void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
4162 | SelectionDAG &DAG) const; |
4163 | |
4164 | /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion |
4165 | /// always succeeds and populates the Result and Overflow arguments.
4166 | void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
4167 | SelectionDAG &DAG) const; |
4168 | |
4169 | /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether |
4170 | /// expansion was successful and populates the Result and Overflow arguments. |
4171 | bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
4172 | SelectionDAG &DAG) const; |
4173 | |
4174 | /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified, |
4175 | /// only the first Count elements of the vector are used. |
4176 | SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; |
4177 | |
4178 | //===--------------------------------------------------------------------===// |
4179 | // Instruction Emitting Hooks |
4180 | // |
4181 | |
4182 | /// This method should be implemented by targets that mark instructions with |
4183 | /// the 'usesCustomInserter' flag. These instructions are special in various |
4184 | /// ways, which require special support to insert. The specified MachineInstr |
4185 | /// is created but not inserted into any basic blocks, and this method is |
4186 | /// called to expand it into a sequence of instructions, potentially also |
4187 | /// creating new basic blocks and control flow. |
4188 | /// As long as the returned basic block is different (i.e., we created a new |
4189 | /// one), the custom inserter is free to modify the rest of \p MBB. |
4190 | virtual MachineBasicBlock * |
4191 | EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; |
4192 | |
4193 | /// This method should be implemented by targets that mark instructions with |
4194 | /// the 'hasPostISelHook' flag. These instructions must be adjusted after |
4195 | /// instruction selection by target hooks. e.g. To fill in optional defs for |
4196 | /// ARM 's' setting instructions. |
4197 | virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, |
4198 | SDNode *Node) const; |
4199 | |
4200 | /// If this function returns true, SelectionDAGBuilder emits a |
4201 | /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. |
4202 | virtual bool useLoadStackGuardNode() const {
4203 | return false; // Default: lower Intrinsic::stackprotector without a LOAD_STACK_GUARD node.
4204 | }
4205 | |
4206 | virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
4207 | const SDLoc &DL) const {
4208 | llvm_unreachable("not implemented for this target")::llvm::llvm_unreachable_internal("not implemented for this target" , "/build/llvm-toolchain-snapshot-10~svn373517/include/llvm/CodeGen/TargetLowering.h" , 4208); // Default aborts; only targets opting into this stack-guard hardening override it. (Inline macro expansion.)
4209 | }
4210 | |
4211 | /// Lower TLS global address SDNode for target independent emulated TLS model. |
4212 | virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, |
4213 | SelectionDAG &DAG) const; |
4214 | |
4215 | /// Expands target specific indirect branch for the case of JumpTable |
4216 | /// expansion.
4217 | virtual SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, SDValue Addr,
4218 | SelectionDAG &DAG) const {
4219 | return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr); // Default: a plain ISD::BRIND indirect branch through the computed address.
4220 | }
4221 | |
4222 | // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) |
4223 | // If we're comparing for equality to zero and isCtlzFast is true, expose the |
4224 | // fact that this can be implemented as a ctlz/srl pair, so that the dag |
4225 | // combiner can fold the new nodes. |
4226 | SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; |
4227 | |
4228 | private: |
4229 | SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
4230 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
4231 | SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
4232 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
4233 | |
4234 | SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, |
4235 | SDValue N1, ISD::CondCode Cond, |
4236 | DAGCombinerInfo &DCI, |
4237 | const SDLoc &DL) const; |
4238 | |
4239 | // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0 |
4240 | SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( |
4241 | EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, |
4242 | DAGCombinerInfo &DCI, const SDLoc &DL) const; |
4243 | |
4244 | SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, |
4245 | SDValue CompTargetNode, ISD::CondCode Cond, |
4246 | DAGCombinerInfo &DCI, const SDLoc &DL, |
4247 | SmallVectorImpl<SDNode *> &Created) const; |
4248 | SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, |
4249 | ISD::CondCode Cond, DAGCombinerInfo &DCI, |
4250 | const SDLoc &DL) const; |
4251 | |
4252 | SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, |
4253 | SDValue CompTargetNode, ISD::CondCode Cond, |
4254 | DAGCombinerInfo &DCI, const SDLoc &DL, |
4255 | SmallVectorImpl<SDNode *> &Created) const; |
4256 | SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, |
4257 | ISD::CondCode Cond, DAGCombinerInfo &DCI, |
4258 | const SDLoc &DL) const; |
4259 | }; |
4260 | |
4261 | /// Given an LLVM IR type and return type attributes, compute the return value |
4262 | /// EVTs and flags, and optionally also the offsets, if the return value is |
4263 | /// being lowered to memory. |
4264 | void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, |
4265 | SmallVectorImpl<ISD::OutputArg> &Outs, |
4266 | const TargetLowering &TLI, const DataLayout &DL); |
4267 | |
4268 | } // end namespace llvm |
4269 | |
4270 | #endif // LLVM_CODEGEN_TARGETLOWERING_H |