File: | build/source/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp |
Warning: | line 1560, column 21 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- HexagonLoopIdiomRecognition.cpp ------------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | ||||
9 | #include "HexagonLoopIdiomRecognition.h" | |||
10 | #include "llvm/ADT/APInt.h" | |||
11 | #include "llvm/ADT/DenseMap.h" | |||
12 | #include "llvm/ADT/SetVector.h" | |||
13 | #include "llvm/ADT/SmallPtrSet.h" | |||
14 | #include "llvm/ADT/SmallSet.h" | |||
15 | #include "llvm/ADT/SmallVector.h" | |||
16 | #include "llvm/ADT/StringRef.h" | |||
17 | #include "llvm/Analysis/AliasAnalysis.h" | |||
18 | #include "llvm/Analysis/InstructionSimplify.h" | |||
19 | #include "llvm/Analysis/LoopAnalysisManager.h" | |||
20 | #include "llvm/Analysis/LoopInfo.h" | |||
21 | #include "llvm/Analysis/LoopPass.h" | |||
22 | #include "llvm/Analysis/MemoryLocation.h" | |||
23 | #include "llvm/Analysis/ScalarEvolution.h" | |||
24 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | |||
25 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
26 | #include "llvm/Analysis/ValueTracking.h" | |||
27 | #include "llvm/IR/Attributes.h" | |||
28 | #include "llvm/IR/BasicBlock.h" | |||
29 | #include "llvm/IR/Constant.h" | |||
30 | #include "llvm/IR/Constants.h" | |||
31 | #include "llvm/IR/DataLayout.h" | |||
32 | #include "llvm/IR/DebugLoc.h" | |||
33 | #include "llvm/IR/DerivedTypes.h" | |||
34 | #include "llvm/IR/Dominators.h" | |||
35 | #include "llvm/IR/Function.h" | |||
36 | #include "llvm/IR/IRBuilder.h" | |||
37 | #include "llvm/IR/InstrTypes.h" | |||
38 | #include "llvm/IR/Instruction.h" | |||
39 | #include "llvm/IR/Instructions.h" | |||
40 | #include "llvm/IR/IntrinsicInst.h" | |||
41 | #include "llvm/IR/Intrinsics.h" | |||
42 | #include "llvm/IR/IntrinsicsHexagon.h" | |||
43 | #include "llvm/IR/Module.h" | |||
44 | #include "llvm/IR/PassManager.h" | |||
45 | #include "llvm/IR/PatternMatch.h" | |||
46 | #include "llvm/IR/Type.h" | |||
47 | #include "llvm/IR/User.h" | |||
48 | #include "llvm/IR/Value.h" | |||
49 | #include "llvm/InitializePasses.h" | |||
50 | #include "llvm/Pass.h" | |||
51 | #include "llvm/Support/Casting.h" | |||
52 | #include "llvm/Support/CommandLine.h" | |||
53 | #include "llvm/Support/Compiler.h" | |||
54 | #include "llvm/Support/Debug.h" | |||
55 | #include "llvm/Support/ErrorHandling.h" | |||
56 | #include "llvm/Support/KnownBits.h" | |||
57 | #include "llvm/Support/raw_ostream.h" | |||
58 | #include "llvm/TargetParser/Triple.h" | |||
59 | #include "llvm/Transforms/Scalar.h" | |||
60 | #include "llvm/Transforms/Utils.h" | |||
61 | #include "llvm/Transforms/Utils/Local.h" | |||
62 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" | |||
63 | #include <algorithm> | |||
64 | #include <array> | |||
65 | #include <cassert> | |||
66 | #include <cstdint> | |||
67 | #include <cstdlib> | |||
68 | #include <deque> | |||
69 | #include <functional> | |||
70 | #include <iterator> | |||
71 | #include <map> | |||
72 | #include <set> | |||
73 | #include <utility> | |||
74 | #include <vector> | |||
75 | ||||
76 | #define DEBUG_TYPE"hexagon-lir" "hexagon-lir" | |||
77 | ||||
78 | using namespace llvm; | |||
79 | ||||
80 | static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom", | |||
81 | cl::Hidden, cl::init(false), | |||
82 | cl::desc("Disable generation of memcpy in loop idiom recognition")); | |||
83 | ||||
84 | static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom", | |||
85 | cl::Hidden, cl::init(false), | |||
86 | cl::desc("Disable generation of memmove in loop idiom recognition")); | |||
87 | ||||
88 | static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", | |||
89 | cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " | |||
90 | "check guarding the memmove.")); | |||
91 | ||||
92 | static cl::opt<unsigned> CompileTimeMemSizeThreshold( | |||
93 | "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), | |||
94 | cl::desc("Threshold (in bytes) to perform the transformation, if the " | |||
95 | "runtime loop count (mem transfer size) is known at compile-time.")); | |||
96 | ||||
97 | static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom", | |||
98 | cl::Hidden, cl::init(true), | |||
99 | cl::desc("Only enable generating memmove in non-nested loops")); | |||
100 | ||||
101 | static cl::opt<bool> HexagonVolatileMemcpy( | |||
102 | "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), | |||
103 | cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); | |||
104 | ||||
105 | static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000), | |||
106 | cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); | |||
107 | ||||
108 | static const char *HexagonVolatileMemcpyName | |||
109 | = "hexagon_memcpy_forward_vp4cp4n2"; | |||
110 | ||||
111 | ||||
112 | namespace llvm { | |||
113 | ||||
114 | void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &); | |||
115 | Pass *createHexagonLoopIdiomPass(); | |||
116 | ||||
117 | } // end namespace llvm | |||
118 | ||||
119 | namespace { | |||
120 | ||||
121 | class HexagonLoopIdiomRecognize { | |||
122 | public: | |||
123 | explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, | |||
124 | LoopInfo *LF, const TargetLibraryInfo *TLI, | |||
125 | ScalarEvolution *SE) | |||
126 | : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {} | |||
127 | ||||
128 | bool run(Loop *L); | |||
129 | ||||
130 | private: | |||
131 | int getSCEVStride(const SCEVAddRecExpr *StoreEv); | |||
132 | bool isLegalStore(Loop *CurLoop, StoreInst *SI); | |||
133 | void collectStores(Loop *CurLoop, BasicBlock *BB, | |||
134 | SmallVectorImpl<StoreInst *> &Stores); | |||
135 | bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount); | |||
136 | bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const; | |||
137 | bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount, | |||
138 | SmallVectorImpl<BasicBlock *> &ExitBlocks); | |||
139 | bool runOnCountableLoop(Loop *L); | |||
140 | ||||
141 | AliasAnalysis *AA; | |||
142 | const DataLayout *DL; | |||
143 | DominatorTree *DT; | |||
144 | LoopInfo *LF; | |||
145 | const TargetLibraryInfo *TLI; | |||
146 | ScalarEvolution *SE; | |||
147 | bool HasMemcpy, HasMemmove; | |||
148 | }; | |||
149 | ||||
150 | class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass { | |||
151 | public: | |||
152 | static char ID; | |||
153 | ||||
154 | explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) { | |||
155 | initializeHexagonLoopIdiomRecognizeLegacyPassPass( | |||
156 | *PassRegistry::getPassRegistry()); | |||
157 | } | |||
158 | ||||
159 | StringRef getPassName() const override { | |||
160 | return "Recognize Hexagon-specific loop idioms"; | |||
161 | } | |||
162 | ||||
163 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
164 | AU.addRequired<LoopInfoWrapperPass>(); | |||
165 | AU.addRequiredID(LoopSimplifyID); | |||
166 | AU.addRequiredID(LCSSAID); | |||
167 | AU.addRequired<AAResultsWrapperPass>(); | |||
168 | AU.addRequired<ScalarEvolutionWrapperPass>(); | |||
169 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
170 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
171 | AU.addPreserved<TargetLibraryInfoWrapperPass>(); | |||
172 | } | |||
173 | ||||
174 | bool runOnLoop(Loop *L, LPPassManager &LPM) override; | |||
175 | }; | |||
176 | ||||
177 | struct Simplifier { | |||
178 | struct Rule { | |||
179 | using FuncType = std::function<Value *(Instruction *, LLVMContext &)>; | |||
180 | Rule(StringRef N, FuncType F) : Name(N), Fn(F) {} | |||
181 | StringRef Name; // For debugging. | |||
182 | FuncType Fn; | |||
183 | }; | |||
184 | ||||
185 | void addRule(StringRef N, const Rule::FuncType &F) { | |||
186 | Rules.push_back(Rule(N, F)); | |||
187 | } | |||
188 | ||||
189 | private: | |||
190 | struct WorkListType { | |||
191 | WorkListType() = default; | |||
192 | ||||
193 | void push_back(Value *V) { | |||
194 | // Do not push back duplicates. | |||
195 | if (S.insert(V).second) | |||
196 | Q.push_back(V); | |||
197 | } | |||
198 | ||||
199 | Value *pop_front_val() { | |||
200 | Value *V = Q.front(); | |||
201 | Q.pop_front(); | |||
202 | S.erase(V); | |||
203 | return V; | |||
204 | } | |||
205 | ||||
206 | bool empty() const { return Q.empty(); } | |||
207 | ||||
208 | private: | |||
209 | std::deque<Value *> Q; | |||
210 | std::set<Value *> S; | |||
211 | }; | |||
212 | ||||
213 | using ValueSetType = std::set<Value *>; | |||
214 | ||||
215 | std::vector<Rule> Rules; | |||
216 | ||||
217 | public: | |||
218 | struct Context { | |||
219 | using ValueMapType = DenseMap<Value *, Value *>; | |||
220 | ||||
221 | Value *Root; | |||
222 | ValueSetType Used; // The set of all cloned values used by Root. | |||
223 | ValueSetType Clones; // The set of all cloned values. | |||
224 | LLVMContext &Ctx; | |||
225 | ||||
226 | Context(Instruction *Exp) | |||
227 | : Ctx(Exp->getParent()->getParent()->getContext()) { | |||
228 | initialize(Exp); | |||
229 | } | |||
230 | ||||
231 | ~Context() { cleanup(); } | |||
232 | ||||
233 | void print(raw_ostream &OS, const Value *V) const; | |||
234 | Value *materialize(BasicBlock *B, BasicBlock::iterator At); | |||
235 | ||||
236 | private: | |||
237 | friend struct Simplifier; | |||
238 | ||||
239 | void initialize(Instruction *Exp); | |||
240 | void cleanup(); | |||
241 | ||||
242 | template <typename FuncT> void traverse(Value *V, FuncT F); | |||
243 | void record(Value *V); | |||
244 | void use(Value *V); | |||
245 | void unuse(Value *V); | |||
246 | ||||
247 | bool equal(const Instruction *I, const Instruction *J) const; | |||
248 | Value *find(Value *Tree, Value *Sub) const; | |||
249 | Value *subst(Value *Tree, Value *OldV, Value *NewV); | |||
250 | void replace(Value *OldV, Value *NewV); | |||
251 | void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At); | |||
252 | }; | |||
253 | ||||
254 | Value *simplify(Context &C); | |||
255 | }; | |||
256 | ||||
257 | struct PE { | |||
258 | PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {} | |||
259 | ||||
260 | const Simplifier::Context &C; | |||
261 | const Value *V; | |||
262 | }; | |||
263 | ||||
264 | LLVM_ATTRIBUTE_USED__attribute__((__used__)) | |||
265 | raw_ostream &operator<<(raw_ostream &OS, const PE &P) { | |||
266 | P.C.print(OS, P.V ? P.V : P.C.Root); | |||
267 | return OS; | |||
268 | } | |||
269 | ||||
270 | } // end anonymous namespace | |||
271 | ||||
272 | char HexagonLoopIdiomRecognizeLegacyPass::ID = 0; | |||
273 | ||||
274 | INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce (PassRegistry &Registry) { | |||
275 | "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce (PassRegistry &Registry) { | |||
276 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); | |||
277 | INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry); | |||
278 | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry); | |||
279 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry); | |||
280 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | |||
281 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
282 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | |||
283 | INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms" , "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass ::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag , initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std:: ref(Registry)); } | |||
284 | "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms" , "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass ::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag , initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std:: ref(Registry)); } | |||
285 | ||||
286 | template <typename FuncT> | |||
287 | void Simplifier::Context::traverse(Value *V, FuncT F) { | |||
288 | WorkListType Q; | |||
289 | Q.push_back(V); | |||
290 | ||||
291 | while (!Q.empty()) { | |||
292 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
293 | if (!U || U->getParent()) | |||
294 | continue; | |||
295 | if (!F(U)) | |||
296 | continue; | |||
297 | for (Value *Op : U->operands()) | |||
298 | Q.push_back(Op); | |||
299 | } | |||
300 | } | |||
301 | ||||
302 | void Simplifier::Context::print(raw_ostream &OS, const Value *V) const { | |||
303 | const auto *U = dyn_cast<const Instruction>(V); | |||
304 | if (!U) { | |||
305 | OS << V << '(' << *V << ')'; | |||
306 | return; | |||
307 | } | |||
308 | ||||
309 | if (U->getParent()) { | |||
310 | OS << U << '('; | |||
311 | U->printAsOperand(OS, true); | |||
312 | OS << ')'; | |||
313 | return; | |||
314 | } | |||
315 | ||||
316 | unsigned N = U->getNumOperands(); | |||
317 | if (N != 0) | |||
318 | OS << U << '('; | |||
319 | OS << U->getOpcodeName(); | |||
320 | for (const Value *Op : U->operands()) { | |||
321 | OS << ' '; | |||
322 | print(OS, Op); | |||
323 | } | |||
324 | if (N != 0) | |||
325 | OS << ')'; | |||
326 | } | |||
327 | ||||
328 | void Simplifier::Context::initialize(Instruction *Exp) { | |||
329 | // Perform a deep clone of the expression, set Root to the root | |||
330 | // of the clone, and build a map from the cloned values to the | |||
331 | // original ones. | |||
332 | ValueMapType M; | |||
333 | BasicBlock *Block = Exp->getParent(); | |||
334 | WorkListType Q; | |||
335 | Q.push_back(Exp); | |||
336 | ||||
337 | while (!Q.empty()) { | |||
338 | Value *V = Q.pop_front_val(); | |||
339 | if (M.find(V) != M.end()) | |||
340 | continue; | |||
341 | if (Instruction *U = dyn_cast<Instruction>(V)) { | |||
342 | if (isa<PHINode>(U) || U->getParent() != Block) | |||
343 | continue; | |||
344 | for (Value *Op : U->operands()) | |||
345 | Q.push_back(Op); | |||
346 | M.insert({U, U->clone()}); | |||
347 | } | |||
348 | } | |||
349 | ||||
350 | for (std::pair<Value*,Value*> P : M) { | |||
351 | Instruction *U = cast<Instruction>(P.second); | |||
352 | for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { | |||
353 | auto F = M.find(U->getOperand(i)); | |||
354 | if (F != M.end()) | |||
355 | U->setOperand(i, F->second); | |||
356 | } | |||
357 | } | |||
358 | ||||
359 | auto R = M.find(Exp); | |||
360 | assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail ("R != M.end()", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 360, __extension__ __PRETTY_FUNCTION__)); | |||
361 | Root = R->second; | |||
362 | ||||
363 | record(Root); | |||
364 | use(Root); | |||
365 | } | |||
366 | ||||
367 | void Simplifier::Context::record(Value *V) { | |||
368 | auto Record = [this](Instruction *U) -> bool { | |||
369 | Clones.insert(U); | |||
370 | return true; | |||
371 | }; | |||
372 | traverse(V, Record); | |||
373 | } | |||
374 | ||||
375 | void Simplifier::Context::use(Value *V) { | |||
376 | auto Use = [this](Instruction *U) -> bool { | |||
377 | Used.insert(U); | |||
378 | return true; | |||
379 | }; | |||
380 | traverse(V, Use); | |||
381 | } | |||
382 | ||||
383 | void Simplifier::Context::unuse(Value *V) { | |||
384 | if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr) | |||
385 | return; | |||
386 | ||||
387 | auto Unuse = [this](Instruction *U) -> bool { | |||
388 | if (!U->use_empty()) | |||
389 | return false; | |||
390 | Used.erase(U); | |||
391 | return true; | |||
392 | }; | |||
393 | traverse(V, Unuse); | |||
394 | } | |||
395 | ||||
396 | Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) { | |||
397 | if (Tree == OldV) | |||
398 | return NewV; | |||
399 | if (OldV == NewV) | |||
400 | return Tree; | |||
401 | ||||
402 | WorkListType Q; | |||
403 | Q.push_back(Tree); | |||
404 | while (!Q.empty()) { | |||
405 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
406 | // If U is not an instruction, or it's not a clone, skip it. | |||
407 | if (!U || U->getParent()) | |||
408 | continue; | |||
409 | for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { | |||
410 | Value *Op = U->getOperand(i); | |||
411 | if (Op == OldV) { | |||
412 | U->setOperand(i, NewV); | |||
413 | unuse(OldV); | |||
414 | } else { | |||
415 | Q.push_back(Op); | |||
416 | } | |||
417 | } | |||
418 | } | |||
419 | return Tree; | |||
420 | } | |||
421 | ||||
422 | void Simplifier::Context::replace(Value *OldV, Value *NewV) { | |||
423 | if (Root == OldV) { | |||
424 | Root = NewV; | |||
425 | use(Root); | |||
426 | return; | |||
427 | } | |||
428 | ||||
429 | // NewV may be a complex tree that has just been created by one of the | |||
430 | // transformation rules. We need to make sure that it is commoned with | |||
431 | // the existing Root to the maximum extent possible. | |||
432 | // Identify all subtrees of NewV (including NewV itself) that have | |||
433 | // equivalent counterparts in Root, and replace those subtrees with | |||
434 | // these counterparts. | |||
435 | WorkListType Q; | |||
436 | Q.push_back(NewV); | |||
437 | while (!Q.empty()) { | |||
438 | Value *V = Q.pop_front_val(); | |||
439 | Instruction *U = dyn_cast<Instruction>(V); | |||
440 | if (!U || U->getParent()) | |||
441 | continue; | |||
442 | if (Value *DupV = find(Root, V)) { | |||
443 | if (DupV != V) | |||
444 | NewV = subst(NewV, V, DupV); | |||
445 | } else { | |||
446 | for (Value *Op : U->operands()) | |||
447 | Q.push_back(Op); | |||
448 | } | |||
449 | } | |||
450 | ||||
451 | // Now, simply replace OldV with NewV in Root. | |||
452 | Root = subst(Root, OldV, NewV); | |||
453 | use(Root); | |||
454 | } | |||
455 | ||||
456 | void Simplifier::Context::cleanup() { | |||
457 | for (Value *V : Clones) { | |||
458 | Instruction *U = cast<Instruction>(V); | |||
459 | if (!U->getParent()) | |||
460 | U->dropAllReferences(); | |||
461 | } | |||
462 | ||||
463 | for (Value *V : Clones) { | |||
464 | Instruction *U = cast<Instruction>(V); | |||
465 | if (!U->getParent()) | |||
466 | U->deleteValue(); | |||
467 | } | |||
468 | } | |||
469 | ||||
470 | bool Simplifier::Context::equal(const Instruction *I, | |||
471 | const Instruction *J) const { | |||
472 | if (I == J) | |||
473 | return true; | |||
474 | if (!I->isSameOperationAs(J)) | |||
475 | return false; | |||
476 | if (isa<PHINode>(I)) | |||
477 | return I->isIdenticalTo(J); | |||
478 | ||||
479 | for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) { | |||
480 | Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i); | |||
481 | if (OpI == OpJ) | |||
482 | continue; | |||
483 | auto *InI = dyn_cast<const Instruction>(OpI); | |||
484 | auto *InJ = dyn_cast<const Instruction>(OpJ); | |||
485 | if (InI && InJ) { | |||
486 | if (!equal(InI, InJ)) | |||
487 | return false; | |||
488 | } else if (InI != InJ || !InI) | |||
489 | return false; | |||
490 | } | |||
491 | return true; | |||
492 | } | |||
493 | ||||
494 | Value *Simplifier::Context::find(Value *Tree, Value *Sub) const { | |||
495 | Instruction *SubI = dyn_cast<Instruction>(Sub); | |||
496 | WorkListType Q; | |||
497 | Q.push_back(Tree); | |||
498 | ||||
499 | while (!Q.empty()) { | |||
500 | Value *V = Q.pop_front_val(); | |||
501 | if (V == Sub) | |||
502 | return V; | |||
503 | Instruction *U = dyn_cast<Instruction>(V); | |||
504 | if (!U || U->getParent()) | |||
505 | continue; | |||
506 | if (SubI && equal(SubI, U)) | |||
507 | return U; | |||
508 | assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0) : __assert_fail ("!isa<PHINode>(U)", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 508, __extension__ __PRETTY_FUNCTION__)); | |||
509 | for (Value *Op : U->operands()) | |||
510 | Q.push_back(Op); | |||
511 | } | |||
512 | return nullptr; | |||
513 | } | |||
514 | ||||
515 | void Simplifier::Context::link(Instruction *I, BasicBlock *B, | |||
516 | BasicBlock::iterator At) { | |||
517 | if (I->getParent()) | |||
518 | return; | |||
519 | ||||
520 | for (Value *Op : I->operands()) { | |||
521 | if (Instruction *OpI = dyn_cast<Instruction>(Op)) | |||
522 | link(OpI, B, At); | |||
523 | } | |||
524 | ||||
525 | I->insertInto(B, At); | |||
526 | } | |||
527 | ||||
528 | Value *Simplifier::Context::materialize(BasicBlock *B, | |||
529 | BasicBlock::iterator At) { | |||
530 | if (Instruction *RootI = dyn_cast<Instruction>(Root)) | |||
531 | link(RootI, B, At); | |||
532 | return Root; | |||
533 | } | |||
534 | ||||
535 | Value *Simplifier::simplify(Context &C) { | |||
536 | WorkListType Q; | |||
537 | Q.push_back(C.Root); | |||
538 | unsigned Count = 0; | |||
539 | const unsigned Limit = SimplifyLimit; | |||
540 | ||||
541 | while (!Q.empty()) { | |||
542 | if (Count++ >= Limit) | |||
543 | break; | |||
544 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
545 | if (!U || U->getParent() || !C.Used.count(U)) | |||
546 | continue; | |||
547 | bool Changed = false; | |||
548 | for (Rule &R : Rules) { | |||
549 | Value *W = R.Fn(U, C.Ctx); | |||
550 | if (!W) | |||
551 | continue; | |||
552 | Changed = true; | |||
553 | C.record(W); | |||
554 | C.replace(U, W); | |||
555 | Q.push_back(C.Root); | |||
556 | break; | |||
557 | } | |||
558 | if (!Changed) { | |||
559 | for (Value *Op : U->operands()) | |||
560 | Q.push_back(Op); | |||
561 | } | |||
562 | } | |||
563 | return Count < Limit ? C.Root : nullptr; | |||
564 | } | |||
565 | ||||
566 | //===----------------------------------------------------------------------===// | |||
567 | // | |||
568 | // Implementation of PolynomialMultiplyRecognize | |||
569 | // | |||
570 | //===----------------------------------------------------------------------===// | |||
571 | ||||
572 | namespace { | |||
573 | ||||
574 | class PolynomialMultiplyRecognize { | |||
575 | public: | |||
576 | explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl, | |||
577 | const DominatorTree &dt, const TargetLibraryInfo &tli, | |||
578 | ScalarEvolution &se) | |||
579 | : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {} | |||
580 | ||||
581 | bool recognize(); | |||
582 | ||||
583 | private: | |||
584 | using ValueSeq = SetVector<Value *>; | |||
585 | ||||
586 | IntegerType *getPmpyType() const { | |||
587 | LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext(); | |||
588 | return IntegerType::get(Ctx, 32); | |||
589 | } | |||
590 | ||||
591 | bool isPromotableTo(Value *V, IntegerType *Ty); | |||
592 | void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB); | |||
593 | bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB); | |||
594 | ||||
595 | Value *getCountIV(BasicBlock *BB); | |||
596 | bool findCycle(Value *Out, Value *In, ValueSeq &Cycle); | |||
597 | void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early, | |||
598 | ValueSeq &Late); | |||
599 | bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late); | |||
600 | bool commutesWithShift(Instruction *I); | |||
601 | bool highBitsAreZero(Value *V, unsigned IterCount); | |||
602 | bool keepsHighBitsZero(Value *V, unsigned IterCount); | |||
603 | bool isOperandShifted(Instruction *I, Value *Op); | |||
604 | bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB, | |||
605 | unsigned IterCount); | |||
606 | void cleanupLoopBody(BasicBlock *LoopB); | |||
607 | ||||
608 | struct ParsedValues { | |||
609 | ParsedValues() = default; | |||
610 | ||||
611 | Value *M = nullptr; | |||
612 | Value *P = nullptr; | |||
613 | Value *Q = nullptr; | |||
614 | Value *R = nullptr; | |||
615 | Value *X = nullptr; | |||
616 | Instruction *Res = nullptr; | |||
617 | unsigned IterCount = 0; | |||
618 | bool Left = false; | |||
619 | bool Inv = false; | |||
620 | }; | |||
621 | ||||
622 | bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV); | |||
623 | bool matchRightShift(SelectInst *SelI, ParsedValues &PV); | |||
624 | bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB, | |||
625 | Value *CIV, ParsedValues &PV, bool PreScan); | |||
626 | unsigned getInverseMxN(unsigned QP); | |||
627 | Value *generate(BasicBlock::iterator At, ParsedValues &PV); | |||
628 | ||||
629 | void setupPreSimplifier(Simplifier &S); | |||
630 | void setupPostSimplifier(Simplifier &S); | |||
631 | ||||
632 | Loop *CurLoop; | |||
633 | const DataLayout &DL; | |||
634 | const DominatorTree &DT; | |||
635 | const TargetLibraryInfo &TLI; | |||
636 | ScalarEvolution &SE; | |||
637 | }; | |||
638 | ||||
639 | } // end anonymous namespace | |||
640 | ||||
641 | Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { | |||
642 | pred_iterator PI = pred_begin(BB), PE = pred_end(BB); | |||
643 | if (std::distance(PI, PE) != 2) | |||
644 | return nullptr; | |||
645 | BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI; | |||
646 | ||||
647 | for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { | |||
648 | auto *PN = cast<PHINode>(I); | |||
649 | Value *InitV = PN->getIncomingValueForBlock(PB); | |||
650 | if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero()) | |||
651 | continue; | |||
652 | Value *IterV = PN->getIncomingValueForBlock(BB); | |||
653 | auto *BO = dyn_cast<BinaryOperator>(IterV); | |||
654 | if (!BO) | |||
655 | continue; | |||
656 | if (BO->getOpcode() != Instruction::Add) | |||
657 | continue; | |||
658 | Value *IncV = nullptr; | |||
659 | if (BO->getOperand(0) == PN) | |||
660 | IncV = BO->getOperand(1); | |||
661 | else if (BO->getOperand(1) == PN) | |||
662 | IncV = BO->getOperand(0); | |||
663 | if (IncV == nullptr) | |||
664 | continue; | |||
665 | ||||
666 | if (auto *T = dyn_cast<ConstantInt>(IncV)) | |||
667 | if (T->getZExtValue() == 1) | |||
668 | return PN; | |||
669 | } | |||
670 | return nullptr; | |||
671 | } | |||
672 | ||||
673 | static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) { | |||
674 | for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) { | |||
675 | Use &TheUse = UI.getUse(); | |||
676 | ++UI; | |||
677 | if (auto *II = dyn_cast<Instruction>(TheUse.getUser())) | |||
678 | if (BB == II->getParent()) | |||
679 | II->replaceUsesOfWith(I, J); | |||
680 | } | |||
681 | } | |||
682 | ||||
683 | bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI, | |||
684 | Value *CIV, ParsedValues &PV) { | |||
685 | // Match the following: | |||
686 | // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R | |||
687 | // select (X & (1 << i)) == 0 ? R : R ^ (Q << i) | |||
688 | // The condition may also check for equality with the masked value, i.e | |||
689 | // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R | |||
690 | // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i); | |||
691 | ||||
692 | Value *CondV = SelI->getCondition(); | |||
693 | Value *TrueV = SelI->getTrueValue(); | |||
694 | Value *FalseV = SelI->getFalseValue(); | |||
695 | ||||
696 | using namespace PatternMatch; | |||
697 | ||||
698 | CmpInst::Predicate P; | |||
699 | Value *A = nullptr, *B = nullptr, *C = nullptr; | |||
700 | ||||
701 | if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) && | |||
702 | !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B))))) | |||
703 | return false; | |||
704 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
705 | return false; | |||
706 | // Matched: select (A & B) == C ? ... : ... | |||
707 | // select (A & B) != C ? ... : ... | |||
708 | ||||
709 | Value *X = nullptr, *Sh1 = nullptr; | |||
710 | // Check (A & B) for (X & (1 << i)): | |||
711 | if (match(A, m_Shl(m_One(), m_Specific(CIV)))) { | |||
712 | Sh1 = A; | |||
713 | X = B; | |||
714 | } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) { | |||
715 | Sh1 = B; | |||
716 | X = A; | |||
717 | } else { | |||
718 | // TODO: Could also check for an induction variable containing single | |||
719 | // bit shifted left by 1 in each iteration. | |||
720 | return false; | |||
721 | } | |||
722 | ||||
723 | bool TrueIfZero; | |||
724 | ||||
725 | // Check C against the possible values for comparison: 0 and (1 << i): | |||
726 | if (match(C, m_Zero())) | |||
727 | TrueIfZero = (P == CmpInst::ICMP_EQ); | |||
728 | else if (C == Sh1) | |||
729 | TrueIfZero = (P == CmpInst::ICMP_NE); | |||
730 | else | |||
731 | return false; | |||
732 | ||||
733 | // So far, matched: | |||
734 | // select (X & (1 << i)) ? ... : ... | |||
735 | // including variations of the check against zero/non-zero value. | |||
736 | ||||
737 | Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr; | |||
738 | if (TrueIfZero) { | |||
739 | ShouldSameV = TrueV; | |||
740 | ShouldXoredV = FalseV; | |||
741 | } else { | |||
742 | ShouldSameV = FalseV; | |||
743 | ShouldXoredV = TrueV; | |||
744 | } | |||
745 | ||||
746 | Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr; | |||
747 | Value *T = nullptr; | |||
748 | if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) { | |||
749 | // Matched: select +++ ? ... : Y ^ Z | |||
750 | // select +++ ? Y ^ Z : ... | |||
751 | // where +++ denotes previously checked matches. | |||
752 | if (ShouldSameV == Y) | |||
753 | T = Z; | |||
754 | else if (ShouldSameV == Z) | |||
755 | T = Y; | |||
756 | else | |||
757 | return false; | |||
758 | R = ShouldSameV; | |||
759 | // Matched: select +++ ? R : R ^ T | |||
760 | // select +++ ? R ^ T : R | |||
761 | // depending on TrueIfZero. | |||
762 | ||||
763 | } else if (match(ShouldSameV, m_Zero())) { | |||
764 | // Matched: select +++ ? 0 : ... | |||
765 | // select +++ ? ... : 0 | |||
766 | if (!SelI->hasOneUse()) | |||
767 | return false; | |||
768 | T = ShouldXoredV; | |||
769 | // Matched: select +++ ? 0 : T | |||
770 | // select +++ ? T : 0 | |||
771 | ||||
772 | Value *U = *SelI->user_begin(); | |||
773 | if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) && | |||
774 | !match(U, m_Xor(m_Value(R), m_Specific(SelI)))) | |||
775 | return false; | |||
776 | // Matched: xor (select +++ ? 0 : T), R | |||
777 | // xor (select +++ ? T : 0), R | |||
778 | } else | |||
779 | return false; | |||
780 | ||||
781 | // The xor input value T is isolated into its own match so that it could | |||
782 | // be checked against an induction variable containing a shifted bit | |||
783 | // (todo). | |||
784 | // For now, check against (Q << i). | |||
785 | if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) && | |||
786 | !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV))))) | |||
787 | return false; | |||
788 | // Matched: select +++ ? R : R ^ (Q << i) | |||
789 | // select +++ ? R ^ (Q << i) : R | |||
790 | ||||
791 | PV.X = X; | |||
792 | PV.Q = Q; | |||
793 | PV.R = R; | |||
794 | PV.Left = true; | |||
795 | return true; | |||
796 | } | |||
797 | ||||
798 | bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI, | |||
799 | ParsedValues &PV) { | |||
800 | // Match the following: | |||
801 | // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1) | |||
802 | // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q | |||
803 | // The condition may also check for equality with the masked value, i.e | |||
804 | // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1) | |||
805 | // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q | |||
806 | ||||
807 | Value *CondV = SelI->getCondition(); | |||
808 | Value *TrueV = SelI->getTrueValue(); | |||
809 | Value *FalseV = SelI->getFalseValue(); | |||
810 | ||||
811 | using namespace PatternMatch; | |||
812 | ||||
813 | Value *C = nullptr; | |||
814 | CmpInst::Predicate P; | |||
815 | bool TrueIfZero; | |||
816 | ||||
817 | if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) || | |||
818 | match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) { | |||
819 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
820 | return false; | |||
821 | // Matched: select C == 0 ? ... : ... | |||
822 | // select C != 0 ? ... : ... | |||
823 | TrueIfZero = (P == CmpInst::ICMP_EQ); | |||
824 | } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) || | |||
825 | match(CondV, m_ICmp(P, m_One(), m_Value(C)))) { | |||
826 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
827 | return false; | |||
828 | // Matched: select C == 1 ? ... : ... | |||
829 | // select C != 1 ? ... : ... | |||
830 | TrueIfZero = (P == CmpInst::ICMP_NE); | |||
831 | } else | |||
832 | return false; | |||
833 | ||||
834 | Value *X = nullptr; | |||
835 | if (!match(C, m_And(m_Value(X), m_One())) && | |||
836 | !match(C, m_And(m_One(), m_Value(X)))) | |||
837 | return false; | |||
838 | // Matched: select (X & 1) == +++ ? ... : ... | |||
839 | // select (X & 1) != +++ ? ... : ... | |||
840 | ||||
841 | Value *R = nullptr, *Q = nullptr; | |||
842 | if (TrueIfZero) { | |||
843 | // The select's condition is true if the tested bit is 0. | |||
844 | // TrueV must be the shift, FalseV must be the xor. | |||
845 | if (!match(TrueV, m_LShr(m_Value(R), m_One()))) | |||
846 | return false; | |||
847 | // Matched: select +++ ? (R >> 1) : ... | |||
848 | if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) && | |||
849 | !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV)))) | |||
850 | return false; | |||
851 | // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q | |||
852 | // with commuting ^. | |||
853 | } else { | |||
854 | // The select's condition is true if the tested bit is 1. | |||
855 | // TrueV must be the xor, FalseV must be the shift. | |||
856 | if (!match(FalseV, m_LShr(m_Value(R), m_One()))) | |||
857 | return false; | |||
858 | // Matched: select +++ ? ... : (R >> 1) | |||
859 | if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) && | |||
860 | !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV)))) | |||
861 | return false; | |||
862 | // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1) | |||
863 | // with commuting ^. | |||
864 | } | |||
865 | ||||
866 | PV.X = X; | |||
867 | PV.Q = Q; | |||
868 | PV.R = R; | |||
869 | PV.Left = false; | |||
870 | return true; | |||
871 | } | |||
872 | ||||
873 | bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI, | |||
874 | BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV, | |||
875 | bool PreScan) { | |||
876 | using namespace PatternMatch; | |||
877 | ||||
878 | // The basic pattern for R = P.Q is: | |||
879 | // for i = 0..31 | |||
880 | // R = phi (0, R') | |||
881 | // if (P & (1 << i)) ; test-bit(P, i) | |||
882 | // R' = R ^ (Q << i) | |||
883 | // | |||
884 | // Similarly, the basic pattern for R = (P/Q).Q - P | |||
885 | // for i = 0..31 | |||
886 | // R = phi(P, R') | |||
887 | // if (R & (1 << i)) | |||
888 | // R' = R ^ (Q << i) | |||
889 | ||||
890 | // There exist idioms, where instead of Q being shifted left, P is shifted | |||
891 | // right. This produces a result that is shifted right by 32 bits (the | |||
892 | // non-shifted result is 64-bit). | |||
893 | // | |||
894 | // For R = P.Q, this would be: | |||
895 | // for i = 0..31 | |||
896 | // R = phi (0, R') | |||
897 | // if ((P >> i) & 1) | |||
898 | // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must | |||
899 | // else ; be shifted by 1, not i. | |||
900 | // R' = R >> 1 | |||
901 | // | |||
902 | // And for the inverse: | |||
903 | // for i = 0..31 | |||
904 | // R = phi (P, R') | |||
905 | // if (R & 1) | |||
906 | // R' = (R >> 1) ^ Q | |||
907 | // else | |||
908 | // R' = R >> 1 | |||
909 | ||||
910 | // The left-shifting idioms share the same pattern: | |||
911 | // select (X & (1 << i)) ? R ^ (Q << i) : R | |||
912 | // Similarly for right-shifting idioms: | |||
913 | // select (X & 1) ? (R >> 1) ^ Q | |||
914 | ||||
915 | if (matchLeftShift(SelI, CIV, PV)) { | |||
916 | // If this is a pre-scan, getting this far is sufficient. | |||
917 | if (PreScan) | |||
918 | return true; | |||
919 | ||||
920 | // Need to make sure that the SelI goes back into R. | |||
921 | auto *RPhi = dyn_cast<PHINode>(PV.R); | |||
922 | if (!RPhi) | |||
923 | return false; | |||
924 | if (SelI != RPhi->getIncomingValueForBlock(LoopB)) | |||
925 | return false; | |||
926 | PV.Res = SelI; | |||
927 | ||||
928 | // If X is loop invariant, it must be the input polynomial, and the | |||
929 | // idiom is the basic polynomial multiply. | |||
930 | if (CurLoop->isLoopInvariant(PV.X)) { | |||
931 | PV.P = PV.X; | |||
932 | PV.Inv = false; | |||
933 | } else { | |||
934 | // X is not loop invariant. If X == R, this is the inverse pmpy. | |||
935 | // Otherwise, check for an xor with an invariant value. If the | |||
936 | // variable argument to the xor is R, then this is still a valid | |||
937 | // inverse pmpy. | |||
938 | PV.Inv = true; | |||
939 | if (PV.X != PV.R) { | |||
940 | Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr; | |||
941 | if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2)))) | |||
942 | return false; | |||
943 | auto *I1 = dyn_cast<Instruction>(X1); | |||
944 | auto *I2 = dyn_cast<Instruction>(X2); | |||
945 | if (!I1 || I1->getParent() != LoopB) { | |||
946 | Var = X2; | |||
947 | Inv = X1; | |||
948 | } else if (!I2 || I2->getParent() != LoopB) { | |||
949 | Var = X1; | |||
950 | Inv = X2; | |||
951 | } else | |||
952 | return false; | |||
953 | if (Var != PV.R) | |||
954 | return false; | |||
955 | PV.M = Inv; | |||
956 | } | |||
957 | // The input polynomial P still needs to be determined. It will be | |||
958 | // the entry value of R. | |||
959 | Value *EntryP = RPhi->getIncomingValueForBlock(PrehB); | |||
960 | PV.P = EntryP; | |||
961 | } | |||
962 | ||||
963 | return true; | |||
964 | } | |||
965 | ||||
966 | if (matchRightShift(SelI, PV)) { | |||
967 | // If this is an inverse pattern, the Q polynomial must be known at | |||
968 | // compile time. | |||
969 | if (PV.Inv && !isa<ConstantInt>(PV.Q)) | |||
970 | return false; | |||
971 | if (PreScan) | |||
972 | return true; | |||
973 | // There is no exact matching of right-shift pmpy. | |||
974 | return false; | |||
975 | } | |||
976 | ||||
977 | return false; | |||
978 | } | |||
979 | ||||
980 | bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, | |||
981 | IntegerType *DestTy) { | |||
982 | IntegerType *T = dyn_cast<IntegerType>(Val->getType()); | |||
983 | if (!T || T->getBitWidth() > DestTy->getBitWidth()) | |||
984 | return false; | |||
985 | if (T->getBitWidth() == DestTy->getBitWidth()) | |||
986 | return true; | |||
987 | // Non-instructions are promotable. The reason why an instruction may not | |||
988 | // be promotable is that it may produce a different result if its operands | |||
989 | // and the result are promoted, for example, it may produce more non-zero | |||
990 | // bits. While it would still be possible to represent the proper result | |||
991 | // in a wider type, it may require adding additional instructions (which | |||
992 | // we don't want to do). | |||
993 | Instruction *In = dyn_cast<Instruction>(Val); | |||
994 | if (!In) | |||
995 | return true; | |||
996 | // The bitwidth of the source type is smaller than the destination. | |||
997 | // Check if the individual operation can be promoted. | |||
998 | switch (In->getOpcode()) { | |||
999 | case Instruction::PHI: | |||
1000 | case Instruction::ZExt: | |||
1001 | case Instruction::And: | |||
1002 | case Instruction::Or: | |||
1003 | case Instruction::Xor: | |||
1004 | case Instruction::LShr: // Shift right is ok. | |||
1005 | case Instruction::Select: | |||
1006 | case Instruction::Trunc: | |||
1007 | return true; | |||
1008 | case Instruction::ICmp: | |||
1009 | if (CmpInst *CI = cast<CmpInst>(In)) | |||
1010 | return CI->isEquality() || CI->isUnsigned(); | |||
1011 | llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1011 ); | |||
1012 | case Instruction::Add: | |||
1013 | return In->hasNoSignedWrap() && In->hasNoUnsignedWrap(); | |||
1014 | } | |||
1015 | return false; | |||
1016 | } | |||
1017 | ||||
1018 | void PolynomialMultiplyRecognize::promoteTo(Instruction *In, | |||
1019 | IntegerType *DestTy, BasicBlock *LoopB) { | |||
1020 | Type *OrigTy = In->getType(); | |||
1021 | assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote" ) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1021 , __extension__ __PRETTY_FUNCTION__)); | |||
1022 | ||||
1023 | // Leave boolean values alone. | |||
1024 | if (!In->getType()->isIntegerTy(1)) | |||
1025 | In->mutateType(DestTy); | |||
1026 | unsigned DestBW = DestTy->getBitWidth(); | |||
1027 | ||||
1028 | // Handle PHIs. | |||
1029 | if (PHINode *P = dyn_cast<PHINode>(In)) { | |||
1030 | unsigned N = P->getNumIncomingValues(); | |||
1031 | for (unsigned i = 0; i != N; ++i) { | |||
1032 | BasicBlock *InB = P->getIncomingBlock(i); | |||
1033 | if (InB == LoopB) | |||
1034 | continue; | |||
1035 | Value *InV = P->getIncomingValue(i); | |||
1036 | IntegerType *Ty = cast<IntegerType>(InV->getType()); | |||
1037 | // Do not promote values in PHI nodes of type i1. | |||
1038 | if (Ty != P->getType()) { | |||
1039 | // If the value type does not match the PHI type, the PHI type | |||
1040 | // must have been promoted. | |||
1041 | assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ? void (0) : __assert_fail ("Ty->getBitWidth() < DestBW" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1041 , __extension__ __PRETTY_FUNCTION__)); | |||
1042 | InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy); | |||
1043 | P->setIncomingValue(i, InV); | |||
1044 | } | |||
1045 | } | |||
1046 | } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) { | |||
1047 | Value *Op = Z->getOperand(0); | |||
1048 | if (Op->getType() == Z->getType()) | |||
1049 | Z->replaceAllUsesWith(Op); | |||
1050 | Z->eraseFromParent(); | |||
1051 | return; | |||
1052 | } | |||
1053 | if (TruncInst *T = dyn_cast<TruncInst>(In)) { | |||
1054 | IntegerType *TruncTy = cast<IntegerType>(OrigTy); | |||
1055 | Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1); | |||
1056 | Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask); | |||
1057 | T->replaceAllUsesWith(And); | |||
1058 | T->eraseFromParent(); | |||
1059 | return; | |||
1060 | } | |||
1061 | ||||
1062 | // Promote immediates. | |||
1063 | for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { | |||
1064 | if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i))) | |||
1065 | if (CI->getType()->getBitWidth() < DestBW) | |||
1066 | In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); | |||
1067 | } | |||
1068 | } | |||
1069 | ||||
1070 | bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, | |||
1071 | BasicBlock *ExitB) { | |||
1072 | assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail ( "LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1072, __extension__ __PRETTY_FUNCTION__)); | |||
1073 | // Skip loops where the exit block has more than one predecessor. The values | |||
1074 | // coming from the loop block will be promoted to another type, and so the | |||
1075 | // values coming into the exit block from other predecessors would also have | |||
1076 | // to be promoted. | |||
1077 | if (!ExitB || (ExitB->getSinglePredecessor() != LoopB)) | |||
1078 | return false; | |||
1079 | IntegerType *DestTy = getPmpyType(); | |||
1080 | // Check if the exit values have types that are no wider than the type | |||
1081 | // that we want to promote to. | |||
1082 | unsigned DestBW = DestTy->getBitWidth(); | |||
1083 | for (PHINode &P : ExitB->phis()) { | |||
1084 | if (P.getNumIncomingValues() != 1) | |||
1085 | return false; | |||
1086 | assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void (0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1086, __extension__ __PRETTY_FUNCTION__)); | |||
1087 | IntegerType *T = dyn_cast<IntegerType>(P.getType()); | |||
1088 | if (!T || T->getBitWidth() > DestBW) | |||
1089 | return false; | |||
1090 | } | |||
1091 | ||||
1092 | // Check all instructions in the loop. | |||
1093 | for (Instruction &In : *LoopB) | |||
1094 | if (!In.isTerminator() && !isPromotableTo(&In, DestTy)) | |||
1095 | return false; | |||
1096 | ||||
1097 | // Perform the promotion. | |||
1098 | std::vector<Instruction*> LoopIns; | |||
1099 | std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns), | |||
1100 | [](Instruction &In) { return &In; }); | |||
1101 | for (Instruction *In : LoopIns) | |||
1102 | if (!In->isTerminator()) | |||
1103 | promoteTo(In, DestTy, LoopB); | |||
1104 | ||||
1105 | // Fix up the PHI nodes in the exit block. | |||
1106 | Instruction *EndI = ExitB->getFirstNonPHI(); | |||
1107 | BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end(); | |||
1108 | for (auto I = ExitB->begin(); I != End; ++I) { | |||
1109 | PHINode *P = dyn_cast<PHINode>(I); | |||
1110 | if (!P) | |||
1111 | break; | |||
1112 | Type *Ty0 = P->getIncomingValue(0)->getType(); | |||
1113 | Type *PTy = P->getType(); | |||
1114 | if (PTy != Ty0) { | |||
1115 | assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail ("Ty0 == DestTy", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1115, __extension__ __PRETTY_FUNCTION__)); | |||
1116 | // In order to create the trunc, P must have the promoted type. | |||
1117 | P->mutateType(Ty0); | |||
1118 | Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy); | |||
1119 | // In order for the RAUW to work, the types of P and T must match. | |||
1120 | P->mutateType(PTy); | |||
1121 | P->replaceAllUsesWith(T); | |||
1122 | // Final update of the P's type. | |||
1123 | P->mutateType(Ty0); | |||
1124 | cast<Instruction>(T)->setOperand(0, P); | |||
1125 | } | |||
1126 | } | |||
1127 | ||||
1128 | return true; | |||
1129 | } | |||
1130 | ||||
1131 | bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In, | |||
1132 | ValueSeq &Cycle) { | |||
1133 | // Out = ..., In, ... | |||
1134 | if (Out == In) | |||
1135 | return true; | |||
1136 | ||||
1137 | auto *BB = cast<Instruction>(Out)->getParent(); | |||
1138 | bool HadPhi = false; | |||
1139 | ||||
1140 | for (auto *U : Out->users()) { | |||
1141 | auto *I = dyn_cast<Instruction>(&*U); | |||
1142 | if (I == nullptr || I->getParent() != BB) | |||
1143 | continue; | |||
1144 | // Make sure that there are no multi-iteration cycles, e.g. | |||
1145 | // p1 = phi(p2) | |||
1146 | // p2 = phi(p1) | |||
1147 | // The cycle p1->p2->p1 would span two loop iterations. | |||
1148 | // Check that there is only one phi in the cycle. | |||
1149 | bool IsPhi = isa<PHINode>(I); | |||
1150 | if (IsPhi && HadPhi) | |||
1151 | return false; | |||
1152 | HadPhi |= IsPhi; | |||
1153 | if (!Cycle.insert(I)) | |||
1154 | return false; | |||
1155 | if (findCycle(I, In, Cycle)) | |||
1156 | break; | |||
1157 | Cycle.remove(I); | |||
1158 | } | |||
1159 | return !Cycle.empty(); | |||
1160 | } | |||
1161 | ||||
1162 | void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI, | |||
1163 | ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) { | |||
1164 | // All the values in the cycle that are between the phi node and the | |||
1165 | // divider instruction will be classified as "early", all other values | |||
1166 | // will be "late". | |||
1167 | ||||
1168 | bool IsE = true; | |||
1169 | unsigned I, N = Cycle.size(); | |||
1170 | for (I = 0; I < N; ++I) { | |||
1171 | Value *V = Cycle[I]; | |||
1172 | if (DivI == V) | |||
1173 | IsE = false; | |||
1174 | else if (!isa<PHINode>(V)) | |||
1175 | continue; | |||
1176 | // Stop if found either. | |||
1177 | break; | |||
1178 | } | |||
1179 | // "I" is the index of either DivI or the phi node, whichever was first. | |||
1180 | // "E" is "false" or "true" respectively. | |||
1181 | ValueSeq &First = !IsE ? Early : Late; | |||
1182 | for (unsigned J = 0; J < I; ++J) | |||
1183 | First.insert(Cycle[J]); | |||
1184 | ||||
1185 | ValueSeq &Second = IsE ? Early : Late; | |||
1186 | Second.insert(Cycle[I]); | |||
1187 | for (++I; I < N; ++I) { | |||
1188 | Value *V = Cycle[I]; | |||
1189 | if (DivI == V || isa<PHINode>(V)) | |||
1190 | break; | |||
1191 | Second.insert(V); | |||
1192 | } | |||
1193 | ||||
1194 | for (; I < N; ++I) | |||
1195 | First.insert(Cycle[I]); | |||
1196 | } | |||
1197 | ||||
1198 | bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI, | |||
1199 | ValueSeq &Early, ValueSeq &Late) { | |||
1200 | // Select is an exception, since the condition value does not have to be | |||
1201 | // classified in the same way as the true/false values. The true/false | |||
1202 | // values do have to be both early or both late. | |||
1203 | if (UseI->getOpcode() == Instruction::Select) { | |||
1204 | Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2); | |||
1205 | if (Early.count(TV) || Early.count(FV)) { | |||
1206 | if (Late.count(TV) || Late.count(FV)) | |||
1207 | return false; | |||
1208 | Early.insert(UseI); | |||
1209 | } else if (Late.count(TV) || Late.count(FV)) { | |||
1210 | if (Early.count(TV) || Early.count(FV)) | |||
1211 | return false; | |||
1212 | Late.insert(UseI); | |||
1213 | } | |||
1214 | return true; | |||
1215 | } | |||
1216 | ||||
1217 | // Not sure what would be the example of this, but the code below relies | |||
1218 | // on having at least one operand. | |||
1219 | if (UseI->getNumOperands() == 0) | |||
1220 | return true; | |||
1221 | ||||
1222 | bool AE = true, AL = true; | |||
1223 | for (auto &I : UseI->operands()) { | |||
1224 | if (Early.count(&*I)) | |||
1225 | AL = false; | |||
1226 | else if (Late.count(&*I)) | |||
1227 | AE = false; | |||
1228 | } | |||
1229 | // If the operands appear "all early" and "all late" at the same time, | |||
1230 | // then it means that none of them are actually classified as either. | |||
1231 | // This is harmless. | |||
1232 | if (AE && AL) | |||
1233 | return true; | |||
1234 | // Conversely, if they are neither "all early" nor "all late", then | |||
1235 | // we have a mixture of early and late operands that is not a known | |||
1236 | // exception. | |||
1237 | if (!AE && !AL) | |||
1238 | return false; | |||
1239 | ||||
1240 | // Check that we have covered the two special cases. | |||
1241 | assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail ("AE != AL", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1241, __extension__ __PRETTY_FUNCTION__)); | |||
1242 | ||||
1243 | if (AE) | |||
1244 | Early.insert(UseI); | |||
1245 | else | |||
1246 | Late.insert(UseI); | |||
1247 | return true; | |||
1248 | } | |||
1249 | ||||
1250 | bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) { | |||
1251 | switch (I->getOpcode()) { | |||
1252 | case Instruction::And: | |||
1253 | case Instruction::Or: | |||
1254 | case Instruction::Xor: | |||
1255 | case Instruction::LShr: | |||
1256 | case Instruction::Shl: | |||
1257 | case Instruction::Select: | |||
1258 | case Instruction::ICmp: | |||
1259 | case Instruction::PHI: | |||
1260 | break; | |||
1261 | default: | |||
1262 | return false; | |||
1263 | } | |||
1264 | return true; | |||
1265 | } | |||
1266 | ||||
1267 | bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, | |||
1268 | unsigned IterCount) { | |||
1269 | auto *T = dyn_cast<IntegerType>(V->getType()); | |||
1270 | if (!T) | |||
1271 | return false; | |||
1272 | ||||
1273 | KnownBits Known(T->getBitWidth()); | |||
1274 | computeKnownBits(V, Known, DL); | |||
1275 | return Known.countMinLeadingZeros() >= IterCount; | |||
1276 | } | |||
1277 | ||||
1278 | bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V, | |||
1279 | unsigned IterCount) { | |||
1280 | // Assume that all inputs to the value have the high bits zero. | |||
1281 | // Check if the value itself preserves the zeros in the high bits. | |||
1282 | if (auto *C = dyn_cast<ConstantInt>(V)) | |||
1283 | return C->getValue().countl_zero() >= IterCount; | |||
1284 | ||||
1285 | if (auto *I = dyn_cast<Instruction>(V)) { | |||
1286 | switch (I->getOpcode()) { | |||
1287 | case Instruction::And: | |||
1288 | case Instruction::Or: | |||
1289 | case Instruction::Xor: | |||
1290 | case Instruction::LShr: | |||
1291 | case Instruction::Select: | |||
1292 | case Instruction::ICmp: | |||
1293 | case Instruction::PHI: | |||
1294 | case Instruction::ZExt: | |||
1295 | return true; | |||
1296 | } | |||
1297 | } | |||
1298 | ||||
1299 | return false; | |||
1300 | } | |||
1301 | ||||
1302 | bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) { | |||
1303 | unsigned Opc = I->getOpcode(); | |||
1304 | if (Opc == Instruction::Shl || Opc == Instruction::LShr) | |||
1305 | return Op != I->getOperand(1); | |||
1306 | return true; | |||
1307 | } | |||
1308 | ||||
1309 | bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, | |||
1310 | BasicBlock *ExitB, unsigned IterCount) { | |||
1311 | Value *CIV = getCountIV(LoopB); | |||
1312 | if (CIV == nullptr) | |||
1313 | return false; | |||
1314 | auto *CIVTy = dyn_cast<IntegerType>(CIV->getType()); | |||
1315 | if (CIVTy == nullptr) | |||
1316 | return false; | |||
1317 | ||||
1318 | ValueSeq RShifts; | |||
1319 | ValueSeq Early, Late, Cycled; | |||
1320 | ||||
1321 | // Find all value cycles that contain logical right shifts by 1. | |||
1322 | for (Instruction &I : *LoopB) { | |||
1323 | using namespace PatternMatch; | |||
1324 | ||||
1325 | Value *V = nullptr; | |||
1326 | if (!match(&I, m_LShr(m_Value(V), m_One()))) | |||
1327 | continue; | |||
1328 | ValueSeq C; | |||
1329 | if (!findCycle(&I, V, C)) | |||
1330 | continue; | |||
1331 | ||||
1332 | // Found a cycle. | |||
1333 | C.insert(&I); | |||
1334 | classifyCycle(&I, C, Early, Late); | |||
1335 | Cycled.insert(C.begin(), C.end()); | |||
1336 | RShifts.insert(&I); | |||
1337 | } | |||
1338 | ||||
1339 | // Find the set of all values affected by the shift cycles, i.e. all | |||
1340 | // cycled values, and (recursively) all their users. | |||
1341 | ValueSeq Users(Cycled.begin(), Cycled.end()); | |||
1342 | for (unsigned i = 0; i < Users.size(); ++i) { | |||
1343 | Value *V = Users[i]; | |||
1344 | if (!isa<IntegerType>(V->getType())) | |||
1345 | return false; | |||
1346 | auto *R = cast<Instruction>(V); | |||
1347 | // If the instruction does not commute with shifts, the loop cannot | |||
1348 | // be unshifted. | |||
1349 | if (!commutesWithShift(R)) | |||
1350 | return false; | |||
1351 | for (User *U : R->users()) { | |||
1352 | auto *T = cast<Instruction>(U); | |||
1353 | // Skip users from outside of the loop. They will be handled later. | |||
1354 | // Also, skip the right-shifts and phi nodes, since they mix early | |||
1355 | // and late values. | |||
1356 | if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T)) | |||
1357 | continue; | |||
1358 | ||||
1359 | Users.insert(T); | |||
1360 | if (!classifyInst(T, Early, Late)) | |||
1361 | return false; | |||
1362 | } | |||
1363 | } | |||
1364 | ||||
1365 | if (Users.empty()) | |||
1366 | return false; | |||
1367 | ||||
1368 | // Verify that high bits remain zero. | |||
1369 | ValueSeq Internal(Users.begin(), Users.end()); | |||
1370 | ValueSeq Inputs; | |||
1371 | for (unsigned i = 0; i < Internal.size(); ++i) { | |||
1372 | auto *R = dyn_cast<Instruction>(Internal[i]); | |||
1373 | if (!R) | |||
1374 | continue; | |||
1375 | for (Value *Op : R->operands()) { | |||
1376 | auto *T = dyn_cast<Instruction>(Op); | |||
1377 | if (T && T->getParent() != LoopB) | |||
1378 | Inputs.insert(Op); | |||
1379 | else | |||
1380 | Internal.insert(Op); | |||
1381 | } | |||
1382 | } | |||
1383 | for (Value *V : Inputs) | |||
1384 | if (!highBitsAreZero(V, IterCount)) | |||
1385 | return false; | |||
1386 | for (Value *V : Internal) | |||
1387 | if (!keepsHighBitsZero(V, IterCount)) | |||
1388 | return false; | |||
1389 | ||||
1390 | // Finally, the work can be done. Unshift each user. | |||
1391 | IRBuilder<> IRB(LoopB); | |||
1392 | std::map<Value*,Value*> ShiftMap; | |||
1393 | ||||
1394 | using CastMapType = std::map<std::pair<Value *, Type *>, Value *>; | |||
1395 | ||||
1396 | CastMapType CastMap; | |||
1397 | ||||
1398 | auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V, | |||
1399 | IntegerType *Ty) -> Value* { | |||
1400 | auto H = CM.find(std::make_pair(V, Ty)); | |||
1401 | if (H != CM.end()) | |||
1402 | return H->second; | |||
1403 | Value *CV = IRB.CreateIntCast(V, Ty, false); | |||
1404 | CM.insert(std::make_pair(std::make_pair(V, Ty), CV)); | |||
1405 | return CV; | |||
1406 | }; | |||
1407 | ||||
1408 | for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) { | |||
1409 | using namespace PatternMatch; | |||
1410 | ||||
1411 | if (isa<PHINode>(I) || !Users.count(&*I)) | |||
1412 | continue; | |||
1413 | ||||
1414 | // Match lshr x, 1. | |||
1415 | Value *V = nullptr; | |||
1416 | if (match(&*I, m_LShr(m_Value(V), m_One()))) { | |||
1417 | replaceAllUsesOfWithIn(&*I, V, LoopB); | |||
1418 | continue; | |||
1419 | } | |||
1420 | // For each non-cycled operand, replace it with the corresponding | |||
1421 | // value shifted left. | |||
1422 | for (auto &J : I->operands()) { | |||
1423 | Value *Op = J.get(); | |||
1424 | if (!isOperandShifted(&*I, Op)) | |||
1425 | continue; | |||
1426 | if (Users.count(Op)) | |||
1427 | continue; | |||
1428 | // Skip shifting zeros. | |||
1429 | if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero()) | |||
1430 | continue; | |||
1431 | // Check if we have already generated a shift for this value. | |||
1432 | auto F = ShiftMap.find(Op); | |||
1433 | Value *W = (F != ShiftMap.end()) ? F->second : nullptr; | |||
1434 | if (W == nullptr) { | |||
1435 | IRB.SetInsertPoint(&*I); | |||
1436 | // First, the shift amount will be CIV or CIV+1, depending on | |||
1437 | // whether the value is early or late. Instead of creating CIV+1, | |||
1438 | // do a single shift of the value. | |||
1439 | Value *ShAmt = CIV, *ShVal = Op; | |||
1440 | auto *VTy = cast<IntegerType>(ShVal->getType()); | |||
1441 | auto *ATy = cast<IntegerType>(ShAmt->getType()); | |||
1442 | if (Late.count(&*I)) | |||
1443 | ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1)); | |||
1444 | // Second, the types of the shifted value and the shift amount | |||
1445 | // must match. | |||
1446 | if (VTy != ATy) { | |||
1447 | if (VTy->getBitWidth() < ATy->getBitWidth()) | |||
1448 | ShVal = upcast(CastMap, IRB, ShVal, ATy); | |||
1449 | else | |||
1450 | ShAmt = upcast(CastMap, IRB, ShAmt, VTy); | |||
1451 | } | |||
1452 | // Ready to generate the shift and memoize it. | |||
1453 | W = IRB.CreateShl(ShVal, ShAmt); | |||
1454 | ShiftMap.insert(std::make_pair(Op, W)); | |||
1455 | } | |||
1456 | I->replaceUsesOfWith(Op, W); | |||
1457 | } | |||
1458 | } | |||
1459 | ||||
1460 | // Update the users outside of the loop to account for having left | |||
1461 | // shifts. They would normally be shifted right in the loop, so shift | |||
1462 | // them right after the loop exit. | |||
1463 | // Take advantage of the loop-closed SSA form, which has all the post- | |||
1464 | // loop values in phi nodes. | |||
1465 | IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt()); | |||
1466 | for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) { | |||
1467 | if (!isa<PHINode>(P)) | |||
1468 | break; | |||
1469 | auto *PN = cast<PHINode>(P); | |||
1470 | Value *U = PN->getIncomingValueForBlock(LoopB); | |||
1471 | if (!Users.count(U)) | |||
1472 | continue; | |||
1473 | Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount)); | |||
1474 | PN->replaceAllUsesWith(S); | |||
1475 | // The above RAUW will create | |||
1476 | // S = lshr S, IterCount | |||
1477 | // so we need to fix it back into | |||
1478 | // S = lshr PN, IterCount | |||
1479 | cast<User>(S)->replaceUsesOfWith(S, PN); | |||
1480 | } | |||
1481 | ||||
1482 | return true; | |||
1483 | } | |||
1484 | ||||
1485 | void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { | |||
1486 | for (auto &I : *LoopB) | |||
1487 | if (Value *SV = simplifyInstruction(&I, {DL, &TLI, &DT})) | |||
1488 | I.replaceAllUsesWith(SV); | |||
1489 | ||||
1490 | for (Instruction &I : llvm::make_early_inc_range(*LoopB)) | |||
1491 | RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI); | |||
1492 | } | |||
1493 | ||||
1494 | unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) { | |||
1495 | // Arrays of coefficients of Q and the inverse, C. | |||
1496 | // Q[i] = coefficient at x^i. | |||
1497 | std::array<char,32> Q, C; | |||
1498 | ||||
1499 | for (unsigned i = 0; i < 32; ++i) { | |||
1500 | Q[i] = QP & 1; | |||
1501 | QP >>= 1; | |||
1502 | } | |||
1503 | assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail ("Q[0] == 1", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1503, __extension__ __PRETTY_FUNCTION__)); | |||
1504 | ||||
1505 | // Find C, such that | |||
1506 | // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1 | |||
1507 | // | |||
1508 | // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the | |||
1509 | // operations * and + are & and ^ respectively. | |||
1510 | // | |||
1511 | // Find C[i] recursively, by comparing i-th coefficient in the product | |||
1512 | // with 0 (or 1 for i=0). | |||
1513 | // | |||
1514 | // C[0] = 1, since C[0] = Q[0], and Q[0] = 1. | |||
1515 | C[0] = 1; | |||
1516 | for (unsigned i = 1; i < 32; ++i) { | |||
1517 | // Solve for C[i] in: | |||
1518 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0 | |||
1519 | // This is equivalent to | |||
1520 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0 | |||
1521 | // which is | |||
1522 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i] | |||
1523 | unsigned T = 0; | |||
1524 | for (unsigned j = 0; j < i; ++j) | |||
1525 | T = T ^ (C[j] & Q[i-j]); | |||
1526 | C[i] = T; | |||
1527 | } | |||
1528 | ||||
1529 | unsigned QV = 0; | |||
1530 | for (unsigned i = 0; i < 32; ++i) | |||
1531 | if (C[i]) | |||
1532 | QV |= (1 << i); | |||
1533 | ||||
1534 | return QV; | |||
1535 | } | |||
1536 | ||||
1537 | Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, | |||
1538 | ParsedValues &PV) { | |||
1539 | IRBuilder<> B(&*At); | |||
1540 | Module *M = At->getParent()->getParent()->getParent(); | |||
1541 | Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw); | |||
1542 | ||||
1543 | Value *P = PV.P, *Q = PV.Q, *P0 = P; | |||
1544 | unsigned IC = PV.IterCount; | |||
1545 | ||||
1546 | if (PV.M != nullptr) | |||
| ||||
1547 | P0 = P = B.CreateXor(P, PV.M); | |||
1548 | ||||
1549 | // Create a bit mask to clear the high bits beyond IterCount. | |||
1550 | auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC)); | |||
1551 | ||||
1552 | if (PV.IterCount != 32) | |||
1553 | P = B.CreateAnd(P, BMI); | |||
1554 | ||||
1555 | if (PV.Inv) { | |||
1556 | auto *QI = dyn_cast<ConstantInt>(PV.Q); | |||
1557 | assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth() <= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1557 , __extension__ __PRETTY_FUNCTION__)); | |||
1558 | ||||
1559 | // Again, clearing bits beyond IterCount. | |||
1560 | unsigned M = (1 << PV.IterCount) - 1; | |||
| ||||
1561 | unsigned Tmp = (QI->getZExtValue() | 1) & M; | |||
1562 | unsigned QV = getInverseMxN(Tmp) & M; | |||
1563 | auto *QVI = ConstantInt::get(QI->getType(), QV); | |||
1564 | P = B.CreateCall(PMF, {P, QVI}); | |||
1565 | P = B.CreateTrunc(P, QI->getType()); | |||
1566 | if (IC != 32) | |||
1567 | P = B.CreateAnd(P, BMI); | |||
1568 | } | |||
1569 | ||||
1570 | Value *R = B.CreateCall(PMF, {P, Q}); | |||
1571 | ||||
1572 | if (PV.M != nullptr) | |||
1573 | R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false)); | |||
1574 | ||||
1575 | return R; | |||
1576 | } | |||
1577 | ||||
1578 | static bool hasZeroSignBit(const Value *V) { | |||
1579 | if (const auto *CI = dyn_cast<const ConstantInt>(V)) | |||
1580 | return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; | |||
1581 | const Instruction *I = dyn_cast<const Instruction>(V); | |||
1582 | if (!I) | |||
1583 | return false; | |||
1584 | switch (I->getOpcode()) { | |||
1585 | case Instruction::LShr: | |||
1586 | if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1))) | |||
1587 | return SI->getZExtValue() > 0; | |||
1588 | return false; | |||
1589 | case Instruction::Or: | |||
1590 | case Instruction::Xor: | |||
1591 | return hasZeroSignBit(I->getOperand(0)) && | |||
1592 | hasZeroSignBit(I->getOperand(1)); | |||
1593 | case Instruction::And: | |||
1594 | return hasZeroSignBit(I->getOperand(0)) || | |||
1595 | hasZeroSignBit(I->getOperand(1)); | |||
1596 | } | |||
1597 | return false; | |||
1598 | } | |||
1599 | ||||
1600 | void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { | |||
1601 | S.addRule("sink-zext", | |||
1602 | // Sink zext past bitwise operations. | |||
1603 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1604 | if (I->getOpcode() != Instruction::ZExt) | |||
1605 | return nullptr; | |||
1606 | Instruction *T = dyn_cast<Instruction>(I->getOperand(0)); | |||
1607 | if (!T) | |||
1608 | return nullptr; | |||
1609 | switch (T->getOpcode()) { | |||
1610 | case Instruction::And: | |||
1611 | case Instruction::Or: | |||
1612 | case Instruction::Xor: | |||
1613 | break; | |||
1614 | default: | |||
1615 | return nullptr; | |||
1616 | } | |||
1617 | IRBuilder<> B(Ctx); | |||
1618 | return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(), | |||
1619 | B.CreateZExt(T->getOperand(0), I->getType()), | |||
1620 | B.CreateZExt(T->getOperand(1), I->getType())); | |||
1621 | }); | |||
1622 | S.addRule("xor/and -> and/xor", | |||
1623 | // (xor (and x a) (and y a)) -> (and (xor x y) a) | |||
1624 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1625 | if (I->getOpcode() != Instruction::Xor) | |||
1626 | return nullptr; | |||
1627 | Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0)); | |||
1628 | Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1)); | |||
1629 | if (!And0 || !And1) | |||
1630 | return nullptr; | |||
1631 | if (And0->getOpcode() != Instruction::And || | |||
1632 | And1->getOpcode() != Instruction::And) | |||
1633 | return nullptr; | |||
1634 | if (And0->getOperand(1) != And1->getOperand(1)) | |||
1635 | return nullptr; | |||
1636 | IRBuilder<> B(Ctx); | |||
1637 | return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), | |||
1638 | And0->getOperand(1)); | |||
1639 | }); | |||
1640 | S.addRule("sink binop into select", | |||
1641 | // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) | |||
1642 | // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) | |||
1643 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1644 | BinaryOperator *BO = dyn_cast<BinaryOperator>(I); | |||
1645 | if (!BO) | |||
1646 | return nullptr; | |||
1647 | Instruction::BinaryOps Op = BO->getOpcode(); | |||
1648 | if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) { | |||
1649 | IRBuilder<> B(Ctx); | |||
1650 | Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue(); | |||
1651 | Value *Z = BO->getOperand(1); | |||
1652 | return B.CreateSelect(Sel->getCondition(), | |||
1653 | B.CreateBinOp(Op, X, Z), | |||
1654 | B.CreateBinOp(Op, Y, Z)); | |||
1655 | } | |||
1656 | if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) { | |||
1657 | IRBuilder<> B(Ctx); | |||
1658 | Value *X = BO->getOperand(0); | |||
1659 | Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue(); | |||
1660 | return B.CreateSelect(Sel->getCondition(), | |||
1661 | B.CreateBinOp(Op, X, Y), | |||
1662 | B.CreateBinOp(Op, X, Z)); | |||
1663 | } | |||
1664 | return nullptr; | |||
1665 | }); | |||
1666 | S.addRule("fold select-select", | |||
1667 | // (select c (select c x y) z) -> (select c x z) | |||
1668 | // (select c x (select c y z)) -> (select c x z) | |||
1669 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1670 | SelectInst *Sel = dyn_cast<SelectInst>(I); | |||
1671 | if (!Sel) | |||
1672 | return nullptr; | |||
1673 | IRBuilder<> B(Ctx); | |||
1674 | Value *C = Sel->getCondition(); | |||
1675 | if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) { | |||
1676 | if (Sel0->getCondition() == C) | |||
1677 | return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue()); | |||
1678 | } | |||
1679 | if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) { | |||
1680 | if (Sel1->getCondition() == C) | |||
1681 | return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue()); | |||
1682 | } | |||
1683 | return nullptr; | |||
1684 | }); | |||
1685 | S.addRule("or-signbit -> xor-signbit", | |||
1686 | // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) | |||
1687 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1688 | if (I->getOpcode() != Instruction::Or) | |||
1689 | return nullptr; | |||
1690 | ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
1691 | if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) | |||
1692 | return nullptr; | |||
1693 | if (!hasZeroSignBit(I->getOperand(0))) | |||
1694 | return nullptr; | |||
1695 | return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb); | |||
1696 | }); | |||
1697 | S.addRule("sink lshr into binop", | |||
1698 | // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) | |||
1699 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1700 | if (I->getOpcode() != Instruction::LShr) | |||
1701 | return nullptr; | |||
1702 | BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0)); | |||
1703 | if (!BitOp) | |||
1704 | return nullptr; | |||
1705 | switch (BitOp->getOpcode()) { | |||
1706 | case Instruction::And: | |||
1707 | case Instruction::Or: | |||
1708 | case Instruction::Xor: | |||
1709 | break; | |||
1710 | default: | |||
1711 | return nullptr; | |||
1712 | } | |||
1713 | IRBuilder<> B(Ctx); | |||
1714 | Value *S = I->getOperand(1); | |||
1715 | return B.CreateBinOp(BitOp->getOpcode(), | |||
1716 | B.CreateLShr(BitOp->getOperand(0), S), | |||
1717 | B.CreateLShr(BitOp->getOperand(1), S)); | |||
1718 | }); | |||
1719 | S.addRule("expose bitop-const", | |||
1720 | // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) | |||
1721 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1722 | auto IsBitOp = [](unsigned Op) -> bool { | |||
1723 | switch (Op) { | |||
1724 | case Instruction::And: | |||
1725 | case Instruction::Or: | |||
1726 | case Instruction::Xor: | |||
1727 | return true; | |||
1728 | } | |||
1729 | return false; | |||
1730 | }; | |||
1731 | BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I); | |||
1732 | if (!BitOp1 || !IsBitOp(BitOp1->getOpcode())) | |||
1733 | return nullptr; | |||
1734 | BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0)); | |||
1735 | if (!BitOp2 || !IsBitOp(BitOp2->getOpcode())) | |||
1736 | return nullptr; | |||
1737 | ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1)); | |||
1738 | ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1)); | |||
1739 | if (!CA || !CB) | |||
1740 | return nullptr; | |||
1741 | IRBuilder<> B(Ctx); | |||
1742 | Value *X = BitOp2->getOperand(0); | |||
1743 | return B.CreateBinOp(BitOp2->getOpcode(), X, | |||
1744 | B.CreateBinOp(BitOp1->getOpcode(), CA, CB)); | |||
1745 | }); | |||
1746 | } | |||
1747 | ||||
1748 | void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) { | |||
1749 | S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a", | |||
1750 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1751 | if (I->getOpcode() != Instruction::And) | |||
1752 | return nullptr; | |||
1753 | Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0)); | |||
1754 | ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
1755 | if (!Xor || !C0) | |||
1756 | return nullptr; | |||
1757 | if (Xor->getOpcode() != Instruction::Xor) | |||
1758 | return nullptr; | |||
1759 | Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0)); | |||
1760 | Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1)); | |||
1761 | // Pick the first non-null and. | |||
1762 | if (!And0 || And0->getOpcode() != Instruction::And) | |||
1763 | std::swap(And0, And1); | |||
1764 | ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1)); | |||
1765 | if (!C1) | |||
1766 | return nullptr; | |||
1767 | uint32_t V0 = C0->getZExtValue(); | |||
1768 | uint32_t V1 = C1->getZExtValue(); | |||
1769 | if (V0 != (V0 & V1)) | |||
1770 | return nullptr; | |||
1771 | IRBuilder<> B(Ctx); | |||
1772 | return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0); | |||
1773 | }); | |||
1774 | } | |||
1775 | ||||
1776 | bool PolynomialMultiplyRecognize::recognize() { | |||
1777 | LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" << *CurLoop << '\n'; } } while (false) | |||
1778 | << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" << *CurLoop << '\n'; } } while (false); | |||
1779 | // Restrictions: | |||
1780 | // - The loop must consist of a single block. | |||
1781 | // - The iteration count must be known at compile-time. | |||
1782 | // - The loop must have an induction variable starting from 0, and | |||
1783 | // incremented in each iteration of the loop. | |||
1784 | BasicBlock *LoopB = CurLoop->getHeader(); | |||
1785 | LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop header:\n" << * LoopB; } } while (false); | |||
1786 | ||||
1787 | if (LoopB != CurLoop->getLoopLatch()) | |||
1788 | return false; | |||
1789 | BasicBlock *ExitB = CurLoop->getExitBlock(); | |||
1790 | if (ExitB == nullptr) | |||
1791 | return false; | |||
1792 | BasicBlock *EntryB = CurLoop->getLoopPreheader(); | |||
1793 | if (EntryB == nullptr) | |||
1794 | return false; | |||
1795 | ||||
1796 | unsigned IterCount = 0; | |||
1797 | const SCEV *CT = SE.getBackedgeTakenCount(CurLoop); | |||
1798 | if (isa<SCEVCouldNotCompute>(CT)) | |||
1799 | return false; | |||
1800 | if (auto *CV = dyn_cast<SCEVConstant>(CT)) | |||
1801 | IterCount = CV->getValue()->getZExtValue() + 1; | |||
1802 | ||||
1803 | Value *CIV = getCountIV(LoopB); | |||
1804 | ParsedValues PV; | |||
1805 | Simplifier PreSimp; | |||
1806 | PV.IterCount = IterCount; | |||
1807 | LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'; } } while ( false) | |||
1808 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'; } } while ( false); | |||
1809 | ||||
1810 | setupPreSimplifier(PreSimp); | |||
1811 | ||||
1812 | // Perform a preliminary scan of select instructions to see if any of them | |||
1813 | // looks like a generator of the polynomial multiply steps. Assume that a | |||
1814 | // loop can only contain a single transformable operation, so stop the | |||
1815 | // traversal after the first reasonable candidate was found. | |||
1816 | // XXX: Currently this approach can modify the loop before being 100% sure | |||
1817 | // that the transformation can be carried out. | |||
1818 | bool FoundPreScan = false; | |||
1819 | auto FeedsPHI = [LoopB](const Value *V) -> bool { | |||
1820 | for (const Value *U : V->users()) { | |||
1821 | if (const auto *P = dyn_cast<const PHINode>(U)) | |||
1822 | if (P->getParent() == LoopB) | |||
1823 | return true; | |||
1824 | } | |||
1825 | return false; | |||
1826 | }; | |||
1827 | for (Instruction &In : *LoopB) { | |||
1828 | SelectInst *SI = dyn_cast<SelectInst>(&In); | |||
1829 | if (!SI || !FeedsPHI(SI)) | |||
1830 | continue; | |||
1831 | ||||
1832 | Simplifier::Context C(SI); | |||
1833 | Value *T = PreSimp.simplify(C); | |||
1834 | SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI; | |||
1835 | LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'; } } while (false); | |||
1836 | if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) { | |||
1837 | FoundPreScan = true; | |||
1838 | if (SelI != SI) { | |||
1839 | Value *NewSel = C.materialize(LoopB, SI->getIterator()); | |||
1840 | SI->replaceAllUsesWith(NewSel); | |||
1841 | RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); | |||
1842 | } | |||
1843 | break; | |||
1844 | } | |||
1845 | } | |||
1846 | ||||
1847 | if (!FoundPreScan) { | |||
1848 | LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n" ; } } while (false); | |||
1849 | return false; | |||
1850 | } | |||
1851 | ||||
1852 | if (!PV.Left) { | |||
1853 | // The right shift version actually only returns the higher bits of | |||
1854 | // the result (each iteration discards the LSB). If we want to convert it | |||
1855 | // to a left-shifting loop, the working data type must be at least as | |||
1856 | // wide as the target's pmpy instruction. | |||
1857 | if (!promoteTypes(LoopB, ExitB)) | |||
1858 | return false; | |||
1859 | // Run post-promotion simplifications. | |||
1860 | Simplifier PostSimp; | |||
1861 | setupPostSimplifier(PostSimp); | |||
1862 | for (Instruction &In : *LoopB) { | |||
1863 | SelectInst *SI = dyn_cast<SelectInst>(&In); | |||
1864 | if (!SI || !FeedsPHI(SI)) | |||
1865 | continue; | |||
1866 | Simplifier::Context C(SI); | |||
1867 | Value *T = PostSimp.simplify(C); | |||
1868 | SelectInst *SelI = dyn_cast_or_null<SelectInst>(T); | |||
1869 | if (SelI != SI) { | |||
1870 | Value *NewSel = C.materialize(LoopB, SI->getIterator()); | |||
1871 | SI->replaceAllUsesWith(NewSel); | |||
1872 | RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); | |||
1873 | } | |||
1874 | break; | |||
1875 | } | |||
1876 | ||||
1877 | if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) | |||
1878 | return false; | |||
1879 | cleanupLoopBody(LoopB); | |||
1880 | } | |||
1881 | ||||
1882 | // Scan the loop again, find the generating select instruction. | |||
1883 | bool FoundScan = false; | |||
1884 | for (Instruction &In : *LoopB) { | |||
1885 | SelectInst *SelI = dyn_cast<SelectInst>(&In); | |||
1886 | if (!SelI) | |||
1887 | continue; | |||
1888 | LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI << '\n'; } } while (false); | |||
1889 | FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false); | |||
1890 | if (FoundScan) | |||
1891 | break; | |||
1892 | } | |||
1893 | assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail ("FoundScan", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1893, __extension__ __PRETTY_FUNCTION__)); | |||
1894 | ||||
1895 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1896 | StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1897 | if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1898 | dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1899 | elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1900 | dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1901 | << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1902 | dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1903 | if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1904 | dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1905 | dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1906 | dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1907 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false); | |||
1908 | ||||
1909 | BasicBlock::iterator At(EntryB->getTerminator()); | |||
1910 | Value *PM = generate(At, PV); | |||
1911 | if (PM == nullptr) | |||
1912 | return false; | |||
1913 | ||||
1914 | if (PM->getType() != PV.Res->getType()) | |||
1915 | PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false); | |||
1916 | ||||
1917 | PV.Res->replaceAllUsesWith(PM); | |||
1918 | PV.Res->eraseFromParent(); | |||
1919 | return true; | |||
1920 | } | |||
1921 | ||||
1922 | int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) { | |||
1923 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) | |||
1924 | return SC->getAPInt().getSExtValue(); | |||
1925 | return 0; | |||
1926 | } | |||
1927 | ||||
1928 | bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) { | |||
1929 | // Allow volatile stores if HexagonVolatileMemcpy is enabled. | |||
1930 | if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple()) | |||
1931 | return false; | |||
1932 | ||||
1933 | Value *StoredVal = SI->getValueOperand(); | |||
1934 | Value *StorePtr = SI->getPointerOperand(); | |||
1935 | ||||
1936 | // Reject stores that are so large that they overflow an unsigned. | |||
1937 | uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); | |||
1938 | if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) | |||
1939 | return false; | |||
1940 | ||||
1941 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
1942 | // loop, which indicates a strided store. If we have something else, it's a | |||
1943 | // random store we can't handle. | |||
1944 | auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | |||
1945 | if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) | |||
1946 | return false; | |||
1947 | ||||
1948 | // Check to see if the stride matches the size of the store. If so, then we | |||
1949 | // know that every byte is touched in the loop. | |||
1950 | int Stride = getSCEVStride(StoreEv); | |||
1951 | if (Stride == 0) | |||
1952 | return false; | |||
1953 | unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); | |||
1954 | if (StoreSize != unsigned(std::abs(Stride))) | |||
1955 | return false; | |||
1956 | ||||
1957 | // The store must be feeding a non-volatile load. | |||
1958 | LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); | |||
1959 | if (!LI || !LI->isSimple()) | |||
1960 | return false; | |||
1961 | ||||
1962 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
1963 | // loop, which indicates a strided load. If we have something else, it's a | |||
1964 | // random load we can't handle. | |||
1965 | Value *LoadPtr = LI->getPointerOperand(); | |||
1966 | auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr)); | |||
1967 | if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) | |||
1968 | return false; | |||
1969 | ||||
1970 | // The store and load must share the same stride. | |||
1971 | if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) | |||
1972 | return false; | |||
1973 | ||||
1974 | // Success. This store can be converted into a memcpy. | |||
1975 | return true; | |||
1976 | } | |||
1977 | ||||
1978 | /// mayLoopAccessLocation - Return true if the specified loop might access the | |||
1979 | /// specified pointer location, which is a loop-strided access. The 'Access' | |||
1980 | /// argument specifies what the verboten forms of access are (read or write). | |||
1981 | static bool | |||
1982 | mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, | |||
1983 | const SCEV *BECount, unsigned StoreSize, | |||
1984 | AliasAnalysis &AA, | |||
1985 | SmallPtrSetImpl<Instruction *> &Ignored) { | |||
1986 | // Get the location that may be stored across the loop. Since the access | |||
1987 | // is strided positively through memory, we say that the modified location | |||
1988 | // starts at the pointer and has infinite size. | |||
1989 | LocationSize AccessSize = LocationSize::afterPointer(); | |||
1990 | ||||
1991 | // If the loop iterates a fixed number of times, we can refine the access | |||
1992 | // size to be exactly the size of the memset, which is (BECount+1)*StoreSize | |||
1993 | if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) | |||
1994 | AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) * | |||
1995 | StoreSize); | |||
1996 | ||||
1997 | // TODO: For this to be really effective, we have to dive into the pointer | |||
1998 | // operand in the store. Store to &A[i] of 100 will always return may alias | |||
1999 | // with store of &A[100], we need to StoreLoc to be "A" with size of 100, | |||
2000 | // which will then no-alias a store to &A[100]. | |||
2001 | MemoryLocation StoreLoc(Ptr, AccessSize); | |||
2002 | ||||
2003 | for (auto *B : L->blocks()) | |||
2004 | for (auto &I : *B) | |||
2005 | if (Ignored.count(&I) == 0 && | |||
2006 | isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access)) | |||
2007 | return true; | |||
2008 | ||||
2009 | return false; | |||
2010 | } | |||
2011 | ||||
2012 | void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB, | |||
2013 | SmallVectorImpl<StoreInst*> &Stores) { | |||
2014 | Stores.clear(); | |||
2015 | for (Instruction &I : *BB) | |||
2016 | if (StoreInst *SI = dyn_cast<StoreInst>(&I)) | |||
2017 | if (isLegalStore(CurLoop, SI)) | |||
2018 | Stores.push_back(SI); | |||
2019 | } | |||
2020 | ||||
2021 | bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, | |||
2022 | StoreInst *SI, const SCEV *BECount) { | |||
2023 | assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025 , __extension__ __PRETTY_FUNCTION__)) | |||
2024 | "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025 , __extension__ __PRETTY_FUNCTION__)) | |||
2025 | "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025 , __extension__ __PRETTY_FUNCTION__)); | |||
2026 | ||||
2027 | Value *StorePtr = SI->getPointerOperand(); | |||
2028 | auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | |||
2029 | unsigned Stride = getSCEVStride(StoreEv); | |||
2030 | unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); | |||
2031 | if (Stride != StoreSize) | |||
2032 | return false; | |||
2033 | ||||
2034 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
2035 | // loop, which indicates a strided load. If we have something else, it's a | |||
2036 | // random load we can't handle. | |||
2037 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||
2038 | auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); | |||
2039 | ||||
2040 | // The trip count of the loop and the base pointer of the addrec SCEV is | |||
2041 | // guaranteed to be loop invariant, which means that it should dominate the | |||
2042 | // header. This allows us to insert code for it in the preheader. | |||
2043 | BasicBlock *Preheader = CurLoop->getLoopPreheader(); | |||
2044 | Instruction *ExpPt = Preheader->getTerminator(); | |||
2045 | IRBuilder<> Builder(ExpPt); | |||
2046 | SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom"); | |||
2047 | ||||
2048 | Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace()); | |||
2049 | ||||
2050 | // Okay, we have a strided store "p[i]" of a loaded value. We can turn | |||
2051 | // this into a memcpy/memmove in the loop preheader now if we want. However, | |||
2052 | // this would be unsafe to do if there is anything else in the loop that may | |||
2053 | // read or write the memory region we're storing to. For memcpy, this | |||
2054 | // includes the load that feeds the stores. Check for an alias by generating | |||
2055 | // the base address and checking everything. | |||
2056 | Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), | |||
2057 | Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt); | |||
2058 | Value *LoadBasePtr = nullptr; | |||
2059 | ||||
2060 | bool Overlap = false; | |||
2061 | bool DestVolatile = SI->isVolatile(); | |||
2062 | Type *BECountTy = BECount->getType(); | |||
2063 | ||||
2064 | if (DestVolatile) { | |||
2065 | // The trip count must fit in i32, since it is the type of the "num_words" | |||
2066 | // argument to hexagon_memcpy_forward_vp4cp4n2. | |||
2067 | if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) { | |||
2068 | CleanupAndExit: | |||
2069 | // If we generated new code for the base pointer, clean up. | |||
2070 | Expander.clear(); | |||
2071 | if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) { | |||
2072 | RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); | |||
2073 | StoreBasePtr = nullptr; | |||
2074 | } | |||
2075 | if (LoadBasePtr) { | |||
2076 | RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); | |||
2077 | LoadBasePtr = nullptr; | |||
2078 | } | |||
2079 | return false; | |||
2080 | } | |||
2081 | } | |||
2082 | ||||
2083 | SmallPtrSet<Instruction*, 2> Ignore1; | |||
2084 | Ignore1.insert(SI); | |||
2085 | if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, | |||
2086 | StoreSize, *AA, Ignore1)) { | |||
2087 | // Check if the load is the offending instruction. | |||
2088 | Ignore1.insert(LI); | |||
2089 | if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, | |||
2090 | BECount, StoreSize, *AA, Ignore1)) { | |||
2091 | // Still bad. Nothing we can do. | |||
2092 | goto CleanupAndExit; | |||
2093 | } | |||
2094 | // It worked with the load ignored. | |||
2095 | Overlap = true; | |||
2096 | } | |||
2097 | ||||
2098 | if (!Overlap) { | |||
2099 | if (DisableMemcpyIdiom || !HasMemcpy) | |||
2100 | goto CleanupAndExit; | |||
2101 | } else { | |||
2102 | // Don't generate memmove if this function will be inlined. This is | |||
2103 | // because the caller will undergo this transformation after inlining. | |||
2104 | Function *Func = CurLoop->getHeader()->getParent(); | |||
2105 | if (Func->hasFnAttribute(Attribute::AlwaysInline)) | |||
2106 | goto CleanupAndExit; | |||
2107 | ||||
2108 | // In case of a memmove, the call to memmove will be executed instead | |||
2109 | // of the loop, so we need to make sure that there is nothing else in | |||
2110 | // the loop than the load, store and instructions that these two depend | |||
2111 | // on. | |||
2112 | SmallVector<Instruction*,2> Insts; | |||
2113 | Insts.push_back(SI); | |||
2114 | Insts.push_back(LI); | |||
2115 | if (!coverLoop(CurLoop, Insts)) | |||
2116 | goto CleanupAndExit; | |||
2117 | ||||
2118 | if (DisableMemmoveIdiom || !HasMemmove) | |||
2119 | goto CleanupAndExit; | |||
2120 | bool IsNested = CurLoop->getParentLoop() != nullptr; | |||
2121 | if (IsNested && OnlyNonNestedMemmove) | |||
2122 | goto CleanupAndExit; | |||
2123 | } | |||
2124 | ||||
2125 | // For a memcpy, we have to make sure that the input array is not being | |||
2126 | // mutated by the loop. | |||
2127 | LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), | |||
2128 | Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt); | |||
2129 | ||||
2130 | SmallPtrSet<Instruction*, 2> Ignore2; | |||
2131 | Ignore2.insert(SI); | |||
2132 | if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, | |||
2133 | StoreSize, *AA, Ignore2)) | |||
2134 | goto CleanupAndExit; | |||
2135 | ||||
2136 | // Check the stride. | |||
2137 | bool StridePos = getSCEVStride(LoadEv) >= 0; | |||
2138 | ||||
2139 | // Currently, the volatile memcpy only emulates traversing memory forward. | |||
2140 | if (!StridePos && DestVolatile) | |||
2141 | goto CleanupAndExit; | |||
2142 | ||||
2143 | bool RuntimeCheck = (Overlap || DestVolatile); | |||
2144 | ||||
2145 | BasicBlock *ExitB; | |||
2146 | if (RuntimeCheck) { | |||
2147 | // The runtime check needs a single exit block. | |||
2148 | SmallVector<BasicBlock*, 8> ExitBlocks; | |||
2149 | CurLoop->getUniqueExitBlocks(ExitBlocks); | |||
2150 | if (ExitBlocks.size() != 1) | |||
2151 | goto CleanupAndExit; | |||
2152 | ExitB = ExitBlocks[0]; | |||
2153 | } | |||
2154 | ||||
2155 | // The # stored bytes is (BECount+1)*Size. Expand the trip count out to | |||
2156 | // pointer size if it isn't already. | |||
2157 | LLVMContext &Ctx = SI->getContext(); | |||
2158 | BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); | |||
2159 | DebugLoc DLoc = SI->getDebugLoc(); | |||
2160 | ||||
2161 | const SCEV *NumBytesS = | |||
2162 | SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); | |||
2163 | if (StoreSize != 1) | |||
2164 | NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), | |||
2165 | SCEV::FlagNUW); | |||
2166 | Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); | |||
2167 | if (Instruction *In = dyn_cast<Instruction>(NumBytes)) | |||
2168 | if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT})) | |||
2169 | NumBytes = Simp; | |||
2170 | ||||
2171 | CallInst *NewCall; | |||
2172 | ||||
2173 | if (RuntimeCheck) { | |||
2174 | unsigned Threshold = RuntimeMemSizeThreshold; | |||
2175 | if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) { | |||
2176 | uint64_t C = CI->getZExtValue(); | |||
2177 | if (Threshold != 0 && C < Threshold) | |||
2178 | goto CleanupAndExit; | |||
2179 | if (C < CompileTimeMemSizeThreshold) | |||
2180 | goto CleanupAndExit; | |||
2181 | } | |||
2182 | ||||
2183 | BasicBlock *Header = CurLoop->getHeader(); | |||
2184 | Function *Func = Header->getParent(); | |||
2185 | Loop *ParentL = LF->getLoopFor(Preheader); | |||
2186 | StringRef HeaderName = Header->getName(); | |||
2187 | ||||
2188 | // Create a new (empty) preheader, and update the PHI nodes in the | |||
2189 | // header to use the new preheader. | |||
2190 | BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph", | |||
2191 | Func, Header); | |||
2192 | if (ParentL) | |||
2193 | ParentL->addBasicBlockToLoop(NewPreheader, *LF); | |||
2194 | IRBuilder<>(NewPreheader).CreateBr(Header); | |||
2195 | for (auto &In : *Header) { | |||
2196 | PHINode *PN = dyn_cast<PHINode>(&In); | |||
2197 | if (!PN) | |||
2198 | break; | |||
2199 | int bx = PN->getBasicBlockIndex(Preheader); | |||
2200 | if (bx >= 0) | |||
2201 | PN->setIncomingBlock(bx, NewPreheader); | |||
2202 | } | |||
2203 | DT->addNewBlock(NewPreheader, Preheader); | |||
2204 | DT->changeImmediateDominator(Header, NewPreheader); | |||
2205 | ||||
2206 | // Check for safe conditions to execute memmove. | |||
2207 | // If stride is positive, copying things from higher to lower addresses | |||
2208 | // is equivalent to memmove. For negative stride, it's the other way | |||
2209 | // around. Copying forward in memory with positive stride may not be | |||
2210 | // same as memmove since we may be copying values that we just stored | |||
2211 | // in some previous iteration. | |||
2212 | Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy); | |||
2213 | Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy); | |||
2214 | Value *LowA = StridePos ? SA : LA; | |||
2215 | Value *HighA = StridePos ? LA : SA; | |||
2216 | Value *CmpA = Builder.CreateICmpULT(LowA, HighA); | |||
2217 | Value *Cond = CmpA; | |||
2218 | ||||
2219 | // Check for distance between pointers. Since the case LowA < HighA | |||
2220 | // is checked for above, assume LowA >= HighA. | |||
2221 | Value *Dist = Builder.CreateSub(LowA, HighA); | |||
2222 | Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist); | |||
2223 | Value *CmpEither = Builder.CreateOr(Cond, CmpD); | |||
2224 | Cond = CmpEither; | |||
2225 | ||||
2226 | if (Threshold != 0) { | |||
2227 | Type *Ty = NumBytes->getType(); | |||
2228 | Value *Thr = ConstantInt::get(Ty, Threshold); | |||
2229 | Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes); | |||
2230 | Value *CmpBoth = Builder.CreateAnd(Cond, CmpB); | |||
2231 | Cond = CmpBoth; | |||
2232 | } | |||
2233 | BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli", | |||
2234 | Func, NewPreheader); | |||
2235 | if (ParentL) | |||
2236 | ParentL->addBasicBlockToLoop(MemmoveB, *LF); | |||
2237 | Instruction *OldT = Preheader->getTerminator(); | |||
2238 | Builder.CreateCondBr(Cond, MemmoveB, NewPreheader); | |||
2239 | OldT->eraseFromParent(); | |||
2240 | Preheader->setName(Preheader->getName()+".old"); | |||
2241 | DT->addNewBlock(MemmoveB, Preheader); | |||
2242 | // Find the new immediate dominator of the exit block. | |||
2243 | BasicBlock *ExitD = Preheader; | |||
2244 | for (BasicBlock *PB : predecessors(ExitB)) { | |||
2245 | ExitD = DT->findNearestCommonDominator(ExitD, PB); | |||
2246 | if (!ExitD) | |||
2247 | break; | |||
2248 | } | |||
2249 | // If the prior immediate dominator of ExitB was dominated by the | |||
2250 | // old preheader, then the old preheader becomes the new immediate | |||
2251 | // dominator. Otherwise don't change anything (because the newly | |||
2252 | // added blocks are dominated by the old preheader). | |||
2253 | if (ExitD && DT->dominates(Preheader, ExitD)) { | |||
2254 | DomTreeNode *BN = DT->getNode(ExitB); | |||
2255 | DomTreeNode *DN = DT->getNode(ExitD); | |||
2256 | BN->setIDom(DN); | |||
2257 | } | |||
2258 | ||||
2259 | // Add a call to memmove to the conditional block. | |||
2260 | IRBuilder<> CondBuilder(MemmoveB); | |||
2261 | CondBuilder.CreateBr(ExitB); | |||
2262 | CondBuilder.SetInsertPoint(MemmoveB->getTerminator()); | |||
2263 | ||||
2264 | if (DestVolatile) { | |||
2265 | Type *Int32Ty = Type::getInt32Ty(Ctx); | |||
2266 | Type *Int32PtrTy = Type::getInt32PtrTy(Ctx); | |||
2267 | Type *VoidTy = Type::getVoidTy(Ctx); | |||
2268 | Module *M = Func->getParent(); | |||
2269 | FunctionCallee Fn = M->getOrInsertFunction( | |||
2270 | HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty); | |||
2271 | ||||
2272 | const SCEV *OneS = SE->getConstant(Int32Ty, 1); | |||
2273 | const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty); | |||
2274 | const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW); | |||
2275 | Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, | |||
2276 | MemmoveB->getTerminator()); | |||
2277 | if (Instruction *In = dyn_cast<Instruction>(NumWords)) | |||
2278 | if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT})) | |||
2279 | NumWords = Simp; | |||
2280 | ||||
2281 | Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) | |||
2282 | ? StoreBasePtr | |||
2283 | : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy); | |||
2284 | Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy) | |||
2285 | ? LoadBasePtr | |||
2286 | : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy); | |||
2287 | NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords}); | |||
2288 | } else { | |||
2289 | NewCall = CondBuilder.CreateMemMove( | |||
2290 | StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes); | |||
2291 | } | |||
2292 | } else { | |||
2293 | NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr, | |||
2294 | LI->getAlign(), NumBytes); | |||
2295 | // Okay, the memcpy has been formed. Zap the original store and | |||
2296 | // anything that feeds into it. | |||
2297 | RecursivelyDeleteTriviallyDeadInstructions(SI, TLI); | |||
2298 | } | |||
2299 | ||||
2300 | NewCall->setDebugLoc(DLoc); | |||
2301 | ||||
2302 | LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2303 | << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2304 | << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2305 | << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2306 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ); | |||
2307 | ||||
2308 | return true; | |||
2309 | } | |||
2310 | ||||
2311 | // Check if the instructions in Insts, together with their dependencies | |||
2312 | // cover the loop in the sense that the loop could be safely eliminated once | |||
2313 | // the instructions in Insts are removed. | |||
2314 | bool HexagonLoopIdiomRecognize::coverLoop(Loop *L, | |||
2315 | SmallVectorImpl<Instruction*> &Insts) const { | |||
2316 | SmallSet<BasicBlock*,8> LoopBlocks; | |||
2317 | for (auto *B : L->blocks()) | |||
2318 | LoopBlocks.insert(B); | |||
2319 | ||||
2320 | SetVector<Instruction*> Worklist(Insts.begin(), Insts.end()); | |||
2321 | ||||
2322 | // Collect all instructions from the loop that the instructions in Insts | |||
2323 | // depend on (plus their dependencies, etc.). These instructions will | |||
2324 | // constitute the expression trees that feed those in Insts, but the trees | |||
2325 | // will be limited only to instructions contained in the loop. | |||
2326 | for (unsigned i = 0; i < Worklist.size(); ++i) { | |||
2327 | Instruction *In = Worklist[i]; | |||
2328 | for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) { | |||
2329 | Instruction *OpI = dyn_cast<Instruction>(I); | |||
2330 | if (!OpI) | |||
2331 | continue; | |||
2332 | BasicBlock *PB = OpI->getParent(); | |||
2333 | if (!LoopBlocks.count(PB)) | |||
2334 | continue; | |||
2335 | Worklist.insert(OpI); | |||
2336 | } | |||
2337 | } | |||
2338 | ||||
2339 | // Scan all instructions in the loop, if any of them have a user outside | |||
2340 | // of the loop, or outside of the expressions collected above, then either | |||
2341 | // the loop has a side-effect visible outside of it, or there are | |||
2342 | // instructions in it that are not involved in the original set Insts. | |||
2343 | for (auto *B : L->blocks()) { | |||
2344 | for (auto &In : *B) { | |||
2345 | if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In)) | |||
2346 | continue; | |||
2347 | if (!Worklist.count(&In) && In.mayHaveSideEffects()) | |||
2348 | return false; | |||
2349 | for (auto *K : In.users()) { | |||
2350 | Instruction *UseI = dyn_cast<Instruction>(K); | |||
2351 | if (!UseI) | |||
2352 | continue; | |||
2353 | BasicBlock *UseB = UseI->getParent(); | |||
2354 | if (LF->getLoopFor(UseB) != L) | |||
2355 | return false; | |||
2356 | } | |||
2357 | } | |||
2358 | } | |||
2359 | ||||
2360 | return true; | |||
2361 | } | |||
2362 | ||||
2363 | /// runOnLoopBlock - Process the specified block, which lives in a counted loop | |||
2364 | /// with the specified backedge count. This block is known to be in the current | |||
2365 | /// loop and not in any subloops. | |||
2366 | bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, | |||
2367 | const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) { | |||
2368 | // We can only promote stores in this block if they are unconditionally | |||
2369 | // executed in the loop. For a block to be unconditionally executed, it has | |||
2370 | // to dominate all the exit blocks of the loop. Verify this now. | |||
2371 | auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool { | |||
2372 | return DT->dominates(BB, EB); | |||
2373 | }; | |||
2374 | if (!all_of(ExitBlocks, DominatedByBB)) | |||
2375 | return false; | |||
2376 | ||||
2377 | bool MadeChange = false; | |||
2378 | // Look for store instructions, which may be optimized to memset/memcpy. | |||
2379 | SmallVector<StoreInst*,8> Stores; | |||
2380 | collectStores(CurLoop, BB, Stores); | |||
2381 | ||||
2382 | // Optimize the store into a memcpy, if it feeds an similarly strided load. | |||
2383 | for (auto &SI : Stores) | |||
2384 | MadeChange |= processCopyingStore(CurLoop, SI, BECount); | |||
2385 | ||||
2386 | return MadeChange; | |||
2387 | } | |||
2388 | ||||
2389 | bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) { | |||
2390 | PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE); | |||
2391 | if (PMR.recognize()) | |||
2392 | return true; | |||
2393 | ||||
2394 | if (!HasMemcpy && !HasMemmove) | |||
2395 | return false; | |||
2396 | ||||
2397 | const SCEV *BECount = SE->getBackedgeTakenCount(L); | |||
2398 | assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400 , __extension__ __PRETTY_FUNCTION__)) | |||
2399 | "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400 , __extension__ __PRETTY_FUNCTION__)) | |||
2400 | "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400 , __extension__ __PRETTY_FUNCTION__)); | |||
2401 | ||||
2402 | SmallVector<BasicBlock *, 8> ExitBlocks; | |||
2403 | L->getUniqueExitBlocks(ExitBlocks); | |||
2404 | ||||
2405 | bool Changed = false; | |||
2406 | ||||
2407 | // Scan all the blocks in the loop that are not in subloops. | |||
2408 | for (auto *BB : L->getBlocks()) { | |||
2409 | // Ignore blocks in subloops. | |||
2410 | if (LF->getLoopFor(BB) != L) | |||
2411 | continue; | |||
2412 | Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks); | |||
2413 | } | |||
2414 | ||||
2415 | return Changed; | |||
2416 | } | |||
2417 | ||||
2418 | bool HexagonLoopIdiomRecognize::run(Loop *L) { | |||
2419 | const Module &M = *L->getHeader()->getParent()->getParent(); | |||
2420 | if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon) | |||
2421 | return false; | |||
2422 | ||||
2423 | // If the loop could not be converted to canonical form, it must have an | |||
2424 | // indirectbr in it, just give up. | |||
2425 | if (!L->getLoopPreheader()) | |||
2426 | return false; | |||
2427 | ||||
2428 | // Disable loop idiom recognition if the function's name is a common idiom. | |||
2429 | StringRef Name = L->getHeader()->getParent()->getName(); | |||
2430 | if (Name == "memset" || Name == "memcpy" || Name == "memmove") | |||
2431 | return false; | |||
2432 | ||||
2433 | DL = &L->getHeader()->getModule()->getDataLayout(); | |||
2434 | ||||
2435 | HasMemcpy = TLI->has(LibFunc_memcpy); | |||
2436 | HasMemmove = TLI->has(LibFunc_memmove); | |||
2437 | ||||
2438 | if (SE->hasLoopInvariantBackedgeTakenCount(L)) | |||
2439 | return runOnCountableLoop(L); | |||
2440 | return false; | |||
2441 | } | |||
2442 | ||||
2443 | bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L, | |||
2444 | LPPassManager &LPM) { | |||
2445 | if (skipLoop(L)) | |||
2446 | return false; | |||
2447 | ||||
2448 | auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
2449 | auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
2450 | auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | |||
2451 | auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( | |||
2452 | *L->getHeader()->getParent()); | |||
2453 | auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); | |||
2454 | return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L); | |||
2455 | } | |||
2456 | ||||
2457 | Pass *llvm::createHexagonLoopIdiomPass() { | |||
2458 | return new HexagonLoopIdiomRecognizeLegacyPass(); | |||
2459 | } | |||
2460 | ||||
2461 | PreservedAnalyses | |||
2462 | HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM, | |||
2463 | LoopStandardAnalysisResults &AR, | |||
2464 | LPMUpdater &U) { | |||
2465 | return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE) | |||
2466 | .run(&L) | |||
2467 | ? getLoopPassPreservedAnalyses() | |||
2468 | : PreservedAnalyses::all(); | |||
2469 | } |
1 | //===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | /// | ||||||||
9 | /// \file | ||||||||
10 | /// This file implements a class to represent arbitrary precision | ||||||||
11 | /// integral constant values and operations on them. | ||||||||
12 | /// | ||||||||
13 | //===----------------------------------------------------------------------===// | ||||||||
14 | |||||||||
15 | #ifndef LLVM_ADT_APINT_H | ||||||||
16 | #define LLVM_ADT_APINT_H | ||||||||
17 | |||||||||
18 | #include "llvm/Support/Compiler.h" | ||||||||
19 | #include "llvm/Support/MathExtras.h" | ||||||||
20 | #include <cassert> | ||||||||
21 | #include <climits> | ||||||||
22 | #include <cstring> | ||||||||
23 | #include <optional> | ||||||||
24 | #include <utility> | ||||||||
25 | |||||||||
26 | namespace llvm { | ||||||||
27 | class FoldingSetNodeID; | ||||||||
28 | class StringRef; | ||||||||
29 | class hash_code; | ||||||||
30 | class raw_ostream; | ||||||||
31 | |||||||||
32 | template <typename T> class SmallVectorImpl; | ||||||||
33 | template <typename T> class ArrayRef; | ||||||||
34 | template <typename T, typename Enable> struct DenseMapInfo; | ||||||||
35 | |||||||||
36 | class APInt; | ||||||||
37 | |||||||||
38 | inline APInt operator-(APInt); | ||||||||
39 | |||||||||
40 | //===----------------------------------------------------------------------===// | ||||||||
41 | // APInt Class | ||||||||
42 | //===----------------------------------------------------------------------===// | ||||||||
43 | |||||||||
44 | /// Class for arbitrary precision integers. | ||||||||
45 | /// | ||||||||
46 | /// APInt is a functional replacement for common case unsigned integer type like | ||||||||
47 | /// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width | ||||||||
48 | /// integer sizes and large integer value types such as 3-bits, 15-bits, or more | ||||||||
49 | /// than 64-bits of precision. APInt provides a variety of arithmetic operators | ||||||||
50 | /// and methods to manipulate integer values of any bit-width. It supports both | ||||||||
51 | /// the typical integer arithmetic and comparison operations as well as bitwise | ||||||||
52 | /// manipulation. | ||||||||
53 | /// | ||||||||
54 | /// The class has several invariants worth noting: | ||||||||
55 | /// * All bit, byte, and word positions are zero-based. | ||||||||
56 | /// * Once the bit width is set, it doesn't change except by the Truncate, | ||||||||
57 | /// SignExtend, or ZeroExtend operations. | ||||||||
58 | /// * All binary operators must be on APInt instances of the same bit width. | ||||||||
59 | /// Attempting to use these operators on instances with different bit | ||||||||
60 | /// widths will yield an assertion. | ||||||||
61 | /// * The value is stored canonically as an unsigned value. For operations | ||||||||
62 | /// where it makes a difference, there are both signed and unsigned variants | ||||||||
63 | /// of the operation. For example, sdiv and udiv. However, because the bit | ||||||||
64 | /// widths must be the same, operations such as Mul and Add produce the same | ||||||||
65 | /// results regardless of whether the values are interpreted as signed or | ||||||||
66 | /// not. | ||||||||
67 | /// * In general, the class tries to follow the style of computation that LLVM | ||||||||
68 | /// uses in its IR. This simplifies its use for LLVM. | ||||||||
69 | /// * APInt supports zero-bit-width values, but operations that require bits | ||||||||
70 | /// are not defined on it (e.g. you cannot ask for the sign of a zero-bit | ||||||||
71 | /// integer). This means that operations like zero extension and logical | ||||||||
72 | /// shifts are defined, but sign extension and ashr is not. Zero bit values | ||||||||
73 | /// compare and hash equal to themselves, and countLeadingZeros returns 0. | ||||||||
74 | /// | ||||||||
75 | class [[nodiscard]] APInt { | ||||||||
76 | public: | ||||||||
77 | typedef uint64_t WordType; | ||||||||
78 | |||||||||
79 | /// This enum is used to hold the constants we needed for APInt. | ||||||||
80 | enum : unsigned { | ||||||||
81 | /// Byte size of a word. | ||||||||
82 | APINT_WORD_SIZE = sizeof(WordType), | ||||||||
83 | /// Bits in a word. | ||||||||
84 | APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8 | ||||||||
85 | }; | ||||||||
86 | |||||||||
87 | enum class Rounding { | ||||||||
88 | DOWN, | ||||||||
89 | TOWARD_ZERO, | ||||||||
90 | UP, | ||||||||
91 | }; | ||||||||
92 | |||||||||
93 | static constexpr WordType WORDTYPE_MAX = ~WordType(0); | ||||||||
94 | |||||||||
95 | /// \name Constructors | ||||||||
96 | /// @{ | ||||||||
97 | |||||||||
98 | /// Create a new APInt of numBits width, initialized as val. | ||||||||
99 | /// | ||||||||
100 | /// If isSigned is true then val is treated as if it were a signed value | ||||||||
101 | /// (i.e. as an int64_t) and the appropriate sign extension to the bit width | ||||||||
102 | /// will be done. Otherwise, no sign extension occurs (high order bits beyond | ||||||||
103 | /// the range of val are zero filled). | ||||||||
104 | /// | ||||||||
105 | /// \param numBits the bit width of the constructed APInt | ||||||||
106 | /// \param val the initial value of the APInt | ||||||||
107 | /// \param isSigned how to treat signedness of val | ||||||||
108 | APInt(unsigned numBits, uint64_t val, bool isSigned = false) | ||||||||
109 | : BitWidth(numBits) { | ||||||||
110 | if (isSingleWord()) { | ||||||||
111 | U.VAL = val; | ||||||||
112 | clearUnusedBits(); | ||||||||
113 | } else { | ||||||||
114 | initSlowCase(val, isSigned); | ||||||||
115 | } | ||||||||
116 | } | ||||||||
117 | |||||||||
118 | /// Construct an APInt of numBits width, initialized as bigVal[]. | ||||||||
119 | /// | ||||||||
120 | /// Note that bigVal.size() can be smaller or larger than the corresponding | ||||||||
121 | /// bit width but any extraneous bits will be dropped. | ||||||||
122 | /// | ||||||||
123 | /// \param numBits the bit width of the constructed APInt | ||||||||
124 | /// \param bigVal a sequence of words to form the initial value of the APInt | ||||||||
125 | APInt(unsigned numBits, ArrayRef<uint64_t> bigVal); | ||||||||
126 | |||||||||
127 | /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but | ||||||||
128 | /// deprecated because this constructor is prone to ambiguity with the | ||||||||
129 | /// APInt(unsigned, uint64_t, bool) constructor. | ||||||||
130 | /// | ||||||||
131 | /// If this overload is ever deleted, care should be taken to prevent calls | ||||||||
132 | /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool) | ||||||||
133 | /// constructor. | ||||||||
134 | APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]); | ||||||||
135 | |||||||||
136 | /// Construct an APInt from a string representation. | ||||||||
137 | /// | ||||||||
138 | /// This constructor interprets the string \p str in the given radix. The | ||||||||
139 | /// interpretation stops when the first character that is not suitable for the | ||||||||
140 | /// radix is encountered, or the end of the string. Acceptable radix values | ||||||||
141 | /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the | ||||||||
142 | /// string to require more bits than numBits. | ||||||||
143 | /// | ||||||||
144 | /// \param numBits the bit width of the constructed APInt | ||||||||
145 | /// \param str the string to be interpreted | ||||||||
146 | /// \param radix the radix to use for the conversion | ||||||||
147 | APInt(unsigned numBits, StringRef str, uint8_t radix); | ||||||||
148 | |||||||||
149 | /// Default constructor that creates an APInt with a 1-bit zero value. | ||||||||
150 | explicit APInt() { U.VAL = 0; } | ||||||||
151 | |||||||||
152 | /// Copy Constructor. | ||||||||
153 | APInt(const APInt &that) : BitWidth(that.BitWidth) { | ||||||||
154 | if (isSingleWord()) | ||||||||
155 | U.VAL = that.U.VAL; | ||||||||
156 | else | ||||||||
157 | initSlowCase(that); | ||||||||
158 | } | ||||||||
159 | |||||||||
160 | /// Move Constructor. | ||||||||
161 | APInt(APInt &&that) : BitWidth(that.BitWidth) { | ||||||||
162 | memcpy(&U, &that.U, sizeof(U)); | ||||||||
163 | that.BitWidth = 0; | ||||||||
164 | } | ||||||||
165 | |||||||||
166 | /// Destructor. | ||||||||
167 | ~APInt() { | ||||||||
168 | if (needsCleanup()) | ||||||||
169 | delete[] U.pVal; | ||||||||
170 | } | ||||||||
171 | |||||||||
172 | /// @} | ||||||||
173 | /// \name Value Generators | ||||||||
174 | /// @{ | ||||||||
175 | |||||||||
176 | /// Get the '0' value for the specified bit-width. | ||||||||
177 | static APInt getZero(unsigned numBits) { return APInt(numBits, 0); } | ||||||||
178 | |||||||||
179 | /// NOTE: This is soft-deprecated. Please use `getZero()` instead. | ||||||||
180 | static APInt getNullValue(unsigned numBits) { return getZero(numBits); } | ||||||||
181 | |||||||||
182 | /// Return an APInt zero bits wide. | ||||||||
183 | static APInt getZeroWidth() { return getZero(0); } | ||||||||
184 | |||||||||
185 | /// Gets maximum unsigned value of APInt for specific bit width. | ||||||||
186 | static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); } | ||||||||
187 | |||||||||
188 | /// Gets maximum signed value of APInt for a specific bit width. | ||||||||
189 | static APInt getSignedMaxValue(unsigned numBits) { | ||||||||
190 | APInt API = getAllOnes(numBits); | ||||||||
191 | API.clearBit(numBits - 1); | ||||||||
192 | return API; | ||||||||
193 | } | ||||||||
194 | |||||||||
195 | /// Gets minimum unsigned value of APInt for a specific bit width. | ||||||||
196 | static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); } | ||||||||
197 | |||||||||
198 | /// Gets minimum signed value of APInt for a specific bit width. | ||||||||
199 | static APInt getSignedMinValue(unsigned numBits) { | ||||||||
200 | APInt API(numBits, 0); | ||||||||
201 | API.setBit(numBits - 1); | ||||||||
202 | return API; | ||||||||
203 | } | ||||||||
204 | |||||||||
205 | /// Get the SignMask for a specific bit width. | ||||||||
206 | /// | ||||||||
207 | /// This is just a wrapper function of getSignedMinValue(), and it helps code | ||||||||
208 | /// readability when we want to get a SignMask. | ||||||||
209 | static APInt getSignMask(unsigned BitWidth) { | ||||||||
210 | return getSignedMinValue(BitWidth); | ||||||||
211 | } | ||||||||
212 | |||||||||
213 | /// Return an APInt of a specified width with all bits set. | ||||||||
214 | static APInt getAllOnes(unsigned numBits) { | ||||||||
215 | return APInt(numBits, WORDTYPE_MAX, true); | ||||||||
216 | } | ||||||||
217 | |||||||||
218 | /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead. | ||||||||
219 | static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); } | ||||||||
220 | |||||||||
221 | /// Return an APInt with exactly one bit set in the result. | ||||||||
222 | static APInt getOneBitSet(unsigned numBits, unsigned BitNo) { | ||||||||
223 | APInt Res(numBits, 0); | ||||||||
224 | Res.setBit(BitNo); | ||||||||
225 | return Res; | ||||||||
226 | } | ||||||||
227 | |||||||||
228 | /// Get a value with a block of bits set. | ||||||||
229 | /// | ||||||||
230 | /// Constructs an APInt value that has a contiguous range of bits set. The | ||||||||
231 | /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other | ||||||||
232 | /// bits will be zero. For example, with parameters(32, 0, 16) you would get | ||||||||
233 | /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than | ||||||||
234 | /// \p hiBit. | ||||||||
235 | /// | ||||||||
236 | /// \param numBits the intended bit width of the result | ||||||||
237 | /// \param loBit the index of the lowest bit set. | ||||||||
238 | /// \param hiBit the index of the highest bit set. | ||||||||
239 | /// | ||||||||
240 | /// \returns An APInt value with the requested bits set. | ||||||||
241 | static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { | ||||||||
242 | APInt Res(numBits, 0); | ||||||||
243 | Res.setBits(loBit, hiBit); | ||||||||
244 | return Res; | ||||||||
245 | } | ||||||||
246 | |||||||||
247 | /// Wrap version of getBitsSet. | ||||||||
248 | /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet. | ||||||||
249 | /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example, | ||||||||
250 | /// with parameters (32, 28, 4), you would get 0xF000000F. | ||||||||
251 | /// If \p hiBit is equal to \p loBit, you would get a result with all bits | ||||||||
252 | /// set. | ||||||||
253 | static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, | ||||||||
254 | unsigned hiBit) { | ||||||||
255 | APInt Res(numBits, 0); | ||||||||
256 | Res.setBitsWithWrap(loBit, hiBit); | ||||||||
257 | return Res; | ||||||||
258 | } | ||||||||
259 | |||||||||
260 | /// Constructs an APInt value that has a contiguous range of bits set. The | ||||||||
261 | /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other | ||||||||
262 | /// bits will be zero. For example, with parameters(32, 12) you would get | ||||||||
263 | /// 0xFFFFF000. | ||||||||
264 | /// | ||||||||
265 | /// \param numBits the intended bit width of the result | ||||||||
266 | /// \param loBit the index of the lowest bit to set. | ||||||||
267 | /// | ||||||||
268 | /// \returns An APInt value with the requested bits set. | ||||||||
269 | static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { | ||||||||
270 | APInt Res(numBits, 0); | ||||||||
271 | Res.setBitsFrom(loBit); | ||||||||
272 | return Res; | ||||||||
273 | } | ||||||||
274 | |||||||||
275 | /// Constructs an APInt value that has the top hiBitsSet bits set. | ||||||||
276 | /// | ||||||||
277 | /// \param numBits the bitwidth of the result | ||||||||
278 | /// \param hiBitsSet the number of high-order bits set in the result. | ||||||||
279 | static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { | ||||||||
280 | APInt Res(numBits, 0); | ||||||||
281 | Res.setHighBits(hiBitsSet); | ||||||||
282 | return Res; | ||||||||
283 | } | ||||||||
284 | |||||||||
285 | /// Constructs an APInt value that has the bottom loBitsSet bits set. | ||||||||
286 | /// | ||||||||
287 | /// \param numBits the bitwidth of the result | ||||||||
288 | /// \param loBitsSet the number of low-order bits set in the result. | ||||||||
289 | static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { | ||||||||
290 | APInt Res(numBits, 0); | ||||||||
291 | Res.setLowBits(loBitsSet); | ||||||||
292 | return Res; | ||||||||
293 | } | ||||||||
294 | |||||||||
295 | /// Return a value containing V broadcasted over NewLen bits. | ||||||||
296 | static APInt getSplat(unsigned NewLen, const APInt &V); | ||||||||
297 | |||||||||
298 | /// @} | ||||||||
299 | /// \name Value Tests | ||||||||
300 | /// @{ | ||||||||
301 | |||||||||
302 | /// Determine if this APInt just has one word to store value. | ||||||||
303 | /// | ||||||||
304 | /// \returns true if the number of bits <= 64, false otherwise. | ||||||||
305 | bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; } | ||||||||
306 | |||||||||
307 | /// Determine sign of this APInt. | ||||||||
308 | /// | ||||||||
309 | /// This tests the high bit of this APInt to determine if it is set. | ||||||||
310 | /// | ||||||||
311 | /// \returns true if this APInt is negative, false otherwise | ||||||||
312 | bool isNegative() const { return (*this)[BitWidth - 1]; } | ||||||||
313 | |||||||||
314 | /// Determine if this APInt Value is non-negative (>= 0) | ||||||||
315 | /// | ||||||||
316 | /// This tests the high bit of the APInt to determine if it is unset. | ||||||||
317 | bool isNonNegative() const { return !isNegative(); } | ||||||||
318 | |||||||||
319 | /// Determine if sign bit of this APInt is set. | ||||||||
320 | /// | ||||||||
321 | /// This tests the high bit of this APInt to determine if it is set. | ||||||||
322 | /// | ||||||||
323 | /// \returns true if this APInt has its sign bit set, false otherwise. | ||||||||
324 | bool isSignBitSet() const { return (*this)[BitWidth - 1]; } | ||||||||
325 | |||||||||
326 | /// Determine if sign bit of this APInt is clear. | ||||||||
327 | /// | ||||||||
328 | /// This tests the high bit of this APInt to determine if it is clear. | ||||||||
329 | /// | ||||||||
330 | /// \returns true if this APInt has its sign bit clear, false otherwise. | ||||||||
331 | bool isSignBitClear() const { return !isSignBitSet(); } | ||||||||
332 | |||||||||
333 | /// Determine if this APInt Value is positive. | ||||||||
334 | /// | ||||||||
335 | /// This tests if the value of this APInt is positive (> 0). Note | ||||||||
336 | /// that 0 is not a positive value. | ||||||||
337 | /// | ||||||||
338 | /// \returns true if this APInt is positive. | ||||||||
339 | bool isStrictlyPositive() const { return isNonNegative() && !isZero(); } | ||||||||
340 | |||||||||
341 | /// Determine if this APInt Value is non-positive (<= 0). | ||||||||
342 | /// | ||||||||
343 | /// \returns true if this APInt is non-positive. | ||||||||
344 | bool isNonPositive() const { return !isStrictlyPositive(); } | ||||||||
345 | |||||||||
346 | /// Determine if this APInt Value only has the specified bit set. | ||||||||
347 | /// | ||||||||
348 | /// \returns true if this APInt only has the specified bit set. | ||||||||
349 | bool isOneBitSet(unsigned BitNo) const { | ||||||||
350 | return (*this)[BitNo] && popcount() == 1; | ||||||||
351 | } | ||||||||
352 | |||||||||
353 | /// Determine if all bits are set. This is true for zero-width values. | ||||||||
354 | bool isAllOnes() const { | ||||||||
355 | if (BitWidth == 0) | ||||||||
356 | return true; | ||||||||
357 | if (isSingleWord()) | ||||||||
358 | return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth); | ||||||||
359 | return countTrailingOnesSlowCase() == BitWidth; | ||||||||
360 | } | ||||||||
361 | |||||||||
362 | /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead. | ||||||||
363 | bool isAllOnesValue() const { return isAllOnes(); } | ||||||||
364 | |||||||||
365 | /// Determine if this value is zero, i.e. all bits are clear. | ||||||||
366 | bool isZero() const { | ||||||||
367 | if (isSingleWord()) | ||||||||
368 | return U.VAL == 0; | ||||||||
369 | return countLeadingZerosSlowCase() == BitWidth; | ||||||||
370 | } | ||||||||
371 | |||||||||
372 | /// NOTE: This is soft-deprecated. Please use `isZero()` instead. | ||||||||
373 | bool isNullValue() const { return isZero(); } | ||||||||
374 | |||||||||
375 | /// Determine if this is a value of 1. | ||||||||
376 | /// | ||||||||
377 | /// This checks to see if the value of this APInt is one. | ||||||||
378 | bool isOne() const { | ||||||||
379 | if (isSingleWord()) | ||||||||
380 | return U.VAL == 1; | ||||||||
381 | return countLeadingZerosSlowCase() == BitWidth - 1; | ||||||||
382 | } | ||||||||
383 | |||||||||
384 | /// NOTE: This is soft-deprecated. Please use `isOne()` instead. | ||||||||
385 | bool isOneValue() const { return isOne(); } | ||||||||
386 | |||||||||
387 | /// Determine if this is the largest unsigned value. | ||||||||
388 | /// | ||||||||
389 | /// This checks to see if the value of this APInt is the maximum unsigned | ||||||||
390 | /// value for the APInt's bit width. | ||||||||
391 | bool isMaxValue() const { return isAllOnes(); } | ||||||||
392 | |||||||||
393 | /// Determine if this is the largest signed value. | ||||||||
394 | /// | ||||||||
395 | /// This checks to see if the value of this APInt is the maximum signed | ||||||||
396 | /// value for the APInt's bit width. | ||||||||
397 | bool isMaxSignedValue() const { | ||||||||
398 | if (isSingleWord()) { | ||||||||
399 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 399, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
400 | return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1); | ||||||||
401 | } | ||||||||
402 | return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1; | ||||||||
403 | } | ||||||||
404 | |||||||||
405 | /// Determine if this is the smallest unsigned value. | ||||||||
406 | /// | ||||||||
407 | /// This checks to see if the value of this APInt is the minimum unsigned | ||||||||
408 | /// value for the APInt's bit width. | ||||||||
409 | bool isMinValue() const { return isZero(); } | ||||||||
410 | |||||||||
411 | /// Determine if this is the smallest signed value. | ||||||||
412 | /// | ||||||||
413 | /// This checks to see if the value of this APInt is the minimum signed | ||||||||
414 | /// value for the APInt's bit width. | ||||||||
415 | bool isMinSignedValue() const { | ||||||||
416 | if (isSingleWord()) { | ||||||||
417 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 417, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
418 | return U.VAL == (WordType(1) << (BitWidth - 1)); | ||||||||
419 | } | ||||||||
420 | return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1; | ||||||||
421 | } | ||||||||
422 | |||||||||
423 | /// Check if this APInt has an N-bits unsigned integer value. | ||||||||
424 | bool isIntN(unsigned N) const { return getActiveBits() <= N; } | ||||||||
425 | |||||||||
426 | /// Check if this APInt has an N-bits signed integer value. | ||||||||
427 | bool isSignedIntN(unsigned N) const { return getSignificantBits() <= N; } | ||||||||
428 | |||||||||
429 | /// Check if this APInt's value is a power of two greater than zero. | ||||||||
430 | /// | ||||||||
431 | /// \returns true if the argument APInt value is a power of two > 0. | ||||||||
432 | bool isPowerOf2() const { | ||||||||
433 | if (isSingleWord()) { | ||||||||
434 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 434, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
435 | return isPowerOf2_64(U.VAL); | ||||||||
436 | } | ||||||||
437 | return countPopulationSlowCase() == 1; | ||||||||
438 | } | ||||||||
439 | |||||||||
440 | /// Check if this APInt's negated value is a power of two greater than zero. | ||||||||
441 | bool isNegatedPowerOf2() const { | ||||||||
442 | assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed" ) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\"" , "llvm/include/llvm/ADT/APInt.h", 442, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
443 | if (isNonNegative()) | ||||||||
444 | return false; | ||||||||
445 | // NegatedPowerOf2 - shifted mask in the top bits. | ||||||||
446 | unsigned LO = countl_one(); | ||||||||
447 | unsigned TZ = countr_zero(); | ||||||||
448 | return (LO + TZ) == BitWidth; | ||||||||
449 | } | ||||||||
450 | |||||||||
451 | /// Check if the APInt's value is returned by getSignMask. | ||||||||
452 | /// | ||||||||
453 | /// \returns true if this is the value returned by getSignMask. | ||||||||
454 | bool isSignMask() const { return isMinSignedValue(); } | ||||||||
455 | |||||||||
456 | /// Convert APInt to a boolean value. | ||||||||
457 | /// | ||||||||
458 | /// This converts the APInt to a boolean value as a test against zero. | ||||||||
459 | bool getBoolValue() const { return !isZero(); } | ||||||||
460 | |||||||||
461 | /// If this value is smaller than the specified limit, return it, otherwise | ||||||||
462 | /// return the limit value. This causes the value to saturate to the limit. | ||||||||
463 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const { | ||||||||
464 | return ugt(Limit) ? Limit : getZExtValue(); | ||||||||
465 | } | ||||||||
466 | |||||||||
467 | /// Check if the APInt consists of a repeated bit pattern. | ||||||||
468 | /// | ||||||||
469 | /// e.g. 0x01010101 satisfies isSplat(8). | ||||||||
470 | /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit | ||||||||
471 | /// width without remainder. | ||||||||
472 | bool isSplat(unsigned SplatSizeInBits) const; | ||||||||
473 | |||||||||
474 | /// \returns true if this APInt value is a sequence of \param numBits ones | ||||||||
475 | /// starting at the least significant bit with the remainder zero. | ||||||||
476 | bool isMask(unsigned numBits) const { | ||||||||
477 | assert(numBits != 0 && "numBits must be non-zero")(static_cast <bool> (numBits != 0 && "numBits must be non-zero" ) ? void (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\"" , "llvm/include/llvm/ADT/APInt.h", 477, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
478 | assert(numBits <= BitWidth && "numBits out of range")(static_cast <bool> (numBits <= BitWidth && "numBits out of range" ) ? void (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\"" , "llvm/include/llvm/ADT/APInt.h", 478, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
479 | if (isSingleWord()) | ||||||||
480 | return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits)); | ||||||||
481 | unsigned Ones = countTrailingOnesSlowCase(); | ||||||||
482 | return (numBits == Ones) && | ||||||||
483 | ((Ones + countLeadingZerosSlowCase()) == BitWidth); | ||||||||
484 | } | ||||||||
485 | |||||||||
486 | /// \returns true if this APInt is a non-empty sequence of ones starting at | ||||||||
487 | /// the least significant bit with the remainder zero. | ||||||||
488 | /// Ex. isMask(0x0000FFFFU) == true. | ||||||||
489 | bool isMask() const { | ||||||||
490 | if (isSingleWord()) | ||||||||
491 | return isMask_64(U.VAL); | ||||||||
492 | unsigned Ones = countTrailingOnesSlowCase(); | ||||||||
493 | return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); | ||||||||
494 | } | ||||||||
495 | |||||||||
496 | /// Return true if this APInt value contains a non-empty sequence of ones with | ||||||||
497 | /// the remainder zero. | ||||||||
498 | bool isShiftedMask() const { | ||||||||
499 | if (isSingleWord()) | ||||||||
500 | return isShiftedMask_64(U.VAL); | ||||||||
501 | unsigned Ones = countPopulationSlowCase(); | ||||||||
502 | unsigned LeadZ = countLeadingZerosSlowCase(); | ||||||||
503 | return (Ones + LeadZ + countr_zero()) == BitWidth; | ||||||||
504 | } | ||||||||
505 | |||||||||
506 | /// Return true if this APInt value contains a non-empty sequence of ones with | ||||||||
507 | /// the remainder zero. If true, \p MaskIdx will specify the index of the | ||||||||
508 | /// lowest set bit and \p MaskLen is updated to specify the length of the | ||||||||
509 | /// mask, else neither are updated. | ||||||||
510 | bool isShiftedMask(unsigned &MaskIdx, unsigned &MaskLen) const { | ||||||||
511 | if (isSingleWord()) | ||||||||
512 | return isShiftedMask_64(U.VAL, MaskIdx, MaskLen); | ||||||||
513 | unsigned Ones = countPopulationSlowCase(); | ||||||||
514 | unsigned LeadZ = countLeadingZerosSlowCase(); | ||||||||
515 | unsigned TrailZ = countTrailingZerosSlowCase(); | ||||||||
516 | if ((Ones + LeadZ + TrailZ) != BitWidth) | ||||||||
517 | return false; | ||||||||
518 | MaskLen = Ones; | ||||||||
519 | MaskIdx = TrailZ; | ||||||||
520 | return true; | ||||||||
521 | } | ||||||||
522 | |||||||||
523 | /// Compute an APInt containing numBits highbits from this APInt. | ||||||||
524 | /// | ||||||||
525 | /// Get an APInt with the same BitWidth as this APInt, just zero mask the low | ||||||||
526 | /// bits and right shift to the least significant bit. | ||||||||
527 | /// | ||||||||
528 | /// \returns the high "numBits" bits of this APInt. | ||||||||
529 | APInt getHiBits(unsigned numBits) const; | ||||||||
530 | |||||||||
531 | /// Compute an APInt containing numBits lowbits from this APInt. | ||||||||
532 | /// | ||||||||
533 | /// Get an APInt with the same BitWidth as this APInt, just zero mask the high | ||||||||
534 | /// bits. | ||||||||
535 | /// | ||||||||
536 | /// \returns the low "numBits" bits of this APInt. | ||||||||
537 | APInt getLoBits(unsigned numBits) const; | ||||||||
538 | |||||||||
539 | /// Determine if two APInts have the same value, after zero-extending | ||||||||
540 | /// one of them (if needed!) to ensure that the bit-widths match. | ||||||||
541 | static bool isSameValue(const APInt &I1, const APInt &I2) { | ||||||||
542 | if (I1.getBitWidth() == I2.getBitWidth()) | ||||||||
543 | return I1 == I2; | ||||||||
544 | |||||||||
545 | if (I1.getBitWidth() > I2.getBitWidth()) | ||||||||
546 | return I1 == I2.zext(I1.getBitWidth()); | ||||||||
547 | |||||||||
548 | return I1.zext(I2.getBitWidth()) == I2; | ||||||||
549 | } | ||||||||
550 | |||||||||
551 | /// Overload to compute a hash_code for an APInt value. | ||||||||
552 | friend hash_code hash_value(const APInt &Arg); | ||||||||
553 | |||||||||
554 | /// This function returns a pointer to the internal storage of the APInt. | ||||||||
555 | /// This is useful for writing out the APInt in binary form without any | ||||||||
556 | /// conversions. | ||||||||
557 | const uint64_t *getRawData() const { | ||||||||
558 | if (isSingleWord()) | ||||||||
559 | return &U.VAL; | ||||||||
560 | return &U.pVal[0]; | ||||||||
561 | } | ||||||||
562 | |||||||||
563 | /// @} | ||||||||
564 | /// \name Unary Operators | ||||||||
565 | /// @{ | ||||||||
566 | |||||||||
567 | /// Postfix increment operator. Increment *this by 1. | ||||||||
568 | /// | ||||||||
569 | /// \returns a new APInt value representing the original value of *this. | ||||||||
570 | APInt operator++(int) { | ||||||||
571 | APInt API(*this); | ||||||||
572 | ++(*this); | ||||||||
573 | return API; | ||||||||
574 | } | ||||||||
575 | |||||||||
576 | /// Prefix increment operator. | ||||||||
577 | /// | ||||||||
578 | /// \returns *this incremented by one | ||||||||
579 | APInt &operator++(); | ||||||||
580 | |||||||||
581 | /// Postfix decrement operator. Decrement *this by 1. | ||||||||
582 | /// | ||||||||
583 | /// \returns a new APInt value representing the original value of *this. | ||||||||
584 | APInt operator--(int) { | ||||||||
585 | APInt API(*this); | ||||||||
586 | --(*this); | ||||||||
587 | return API; | ||||||||
588 | } | ||||||||
589 | |||||||||
590 | /// Prefix decrement operator. | ||||||||
591 | /// | ||||||||
592 | /// \returns *this decremented by one. | ||||||||
593 | APInt &operator--(); | ||||||||
594 | |||||||||
595 | /// Logical negation operation on this APInt returns true if zero, like normal | ||||||||
596 | /// integers. | ||||||||
597 | bool operator!() const { return isZero(); } | ||||||||
598 | |||||||||
599 | /// @} | ||||||||
600 | /// \name Assignment Operators | ||||||||
601 | /// @{ | ||||||||
602 | |||||||||
603 | /// Copy assignment operator. | ||||||||
604 | /// | ||||||||
605 | /// \returns *this after assignment of RHS. | ||||||||
606 | APInt &operator=(const APInt &RHS) { | ||||||||
607 | // The common case (both source or dest being inline) doesn't require | ||||||||
608 | // allocation or deallocation. | ||||||||
609 | if (isSingleWord() && RHS.isSingleWord()) { | ||||||||
610 | U.VAL = RHS.U.VAL; | ||||||||
611 | BitWidth = RHS.BitWidth; | ||||||||
612 | return *this; | ||||||||
613 | } | ||||||||
614 | |||||||||
615 | assignSlowCase(RHS); | ||||||||
616 | return *this; | ||||||||
617 | } | ||||||||
618 | |||||||||
619 | /// Move assignment operator. | ||||||||
620 | APInt &operator=(APInt &&that) { | ||||||||
621 | #ifdef EXPENSIVE_CHECKS | ||||||||
622 | // Some std::shuffle implementations still do self-assignment. | ||||||||
623 | if (this == &that) | ||||||||
624 | return *this; | ||||||||
625 | #endif | ||||||||
626 | assert(this != &that && "Self-move not supported")(static_cast <bool> (this != &that && "Self-move not supported" ) ? void (0) : __assert_fail ("this != &that && \"Self-move not supported\"" , "llvm/include/llvm/ADT/APInt.h", 626, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
627 | if (!isSingleWord()) | ||||||||
628 | delete[] U.pVal; | ||||||||
629 | |||||||||
630 | // Use memcpy so that type based alias analysis sees both VAL and pVal | ||||||||
631 | // as modified. | ||||||||
632 | memcpy(&U, &that.U, sizeof(U)); | ||||||||
633 | |||||||||
634 | BitWidth = that.BitWidth; | ||||||||
635 | that.BitWidth = 0; | ||||||||
636 | return *this; | ||||||||
637 | } | ||||||||
638 | |||||||||
639 | /// Assignment operator. | ||||||||
640 | /// | ||||||||
641 | /// The RHS value is assigned to *this. If the significant bits in RHS exceed | ||||||||
642 | /// the bit width, the excess bits are truncated. If the bit width is larger | ||||||||
643 | /// than 64, the value is zero filled in the unspecified high order bits. | ||||||||
644 | /// | ||||||||
645 | /// \returns *this after assignment of RHS value. | ||||||||
646 | APInt &operator=(uint64_t RHS) { | ||||||||
647 | if (isSingleWord()) { | ||||||||
648 | U.VAL = RHS; | ||||||||
649 | return clearUnusedBits(); | ||||||||
650 | } | ||||||||
651 | U.pVal[0] = RHS; | ||||||||
652 | memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); | ||||||||
653 | return *this; | ||||||||
654 | } | ||||||||
655 | |||||||||
656 | /// Bitwise AND assignment operator. | ||||||||
657 | /// | ||||||||
658 | /// Performs a bitwise AND operation on this APInt and RHS. The result is | ||||||||
659 | /// assigned to *this. | ||||||||
660 | /// | ||||||||
661 | /// \returns *this after ANDing with RHS. | ||||||||
662 | APInt &operator&=(const APInt &RHS) { | ||||||||
663 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 663, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
664 | if (isSingleWord()) | ||||||||
665 | U.VAL &= RHS.U.VAL; | ||||||||
666 | else | ||||||||
667 | andAssignSlowCase(RHS); | ||||||||
668 | return *this; | ||||||||
669 | } | ||||||||
670 | |||||||||
671 | /// Bitwise AND assignment operator. | ||||||||
672 | /// | ||||||||
673 | /// Performs a bitwise AND operation on this APInt and RHS. RHS is | ||||||||
674 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
675 | /// the LHS. | ||||||||
676 | APInt &operator&=(uint64_t RHS) { | ||||||||
677 | if (isSingleWord()) { | ||||||||
678 | U.VAL &= RHS; | ||||||||
679 | return *this; | ||||||||
680 | } | ||||||||
681 | U.pVal[0] &= RHS; | ||||||||
682 | memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); | ||||||||
683 | return *this; | ||||||||
684 | } | ||||||||
685 | |||||||||
686 | /// Bitwise OR assignment operator. | ||||||||
687 | /// | ||||||||
688 | /// Performs a bitwise OR operation on this APInt and RHS. The result is | ||||||||
689 | /// assigned *this; | ||||||||
690 | /// | ||||||||
691 | /// \returns *this after ORing with RHS. | ||||||||
692 | APInt &operator|=(const APInt &RHS) { | ||||||||
693 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 693, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
694 | if (isSingleWord()) | ||||||||
695 | U.VAL |= RHS.U.VAL; | ||||||||
696 | else | ||||||||
697 | orAssignSlowCase(RHS); | ||||||||
698 | return *this; | ||||||||
699 | } | ||||||||
700 | |||||||||
701 | /// Bitwise OR assignment operator. | ||||||||
702 | /// | ||||||||
703 | /// Performs a bitwise OR operation on this APInt and RHS. RHS is | ||||||||
704 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
705 | /// the LHS. | ||||||||
706 | APInt &operator|=(uint64_t RHS) { | ||||||||
707 | if (isSingleWord()) { | ||||||||
708 | U.VAL |= RHS; | ||||||||
709 | return clearUnusedBits(); | ||||||||
710 | } | ||||||||
711 | U.pVal[0] |= RHS; | ||||||||
712 | return *this; | ||||||||
713 | } | ||||||||
714 | |||||||||
715 | /// Bitwise XOR assignment operator. | ||||||||
716 | /// | ||||||||
717 | /// Performs a bitwise XOR operation on this APInt and RHS. The result is | ||||||||
718 | /// assigned to *this. | ||||||||
719 | /// | ||||||||
720 | /// \returns *this after XORing with RHS. | ||||||||
721 | APInt &operator^=(const APInt &RHS) { | ||||||||
722 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 722, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
723 | if (isSingleWord()) | ||||||||
724 | U.VAL ^= RHS.U.VAL; | ||||||||
725 | else | ||||||||
726 | xorAssignSlowCase(RHS); | ||||||||
727 | return *this; | ||||||||
728 | } | ||||||||
729 | |||||||||
730 | /// Bitwise XOR assignment operator. | ||||||||
731 | /// | ||||||||
732 | /// Performs a bitwise XOR operation on this APInt and RHS. RHS is | ||||||||
733 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
734 | /// the LHS. | ||||||||
735 | APInt &operator^=(uint64_t RHS) { | ||||||||
736 | if (isSingleWord()) { | ||||||||
737 | U.VAL ^= RHS; | ||||||||
738 | return clearUnusedBits(); | ||||||||
739 | } | ||||||||
740 | U.pVal[0] ^= RHS; | ||||||||
741 | return *this; | ||||||||
742 | } | ||||||||
743 | |||||||||
744 | /// Multiplication assignment operator. | ||||||||
745 | /// | ||||||||
746 | /// Multiplies this APInt by RHS and assigns the result to *this. | ||||||||
747 | /// | ||||||||
748 | /// \returns *this | ||||||||
749 | APInt &operator*=(const APInt &RHS); | ||||||||
750 | APInt &operator*=(uint64_t RHS); | ||||||||
751 | |||||||||
752 | /// Addition assignment operator. | ||||||||
753 | /// | ||||||||
754 | /// Adds RHS to *this and assigns the result to *this. | ||||||||
755 | /// | ||||||||
756 | /// \returns *this | ||||||||
757 | APInt &operator+=(const APInt &RHS); | ||||||||
758 | APInt &operator+=(uint64_t RHS); | ||||||||
759 | |||||||||
760 | /// Subtraction assignment operator. | ||||||||
761 | /// | ||||||||
762 | /// Subtracts RHS from *this and assigns the result to *this. | ||||||||
763 | /// | ||||||||
764 | /// \returns *this | ||||||||
765 | APInt &operator-=(const APInt &RHS); | ||||||||
766 | APInt &operator-=(uint64_t RHS); | ||||||||
767 | |||||||||
768 | /// Left-shift assignment function. | ||||||||
769 | /// | ||||||||
770 | /// Shifts *this left by shiftAmt and assigns the result to *this. | ||||||||
771 | /// | ||||||||
772 | /// \returns *this after shifting left by ShiftAmt | ||||||||
773 | APInt &operator<<=(unsigned ShiftAmt) { | ||||||||
774 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "llvm/include/llvm/ADT/APInt.h", 774, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
775 | if (isSingleWord()) { | ||||||||
776 | if (ShiftAmt == BitWidth) | ||||||||
777 | U.VAL = 0; | ||||||||
778 | else | ||||||||
779 | U.VAL <<= ShiftAmt; | ||||||||
780 | return clearUnusedBits(); | ||||||||
781 | } | ||||||||
782 | shlSlowCase(ShiftAmt); | ||||||||
783 | return *this; | ||||||||
784 | } | ||||||||
785 | |||||||||
786 | /// Left-shift assignment function. | ||||||||
787 | /// | ||||||||
788 | /// Shifts *this left by shiftAmt and assigns the result to *this. | ||||||||
789 | /// | ||||||||
790 | /// \returns *this after shifting left by ShiftAmt | ||||||||
791 | APInt &operator<<=(const APInt &ShiftAmt); | ||||||||
792 | |||||||||
793 | /// @} | ||||||||
794 | /// \name Binary Operators | ||||||||
795 | /// @{ | ||||||||
796 | |||||||||
797 | /// Multiplication operator. | ||||||||
798 | /// | ||||||||
799 | /// Multiplies this APInt by RHS and returns the result. | ||||||||
800 | APInt operator*(const APInt &RHS) const; | ||||||||
801 | |||||||||
802 | /// Left logical shift operator. | ||||||||
803 | /// | ||||||||
804 | /// Shifts this APInt left by \p Bits and returns the result. | ||||||||
805 | APInt operator<<(unsigned Bits) const { return shl(Bits); } | ||||||||
806 | |||||||||
807 | /// Left logical shift operator. | ||||||||
808 | /// | ||||||||
809 | /// Shifts this APInt left by \p Bits and returns the result. | ||||||||
810 | APInt operator<<(const APInt &Bits) const { return shl(Bits); } | ||||||||
811 | |||||||||
812 | /// Arithmetic right-shift function. | ||||||||
813 | /// | ||||||||
814 | /// Arithmetic right-shift this APInt by shiftAmt. | ||||||||
815 | APInt ashr(unsigned ShiftAmt) const { | ||||||||
816 | APInt R(*this); | ||||||||
817 | R.ashrInPlace(ShiftAmt); | ||||||||
818 | return R; | ||||||||
819 | } | ||||||||
820 | |||||||||
821 | /// Arithmetic right-shift this APInt by ShiftAmt in place. | ||||||||
822 | void ashrInPlace(unsigned ShiftAmt) { | ||||||||
823 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "llvm/include/llvm/ADT/APInt.h", 823, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
824 | if (isSingleWord()) { | ||||||||
825 | int64_t SExtVAL = SignExtend64(U.VAL, BitWidth); | ||||||||
826 | if (ShiftAmt == BitWidth) | ||||||||
827 | U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. | ||||||||
828 | else | ||||||||
829 | U.VAL = SExtVAL >> ShiftAmt; | ||||||||
830 | clearUnusedBits(); | ||||||||
831 | return; | ||||||||
832 | } | ||||||||
833 | ashrSlowCase(ShiftAmt); | ||||||||
834 | } | ||||||||
835 | |||||||||
836 | /// Logical right-shift function. | ||||||||
837 | /// | ||||||||
838 | /// Logical right-shift this APInt by shiftAmt. | ||||||||
839 | APInt lshr(unsigned shiftAmt) const { | ||||||||
840 | APInt R(*this); | ||||||||
841 | R.lshrInPlace(shiftAmt); | ||||||||
842 | return R; | ||||||||
843 | } | ||||||||
844 | |||||||||
845 | /// Logical right-shift this APInt by ShiftAmt in place. | ||||||||
846 | void lshrInPlace(unsigned ShiftAmt) { | ||||||||
847 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "llvm/include/llvm/ADT/APInt.h", 847, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
848 | if (isSingleWord()) { | ||||||||
849 | if (ShiftAmt == BitWidth) | ||||||||
850 | U.VAL = 0; | ||||||||
851 | else | ||||||||
852 | U.VAL >>= ShiftAmt; | ||||||||
853 | return; | ||||||||
854 | } | ||||||||
855 | lshrSlowCase(ShiftAmt); | ||||||||
856 | } | ||||||||
857 | |||||||||
858 | /// Left-shift function. | ||||||||
859 | /// | ||||||||
860 | /// Left-shift this APInt by shiftAmt. | ||||||||
861 | APInt shl(unsigned shiftAmt) const { | ||||||||
862 | APInt R(*this); | ||||||||
863 | R <<= shiftAmt; | ||||||||
864 | return R; | ||||||||
865 | } | ||||||||
866 | |||||||||
867 | /// relative logical shift right | ||||||||
868 | APInt relativeLShr(int RelativeShift) const { | ||||||||
869 | return RelativeShift > 0 ? lshr(RelativeShift) : shl(-RelativeShift); | ||||||||
870 | } | ||||||||
871 | |||||||||
872 | /// relative logical shift left | ||||||||
873 | APInt relativeLShl(int RelativeShift) const { | ||||||||
874 | return relativeLShr(-RelativeShift); | ||||||||
875 | } | ||||||||
876 | |||||||||
877 | /// relative arithmetic shift right | ||||||||
878 | APInt relativeAShr(int RelativeShift) const { | ||||||||
879 | return RelativeShift > 0 ? ashr(RelativeShift) : shl(-RelativeShift); | ||||||||
880 | } | ||||||||
881 | |||||||||
882 | /// relative arithmetic shift left | ||||||||
883 | APInt relativeAShl(int RelativeShift) const { | ||||||||
884 | return relativeAShr(-RelativeShift); | ||||||||
885 | } | ||||||||
886 | |||||||||
887 | /// Rotate left by rotateAmt. | ||||||||
888 | APInt rotl(unsigned rotateAmt) const; | ||||||||
889 | |||||||||
890 | /// Rotate right by rotateAmt. | ||||||||
891 | APInt rotr(unsigned rotateAmt) const; | ||||||||
892 | |||||||||
893 | /// Arithmetic right-shift function. | ||||||||
894 | /// | ||||||||
895 | /// Arithmetic right-shift this APInt by shiftAmt. | ||||||||
896 | APInt ashr(const APInt &ShiftAmt) const { | ||||||||
897 | APInt R(*this); | ||||||||
898 | R.ashrInPlace(ShiftAmt); | ||||||||
899 | return R; | ||||||||
900 | } | ||||||||
901 | |||||||||
902 | /// Arithmetic right-shift this APInt by shiftAmt in place. | ||||||||
903 | void ashrInPlace(const APInt &shiftAmt); | ||||||||
904 | |||||||||
905 | /// Logical right-shift function. | ||||||||
906 | /// | ||||||||
907 | /// Logical right-shift this APInt by shiftAmt. | ||||||||
908 | APInt lshr(const APInt &ShiftAmt) const { | ||||||||
909 | APInt R(*this); | ||||||||
910 | R.lshrInPlace(ShiftAmt); | ||||||||
911 | return R; | ||||||||
912 | } | ||||||||
913 | |||||||||
914 | /// Logical right-shift this APInt by ShiftAmt in place. | ||||||||
915 | void lshrInPlace(const APInt &ShiftAmt); | ||||||||
916 | |||||||||
917 | /// Left-shift function. | ||||||||
918 | /// | ||||||||
919 | /// Left-shift this APInt by shiftAmt. | ||||||||
920 | APInt shl(const APInt &ShiftAmt) const { | ||||||||
921 | APInt R(*this); | ||||||||
922 | R <<= ShiftAmt; | ||||||||
923 | return R; | ||||||||
924 | } | ||||||||
925 | |||||||||
926 | /// Rotate left by rotateAmt. | ||||||||
927 | APInt rotl(const APInt &rotateAmt) const; | ||||||||
928 | |||||||||
929 | /// Rotate right by rotateAmt. | ||||||||
930 | APInt rotr(const APInt &rotateAmt) const; | ||||||||
931 | |||||||||
932 | /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is | ||||||||
933 | /// equivalent to: | ||||||||
934 | /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth) | ||||||||
935 | APInt concat(const APInt &NewLSB) const { | ||||||||
936 | /// If the result will be small, then both the merged values are small. | ||||||||
937 | unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth(); | ||||||||
938 | if (NewWidth <= APINT_BITS_PER_WORD) | ||||||||
939 | return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL); | ||||||||
940 | return concatSlowCase(NewLSB); | ||||||||
941 | } | ||||||||
942 | |||||||||
943 | /// Unsigned division operation. | ||||||||
944 | /// | ||||||||
945 | /// Perform an unsigned divide operation on this APInt by RHS. Both this and | ||||||||
946 | /// RHS are treated as unsigned quantities for purposes of this division. | ||||||||
947 | /// | ||||||||
948 | /// \returns a new APInt value containing the division result, rounded towards | ||||||||
949 | /// zero. | ||||||||
950 | APInt udiv(const APInt &RHS) const; | ||||||||
951 | APInt udiv(uint64_t RHS) const; | ||||||||
952 | |||||||||
953 | /// Signed division function for APInt. | ||||||||
954 | /// | ||||||||
955 | /// Signed divide this APInt by APInt RHS. | ||||||||
956 | /// | ||||||||
957 | /// The result is rounded towards zero. | ||||||||
958 | APInt sdiv(const APInt &RHS) const; | ||||||||
959 | APInt sdiv(int64_t RHS) const; | ||||||||
960 | |||||||||
961 | /// Unsigned remainder operation. | ||||||||
962 | /// | ||||||||
963 | /// Perform an unsigned remainder operation on this APInt with RHS being the | ||||||||
964 | /// divisor. Both this and RHS are treated as unsigned quantities for purposes | ||||||||
965 | /// of this operation. | ||||||||
966 | /// | ||||||||
967 | /// \returns a new APInt value containing the remainder result | ||||||||
968 | APInt urem(const APInt &RHS) const; | ||||||||
969 | uint64_t urem(uint64_t RHS) const; | ||||||||
970 | |||||||||
971 | /// Function for signed remainder operation. | ||||||||
972 | /// | ||||||||
973 | /// Signed remainder operation on APInt. | ||||||||
974 | /// | ||||||||
975 | /// Note that this is a true remainder operation and not a modulo operation | ||||||||
976 | /// because the sign follows the sign of the dividend which is *this. | ||||||||
977 | APInt srem(const APInt &RHS) const; | ||||||||
978 | int64_t srem(int64_t RHS) const; | ||||||||
979 | |||||||||
980 | /// Dual division/remainder interface. | ||||||||
981 | /// | ||||||||
982 | /// Sometimes it is convenient to divide two APInt values and obtain both the | ||||||||
983 | /// quotient and remainder. This function does both operations in the same | ||||||||
984 | /// computation making it a little more efficient. The pair of input arguments | ||||||||
985 | /// may overlap with the pair of output arguments. It is safe to call | ||||||||
986 | /// udivrem(X, Y, X, Y), for example. | ||||||||
987 | static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, | ||||||||
988 | APInt &Remainder); | ||||||||
989 | static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, | ||||||||
990 | uint64_t &Remainder); | ||||||||
991 | |||||||||
992 | static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, | ||||||||
993 | APInt &Remainder); | ||||||||
994 | static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient, | ||||||||
995 | int64_t &Remainder); | ||||||||
996 | |||||||||
997 | // Operations that return overflow indicators. | ||||||||
998 | APInt sadd_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
999 | APInt uadd_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1000 | APInt ssub_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1001 | APInt usub_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1002 | APInt sdiv_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1003 | APInt smul_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1004 | APInt umul_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1005 | APInt sshl_ov(const APInt &Amt, bool &Overflow) const; | ||||||||
1006 | APInt ushl_ov(const APInt &Amt, bool &Overflow) const; | ||||||||
1007 | |||||||||
1008 | // Operations that saturate | ||||||||
1009 | APInt sadd_sat(const APInt &RHS) const; | ||||||||
1010 | APInt uadd_sat(const APInt &RHS) const; | ||||||||
1011 | APInt ssub_sat(const APInt &RHS) const; | ||||||||
1012 | APInt usub_sat(const APInt &RHS) const; | ||||||||
1013 | APInt smul_sat(const APInt &RHS) const; | ||||||||
1014 | APInt umul_sat(const APInt &RHS) const; | ||||||||
1015 | APInt sshl_sat(const APInt &RHS) const; | ||||||||
1016 | APInt ushl_sat(const APInt &RHS) const; | ||||||||
1017 | |||||||||
1018 | /// Array-indexing support. | ||||||||
1019 | /// | ||||||||
1020 | /// \returns the bit value at bitPosition | ||||||||
1021 | bool operator[](unsigned bitPosition) const { | ||||||||
1022 | assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast <bool> (bitPosition < getBitWidth() && "Bit position out of bounds!") ? void (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\"" , "llvm/include/llvm/ADT/APInt.h", 1022, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1023 | return (maskBit(bitPosition) & getWord(bitPosition)) != 0; | ||||||||
1024 | } | ||||||||
1025 | |||||||||
1026 | /// @} | ||||||||
1027 | /// \name Comparison Operators | ||||||||
1028 | /// @{ | ||||||||
1029 | |||||||||
1030 | /// Equality operator. | ||||||||
1031 | /// | ||||||||
1032 | /// Compares this APInt with RHS for the validity of the equality | ||||||||
1033 | /// relationship. | ||||||||
1034 | bool operator==(const APInt &RHS) const { | ||||||||
1035 | assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast <bool> (BitWidth == RHS.BitWidth && "Comparison requires equal bit widths") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\"" , "llvm/include/llvm/ADT/APInt.h", 1035, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1036 | if (isSingleWord()) | ||||||||
1037 | return U.VAL == RHS.U.VAL; | ||||||||
1038 | return equalSlowCase(RHS); | ||||||||
1039 | } | ||||||||
1040 | |||||||||
1041 | /// Equality operator. | ||||||||
1042 | /// | ||||||||
1043 | /// Compares this APInt with a uint64_t for the validity of the equality | ||||||||
1044 | /// relationship. | ||||||||
1045 | /// | ||||||||
1046 | /// \returns true if *this == Val | ||||||||
1047 | bool operator==(uint64_t Val) const { | ||||||||
1048 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val; | ||||||||
1049 | } | ||||||||
1050 | |||||||||
1051 | /// Equality comparison. | ||||||||
1052 | /// | ||||||||
1053 | /// Compares this APInt with RHS for the validity of the equality | ||||||||
1054 | /// relationship. | ||||||||
1055 | /// | ||||||||
1056 | /// \returns true if *this == Val | ||||||||
1057 | bool eq(const APInt &RHS) const { return (*this) == RHS; } | ||||||||
1058 | |||||||||
1059 | /// Inequality operator. | ||||||||
1060 | /// | ||||||||
1061 | /// Compares this APInt with RHS for the validity of the inequality | ||||||||
1062 | /// relationship. | ||||||||
1063 | /// | ||||||||
1064 | /// \returns true if *this != Val | ||||||||
1065 | bool operator!=(const APInt &RHS) const { return !((*this) == RHS); } | ||||||||
1066 | |||||||||
1067 | /// Inequality operator. | ||||||||
1068 | /// | ||||||||
1069 | /// Compares this APInt with a uint64_t for the validity of the inequality | ||||||||
1070 | /// relationship. | ||||||||
1071 | /// | ||||||||
1072 | /// \returns true if *this != Val | ||||||||
1073 | bool operator!=(uint64_t Val) const { return !((*this) == Val); } | ||||||||
1074 | |||||||||
1075 | /// Inequality comparison | ||||||||
1076 | /// | ||||||||
1077 | /// Compares this APInt with RHS for the validity of the inequality | ||||||||
1078 | /// relationship. | ||||||||
1079 | /// | ||||||||
1080 | /// \returns true if *this != Val | ||||||||
1081 | bool ne(const APInt &RHS) const { return !((*this) == RHS); } | ||||||||
1082 | |||||||||
1083 | /// Unsigned less than comparison | ||||||||
1084 | /// | ||||||||
1085 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1086 | /// the validity of the less-than relationship. | ||||||||
1087 | /// | ||||||||
1088 | /// \returns true if *this < RHS when both are considered unsigned. | ||||||||
1089 | bool ult(const APInt &RHS) const { return compare(RHS) < 0; } | ||||||||
1090 | |||||||||
1091 | /// Unsigned less than comparison | ||||||||
1092 | /// | ||||||||
1093 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1094 | /// the validity of the less-than relationship. | ||||||||
1095 | /// | ||||||||
1096 | /// \returns true if *this < RHS when considered unsigned. | ||||||||
1097 | bool ult(uint64_t RHS) const { | ||||||||
1098 | // Only need to check active bits if not a single word. | ||||||||
1099 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS; | ||||||||
1100 | } | ||||||||
1101 | |||||||||
1102 | /// Signed less than comparison | ||||||||
1103 | /// | ||||||||
1104 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
1105 | /// validity of the less-than relationship. | ||||||||
1106 | /// | ||||||||
1107 | /// \returns true if *this < RHS when both are considered signed. | ||||||||
1108 | bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } | ||||||||
1109 | |||||||||
1110 | /// Signed less than comparison | ||||||||
1111 | /// | ||||||||
1112 | /// Regards both *this as a signed quantity and compares it with RHS for | ||||||||
1113 | /// the validity of the less-than relationship. | ||||||||
1114 | /// | ||||||||
1115 | /// \returns true if *this < RHS when considered signed. | ||||||||
1116 | bool slt(int64_t RHS) const { | ||||||||
1117 | return (!isSingleWord() && getSignificantBits() > 64) | ||||||||
1118 | ? isNegative() | ||||||||
1119 | : getSExtValue() < RHS; | ||||||||
1120 | } | ||||||||
1121 | |||||||||
1122 | /// Unsigned less or equal comparison | ||||||||
1123 | /// | ||||||||
1124 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1125 | /// validity of the less-or-equal relationship. | ||||||||
1126 | /// | ||||||||
1127 | /// \returns true if *this <= RHS when both are considered unsigned. | ||||||||
1128 | bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } | ||||||||
1129 | |||||||||
1130 | /// Unsigned less or equal comparison | ||||||||
1131 | /// | ||||||||
1132 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1133 | /// the validity of the less-or-equal relationship. | ||||||||
1134 | /// | ||||||||
1135 | /// \returns true if *this <= RHS when considered unsigned. | ||||||||
1136 | bool ule(uint64_t RHS) const { return !ugt(RHS); } | ||||||||
1137 | |||||||||
1138 | /// Signed less or equal comparison | ||||||||
1139 | /// | ||||||||
1140 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
1141 | /// validity of the less-or-equal relationship. | ||||||||
1142 | /// | ||||||||
1143 | /// \returns true if *this <= RHS when both are considered signed. | ||||||||
1144 | bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } | ||||||||
1145 | |||||||||
1146 | /// Signed less or equal comparison | ||||||||
1147 | /// | ||||||||
1148 | /// Regards both *this as a signed quantity and compares it with RHS for the | ||||||||
1149 | /// validity of the less-or-equal relationship. | ||||||||
1150 | /// | ||||||||
1151 | /// \returns true if *this <= RHS when considered signed. | ||||||||
1152 | bool sle(uint64_t RHS) const { return !sgt(RHS); } | ||||||||
1153 | |||||||||
1154 | /// Unsigned greater than comparison | ||||||||
1155 | /// | ||||||||
1156 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1157 | /// the validity of the greater-than relationship. | ||||||||
1158 | /// | ||||||||
1159 | /// \returns true if *this > RHS when both are considered unsigned. | ||||||||
1160 | bool ugt(const APInt &RHS) const { return !ule(RHS); } | ||||||||
1161 | |||||||||
1162 | /// Unsigned greater than comparison | ||||||||
1163 | /// | ||||||||
1164 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1165 | /// the validity of the greater-than relationship. | ||||||||
1166 | /// | ||||||||
1167 | /// \returns true if *this > RHS when considered unsigned. | ||||||||
1168 | bool ugt(uint64_t RHS) const { | ||||||||
1169 | // Only need to check active bits if not a single word. | ||||||||
1170 | return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS; | ||||||||
1171 | } | ||||||||
1172 | |||||||||
1173 | /// Signed greater than comparison | ||||||||
1174 | /// | ||||||||
1175 | /// Regards both *this and RHS as signed quantities and compares them for the | ||||||||
1176 | /// validity of the greater-than relationship. | ||||||||
1177 | /// | ||||||||
1178 | /// \returns true if *this > RHS when both are considered signed. | ||||||||
1179 | bool sgt(const APInt &RHS) const { return !sle(RHS); } | ||||||||
1180 | |||||||||
1181 | /// Signed greater than comparison | ||||||||
1182 | /// | ||||||||
1183 | /// Regards both *this as a signed quantity and compares it with RHS for | ||||||||
1184 | /// the validity of the greater-than relationship. | ||||||||
1185 | /// | ||||||||
1186 | /// \returns true if *this > RHS when considered signed. | ||||||||
1187 | bool sgt(int64_t RHS) const { | ||||||||
1188 | return (!isSingleWord() && getSignificantBits() > 64) | ||||||||
1189 | ? !isNegative() | ||||||||
1190 | : getSExtValue() > RHS; | ||||||||
1191 | } | ||||||||
1192 | |||||||||
1193 | /// Unsigned greater or equal comparison | ||||||||
1194 | /// | ||||||||
1195 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1196 | /// validity of the greater-or-equal relationship. | ||||||||
1197 | /// | ||||||||
1198 | /// \returns true if *this >= RHS when both are considered unsigned. | ||||||||
1199 | bool uge(const APInt &RHS) const { return !ult(RHS); } | ||||||||
1200 | |||||||||
1201 | /// Unsigned greater or equal comparison | ||||||||
1202 | /// | ||||||||
1203 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1204 | /// the validity of the greater-or-equal relationship. | ||||||||
1205 | /// | ||||||||
1206 | /// \returns true if *this >= RHS when considered unsigned. | ||||||||
1207 | bool uge(uint64_t RHS) const { return !ult(RHS); } | ||||||||
1208 | |||||||||
1209 | /// Signed greater or equal comparison | ||||||||
1210 | /// | ||||||||
1211 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
1212 | /// validity of the greater-or-equal relationship. | ||||||||
1213 | /// | ||||||||
1214 | /// \returns true if *this >= RHS when both are considered signed. | ||||||||
1215 | bool sge(const APInt &RHS) const { return !slt(RHS); } | ||||||||
1216 | |||||||||
1217 | /// Signed greater or equal comparison | ||||||||
1218 | /// | ||||||||
1219 | /// Regards both *this as a signed quantity and compares it with RHS for | ||||||||
1220 | /// the validity of the greater-or-equal relationship. | ||||||||
1221 | /// | ||||||||
1222 | /// \returns true if *this >= RHS when considered signed. | ||||||||
1223 | bool sge(int64_t RHS) const { return !slt(RHS); } | ||||||||
1224 | |||||||||
1225 | /// This operation tests if there are any pairs of corresponding bits | ||||||||
1226 | /// between this APInt and RHS that are both set. | ||||||||
1227 | bool intersects(const APInt &RHS) const { | ||||||||
1228 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 1228, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1229 | if (isSingleWord()) | ||||||||
1230 | return (U.VAL & RHS.U.VAL) != 0; | ||||||||
1231 | return intersectsSlowCase(RHS); | ||||||||
1232 | } | ||||||||
1233 | |||||||||
1234 | /// This operation checks that all bits set in this APInt are also set in RHS. | ||||||||
1235 | bool isSubsetOf(const APInt &RHS) const { | ||||||||
1236 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "llvm/include/llvm/ADT/APInt.h", 1236, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1237 | if (isSingleWord()) | ||||||||
1238 | return (U.VAL & ~RHS.U.VAL) == 0; | ||||||||
1239 | return isSubsetOfSlowCase(RHS); | ||||||||
1240 | } | ||||||||
1241 | |||||||||
1242 | /// @} | ||||||||
1243 | /// \name Resizing Operators | ||||||||
1244 | /// @{ | ||||||||
1245 | |||||||||
1246 | /// Truncate to new width. | ||||||||
1247 | /// | ||||||||
1248 | /// Truncate the APInt to a specified width. It is an error to specify a width | ||||||||
1249 | /// that is greater than the current width. | ||||||||
1250 | APInt trunc(unsigned width) const; | ||||||||
1251 | |||||||||
1252 | /// Truncate to new width with unsigned saturation. | ||||||||
1253 | /// | ||||||||
1254 | /// If the APInt, treated as unsigned integer, can be losslessly truncated to | ||||||||
1255 | /// the new bitwidth, then return truncated APInt. Else, return max value. | ||||||||
1256 | APInt truncUSat(unsigned width) const; | ||||||||
1257 | |||||||||
1258 | /// Truncate to new width with signed saturation. | ||||||||
1259 | /// | ||||||||
1260 | /// If this APInt, treated as signed integer, can be losslessly truncated to | ||||||||
1261 | /// the new bitwidth, then return truncated APInt. Else, return either | ||||||||
1262 | /// signed min value if the APInt was negative, or signed max value. | ||||||||
1263 | APInt truncSSat(unsigned width) const; | ||||||||
1264 | |||||||||
1265 | /// Sign extend to a new width. | ||||||||
1266 | /// | ||||||||
1267 | /// This operation sign extends the APInt to a new width. If the high order | ||||||||
1268 | /// bit is set, the fill on the left will be done with 1 bits, otherwise zero. | ||||||||
1269 | /// It is an error to specify a width that is less than the | ||||||||
1270 | /// current width. | ||||||||
1271 | APInt sext(unsigned width) const; | ||||||||
1272 | |||||||||
1273 | /// Zero extend to a new width. | ||||||||
1274 | /// | ||||||||
1275 | /// This operation zero extends the APInt to a new width. The high order bits | ||||||||
1276 | /// are filled with 0 bits. It is an error to specify a width that is less | ||||||||
1277 | /// than the current width. | ||||||||
1278 | APInt zext(unsigned width) const; | ||||||||
1279 | |||||||||
1280 | /// Sign extend or truncate to width | ||||||||
1281 | /// | ||||||||
1282 | /// Make this APInt have the bit width given by \p width. The value is sign | ||||||||
1283 | /// extended, truncated, or left alone to make it that width. | ||||||||
1284 | APInt sextOrTrunc(unsigned width) const; | ||||||||
1285 | |||||||||
1286 | /// Zero extend or truncate to width | ||||||||
1287 | /// | ||||||||
1288 | /// Make this APInt have the bit width given by \p width. The value is zero | ||||||||
1289 | /// extended, truncated, or left alone to make it that width. | ||||||||
1290 | APInt zextOrTrunc(unsigned width) const; | ||||||||
1291 | |||||||||
1292 | /// @} | ||||||||
1293 | /// \name Bit Manipulation Operators | ||||||||
1294 | /// @{ | ||||||||
1295 | |||||||||
1296 | /// Set every bit to 1. | ||||||||
1297 | void setAllBits() { | ||||||||
1298 | if (isSingleWord()) | ||||||||
1299 | U.VAL = WORDTYPE_MAX; | ||||||||
1300 | else | ||||||||
1301 | // Set all the bits in all the words. | ||||||||
1302 | memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE); | ||||||||
1303 | // Clear the unused ones | ||||||||
1304 | clearUnusedBits(); | ||||||||
1305 | } | ||||||||
1306 | |||||||||
1307 | /// Set the given bit to 1 whose position is given as "bitPosition". | ||||||||
1308 | void setBit(unsigned BitPosition) { | ||||||||
1309 | assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth && "BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\"" , "llvm/include/llvm/ADT/APInt.h", 1309, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1310 | WordType Mask = maskBit(BitPosition); | ||||||||
1311 | if (isSingleWord()) | ||||||||
1312 | U.VAL |= Mask; | ||||||||
1313 | else | ||||||||
1314 | U.pVal[whichWord(BitPosition)] |= Mask; | ||||||||
1315 | } | ||||||||
1316 | |||||||||
1317 | /// Set the sign bit to 1. | ||||||||
1318 | void setSignBit() { setBit(BitWidth - 1); } | ||||||||
1319 | |||||||||
1320 | /// Set a given bit to a given value. | ||||||||
1321 | void setBitVal(unsigned BitPosition, bool BitValue) { | ||||||||
1322 | if (BitValue) | ||||||||
1323 | setBit(BitPosition); | ||||||||
1324 | else | ||||||||
1325 | clearBit(BitPosition); | ||||||||
1326 | } | ||||||||
1327 | |||||||||
1328 | /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. | ||||||||
1329 | /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls | ||||||||
1330 | /// setBits when \p loBit < \p hiBit. | ||||||||
1331 | /// For \p loBit == \p hiBit wrap case, set every bit to 1. | ||||||||
1332 | void setBitsWithWrap(unsigned loBit, unsigned hiBit) { | ||||||||
1333 | assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range" ) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\"" , "llvm/include/llvm/ADT/APInt.h", 1333, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1334 | assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range" ) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\"" , "llvm/include/llvm/ADT/APInt.h", 1334, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1335 | if (loBit < hiBit) { | ||||||||
1336 | setBits(loBit, hiBit); | ||||||||
1337 | return; | ||||||||
1338 | } | ||||||||
1339 | setLowBits(hiBit); | ||||||||
1340 | setHighBits(BitWidth - loBit); | ||||||||
1341 | } | ||||||||
1342 | |||||||||
1343 | /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. | ||||||||
1344 | /// This function handles case when \p loBit <= \p hiBit. | ||||||||
1345 | void setBits(unsigned loBit, unsigned hiBit) { | ||||||||
1346 | assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range" ) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\"" , "llvm/include/llvm/ADT/APInt.h", 1346, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1347 | assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range" ) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\"" , "llvm/include/llvm/ADT/APInt.h", 1347, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1348 | assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit" ) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\"" , "llvm/include/llvm/ADT/APInt.h", 1348, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1349 | if (loBit == hiBit) | ||||||||
1350 | return; | ||||||||
1351 | if (loBit
| ||||||||
1352 | uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); | ||||||||
1353 | mask <<= loBit; | ||||||||
1354 | if (isSingleWord()) | ||||||||
1355 | U.VAL |= mask; | ||||||||
1356 | else | ||||||||
1357 | U.pVal[0] |= mask; | ||||||||
1358 | } else { | ||||||||
1359 | setBitsSlowCase(loBit, hiBit); | ||||||||
1360 | } | ||||||||
1361 | } | ||||||||
1362 | |||||||||
1363 | /// Set the top bits starting from loBit. | ||||||||
1364 | void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); } | ||||||||
1365 | |||||||||
1366 | /// Set the bottom loBits bits. | ||||||||
1367 | void setLowBits(unsigned loBits) { return setBits(0, loBits); } | ||||||||
1368 | |||||||||
1369 | /// Set the top hiBits bits. | ||||||||
1370 | void setHighBits(unsigned hiBits) { | ||||||||
1371 | return setBits(BitWidth - hiBits, BitWidth); | ||||||||
1372 | } | ||||||||
1373 | |||||||||
1374 | /// Set every bit to 0. | ||||||||
1375 | void clearAllBits() { | ||||||||
1376 | if (isSingleWord()) | ||||||||
1377 | U.VAL = 0; | ||||||||
1378 | else | ||||||||
1379 | memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE); | ||||||||
1380 | } | ||||||||
1381 | |||||||||
1382 | /// Set a given bit to 0. | ||||||||
1383 | /// | ||||||||
1384 | /// Set the given bit to 0 whose position is given as "bitPosition". | ||||||||
1385 | void clearBit(unsigned BitPosition) { | ||||||||
1386 | assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth && "BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\"" , "llvm/include/llvm/ADT/APInt.h", 1386, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1387 | WordType Mask = ~maskBit(BitPosition); | ||||||||
1388 | if (isSingleWord()) | ||||||||
1389 | U.VAL &= Mask; | ||||||||
1390 | else | ||||||||
1391 | U.pVal[whichWord(BitPosition)] &= Mask; | ||||||||
1392 | } | ||||||||
1393 | |||||||||
1394 | /// Set bottom loBits bits to 0. | ||||||||
1395 | void clearLowBits(unsigned loBits) { | ||||||||
1396 | assert(loBits <= BitWidth && "More bits than bitwidth")(static_cast <bool> (loBits <= BitWidth && "More bits than bitwidth" ) ? void (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\"" , "llvm/include/llvm/ADT/APInt.h", 1396, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1397 | APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits); | ||||||||
1398 | *this &= Keep; | ||||||||
1399 | } | ||||||||
1400 | |||||||||
1401 | /// Set the sign bit to 0. | ||||||||
1402 | void clearSignBit() { clearBit(BitWidth - 1); } | ||||||||
1403 | |||||||||
1404 | /// Toggle every bit to its opposite value. | ||||||||
1405 | void flipAllBits() { | ||||||||
1406 | if (isSingleWord()) { | ||||||||
1407 | U.VAL ^= WORDTYPE_MAX; | ||||||||
1408 | clearUnusedBits(); | ||||||||
1409 | } else { | ||||||||
1410 | flipAllBitsSlowCase(); | ||||||||
1411 | } | ||||||||
1412 | } | ||||||||
1413 | |||||||||
1414 | /// Toggles a given bit to its opposite value. | ||||||||
1415 | /// | ||||||||
1416 | /// Toggle a given bit to its opposite value whose position is given | ||||||||
1417 | /// as "bitPosition". | ||||||||
1418 | void flipBit(unsigned bitPosition); | ||||||||
1419 | |||||||||
1420 | /// Negate this APInt in place. | ||||||||
1421 | void negate() { | ||||||||
1422 | flipAllBits(); | ||||||||
1423 | ++(*this); | ||||||||
1424 | } | ||||||||
1425 | |||||||||
1426 | /// Insert the bits from a smaller APInt starting at bitPosition. | ||||||||
1427 | void insertBits(const APInt &SubBits, unsigned bitPosition); | ||||||||
1428 | void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits); | ||||||||
1429 | |||||||||
1430 | /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). | ||||||||
1431 | APInt extractBits(unsigned numBits, unsigned bitPosition) const; | ||||||||
1432 | uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const; | ||||||||
1433 | |||||||||
1434 | /// @} | ||||||||
1435 | /// \name Value Characterization Functions | ||||||||
1436 | /// @{ | ||||||||
1437 | |||||||||
1438 | /// Return the number of bits in the APInt. | ||||||||
1439 | unsigned getBitWidth() const { return BitWidth; } | ||||||||
1440 | |||||||||
1441 | /// Get the number of words. | ||||||||
1442 | /// | ||||||||
1443 | /// Here one word's bitwidth equals to that of uint64_t. | ||||||||
1444 | /// | ||||||||
1445 | /// \returns the number of words to hold the integer value of this APInt. | ||||||||
1446 | unsigned getNumWords() const { return getNumWords(BitWidth); } | ||||||||
1447 | |||||||||
1448 | /// Get the number of words. | ||||||||
1449 | /// | ||||||||
1450 | /// *NOTE* Here one word's bitwidth equals to that of uint64_t. | ||||||||
1451 | /// | ||||||||
1452 | /// \returns the number of words to hold the integer value with a given bit | ||||||||
1453 | /// width. | ||||||||
1454 | static unsigned getNumWords(unsigned BitWidth) { | ||||||||
1455 | return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; | ||||||||
1456 | } | ||||||||
1457 | |||||||||
1458 | /// Compute the number of active bits in the value | ||||||||
1459 | /// | ||||||||
1460 | /// This function returns the number of active bits which is defined as the | ||||||||
1461 | /// bit width minus the number of leading zeros. This is used in several | ||||||||
1462 | /// computations to see how "wide" the value is. | ||||||||
1463 | unsigned getActiveBits() const { return BitWidth - countl_zero(); } | ||||||||
1464 | |||||||||
1465 | /// Compute the number of active words in the value of this APInt. | ||||||||
1466 | /// | ||||||||
1467 | /// This is used in conjunction with getActiveData to extract the raw value of | ||||||||
1468 | /// the APInt. | ||||||||
1469 | unsigned getActiveWords() const { | ||||||||
1470 | unsigned numActiveBits = getActiveBits(); | ||||||||
1471 | return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1; | ||||||||
1472 | } | ||||||||
1473 | |||||||||
1474 | /// Get the minimum bit size for this signed APInt | ||||||||
1475 | /// | ||||||||
1476 | /// Computes the minimum bit width for this APInt while considering it to be a | ||||||||
1477 | /// signed (and probably negative) value. If the value is not negative, this | ||||||||
1478 | /// function returns the same value as getActiveBits()+1. Otherwise, it | ||||||||
1479 | /// returns the smallest bit width that will retain the negative value. For | ||||||||
1480 | /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so | ||||||||
1481 | /// for -1, this function will always return 1. | ||||||||
1482 | unsigned getSignificantBits() const { | ||||||||
1483 | return BitWidth - getNumSignBits() + 1; | ||||||||
1484 | } | ||||||||
1485 | |||||||||
1486 | /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead. | ||||||||
1487 | unsigned getMinSignedBits() const { return getSignificantBits(); } | ||||||||
1488 | |||||||||
1489 | /// Get zero extended value | ||||||||
1490 | /// | ||||||||
1491 | /// This method attempts to return the value of this APInt as a zero extended | ||||||||
1492 | /// uint64_t. The bitwidth must be <= 64 or the value must fit within a | ||||||||
1493 | /// uint64_t. Otherwise an assertion will result. | ||||||||
1494 | uint64_t getZExtValue() const { | ||||||||
1495 | if (isSingleWord()) | ||||||||
1496 | return U.VAL; | ||||||||
1497 | assert(getActiveBits() <= 64 && "Too many bits for uint64_t")(static_cast <bool> (getActiveBits() <= 64 && "Too many bits for uint64_t") ? void (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\"" , "llvm/include/llvm/ADT/APInt.h", 1497, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1498 | return U.pVal[0]; | ||||||||
1499 | } | ||||||||
1500 | |||||||||
1501 | /// Get zero extended value if possible | ||||||||
1502 | /// | ||||||||
1503 | /// This method attempts to return the value of this APInt as a zero extended | ||||||||
1504 | /// uint64_t. The bitwidth must be <= 64 or the value must fit within a | ||||||||
1505 | /// uint64_t. Otherwise no value is returned. | ||||||||
1506 | std::optional<uint64_t> tryZExtValue() const { | ||||||||
1507 | return (getActiveBits() <= 64) ? std::optional<uint64_t>(getZExtValue()) | ||||||||
1508 | : std::nullopt; | ||||||||
1509 | }; | ||||||||
1510 | |||||||||
1511 | /// Get sign extended value | ||||||||
1512 | /// | ||||||||
1513 | /// This method attempts to return the value of this APInt as a sign extended | ||||||||
1514 | /// int64_t. The bit width must be <= 64 or the value must fit within an | ||||||||
1515 | /// int64_t. Otherwise an assertion will result. | ||||||||
1516 | int64_t getSExtValue() const { | ||||||||
1517 | if (isSingleWord()) | ||||||||
1518 | return SignExtend64(U.VAL, BitWidth); | ||||||||
1519 | assert(getSignificantBits() <= 64 && "Too many bits for int64_t")(static_cast <bool> (getSignificantBits() <= 64 && "Too many bits for int64_t") ? void (0) : __assert_fail ("getSignificantBits() <= 64 && \"Too many bits for int64_t\"" , "llvm/include/llvm/ADT/APInt.h", 1519, __extension__ __PRETTY_FUNCTION__ )); | ||||||||
1520 | return int64_t(U.pVal[0]); | ||||||||
1521 | } | ||||||||
1522 | |||||||||
1523 | /// Get sign extended value if possible | ||||||||
1524 | /// | ||||||||
1525 | /// This method attempts to return the value of this APInt as a sign extended | ||||||||
1526 | /// int64_t. The bitwidth must be <= 64 or the value must fit within an | ||||||||
1527 | /// int64_t. Otherwise no value is returned. | ||||||||
1528 | std::optional<int64_t> trySExtValue() const { | ||||||||
1529 | return (getSignificantBits() <= 64) ? std::optional<int64_t>(getSExtValue()) | ||||||||
1530 | : std::nullopt; | ||||||||
1531 | }; | ||||||||
1532 | |||||||||
1533 | /// Get bits required for string value. | ||||||||
1534 | /// | ||||||||
1535 | /// This method determines how many bits are required to hold the APInt | ||||||||
1536 | /// equivalent of the string given by \p str. | ||||||||
1537 | static unsigned getBitsNeeded(StringRef str, uint8_t radix); | ||||||||
1538 | |||||||||
1539 | /// Get the bits that are sufficient to represent the string value. This may | ||||||||
1540 | /// over estimate the amount of bits required, but it does not require | ||||||||
1541 | /// parsing the value in the string. | ||||||||
1542 | static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix); | ||||||||
1543 | |||||||||
1544 | /// The APInt version of std::countl_zero. | ||||||||
1545 | /// | ||||||||
1546 | /// It counts the number of zeros from the most significant bit to the first | ||||||||
1547 | /// one bit. | ||||||||
1548 | /// | ||||||||
1549 | /// \returns BitWidth if the value is zero, otherwise returns the number of | ||||||||
1550 | /// zeros from the most significant bit to the first one bits. | ||||||||
1551 | unsigned countl_zero() const { | ||||||||
1552 | if (isSingleWord()) { | ||||||||
1553 | unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth; | ||||||||
1554 | return llvm::countl_zero(U.VAL) - unusedBits; | ||||||||
1555 | } | ||||||||
1556 | return countLeadingZerosSlowCase(); | ||||||||
1557 | } | ||||||||
1558 | |||||||||
1559 | unsigned countLeadingZeros() const { return countl_zero(); } | ||||||||
1560 | |||||||||
1561 | /// Count the number of leading one bits. | ||||||||
1562 | /// | ||||||||
1563 | /// This function is an APInt version of std::countl_one. It counts the number | ||||||||
1564 | /// of ones from the most significant bit to the first zero bit. | ||||||||
1565 | /// | ||||||||
1566 | /// \returns 0 if the high order bit is not set, otherwise returns the number | ||||||||
1567 | /// of 1 bits from the most significant to the least | ||||||||
1568 | unsigned countl_one() const { | ||||||||
1569 | if (isSingleWord()) { | ||||||||
1570 | if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false)) | ||||||||
1571 | return 0; | ||||||||
1572 | return llvm::countl_one(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); | ||||||||
1573 | } | ||||||||
1574 | return countLeadingOnesSlowCase(); | ||||||||
1575 | } | ||||||||
1576 | |||||||||
1577 | unsigned countLeadingOnes() const { return countl_one(); } | ||||||||
1578 | |||||||||
1579 | /// Computes the number of leading bits of this APInt that are equal to its | ||||||||
1580 | /// sign bit. | ||||||||
1581 | unsigned getNumSignBits() const { | ||||||||
1582 | return isNegative() ? countl_one() : countl_zero(); | ||||||||
1583 | } | ||||||||
1584 | |||||||||
1585 | /// Count the number of trailing zero bits. | ||||||||
1586 | /// | ||||||||
1587 | /// This function is an APInt version of the countr_zero. It counts the number | ||||||||
1588 | /// of zeros from the least significant bit to the first set bit. | ||||||||
1589 | /// | ||||||||
1590 | /// \returns BitWidth if the value is zero, otherwise returns the number of | ||||||||
1591 | /// zeros from the least significant bit to the first one bit. | ||||||||
1592 | unsigned countr_zero() const { | ||||||||
1593 | if (isSingleWord()) { | ||||||||
1594 | unsigned TrailingZeros = llvm::countr_zero(U.VAL); | ||||||||
1595 | return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros); | ||||||||
1596 | } | ||||||||
1597 | return countTrailingZerosSlowCase(); | ||||||||
1598 | } | ||||||||
1599 | |||||||||
1600 | unsigned countTrailingZeros() const { return countr_zero(); } | ||||||||
1601 | |||||||||
1602 | /// Count the number of trailing one bits. | ||||||||
1603 | /// | ||||||||
1604 | /// This function is an APInt version of std::countr_one. It counts the number | ||||||||
1605 | /// of ones from the least significant bit to the first zero bit. | ||||||||
1606 | /// | ||||||||
1607 | /// \returns BitWidth if the value is all ones, otherwise returns the number | ||||||||
1608 | /// of ones from the least significant bit to the first zero bit. | ||||||||
1609 | unsigned countr_one() const { | ||||||||
1610 | if (isSingleWord()) | ||||||||
1611 | return llvm::countr_one(U.VAL); | ||||||||
1612 | return countTrailingOnesSlowCase(); | ||||||||
1613 | } | ||||||||
1614 | |||||||||
1615 | unsigned countTrailingOnes() const { return countr_one(); } | ||||||||
1616 | |||||||||
1617 | /// Count the number of bits set. | ||||||||
1618 | /// | ||||||||
1619 | /// This function is an APInt version of std::popcount. It counts the number | ||||||||
1620 | /// of 1 bits in the APInt value. | ||||||||
1621 | /// | ||||||||
1622 | /// \returns 0 if the value is zero, otherwise returns the number of set bits. | ||||||||
1623 | unsigned popcount() const { | ||||||||
1624 | if (isSingleWord()) | ||||||||
1625 | return llvm::popcount(U.VAL); | ||||||||
1626 | return countPopulationSlowCase(); | ||||||||
1627 | } | ||||||||
1628 | |||||||||
1629 | unsigned countPopulation() const { return popcount(); } | ||||||||
1630 | |||||||||
1631 | /// @} | ||||||||
1632 | /// \name Conversion Functions | ||||||||
1633 | /// @{ | ||||||||
1634 | void print(raw_ostream &OS, bool isSigned) const; | ||||||||
1635 | |||||||||
1636 | /// Converts an APInt to a string and append it to Str. Str is commonly a | ||||||||
1637 | /// SmallString. | ||||||||
1638 | void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed, | ||||||||
1639 | bool formatAsCLiteral = false) const; | ||||||||
1640 | |||||||||
1641 | /// Considers the APInt to be unsigned and converts it into a string in the | ||||||||
1642 | /// radix given. The radix can be 2, 8, 10 16, or 36. | ||||||||
1643 | void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { | ||||||||
1644 | toString(Str, Radix, false, false); | ||||||||
1645 | } | ||||||||
1646 | |||||||||
1647 | /// Considers the APInt to be signed and converts it into a string in the | ||||||||
1648 | /// radix given. The radix can be 2, 8, 10, 16, or 36. | ||||||||
1649 | void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { | ||||||||
1650 | toString(Str, Radix, true, false); | ||||||||
1651 | } | ||||||||
1652 | |||||||||
1653 | /// \returns a byte-swapped representation of this APInt Value. | ||||||||
1654 | APInt byteSwap() const; | ||||||||
1655 | |||||||||
1656 | /// \returns the value with the bit representation reversed of this APInt | ||||||||
1657 | /// Value. | ||||||||
1658 | APInt reverseBits() const; | ||||||||
1659 | |||||||||
1660 | /// Converts this APInt to a double value. | ||||||||
1661 | double roundToDouble(bool isSigned) const; | ||||||||
1662 | |||||||||
1663 | /// Converts this unsigned APInt to a double value. | ||||||||
1664 | double roundToDouble() const { return roundToDouble(false); } | ||||||||
1665 | |||||||||
1666 | /// Converts this signed APInt to a double value. | ||||||||
1667 | double signedRoundToDouble() const { return roundToDouble(true); } | ||||||||
1668 | |||||||||
1669 | /// Converts APInt bits to a double | ||||||||
1670 | /// | ||||||||
1671 | /// The conversion does not do a translation from integer to double, it just | ||||||||
1672 | /// re-interprets the bits as a double. Note that it is valid to do this on | ||||||||
1673 | /// any bit width. Exactly 64 bits will be translated. | ||||||||
1674 | double bitsToDouble() const { return llvm::bit_cast<double>(getWord(0)); } | ||||||||
1675 | |||||||||
1676 | /// Converts APInt bits to a float | ||||||||
1677 | /// | ||||||||
1678 | /// The conversion does not do a translation from integer to float, it just | ||||||||
1679 | /// re-interprets the bits as a float. Note that it is valid to do this on | ||||||||
1680 | /// any bit width. Exactly 32 bits will be translated. | ||||||||
1681 | float bitsToFloat() const { | ||||||||
1682 | return llvm::bit_cast<float>(static_cast<uint32_t>(getWord(0))); | ||||||||
1683 | } | ||||||||
1684 | |||||||||
1685 | /// Converts a double to APInt bits. | ||||||||
1686 | /// | ||||||||
1687 | /// The conversion does not do a translation from double to integer, it just | ||||||||
1688 | /// re-interprets the bits of the double. | ||||||||
1689 | static APInt doubleToBits(double V) { | ||||||||
1690 | return APInt(sizeof(double) * CHAR_BIT8, llvm::bit_cast<uint64_t>(V)); | ||||||||
1691 | } | ||||||||
1692 | |||||||||
1693 | /// Converts a float to APInt bits. | ||||||||
1694 | /// | ||||||||
1695 | /// The conversion does not do a translation from float to integer, it just | ||||||||
1696 | /// re-interprets the bits of the float. | ||||||||
1697 | static APInt floatToBits(float V) { | ||||||||
1698 | return APInt(sizeof(float) * CHAR_BIT8, llvm::bit_cast<uint32_t>(V)); | ||||||||
1699 | } | ||||||||
1700 | |||||||||
1701 | /// @} | ||||||||
1702 | /// \name Mathematics Operations | ||||||||
1703 | /// @{ | ||||||||
1704 | |||||||||
1705 | /// \returns the floor log base 2 of this APInt. | ||||||||
1706 | unsigned logBase2() const { return getActiveBits() - 1; } | ||||||||
1707 | |||||||||
1708 | /// \returns the ceil log base 2 of this APInt. | ||||||||
1709 | unsigned ceilLogBase2() const { | ||||||||
1710 | APInt temp(*this); | ||||||||
1711 | --temp; | ||||||||
1712 | return temp.getActiveBits(); | ||||||||
1713 | } | ||||||||
1714 | |||||||||
1715 | /// \returns the nearest log base 2 of this APInt. Ties round up. | ||||||||
1716 | /// | ||||||||
1717 | /// NOTE: When we have a BitWidth of 1, we define: | ||||||||
1718 | /// | ||||||||
1719 | /// log2(0) = UINT32_MAX | ||||||||
1720 | /// log2(1) = 0 | ||||||||
1721 | /// | ||||||||
1722 | /// to get around any mathematical concerns resulting from | ||||||||
1723 | /// referencing 2 in a space where 2 does no exist. | ||||||||
1724 | unsigned nearestLogBase2() const; | ||||||||
1725 | |||||||||
1726 | /// \returns the log base 2 of this APInt if its an exact power of two, -1 | ||||||||
1727 | /// otherwise | ||||||||
1728 | int32_t exactLogBase2() const { | ||||||||
1729 | if (!isPowerOf2()) | ||||||||
1730 | return -1; | ||||||||
1731 | return logBase2(); | ||||||||
1732 | } | ||||||||
1733 | |||||||||
1734 | /// Compute the square root. | ||||||||
1735 | APInt sqrt() const; | ||||||||
1736 | |||||||||
1737 | /// Get the absolute value. If *this is < 0 then return -(*this), otherwise | ||||||||
1738 | /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit | ||||||||
1739 | /// wide APInt) is unchanged due to how negation works. | ||||||||
1740 | APInt abs() const { | ||||||||
1741 | if (isNegative()) | ||||||||
1742 | return -(*this); | ||||||||
1743 | return *this; | ||||||||
1744 | } | ||||||||
1745 | |||||||||
1746 | /// \returns the multiplicative inverse for a given modulo. | ||||||||
1747 | APInt multiplicativeInverse(const APInt &modulo) const; | ||||||||
1748 | |||||||||
1749 | /// @} | ||||||||
1750 | /// \name Building-block Operations for APInt and APFloat | ||||||||
1751 | /// @{ | ||||||||
1752 | |||||||||
1753 | // These building block operations operate on a representation of arbitrary | ||||||||
1754 | // precision, two's-complement, bignum integer values. They should be | ||||||||
1755 | // sufficient to implement APInt and APFloat bignum requirements. Inputs are | ||||||||
1756 | // generally a pointer to the base of an array of integer parts, representing | ||||||||
1757 | // an unsigned bignum, and a count of how many parts there are. | ||||||||
1758 | |||||||||
1759 | /// Sets the least significant part of a bignum to the input value, and zeroes | ||||||||
1760 | /// out higher parts. | ||||||||
1761 | static void tcSet(WordType *, WordType, unsigned); | ||||||||
1762 | |||||||||
1763 | /// Assign one bignum to another. | ||||||||
1764 | static void tcAssign(WordType *, const WordType *, unsigned); | ||||||||
1765 | |||||||||
1766 | /// Returns true if a bignum is zero, false otherwise. | ||||||||
1767 | static bool tcIsZero(const WordType *, unsigned); | ||||||||
1768 | |||||||||
1769 | /// Extract the given bit of a bignum; returns 0 or 1. Zero-based. | ||||||||
1770 | static int tcExtractBit(const WordType *, unsigned bit); | ||||||||
1771 | |||||||||
1772 | /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to | ||||||||
1773 | /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least | ||||||||
1774 | /// significant bit of DST. All high bits above srcBITS in DST are | ||||||||
1775 | /// zero-filled. | ||||||||
1776 | static void tcExtract(WordType *, unsigned dstCount, const WordType *, | ||||||||
1777 | unsigned srcBits, unsigned srcLSB); | ||||||||
1778 | |||||||||
1779 | /// Set the given bit of a bignum. Zero-based. | ||||||||
1780 | static void tcSetBit(WordType *, unsigned bit); | ||||||||
1781 | |||||||||
1782 | /// Clear the given bit of a bignum. Zero-based. | ||||||||
1783 | static void tcClearBit(WordType *, unsigned bit); | ||||||||
1784 | |||||||||
1785 | /// Returns the bit number of the least or most significant set bit of a | ||||||||
1786 | /// number. If the input number has no bits set -1U is returned. | ||||||||
1787 | static unsigned tcLSB(const WordType *, unsigned n); | ||||||||
1788 | static unsigned tcMSB(const WordType *parts, unsigned n); | ||||||||
1789 | |||||||||
1790 | /// Negate a bignum in-place. | ||||||||
1791 | static void tcNegate(WordType *, unsigned); | ||||||||
1792 | |||||||||
1793 | /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. | ||||||||
1794 | static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned); | ||||||||
1795 | /// DST += RHS. Returns the carry flag. | ||||||||
1796 | static WordType tcAddPart(WordType *, WordType, unsigned); | ||||||||
1797 | |||||||||
1798 | /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. | ||||||||
1799 | static WordType tcSubtract(WordType *, const WordType *, WordType carry, | ||||||||
1800 | unsigned); | ||||||||
1801 | /// DST -= RHS. Returns the carry flag. | ||||||||
1802 | static WordType tcSubtractPart(WordType *, WordType, unsigned); | ||||||||
1803 | |||||||||
1804 | /// DST += SRC * MULTIPLIER + PART if add is true | ||||||||
1805 | /// DST = SRC * MULTIPLIER + PART if add is false | ||||||||
1806 | /// | ||||||||
1807 | /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must | ||||||||
1808 | /// start at the same point, i.e. DST == SRC. | ||||||||
1809 | /// | ||||||||
1810 | /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned. | ||||||||
1811 | /// Otherwise DST is filled with the least significant DSTPARTS parts of the | ||||||||
1812 | /// result, and if all of the omitted higher parts were zero return zero, | ||||||||
1813 | /// otherwise overflow occurred and return one. | ||||||||
1814 | static int tcMultiplyPart(WordType *dst, const WordType *src, | ||||||||
1815 | WordType multiplier, WordType carry, | ||||||||
1816 | unsigned srcParts, unsigned dstParts, bool add); | ||||||||
1817 | |||||||||
1818 | /// DST = LHS * RHS, where DST has the same width as the operands and is | ||||||||
1819 | /// filled with the least significant parts of the result. Returns one if | ||||||||
1820 | /// overflow occurred, otherwise zero. DST must be disjoint from both | ||||||||
1821 | /// operands. | ||||||||
1822 | static int tcMultiply(WordType *, const WordType *, const WordType *, | ||||||||
1823 | unsigned); | ||||||||
1824 | |||||||||
1825 | /// DST = LHS * RHS, where DST has width the sum of the widths of the | ||||||||
1826 | /// operands. No overflow occurs. DST must be disjoint from both operands. | ||||||||
1827 | static void tcFullMultiply(WordType *, const WordType *, const WordType *, | ||||||||
1828 | unsigned, unsigned); | ||||||||
1829 | |||||||||
1830 | /// If RHS is zero LHS and REMAINDER are left unchanged, return one. | ||||||||
1831 | /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set | ||||||||
1832 | /// REMAINDER to the remainder, return zero. i.e. | ||||||||
1833 | /// | ||||||||
1834 | /// OLD_LHS = RHS * LHS + REMAINDER | ||||||||
1835 | /// | ||||||||
1836 | /// SCRATCH is a bignum of the same size as the operands and result for use by | ||||||||
1837 | /// the routine; its contents need not be initialized and are destroyed. LHS, | ||||||||
1838 | /// REMAINDER and SCRATCH must be distinct. | ||||||||
1839 | static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder, | ||||||||
1840 | WordType *scratch, unsigned parts); | ||||||||
1841 | |||||||||
1842 | /// Shift a bignum left Count bits. Shifted in bits are zero. There are no | ||||||||
1843 | /// restrictions on Count. | ||||||||
1844 | static void tcShiftLeft(WordType *, unsigned Words, unsigned Count); | ||||||||
1845 | |||||||||
1846 | /// Shift a bignum right Count bits. Shifted in bits are zero. There are no | ||||||||
1847 | /// restrictions on Count. | ||||||||
1848 | static void tcShiftRight(WordType *, unsigned Words, unsigned Count); | ||||||||
1849 | |||||||||
1850 | /// Comparison (unsigned) of two bignums. | ||||||||
1851 | static int tcCompare(const WordType *, const WordType *, unsigned); | ||||||||
1852 | |||||||||
1853 | /// Increment a bignum in-place. Return the carry flag. | ||||||||
1854 | static WordType tcIncrement(WordType *dst, unsigned parts) { | ||||||||
1855 | return tcAddPart(dst, 1, parts); | ||||||||
1856 | } | ||||||||
1857 | |||||||||
1858 | /// Decrement a bignum in-place. Return the borrow flag. | ||||||||
1859 | static WordType tcDecrement(WordType *dst, unsigned parts) { | ||||||||
1860 | return tcSubtractPart(dst, 1, parts); | ||||||||
1861 | } | ||||||||
1862 | |||||||||
1863 | /// Used to insert APInt objects, or objects that contain APInt objects, into | ||||||||
1864 | /// FoldingSets. | ||||||||
1865 | void Profile(FoldingSetNodeID &id) const; | ||||||||
1866 | |||||||||
1867 | /// debug method | ||||||||
1868 | void dump() const; | ||||||||
1869 | |||||||||
1870 | /// Returns whether this instance allocated memory. | ||||||||
1871 | bool needsCleanup() const { return !isSingleWord(); } | ||||||||
1872 | |||||||||
1873 | private: | ||||||||
1874 | /// This union is used to store the integer value. When the | ||||||||
1875 | /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. | ||||||||
1876 | union { | ||||||||
1877 | uint64_t VAL; ///< Used to store the <= 64 bits integer value. | ||||||||
1878 | uint64_t *pVal; ///< Used to store the >64 bits integer value. | ||||||||
1879 | } U; | ||||||||
1880 | |||||||||
1881 | unsigned BitWidth = 1; ///< The number of bits in this APInt. | ||||||||
1882 | |||||||||
1883 | friend struct DenseMapInfo<APInt, void>; | ||||||||
1884 | friend class APSInt; | ||||||||
1885 | |||||||||
1886 | /// This constructor is used only internally for speed of construction of | ||||||||
1887 | /// temporaries. It is unsafe since it takes ownership of the pointer, so it | ||||||||
1888 | /// is not public. | ||||||||
1889 | APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; } | ||||||||
1890 | |||||||||
1891 | /// Determine which word a bit is in. | ||||||||
1892 | /// | ||||||||
1893 | /// \returns the word position for the specified bit position. | ||||||||
1894 | static unsigned whichWord(unsigned bitPosition) { | ||||||||
1895 | return bitPosition / APINT_BITS_PER_WORD; | ||||||||
1896 | } | ||||||||
1897 | |||||||||
1898 | /// Determine which bit in a word the specified bit position is in. | ||||||||
1899 | static unsigned whichBit(unsigned bitPosition) { | ||||||||
1900 | return bitPosition % APINT_BITS_PER_WORD; | ||||||||
1901 | } | ||||||||
1902 | |||||||||
1903 | /// Get a single bit mask. | ||||||||
1904 | /// | ||||||||
1905 | /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set | ||||||||
1906 | /// This method generates and returns a uint64_t (word) mask for a single | ||||||||
1907 | /// bit at a specific bit position. This is used to mask the bit in the | ||||||||
1908 | /// corresponding word. | ||||||||
1909 | static uint64_t maskBit(unsigned bitPosition) { | ||||||||
1910 | return 1ULL << whichBit(bitPosition); | ||||||||
1911 | } | ||||||||
1912 | |||||||||
1913 | /// Clear unused high order bits | ||||||||
1914 | /// | ||||||||
1915 | /// This method is used internally to clear the top "N" bits in the high order | ||||||||
1916 | /// word that are not used by the APInt. This is needed after the most | ||||||||
1917 | /// significant word is assigned a value to ensure that those bits are | ||||||||
1918 | /// zero'd out. | ||||||||
1919 | APInt &clearUnusedBits() { | ||||||||
1920 | // Compute how many bits are used in the final word. | ||||||||
1921 | unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1; | ||||||||
1922 | |||||||||
1923 | // Mask out the high bits. | ||||||||
1924 | uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits); | ||||||||
1925 | if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false)) | ||||||||
1926 | mask = 0; | ||||||||
1927 | |||||||||
1928 | if (isSingleWord()) | ||||||||
1929 | U.VAL &= mask; | ||||||||
1930 | else | ||||||||
1931 | U.pVal[getNumWords() - 1] &= mask; | ||||||||
1932 | return *this; | ||||||||
1933 | } | ||||||||
1934 | |||||||||
1935 | /// Get the word corresponding to a bit position | ||||||||
1936 | /// \returns the corresponding word for the specified bit position. | ||||||||
1937 | uint64_t getWord(unsigned bitPosition) const { | ||||||||
1938 | return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; | ||||||||
1939 | } | ||||||||
1940 | |||||||||
1941 | /// Utility method to change the bit width of this APInt to new bit width, | ||||||||
1942 | /// allocating and/or deallocating as necessary. There is no guarantee on the | ||||||||
1943 | /// value of any bits upon return. Caller should populate the bits after. | ||||||||
1944 | void reallocate(unsigned NewBitWidth); | ||||||||
1945 | |||||||||
1946 | /// Convert a char array into an APInt | ||||||||
1947 | /// | ||||||||
1948 | /// \param radix 2, 8, 10, 16, or 36 | ||||||||
1949 | /// Converts a string into a number. The string must be non-empty | ||||||||
1950 | /// and well-formed as a number of the given base. The bit-width | ||||||||
1951 | /// must be sufficient to hold the result. | ||||||||
1952 | /// | ||||||||
1953 | /// This is used by the constructors that take string arguments. | ||||||||
1954 | /// | ||||||||
1955 | /// StringRef::getAsInteger is superficially similar but (1) does | ||||||||
1956 | /// not assume that the string is well-formed and (2) grows the | ||||||||
1957 | /// result to hold the input. | ||||||||
1958 | void fromString(unsigned numBits, StringRef str, uint8_t radix); | ||||||||
1959 | |||||||||
1960 | /// An internal division function for dividing APInts. | ||||||||
1961 | /// | ||||||||
1962 | /// This is used by the toString method to divide by the radix. It simply | ||||||||
1963 | /// provides a more convenient form of divide for internal use since KnuthDiv | ||||||||
1964 | /// has specific constraints on its inputs. If those constraints are not met | ||||||||
1965 | /// then it provides a simpler form of divide. | ||||||||
1966 | static void divide(const WordType *LHS, unsigned lhsWords, | ||||||||
1967 | const WordType *RHS, unsigned rhsWords, WordType *Quotient, | ||||||||
1968 | WordType *Remainder); | ||||||||
1969 | |||||||||
1970 | /// out-of-line slow case for inline constructor | ||||||||
1971 | void initSlowCase(uint64_t val, bool isSigned); | ||||||||
1972 | |||||||||
1973 | /// shared code between two array constructors | ||||||||
1974 | void initFromArray(ArrayRef<uint64_t> array); | ||||||||
1975 | |||||||||
1976 | /// out-of-line slow case for inline copy constructor | ||||||||
1977 | void initSlowCase(const APInt &that); | ||||||||
1978 | |||||||||
1979 | /// out-of-line slow case for shl | ||||||||
1980 | void shlSlowCase(unsigned ShiftAmt); | ||||||||
1981 | |||||||||
1982 | /// out-of-line slow case for lshr. | ||||||||
1983 | void lshrSlowCase(unsigned ShiftAmt); | ||||||||
1984 | |||||||||
1985 | /// out-of-line slow case for ashr. | ||||||||
1986 | void ashrSlowCase(unsigned ShiftAmt); | ||||||||
1987 | |||||||||
1988 | /// out-of-line slow case for operator= | ||||||||
1989 | void assignSlowCase(const APInt &RHS); | ||||||||
1990 | |||||||||
1991 | /// out-of-line slow case for operator== | ||||||||
1992 | bool equalSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
1993 | |||||||||
1994 | /// out-of-line slow case for countLeadingZeros | ||||||||
1995 | unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
1996 | |||||||||
1997 | /// out-of-line slow case for countLeadingOnes. | ||||||||
1998 | unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
1999 | |||||||||
2000 | /// out-of-line slow case for countTrailingZeros. | ||||||||
2001 | unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
2002 | |||||||||
2003 | /// out-of-line slow case for countTrailingOnes | ||||||||
2004 | unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
2005 | |||||||||
2006 | /// out-of-line slow case for countPopulation | ||||||||
2007 | unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
2008 | |||||||||
2009 | /// out-of-line slow case for intersects. | ||||||||
2010 | bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
2011 | |||||||||
2012 | /// out-of-line slow case for isSubsetOf. | ||||||||
2013 | bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
2014 | |||||||||
2015 | /// out-of-line slow case for setBits. | ||||||||
2016 | void setBitsSlowCase(unsigned loBit, unsigned hiBit); | ||||||||
2017 | |||||||||
2018 | /// out-of-line slow case for flipAllBits. | ||||||||
2019 | void flipAllBitsSlowCase(); | ||||||||
2020 | |||||||||
2021 | /// out-of-line slow case for concat. | ||||||||
2022 | APInt concatSlowCase(const APInt &NewLSB) const; | ||||||||
2023 | |||||||||
2024 | /// out-of-line slow case for operator&=. | ||||||||
2025 | void andAssignSlowCase(const APInt &RHS); | ||||||||
2026 | |||||||||
2027 | /// out-of-line slow case for operator|=. | ||||||||
2028 | void orAssignSlowCase(const APInt &RHS); | ||||||||
2029 | |||||||||
2030 | /// out-of-line slow case for operator^=. | ||||||||
2031 | void xorAssignSlowCase(const APInt &RHS); | ||||||||
2032 | |||||||||
2033 | /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal | ||||||||
2034 | /// to, or greater than RHS. | ||||||||
2035 | int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
2036 | |||||||||
2037 | /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal | ||||||||
2038 | /// to, or greater than RHS. | ||||||||
2039 | int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
2040 | |||||||||
2041 | /// @} | ||||||||
2042 | }; | ||||||||
2043 | |||||||||
2044 | inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; } | ||||||||
2045 | |||||||||
2046 | inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; } | ||||||||
2047 | |||||||||
2048 | /// Unary bitwise complement operator. | ||||||||
2049 | /// | ||||||||
2050 | /// \returns an APInt that is the bitwise complement of \p v. | ||||||||
2051 | inline APInt operator~(APInt v) { | ||||||||
2052 | v.flipAllBits(); | ||||||||
2053 | return v; | ||||||||
2054 | } | ||||||||
2055 | |||||||||
2056 | inline APInt operator&(APInt a, const APInt &b) { | ||||||||
2057 | a &= b; | ||||||||
2058 | return a; | ||||||||
2059 | } | ||||||||
2060 | |||||||||
2061 | inline APInt operator&(const APInt &a, APInt &&b) { | ||||||||
2062 | b &= a; | ||||||||
2063 | return std::move(b); | ||||||||
2064 | } | ||||||||
2065 | |||||||||
2066 | inline APInt operator&(APInt a, uint64_t RHS) { | ||||||||
2067 | a &= RHS; | ||||||||
2068 | return a; | ||||||||
2069 | } | ||||||||
2070 | |||||||||
2071 | inline APInt operator&(uint64_t LHS, APInt b) { | ||||||||
2072 | b &= LHS; | ||||||||
2073 | return b; | ||||||||
2074 | } | ||||||||
2075 | |||||||||
2076 | inline APInt operator|(APInt a, const APInt &b) { | ||||||||
2077 | a |= b; | ||||||||
2078 | return a; | ||||||||
2079 | } | ||||||||
2080 | |||||||||
2081 | inline APInt operator|(const APInt &a, APInt &&b) { | ||||||||
2082 | b |= a; | ||||||||
2083 | return std::move(b); | ||||||||
2084 | } | ||||||||
2085 | |||||||||
2086 | inline APInt operator|(APInt a, uint64_t RHS) { | ||||||||
2087 | a |= RHS; | ||||||||
2088 | return a; | ||||||||
2089 | } | ||||||||
2090 | |||||||||
2091 | inline APInt operator|(uint64_t LHS, APInt b) { | ||||||||
2092 | b |= LHS; | ||||||||
2093 | return b; | ||||||||
2094 | } | ||||||||
2095 | |||||||||
2096 | inline APInt operator^(APInt a, const APInt &b) { | ||||||||
2097 | a ^= b; | ||||||||
2098 | return a; | ||||||||
2099 | } | ||||||||
2100 | |||||||||
2101 | inline APInt operator^(const APInt &a, APInt &&b) { | ||||||||
2102 | b ^= a; | ||||||||
2103 | return std::move(b); | ||||||||
2104 | } | ||||||||
2105 | |||||||||
2106 | inline APInt operator^(APInt a, uint64_t RHS) { | ||||||||
2107 | a ^= RHS; | ||||||||
2108 | return a; | ||||||||
2109 | } | ||||||||
2110 | |||||||||
2111 | inline APInt operator^(uint64_t LHS, APInt b) { | ||||||||
2112 | b ^= LHS; | ||||||||
2113 | return b; | ||||||||
2114 | } | ||||||||
2115 | |||||||||
2116 | inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) { | ||||||||
2117 | I.print(OS, true); | ||||||||
2118 | return OS; | ||||||||
2119 | } | ||||||||
2120 | |||||||||
2121 | inline APInt operator-(APInt v) { | ||||||||
2122 | v.negate(); | ||||||||
2123 | return v; | ||||||||
2124 | } | ||||||||
2125 | |||||||||
2126 | inline APInt operator+(APInt a, const APInt &b) { | ||||||||
2127 | a += b; | ||||||||
2128 | return a; | ||||||||
2129 | } | ||||||||
2130 | |||||||||
2131 | inline APInt operator+(const APInt &a, APInt &&b) { | ||||||||
2132 | b += a; | ||||||||
2133 | return std::move(b); | ||||||||
2134 | } | ||||||||
2135 | |||||||||
2136 | inline APInt operator+(APInt a, uint64_t RHS) { | ||||||||
2137 | a += RHS; | ||||||||
2138 | return a; | ||||||||
2139 | } | ||||||||
2140 | |||||||||
2141 | inline APInt operator+(uint64_t LHS, APInt b) { | ||||||||
2142 | b += LHS; | ||||||||
2143 | return b; | ||||||||
2144 | } | ||||||||
2145 | |||||||||
2146 | inline APInt operator-(APInt a, const APInt &b) { | ||||||||
2147 | a -= b; | ||||||||
2148 | return a; | ||||||||
2149 | } | ||||||||
2150 | |||||||||
2151 | inline APInt operator-(const APInt &a, APInt &&b) { | ||||||||
2152 | b.negate(); | ||||||||
2153 | b += a; | ||||||||
2154 | return std::move(b); | ||||||||
2155 | } | ||||||||
2156 | |||||||||
2157 | inline APInt operator-(APInt a, uint64_t RHS) { | ||||||||
2158 | a -= RHS; | ||||||||
2159 | return a; | ||||||||
2160 | } | ||||||||
2161 | |||||||||
2162 | inline APInt operator-(uint64_t LHS, APInt b) { | ||||||||
2163 | b.negate(); | ||||||||
2164 | b += LHS; | ||||||||
2165 | return b; | ||||||||
2166 | } | ||||||||
2167 | |||||||||
2168 | inline APInt operator*(APInt a, uint64_t RHS) { | ||||||||
2169 | a *= RHS; | ||||||||
2170 | return a; | ||||||||
2171 | } | ||||||||
2172 | |||||||||
2173 | inline APInt operator*(uint64_t LHS, APInt b) { | ||||||||
2174 | b *= LHS; | ||||||||
2175 | return b; | ||||||||
2176 | } | ||||||||
2177 | |||||||||
2178 | namespace APIntOps { | ||||||||
2179 | |||||||||
2180 | /// Determine the smaller of two APInts considered to be signed. | ||||||||
2181 | inline const APInt &smin(const APInt &A, const APInt &B) { | ||||||||
2182 | return A.slt(B) ? A : B; | ||||||||
2183 | } | ||||||||
2184 | |||||||||
2185 | /// Determine the larger of two APInts considered to be signed. | ||||||||
2186 | inline const APInt &smax(const APInt &A, const APInt &B) { | ||||||||
2187 | return A.sgt(B) ? A : B; | ||||||||
2188 | } | ||||||||
2189 | |||||||||
2190 | /// Determine the smaller of two APInts considered to be unsigned. | ||||||||
2191 | inline const APInt &umin(const APInt &A, const APInt &B) { | ||||||||
2192 | return A.ult(B) ? A : B; | ||||||||
2193 | } | ||||||||
2194 | |||||||||
2195 | /// Determine the larger of two APInts considered to be unsigned. | ||||||||
2196 | inline const APInt &umax(const APInt &A, const APInt &B) { | ||||||||
2197 | return A.ugt(B) ? A : B; | ||||||||
2198 | } | ||||||||
2199 | |||||||||
2200 | /// Compute GCD of two unsigned APInt values. | ||||||||
2201 | /// | ||||||||
2202 | /// This function returns the greatest common divisor of the two APInt values | ||||||||
2203 | /// using Stein's algorithm. | ||||||||
2204 | /// | ||||||||
2205 | /// \returns the greatest common divisor of A and B. | ||||||||
2206 | APInt GreatestCommonDivisor(APInt A, APInt B); | ||||||||
2207 | |||||||||
2208 | /// Converts the given APInt to a double value. | ||||||||
2209 | /// | ||||||||
2210 | /// Treats the APInt as an unsigned value for conversion purposes. | ||||||||
2211 | inline double RoundAPIntToDouble(const APInt &APIVal) { | ||||||||
2212 | return APIVal.roundToDouble(); | ||||||||
2213 | } | ||||||||
2214 | |||||||||
2215 | /// Converts the given APInt to a double value. | ||||||||
2216 | /// | ||||||||
2217 | /// Treats the APInt as a signed value for conversion purposes. | ||||||||
2218 | inline double RoundSignedAPIntToDouble(const APInt &APIVal) { | ||||||||
2219 | return APIVal.signedRoundToDouble(); | ||||||||
2220 | } | ||||||||
2221 | |||||||||
2222 | /// Converts the given APInt to a float value. | ||||||||
2223 | inline float RoundAPIntToFloat(const APInt &APIVal) { | ||||||||
2224 | return float(RoundAPIntToDouble(APIVal)); | ||||||||
2225 | } | ||||||||
2226 | |||||||||
2227 | /// Converts the given APInt to a float value. | ||||||||
2228 | /// | ||||||||
2229 | /// Treats the APInt as a signed value for conversion purposes. | ||||||||
2230 | inline float RoundSignedAPIntToFloat(const APInt &APIVal) { | ||||||||
2231 | return float(APIVal.signedRoundToDouble()); | ||||||||
2232 | } | ||||||||
2233 | |||||||||
2234 | /// Converts the given double value into a APInt. | ||||||||
2235 | /// | ||||||||
2236 | /// This function convert a double value to an APInt value. | ||||||||
2237 | APInt RoundDoubleToAPInt(double Double, unsigned width); | ||||||||
2238 | |||||||||
2239 | /// Converts a float value into a APInt. | ||||||||
2240 | /// | ||||||||
2241 | /// Converts a float value into an APInt value. | ||||||||
2242 | inline APInt RoundFloatToAPInt(float Float, unsigned width) { | ||||||||
2243 | return RoundDoubleToAPInt(double(Float), width); | ||||||||
2244 | } | ||||||||
2245 | |||||||||
2246 | /// Return A unsign-divided by B, rounded by the given rounding mode. | ||||||||
2247 | APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM); | ||||||||
2248 | |||||||||
2249 | /// Return A sign-divided by B, rounded by the given rounding mode. | ||||||||
2250 | APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM); | ||||||||
2251 | |||||||||
2252 | /// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range | ||||||||
2253 | /// (e.g. 32 for i32). | ||||||||
2254 | /// This function finds the smallest number n, such that | ||||||||
2255 | /// (a) n >= 0 and q(n) = 0, or | ||||||||
2256 | /// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all | ||||||||
2257 | /// integers, belong to two different intervals [Rk, Rk+R), | ||||||||
2258 | /// where R = 2^BW, and k is an integer. | ||||||||
2259 | /// The idea here is to find when q(n) "overflows" 2^BW, while at the | ||||||||
2260 | /// same time "allowing" subtraction. In unsigned modulo arithmetic a | ||||||||
2261 | /// subtraction (treated as addition of negated numbers) would always | ||||||||
2262 | /// count as an overflow, but here we want to allow values to decrease | ||||||||
2263 | /// and increase as long as they are within the same interval. | ||||||||
2264 | /// Specifically, adding of two negative numbers should not cause an | ||||||||
2265 | /// overflow (as long as the magnitude does not exceed the bit width). | ||||||||
2266 | /// On the other hand, given a positive number, adding a negative | ||||||||
2267 | /// number to it can give a negative result, which would cause the | ||||||||
2268 | /// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is | ||||||||
2269 | /// treated as a special case of an overflow. | ||||||||
2270 | /// | ||||||||
2271 | /// This function returns std::nullopt if after finding k that minimizes the | ||||||||
2272 | /// positive solution to q(n) = kR, both solutions are contained between | ||||||||
2273 | /// two consecutive integers. | ||||||||
2274 | /// | ||||||||
2275 | /// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation | ||||||||
2276 | /// in arithmetic modulo 2^BW, and treating the values as signed) by the | ||||||||
2277 | /// virtue of *signed* overflow. This function will *not* find such an n, | ||||||||
2278 | /// however it may find a value of n satisfying the inequalities due to | ||||||||
2279 | /// an *unsigned* overflow (if the values are treated as unsigned). | ||||||||
2280 | /// To find a solution for a signed overflow, treat it as a problem of | ||||||||
2281 | /// finding an unsigned overflow with a range with of BW-1. | ||||||||
2282 | /// | ||||||||
2283 | /// The returned value may have a different bit width from the input | ||||||||
2284 | /// coefficients. | ||||||||
2285 | std::optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, | ||||||||
2286 | unsigned RangeWidth); | ||||||||
2287 | |||||||||
2288 | /// Compare two values, and if they are different, return the position of the | ||||||||
2289 | /// most significant bit that is different in the values. | ||||||||
2290 | std::optional<unsigned> GetMostSignificantDifferentBit(const APInt &A, | ||||||||
2291 | const APInt &B); | ||||||||
2292 | |||||||||
2293 | /// Splat/Merge neighboring bits to widen/narrow the bitmask represented | ||||||||
2294 | /// by \param A to \param NewBitWidth bits. | ||||||||
2295 | /// | ||||||||
2296 | /// MatchAnyBits: (Default) | ||||||||
2297 | /// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011 | ||||||||
2298 | /// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111 | ||||||||
2299 | /// | ||||||||
2300 | /// MatchAllBits: | ||||||||
2301 | /// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011 | ||||||||
2302 | /// e.g. ScaleBitMask(0b00011011, 4) -> 0b0001 | ||||||||
2303 | /// A.getBitwidth() or NewBitWidth must be a whole multiples of the other. | ||||||||
2304 | APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth, | ||||||||
2305 | bool MatchAllBits = false); | ||||||||
2306 | } // namespace APIntOps | ||||||||
2307 | |||||||||
2308 | // See friend declaration above. This additional declaration is required in | ||||||||
2309 | // order to compile LLVM with IBM xlC compiler. | ||||||||
2310 | hash_code hash_value(const APInt &Arg); | ||||||||
2311 | |||||||||
2312 | /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst | ||||||||
2313 | /// with the integer held in IntVal. | ||||||||
2314 | void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes); | ||||||||
2315 | |||||||||
2316 | /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting | ||||||||
2317 | /// from Src into IntVal, which is assumed to be wide enough and to hold zero. | ||||||||
2318 | void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes); | ||||||||
2319 | |||||||||
2320 | /// Provide DenseMapInfo for APInt. | ||||||||
2321 | template <> struct DenseMapInfo<APInt, void> { | ||||||||
2322 | static inline APInt getEmptyKey() { | ||||||||
2323 | APInt V(nullptr, 0); | ||||||||
2324 | V.U.VAL = ~0ULL; | ||||||||
2325 | return V; | ||||||||
2326 | } | ||||||||
2327 | |||||||||
2328 | static inline APInt getTombstoneKey() { | ||||||||
2329 | APInt V(nullptr, 0); | ||||||||
2330 | V.U.VAL = ~1ULL; | ||||||||
2331 | return V; | ||||||||
2332 | } | ||||||||
2333 | |||||||||
2334 | static unsigned getHashValue(const APInt &Key); | ||||||||
2335 | |||||||||
2336 | static bool isEqual(const APInt &LHS, const APInt &RHS) { | ||||||||
2337 | return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS; | ||||||||
2338 | } | ||||||||
2339 | }; | ||||||||
2340 | |||||||||
2341 | } // namespace llvm | ||||||||
2342 | |||||||||
2343 | #endif |