File: | llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp |
Warning: | line 1565, column 21 The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- HexagonLoopIdiomRecognition.cpp ------------------------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | ||||
9 | #include "HexagonLoopIdiomRecognition.h" | |||
10 | #include "llvm/ADT/APInt.h" | |||
11 | #include "llvm/ADT/DenseMap.h" | |||
12 | #include "llvm/ADT/SetVector.h" | |||
13 | #include "llvm/ADT/SmallPtrSet.h" | |||
14 | #include "llvm/ADT/SmallSet.h" | |||
15 | #include "llvm/ADT/SmallVector.h" | |||
16 | #include "llvm/ADT/StringRef.h" | |||
17 | #include "llvm/ADT/Triple.h" | |||
18 | #include "llvm/Analysis/AliasAnalysis.h" | |||
19 | #include "llvm/Analysis/InstructionSimplify.h" | |||
20 | #include "llvm/Analysis/LoopAnalysisManager.h" | |||
21 | #include "llvm/Analysis/LoopInfo.h" | |||
22 | #include "llvm/Analysis/LoopPass.h" | |||
23 | #include "llvm/Analysis/MemoryLocation.h" | |||
24 | #include "llvm/Analysis/ScalarEvolution.h" | |||
25 | #include "llvm/Analysis/ScalarEvolutionExpressions.h" | |||
26 | #include "llvm/Analysis/TargetLibraryInfo.h" | |||
27 | #include "llvm/Analysis/ValueTracking.h" | |||
28 | #include "llvm/IR/Attributes.h" | |||
29 | #include "llvm/IR/BasicBlock.h" | |||
30 | #include "llvm/IR/Constant.h" | |||
31 | #include "llvm/IR/Constants.h" | |||
32 | #include "llvm/IR/DataLayout.h" | |||
33 | #include "llvm/IR/DebugLoc.h" | |||
34 | #include "llvm/IR/DerivedTypes.h" | |||
35 | #include "llvm/IR/Dominators.h" | |||
36 | #include "llvm/IR/Function.h" | |||
37 | #include "llvm/IR/IRBuilder.h" | |||
38 | #include "llvm/IR/InstrTypes.h" | |||
39 | #include "llvm/IR/Instruction.h" | |||
40 | #include "llvm/IR/Instructions.h" | |||
41 | #include "llvm/IR/IntrinsicInst.h" | |||
42 | #include "llvm/IR/Intrinsics.h" | |||
43 | #include "llvm/IR/IntrinsicsHexagon.h" | |||
44 | #include "llvm/IR/Module.h" | |||
45 | #include "llvm/IR/PassManager.h" | |||
46 | #include "llvm/IR/PatternMatch.h" | |||
47 | #include "llvm/IR/Type.h" | |||
48 | #include "llvm/IR/User.h" | |||
49 | #include "llvm/IR/Value.h" | |||
50 | #include "llvm/InitializePasses.h" | |||
51 | #include "llvm/Pass.h" | |||
52 | #include "llvm/Support/Casting.h" | |||
53 | #include "llvm/Support/CommandLine.h" | |||
54 | #include "llvm/Support/Compiler.h" | |||
55 | #include "llvm/Support/Debug.h" | |||
56 | #include "llvm/Support/ErrorHandling.h" | |||
57 | #include "llvm/Support/KnownBits.h" | |||
58 | #include "llvm/Support/raw_ostream.h" | |||
59 | #include "llvm/Transforms/Scalar.h" | |||
60 | #include "llvm/Transforms/Utils.h" | |||
61 | #include "llvm/Transforms/Utils/Local.h" | |||
62 | #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" | |||
63 | #include <algorithm> | |||
64 | #include <array> | |||
65 | #include <cassert> | |||
66 | #include <cstdint> | |||
67 | #include <cstdlib> | |||
68 | #include <deque> | |||
69 | #include <functional> | |||
70 | #include <iterator> | |||
71 | #include <map> | |||
72 | #include <set> | |||
73 | #include <utility> | |||
74 | #include <vector> | |||
75 | ||||
76 | #define DEBUG_TYPE"hexagon-lir" "hexagon-lir" | |||
77 | ||||
78 | using namespace llvm; | |||
79 | ||||
80 | static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom", | |||
81 | cl::Hidden, cl::init(false), | |||
82 | cl::desc("Disable generation of memcpy in loop idiom recognition")); | |||
83 | ||||
84 | static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom", | |||
85 | cl::Hidden, cl::init(false), | |||
86 | cl::desc("Disable generation of memmove in loop idiom recognition")); | |||
87 | ||||
88 | static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold", | |||
89 | cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime " | |||
90 | "check guarding the memmove.")); | |||
91 | ||||
92 | static cl::opt<unsigned> CompileTimeMemSizeThreshold( | |||
93 | "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64), | |||
94 | cl::desc("Threshold (in bytes) to perform the transformation, if the " | |||
95 | "runtime loop count (mem transfer size) is known at compile-time.")); | |||
96 | ||||
97 | static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom", | |||
98 | cl::Hidden, cl::init(true), | |||
99 | cl::desc("Only enable generating memmove in non-nested loops")); | |||
100 | ||||
101 | static cl::opt<bool> HexagonVolatileMemcpy( | |||
102 | "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false), | |||
103 | cl::desc("Enable Hexagon-specific memcpy for volatile destination.")); | |||
104 | ||||
105 | static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000), | |||
106 | cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR")); | |||
107 | ||||
108 | static const char *HexagonVolatileMemcpyName | |||
109 | = "hexagon_memcpy_forward_vp4cp4n2"; | |||
110 | ||||
111 | ||||
112 | namespace llvm { | |||
113 | ||||
114 | void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &); | |||
115 | Pass *createHexagonLoopIdiomPass(); | |||
116 | ||||
117 | } // end namespace llvm | |||
118 | ||||
119 | namespace { | |||
120 | ||||
121 | class HexagonLoopIdiomRecognize { | |||
122 | public: | |||
123 | explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT, | |||
124 | LoopInfo *LF, const TargetLibraryInfo *TLI, | |||
125 | ScalarEvolution *SE) | |||
126 | : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {} | |||
127 | ||||
128 | bool run(Loop *L); | |||
129 | ||||
130 | private: | |||
131 | int getSCEVStride(const SCEVAddRecExpr *StoreEv); | |||
132 | bool isLegalStore(Loop *CurLoop, StoreInst *SI); | |||
133 | void collectStores(Loop *CurLoop, BasicBlock *BB, | |||
134 | SmallVectorImpl<StoreInst *> &Stores); | |||
135 | bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount); | |||
136 | bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const; | |||
137 | bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount, | |||
138 | SmallVectorImpl<BasicBlock *> &ExitBlocks); | |||
139 | bool runOnCountableLoop(Loop *L); | |||
140 | ||||
141 | AliasAnalysis *AA; | |||
142 | const DataLayout *DL; | |||
143 | DominatorTree *DT; | |||
144 | LoopInfo *LF; | |||
145 | const TargetLibraryInfo *TLI; | |||
146 | ScalarEvolution *SE; | |||
147 | bool HasMemcpy, HasMemmove; | |||
148 | }; | |||
149 | ||||
150 | class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass { | |||
151 | public: | |||
152 | static char ID; | |||
153 | ||||
154 | explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) { | |||
155 | initializeHexagonLoopIdiomRecognizeLegacyPassPass( | |||
156 | *PassRegistry::getPassRegistry()); | |||
157 | } | |||
158 | ||||
159 | StringRef getPassName() const override { | |||
160 | return "Recognize Hexagon-specific loop idioms"; | |||
161 | } | |||
162 | ||||
163 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
164 | AU.addRequired<LoopInfoWrapperPass>(); | |||
165 | AU.addRequiredID(LoopSimplifyID); | |||
166 | AU.addRequiredID(LCSSAID); | |||
167 | AU.addRequired<AAResultsWrapperPass>(); | |||
168 | AU.addRequired<ScalarEvolutionWrapperPass>(); | |||
169 | AU.addRequired<DominatorTreeWrapperPass>(); | |||
170 | AU.addRequired<TargetLibraryInfoWrapperPass>(); | |||
171 | AU.addPreserved<TargetLibraryInfoWrapperPass>(); | |||
172 | } | |||
173 | ||||
174 | bool runOnLoop(Loop *L, LPPassManager &LPM) override; | |||
175 | }; | |||
176 | ||||
177 | struct Simplifier { | |||
178 | struct Rule { | |||
179 | using FuncType = std::function<Value *(Instruction *, LLVMContext &)>; | |||
180 | Rule(StringRef N, FuncType F) : Name(N), Fn(F) {} | |||
181 | StringRef Name; // For debugging. | |||
182 | FuncType Fn; | |||
183 | }; | |||
184 | ||||
185 | void addRule(StringRef N, const Rule::FuncType &F) { | |||
186 | Rules.push_back(Rule(N, F)); | |||
187 | } | |||
188 | ||||
189 | private: | |||
190 | struct WorkListType { | |||
191 | WorkListType() = default; | |||
192 | ||||
193 | void push_back(Value *V) { | |||
194 | // Do not push back duplicates. | |||
195 | if (!S.count(V)) { | |||
196 | Q.push_back(V); | |||
197 | S.insert(V); | |||
198 | } | |||
199 | } | |||
200 | ||||
201 | Value *pop_front_val() { | |||
202 | Value *V = Q.front(); | |||
203 | Q.pop_front(); | |||
204 | S.erase(V); | |||
205 | return V; | |||
206 | } | |||
207 | ||||
208 | bool empty() const { return Q.empty(); } | |||
209 | ||||
210 | private: | |||
211 | std::deque<Value *> Q; | |||
212 | std::set<Value *> S; | |||
213 | }; | |||
214 | ||||
215 | using ValueSetType = std::set<Value *>; | |||
216 | ||||
217 | std::vector<Rule> Rules; | |||
218 | ||||
219 | public: | |||
220 | struct Context { | |||
221 | using ValueMapType = DenseMap<Value *, Value *>; | |||
222 | ||||
223 | Value *Root; | |||
224 | ValueSetType Used; // The set of all cloned values used by Root. | |||
225 | ValueSetType Clones; // The set of all cloned values. | |||
226 | LLVMContext &Ctx; | |||
227 | ||||
228 | Context(Instruction *Exp) | |||
229 | : Ctx(Exp->getParent()->getParent()->getContext()) { | |||
230 | initialize(Exp); | |||
231 | } | |||
232 | ||||
233 | ~Context() { cleanup(); } | |||
234 | ||||
235 | void print(raw_ostream &OS, const Value *V) const; | |||
236 | Value *materialize(BasicBlock *B, BasicBlock::iterator At); | |||
237 | ||||
238 | private: | |||
239 | friend struct Simplifier; | |||
240 | ||||
241 | void initialize(Instruction *Exp); | |||
242 | void cleanup(); | |||
243 | ||||
244 | template <typename FuncT> void traverse(Value *V, FuncT F); | |||
245 | void record(Value *V); | |||
246 | void use(Value *V); | |||
247 | void unuse(Value *V); | |||
248 | ||||
249 | bool equal(const Instruction *I, const Instruction *J) const; | |||
250 | Value *find(Value *Tree, Value *Sub) const; | |||
251 | Value *subst(Value *Tree, Value *OldV, Value *NewV); | |||
252 | void replace(Value *OldV, Value *NewV); | |||
253 | void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At); | |||
254 | }; | |||
255 | ||||
256 | Value *simplify(Context &C); | |||
257 | }; | |||
258 | ||||
259 | struct PE { | |||
260 | PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {} | |||
261 | ||||
262 | const Simplifier::Context &C; | |||
263 | const Value *V; | |||
264 | }; | |||
265 | ||||
266 | LLVM_ATTRIBUTE_USED__attribute__((__used__)) | |||
267 | raw_ostream &operator<<(raw_ostream &OS, const PE &P) { | |||
268 | P.C.print(OS, P.V ? P.V : P.C.Root); | |||
269 | return OS; | |||
270 | } | |||
271 | ||||
272 | } // end anonymous namespace | |||
273 | ||||
274 | char HexagonLoopIdiomRecognizeLegacyPass::ID = 0; | |||
275 | ||||
276 | INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce (PassRegistry &Registry) { | |||
277 | "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce (PassRegistry &Registry) { | |||
278 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); | |||
279 | INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry); | |||
280 | INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry); | |||
281 | INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry); | |||
282 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | |||
283 | INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry); | |||
284 | INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry); | |||
285 | INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms" , "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass ::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag , initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std:: ref(Registry)); } | |||
286 | "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms" , "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass ::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag ; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass (PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag , initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std:: ref(Registry)); } | |||
287 | ||||
288 | template <typename FuncT> | |||
289 | void Simplifier::Context::traverse(Value *V, FuncT F) { | |||
290 | WorkListType Q; | |||
291 | Q.push_back(V); | |||
292 | ||||
293 | while (!Q.empty()) { | |||
294 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
295 | if (!U || U->getParent()) | |||
296 | continue; | |||
297 | if (!F(U)) | |||
298 | continue; | |||
299 | for (Value *Op : U->operands()) | |||
300 | Q.push_back(Op); | |||
301 | } | |||
302 | } | |||
303 | ||||
304 | void Simplifier::Context::print(raw_ostream &OS, const Value *V) const { | |||
305 | const auto *U = dyn_cast<const Instruction>(V); | |||
306 | if (!U) { | |||
307 | OS << V << '(' << *V << ')'; | |||
308 | return; | |||
309 | } | |||
310 | ||||
311 | if (U->getParent()) { | |||
312 | OS << U << '('; | |||
313 | U->printAsOperand(OS, true); | |||
314 | OS << ')'; | |||
315 | return; | |||
316 | } | |||
317 | ||||
318 | unsigned N = U->getNumOperands(); | |||
319 | if (N != 0) | |||
320 | OS << U << '('; | |||
321 | OS << U->getOpcodeName(); | |||
322 | for (const Value *Op : U->operands()) { | |||
323 | OS << ' '; | |||
324 | print(OS, Op); | |||
325 | } | |||
326 | if (N != 0) | |||
327 | OS << ')'; | |||
328 | } | |||
329 | ||||
330 | void Simplifier::Context::initialize(Instruction *Exp) { | |||
331 | // Perform a deep clone of the expression, set Root to the root | |||
332 | // of the clone, and build a map from the cloned values to the | |||
333 | // original ones. | |||
334 | ValueMapType M; | |||
335 | BasicBlock *Block = Exp->getParent(); | |||
336 | WorkListType Q; | |||
337 | Q.push_back(Exp); | |||
338 | ||||
339 | while (!Q.empty()) { | |||
340 | Value *V = Q.pop_front_val(); | |||
341 | if (M.find(V) != M.end()) | |||
342 | continue; | |||
343 | if (Instruction *U = dyn_cast<Instruction>(V)) { | |||
344 | if (isa<PHINode>(U) || U->getParent() != Block) | |||
345 | continue; | |||
346 | for (Value *Op : U->operands()) | |||
347 | Q.push_back(Op); | |||
348 | M.insert({U, U->clone()}); | |||
349 | } | |||
350 | } | |||
351 | ||||
352 | for (std::pair<Value*,Value*> P : M) { | |||
353 | Instruction *U = cast<Instruction>(P.second); | |||
354 | for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { | |||
355 | auto F = M.find(U->getOperand(i)); | |||
356 | if (F != M.end()) | |||
357 | U->setOperand(i, F->second); | |||
358 | } | |||
359 | } | |||
360 | ||||
361 | auto R = M.find(Exp); | |||
362 | assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail ("R != M.end()", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 362, __extension__ __PRETTY_FUNCTION__)); | |||
363 | Root = R->second; | |||
364 | ||||
365 | record(Root); | |||
366 | use(Root); | |||
367 | } | |||
368 | ||||
369 | void Simplifier::Context::record(Value *V) { | |||
370 | auto Record = [this](Instruction *U) -> bool { | |||
371 | Clones.insert(U); | |||
372 | return true; | |||
373 | }; | |||
374 | traverse(V, Record); | |||
375 | } | |||
376 | ||||
377 | void Simplifier::Context::use(Value *V) { | |||
378 | auto Use = [this](Instruction *U) -> bool { | |||
379 | Used.insert(U); | |||
380 | return true; | |||
381 | }; | |||
382 | traverse(V, Use); | |||
383 | } | |||
384 | ||||
385 | void Simplifier::Context::unuse(Value *V) { | |||
386 | if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr) | |||
387 | return; | |||
388 | ||||
389 | auto Unuse = [this](Instruction *U) -> bool { | |||
390 | if (!U->use_empty()) | |||
391 | return false; | |||
392 | Used.erase(U); | |||
393 | return true; | |||
394 | }; | |||
395 | traverse(V, Unuse); | |||
396 | } | |||
397 | ||||
398 | Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) { | |||
399 | if (Tree == OldV) | |||
400 | return NewV; | |||
401 | if (OldV == NewV) | |||
402 | return Tree; | |||
403 | ||||
404 | WorkListType Q; | |||
405 | Q.push_back(Tree); | |||
406 | while (!Q.empty()) { | |||
407 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
408 | // If U is not an instruction, or it's not a clone, skip it. | |||
409 | if (!U || U->getParent()) | |||
410 | continue; | |||
411 | for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) { | |||
412 | Value *Op = U->getOperand(i); | |||
413 | if (Op == OldV) { | |||
414 | U->setOperand(i, NewV); | |||
415 | unuse(OldV); | |||
416 | } else { | |||
417 | Q.push_back(Op); | |||
418 | } | |||
419 | } | |||
420 | } | |||
421 | return Tree; | |||
422 | } | |||
423 | ||||
424 | void Simplifier::Context::replace(Value *OldV, Value *NewV) { | |||
425 | if (Root == OldV) { | |||
426 | Root = NewV; | |||
427 | use(Root); | |||
428 | return; | |||
429 | } | |||
430 | ||||
431 | // NewV may be a complex tree that has just been created by one of the | |||
432 | // transformation rules. We need to make sure that it is commoned with | |||
433 | // the existing Root to the maximum extent possible. | |||
434 | // Identify all subtrees of NewV (including NewV itself) that have | |||
435 | // equivalent counterparts in Root, and replace those subtrees with | |||
436 | // these counterparts. | |||
437 | WorkListType Q; | |||
438 | Q.push_back(NewV); | |||
439 | while (!Q.empty()) { | |||
440 | Value *V = Q.pop_front_val(); | |||
441 | Instruction *U = dyn_cast<Instruction>(V); | |||
442 | if (!U || U->getParent()) | |||
443 | continue; | |||
444 | if (Value *DupV = find(Root, V)) { | |||
445 | if (DupV != V) | |||
446 | NewV = subst(NewV, V, DupV); | |||
447 | } else { | |||
448 | for (Value *Op : U->operands()) | |||
449 | Q.push_back(Op); | |||
450 | } | |||
451 | } | |||
452 | ||||
453 | // Now, simply replace OldV with NewV in Root. | |||
454 | Root = subst(Root, OldV, NewV); | |||
455 | use(Root); | |||
456 | } | |||
457 | ||||
458 | void Simplifier::Context::cleanup() { | |||
459 | for (Value *V : Clones) { | |||
460 | Instruction *U = cast<Instruction>(V); | |||
461 | if (!U->getParent()) | |||
462 | U->dropAllReferences(); | |||
463 | } | |||
464 | ||||
465 | for (Value *V : Clones) { | |||
466 | Instruction *U = cast<Instruction>(V); | |||
467 | if (!U->getParent()) | |||
468 | U->deleteValue(); | |||
469 | } | |||
470 | } | |||
471 | ||||
472 | bool Simplifier::Context::equal(const Instruction *I, | |||
473 | const Instruction *J) const { | |||
474 | if (I == J) | |||
475 | return true; | |||
476 | if (!I->isSameOperationAs(J)) | |||
477 | return false; | |||
478 | if (isa<PHINode>(I)) | |||
479 | return I->isIdenticalTo(J); | |||
480 | ||||
481 | for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) { | |||
482 | Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i); | |||
483 | if (OpI == OpJ) | |||
484 | continue; | |||
485 | auto *InI = dyn_cast<const Instruction>(OpI); | |||
486 | auto *InJ = dyn_cast<const Instruction>(OpJ); | |||
487 | if (InI && InJ) { | |||
488 | if (!equal(InI, InJ)) | |||
489 | return false; | |||
490 | } else if (InI != InJ || !InI) | |||
491 | return false; | |||
492 | } | |||
493 | return true; | |||
494 | } | |||
495 | ||||
496 | Value *Simplifier::Context::find(Value *Tree, Value *Sub) const { | |||
497 | Instruction *SubI = dyn_cast<Instruction>(Sub); | |||
498 | WorkListType Q; | |||
499 | Q.push_back(Tree); | |||
500 | ||||
501 | while (!Q.empty()) { | |||
502 | Value *V = Q.pop_front_val(); | |||
503 | if (V == Sub) | |||
504 | return V; | |||
505 | Instruction *U = dyn_cast<Instruction>(V); | |||
506 | if (!U || U->getParent()) | |||
507 | continue; | |||
508 | if (SubI && equal(SubI, U)) | |||
509 | return U; | |||
510 | assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0) : __assert_fail ("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 510, __extension__ __PRETTY_FUNCTION__)); | |||
511 | for (Value *Op : U->operands()) | |||
512 | Q.push_back(Op); | |||
513 | } | |||
514 | return nullptr; | |||
515 | } | |||
516 | ||||
517 | void Simplifier::Context::link(Instruction *I, BasicBlock *B, | |||
518 | BasicBlock::iterator At) { | |||
519 | if (I->getParent()) | |||
520 | return; | |||
521 | ||||
522 | for (Value *Op : I->operands()) { | |||
523 | if (Instruction *OpI = dyn_cast<Instruction>(Op)) | |||
524 | link(OpI, B, At); | |||
525 | } | |||
526 | ||||
527 | B->getInstList().insert(At, I); | |||
528 | } | |||
529 | ||||
530 | Value *Simplifier::Context::materialize(BasicBlock *B, | |||
531 | BasicBlock::iterator At) { | |||
532 | if (Instruction *RootI = dyn_cast<Instruction>(Root)) | |||
533 | link(RootI, B, At); | |||
534 | return Root; | |||
535 | } | |||
536 | ||||
537 | Value *Simplifier::simplify(Context &C) { | |||
538 | WorkListType Q; | |||
539 | Q.push_back(C.Root); | |||
540 | unsigned Count = 0; | |||
541 | const unsigned Limit = SimplifyLimit; | |||
542 | ||||
543 | while (!Q.empty()) { | |||
544 | if (Count++ >= Limit) | |||
545 | break; | |||
546 | Instruction *U = dyn_cast<Instruction>(Q.pop_front_val()); | |||
547 | if (!U || U->getParent() || !C.Used.count(U)) | |||
548 | continue; | |||
549 | bool Changed = false; | |||
550 | for (Rule &R : Rules) { | |||
551 | Value *W = R.Fn(U, C.Ctx); | |||
552 | if (!W) | |||
553 | continue; | |||
554 | Changed = true; | |||
555 | C.record(W); | |||
556 | C.replace(U, W); | |||
557 | Q.push_back(C.Root); | |||
558 | break; | |||
559 | } | |||
560 | if (!Changed) { | |||
561 | for (Value *Op : U->operands()) | |||
562 | Q.push_back(Op); | |||
563 | } | |||
564 | } | |||
565 | return Count < Limit ? C.Root : nullptr; | |||
566 | } | |||
567 | ||||
568 | //===----------------------------------------------------------------------===// | |||
569 | // | |||
570 | // Implementation of PolynomialMultiplyRecognize | |||
571 | // | |||
572 | //===----------------------------------------------------------------------===// | |||
573 | ||||
574 | namespace { | |||
575 | ||||
576 | class PolynomialMultiplyRecognize { | |||
577 | public: | |||
578 | explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl, | |||
579 | const DominatorTree &dt, const TargetLibraryInfo &tli, | |||
580 | ScalarEvolution &se) | |||
581 | : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {} | |||
582 | ||||
583 | bool recognize(); | |||
584 | ||||
585 | private: | |||
586 | using ValueSeq = SetVector<Value *>; | |||
587 | ||||
588 | IntegerType *getPmpyType() const { | |||
589 | LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext(); | |||
590 | return IntegerType::get(Ctx, 32); | |||
591 | } | |||
592 | ||||
593 | bool isPromotableTo(Value *V, IntegerType *Ty); | |||
594 | void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB); | |||
595 | bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB); | |||
596 | ||||
597 | Value *getCountIV(BasicBlock *BB); | |||
598 | bool findCycle(Value *Out, Value *In, ValueSeq &Cycle); | |||
599 | void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early, | |||
600 | ValueSeq &Late); | |||
601 | bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late); | |||
602 | bool commutesWithShift(Instruction *I); | |||
603 | bool highBitsAreZero(Value *V, unsigned IterCount); | |||
604 | bool keepsHighBitsZero(Value *V, unsigned IterCount); | |||
605 | bool isOperandShifted(Instruction *I, Value *Op); | |||
606 | bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB, | |||
607 | unsigned IterCount); | |||
608 | void cleanupLoopBody(BasicBlock *LoopB); | |||
609 | ||||
610 | struct ParsedValues { | |||
611 | ParsedValues() = default; | |||
612 | ||||
613 | Value *M = nullptr; | |||
614 | Value *P = nullptr; | |||
615 | Value *Q = nullptr; | |||
616 | Value *R = nullptr; | |||
617 | Value *X = nullptr; | |||
618 | Instruction *Res = nullptr; | |||
619 | unsigned IterCount = 0; | |||
620 | bool Left = false; | |||
621 | bool Inv = false; | |||
622 | }; | |||
623 | ||||
624 | bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV); | |||
625 | bool matchRightShift(SelectInst *SelI, ParsedValues &PV); | |||
626 | bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB, | |||
627 | Value *CIV, ParsedValues &PV, bool PreScan); | |||
628 | unsigned getInverseMxN(unsigned QP); | |||
629 | Value *generate(BasicBlock::iterator At, ParsedValues &PV); | |||
630 | ||||
631 | void setupPreSimplifier(Simplifier &S); | |||
632 | void setupPostSimplifier(Simplifier &S); | |||
633 | ||||
634 | Loop *CurLoop; | |||
635 | const DataLayout &DL; | |||
636 | const DominatorTree &DT; | |||
637 | const TargetLibraryInfo &TLI; | |||
638 | ScalarEvolution &SE; | |||
639 | }; | |||
640 | ||||
641 | } // end anonymous namespace | |||
642 | ||||
643 | Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) { | |||
644 | pred_iterator PI = pred_begin(BB), PE = pred_end(BB); | |||
645 | if (std::distance(PI, PE) != 2) | |||
646 | return nullptr; | |||
647 | BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI; | |||
648 | ||||
649 | for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) { | |||
650 | auto *PN = cast<PHINode>(I); | |||
651 | Value *InitV = PN->getIncomingValueForBlock(PB); | |||
652 | if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero()) | |||
653 | continue; | |||
654 | Value *IterV = PN->getIncomingValueForBlock(BB); | |||
655 | auto *BO = dyn_cast<BinaryOperator>(IterV); | |||
656 | if (!BO) | |||
657 | continue; | |||
658 | if (BO->getOpcode() != Instruction::Add) | |||
659 | continue; | |||
660 | Value *IncV = nullptr; | |||
661 | if (BO->getOperand(0) == PN) | |||
662 | IncV = BO->getOperand(1); | |||
663 | else if (BO->getOperand(1) == PN) | |||
664 | IncV = BO->getOperand(0); | |||
665 | if (IncV == nullptr) | |||
666 | continue; | |||
667 | ||||
668 | if (auto *T = dyn_cast<ConstantInt>(IncV)) | |||
669 | if (T->getZExtValue() == 1) | |||
670 | return PN; | |||
671 | } | |||
672 | return nullptr; | |||
673 | } | |||
674 | ||||
675 | static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) { | |||
676 | for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) { | |||
677 | Use &TheUse = UI.getUse(); | |||
678 | ++UI; | |||
679 | if (auto *II = dyn_cast<Instruction>(TheUse.getUser())) | |||
680 | if (BB == II->getParent()) | |||
681 | II->replaceUsesOfWith(I, J); | |||
682 | } | |||
683 | } | |||
684 | ||||
685 | bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI, | |||
686 | Value *CIV, ParsedValues &PV) { | |||
687 | // Match the following: | |||
688 | // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R | |||
689 | // select (X & (1 << i)) == 0 ? R : R ^ (Q << i) | |||
690 | // The condition may also check for equality with the masked value, i.e | |||
691 | // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R | |||
692 | // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i); | |||
693 | ||||
694 | Value *CondV = SelI->getCondition(); | |||
695 | Value *TrueV = SelI->getTrueValue(); | |||
696 | Value *FalseV = SelI->getFalseValue(); | |||
697 | ||||
698 | using namespace PatternMatch; | |||
699 | ||||
700 | CmpInst::Predicate P; | |||
701 | Value *A = nullptr, *B = nullptr, *C = nullptr; | |||
702 | ||||
703 | if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) && | |||
704 | !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B))))) | |||
705 | return false; | |||
706 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
707 | return false; | |||
708 | // Matched: select (A & B) == C ? ... : ... | |||
709 | // select (A & B) != C ? ... : ... | |||
710 | ||||
711 | Value *X = nullptr, *Sh1 = nullptr; | |||
712 | // Check (A & B) for (X & (1 << i)): | |||
713 | if (match(A, m_Shl(m_One(), m_Specific(CIV)))) { | |||
714 | Sh1 = A; | |||
715 | X = B; | |||
716 | } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) { | |||
717 | Sh1 = B; | |||
718 | X = A; | |||
719 | } else { | |||
720 | // TODO: Could also check for an induction variable containing single | |||
721 | // bit shifted left by 1 in each iteration. | |||
722 | return false; | |||
723 | } | |||
724 | ||||
725 | bool TrueIfZero; | |||
726 | ||||
727 | // Check C against the possible values for comparison: 0 and (1 << i): | |||
728 | if (match(C, m_Zero())) | |||
729 | TrueIfZero = (P == CmpInst::ICMP_EQ); | |||
730 | else if (C == Sh1) | |||
731 | TrueIfZero = (P == CmpInst::ICMP_NE); | |||
732 | else | |||
733 | return false; | |||
734 | ||||
735 | // So far, matched: | |||
736 | // select (X & (1 << i)) ? ... : ... | |||
737 | // including variations of the check against zero/non-zero value. | |||
738 | ||||
739 | Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr; | |||
740 | if (TrueIfZero) { | |||
741 | ShouldSameV = TrueV; | |||
742 | ShouldXoredV = FalseV; | |||
743 | } else { | |||
744 | ShouldSameV = FalseV; | |||
745 | ShouldXoredV = TrueV; | |||
746 | } | |||
747 | ||||
748 | Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr; | |||
749 | Value *T = nullptr; | |||
750 | if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) { | |||
751 | // Matched: select +++ ? ... : Y ^ Z | |||
752 | // select +++ ? Y ^ Z : ... | |||
753 | // where +++ denotes previously checked matches. | |||
754 | if (ShouldSameV == Y) | |||
755 | T = Z; | |||
756 | else if (ShouldSameV == Z) | |||
757 | T = Y; | |||
758 | else | |||
759 | return false; | |||
760 | R = ShouldSameV; | |||
761 | // Matched: select +++ ? R : R ^ T | |||
762 | // select +++ ? R ^ T : R | |||
763 | // depending on TrueIfZero. | |||
764 | ||||
765 | } else if (match(ShouldSameV, m_Zero())) { | |||
766 | // Matched: select +++ ? 0 : ... | |||
767 | // select +++ ? ... : 0 | |||
768 | if (!SelI->hasOneUse()) | |||
769 | return false; | |||
770 | T = ShouldXoredV; | |||
771 | // Matched: select +++ ? 0 : T | |||
772 | // select +++ ? T : 0 | |||
773 | ||||
774 | Value *U = *SelI->user_begin(); | |||
775 | if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) && | |||
776 | !match(U, m_Xor(m_Value(R), m_Specific(SelI)))) | |||
777 | return false; | |||
778 | // Matched: xor (select +++ ? 0 : T), R | |||
779 | // xor (select +++ ? T : 0), R | |||
780 | } else | |||
781 | return false; | |||
782 | ||||
783 | // The xor input value T is isolated into its own match so that it could | |||
784 | // be checked against an induction variable containing a shifted bit | |||
785 | // (todo). | |||
786 | // For now, check against (Q << i). | |||
787 | if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) && | |||
788 | !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV))))) | |||
789 | return false; | |||
790 | // Matched: select +++ ? R : R ^ (Q << i) | |||
791 | // select +++ ? R ^ (Q << i) : R | |||
792 | ||||
793 | PV.X = X; | |||
794 | PV.Q = Q; | |||
795 | PV.R = R; | |||
796 | PV.Left = true; | |||
797 | return true; | |||
798 | } | |||
799 | ||||
800 | bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI, | |||
801 | ParsedValues &PV) { | |||
802 | // Match the following: | |||
803 | // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1) | |||
804 | // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q | |||
805 | // The condition may also check for equality with the masked value, i.e | |||
806 | // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1) | |||
807 | // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q | |||
808 | ||||
809 | Value *CondV = SelI->getCondition(); | |||
810 | Value *TrueV = SelI->getTrueValue(); | |||
811 | Value *FalseV = SelI->getFalseValue(); | |||
812 | ||||
813 | using namespace PatternMatch; | |||
814 | ||||
815 | Value *C = nullptr; | |||
816 | CmpInst::Predicate P; | |||
817 | bool TrueIfZero; | |||
818 | ||||
819 | if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) || | |||
820 | match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) { | |||
821 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
822 | return false; | |||
823 | // Matched: select C == 0 ? ... : ... | |||
824 | // select C != 0 ? ... : ... | |||
825 | TrueIfZero = (P == CmpInst::ICMP_EQ); | |||
826 | } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) || | |||
827 | match(CondV, m_ICmp(P, m_One(), m_Value(C)))) { | |||
828 | if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE) | |||
829 | return false; | |||
830 | // Matched: select C == 1 ? ... : ... | |||
831 | // select C != 1 ? ... : ... | |||
832 | TrueIfZero = (P == CmpInst::ICMP_NE); | |||
833 | } else | |||
834 | return false; | |||
835 | ||||
836 | Value *X = nullptr; | |||
837 | if (!match(C, m_And(m_Value(X), m_One())) && | |||
838 | !match(C, m_And(m_One(), m_Value(X)))) | |||
839 | return false; | |||
840 | // Matched: select (X & 1) == +++ ? ... : ... | |||
841 | // select (X & 1) != +++ ? ... : ... | |||
842 | ||||
843 | Value *R = nullptr, *Q = nullptr; | |||
844 | if (TrueIfZero) { | |||
845 | // The select's condition is true if the tested bit is 0. | |||
846 | // TrueV must be the shift, FalseV must be the xor. | |||
847 | if (!match(TrueV, m_LShr(m_Value(R), m_One()))) | |||
848 | return false; | |||
849 | // Matched: select +++ ? (R >> 1) : ... | |||
850 | if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) && | |||
851 | !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV)))) | |||
852 | return false; | |||
853 | // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q | |||
854 | // with commuting ^. | |||
855 | } else { | |||
856 | // The select's condition is true if the tested bit is 1. | |||
857 | // TrueV must be the xor, FalseV must be the shift. | |||
858 | if (!match(FalseV, m_LShr(m_Value(R), m_One()))) | |||
859 | return false; | |||
860 | // Matched: select +++ ? ... : (R >> 1) | |||
861 | if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) && | |||
862 | !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV)))) | |||
863 | return false; | |||
864 | // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1) | |||
865 | // with commuting ^. | |||
866 | } | |||
867 | ||||
868 | PV.X = X; | |||
869 | PV.Q = Q; | |||
870 | PV.R = R; | |||
871 | PV.Left = false; | |||
872 | return true; | |||
873 | } | |||
874 | ||||
875 | bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI, | |||
876 | BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV, | |||
877 | bool PreScan) { | |||
878 | using namespace PatternMatch; | |||
879 | ||||
880 | // The basic pattern for R = P.Q is: | |||
881 | // for i = 0..31 | |||
882 | // R = phi (0, R') | |||
883 | // if (P & (1 << i)) ; test-bit(P, i) | |||
884 | // R' = R ^ (Q << i) | |||
885 | // | |||
886 | // Similarly, the basic pattern for R = (P/Q).Q - P | |||
887 | // for i = 0..31 | |||
888 | // R = phi(P, R') | |||
889 | // if (R & (1 << i)) | |||
890 | // R' = R ^ (Q << i) | |||
891 | ||||
892 | // There exist idioms, where instead of Q being shifted left, P is shifted | |||
893 | // right. This produces a result that is shifted right by 32 bits (the | |||
894 | // non-shifted result is 64-bit). | |||
895 | // | |||
896 | // For R = P.Q, this would be: | |||
897 | // for i = 0..31 | |||
898 | // R = phi (0, R') | |||
899 | // if ((P >> i) & 1) | |||
900 | // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must | |||
901 | // else ; be shifted by 1, not i. | |||
902 | // R' = R >> 1 | |||
903 | // | |||
904 | // And for the inverse: | |||
905 | // for i = 0..31 | |||
906 | // R = phi (P, R') | |||
907 | // if (R & 1) | |||
908 | // R' = (R >> 1) ^ Q | |||
909 | // else | |||
910 | // R' = R >> 1 | |||
911 | ||||
912 | // The left-shifting idioms share the same pattern: | |||
913 | // select (X & (1 << i)) ? R ^ (Q << i) : R | |||
914 | // Similarly for right-shifting idioms: | |||
915 | // select (X & 1) ? (R >> 1) ^ Q | |||
916 | ||||
917 | if (matchLeftShift(SelI, CIV, PV)) { | |||
918 | // If this is a pre-scan, getting this far is sufficient. | |||
919 | if (PreScan) | |||
920 | return true; | |||
921 | ||||
922 | // Need to make sure that the SelI goes back into R. | |||
923 | auto *RPhi = dyn_cast<PHINode>(PV.R); | |||
924 | if (!RPhi) | |||
925 | return false; | |||
926 | if (SelI != RPhi->getIncomingValueForBlock(LoopB)) | |||
927 | return false; | |||
928 | PV.Res = SelI; | |||
929 | ||||
930 | // If X is loop invariant, it must be the input polynomial, and the | |||
931 | // idiom is the basic polynomial multiply. | |||
932 | if (CurLoop->isLoopInvariant(PV.X)) { | |||
933 | PV.P = PV.X; | |||
934 | PV.Inv = false; | |||
935 | } else { | |||
936 | // X is not loop invariant. If X == R, this is the inverse pmpy. | |||
937 | // Otherwise, check for an xor with an invariant value. If the | |||
938 | // variable argument to the xor is R, then this is still a valid | |||
939 | // inverse pmpy. | |||
940 | PV.Inv = true; | |||
941 | if (PV.X != PV.R) { | |||
942 | Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr; | |||
943 | if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2)))) | |||
944 | return false; | |||
945 | auto *I1 = dyn_cast<Instruction>(X1); | |||
946 | auto *I2 = dyn_cast<Instruction>(X2); | |||
947 | if (!I1 || I1->getParent() != LoopB) { | |||
948 | Var = X2; | |||
949 | Inv = X1; | |||
950 | } else if (!I2 || I2->getParent() != LoopB) { | |||
951 | Var = X1; | |||
952 | Inv = X2; | |||
953 | } else | |||
954 | return false; | |||
955 | if (Var != PV.R) | |||
956 | return false; | |||
957 | PV.M = Inv; | |||
958 | } | |||
959 | // The input polynomial P still needs to be determined. It will be | |||
960 | // the entry value of R. | |||
961 | Value *EntryP = RPhi->getIncomingValueForBlock(PrehB); | |||
962 | PV.P = EntryP; | |||
963 | } | |||
964 | ||||
965 | return true; | |||
966 | } | |||
967 | ||||
968 | if (matchRightShift(SelI, PV)) { | |||
969 | // If this is an inverse pattern, the Q polynomial must be known at | |||
970 | // compile time. | |||
971 | if (PV.Inv && !isa<ConstantInt>(PV.Q)) | |||
972 | return false; | |||
973 | if (PreScan) | |||
974 | return true; | |||
975 | // There is no exact matching of right-shift pmpy. | |||
976 | return false; | |||
977 | } | |||
978 | ||||
979 | return false; | |||
980 | } | |||
981 | ||||
982 | bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val, | |||
983 | IntegerType *DestTy) { | |||
984 | IntegerType *T = dyn_cast<IntegerType>(Val->getType()); | |||
985 | if (!T || T->getBitWidth() > DestTy->getBitWidth()) | |||
986 | return false; | |||
987 | if (T->getBitWidth() == DestTy->getBitWidth()) | |||
988 | return true; | |||
989 | // Non-instructions are promotable. The reason why an instruction may not | |||
990 | // be promotable is that it may produce a different result if its operands | |||
991 | // and the result are promoted, for example, it may produce more non-zero | |||
992 | // bits. While it would still be possible to represent the proper result | |||
993 | // in a wider type, it may require adding additional instructions (which | |||
994 | // we don't want to do). | |||
995 | Instruction *In = dyn_cast<Instruction>(Val); | |||
996 | if (!In) | |||
997 | return true; | |||
998 | // The bitwidth of the source type is smaller than the destination. | |||
999 | // Check if the individual operation can be promoted. | |||
1000 | switch (In->getOpcode()) { | |||
1001 | case Instruction::PHI: | |||
1002 | case Instruction::ZExt: | |||
1003 | case Instruction::And: | |||
1004 | case Instruction::Or: | |||
1005 | case Instruction::Xor: | |||
1006 | case Instruction::LShr: // Shift right is ok. | |||
1007 | case Instruction::Select: | |||
1008 | case Instruction::Trunc: | |||
1009 | return true; | |||
1010 | case Instruction::ICmp: | |||
1011 | if (CmpInst *CI = cast<CmpInst>(In)) | |||
1012 | return CI->isEquality() || CI->isUnsigned(); | |||
1013 | llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1013); | |||
1014 | case Instruction::Add: | |||
1015 | return In->hasNoSignedWrap() && In->hasNoUnsignedWrap(); | |||
1016 | } | |||
1017 | return false; | |||
1018 | } | |||
1019 | ||||
1020 | void PolynomialMultiplyRecognize::promoteTo(Instruction *In, | |||
1021 | IntegerType *DestTy, BasicBlock *LoopB) { | |||
1022 | Type *OrigTy = In->getType(); | |||
1023 | assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote" ) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1023, __extension__ __PRETTY_FUNCTION__)); | |||
1024 | ||||
1025 | // Leave boolean values alone. | |||
1026 | if (!In->getType()->isIntegerTy(1)) | |||
1027 | In->mutateType(DestTy); | |||
1028 | unsigned DestBW = DestTy->getBitWidth(); | |||
1029 | ||||
1030 | // Handle PHIs. | |||
1031 | if (PHINode *P = dyn_cast<PHINode>(In)) { | |||
1032 | unsigned N = P->getNumIncomingValues(); | |||
1033 | for (unsigned i = 0; i != N; ++i) { | |||
1034 | BasicBlock *InB = P->getIncomingBlock(i); | |||
1035 | if (InB == LoopB) | |||
1036 | continue; | |||
1037 | Value *InV = P->getIncomingValue(i); | |||
1038 | IntegerType *Ty = cast<IntegerType>(InV->getType()); | |||
1039 | // Do not promote values in PHI nodes of type i1. | |||
1040 | if (Ty != P->getType()) { | |||
1041 | // If the value type does not match the PHI type, the PHI type | |||
1042 | // must have been promoted. | |||
1043 | assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ? void (0) : __assert_fail ("Ty->getBitWidth() < DestBW" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1043, __extension__ __PRETTY_FUNCTION__)); | |||
1044 | InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy); | |||
1045 | P->setIncomingValue(i, InV); | |||
1046 | } | |||
1047 | } | |||
1048 | } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) { | |||
1049 | Value *Op = Z->getOperand(0); | |||
1050 | if (Op->getType() == Z->getType()) | |||
1051 | Z->replaceAllUsesWith(Op); | |||
1052 | Z->eraseFromParent(); | |||
1053 | return; | |||
1054 | } | |||
1055 | if (TruncInst *T = dyn_cast<TruncInst>(In)) { | |||
1056 | IntegerType *TruncTy = cast<IntegerType>(OrigTy); | |||
1057 | Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1); | |||
1058 | Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask); | |||
1059 | T->replaceAllUsesWith(And); | |||
1060 | T->eraseFromParent(); | |||
1061 | return; | |||
1062 | } | |||
1063 | ||||
1064 | // Promote immediates. | |||
1065 | for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) { | |||
1066 | if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i))) | |||
1067 | if (CI->getType()->getBitWidth() < DestBW) | |||
1068 | In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue())); | |||
1069 | } | |||
1070 | } | |||
1071 | ||||
1072 | bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB, | |||
1073 | BasicBlock *ExitB) { | |||
1074 | assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail ( "LoopB", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1074, __extension__ __PRETTY_FUNCTION__)); | |||
1075 | // Skip loops where the exit block has more than one predecessor. The values | |||
1076 | // coming from the loop block will be promoted to another type, and so the | |||
1077 | // values coming into the exit block from other predecessors would also have | |||
1078 | // to be promoted. | |||
1079 | if (!ExitB || (ExitB->getSinglePredecessor() != LoopB)) | |||
1080 | return false; | |||
1081 | IntegerType *DestTy = getPmpyType(); | |||
1082 | // Check if the exit values have types that are no wider than the type | |||
1083 | // that we want to promote to. | |||
1084 | unsigned DestBW = DestTy->getBitWidth(); | |||
1085 | for (PHINode &P : ExitB->phis()) { | |||
1086 | if (P.getNumIncomingValues() != 1) | |||
1087 | return false; | |||
1088 | assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void (0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1088, __extension__ __PRETTY_FUNCTION__)); | |||
1089 | IntegerType *T = dyn_cast<IntegerType>(P.getType()); | |||
1090 | if (!T || T->getBitWidth() > DestBW) | |||
1091 | return false; | |||
1092 | } | |||
1093 | ||||
1094 | // Check all instructions in the loop. | |||
1095 | for (Instruction &In : *LoopB) | |||
1096 | if (!In.isTerminator() && !isPromotableTo(&In, DestTy)) | |||
1097 | return false; | |||
1098 | ||||
1099 | // Perform the promotion. | |||
1100 | std::vector<Instruction*> LoopIns; | |||
1101 | std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns), | |||
1102 | [](Instruction &In) { return &In; }); | |||
1103 | for (Instruction *In : LoopIns) | |||
1104 | if (!In->isTerminator()) | |||
1105 | promoteTo(In, DestTy, LoopB); | |||
1106 | ||||
1107 | // Fix up the PHI nodes in the exit block. | |||
1108 | Instruction *EndI = ExitB->getFirstNonPHI(); | |||
1109 | BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end(); | |||
1110 | for (auto I = ExitB->begin(); I != End; ++I) { | |||
1111 | PHINode *P = dyn_cast<PHINode>(I); | |||
1112 | if (!P) | |||
1113 | break; | |||
1114 | Type *Ty0 = P->getIncomingValue(0)->getType(); | |||
1115 | Type *PTy = P->getType(); | |||
1116 | if (PTy != Ty0) { | |||
1117 | assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail ("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1117, __extension__ __PRETTY_FUNCTION__)); | |||
1118 | // In order to create the trunc, P must have the promoted type. | |||
1119 | P->mutateType(Ty0); | |||
1120 | Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy); | |||
1121 | // In order for the RAUW to work, the types of P and T must match. | |||
1122 | P->mutateType(PTy); | |||
1123 | P->replaceAllUsesWith(T); | |||
1124 | // Final update of the P's type. | |||
1125 | P->mutateType(Ty0); | |||
1126 | cast<Instruction>(T)->setOperand(0, P); | |||
1127 | } | |||
1128 | } | |||
1129 | ||||
1130 | return true; | |||
1131 | } | |||
1132 | ||||
1133 | bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In, | |||
1134 | ValueSeq &Cycle) { | |||
1135 | // Out = ..., In, ... | |||
1136 | if (Out == In) | |||
1137 | return true; | |||
1138 | ||||
1139 | auto *BB = cast<Instruction>(Out)->getParent(); | |||
1140 | bool HadPhi = false; | |||
1141 | ||||
1142 | for (auto U : Out->users()) { | |||
1143 | auto *I = dyn_cast<Instruction>(&*U); | |||
1144 | if (I == nullptr || I->getParent() != BB) | |||
1145 | continue; | |||
1146 | // Make sure that there are no multi-iteration cycles, e.g. | |||
1147 | // p1 = phi(p2) | |||
1148 | // p2 = phi(p1) | |||
1149 | // The cycle p1->p2->p1 would span two loop iterations. | |||
1150 | // Check that there is only one phi in the cycle. | |||
1151 | bool IsPhi = isa<PHINode>(I); | |||
1152 | if (IsPhi && HadPhi) | |||
1153 | return false; | |||
1154 | HadPhi |= IsPhi; | |||
1155 | if (Cycle.count(I)) | |||
1156 | return false; | |||
1157 | Cycle.insert(I); | |||
1158 | if (findCycle(I, In, Cycle)) | |||
1159 | break; | |||
1160 | Cycle.remove(I); | |||
1161 | } | |||
1162 | return !Cycle.empty(); | |||
1163 | } | |||
1164 | ||||
1165 | void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI, | |||
1166 | ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) { | |||
1167 | // All the values in the cycle that are between the phi node and the | |||
1168 | // divider instruction will be classified as "early", all other values | |||
1169 | // will be "late". | |||
1170 | ||||
1171 | bool IsE = true; | |||
1172 | unsigned I, N = Cycle.size(); | |||
1173 | for (I = 0; I < N; ++I) { | |||
1174 | Value *V = Cycle[I]; | |||
1175 | if (DivI == V) | |||
1176 | IsE = false; | |||
1177 | else if (!isa<PHINode>(V)) | |||
1178 | continue; | |||
1179 | // Stop if found either. | |||
1180 | break; | |||
1181 | } | |||
1182 | // "I" is the index of either DivI or the phi node, whichever was first. | |||
1183 | // "E" is "false" or "true" respectively. | |||
1184 | ValueSeq &First = !IsE ? Early : Late; | |||
1185 | for (unsigned J = 0; J < I; ++J) | |||
1186 | First.insert(Cycle[J]); | |||
1187 | ||||
1188 | ValueSeq &Second = IsE ? Early : Late; | |||
1189 | Second.insert(Cycle[I]); | |||
1190 | for (++I; I < N; ++I) { | |||
1191 | Value *V = Cycle[I]; | |||
1192 | if (DivI == V || isa<PHINode>(V)) | |||
1193 | break; | |||
1194 | Second.insert(V); | |||
1195 | } | |||
1196 | ||||
1197 | for (; I < N; ++I) | |||
1198 | First.insert(Cycle[I]); | |||
1199 | } | |||
1200 | ||||
1201 | bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI, | |||
1202 | ValueSeq &Early, ValueSeq &Late) { | |||
1203 | // Select is an exception, since the condition value does not have to be | |||
1204 | // classified in the same way as the true/false values. The true/false | |||
1205 | // values do have to be both early or both late. | |||
1206 | if (UseI->getOpcode() == Instruction::Select) { | |||
1207 | Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2); | |||
1208 | if (Early.count(TV) || Early.count(FV)) { | |||
1209 | if (Late.count(TV) || Late.count(FV)) | |||
1210 | return false; | |||
1211 | Early.insert(UseI); | |||
1212 | } else if (Late.count(TV) || Late.count(FV)) { | |||
1213 | if (Early.count(TV) || Early.count(FV)) | |||
1214 | return false; | |||
1215 | Late.insert(UseI); | |||
1216 | } | |||
1217 | return true; | |||
1218 | } | |||
1219 | ||||
1220 | // Not sure what would be the example of this, but the code below relies | |||
1221 | // on having at least one operand. | |||
1222 | if (UseI->getNumOperands() == 0) | |||
1223 | return true; | |||
1224 | ||||
1225 | bool AE = true, AL = true; | |||
1226 | for (auto &I : UseI->operands()) { | |||
1227 | if (Early.count(&*I)) | |||
1228 | AL = false; | |||
1229 | else if (Late.count(&*I)) | |||
1230 | AE = false; | |||
1231 | } | |||
1232 | // If the operands appear "all early" and "all late" at the same time, | |||
1233 | // then it means that none of them are actually classified as either. | |||
1234 | // This is harmless. | |||
1235 | if (AE && AL) | |||
1236 | return true; | |||
1237 | // Conversely, if they are neither "all early" nor "all late", then | |||
1238 | // we have a mixture of early and late operands that is not a known | |||
1239 | // exception. | |||
1240 | if (!AE && !AL) | |||
1241 | return false; | |||
1242 | ||||
1243 | // Check that we have covered the two special cases. | |||
1244 | assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail ("AE != AL", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1244, __extension__ __PRETTY_FUNCTION__)); | |||
1245 | ||||
1246 | if (AE) | |||
1247 | Early.insert(UseI); | |||
1248 | else | |||
1249 | Late.insert(UseI); | |||
1250 | return true; | |||
1251 | } | |||
1252 | ||||
1253 | bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) { | |||
1254 | switch (I->getOpcode()) { | |||
1255 | case Instruction::And: | |||
1256 | case Instruction::Or: | |||
1257 | case Instruction::Xor: | |||
1258 | case Instruction::LShr: | |||
1259 | case Instruction::Shl: | |||
1260 | case Instruction::Select: | |||
1261 | case Instruction::ICmp: | |||
1262 | case Instruction::PHI: | |||
1263 | break; | |||
1264 | default: | |||
1265 | return false; | |||
1266 | } | |||
1267 | return true; | |||
1268 | } | |||
1269 | ||||
1270 | bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V, | |||
1271 | unsigned IterCount) { | |||
1272 | auto *T = dyn_cast<IntegerType>(V->getType()); | |||
1273 | if (!T) | |||
1274 | return false; | |||
1275 | ||||
1276 | KnownBits Known(T->getBitWidth()); | |||
1277 | computeKnownBits(V, Known, DL); | |||
1278 | return Known.countMinLeadingZeros() >= IterCount; | |||
1279 | } | |||
1280 | ||||
1281 | bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V, | |||
1282 | unsigned IterCount) { | |||
1283 | // Assume that all inputs to the value have the high bits zero. | |||
1284 | // Check if the value itself preserves the zeros in the high bits. | |||
1285 | if (auto *C = dyn_cast<ConstantInt>(V)) | |||
1286 | return C->getValue().countLeadingZeros() >= IterCount; | |||
1287 | ||||
1288 | if (auto *I = dyn_cast<Instruction>(V)) { | |||
1289 | switch (I->getOpcode()) { | |||
1290 | case Instruction::And: | |||
1291 | case Instruction::Or: | |||
1292 | case Instruction::Xor: | |||
1293 | case Instruction::LShr: | |||
1294 | case Instruction::Select: | |||
1295 | case Instruction::ICmp: | |||
1296 | case Instruction::PHI: | |||
1297 | case Instruction::ZExt: | |||
1298 | return true; | |||
1299 | } | |||
1300 | } | |||
1301 | ||||
1302 | return false; | |||
1303 | } | |||
1304 | ||||
1305 | bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) { | |||
1306 | unsigned Opc = I->getOpcode(); | |||
1307 | if (Opc == Instruction::Shl || Opc == Instruction::LShr) | |||
1308 | return Op != I->getOperand(1); | |||
1309 | return true; | |||
1310 | } | |||
1311 | ||||
1312 | bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB, | |||
1313 | BasicBlock *ExitB, unsigned IterCount) { | |||
1314 | Value *CIV = getCountIV(LoopB); | |||
1315 | if (CIV == nullptr) | |||
1316 | return false; | |||
1317 | auto *CIVTy = dyn_cast<IntegerType>(CIV->getType()); | |||
1318 | if (CIVTy == nullptr) | |||
1319 | return false; | |||
1320 | ||||
1321 | ValueSeq RShifts; | |||
1322 | ValueSeq Early, Late, Cycled; | |||
1323 | ||||
1324 | // Find all value cycles that contain logical right shifts by 1. | |||
1325 | for (Instruction &I : *LoopB) { | |||
1326 | using namespace PatternMatch; | |||
1327 | ||||
1328 | Value *V = nullptr; | |||
1329 | if (!match(&I, m_LShr(m_Value(V), m_One()))) | |||
1330 | continue; | |||
1331 | ValueSeq C; | |||
1332 | if (!findCycle(&I, V, C)) | |||
1333 | continue; | |||
1334 | ||||
1335 | // Found a cycle. | |||
1336 | C.insert(&I); | |||
1337 | classifyCycle(&I, C, Early, Late); | |||
1338 | Cycled.insert(C.begin(), C.end()); | |||
1339 | RShifts.insert(&I); | |||
1340 | } | |||
1341 | ||||
1342 | // Find the set of all values affected by the shift cycles, i.e. all | |||
1343 | // cycled values, and (recursively) all their users. | |||
1344 | ValueSeq Users(Cycled.begin(), Cycled.end()); | |||
1345 | for (unsigned i = 0; i < Users.size(); ++i) { | |||
1346 | Value *V = Users[i]; | |||
1347 | if (!isa<IntegerType>(V->getType())) | |||
1348 | return false; | |||
1349 | auto *R = cast<Instruction>(V); | |||
1350 | // If the instruction does not commute with shifts, the loop cannot | |||
1351 | // be unshifted. | |||
1352 | if (!commutesWithShift(R)) | |||
1353 | return false; | |||
1354 | for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) { | |||
1355 | auto *T = cast<Instruction>(*I); | |||
1356 | // Skip users from outside of the loop. They will be handled later. | |||
1357 | // Also, skip the right-shifts and phi nodes, since they mix early | |||
1358 | // and late values. | |||
1359 | if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T)) | |||
1360 | continue; | |||
1361 | ||||
1362 | Users.insert(T); | |||
1363 | if (!classifyInst(T, Early, Late)) | |||
1364 | return false; | |||
1365 | } | |||
1366 | } | |||
1367 | ||||
1368 | if (Users.empty()) | |||
1369 | return false; | |||
1370 | ||||
1371 | // Verify that high bits remain zero. | |||
1372 | ValueSeq Internal(Users.begin(), Users.end()); | |||
1373 | ValueSeq Inputs; | |||
1374 | for (unsigned i = 0; i < Internal.size(); ++i) { | |||
1375 | auto *R = dyn_cast<Instruction>(Internal[i]); | |||
1376 | if (!R) | |||
1377 | continue; | |||
1378 | for (Value *Op : R->operands()) { | |||
1379 | auto *T = dyn_cast<Instruction>(Op); | |||
1380 | if (T && T->getParent() != LoopB) | |||
1381 | Inputs.insert(Op); | |||
1382 | else | |||
1383 | Internal.insert(Op); | |||
1384 | } | |||
1385 | } | |||
1386 | for (Value *V : Inputs) | |||
1387 | if (!highBitsAreZero(V, IterCount)) | |||
1388 | return false; | |||
1389 | for (Value *V : Internal) | |||
1390 | if (!keepsHighBitsZero(V, IterCount)) | |||
1391 | return false; | |||
1392 | ||||
1393 | // Finally, the work can be done. Unshift each user. | |||
1394 | IRBuilder<> IRB(LoopB); | |||
1395 | std::map<Value*,Value*> ShiftMap; | |||
1396 | ||||
1397 | using CastMapType = std::map<std::pair<Value *, Type *>, Value *>; | |||
1398 | ||||
1399 | CastMapType CastMap; | |||
1400 | ||||
1401 | auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V, | |||
1402 | IntegerType *Ty) -> Value* { | |||
1403 | auto H = CM.find(std::make_pair(V, Ty)); | |||
1404 | if (H != CM.end()) | |||
1405 | return H->second; | |||
1406 | Value *CV = IRB.CreateIntCast(V, Ty, false); | |||
1407 | CM.insert(std::make_pair(std::make_pair(V, Ty), CV)); | |||
1408 | return CV; | |||
1409 | }; | |||
1410 | ||||
1411 | for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) { | |||
1412 | using namespace PatternMatch; | |||
1413 | ||||
1414 | if (isa<PHINode>(I) || !Users.count(&*I)) | |||
1415 | continue; | |||
1416 | ||||
1417 | // Match lshr x, 1. | |||
1418 | Value *V = nullptr; | |||
1419 | if (match(&*I, m_LShr(m_Value(V), m_One()))) { | |||
1420 | replaceAllUsesOfWithIn(&*I, V, LoopB); | |||
1421 | continue; | |||
1422 | } | |||
1423 | // For each non-cycled operand, replace it with the corresponding | |||
1424 | // value shifted left. | |||
1425 | for (auto &J : I->operands()) { | |||
1426 | Value *Op = J.get(); | |||
1427 | if (!isOperandShifted(&*I, Op)) | |||
1428 | continue; | |||
1429 | if (Users.count(Op)) | |||
1430 | continue; | |||
1431 | // Skip shifting zeros. | |||
1432 | if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero()) | |||
1433 | continue; | |||
1434 | // Check if we have already generated a shift for this value. | |||
1435 | auto F = ShiftMap.find(Op); | |||
1436 | Value *W = (F != ShiftMap.end()) ? F->second : nullptr; | |||
1437 | if (W == nullptr) { | |||
1438 | IRB.SetInsertPoint(&*I); | |||
1439 | // First, the shift amount will be CIV or CIV+1, depending on | |||
1440 | // whether the value is early or late. Instead of creating CIV+1, | |||
1441 | // do a single shift of the value. | |||
1442 | Value *ShAmt = CIV, *ShVal = Op; | |||
1443 | auto *VTy = cast<IntegerType>(ShVal->getType()); | |||
1444 | auto *ATy = cast<IntegerType>(ShAmt->getType()); | |||
1445 | if (Late.count(&*I)) | |||
1446 | ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1)); | |||
1447 | // Second, the types of the shifted value and the shift amount | |||
1448 | // must match. | |||
1449 | if (VTy != ATy) { | |||
1450 | if (VTy->getBitWidth() < ATy->getBitWidth()) | |||
1451 | ShVal = upcast(CastMap, IRB, ShVal, ATy); | |||
1452 | else | |||
1453 | ShAmt = upcast(CastMap, IRB, ShAmt, VTy); | |||
1454 | } | |||
1455 | // Ready to generate the shift and memoize it. | |||
1456 | W = IRB.CreateShl(ShVal, ShAmt); | |||
1457 | ShiftMap.insert(std::make_pair(Op, W)); | |||
1458 | } | |||
1459 | I->replaceUsesOfWith(Op, W); | |||
1460 | } | |||
1461 | } | |||
1462 | ||||
1463 | // Update the users outside of the loop to account for having left | |||
1464 | // shifts. They would normally be shifted right in the loop, so shift | |||
1465 | // them right after the loop exit. | |||
1466 | // Take advantage of the loop-closed SSA form, which has all the post- | |||
1467 | // loop values in phi nodes. | |||
1468 | IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt()); | |||
1469 | for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) { | |||
1470 | if (!isa<PHINode>(P)) | |||
1471 | break; | |||
1472 | auto *PN = cast<PHINode>(P); | |||
1473 | Value *U = PN->getIncomingValueForBlock(LoopB); | |||
1474 | if (!Users.count(U)) | |||
1475 | continue; | |||
1476 | Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount)); | |||
1477 | PN->replaceAllUsesWith(S); | |||
1478 | // The above RAUW will create | |||
1479 | // S = lshr S, IterCount | |||
1480 | // so we need to fix it back into | |||
1481 | // S = lshr PN, IterCount | |||
1482 | cast<User>(S)->replaceUsesOfWith(S, PN); | |||
1483 | } | |||
1484 | ||||
1485 | return true; | |||
1486 | } | |||
1487 | ||||
1488 | void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) { | |||
1489 | for (auto &I : *LoopB) | |||
1490 | if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT})) | |||
1491 | I.replaceAllUsesWith(SV); | |||
1492 | ||||
1493 | for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) { | |||
1494 | N = std::next(I); | |||
1495 | RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI); | |||
1496 | } | |||
1497 | } | |||
1498 | ||||
1499 | unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) { | |||
1500 | // Arrays of coefficients of Q and the inverse, C. | |||
1501 | // Q[i] = coefficient at x^i. | |||
1502 | std::array<char,32> Q, C; | |||
1503 | ||||
1504 | for (unsigned i = 0; i < 32; ++i) { | |||
1505 | Q[i] = QP & 1; | |||
1506 | QP >>= 1; | |||
1507 | } | |||
1508 | assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail ("Q[0] == 1", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1508, __extension__ __PRETTY_FUNCTION__)); | |||
1509 | ||||
1510 | // Find C, such that | |||
1511 | // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1 | |||
1512 | // | |||
1513 | // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the | |||
1514 | // operations * and + are & and ^ respectively. | |||
1515 | // | |||
1516 | // Find C[i] recursively, by comparing i-th coefficient in the product | |||
1517 | // with 0 (or 1 for i=0). | |||
1518 | // | |||
1519 | // C[0] = 1, since C[0] = Q[0], and Q[0] = 1. | |||
1520 | C[0] = 1; | |||
1521 | for (unsigned i = 1; i < 32; ++i) { | |||
1522 | // Solve for C[i] in: | |||
1523 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0 | |||
1524 | // This is equivalent to | |||
1525 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0 | |||
1526 | // which is | |||
1527 | // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i] | |||
1528 | unsigned T = 0; | |||
1529 | for (unsigned j = 0; j < i; ++j) | |||
1530 | T = T ^ (C[j] & Q[i-j]); | |||
1531 | C[i] = T; | |||
1532 | } | |||
1533 | ||||
1534 | unsigned QV = 0; | |||
1535 | for (unsigned i = 0; i < 32; ++i) | |||
1536 | if (C[i]) | |||
1537 | QV |= (1 << i); | |||
1538 | ||||
1539 | return QV; | |||
1540 | } | |||
1541 | ||||
1542 | Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At, | |||
1543 | ParsedValues &PV) { | |||
1544 | IRBuilder<> B(&*At); | |||
1545 | Module *M = At->getParent()->getParent()->getParent(); | |||
1546 | Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw); | |||
1547 | ||||
1548 | Value *P = PV.P, *Q = PV.Q, *P0 = P; | |||
1549 | unsigned IC = PV.IterCount; | |||
1550 | ||||
1551 | if (PV.M != nullptr) | |||
| ||||
1552 | P0 = P = B.CreateXor(P, PV.M); | |||
1553 | ||||
1554 | // Create a bit mask to clear the high bits beyond IterCount. | |||
1555 | auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC)); | |||
1556 | ||||
1557 | if (PV.IterCount != 32) | |||
1558 | P = B.CreateAnd(P, BMI); | |||
1559 | ||||
1560 | if (PV.Inv) { | |||
1561 | auto *QI = dyn_cast<ConstantInt>(PV.Q); | |||
1562 | assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth() <= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1562, __extension__ __PRETTY_FUNCTION__)); | |||
1563 | ||||
1564 | // Again, clearing bits beyond IterCount. | |||
1565 | unsigned M = (1 << PV.IterCount) - 1; | |||
| ||||
1566 | unsigned Tmp = (QI->getZExtValue() | 1) & M; | |||
1567 | unsigned QV = getInverseMxN(Tmp) & M; | |||
1568 | auto *QVI = ConstantInt::get(QI->getType(), QV); | |||
1569 | P = B.CreateCall(PMF, {P, QVI}); | |||
1570 | P = B.CreateTrunc(P, QI->getType()); | |||
1571 | if (IC != 32) | |||
1572 | P = B.CreateAnd(P, BMI); | |||
1573 | } | |||
1574 | ||||
1575 | Value *R = B.CreateCall(PMF, {P, Q}); | |||
1576 | ||||
1577 | if (PV.M != nullptr) | |||
1578 | R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false)); | |||
1579 | ||||
1580 | return R; | |||
1581 | } | |||
1582 | ||||
1583 | static bool hasZeroSignBit(const Value *V) { | |||
1584 | if (const auto *CI = dyn_cast<const ConstantInt>(V)) | |||
1585 | return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0; | |||
1586 | const Instruction *I = dyn_cast<const Instruction>(V); | |||
1587 | if (!I) | |||
1588 | return false; | |||
1589 | switch (I->getOpcode()) { | |||
1590 | case Instruction::LShr: | |||
1591 | if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1))) | |||
1592 | return SI->getZExtValue() > 0; | |||
1593 | return false; | |||
1594 | case Instruction::Or: | |||
1595 | case Instruction::Xor: | |||
1596 | return hasZeroSignBit(I->getOperand(0)) && | |||
1597 | hasZeroSignBit(I->getOperand(1)); | |||
1598 | case Instruction::And: | |||
1599 | return hasZeroSignBit(I->getOperand(0)) || | |||
1600 | hasZeroSignBit(I->getOperand(1)); | |||
1601 | } | |||
1602 | return false; | |||
1603 | } | |||
1604 | ||||
1605 | void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) { | |||
1606 | S.addRule("sink-zext", | |||
1607 | // Sink zext past bitwise operations. | |||
1608 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1609 | if (I->getOpcode() != Instruction::ZExt) | |||
1610 | return nullptr; | |||
1611 | Instruction *T = dyn_cast<Instruction>(I->getOperand(0)); | |||
1612 | if (!T) | |||
1613 | return nullptr; | |||
1614 | switch (T->getOpcode()) { | |||
1615 | case Instruction::And: | |||
1616 | case Instruction::Or: | |||
1617 | case Instruction::Xor: | |||
1618 | break; | |||
1619 | default: | |||
1620 | return nullptr; | |||
1621 | } | |||
1622 | IRBuilder<> B(Ctx); | |||
1623 | return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(), | |||
1624 | B.CreateZExt(T->getOperand(0), I->getType()), | |||
1625 | B.CreateZExt(T->getOperand(1), I->getType())); | |||
1626 | }); | |||
1627 | S.addRule("xor/and -> and/xor", | |||
1628 | // (xor (and x a) (and y a)) -> (and (xor x y) a) | |||
1629 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1630 | if (I->getOpcode() != Instruction::Xor) | |||
1631 | return nullptr; | |||
1632 | Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0)); | |||
1633 | Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1)); | |||
1634 | if (!And0 || !And1) | |||
1635 | return nullptr; | |||
1636 | if (And0->getOpcode() != Instruction::And || | |||
1637 | And1->getOpcode() != Instruction::And) | |||
1638 | return nullptr; | |||
1639 | if (And0->getOperand(1) != And1->getOperand(1)) | |||
1640 | return nullptr; | |||
1641 | IRBuilder<> B(Ctx); | |||
1642 | return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)), | |||
1643 | And0->getOperand(1)); | |||
1644 | }); | |||
1645 | S.addRule("sink binop into select", | |||
1646 | // (Op (select c x y) z) -> (select c (Op x z) (Op y z)) | |||
1647 | // (Op x (select c y z)) -> (select c (Op x y) (Op x z)) | |||
1648 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1649 | BinaryOperator *BO = dyn_cast<BinaryOperator>(I); | |||
1650 | if (!BO) | |||
1651 | return nullptr; | |||
1652 | Instruction::BinaryOps Op = BO->getOpcode(); | |||
1653 | if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) { | |||
1654 | IRBuilder<> B(Ctx); | |||
1655 | Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue(); | |||
1656 | Value *Z = BO->getOperand(1); | |||
1657 | return B.CreateSelect(Sel->getCondition(), | |||
1658 | B.CreateBinOp(Op, X, Z), | |||
1659 | B.CreateBinOp(Op, Y, Z)); | |||
1660 | } | |||
1661 | if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) { | |||
1662 | IRBuilder<> B(Ctx); | |||
1663 | Value *X = BO->getOperand(0); | |||
1664 | Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue(); | |||
1665 | return B.CreateSelect(Sel->getCondition(), | |||
1666 | B.CreateBinOp(Op, X, Y), | |||
1667 | B.CreateBinOp(Op, X, Z)); | |||
1668 | } | |||
1669 | return nullptr; | |||
1670 | }); | |||
1671 | S.addRule("fold select-select", | |||
1672 | // (select c (select c x y) z) -> (select c x z) | |||
1673 | // (select c x (select c y z)) -> (select c x z) | |||
1674 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1675 | SelectInst *Sel = dyn_cast<SelectInst>(I); | |||
1676 | if (!Sel) | |||
1677 | return nullptr; | |||
1678 | IRBuilder<> B(Ctx); | |||
1679 | Value *C = Sel->getCondition(); | |||
1680 | if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) { | |||
1681 | if (Sel0->getCondition() == C) | |||
1682 | return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue()); | |||
1683 | } | |||
1684 | if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) { | |||
1685 | if (Sel1->getCondition() == C) | |||
1686 | return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue()); | |||
1687 | } | |||
1688 | return nullptr; | |||
1689 | }); | |||
1690 | S.addRule("or-signbit -> xor-signbit", | |||
1691 | // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0) | |||
1692 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1693 | if (I->getOpcode() != Instruction::Or) | |||
1694 | return nullptr; | |||
1695 | ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
1696 | if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit()) | |||
1697 | return nullptr; | |||
1698 | if (!hasZeroSignBit(I->getOperand(0))) | |||
1699 | return nullptr; | |||
1700 | return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb); | |||
1701 | }); | |||
1702 | S.addRule("sink lshr into binop", | |||
1703 | // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c)) | |||
1704 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1705 | if (I->getOpcode() != Instruction::LShr) | |||
1706 | return nullptr; | |||
1707 | BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0)); | |||
1708 | if (!BitOp) | |||
1709 | return nullptr; | |||
1710 | switch (BitOp->getOpcode()) { | |||
1711 | case Instruction::And: | |||
1712 | case Instruction::Or: | |||
1713 | case Instruction::Xor: | |||
1714 | break; | |||
1715 | default: | |||
1716 | return nullptr; | |||
1717 | } | |||
1718 | IRBuilder<> B(Ctx); | |||
1719 | Value *S = I->getOperand(1); | |||
1720 | return B.CreateBinOp(BitOp->getOpcode(), | |||
1721 | B.CreateLShr(BitOp->getOperand(0), S), | |||
1722 | B.CreateLShr(BitOp->getOperand(1), S)); | |||
1723 | }); | |||
1724 | S.addRule("expose bitop-const", | |||
1725 | // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b)) | |||
1726 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1727 | auto IsBitOp = [](unsigned Op) -> bool { | |||
1728 | switch (Op) { | |||
1729 | case Instruction::And: | |||
1730 | case Instruction::Or: | |||
1731 | case Instruction::Xor: | |||
1732 | return true; | |||
1733 | } | |||
1734 | return false; | |||
1735 | }; | |||
1736 | BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I); | |||
1737 | if (!BitOp1 || !IsBitOp(BitOp1->getOpcode())) | |||
1738 | return nullptr; | |||
1739 | BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0)); | |||
1740 | if (!BitOp2 || !IsBitOp(BitOp2->getOpcode())) | |||
1741 | return nullptr; | |||
1742 | ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1)); | |||
1743 | ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1)); | |||
1744 | if (!CA || !CB) | |||
1745 | return nullptr; | |||
1746 | IRBuilder<> B(Ctx); | |||
1747 | Value *X = BitOp2->getOperand(0); | |||
1748 | return B.CreateBinOp(BitOp2->getOpcode(), X, | |||
1749 | B.CreateBinOp(BitOp1->getOpcode(), CA, CB)); | |||
1750 | }); | |||
1751 | } | |||
1752 | ||||
1753 | void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) { | |||
1754 | S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a", | |||
1755 | [](Instruction *I, LLVMContext &Ctx) -> Value* { | |||
1756 | if (I->getOpcode() != Instruction::And) | |||
1757 | return nullptr; | |||
1758 | Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0)); | |||
1759 | ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1)); | |||
1760 | if (!Xor || !C0) | |||
1761 | return nullptr; | |||
1762 | if (Xor->getOpcode() != Instruction::Xor) | |||
1763 | return nullptr; | |||
1764 | Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0)); | |||
1765 | Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1)); | |||
1766 | // Pick the first non-null and. | |||
1767 | if (!And0 || And0->getOpcode() != Instruction::And) | |||
1768 | std::swap(And0, And1); | |||
1769 | ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1)); | |||
1770 | if (!C1) | |||
1771 | return nullptr; | |||
1772 | uint32_t V0 = C0->getZExtValue(); | |||
1773 | uint32_t V1 = C1->getZExtValue(); | |||
1774 | if (V0 != (V0 & V1)) | |||
1775 | return nullptr; | |||
1776 | IRBuilder<> B(Ctx); | |||
1777 | return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0); | |||
1778 | }); | |||
1779 | } | |||
1780 | ||||
1781 | bool PolynomialMultiplyRecognize::recognize() { | |||
1782 | LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" << *CurLoop << '\n'; } } while (false) | |||
1783 | << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n" << *CurLoop << '\n'; } } while (false); | |||
1784 | // Restrictions: | |||
1785 | // - The loop must consist of a single block. | |||
1786 | // - The iteration count must be known at compile-time. | |||
1787 | // - The loop must have an induction variable starting from 0, and | |||
1788 | // incremented in each iteration of the loop. | |||
1789 | BasicBlock *LoopB = CurLoop->getHeader(); | |||
1790 | LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop header:\n" << * LoopB; } } while (false); | |||
1791 | ||||
1792 | if (LoopB != CurLoop->getLoopLatch()) | |||
1793 | return false; | |||
1794 | BasicBlock *ExitB = CurLoop->getExitBlock(); | |||
1795 | if (ExitB == nullptr) | |||
1796 | return false; | |||
1797 | BasicBlock *EntryB = CurLoop->getLoopPreheader(); | |||
1798 | if (EntryB == nullptr) | |||
1799 | return false; | |||
1800 | ||||
1801 | unsigned IterCount = 0; | |||
1802 | const SCEV *CT = SE.getBackedgeTakenCount(CurLoop); | |||
1803 | if (isa<SCEVCouldNotCompute>(CT)) | |||
1804 | return false; | |||
1805 | if (auto *CV = dyn_cast<SCEVConstant>(CT)) | |||
1806 | IterCount = CV->getValue()->getZExtValue() + 1; | |||
1807 | ||||
1808 | Value *CIV = getCountIV(LoopB); | |||
1809 | ParsedValues PV; | |||
1810 | Simplifier PreSimp; | |||
1811 | PV.IterCount = IterCount; | |||
1812 | LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'; } } while ( false) | |||
1813 | << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n'; } } while ( false); | |||
1814 | ||||
1815 | setupPreSimplifier(PreSimp); | |||
1816 | ||||
1817 | // Perform a preliminary scan of select instructions to see if any of them | |||
1818 | // looks like a generator of the polynomial multiply steps. Assume that a | |||
1819 | // loop can only contain a single transformable operation, so stop the | |||
1820 | // traversal after the first reasonable candidate was found. | |||
1821 | // XXX: Currently this approach can modify the loop before being 100% sure | |||
1822 | // that the transformation can be carried out. | |||
1823 | bool FoundPreScan = false; | |||
1824 | auto FeedsPHI = [LoopB](const Value *V) -> bool { | |||
1825 | for (const Value *U : V->users()) { | |||
1826 | if (const auto *P = dyn_cast<const PHINode>(U)) | |||
1827 | if (P->getParent() == LoopB) | |||
1828 | return true; | |||
1829 | } | |||
1830 | return false; | |||
1831 | }; | |||
1832 | for (Instruction &In : *LoopB) { | |||
1833 | SelectInst *SI = dyn_cast<SelectInst>(&In); | |||
1834 | if (!SI || !FeedsPHI(SI)) | |||
1835 | continue; | |||
1836 | ||||
1837 | Simplifier::Context C(SI); | |||
1838 | Value *T = PreSimp.simplify(C); | |||
1839 | SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI; | |||
1840 | LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n'; } } while (false); | |||
1841 | if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) { | |||
1842 | FoundPreScan = true; | |||
1843 | if (SelI != SI) { | |||
1844 | Value *NewSel = C.materialize(LoopB, SI->getIterator()); | |||
1845 | SI->replaceAllUsesWith(NewSel); | |||
1846 | RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); | |||
1847 | } | |||
1848 | break; | |||
1849 | } | |||
1850 | } | |||
1851 | ||||
1852 | if (!FoundPreScan) { | |||
1853 | LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n" ; } } while (false); | |||
1854 | return false; | |||
1855 | } | |||
1856 | ||||
1857 | if (!PV.Left) { | |||
1858 | // The right shift version actually only returns the higher bits of | |||
1859 | // the result (each iteration discards the LSB). If we want to convert it | |||
1860 | // to a left-shifting loop, the working data type must be at least as | |||
1861 | // wide as the target's pmpy instruction. | |||
1862 | if (!promoteTypes(LoopB, ExitB)) | |||
1863 | return false; | |||
1864 | // Run post-promotion simplifications. | |||
1865 | Simplifier PostSimp; | |||
1866 | setupPostSimplifier(PostSimp); | |||
1867 | for (Instruction &In : *LoopB) { | |||
1868 | SelectInst *SI = dyn_cast<SelectInst>(&In); | |||
1869 | if (!SI || !FeedsPHI(SI)) | |||
1870 | continue; | |||
1871 | Simplifier::Context C(SI); | |||
1872 | Value *T = PostSimp.simplify(C); | |||
1873 | SelectInst *SelI = dyn_cast_or_null<SelectInst>(T); | |||
1874 | if (SelI != SI) { | |||
1875 | Value *NewSel = C.materialize(LoopB, SI->getIterator()); | |||
1876 | SI->replaceAllUsesWith(NewSel); | |||
1877 | RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI); | |||
1878 | } | |||
1879 | break; | |||
1880 | } | |||
1881 | ||||
1882 | if (!convertShiftsToLeft(LoopB, ExitB, IterCount)) | |||
1883 | return false; | |||
1884 | cleanupLoopBody(LoopB); | |||
1885 | } | |||
1886 | ||||
1887 | // Scan the loop again, find the generating select instruction. | |||
1888 | bool FoundScan = false; | |||
1889 | for (Instruction &In : *LoopB) { | |||
1890 | SelectInst *SelI = dyn_cast<SelectInst>(&In); | |||
1891 | if (!SelI) | |||
1892 | continue; | |||
1893 | LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI << '\n'; } } while (false); | |||
1894 | FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false); | |||
1895 | if (FoundScan) | |||
1896 | break; | |||
1897 | } | |||
1898 | assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail ("FoundScan", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 1898, __extension__ __PRETTY_FUNCTION__)); | |||
1899 | ||||
1900 | LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1901 | StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1902 | if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1903 | dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1904 | elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1905 | dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1906 | << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1907 | dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1908 | if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1909 | dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1910 | dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1911 | dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false) | |||
1912 | })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if (!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + " << PP << "\n"; dbgs () << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n"; if (PV.M) dbgs() << " M:" << *PV.M << "\n"; dbgs() << " Q:" << *PV.Q << "\n"; dbgs() << " Iteration count:" << PV.IterCount << "\n"; }; } } while (false); | |||
1913 | ||||
1914 | BasicBlock::iterator At(EntryB->getTerminator()); | |||
1915 | Value *PM = generate(At, PV); | |||
1916 | if (PM == nullptr) | |||
1917 | return false; | |||
1918 | ||||
1919 | if (PM->getType() != PV.Res->getType()) | |||
1920 | PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false); | |||
1921 | ||||
1922 | PV.Res->replaceAllUsesWith(PM); | |||
1923 | PV.Res->eraseFromParent(); | |||
1924 | return true; | |||
1925 | } | |||
1926 | ||||
1927 | int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) { | |||
1928 | if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1))) | |||
1929 | return SC->getAPInt().getSExtValue(); | |||
1930 | return 0; | |||
1931 | } | |||
1932 | ||||
1933 | bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) { | |||
1934 | // Allow volatile stores if HexagonVolatileMemcpy is enabled. | |||
1935 | if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple()) | |||
1936 | return false; | |||
1937 | ||||
1938 | Value *StoredVal = SI->getValueOperand(); | |||
1939 | Value *StorePtr = SI->getPointerOperand(); | |||
1940 | ||||
1941 | // Reject stores that are so large that they overflow an unsigned. | |||
1942 | uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType()); | |||
1943 | if ((SizeInBits & 7) || (SizeInBits >> 32) != 0) | |||
1944 | return false; | |||
1945 | ||||
1946 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
1947 | // loop, which indicates a strided store. If we have something else, it's a | |||
1948 | // random store we can't handle. | |||
1949 | auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | |||
1950 | if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine()) | |||
1951 | return false; | |||
1952 | ||||
1953 | // Check to see if the stride matches the size of the store. If so, then we | |||
1954 | // know that every byte is touched in the loop. | |||
1955 | int Stride = getSCEVStride(StoreEv); | |||
1956 | if (Stride == 0) | |||
1957 | return false; | |||
1958 | unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); | |||
1959 | if (StoreSize != unsigned(std::abs(Stride))) | |||
1960 | return false; | |||
1961 | ||||
1962 | // The store must be feeding a non-volatile load. | |||
1963 | LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand()); | |||
1964 | if (!LI || !LI->isSimple()) | |||
1965 | return false; | |||
1966 | ||||
1967 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
1968 | // loop, which indicates a strided load. If we have something else, it's a | |||
1969 | // random load we can't handle. | |||
1970 | Value *LoadPtr = LI->getPointerOperand(); | |||
1971 | auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr)); | |||
1972 | if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine()) | |||
1973 | return false; | |||
1974 | ||||
1975 | // The store and load must share the same stride. | |||
1976 | if (StoreEv->getOperand(1) != LoadEv->getOperand(1)) | |||
1977 | return false; | |||
1978 | ||||
1979 | // Success. This store can be converted into a memcpy. | |||
1980 | return true; | |||
1981 | } | |||
1982 | ||||
1983 | /// mayLoopAccessLocation - Return true if the specified loop might access the | |||
1984 | /// specified pointer location, which is a loop-strided access. The 'Access' | |||
1985 | /// argument specifies what the verboten forms of access are (read or write). | |||
1986 | static bool | |||
1987 | mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L, | |||
1988 | const SCEV *BECount, unsigned StoreSize, | |||
1989 | AliasAnalysis &AA, | |||
1990 | SmallPtrSetImpl<Instruction *> &Ignored) { | |||
1991 | // Get the location that may be stored across the loop. Since the access | |||
1992 | // is strided positively through memory, we say that the modified location | |||
1993 | // starts at the pointer and has infinite size. | |||
1994 | LocationSize AccessSize = LocationSize::afterPointer(); | |||
1995 | ||||
1996 | // If the loop iterates a fixed number of times, we can refine the access | |||
1997 | // size to be exactly the size of the memset, which is (BECount+1)*StoreSize | |||
1998 | if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount)) | |||
1999 | AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) * | |||
2000 | StoreSize); | |||
2001 | ||||
2002 | // TODO: For this to be really effective, we have to dive into the pointer | |||
2003 | // operand in the store. Store to &A[i] of 100 will always return may alias | |||
2004 | // with store of &A[100], we need to StoreLoc to be "A" with size of 100, | |||
2005 | // which will then no-alias a store to &A[100]. | |||
2006 | MemoryLocation StoreLoc(Ptr, AccessSize); | |||
2007 | ||||
2008 | for (auto *B : L->blocks()) | |||
2009 | for (auto &I : *B) | |||
2010 | if (Ignored.count(&I) == 0 && | |||
2011 | isModOrRefSet( | |||
2012 | intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access))) | |||
2013 | return true; | |||
2014 | ||||
2015 | return false; | |||
2016 | } | |||
2017 | ||||
2018 | void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB, | |||
2019 | SmallVectorImpl<StoreInst*> &Stores) { | |||
2020 | Stores.clear(); | |||
2021 | for (Instruction &I : *BB) | |||
2022 | if (StoreInst *SI = dyn_cast<StoreInst>(&I)) | |||
2023 | if (isLegalStore(CurLoop, SI)) | |||
2024 | Stores.push_back(SI); | |||
2025 | } | |||
2026 | ||||
2027 | bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop, | |||
2028 | StoreInst *SI, const SCEV *BECount) { | |||
2029 | assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 2031, __extension__ __PRETTY_FUNCTION__)) | |||
2030 | "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 2031, __extension__ __PRETTY_FUNCTION__)) | |||
2031 | "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile () && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy" "to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 2031, __extension__ __PRETTY_FUNCTION__)); | |||
2032 | ||||
2033 | Value *StorePtr = SI->getPointerOperand(); | |||
2034 | auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr)); | |||
2035 | unsigned Stride = getSCEVStride(StoreEv); | |||
2036 | unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType()); | |||
2037 | if (Stride != StoreSize) | |||
2038 | return false; | |||
2039 | ||||
2040 | // See if the pointer expression is an AddRec like {base,+,1} on the current | |||
2041 | // loop, which indicates a strided load. If we have something else, it's a | |||
2042 | // random load we can't handle. | |||
2043 | auto *LI = cast<LoadInst>(SI->getValueOperand()); | |||
2044 | auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand())); | |||
2045 | ||||
2046 | // The trip count of the loop and the base pointer of the addrec SCEV is | |||
2047 | // guaranteed to be loop invariant, which means that it should dominate the | |||
2048 | // header. This allows us to insert code for it in the preheader. | |||
2049 | BasicBlock *Preheader = CurLoop->getLoopPreheader(); | |||
2050 | Instruction *ExpPt = Preheader->getTerminator(); | |||
2051 | IRBuilder<> Builder(ExpPt); | |||
2052 | SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom"); | |||
2053 | ||||
2054 | Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace()); | |||
2055 | ||||
2056 | // Okay, we have a strided store "p[i]" of a loaded value. We can turn | |||
2057 | // this into a memcpy/memmove in the loop preheader now if we want. However, | |||
2058 | // this would be unsafe to do if there is anything else in the loop that may | |||
2059 | // read or write the memory region we're storing to. For memcpy, this | |||
2060 | // includes the load that feeds the stores. Check for an alias by generating | |||
2061 | // the base address and checking everything. | |||
2062 | Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(), | |||
2063 | Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt); | |||
2064 | Value *LoadBasePtr = nullptr; | |||
2065 | ||||
2066 | bool Overlap = false; | |||
2067 | bool DestVolatile = SI->isVolatile(); | |||
2068 | Type *BECountTy = BECount->getType(); | |||
2069 | ||||
2070 | if (DestVolatile) { | |||
2071 | // The trip count must fit in i32, since it is the type of the "num_words" | |||
2072 | // argument to hexagon_memcpy_forward_vp4cp4n2. | |||
2073 | if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) { | |||
2074 | CleanupAndExit: | |||
2075 | // If we generated new code for the base pointer, clean up. | |||
2076 | Expander.clear(); | |||
2077 | if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) { | |||
2078 | RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI); | |||
2079 | StoreBasePtr = nullptr; | |||
2080 | } | |||
2081 | if (LoadBasePtr) { | |||
2082 | RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI); | |||
2083 | LoadBasePtr = nullptr; | |||
2084 | } | |||
2085 | return false; | |||
2086 | } | |||
2087 | } | |||
2088 | ||||
2089 | SmallPtrSet<Instruction*, 2> Ignore1; | |||
2090 | Ignore1.insert(SI); | |||
2091 | if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount, | |||
2092 | StoreSize, *AA, Ignore1)) { | |||
2093 | // Check if the load is the offending instruction. | |||
2094 | Ignore1.insert(LI); | |||
2095 | if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, | |||
2096 | BECount, StoreSize, *AA, Ignore1)) { | |||
2097 | // Still bad. Nothing we can do. | |||
2098 | goto CleanupAndExit; | |||
2099 | } | |||
2100 | // It worked with the load ignored. | |||
2101 | Overlap = true; | |||
2102 | } | |||
2103 | ||||
2104 | if (!Overlap) { | |||
2105 | if (DisableMemcpyIdiom || !HasMemcpy) | |||
2106 | goto CleanupAndExit; | |||
2107 | } else { | |||
2108 | // Don't generate memmove if this function will be inlined. This is | |||
2109 | // because the caller will undergo this transformation after inlining. | |||
2110 | Function *Func = CurLoop->getHeader()->getParent(); | |||
2111 | if (Func->hasFnAttribute(Attribute::AlwaysInline)) | |||
2112 | goto CleanupAndExit; | |||
2113 | ||||
2114 | // In case of a memmove, the call to memmove will be executed instead | |||
2115 | // of the loop, so we need to make sure that there is nothing else in | |||
2116 | // the loop than the load, store and instructions that these two depend | |||
2117 | // on. | |||
2118 | SmallVector<Instruction*,2> Insts; | |||
2119 | Insts.push_back(SI); | |||
2120 | Insts.push_back(LI); | |||
2121 | if (!coverLoop(CurLoop, Insts)) | |||
2122 | goto CleanupAndExit; | |||
2123 | ||||
2124 | if (DisableMemmoveIdiom || !HasMemmove) | |||
2125 | goto CleanupAndExit; | |||
2126 | bool IsNested = CurLoop->getParentLoop() != nullptr; | |||
2127 | if (IsNested && OnlyNonNestedMemmove) | |||
2128 | goto CleanupAndExit; | |||
2129 | } | |||
2130 | ||||
2131 | // For a memcpy, we have to make sure that the input array is not being | |||
2132 | // mutated by the loop. | |||
2133 | LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(), | |||
2134 | Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt); | |||
2135 | ||||
2136 | SmallPtrSet<Instruction*, 2> Ignore2; | |||
2137 | Ignore2.insert(SI); | |||
2138 | if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount, | |||
2139 | StoreSize, *AA, Ignore2)) | |||
2140 | goto CleanupAndExit; | |||
2141 | ||||
2142 | // Check the stride. | |||
2143 | bool StridePos = getSCEVStride(LoadEv) >= 0; | |||
2144 | ||||
2145 | // Currently, the volatile memcpy only emulates traversing memory forward. | |||
2146 | if (!StridePos && DestVolatile) | |||
2147 | goto CleanupAndExit; | |||
2148 | ||||
2149 | bool RuntimeCheck = (Overlap || DestVolatile); | |||
2150 | ||||
2151 | BasicBlock *ExitB; | |||
2152 | if (RuntimeCheck) { | |||
2153 | // The runtime check needs a single exit block. | |||
2154 | SmallVector<BasicBlock*, 8> ExitBlocks; | |||
2155 | CurLoop->getUniqueExitBlocks(ExitBlocks); | |||
2156 | if (ExitBlocks.size() != 1) | |||
2157 | goto CleanupAndExit; | |||
2158 | ExitB = ExitBlocks[0]; | |||
2159 | } | |||
2160 | ||||
2161 | // The # stored bytes is (BECount+1)*Size. Expand the trip count out to | |||
2162 | // pointer size if it isn't already. | |||
2163 | LLVMContext &Ctx = SI->getContext(); | |||
2164 | BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy); | |||
2165 | DebugLoc DLoc = SI->getDebugLoc(); | |||
2166 | ||||
2167 | const SCEV *NumBytesS = | |||
2168 | SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW); | |||
2169 | if (StoreSize != 1) | |||
2170 | NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize), | |||
2171 | SCEV::FlagNUW); | |||
2172 | Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt); | |||
2173 | if (Instruction *In = dyn_cast<Instruction>(NumBytes)) | |||
2174 | if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) | |||
2175 | NumBytes = Simp; | |||
2176 | ||||
2177 | CallInst *NewCall; | |||
2178 | ||||
2179 | if (RuntimeCheck) { | |||
2180 | unsigned Threshold = RuntimeMemSizeThreshold; | |||
2181 | if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) { | |||
2182 | uint64_t C = CI->getZExtValue(); | |||
2183 | if (Threshold != 0 && C < Threshold) | |||
2184 | goto CleanupAndExit; | |||
2185 | if (C < CompileTimeMemSizeThreshold) | |||
2186 | goto CleanupAndExit; | |||
2187 | } | |||
2188 | ||||
2189 | BasicBlock *Header = CurLoop->getHeader(); | |||
2190 | Function *Func = Header->getParent(); | |||
2191 | Loop *ParentL = LF->getLoopFor(Preheader); | |||
2192 | StringRef HeaderName = Header->getName(); | |||
2193 | ||||
2194 | // Create a new (empty) preheader, and update the PHI nodes in the | |||
2195 | // header to use the new preheader. | |||
2196 | BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph", | |||
2197 | Func, Header); | |||
2198 | if (ParentL) | |||
2199 | ParentL->addBasicBlockToLoop(NewPreheader, *LF); | |||
2200 | IRBuilder<>(NewPreheader).CreateBr(Header); | |||
2201 | for (auto &In : *Header) { | |||
2202 | PHINode *PN = dyn_cast<PHINode>(&In); | |||
2203 | if (!PN) | |||
2204 | break; | |||
2205 | int bx = PN->getBasicBlockIndex(Preheader); | |||
2206 | if (bx >= 0) | |||
2207 | PN->setIncomingBlock(bx, NewPreheader); | |||
2208 | } | |||
2209 | DT->addNewBlock(NewPreheader, Preheader); | |||
2210 | DT->changeImmediateDominator(Header, NewPreheader); | |||
2211 | ||||
2212 | // Check for safe conditions to execute memmove. | |||
2213 | // If stride is positive, copying things from higher to lower addresses | |||
2214 | // is equivalent to memmove. For negative stride, it's the other way | |||
2215 | // around. Copying forward in memory with positive stride may not be | |||
2216 | // same as memmove since we may be copying values that we just stored | |||
2217 | // in some previous iteration. | |||
2218 | Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy); | |||
2219 | Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy); | |||
2220 | Value *LowA = StridePos ? SA : LA; | |||
2221 | Value *HighA = StridePos ? LA : SA; | |||
2222 | Value *CmpA = Builder.CreateICmpULT(LowA, HighA); | |||
2223 | Value *Cond = CmpA; | |||
2224 | ||||
2225 | // Check for distance between pointers. Since the case LowA < HighA | |||
2226 | // is checked for above, assume LowA >= HighA. | |||
2227 | Value *Dist = Builder.CreateSub(LowA, HighA); | |||
2228 | Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist); | |||
2229 | Value *CmpEither = Builder.CreateOr(Cond, CmpD); | |||
2230 | Cond = CmpEither; | |||
2231 | ||||
2232 | if (Threshold != 0) { | |||
2233 | Type *Ty = NumBytes->getType(); | |||
2234 | Value *Thr = ConstantInt::get(Ty, Threshold); | |||
2235 | Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes); | |||
2236 | Value *CmpBoth = Builder.CreateAnd(Cond, CmpB); | |||
2237 | Cond = CmpBoth; | |||
2238 | } | |||
2239 | BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli", | |||
2240 | Func, NewPreheader); | |||
2241 | if (ParentL) | |||
2242 | ParentL->addBasicBlockToLoop(MemmoveB, *LF); | |||
2243 | Instruction *OldT = Preheader->getTerminator(); | |||
2244 | Builder.CreateCondBr(Cond, MemmoveB, NewPreheader); | |||
2245 | OldT->eraseFromParent(); | |||
2246 | Preheader->setName(Preheader->getName()+".old"); | |||
2247 | DT->addNewBlock(MemmoveB, Preheader); | |||
2248 | // Find the new immediate dominator of the exit block. | |||
2249 | BasicBlock *ExitD = Preheader; | |||
2250 | for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) { | |||
2251 | BasicBlock *PB = *PI; | |||
2252 | ExitD = DT->findNearestCommonDominator(ExitD, PB); | |||
2253 | if (!ExitD) | |||
2254 | break; | |||
2255 | } | |||
2256 | // If the prior immediate dominator of ExitB was dominated by the | |||
2257 | // old preheader, then the old preheader becomes the new immediate | |||
2258 | // dominator. Otherwise don't change anything (because the newly | |||
2259 | // added blocks are dominated by the old preheader). | |||
2260 | if (ExitD && DT->dominates(Preheader, ExitD)) { | |||
2261 | DomTreeNode *BN = DT->getNode(ExitB); | |||
2262 | DomTreeNode *DN = DT->getNode(ExitD); | |||
2263 | BN->setIDom(DN); | |||
2264 | } | |||
2265 | ||||
2266 | // Add a call to memmove to the conditional block. | |||
2267 | IRBuilder<> CondBuilder(MemmoveB); | |||
2268 | CondBuilder.CreateBr(ExitB); | |||
2269 | CondBuilder.SetInsertPoint(MemmoveB->getTerminator()); | |||
2270 | ||||
2271 | if (DestVolatile) { | |||
2272 | Type *Int32Ty = Type::getInt32Ty(Ctx); | |||
2273 | Type *Int32PtrTy = Type::getInt32PtrTy(Ctx); | |||
2274 | Type *VoidTy = Type::getVoidTy(Ctx); | |||
2275 | Module *M = Func->getParent(); | |||
2276 | FunctionCallee Fn = M->getOrInsertFunction( | |||
2277 | HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty); | |||
2278 | ||||
2279 | const SCEV *OneS = SE->getConstant(Int32Ty, 1); | |||
2280 | const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty); | |||
2281 | const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW); | |||
2282 | Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty, | |||
2283 | MemmoveB->getTerminator()); | |||
2284 | if (Instruction *In = dyn_cast<Instruction>(NumWords)) | |||
2285 | if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT})) | |||
2286 | NumWords = Simp; | |||
2287 | ||||
2288 | Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy) | |||
2289 | ? StoreBasePtr | |||
2290 | : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy); | |||
2291 | Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy) | |||
2292 | ? LoadBasePtr | |||
2293 | : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy); | |||
2294 | NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords}); | |||
2295 | } else { | |||
2296 | NewCall = CondBuilder.CreateMemMove( | |||
2297 | StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes); | |||
2298 | } | |||
2299 | } else { | |||
2300 | NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr, | |||
2301 | LI->getAlign(), NumBytes); | |||
2302 | // Okay, the memcpy has been formed. Zap the original store and | |||
2303 | // anything that feeds into it. | |||
2304 | RecursivelyDeleteTriviallyDeadInstructions(SI, TLI); | |||
2305 | } | |||
2306 | ||||
2307 | NewCall->setDebugLoc(DLoc); | |||
2308 | ||||
2309 | LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2310 | << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2311 | << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2312 | << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ) | |||
2313 | << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType ("hexagon-lir")) { dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ") << *NewCall << "\n" << " from load ptr=" << *LoadEv << " at: " << *LI << "\n" << " from store ptr=" << *StoreEv << " at: " << *SI << "\n"; } } while (false ); | |||
2314 | ||||
2315 | return true; | |||
2316 | } | |||
2317 | ||||
2318 | // Check if the instructions in Insts, together with their dependencies | |||
2319 | // cover the loop in the sense that the loop could be safely eliminated once | |||
2320 | // the instructions in Insts are removed. | |||
2321 | bool HexagonLoopIdiomRecognize::coverLoop(Loop *L, | |||
2322 | SmallVectorImpl<Instruction*> &Insts) const { | |||
2323 | SmallSet<BasicBlock*,8> LoopBlocks; | |||
2324 | for (auto *B : L->blocks()) | |||
2325 | LoopBlocks.insert(B); | |||
2326 | ||||
2327 | SetVector<Instruction*> Worklist(Insts.begin(), Insts.end()); | |||
2328 | ||||
2329 | // Collect all instructions from the loop that the instructions in Insts | |||
2330 | // depend on (plus their dependencies, etc.). These instructions will | |||
2331 | // constitute the expression trees that feed those in Insts, but the trees | |||
2332 | // will be limited only to instructions contained in the loop. | |||
2333 | for (unsigned i = 0; i < Worklist.size(); ++i) { | |||
2334 | Instruction *In = Worklist[i]; | |||
2335 | for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) { | |||
2336 | Instruction *OpI = dyn_cast<Instruction>(I); | |||
2337 | if (!OpI) | |||
2338 | continue; | |||
2339 | BasicBlock *PB = OpI->getParent(); | |||
2340 | if (!LoopBlocks.count(PB)) | |||
2341 | continue; | |||
2342 | Worklist.insert(OpI); | |||
2343 | } | |||
2344 | } | |||
2345 | ||||
2346 | // Scan all instructions in the loop, if any of them have a user outside | |||
2347 | // of the loop, or outside of the expressions collected above, then either | |||
2348 | // the loop has a side-effect visible outside of it, or there are | |||
2349 | // instructions in it that are not involved in the original set Insts. | |||
2350 | for (auto *B : L->blocks()) { | |||
2351 | for (auto &In : *B) { | |||
2352 | if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In)) | |||
2353 | continue; | |||
2354 | if (!Worklist.count(&In) && In.mayHaveSideEffects()) | |||
2355 | return false; | |||
2356 | for (auto K : In.users()) { | |||
2357 | Instruction *UseI = dyn_cast<Instruction>(K); | |||
2358 | if (!UseI) | |||
2359 | continue; | |||
2360 | BasicBlock *UseB = UseI->getParent(); | |||
2361 | if (LF->getLoopFor(UseB) != L) | |||
2362 | return false; | |||
2363 | } | |||
2364 | } | |||
2365 | } | |||
2366 | ||||
2367 | return true; | |||
2368 | } | |||
2369 | ||||
2370 | /// runOnLoopBlock - Process the specified block, which lives in a counted loop | |||
2371 | /// with the specified backedge count. This block is known to be in the current | |||
2372 | /// loop and not in any subloops. | |||
2373 | bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, | |||
2374 | const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) { | |||
2375 | // We can only promote stores in this block if they are unconditionally | |||
2376 | // executed in the loop. For a block to be unconditionally executed, it has | |||
2377 | // to dominate all the exit blocks of the loop. Verify this now. | |||
2378 | auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool { | |||
2379 | return DT->dominates(BB, EB); | |||
2380 | }; | |||
2381 | if (!all_of(ExitBlocks, DominatedByBB)) | |||
2382 | return false; | |||
2383 | ||||
2384 | bool MadeChange = false; | |||
2385 | // Look for store instructions, which may be optimized to memset/memcpy. | |||
2386 | SmallVector<StoreInst*,8> Stores; | |||
2387 | collectStores(CurLoop, BB, Stores); | |||
2388 | ||||
2389 | // Optimize the store into a memcpy, if it feeds an similarly strided load. | |||
2390 | for (auto &SI : Stores) | |||
2391 | MadeChange |= processCopyingStore(CurLoop, SI, BECount); | |||
2392 | ||||
2393 | return MadeChange; | |||
2394 | } | |||
2395 | ||||
2396 | bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) { | |||
2397 | PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE); | |||
2398 | if (PMR.recognize()) | |||
2399 | return true; | |||
2400 | ||||
2401 | if (!HasMemcpy && !HasMemmove) | |||
2402 | return false; | |||
2403 | ||||
2404 | const SCEV *BECount = SE->getBackedgeTakenCount(L); | |||
2405 | assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 2407, __extension__ __PRETTY_FUNCTION__)) | |||
2406 | "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 2407, __extension__ __PRETTY_FUNCTION__)) | |||
2407 | "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount ) && "runOnCountableLoop() called on a loop without a predictable" "backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp" , 2407, __extension__ __PRETTY_FUNCTION__)); | |||
2408 | ||||
2409 | SmallVector<BasicBlock *, 8> ExitBlocks; | |||
2410 | L->getUniqueExitBlocks(ExitBlocks); | |||
2411 | ||||
2412 | bool Changed = false; | |||
2413 | ||||
2414 | // Scan all the blocks in the loop that are not in subloops. | |||
2415 | for (auto *BB : L->getBlocks()) { | |||
2416 | // Ignore blocks in subloops. | |||
2417 | if (LF->getLoopFor(BB) != L) | |||
2418 | continue; | |||
2419 | Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks); | |||
2420 | } | |||
2421 | ||||
2422 | return Changed; | |||
2423 | } | |||
2424 | ||||
2425 | bool HexagonLoopIdiomRecognize::run(Loop *L) { | |||
2426 | const Module &M = *L->getHeader()->getParent()->getParent(); | |||
2427 | if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon) | |||
2428 | return false; | |||
2429 | ||||
2430 | // If the loop could not be converted to canonical form, it must have an | |||
2431 | // indirectbr in it, just give up. | |||
2432 | if (!L->getLoopPreheader()) | |||
2433 | return false; | |||
2434 | ||||
2435 | // Disable loop idiom recognition if the function's name is a common idiom. | |||
2436 | StringRef Name = L->getHeader()->getParent()->getName(); | |||
2437 | if (Name == "memset" || Name == "memcpy" || Name == "memmove") | |||
2438 | return false; | |||
2439 | ||||
2440 | DL = &L->getHeader()->getModule()->getDataLayout(); | |||
2441 | ||||
2442 | HasMemcpy = TLI->has(LibFunc_memcpy); | |||
2443 | HasMemmove = TLI->has(LibFunc_memmove); | |||
2444 | ||||
2445 | if (SE->hasLoopInvariantBackedgeTakenCount(L)) | |||
2446 | return runOnCountableLoop(L); | |||
2447 | return false; | |||
2448 | } | |||
2449 | ||||
2450 | bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L, | |||
2451 | LPPassManager &LPM) { | |||
2452 | if (skipLoop(L)) | |||
2453 | return false; | |||
2454 | ||||
2455 | auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
2456 | auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
2457 | auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | |||
2458 | auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( | |||
2459 | *L->getHeader()->getParent()); | |||
2460 | auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); | |||
2461 | return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L); | |||
2462 | } | |||
2463 | ||||
2464 | Pass *llvm::createHexagonLoopIdiomPass() { | |||
2465 | return new HexagonLoopIdiomRecognizeLegacyPass(); | |||
2466 | } | |||
2467 | ||||
2468 | PreservedAnalyses | |||
2469 | HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM, | |||
2470 | LoopStandardAnalysisResults &AR, | |||
2471 | LPMUpdater &U) { | |||
2472 | return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE) | |||
2473 | .run(&L) | |||
2474 | ? getLoopPassPreservedAnalyses() | |||
2475 | : PreservedAnalyses::all(); | |||
2476 | } |
1 | //===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | /// | ||||||||
9 | /// \file | ||||||||
10 | /// This file implements a class to represent arbitrary precision | ||||||||
11 | /// integral constant values and operations on them. | ||||||||
12 | /// | ||||||||
13 | //===----------------------------------------------------------------------===// | ||||||||
14 | |||||||||
15 | #ifndef LLVM_ADT_APINT_H | ||||||||
16 | #define LLVM_ADT_APINT_H | ||||||||
17 | |||||||||
18 | #include "llvm/Support/Compiler.h" | ||||||||
19 | #include "llvm/Support/MathExtras.h" | ||||||||
20 | #include <cassert> | ||||||||
21 | #include <climits> | ||||||||
22 | #include <cstring> | ||||||||
23 | #include <utility> | ||||||||
24 | |||||||||
25 | namespace llvm { | ||||||||
26 | class FoldingSetNodeID; | ||||||||
27 | class StringRef; | ||||||||
28 | class hash_code; | ||||||||
29 | class raw_ostream; | ||||||||
30 | |||||||||
31 | template <typename T> class SmallVectorImpl; | ||||||||
32 | template <typename T> class ArrayRef; | ||||||||
33 | template <typename T> class Optional; | ||||||||
34 | template <typename T> struct DenseMapInfo; | ||||||||
35 | |||||||||
36 | class APInt; | ||||||||
37 | |||||||||
38 | inline APInt operator-(APInt); | ||||||||
39 | |||||||||
40 | //===----------------------------------------------------------------------===// | ||||||||
41 | // APInt Class | ||||||||
42 | //===----------------------------------------------------------------------===// | ||||||||
43 | |||||||||
44 | /// Class for arbitrary precision integers. | ||||||||
45 | /// | ||||||||
46 | /// APInt is a functional replacement for common case unsigned integer type like | ||||||||
47 | /// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width | ||||||||
48 | /// integer sizes and large integer value types such as 3-bits, 15-bits, or more | ||||||||
49 | /// than 64-bits of precision. APInt provides a variety of arithmetic operators | ||||||||
50 | /// and methods to manipulate integer values of any bit-width. It supports both | ||||||||
51 | /// the typical integer arithmetic and comparison operations as well as bitwise | ||||||||
52 | /// manipulation. | ||||||||
53 | /// | ||||||||
54 | /// The class has several invariants worth noting: | ||||||||
55 | /// * All bit, byte, and word positions are zero-based. | ||||||||
56 | /// * Once the bit width is set, it doesn't change except by the Truncate, | ||||||||
57 | /// SignExtend, or ZeroExtend operations. | ||||||||
58 | /// * All binary operators must be on APInt instances of the same bit width. | ||||||||
59 | /// Attempting to use these operators on instances with different bit | ||||||||
60 | /// widths will yield an assertion. | ||||||||
61 | /// * The value is stored canonically as an unsigned value. For operations | ||||||||
62 | /// where it makes a difference, there are both signed and unsigned variants | ||||||||
63 | /// of the operation. For example, sdiv and udiv. However, because the bit | ||||||||
64 | /// widths must be the same, operations such as Mul and Add produce the same | ||||||||
65 | /// results regardless of whether the values are interpreted as signed or | ||||||||
66 | /// not. | ||||||||
67 | /// * In general, the class tries to follow the style of computation that LLVM | ||||||||
68 | /// uses in its IR. This simplifies its use for LLVM. | ||||||||
69 | /// | ||||||||
70 | class LLVM_NODISCARD[[clang::warn_unused_result]] APInt { | ||||||||
71 | public: | ||||||||
72 | typedef uint64_t WordType; | ||||||||
73 | |||||||||
74 | /// This enum is used to hold the constants we needed for APInt. | ||||||||
75 | enum : unsigned { | ||||||||
76 | /// Byte size of a word. | ||||||||
77 | APINT_WORD_SIZE = sizeof(WordType), | ||||||||
78 | /// Bits in a word. | ||||||||
79 | APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8 | ||||||||
80 | }; | ||||||||
81 | |||||||||
82 | enum class Rounding { | ||||||||
83 | DOWN, | ||||||||
84 | TOWARD_ZERO, | ||||||||
85 | UP, | ||||||||
86 | }; | ||||||||
87 | |||||||||
88 | static constexpr WordType WORDTYPE_MAX = ~WordType(0); | ||||||||
89 | |||||||||
90 | private: | ||||||||
91 | /// This union is used to store the integer value. When the | ||||||||
92 | /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal. | ||||||||
93 | union { | ||||||||
94 | uint64_t VAL; ///< Used to store the <= 64 bits integer value. | ||||||||
95 | uint64_t *pVal; ///< Used to store the >64 bits integer value. | ||||||||
96 | } U; | ||||||||
97 | |||||||||
98 | unsigned BitWidth; ///< The number of bits in this APInt. | ||||||||
99 | |||||||||
100 | friend struct DenseMapInfo<APInt>; | ||||||||
101 | |||||||||
102 | friend class APSInt; | ||||||||
103 | |||||||||
104 | /// Fast internal constructor | ||||||||
105 | /// | ||||||||
106 | /// This constructor is used only internally for speed of construction of | ||||||||
107 | /// temporaries. It is unsafe for general use so it is not public. | ||||||||
108 | APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { | ||||||||
109 | U.pVal = val; | ||||||||
110 | } | ||||||||
111 | |||||||||
112 | /// Determine which word a bit is in. | ||||||||
113 | /// | ||||||||
114 | /// \returns the word position for the specified bit position. | ||||||||
115 | static unsigned whichWord(unsigned bitPosition) { | ||||||||
116 | return bitPosition / APINT_BITS_PER_WORD; | ||||||||
117 | } | ||||||||
118 | |||||||||
119 | /// Determine which bit in a word a bit is in. | ||||||||
120 | /// | ||||||||
121 | /// \returns the bit position in a word for the specified bit position | ||||||||
122 | /// in the APInt. | ||||||||
123 | static unsigned whichBit(unsigned bitPosition) { | ||||||||
124 | return bitPosition % APINT_BITS_PER_WORD; | ||||||||
125 | } | ||||||||
126 | |||||||||
127 | /// Get a single bit mask. | ||||||||
128 | /// | ||||||||
129 | /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set | ||||||||
130 | /// This method generates and returns a uint64_t (word) mask for a single | ||||||||
131 | /// bit at a specific bit position. This is used to mask the bit in the | ||||||||
132 | /// corresponding word. | ||||||||
133 | static uint64_t maskBit(unsigned bitPosition) { | ||||||||
134 | return 1ULL << whichBit(bitPosition); | ||||||||
135 | } | ||||||||
136 | |||||||||
137 | /// Clear unused high order bits | ||||||||
138 | /// | ||||||||
139 | /// This method is used internally to clear the top "N" bits in the high order | ||||||||
140 | /// word that are not used by the APInt. This is needed after the most | ||||||||
141 | /// significant word is assigned a value to ensure that those bits are | ||||||||
142 | /// zero'd out. | ||||||||
143 | APInt &clearUnusedBits() { | ||||||||
144 | // Compute how many bits are used in the final word | ||||||||
145 | unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1; | ||||||||
146 | |||||||||
147 | // Mask out the high bits. | ||||||||
148 | uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits); | ||||||||
149 | if (isSingleWord()) | ||||||||
150 | U.VAL &= mask; | ||||||||
151 | else | ||||||||
152 | U.pVal[getNumWords() - 1] &= mask; | ||||||||
153 | return *this; | ||||||||
154 | } | ||||||||
155 | |||||||||
156 | /// Get the word corresponding to a bit position | ||||||||
157 | /// \returns the corresponding word for the specified bit position. | ||||||||
158 | uint64_t getWord(unsigned bitPosition) const { | ||||||||
159 | return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)]; | ||||||||
160 | } | ||||||||
161 | |||||||||
162 | /// Utility method to change the bit width of this APInt to new bit width, | ||||||||
163 | /// allocating and/or deallocating as necessary. There is no guarantee on the | ||||||||
164 | /// value of any bits upon return. Caller should populate the bits after. | ||||||||
165 | void reallocate(unsigned NewBitWidth); | ||||||||
166 | |||||||||
167 | /// Convert a char array into an APInt | ||||||||
168 | /// | ||||||||
169 | /// \param radix 2, 8, 10, 16, or 36 | ||||||||
170 | /// Converts a string into a number. The string must be non-empty | ||||||||
171 | /// and well-formed as a number of the given base. The bit-width | ||||||||
172 | /// must be sufficient to hold the result. | ||||||||
173 | /// | ||||||||
174 | /// This is used by the constructors that take string arguments. | ||||||||
175 | /// | ||||||||
176 | /// StringRef::getAsInteger is superficially similar but (1) does | ||||||||
177 | /// not assume that the string is well-formed and (2) grows the | ||||||||
178 | /// result to hold the input. | ||||||||
179 | void fromString(unsigned numBits, StringRef str, uint8_t radix); | ||||||||
180 | |||||||||
181 | /// An internal division function for dividing APInts. | ||||||||
182 | /// | ||||||||
183 | /// This is used by the toString method to divide by the radix. It simply | ||||||||
184 | /// provides a more convenient form of divide for internal use since KnuthDiv | ||||||||
185 | /// has specific constraints on its inputs. If those constraints are not met | ||||||||
186 | /// then it provides a simpler form of divide. | ||||||||
187 | static void divide(const WordType *LHS, unsigned lhsWords, | ||||||||
188 | const WordType *RHS, unsigned rhsWords, WordType *Quotient, | ||||||||
189 | WordType *Remainder); | ||||||||
190 | |||||||||
191 | /// out-of-line slow case for inline constructor | ||||||||
192 | void initSlowCase(uint64_t val, bool isSigned); | ||||||||
193 | |||||||||
194 | /// shared code between two array constructors | ||||||||
195 | void initFromArray(ArrayRef<uint64_t> array); | ||||||||
196 | |||||||||
197 | /// out-of-line slow case for inline copy constructor | ||||||||
198 | void initSlowCase(const APInt &that); | ||||||||
199 | |||||||||
200 | /// out-of-line slow case for shl | ||||||||
201 | void shlSlowCase(unsigned ShiftAmt); | ||||||||
202 | |||||||||
203 | /// out-of-line slow case for lshr. | ||||||||
204 | void lshrSlowCase(unsigned ShiftAmt); | ||||||||
205 | |||||||||
206 | /// out-of-line slow case for ashr. | ||||||||
207 | void ashrSlowCase(unsigned ShiftAmt); | ||||||||
208 | |||||||||
209 | /// out-of-line slow case for operator= | ||||||||
210 | void AssignSlowCase(const APInt &RHS); | ||||||||
211 | |||||||||
212 | /// out-of-line slow case for operator== | ||||||||
213 | bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
214 | |||||||||
215 | /// out-of-line slow case for countLeadingZeros | ||||||||
216 | unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
217 | |||||||||
218 | /// out-of-line slow case for countLeadingOnes. | ||||||||
219 | unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
220 | |||||||||
221 | /// out-of-line slow case for countTrailingZeros. | ||||||||
222 | unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
223 | |||||||||
224 | /// out-of-line slow case for countTrailingOnes | ||||||||
225 | unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
226 | |||||||||
227 | /// out-of-line slow case for countPopulation | ||||||||
228 | unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__)); | ||||||||
229 | |||||||||
230 | /// out-of-line slow case for intersects. | ||||||||
231 | bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
232 | |||||||||
233 | /// out-of-line slow case for isSubsetOf. | ||||||||
234 | bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
235 | |||||||||
236 | /// out-of-line slow case for setBits. | ||||||||
237 | void setBitsSlowCase(unsigned loBit, unsigned hiBit); | ||||||||
238 | |||||||||
239 | /// out-of-line slow case for flipAllBits. | ||||||||
240 | void flipAllBitsSlowCase(); | ||||||||
241 | |||||||||
242 | /// out-of-line slow case for operator&=. | ||||||||
243 | void AndAssignSlowCase(const APInt& RHS); | ||||||||
244 | |||||||||
245 | /// out-of-line slow case for operator|=. | ||||||||
246 | void OrAssignSlowCase(const APInt& RHS); | ||||||||
247 | |||||||||
248 | /// out-of-line slow case for operator^=. | ||||||||
249 | void XorAssignSlowCase(const APInt& RHS); | ||||||||
250 | |||||||||
251 | /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal | ||||||||
252 | /// to, or greater than RHS. | ||||||||
253 | int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
254 | |||||||||
255 | /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal | ||||||||
256 | /// to, or greater than RHS. | ||||||||
257 | int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__)); | ||||||||
258 | |||||||||
259 | public: | ||||||||
260 | /// \name Constructors | ||||||||
261 | /// @{ | ||||||||
262 | |||||||||
263 | /// Create a new APInt of numBits width, initialized as val. | ||||||||
264 | /// | ||||||||
265 | /// If isSigned is true then val is treated as if it were a signed value | ||||||||
266 | /// (i.e. as an int64_t) and the appropriate sign extension to the bit width | ||||||||
267 | /// will be done. Otherwise, no sign extension occurs (high order bits beyond | ||||||||
268 | /// the range of val are zero filled). | ||||||||
269 | /// | ||||||||
270 | /// \param numBits the bit width of the constructed APInt | ||||||||
271 | /// \param val the initial value of the APInt | ||||||||
272 | /// \param isSigned how to treat signedness of val | ||||||||
273 | APInt(unsigned numBits, uint64_t val, bool isSigned = false) | ||||||||
274 | : BitWidth(numBits) { | ||||||||
275 | assert(BitWidth && "bitwidth too small")(static_cast <bool> (BitWidth && "bitwidth too small" ) ? void (0) : __assert_fail ("BitWidth && \"bitwidth too small\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 275, __extension__ __PRETTY_FUNCTION__)); | ||||||||
276 | if (isSingleWord()) { | ||||||||
277 | U.VAL = val; | ||||||||
278 | clearUnusedBits(); | ||||||||
279 | } else { | ||||||||
280 | initSlowCase(val, isSigned); | ||||||||
281 | } | ||||||||
282 | } | ||||||||
283 | |||||||||
284 | /// Construct an APInt of numBits width, initialized as bigVal[]. | ||||||||
285 | /// | ||||||||
286 | /// Note that bigVal.size() can be smaller or larger than the corresponding | ||||||||
287 | /// bit width but any extraneous bits will be dropped. | ||||||||
288 | /// | ||||||||
289 | /// \param numBits the bit width of the constructed APInt | ||||||||
290 | /// \param bigVal a sequence of words to form the initial value of the APInt | ||||||||
291 | APInt(unsigned numBits, ArrayRef<uint64_t> bigVal); | ||||||||
292 | |||||||||
293 | /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but | ||||||||
294 | /// deprecated because this constructor is prone to ambiguity with the | ||||||||
295 | /// APInt(unsigned, uint64_t, bool) constructor. | ||||||||
296 | /// | ||||||||
297 | /// If this overload is ever deleted, care should be taken to prevent calls | ||||||||
298 | /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool) | ||||||||
299 | /// constructor. | ||||||||
300 | APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]); | ||||||||
301 | |||||||||
302 | /// Construct an APInt from a string representation. | ||||||||
303 | /// | ||||||||
304 | /// This constructor interprets the string \p str in the given radix. The | ||||||||
305 | /// interpretation stops when the first character that is not suitable for the | ||||||||
306 | /// radix is encountered, or the end of the string. Acceptable radix values | ||||||||
307 | /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the | ||||||||
308 | /// string to require more bits than numBits. | ||||||||
309 | /// | ||||||||
310 | /// \param numBits the bit width of the constructed APInt | ||||||||
311 | /// \param str the string to be interpreted | ||||||||
312 | /// \param radix the radix to use for the conversion | ||||||||
313 | APInt(unsigned numBits, StringRef str, uint8_t radix); | ||||||||
314 | |||||||||
315 | /// Simply makes *this a copy of that. | ||||||||
316 | /// Copy Constructor. | ||||||||
317 | APInt(const APInt &that) : BitWidth(that.BitWidth) { | ||||||||
318 | if (isSingleWord()) | ||||||||
319 | U.VAL = that.U.VAL; | ||||||||
320 | else | ||||||||
321 | initSlowCase(that); | ||||||||
322 | } | ||||||||
323 | |||||||||
324 | /// Move Constructor. | ||||||||
325 | APInt(APInt &&that) : BitWidth(that.BitWidth) { | ||||||||
326 | memcpy(&U, &that.U, sizeof(U)); | ||||||||
327 | that.BitWidth = 0; | ||||||||
328 | } | ||||||||
329 | |||||||||
330 | /// Destructor. | ||||||||
331 | ~APInt() { | ||||||||
332 | if (needsCleanup()) | ||||||||
333 | delete[] U.pVal; | ||||||||
334 | } | ||||||||
335 | |||||||||
336 | /// Default constructor that creates an uninteresting APInt | ||||||||
337 | /// representing a 1-bit zero value. | ||||||||
338 | /// | ||||||||
339 | /// This is useful for object deserialization (pair this with the static | ||||||||
340 | /// method Read). | ||||||||
341 | explicit APInt() : BitWidth(1) { U.VAL = 0; } | ||||||||
342 | |||||||||
343 | /// Returns whether this instance allocated memory. | ||||||||
344 | bool needsCleanup() const { return !isSingleWord(); } | ||||||||
345 | |||||||||
346 | /// Used to insert APInt objects, or objects that contain APInt objects, into | ||||||||
347 | /// FoldingSets. | ||||||||
348 | void Profile(FoldingSetNodeID &id) const; | ||||||||
349 | |||||||||
350 | /// @} | ||||||||
351 | /// \name Value Tests | ||||||||
352 | /// @{ | ||||||||
353 | |||||||||
354 | /// Determine if this APInt just has one word to store value. | ||||||||
355 | /// | ||||||||
356 | /// \returns true if the number of bits <= 64, false otherwise. | ||||||||
357 | bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; } | ||||||||
358 | |||||||||
359 | /// Determine sign of this APInt. | ||||||||
360 | /// | ||||||||
361 | /// This tests the high bit of this APInt to determine if it is set. | ||||||||
362 | /// | ||||||||
363 | /// \returns true if this APInt is negative, false otherwise | ||||||||
364 | bool isNegative() const { return (*this)[BitWidth - 1]; } | ||||||||
365 | |||||||||
366 | /// Determine if this APInt Value is non-negative (>= 0) | ||||||||
367 | /// | ||||||||
368 | /// This tests the high bit of the APInt to determine if it is unset. | ||||||||
369 | bool isNonNegative() const { return !isNegative(); } | ||||||||
370 | |||||||||
371 | /// Determine if sign bit of this APInt is set. | ||||||||
372 | /// | ||||||||
373 | /// This tests the high bit of this APInt to determine if it is set. | ||||||||
374 | /// | ||||||||
375 | /// \returns true if this APInt has its sign bit set, false otherwise. | ||||||||
376 | bool isSignBitSet() const { return (*this)[BitWidth-1]; } | ||||||||
377 | |||||||||
378 | /// Determine if sign bit of this APInt is clear. | ||||||||
379 | /// | ||||||||
380 | /// This tests the high bit of this APInt to determine if it is clear. | ||||||||
381 | /// | ||||||||
382 | /// \returns true if this APInt has its sign bit clear, false otherwise. | ||||||||
383 | bool isSignBitClear() const { return !isSignBitSet(); } | ||||||||
384 | |||||||||
385 | /// Determine if this APInt Value is positive. | ||||||||
386 | /// | ||||||||
387 | /// This tests if the value of this APInt is positive (> 0). Note | ||||||||
388 | /// that 0 is not a positive value. | ||||||||
389 | /// | ||||||||
390 | /// \returns true if this APInt is positive. | ||||||||
391 | bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); } | ||||||||
392 | |||||||||
393 | /// Determine if this APInt Value is non-positive (<= 0). | ||||||||
394 | /// | ||||||||
395 | /// \returns true if this APInt is non-positive. | ||||||||
396 | bool isNonPositive() const { return !isStrictlyPositive(); } | ||||||||
397 | |||||||||
398 | /// Determine if all bits are set | ||||||||
399 | /// | ||||||||
400 | /// This checks to see if the value has all bits of the APInt are set or not. | ||||||||
401 | bool isAllOnesValue() const { | ||||||||
402 | if (isSingleWord()) | ||||||||
403 | return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth); | ||||||||
404 | return countTrailingOnesSlowCase() == BitWidth; | ||||||||
405 | } | ||||||||
406 | |||||||||
407 | /// Determine if all bits are clear | ||||||||
408 | /// | ||||||||
409 | /// This checks to see if the value has all bits of the APInt are clear or | ||||||||
410 | /// not. | ||||||||
411 | bool isNullValue() const { return !*this; } | ||||||||
412 | |||||||||
413 | /// Determine if this is a value of 1. | ||||||||
414 | /// | ||||||||
415 | /// This checks to see if the value of this APInt is one. | ||||||||
416 | bool isOneValue() const { | ||||||||
417 | if (isSingleWord()) | ||||||||
418 | return U.VAL == 1; | ||||||||
419 | return countLeadingZerosSlowCase() == BitWidth - 1; | ||||||||
420 | } | ||||||||
421 | |||||||||
422 | /// Determine if this is the largest unsigned value. | ||||||||
423 | /// | ||||||||
424 | /// This checks to see if the value of this APInt is the maximum unsigned | ||||||||
425 | /// value for the APInt's bit width. | ||||||||
426 | bool isMaxValue() const { return isAllOnesValue(); } | ||||||||
427 | |||||||||
428 | /// Determine if this is the largest signed value. | ||||||||
429 | /// | ||||||||
430 | /// This checks to see if the value of this APInt is the maximum signed | ||||||||
431 | /// value for the APInt's bit width. | ||||||||
432 | bool isMaxSignedValue() const { | ||||||||
433 | if (isSingleWord()) | ||||||||
434 | return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1); | ||||||||
435 | return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1; | ||||||||
436 | } | ||||||||
437 | |||||||||
438 | /// Determine if this is the smallest unsigned value. | ||||||||
439 | /// | ||||||||
440 | /// This checks to see if the value of this APInt is the minimum unsigned | ||||||||
441 | /// value for the APInt's bit width. | ||||||||
442 | bool isMinValue() const { return isNullValue(); } | ||||||||
443 | |||||||||
444 | /// Determine if this is the smallest signed value. | ||||||||
445 | /// | ||||||||
446 | /// This checks to see if the value of this APInt is the minimum signed | ||||||||
447 | /// value for the APInt's bit width. | ||||||||
448 | bool isMinSignedValue() const { | ||||||||
449 | if (isSingleWord()) | ||||||||
450 | return U.VAL == (WordType(1) << (BitWidth - 1)); | ||||||||
451 | return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1; | ||||||||
452 | } | ||||||||
453 | |||||||||
454 | /// Check if this APInt has an N-bits unsigned integer value. | ||||||||
455 | bool isIntN(unsigned N) const { | ||||||||
456 | assert(N && "N == 0 ???")(static_cast <bool> (N && "N == 0 ???") ? void ( 0) : __assert_fail ("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 456, __extension__ __PRETTY_FUNCTION__)); | ||||||||
457 | return getActiveBits() <= N; | ||||||||
458 | } | ||||||||
459 | |||||||||
460 | /// Check if this APInt has an N-bits signed integer value. | ||||||||
461 | bool isSignedIntN(unsigned N) const { | ||||||||
462 | assert(N && "N == 0 ???")(static_cast <bool> (N && "N == 0 ???") ? void ( 0) : __assert_fail ("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 462, __extension__ __PRETTY_FUNCTION__)); | ||||||||
463 | return getMinSignedBits() <= N; | ||||||||
464 | } | ||||||||
465 | |||||||||
466 | /// Check if this APInt's value is a power of two greater than zero. | ||||||||
467 | /// | ||||||||
468 | /// \returns true if the argument APInt value is a power of two > 0. | ||||||||
469 | bool isPowerOf2() const { | ||||||||
470 | if (isSingleWord()) | ||||||||
471 | return isPowerOf2_64(U.VAL); | ||||||||
472 | return countPopulationSlowCase() == 1; | ||||||||
473 | } | ||||||||
474 | |||||||||
475 | /// Check if the APInt's value is returned by getSignMask. | ||||||||
476 | /// | ||||||||
477 | /// \returns true if this is the value returned by getSignMask. | ||||||||
478 | bool isSignMask() const { return isMinSignedValue(); } | ||||||||
479 | |||||||||
480 | /// Convert APInt to a boolean value. | ||||||||
481 | /// | ||||||||
482 | /// This converts the APInt to a boolean value as a test against zero. | ||||||||
483 | bool getBoolValue() const { return !!*this; } | ||||||||
484 | |||||||||
485 | /// If this value is smaller than the specified limit, return it, otherwise | ||||||||
486 | /// return the limit value. This causes the value to saturate to the limit. | ||||||||
487 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const { | ||||||||
488 | return ugt(Limit) ? Limit : getZExtValue(); | ||||||||
489 | } | ||||||||
490 | |||||||||
491 | /// Check if the APInt consists of a repeated bit pattern. | ||||||||
492 | /// | ||||||||
493 | /// e.g. 0x01010101 satisfies isSplat(8). | ||||||||
494 | /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit | ||||||||
495 | /// width without remainder. | ||||||||
496 | bool isSplat(unsigned SplatSizeInBits) const; | ||||||||
497 | |||||||||
498 | /// \returns true if this APInt value is a sequence of \param numBits ones | ||||||||
499 | /// starting at the least significant bit with the remainder zero. | ||||||||
500 | bool isMask(unsigned numBits) const { | ||||||||
501 | assert(numBits != 0 && "numBits must be non-zero")(static_cast <bool> (numBits != 0 && "numBits must be non-zero" ) ? void (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 501, __extension__ __PRETTY_FUNCTION__)); | ||||||||
502 | assert(numBits <= BitWidth && "numBits out of range")(static_cast <bool> (numBits <= BitWidth && "numBits out of range" ) ? void (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 502, __extension__ __PRETTY_FUNCTION__)); | ||||||||
503 | if (isSingleWord()) | ||||||||
504 | return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits)); | ||||||||
505 | unsigned Ones = countTrailingOnesSlowCase(); | ||||||||
506 | return (numBits == Ones) && | ||||||||
507 | ((Ones + countLeadingZerosSlowCase()) == BitWidth); | ||||||||
508 | } | ||||||||
509 | |||||||||
510 | /// \returns true if this APInt is a non-empty sequence of ones starting at | ||||||||
511 | /// the least significant bit with the remainder zero. | ||||||||
512 | /// Ex. isMask(0x0000FFFFU) == true. | ||||||||
513 | bool isMask() const { | ||||||||
514 | if (isSingleWord()) | ||||||||
515 | return isMask_64(U.VAL); | ||||||||
516 | unsigned Ones = countTrailingOnesSlowCase(); | ||||||||
517 | return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth); | ||||||||
518 | } | ||||||||
519 | |||||||||
520 | /// Return true if this APInt value contains a sequence of ones with | ||||||||
521 | /// the remainder zero. | ||||||||
522 | bool isShiftedMask() const { | ||||||||
523 | if (isSingleWord()) | ||||||||
524 | return isShiftedMask_64(U.VAL); | ||||||||
525 | unsigned Ones = countPopulationSlowCase(); | ||||||||
526 | unsigned LeadZ = countLeadingZerosSlowCase(); | ||||||||
527 | return (Ones + LeadZ + countTrailingZeros()) == BitWidth; | ||||||||
528 | } | ||||||||
529 | |||||||||
530 | /// @} | ||||||||
531 | /// \name Value Generators | ||||||||
532 | /// @{ | ||||||||
533 | |||||||||
534 | /// Gets maximum unsigned value of APInt for specific bit width. | ||||||||
535 | static APInt getMaxValue(unsigned numBits) { | ||||||||
536 | return getAllOnesValue(numBits); | ||||||||
537 | } | ||||||||
538 | |||||||||
539 | /// Gets maximum signed value of APInt for a specific bit width. | ||||||||
540 | static APInt getSignedMaxValue(unsigned numBits) { | ||||||||
541 | APInt API = getAllOnesValue(numBits); | ||||||||
542 | API.clearBit(numBits - 1); | ||||||||
543 | return API; | ||||||||
544 | } | ||||||||
545 | |||||||||
546 | /// Gets minimum unsigned value of APInt for a specific bit width. | ||||||||
547 | static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); } | ||||||||
548 | |||||||||
549 | /// Gets minimum signed value of APInt for a specific bit width. | ||||||||
550 | static APInt getSignedMinValue(unsigned numBits) { | ||||||||
551 | APInt API(numBits, 0); | ||||||||
552 | API.setBit(numBits - 1); | ||||||||
553 | return API; | ||||||||
554 | } | ||||||||
555 | |||||||||
556 | /// Get the SignMask for a specific bit width. | ||||||||
557 | /// | ||||||||
558 | /// This is just a wrapper function of getSignedMinValue(), and it helps code | ||||||||
559 | /// readability when we want to get a SignMask. | ||||||||
560 | static APInt getSignMask(unsigned BitWidth) { | ||||||||
561 | return getSignedMinValue(BitWidth); | ||||||||
562 | } | ||||||||
563 | |||||||||
564 | /// Get the all-ones value. | ||||||||
565 | /// | ||||||||
566 | /// \returns the all-ones value for an APInt of the specified bit-width. | ||||||||
567 | static APInt getAllOnesValue(unsigned numBits) { | ||||||||
568 | return APInt(numBits, WORDTYPE_MAX, true); | ||||||||
569 | } | ||||||||
570 | |||||||||
571 | /// Get the '0' value. | ||||||||
572 | /// | ||||||||
573 | /// \returns the '0' value for an APInt of the specified bit-width. | ||||||||
574 | static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); } | ||||||||
575 | |||||||||
576 | /// Compute an APInt containing numBits highbits from this APInt. | ||||||||
577 | /// | ||||||||
578 | /// Get an APInt with the same BitWidth as this APInt, just zero mask | ||||||||
579 | /// the low bits and right shift to the least significant bit. | ||||||||
580 | /// | ||||||||
581 | /// \returns the high "numBits" bits of this APInt. | ||||||||
582 | APInt getHiBits(unsigned numBits) const; | ||||||||
583 | |||||||||
584 | /// Compute an APInt containing numBits lowbits from this APInt. | ||||||||
585 | /// | ||||||||
586 | /// Get an APInt with the same BitWidth as this APInt, just zero mask | ||||||||
587 | /// the high bits. | ||||||||
588 | /// | ||||||||
589 | /// \returns the low "numBits" bits of this APInt. | ||||||||
590 | APInt getLoBits(unsigned numBits) const; | ||||||||
591 | |||||||||
592 | /// Return an APInt with exactly one bit set in the result. | ||||||||
593 | static APInt getOneBitSet(unsigned numBits, unsigned BitNo) { | ||||||||
594 | APInt Res(numBits, 0); | ||||||||
595 | Res.setBit(BitNo); | ||||||||
596 | return Res; | ||||||||
597 | } | ||||||||
598 | |||||||||
599 | /// Get a value with a block of bits set. | ||||||||
600 | /// | ||||||||
601 | /// Constructs an APInt value that has a contiguous range of bits set. The | ||||||||
602 | /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other | ||||||||
603 | /// bits will be zero. For example, with parameters(32, 0, 16) you would get | ||||||||
604 | /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than | ||||||||
605 | /// \p hiBit. | ||||||||
606 | /// | ||||||||
607 | /// \param numBits the intended bit width of the result | ||||||||
608 | /// \param loBit the index of the lowest bit set. | ||||||||
609 | /// \param hiBit the index of the highest bit set. | ||||||||
610 | /// | ||||||||
611 | /// \returns An APInt value with the requested bits set. | ||||||||
612 | static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) { | ||||||||
613 | assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit" ) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 613, __extension__ __PRETTY_FUNCTION__)); | ||||||||
614 | APInt Res(numBits, 0); | ||||||||
615 | Res.setBits(loBit, hiBit); | ||||||||
616 | return Res; | ||||||||
617 | } | ||||||||
618 | |||||||||
619 | /// Wrap version of getBitsSet. | ||||||||
620 | /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet. | ||||||||
621 | /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example, | ||||||||
622 | /// with parameters (32, 28, 4), you would get 0xF000000F. | ||||||||
623 | /// If \p hiBit is equal to \p loBit, you would get a result with all bits | ||||||||
624 | /// set. | ||||||||
625 | static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit, | ||||||||
626 | unsigned hiBit) { | ||||||||
627 | APInt Res(numBits, 0); | ||||||||
628 | Res.setBitsWithWrap(loBit, hiBit); | ||||||||
629 | return Res; | ||||||||
630 | } | ||||||||
631 | |||||||||
632 | /// Get a value with upper bits starting at loBit set. | ||||||||
633 | /// | ||||||||
634 | /// Constructs an APInt value that has a contiguous range of bits set. The | ||||||||
635 | /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other | ||||||||
636 | /// bits will be zero. For example, with parameters(32, 12) you would get | ||||||||
637 | /// 0xFFFFF000. | ||||||||
638 | /// | ||||||||
639 | /// \param numBits the intended bit width of the result | ||||||||
640 | /// \param loBit the index of the lowest bit to set. | ||||||||
641 | /// | ||||||||
642 | /// \returns An APInt value with the requested bits set. | ||||||||
643 | static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) { | ||||||||
644 | APInt Res(numBits, 0); | ||||||||
645 | Res.setBitsFrom(loBit); | ||||||||
646 | return Res; | ||||||||
647 | } | ||||||||
648 | |||||||||
649 | /// Get a value with high bits set | ||||||||
650 | /// | ||||||||
651 | /// Constructs an APInt value that has the top hiBitsSet bits set. | ||||||||
652 | /// | ||||||||
653 | /// \param numBits the bitwidth of the result | ||||||||
654 | /// \param hiBitsSet the number of high-order bits set in the result. | ||||||||
655 | static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) { | ||||||||
656 | APInt Res(numBits, 0); | ||||||||
657 | Res.setHighBits(hiBitsSet); | ||||||||
658 | return Res; | ||||||||
659 | } | ||||||||
660 | |||||||||
661 | /// Get a value with low bits set | ||||||||
662 | /// | ||||||||
663 | /// Constructs an APInt value that has the bottom loBitsSet bits set. | ||||||||
664 | /// | ||||||||
665 | /// \param numBits the bitwidth of the result | ||||||||
666 | /// \param loBitsSet the number of low-order bits set in the result. | ||||||||
667 | static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) { | ||||||||
668 | APInt Res(numBits, 0); | ||||||||
669 | Res.setLowBits(loBitsSet); | ||||||||
670 | return Res; | ||||||||
671 | } | ||||||||
672 | |||||||||
673 | /// Return a value containing V broadcasted over NewLen bits. | ||||||||
674 | static APInt getSplat(unsigned NewLen, const APInt &V); | ||||||||
675 | |||||||||
676 | /// Determine if two APInts have the same value, after zero-extending | ||||||||
677 | /// one of them (if needed!) to ensure that the bit-widths match. | ||||||||
678 | static bool isSameValue(const APInt &I1, const APInt &I2) { | ||||||||
679 | if (I1.getBitWidth() == I2.getBitWidth()) | ||||||||
680 | return I1 == I2; | ||||||||
681 | |||||||||
682 | if (I1.getBitWidth() > I2.getBitWidth()) | ||||||||
683 | return I1 == I2.zext(I1.getBitWidth()); | ||||||||
684 | |||||||||
685 | return I1.zext(I2.getBitWidth()) == I2; | ||||||||
686 | } | ||||||||
687 | |||||||||
688 | /// Overload to compute a hash_code for an APInt value. | ||||||||
689 | friend hash_code hash_value(const APInt &Arg); | ||||||||
690 | |||||||||
691 | /// This function returns a pointer to the internal storage of the APInt. | ||||||||
692 | /// This is useful for writing out the APInt in binary form without any | ||||||||
693 | /// conversions. | ||||||||
694 | const uint64_t *getRawData() const { | ||||||||
695 | if (isSingleWord()) | ||||||||
696 | return &U.VAL; | ||||||||
697 | return &U.pVal[0]; | ||||||||
698 | } | ||||||||
699 | |||||||||
700 | /// @} | ||||||||
701 | /// \name Unary Operators | ||||||||
702 | /// @{ | ||||||||
703 | |||||||||
704 | /// Postfix increment operator. | ||||||||
705 | /// | ||||||||
706 | /// Increments *this by 1. | ||||||||
707 | /// | ||||||||
708 | /// \returns a new APInt value representing the original value of *this. | ||||||||
709 | APInt operator++(int) { | ||||||||
710 | APInt API(*this); | ||||||||
711 | ++(*this); | ||||||||
712 | return API; | ||||||||
713 | } | ||||||||
714 | |||||||||
715 | /// Prefix increment operator. | ||||||||
716 | /// | ||||||||
717 | /// \returns *this incremented by one | ||||||||
718 | APInt &operator++(); | ||||||||
719 | |||||||||
720 | /// Postfix decrement operator. | ||||||||
721 | /// | ||||||||
722 | /// Decrements *this by 1. | ||||||||
723 | /// | ||||||||
724 | /// \returns a new APInt value representing the original value of *this. | ||||||||
725 | APInt operator--(int) { | ||||||||
726 | APInt API(*this); | ||||||||
727 | --(*this); | ||||||||
728 | return API; | ||||||||
729 | } | ||||||||
730 | |||||||||
731 | /// Prefix decrement operator. | ||||||||
732 | /// | ||||||||
733 | /// \returns *this decremented by one. | ||||||||
734 | APInt &operator--(); | ||||||||
735 | |||||||||
736 | /// Logical negation operator. | ||||||||
737 | /// | ||||||||
738 | /// Performs logical negation operation on this APInt. | ||||||||
739 | /// | ||||||||
740 | /// \returns true if *this is zero, false otherwise. | ||||||||
741 | bool operator!() const { | ||||||||
742 | if (isSingleWord()) | ||||||||
743 | return U.VAL == 0; | ||||||||
744 | return countLeadingZerosSlowCase() == BitWidth; | ||||||||
745 | } | ||||||||
746 | |||||||||
747 | /// @} | ||||||||
748 | /// \name Assignment Operators | ||||||||
749 | /// @{ | ||||||||
750 | |||||||||
751 | /// Copy assignment operator. | ||||||||
752 | /// | ||||||||
753 | /// \returns *this after assignment of RHS. | ||||||||
754 | APInt &operator=(const APInt &RHS) { | ||||||||
755 | // If the bitwidths are the same, we can avoid mucking with memory | ||||||||
756 | if (isSingleWord() && RHS.isSingleWord()) { | ||||||||
757 | U.VAL = RHS.U.VAL; | ||||||||
758 | BitWidth = RHS.BitWidth; | ||||||||
759 | return clearUnusedBits(); | ||||||||
760 | } | ||||||||
761 | |||||||||
762 | AssignSlowCase(RHS); | ||||||||
763 | return *this; | ||||||||
764 | } | ||||||||
765 | |||||||||
766 | /// Move assignment operator. | ||||||||
767 | APInt &operator=(APInt &&that) { | ||||||||
768 | #ifdef EXPENSIVE_CHECKS | ||||||||
769 | // Some std::shuffle implementations still do self-assignment. | ||||||||
770 | if (this == &that) | ||||||||
771 | return *this; | ||||||||
772 | #endif | ||||||||
773 | assert(this != &that && "Self-move not supported")(static_cast <bool> (this != &that && "Self-move not supported" ) ? void (0) : __assert_fail ("this != &that && \"Self-move not supported\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 773, __extension__ __PRETTY_FUNCTION__)); | ||||||||
774 | if (!isSingleWord()) | ||||||||
775 | delete[] U.pVal; | ||||||||
776 | |||||||||
777 | // Use memcpy so that type based alias analysis sees both VAL and pVal | ||||||||
778 | // as modified. | ||||||||
779 | memcpy(&U, &that.U, sizeof(U)); | ||||||||
780 | |||||||||
781 | BitWidth = that.BitWidth; | ||||||||
782 | that.BitWidth = 0; | ||||||||
783 | |||||||||
784 | return *this; | ||||||||
785 | } | ||||||||
786 | |||||||||
787 | /// Assignment operator. | ||||||||
788 | /// | ||||||||
789 | /// The RHS value is assigned to *this. If the significant bits in RHS exceed | ||||||||
790 | /// the bit width, the excess bits are truncated. If the bit width is larger | ||||||||
791 | /// than 64, the value is zero filled in the unspecified high order bits. | ||||||||
792 | /// | ||||||||
793 | /// \returns *this after assignment of RHS value. | ||||||||
794 | APInt &operator=(uint64_t RHS) { | ||||||||
795 | if (isSingleWord()) { | ||||||||
796 | U.VAL = RHS; | ||||||||
797 | return clearUnusedBits(); | ||||||||
798 | } | ||||||||
799 | U.pVal[0] = RHS; | ||||||||
800 | memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); | ||||||||
801 | return *this; | ||||||||
802 | } | ||||||||
803 | |||||||||
804 | /// Bitwise AND assignment operator. | ||||||||
805 | /// | ||||||||
806 | /// Performs a bitwise AND operation on this APInt and RHS. The result is | ||||||||
807 | /// assigned to *this. | ||||||||
808 | /// | ||||||||
809 | /// \returns *this after ANDing with RHS. | ||||||||
810 | APInt &operator&=(const APInt &RHS) { | ||||||||
811 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 811, __extension__ __PRETTY_FUNCTION__)); | ||||||||
812 | if (isSingleWord()) | ||||||||
813 | U.VAL &= RHS.U.VAL; | ||||||||
814 | else | ||||||||
815 | AndAssignSlowCase(RHS); | ||||||||
816 | return *this; | ||||||||
817 | } | ||||||||
818 | |||||||||
819 | /// Bitwise AND assignment operator. | ||||||||
820 | /// | ||||||||
821 | /// Performs a bitwise AND operation on this APInt and RHS. RHS is | ||||||||
822 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
823 | /// the LHS. | ||||||||
824 | APInt &operator&=(uint64_t RHS) { | ||||||||
825 | if (isSingleWord()) { | ||||||||
826 | U.VAL &= RHS; | ||||||||
827 | return *this; | ||||||||
828 | } | ||||||||
829 | U.pVal[0] &= RHS; | ||||||||
830 | memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE); | ||||||||
831 | return *this; | ||||||||
832 | } | ||||||||
833 | |||||||||
834 | /// Bitwise OR assignment operator. | ||||||||
835 | /// | ||||||||
836 | /// Performs a bitwise OR operation on this APInt and RHS. The result is | ||||||||
837 | /// assigned *this; | ||||||||
838 | /// | ||||||||
839 | /// \returns *this after ORing with RHS. | ||||||||
840 | APInt &operator|=(const APInt &RHS) { | ||||||||
841 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 841, __extension__ __PRETTY_FUNCTION__)); | ||||||||
842 | if (isSingleWord()) | ||||||||
843 | U.VAL |= RHS.U.VAL; | ||||||||
844 | else | ||||||||
845 | OrAssignSlowCase(RHS); | ||||||||
846 | return *this; | ||||||||
847 | } | ||||||||
848 | |||||||||
849 | /// Bitwise OR assignment operator. | ||||||||
850 | /// | ||||||||
851 | /// Performs a bitwise OR operation on this APInt and RHS. RHS is | ||||||||
852 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
853 | /// the LHS. | ||||||||
854 | APInt &operator|=(uint64_t RHS) { | ||||||||
855 | if (isSingleWord()) { | ||||||||
856 | U.VAL |= RHS; | ||||||||
857 | return clearUnusedBits(); | ||||||||
858 | } | ||||||||
859 | U.pVal[0] |= RHS; | ||||||||
860 | return *this; | ||||||||
861 | } | ||||||||
862 | |||||||||
863 | /// Bitwise XOR assignment operator. | ||||||||
864 | /// | ||||||||
865 | /// Performs a bitwise XOR operation on this APInt and RHS. The result is | ||||||||
866 | /// assigned to *this. | ||||||||
867 | /// | ||||||||
868 | /// \returns *this after XORing with RHS. | ||||||||
869 | APInt &operator^=(const APInt &RHS) { | ||||||||
870 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 870, __extension__ __PRETTY_FUNCTION__)); | ||||||||
871 | if (isSingleWord()) | ||||||||
872 | U.VAL ^= RHS.U.VAL; | ||||||||
873 | else | ||||||||
874 | XorAssignSlowCase(RHS); | ||||||||
875 | return *this; | ||||||||
876 | } | ||||||||
877 | |||||||||
878 | /// Bitwise XOR assignment operator. | ||||||||
879 | /// | ||||||||
880 | /// Performs a bitwise XOR operation on this APInt and RHS. RHS is | ||||||||
881 | /// logically zero-extended or truncated to match the bit-width of | ||||||||
882 | /// the LHS. | ||||||||
883 | APInt &operator^=(uint64_t RHS) { | ||||||||
884 | if (isSingleWord()) { | ||||||||
885 | U.VAL ^= RHS; | ||||||||
886 | return clearUnusedBits(); | ||||||||
887 | } | ||||||||
888 | U.pVal[0] ^= RHS; | ||||||||
889 | return *this; | ||||||||
890 | } | ||||||||
891 | |||||||||
892 | /// Multiplication assignment operator. | ||||||||
893 | /// | ||||||||
894 | /// Multiplies this APInt by RHS and assigns the result to *this. | ||||||||
895 | /// | ||||||||
896 | /// \returns *this | ||||||||
897 | APInt &operator*=(const APInt &RHS); | ||||||||
898 | APInt &operator*=(uint64_t RHS); | ||||||||
899 | |||||||||
900 | /// Addition assignment operator. | ||||||||
901 | /// | ||||||||
902 | /// Adds RHS to *this and assigns the result to *this. | ||||||||
903 | /// | ||||||||
904 | /// \returns *this | ||||||||
905 | APInt &operator+=(const APInt &RHS); | ||||||||
906 | APInt &operator+=(uint64_t RHS); | ||||||||
907 | |||||||||
908 | /// Subtraction assignment operator. | ||||||||
909 | /// | ||||||||
910 | /// Subtracts RHS from *this and assigns the result to *this. | ||||||||
911 | /// | ||||||||
912 | /// \returns *this | ||||||||
913 | APInt &operator-=(const APInt &RHS); | ||||||||
914 | APInt &operator-=(uint64_t RHS); | ||||||||
915 | |||||||||
916 | /// Left-shift assignment function. | ||||||||
917 | /// | ||||||||
918 | /// Shifts *this left by shiftAmt and assigns the result to *this. | ||||||||
919 | /// | ||||||||
920 | /// \returns *this after shifting left by ShiftAmt | ||||||||
921 | APInt &operator<<=(unsigned ShiftAmt) { | ||||||||
922 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 922, __extension__ __PRETTY_FUNCTION__)); | ||||||||
923 | if (isSingleWord()) { | ||||||||
924 | if (ShiftAmt == BitWidth) | ||||||||
925 | U.VAL = 0; | ||||||||
926 | else | ||||||||
927 | U.VAL <<= ShiftAmt; | ||||||||
928 | return clearUnusedBits(); | ||||||||
929 | } | ||||||||
930 | shlSlowCase(ShiftAmt); | ||||||||
931 | return *this; | ||||||||
932 | } | ||||||||
933 | |||||||||
934 | /// Left-shift assignment function. | ||||||||
935 | /// | ||||||||
936 | /// Shifts *this left by shiftAmt and assigns the result to *this. | ||||||||
937 | /// | ||||||||
938 | /// \returns *this after shifting left by ShiftAmt | ||||||||
939 | APInt &operator<<=(const APInt &ShiftAmt); | ||||||||
940 | |||||||||
941 | /// @} | ||||||||
942 | /// \name Binary Operators | ||||||||
943 | /// @{ | ||||||||
944 | |||||||||
945 | /// Multiplication operator. | ||||||||
946 | /// | ||||||||
947 | /// Multiplies this APInt by RHS and returns the result. | ||||||||
948 | APInt operator*(const APInt &RHS) const; | ||||||||
949 | |||||||||
950 | /// Left logical shift operator. | ||||||||
951 | /// | ||||||||
952 | /// Shifts this APInt left by \p Bits and returns the result. | ||||||||
953 | APInt operator<<(unsigned Bits) const { return shl(Bits); } | ||||||||
954 | |||||||||
955 | /// Left logical shift operator. | ||||||||
956 | /// | ||||||||
957 | /// Shifts this APInt left by \p Bits and returns the result. | ||||||||
958 | APInt operator<<(const APInt &Bits) const { return shl(Bits); } | ||||||||
959 | |||||||||
960 | /// Arithmetic right-shift function. | ||||||||
961 | /// | ||||||||
962 | /// Arithmetic right-shift this APInt by shiftAmt. | ||||||||
963 | APInt ashr(unsigned ShiftAmt) const { | ||||||||
964 | APInt R(*this); | ||||||||
965 | R.ashrInPlace(ShiftAmt); | ||||||||
966 | return R; | ||||||||
967 | } | ||||||||
968 | |||||||||
969 | /// Arithmetic right-shift this APInt by ShiftAmt in place. | ||||||||
970 | void ashrInPlace(unsigned ShiftAmt) { | ||||||||
971 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 971, __extension__ __PRETTY_FUNCTION__)); | ||||||||
972 | if (isSingleWord()) { | ||||||||
973 | int64_t SExtVAL = SignExtend64(U.VAL, BitWidth); | ||||||||
974 | if (ShiftAmt == BitWidth) | ||||||||
975 | U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit. | ||||||||
976 | else | ||||||||
977 | U.VAL = SExtVAL >> ShiftAmt; | ||||||||
978 | clearUnusedBits(); | ||||||||
979 | return; | ||||||||
980 | } | ||||||||
981 | ashrSlowCase(ShiftAmt); | ||||||||
982 | } | ||||||||
983 | |||||||||
984 | /// Logical right-shift function. | ||||||||
985 | /// | ||||||||
986 | /// Logical right-shift this APInt by shiftAmt. | ||||||||
987 | APInt lshr(unsigned shiftAmt) const { | ||||||||
988 | APInt R(*this); | ||||||||
989 | R.lshrInPlace(shiftAmt); | ||||||||
990 | return R; | ||||||||
991 | } | ||||||||
992 | |||||||||
993 | /// Logical right-shift this APInt by ShiftAmt in place. | ||||||||
994 | void lshrInPlace(unsigned ShiftAmt) { | ||||||||
995 | assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth && "Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 995, __extension__ __PRETTY_FUNCTION__)); | ||||||||
996 | if (isSingleWord()) { | ||||||||
997 | if (ShiftAmt == BitWidth) | ||||||||
998 | U.VAL = 0; | ||||||||
999 | else | ||||||||
1000 | U.VAL >>= ShiftAmt; | ||||||||
1001 | return; | ||||||||
1002 | } | ||||||||
1003 | lshrSlowCase(ShiftAmt); | ||||||||
1004 | } | ||||||||
1005 | |||||||||
1006 | /// Left-shift function. | ||||||||
1007 | /// | ||||||||
1008 | /// Left-shift this APInt by shiftAmt. | ||||||||
1009 | APInt shl(unsigned shiftAmt) const { | ||||||||
1010 | APInt R(*this); | ||||||||
1011 | R <<= shiftAmt; | ||||||||
1012 | return R; | ||||||||
1013 | } | ||||||||
1014 | |||||||||
1015 | /// Rotate left by rotateAmt. | ||||||||
1016 | APInt rotl(unsigned rotateAmt) const; | ||||||||
1017 | |||||||||
1018 | /// Rotate right by rotateAmt. | ||||||||
1019 | APInt rotr(unsigned rotateAmt) const; | ||||||||
1020 | |||||||||
1021 | /// Arithmetic right-shift function. | ||||||||
1022 | /// | ||||||||
1023 | /// Arithmetic right-shift this APInt by shiftAmt. | ||||||||
1024 | APInt ashr(const APInt &ShiftAmt) const { | ||||||||
1025 | APInt R(*this); | ||||||||
1026 | R.ashrInPlace(ShiftAmt); | ||||||||
1027 | return R; | ||||||||
1028 | } | ||||||||
1029 | |||||||||
1030 | /// Arithmetic right-shift this APInt by shiftAmt in place. | ||||||||
1031 | void ashrInPlace(const APInt &shiftAmt); | ||||||||
1032 | |||||||||
1033 | /// Logical right-shift function. | ||||||||
1034 | /// | ||||||||
1035 | /// Logical right-shift this APInt by shiftAmt. | ||||||||
1036 | APInt lshr(const APInt &ShiftAmt) const { | ||||||||
1037 | APInt R(*this); | ||||||||
1038 | R.lshrInPlace(ShiftAmt); | ||||||||
1039 | return R; | ||||||||
1040 | } | ||||||||
1041 | |||||||||
1042 | /// Logical right-shift this APInt by ShiftAmt in place. | ||||||||
1043 | void lshrInPlace(const APInt &ShiftAmt); | ||||||||
1044 | |||||||||
1045 | /// Left-shift function. | ||||||||
1046 | /// | ||||||||
1047 | /// Left-shift this APInt by shiftAmt. | ||||||||
1048 | APInt shl(const APInt &ShiftAmt) const { | ||||||||
1049 | APInt R(*this); | ||||||||
1050 | R <<= ShiftAmt; | ||||||||
1051 | return R; | ||||||||
1052 | } | ||||||||
1053 | |||||||||
1054 | /// Rotate left by rotateAmt. | ||||||||
1055 | APInt rotl(const APInt &rotateAmt) const; | ||||||||
1056 | |||||||||
1057 | /// Rotate right by rotateAmt. | ||||||||
1058 | APInt rotr(const APInt &rotateAmt) const; | ||||||||
1059 | |||||||||
1060 | /// Unsigned division operation. | ||||||||
1061 | /// | ||||||||
1062 | /// Perform an unsigned divide operation on this APInt by RHS. Both this and | ||||||||
1063 | /// RHS are treated as unsigned quantities for purposes of this division. | ||||||||
1064 | /// | ||||||||
1065 | /// \returns a new APInt value containing the division result, rounded towards | ||||||||
1066 | /// zero. | ||||||||
1067 | APInt udiv(const APInt &RHS) const; | ||||||||
1068 | APInt udiv(uint64_t RHS) const; | ||||||||
1069 | |||||||||
1070 | /// Signed division function for APInt. | ||||||||
1071 | /// | ||||||||
1072 | /// Signed divide this APInt by APInt RHS. | ||||||||
1073 | /// | ||||||||
1074 | /// The result is rounded towards zero. | ||||||||
1075 | APInt sdiv(const APInt &RHS) const; | ||||||||
1076 | APInt sdiv(int64_t RHS) const; | ||||||||
1077 | |||||||||
1078 | /// Unsigned remainder operation. | ||||||||
1079 | /// | ||||||||
1080 | /// Perform an unsigned remainder operation on this APInt with RHS being the | ||||||||
1081 | /// divisor. Both this and RHS are treated as unsigned quantities for purposes | ||||||||
1082 | /// of this operation. Note that this is a true remainder operation and not a | ||||||||
1083 | /// modulo operation because the sign follows the sign of the dividend which | ||||||||
1084 | /// is *this. | ||||||||
1085 | /// | ||||||||
1086 | /// \returns a new APInt value containing the remainder result | ||||||||
1087 | APInt urem(const APInt &RHS) const; | ||||||||
1088 | uint64_t urem(uint64_t RHS) const; | ||||||||
1089 | |||||||||
1090 | /// Function for signed remainder operation. | ||||||||
1091 | /// | ||||||||
1092 | /// Signed remainder operation on APInt. | ||||||||
1093 | APInt srem(const APInt &RHS) const; | ||||||||
1094 | int64_t srem(int64_t RHS) const; | ||||||||
1095 | |||||||||
1096 | /// Dual division/remainder interface. | ||||||||
1097 | /// | ||||||||
1098 | /// Sometimes it is convenient to divide two APInt values and obtain both the | ||||||||
1099 | /// quotient and remainder. This function does both operations in the same | ||||||||
1100 | /// computation making it a little more efficient. The pair of input arguments | ||||||||
1101 | /// may overlap with the pair of output arguments. It is safe to call | ||||||||
1102 | /// udivrem(X, Y, X, Y), for example. | ||||||||
1103 | static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, | ||||||||
1104 | APInt &Remainder); | ||||||||
1105 | static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient, | ||||||||
1106 | uint64_t &Remainder); | ||||||||
1107 | |||||||||
1108 | static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient, | ||||||||
1109 | APInt &Remainder); | ||||||||
1110 | static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient, | ||||||||
1111 | int64_t &Remainder); | ||||||||
1112 | |||||||||
1113 | // Operations that return overflow indicators. | ||||||||
1114 | APInt sadd_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1115 | APInt uadd_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1116 | APInt ssub_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1117 | APInt usub_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1118 | APInt sdiv_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1119 | APInt smul_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1120 | APInt umul_ov(const APInt &RHS, bool &Overflow) const; | ||||||||
1121 | APInt sshl_ov(const APInt &Amt, bool &Overflow) const; | ||||||||
1122 | APInt ushl_ov(const APInt &Amt, bool &Overflow) const; | ||||||||
1123 | |||||||||
1124 | // Operations that saturate | ||||||||
1125 | APInt sadd_sat(const APInt &RHS) const; | ||||||||
1126 | APInt uadd_sat(const APInt &RHS) const; | ||||||||
1127 | APInt ssub_sat(const APInt &RHS) const; | ||||||||
1128 | APInt usub_sat(const APInt &RHS) const; | ||||||||
1129 | APInt smul_sat(const APInt &RHS) const; | ||||||||
1130 | APInt umul_sat(const APInt &RHS) const; | ||||||||
1131 | APInt sshl_sat(const APInt &RHS) const; | ||||||||
1132 | APInt ushl_sat(const APInt &RHS) const; | ||||||||
1133 | |||||||||
1134 | /// Array-indexing support. | ||||||||
1135 | /// | ||||||||
1136 | /// \returns the bit value at bitPosition | ||||||||
1137 | bool operator[](unsigned bitPosition) const { | ||||||||
1138 | assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast <bool> (bitPosition < getBitWidth() && "Bit position out of bounds!") ? void (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1138, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1139 | return (maskBit(bitPosition) & getWord(bitPosition)) != 0; | ||||||||
1140 | } | ||||||||
1141 | |||||||||
1142 | /// @} | ||||||||
1143 | /// \name Comparison Operators | ||||||||
1144 | /// @{ | ||||||||
1145 | |||||||||
1146 | /// Equality operator. | ||||||||
1147 | /// | ||||||||
1148 | /// Compares this APInt with RHS for the validity of the equality | ||||||||
1149 | /// relationship. | ||||||||
1150 | bool operator==(const APInt &RHS) const { | ||||||||
1151 | assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast <bool> (BitWidth == RHS.BitWidth && "Comparison requires equal bit widths") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1151, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1152 | if (isSingleWord()) | ||||||||
1153 | return U.VAL == RHS.U.VAL; | ||||||||
1154 | return EqualSlowCase(RHS); | ||||||||
1155 | } | ||||||||
1156 | |||||||||
1157 | /// Equality operator. | ||||||||
1158 | /// | ||||||||
1159 | /// Compares this APInt with a uint64_t for the validity of the equality | ||||||||
1160 | /// relationship. | ||||||||
1161 | /// | ||||||||
1162 | /// \returns true if *this == Val | ||||||||
1163 | bool operator==(uint64_t Val) const { | ||||||||
1164 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val; | ||||||||
1165 | } | ||||||||
1166 | |||||||||
1167 | /// Equality comparison. | ||||||||
1168 | /// | ||||||||
1169 | /// Compares this APInt with RHS for the validity of the equality | ||||||||
1170 | /// relationship. | ||||||||
1171 | /// | ||||||||
1172 | /// \returns true if *this == Val | ||||||||
1173 | bool eq(const APInt &RHS) const { return (*this) == RHS; } | ||||||||
1174 | |||||||||
1175 | /// Inequality operator. | ||||||||
1176 | /// | ||||||||
1177 | /// Compares this APInt with RHS for the validity of the inequality | ||||||||
1178 | /// relationship. | ||||||||
1179 | /// | ||||||||
1180 | /// \returns true if *this != Val | ||||||||
1181 | bool operator!=(const APInt &RHS) const { return !((*this) == RHS); } | ||||||||
1182 | |||||||||
1183 | /// Inequality operator. | ||||||||
1184 | /// | ||||||||
1185 | /// Compares this APInt with a uint64_t for the validity of the inequality | ||||||||
1186 | /// relationship. | ||||||||
1187 | /// | ||||||||
1188 | /// \returns true if *this != Val | ||||||||
1189 | bool operator!=(uint64_t Val) const { return !((*this) == Val); } | ||||||||
1190 | |||||||||
1191 | /// Inequality comparison | ||||||||
1192 | /// | ||||||||
1193 | /// Compares this APInt with RHS for the validity of the inequality | ||||||||
1194 | /// relationship. | ||||||||
1195 | /// | ||||||||
1196 | /// \returns true if *this != Val | ||||||||
1197 | bool ne(const APInt &RHS) const { return !((*this) == RHS); } | ||||||||
1198 | |||||||||
1199 | /// Unsigned less than comparison | ||||||||
1200 | /// | ||||||||
1201 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1202 | /// the validity of the less-than relationship. | ||||||||
1203 | /// | ||||||||
1204 | /// \returns true if *this < RHS when both are considered unsigned. | ||||||||
1205 | bool ult(const APInt &RHS) const { return compare(RHS) < 0; } | ||||||||
1206 | |||||||||
1207 | /// Unsigned less than comparison | ||||||||
1208 | /// | ||||||||
1209 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1210 | /// the validity of the less-than relationship. | ||||||||
1211 | /// | ||||||||
1212 | /// \returns true if *this < RHS when considered unsigned. | ||||||||
1213 | bool ult(uint64_t RHS) const { | ||||||||
1214 | // Only need to check active bits if not a single word. | ||||||||
1215 | return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS; | ||||||||
1216 | } | ||||||||
1217 | |||||||||
1218 | /// Signed less than comparison | ||||||||
1219 | /// | ||||||||
1220 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
1221 | /// validity of the less-than relationship. | ||||||||
1222 | /// | ||||||||
1223 | /// \returns true if *this < RHS when both are considered signed. | ||||||||
1224 | bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; } | ||||||||
1225 | |||||||||
1226 | /// Signed less than comparison | ||||||||
1227 | /// | ||||||||
1228 | /// Regards both *this as a signed quantity and compares it with RHS for | ||||||||
1229 | /// the validity of the less-than relationship. | ||||||||
1230 | /// | ||||||||
1231 | /// \returns true if *this < RHS when considered signed. | ||||||||
1232 | bool slt(int64_t RHS) const { | ||||||||
1233 | return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative() | ||||||||
1234 | : getSExtValue() < RHS; | ||||||||
1235 | } | ||||||||
1236 | |||||||||
1237 | /// Unsigned less or equal comparison | ||||||||
1238 | /// | ||||||||
1239 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1240 | /// validity of the less-or-equal relationship. | ||||||||
1241 | /// | ||||||||
1242 | /// \returns true if *this <= RHS when both are considered unsigned. | ||||||||
1243 | bool ule(const APInt &RHS) const { return compare(RHS) <= 0; } | ||||||||
1244 | |||||||||
1245 | /// Unsigned less or equal comparison | ||||||||
1246 | /// | ||||||||
1247 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1248 | /// the validity of the less-or-equal relationship. | ||||||||
1249 | /// | ||||||||
1250 | /// \returns true if *this <= RHS when considered unsigned. | ||||||||
1251 | bool ule(uint64_t RHS) const { return !ugt(RHS); } | ||||||||
1252 | |||||||||
1253 | /// Signed less or equal comparison | ||||||||
1254 | /// | ||||||||
1255 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
1256 | /// validity of the less-or-equal relationship. | ||||||||
1257 | /// | ||||||||
1258 | /// \returns true if *this <= RHS when both are considered signed. | ||||||||
1259 | bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; } | ||||||||
1260 | |||||||||
1261 | /// Signed less or equal comparison | ||||||||
1262 | /// | ||||||||
1263 | /// Regards both *this as a signed quantity and compares it with RHS for the | ||||||||
1264 | /// validity of the less-or-equal relationship. | ||||||||
1265 | /// | ||||||||
1266 | /// \returns true if *this <= RHS when considered signed. | ||||||||
1267 | bool sle(uint64_t RHS) const { return !sgt(RHS); } | ||||||||
1268 | |||||||||
1269 | /// Unsigned greater than comparison | ||||||||
1270 | /// | ||||||||
1271 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1272 | /// the validity of the greater-than relationship. | ||||||||
1273 | /// | ||||||||
1274 | /// \returns true if *this > RHS when both are considered unsigned. | ||||||||
1275 | bool ugt(const APInt &RHS) const { return !ule(RHS); } | ||||||||
1276 | |||||||||
1277 | /// Unsigned greater than comparison | ||||||||
1278 | /// | ||||||||
1279 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1280 | /// the validity of the greater-than relationship. | ||||||||
1281 | /// | ||||||||
1282 | /// \returns true if *this > RHS when considered unsigned. | ||||||||
1283 | bool ugt(uint64_t RHS) const { | ||||||||
1284 | // Only need to check active bits if not a single word. | ||||||||
1285 | return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS; | ||||||||
1286 | } | ||||||||
1287 | |||||||||
1288 | /// Signed greater than comparison | ||||||||
1289 | /// | ||||||||
1290 | /// Regards both *this and RHS as signed quantities and compares them for the | ||||||||
1291 | /// validity of the greater-than relationship. | ||||||||
1292 | /// | ||||||||
1293 | /// \returns true if *this > RHS when both are considered signed. | ||||||||
1294 | bool sgt(const APInt &RHS) const { return !sle(RHS); } | ||||||||
1295 | |||||||||
1296 | /// Signed greater than comparison | ||||||||
1297 | /// | ||||||||
1298 | /// Regards both *this as a signed quantity and compares it with RHS for | ||||||||
1299 | /// the validity of the greater-than relationship. | ||||||||
1300 | /// | ||||||||
1301 | /// \returns true if *this > RHS when considered signed. | ||||||||
1302 | bool sgt(int64_t RHS) const { | ||||||||
1303 | return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative() | ||||||||
1304 | : getSExtValue() > RHS; | ||||||||
1305 | } | ||||||||
1306 | |||||||||
1307 | /// Unsigned greater or equal comparison | ||||||||
1308 | /// | ||||||||
1309 | /// Regards both *this and RHS as unsigned quantities and compares them for | ||||||||
1310 | /// validity of the greater-or-equal relationship. | ||||||||
1311 | /// | ||||||||
1312 | /// \returns true if *this >= RHS when both are considered unsigned. | ||||||||
1313 | bool uge(const APInt &RHS) const { return !ult(RHS); } | ||||||||
1314 | |||||||||
1315 | /// Unsigned greater or equal comparison | ||||||||
1316 | /// | ||||||||
1317 | /// Regards both *this as an unsigned quantity and compares it with RHS for | ||||||||
1318 | /// the validity of the greater-or-equal relationship. | ||||||||
1319 | /// | ||||||||
1320 | /// \returns true if *this >= RHS when considered unsigned. | ||||||||
1321 | bool uge(uint64_t RHS) const { return !ult(RHS); } | ||||||||
1322 | |||||||||
1323 | /// Signed greater or equal comparison | ||||||||
1324 | /// | ||||||||
1325 | /// Regards both *this and RHS as signed quantities and compares them for | ||||||||
1326 | /// validity of the greater-or-equal relationship. | ||||||||
1327 | /// | ||||||||
1328 | /// \returns true if *this >= RHS when both are considered signed. | ||||||||
1329 | bool sge(const APInt &RHS) const { return !slt(RHS); } | ||||||||
1330 | |||||||||
1331 | /// Signed greater or equal comparison | ||||||||
1332 | /// | ||||||||
1333 | /// Regards both *this as a signed quantity and compares it with RHS for | ||||||||
1334 | /// the validity of the greater-or-equal relationship. | ||||||||
1335 | /// | ||||||||
1336 | /// \returns true if *this >= RHS when considered signed. | ||||||||
1337 | bool sge(int64_t RHS) const { return !slt(RHS); } | ||||||||
1338 | |||||||||
1339 | /// This operation tests if there are any pairs of corresponding bits | ||||||||
1340 | /// between this APInt and RHS that are both set. | ||||||||
1341 | bool intersects(const APInt &RHS) const { | ||||||||
1342 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1342, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1343 | if (isSingleWord()) | ||||||||
1344 | return (U.VAL & RHS.U.VAL) != 0; | ||||||||
1345 | return intersectsSlowCase(RHS); | ||||||||
1346 | } | ||||||||
1347 | |||||||||
1348 | /// This operation checks that all bits set in this APInt are also set in RHS. | ||||||||
1349 | bool isSubsetOf(const APInt &RHS) const { | ||||||||
1350 | assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth && "Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1350, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1351 | if (isSingleWord()) | ||||||||
1352 | return (U.VAL & ~RHS.U.VAL) == 0; | ||||||||
1353 | return isSubsetOfSlowCase(RHS); | ||||||||
1354 | } | ||||||||
1355 | |||||||||
1356 | /// @} | ||||||||
1357 | /// \name Resizing Operators | ||||||||
1358 | /// @{ | ||||||||
1359 | |||||||||
1360 | /// Truncate to new width. | ||||||||
1361 | /// | ||||||||
1362 | /// Truncate the APInt to a specified width. It is an error to specify a width | ||||||||
1363 | /// that is greater than or equal to the current width. | ||||||||
1364 | APInt trunc(unsigned width) const; | ||||||||
1365 | |||||||||
1366 | /// Truncate to new width with unsigned saturation. | ||||||||
1367 | /// | ||||||||
1368 | /// If the APInt, treated as unsigned integer, can be losslessly truncated to | ||||||||
1369 | /// the new bitwidth, then return truncated APInt. Else, return max value. | ||||||||
1370 | APInt truncUSat(unsigned width) const; | ||||||||
1371 | |||||||||
1372 | /// Truncate to new width with signed saturation. | ||||||||
1373 | /// | ||||||||
1374 | /// If this APInt, treated as signed integer, can be losslessly truncated to | ||||||||
1375 | /// the new bitwidth, then return truncated APInt. Else, return either | ||||||||
1376 | /// signed min value if the APInt was negative, or signed max value. | ||||||||
1377 | APInt truncSSat(unsigned width) const; | ||||||||
1378 | |||||||||
1379 | /// Sign extend to a new width. | ||||||||
1380 | /// | ||||||||
1381 | /// This operation sign extends the APInt to a new width. If the high order | ||||||||
1382 | /// bit is set, the fill on the left will be done with 1 bits, otherwise zero. | ||||||||
1383 | /// It is an error to specify a width that is less than or equal to the | ||||||||
1384 | /// current width. | ||||||||
1385 | APInt sext(unsigned width) const; | ||||||||
1386 | |||||||||
1387 | /// Zero extend to a new width. | ||||||||
1388 | /// | ||||||||
1389 | /// This operation zero extends the APInt to a new width. The high order bits | ||||||||
1390 | /// are filled with 0 bits. It is an error to specify a width that is less | ||||||||
1391 | /// than or equal to the current width. | ||||||||
1392 | APInt zext(unsigned width) const; | ||||||||
1393 | |||||||||
1394 | /// Sign extend or truncate to width | ||||||||
1395 | /// | ||||||||
1396 | /// Make this APInt have the bit width given by \p width. The value is sign | ||||||||
1397 | /// extended, truncated, or left alone to make it that width. | ||||||||
1398 | APInt sextOrTrunc(unsigned width) const; | ||||||||
1399 | |||||||||
1400 | /// Zero extend or truncate to width | ||||||||
1401 | /// | ||||||||
1402 | /// Make this APInt have the bit width given by \p width. The value is zero | ||||||||
1403 | /// extended, truncated, or left alone to make it that width. | ||||||||
1404 | APInt zextOrTrunc(unsigned width) const; | ||||||||
1405 | |||||||||
1406 | /// Truncate to width | ||||||||
1407 | /// | ||||||||
1408 | /// Make this APInt have the bit width given by \p width. The value is | ||||||||
1409 | /// truncated or left alone to make it that width. | ||||||||
1410 | APInt truncOrSelf(unsigned width) const; | ||||||||
1411 | |||||||||
1412 | /// Sign extend or truncate to width | ||||||||
1413 | /// | ||||||||
1414 | /// Make this APInt have the bit width given by \p width. The value is sign | ||||||||
1415 | /// extended, or left alone to make it that width. | ||||||||
1416 | APInt sextOrSelf(unsigned width) const; | ||||||||
1417 | |||||||||
1418 | /// Zero extend or truncate to width | ||||||||
1419 | /// | ||||||||
1420 | /// Make this APInt have the bit width given by \p width. The value is zero | ||||||||
1421 | /// extended, or left alone to make it that width. | ||||||||
1422 | APInt zextOrSelf(unsigned width) const; | ||||||||
1423 | |||||||||
1424 | /// @} | ||||||||
1425 | /// \name Bit Manipulation Operators | ||||||||
1426 | /// @{ | ||||||||
1427 | |||||||||
1428 | /// Set every bit to 1. | ||||||||
1429 | void setAllBits() { | ||||||||
1430 | if (isSingleWord()) | ||||||||
1431 | U.VAL = WORDTYPE_MAX; | ||||||||
1432 | else | ||||||||
1433 | // Set all the bits in all the words. | ||||||||
1434 | memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE); | ||||||||
1435 | // Clear the unused ones | ||||||||
1436 | clearUnusedBits(); | ||||||||
1437 | } | ||||||||
1438 | |||||||||
1439 | /// Set a given bit to 1. | ||||||||
1440 | /// | ||||||||
1441 | /// Set the given bit to 1 whose position is given as "bitPosition". | ||||||||
1442 | void setBit(unsigned BitPosition) { | ||||||||
1443 | assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth && "BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1443, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1444 | WordType Mask = maskBit(BitPosition); | ||||||||
1445 | if (isSingleWord()) | ||||||||
1446 | U.VAL |= Mask; | ||||||||
1447 | else | ||||||||
1448 | U.pVal[whichWord(BitPosition)] |= Mask; | ||||||||
1449 | } | ||||||||
1450 | |||||||||
1451 | /// Set the sign bit to 1. | ||||||||
1452 | void setSignBit() { | ||||||||
1453 | setBit(BitWidth - 1); | ||||||||
1454 | } | ||||||||
1455 | |||||||||
1456 | /// Set a given bit to a given value. | ||||||||
1457 | void setBitVal(unsigned BitPosition, bool BitValue) { | ||||||||
1458 | if (BitValue) | ||||||||
1459 | setBit(BitPosition); | ||||||||
1460 | else | ||||||||
1461 | clearBit(BitPosition); | ||||||||
1462 | } | ||||||||
1463 | |||||||||
1464 | /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. | ||||||||
1465 | /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls | ||||||||
1466 | /// setBits when \p loBit < \p hiBit. | ||||||||
1467 | /// For \p loBit == \p hiBit wrap case, set every bit to 1. | ||||||||
1468 | void setBitsWithWrap(unsigned loBit, unsigned hiBit) { | ||||||||
1469 | assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range" ) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1469, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1470 | assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range" ) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1470, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1471 | if (loBit < hiBit) { | ||||||||
1472 | setBits(loBit, hiBit); | ||||||||
1473 | return; | ||||||||
1474 | } | ||||||||
1475 | setLowBits(hiBit); | ||||||||
1476 | setHighBits(BitWidth - loBit); | ||||||||
1477 | } | ||||||||
1478 | |||||||||
1479 | /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1. | ||||||||
1480 | /// This function handles case when \p loBit <= \p hiBit. | ||||||||
1481 | void setBits(unsigned loBit, unsigned hiBit) { | ||||||||
1482 | assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range" ) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1482, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1483 | assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range" ) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1483, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1484 | assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit" ) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1484, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1485 | if (loBit == hiBit) | ||||||||
1486 | return; | ||||||||
1487 | if (loBit
| ||||||||
1488 | uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit)); | ||||||||
1489 | mask <<= loBit; | ||||||||
1490 | if (isSingleWord()) | ||||||||
1491 | U.VAL |= mask; | ||||||||
1492 | else | ||||||||
1493 | U.pVal[0] |= mask; | ||||||||
1494 | } else { | ||||||||
1495 | setBitsSlowCase(loBit, hiBit); | ||||||||
1496 | } | ||||||||
1497 | } | ||||||||
1498 | |||||||||
1499 | /// Set the top bits starting from loBit. | ||||||||
1500 | void setBitsFrom(unsigned loBit) { | ||||||||
1501 | return setBits(loBit, BitWidth); | ||||||||
1502 | } | ||||||||
1503 | |||||||||
1504 | /// Set the bottom loBits bits. | ||||||||
1505 | void setLowBits(unsigned loBits) { | ||||||||
1506 | return setBits(0, loBits); | ||||||||
1507 | } | ||||||||
1508 | |||||||||
1509 | /// Set the top hiBits bits. | ||||||||
1510 | void setHighBits(unsigned hiBits) { | ||||||||
1511 | return setBits(BitWidth - hiBits, BitWidth); | ||||||||
1512 | } | ||||||||
1513 | |||||||||
1514 | /// Set every bit to 0. | ||||||||
1515 | void clearAllBits() { | ||||||||
1516 | if (isSingleWord()) | ||||||||
1517 | U.VAL = 0; | ||||||||
1518 | else | ||||||||
1519 | memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE); | ||||||||
1520 | } | ||||||||
1521 | |||||||||
1522 | /// Set a given bit to 0. | ||||||||
1523 | /// | ||||||||
1524 | /// Set the given bit to 0 whose position is given as "bitPosition". | ||||||||
1525 | void clearBit(unsigned BitPosition) { | ||||||||
1526 | assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth && "BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1526, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1527 | WordType Mask = ~maskBit(BitPosition); | ||||||||
1528 | if (isSingleWord()) | ||||||||
1529 | U.VAL &= Mask; | ||||||||
1530 | else | ||||||||
1531 | U.pVal[whichWord(BitPosition)] &= Mask; | ||||||||
1532 | } | ||||||||
1533 | |||||||||
1534 | /// Set bottom loBits bits to 0. | ||||||||
1535 | void clearLowBits(unsigned loBits) { | ||||||||
1536 | assert(loBits <= BitWidth && "More bits than bitwidth")(static_cast <bool> (loBits <= BitWidth && "More bits than bitwidth" ) ? void (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1536, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1537 | APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits); | ||||||||
1538 | *this &= Keep; | ||||||||
1539 | } | ||||||||
1540 | |||||||||
1541 | /// Set the sign bit to 0. | ||||||||
1542 | void clearSignBit() { | ||||||||
1543 | clearBit(BitWidth - 1); | ||||||||
1544 | } | ||||||||
1545 | |||||||||
1546 | /// Toggle every bit to its opposite value. | ||||||||
1547 | void flipAllBits() { | ||||||||
1548 | if (isSingleWord()) { | ||||||||
1549 | U.VAL ^= WORDTYPE_MAX; | ||||||||
1550 | clearUnusedBits(); | ||||||||
1551 | } else { | ||||||||
1552 | flipAllBitsSlowCase(); | ||||||||
1553 | } | ||||||||
1554 | } | ||||||||
1555 | |||||||||
1556 | /// Toggles a given bit to its opposite value. | ||||||||
1557 | /// | ||||||||
1558 | /// Toggle a given bit to its opposite value whose position is given | ||||||||
1559 | /// as "bitPosition". | ||||||||
1560 | void flipBit(unsigned bitPosition); | ||||||||
1561 | |||||||||
1562 | /// Negate this APInt in place. | ||||||||
1563 | void negate() { | ||||||||
1564 | flipAllBits(); | ||||||||
1565 | ++(*this); | ||||||||
1566 | } | ||||||||
1567 | |||||||||
1568 | /// Insert the bits from a smaller APInt starting at bitPosition. | ||||||||
1569 | void insertBits(const APInt &SubBits, unsigned bitPosition); | ||||||||
1570 | void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits); | ||||||||
1571 | |||||||||
1572 | /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits). | ||||||||
1573 | APInt extractBits(unsigned numBits, unsigned bitPosition) const; | ||||||||
1574 | uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const; | ||||||||
1575 | |||||||||
1576 | /// @} | ||||||||
1577 | /// \name Value Characterization Functions | ||||||||
1578 | /// @{ | ||||||||
1579 | |||||||||
1580 | /// Return the number of bits in the APInt. | ||||||||
1581 | unsigned getBitWidth() const { return BitWidth; } | ||||||||
1582 | |||||||||
1583 | /// Get the number of words. | ||||||||
1584 | /// | ||||||||
1585 | /// Here one word's bitwidth equals to that of uint64_t. | ||||||||
1586 | /// | ||||||||
1587 | /// \returns the number of words to hold the integer value of this APInt. | ||||||||
1588 | unsigned getNumWords() const { return getNumWords(BitWidth); } | ||||||||
1589 | |||||||||
1590 | /// Get the number of words. | ||||||||
1591 | /// | ||||||||
1592 | /// *NOTE* Here one word's bitwidth equals to that of uint64_t. | ||||||||
1593 | /// | ||||||||
1594 | /// \returns the number of words to hold the integer value with a given bit | ||||||||
1595 | /// width. | ||||||||
1596 | static unsigned getNumWords(unsigned BitWidth) { | ||||||||
1597 | return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD; | ||||||||
1598 | } | ||||||||
1599 | |||||||||
1600 | /// Compute the number of active bits in the value | ||||||||
1601 | /// | ||||||||
1602 | /// This function returns the number of active bits which is defined as the | ||||||||
1603 | /// bit width minus the number of leading zeros. This is used in several | ||||||||
1604 | /// computations to see how "wide" the value is. | ||||||||
1605 | unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); } | ||||||||
1606 | |||||||||
1607 | /// Compute the number of active words in the value of this APInt. | ||||||||
1608 | /// | ||||||||
1609 | /// This is used in conjunction with getActiveData to extract the raw value of | ||||||||
1610 | /// the APInt. | ||||||||
1611 | unsigned getActiveWords() const { | ||||||||
1612 | unsigned numActiveBits = getActiveBits(); | ||||||||
1613 | return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1; | ||||||||
1614 | } | ||||||||
1615 | |||||||||
1616 | /// Get the minimum bit size for this signed APInt | ||||||||
1617 | /// | ||||||||
1618 | /// Computes the minimum bit width for this APInt while considering it to be a | ||||||||
1619 | /// signed (and probably negative) value. If the value is not negative, this | ||||||||
1620 | /// function returns the same value as getActiveBits()+1. Otherwise, it | ||||||||
1621 | /// returns the smallest bit width that will retain the negative value. For | ||||||||
1622 | /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so | ||||||||
1623 | /// for -1, this function will always return 1. | ||||||||
1624 | unsigned getMinSignedBits() const { return BitWidth - getNumSignBits() + 1; } | ||||||||
1625 | |||||||||
1626 | /// Get zero extended value | ||||||||
1627 | /// | ||||||||
1628 | /// This method attempts to return the value of this APInt as a zero extended | ||||||||
1629 | /// uint64_t. The bitwidth must be <= 64 or the value must fit within a | ||||||||
1630 | /// uint64_t. Otherwise an assertion will result. | ||||||||
1631 | uint64_t getZExtValue() const { | ||||||||
1632 | if (isSingleWord()) | ||||||||
1633 | return U.VAL; | ||||||||
1634 | assert(getActiveBits() <= 64 && "Too many bits for uint64_t")(static_cast <bool> (getActiveBits() <= 64 && "Too many bits for uint64_t") ? void (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1634, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1635 | return U.pVal[0]; | ||||||||
1636 | } | ||||||||
1637 | |||||||||
1638 | /// Get sign extended value | ||||||||
1639 | /// | ||||||||
1640 | /// This method attempts to return the value of this APInt as a sign extended | ||||||||
1641 | /// int64_t. The bit width must be <= 64 or the value must fit within an | ||||||||
1642 | /// int64_t. Otherwise an assertion will result. | ||||||||
1643 | int64_t getSExtValue() const { | ||||||||
1644 | if (isSingleWord()) | ||||||||
1645 | return SignExtend64(U.VAL, BitWidth); | ||||||||
1646 | assert(getMinSignedBits() <= 64 && "Too many bits for int64_t")(static_cast <bool> (getMinSignedBits() <= 64 && "Too many bits for int64_t") ? void (0) : __assert_fail ("getMinSignedBits() <= 64 && \"Too many bits for int64_t\"" , "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include/llvm/ADT/APInt.h" , 1646, __extension__ __PRETTY_FUNCTION__)); | ||||||||
1647 | return int64_t(U.pVal[0]); | ||||||||
1648 | } | ||||||||
1649 | |||||||||
1650 | /// Get bits required for string value. | ||||||||
1651 | /// | ||||||||
1652 | /// This method determines how many bits are required to hold the APInt | ||||||||
1653 | /// equivalent of the string given by \p str. | ||||||||
1654 | static unsigned getBitsNeeded(StringRef str, uint8_t radix); | ||||||||
1655 | |||||||||
1656 | /// The APInt version of the countLeadingZeros functions in | ||||||||
1657 | /// MathExtras.h. | ||||||||
1658 | /// | ||||||||
1659 | /// It counts the number of zeros from the most significant bit to the first | ||||||||
1660 | /// one bit. | ||||||||
1661 | /// | ||||||||
1662 | /// \returns BitWidth if the value is zero, otherwise returns the number of | ||||||||
1663 | /// zeros from the most significant bit to the first one bits. | ||||||||
1664 | unsigned countLeadingZeros() const { | ||||||||
1665 | if (isSingleWord()) { | ||||||||
1666 | unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth; | ||||||||
1667 | return llvm::countLeadingZeros(U.VAL) - unusedBits; | ||||||||
1668 | } | ||||||||
1669 | return countLeadingZerosSlowCase(); | ||||||||
1670 | } | ||||||||
1671 | |||||||||
1672 | /// Count the number of leading one bits. | ||||||||
1673 | /// | ||||||||
1674 | /// This function is an APInt version of the countLeadingOnes | ||||||||
1675 | /// functions in MathExtras.h. It counts the number of ones from the most | ||||||||
1676 | /// significant bit to the first zero bit. | ||||||||
1677 | /// | ||||||||
1678 | /// \returns 0 if the high order bit is not set, otherwise returns the number | ||||||||
1679 | /// of 1 bits from the most significant to the least | ||||||||
1680 | unsigned countLeadingOnes() const { | ||||||||
1681 | if (isSingleWord()) | ||||||||
1682 | return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth)); | ||||||||
1683 | return countLeadingOnesSlowCase(); | ||||||||
1684 | } | ||||||||
1685 | |||||||||
1686 | /// Computes the number of leading bits of this APInt that are equal to its | ||||||||
1687 | /// sign bit. | ||||||||
1688 | unsigned getNumSignBits() const { | ||||||||
1689 | return isNegative() ? countLeadingOnes() : countLeadingZeros(); | ||||||||
1690 | } | ||||||||
1691 | |||||||||
1692 | /// Count the number of trailing zero bits. | ||||||||
1693 | /// | ||||||||
1694 | /// This function is an APInt version of the countTrailingZeros | ||||||||
1695 | /// functions in MathExtras.h. It counts the number of zeros from the least | ||||||||
1696 | /// significant bit to the first set bit. | ||||||||
1697 | /// | ||||||||
1698 | /// \returns BitWidth if the value is zero, otherwise returns the number of | ||||||||
1699 | /// zeros from the least significant bit to the first one bit. | ||||||||
1700 | unsigned countTrailingZeros() const { | ||||||||
1701 | if (isSingleWord()) { | ||||||||
1702 | unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL); | ||||||||
1703 | return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros); | ||||||||
1704 | } | ||||||||
1705 | return countTrailingZerosSlowCase(); | ||||||||
1706 | } | ||||||||
1707 | |||||||||
1708 | /// Count the number of trailing one bits. | ||||||||
1709 | /// | ||||||||
1710 | /// This function is an APInt version of the countTrailingOnes | ||||||||
1711 | /// functions in MathExtras.h. It counts the number of ones from the least | ||||||||
1712 | /// significant bit to the first zero bit. | ||||||||
1713 | /// | ||||||||
1714 | /// \returns BitWidth if the value is all ones, otherwise returns the number | ||||||||
1715 | /// of ones from the least significant bit to the first zero bit. | ||||||||
1716 | unsigned countTrailingOnes() const { | ||||||||
1717 | if (isSingleWord()) | ||||||||
1718 | return llvm::countTrailingOnes(U.VAL); | ||||||||
1719 | return countTrailingOnesSlowCase(); | ||||||||
1720 | } | ||||||||
1721 | |||||||||
1722 | /// Count the number of bits set. | ||||||||
1723 | /// | ||||||||
1724 | /// This function is an APInt version of the countPopulation functions | ||||||||
1725 | /// in MathExtras.h. It counts the number of 1 bits in the APInt value. | ||||||||
1726 | /// | ||||||||
1727 | /// \returns 0 if the value is zero, otherwise returns the number of set bits. | ||||||||
1728 | unsigned countPopulation() const { | ||||||||
1729 | if (isSingleWord()) | ||||||||
1730 | return llvm::countPopulation(U.VAL); | ||||||||
1731 | return countPopulationSlowCase(); | ||||||||
1732 | } | ||||||||
1733 | |||||||||
1734 | /// @} | ||||||||
1735 | /// \name Conversion Functions | ||||||||
1736 | /// @{ | ||||||||
1737 | void print(raw_ostream &OS, bool isSigned) const; | ||||||||
1738 | |||||||||
1739 | /// Converts an APInt to a string and append it to Str. Str is commonly a | ||||||||
1740 | /// SmallString. | ||||||||
1741 | void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed, | ||||||||
1742 | bool formatAsCLiteral = false) const; | ||||||||
1743 | |||||||||
1744 | /// Considers the APInt to be unsigned and converts it into a string in the | ||||||||
1745 | /// radix given. The radix can be 2, 8, 10 16, or 36. | ||||||||
1746 | void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { | ||||||||
1747 | toString(Str, Radix, false, false); | ||||||||
1748 | } | ||||||||
1749 | |||||||||
1750 | /// Considers the APInt to be signed and converts it into a string in the | ||||||||
1751 | /// radix given. The radix can be 2, 8, 10, 16, or 36. | ||||||||
1752 | void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const { | ||||||||
1753 | toString(Str, Radix, true, false); | ||||||||
1754 | } | ||||||||
1755 | |||||||||
1756 | /// \returns a byte-swapped representation of this APInt Value. | ||||||||
1757 | APInt byteSwap() const; | ||||||||
1758 | |||||||||
1759 | /// \returns the value with the bit representation reversed of this APInt | ||||||||
1760 | /// Value. | ||||||||
1761 | APInt reverseBits() const; | ||||||||
1762 | |||||||||
1763 | /// Converts this APInt to a double value. | ||||||||
1764 | double roundToDouble(bool isSigned) const; | ||||||||
1765 | |||||||||
1766 | /// Converts this unsigned APInt to a double value. | ||||||||
1767 | double roundToDouble() const { return roundToDouble(false); } | ||||||||
1768 | |||||||||
1769 | /// Converts this signed APInt to a double value. | ||||||||
1770 | double signedRoundToDouble() const { return roundToDouble(true); } | ||||||||
1771 | |||||||||
1772 | /// Converts APInt bits to a double | ||||||||
1773 | /// | ||||||||
1774 | /// The conversion does not do a translation from integer to double, it just | ||||||||
1775 | /// re-interprets the bits as a double. Note that it is valid to do this on | ||||||||
1776 | /// any bit width. Exactly 64 bits will be translated. | ||||||||
1777 | double bitsToDouble() const { | ||||||||
1778 | return BitsToDouble(getWord(0)); | ||||||||
1779 | } | ||||||||
1780 | |||||||||
1781 | /// Converts APInt bits to a float | ||||||||
1782 | /// | ||||||||
1783 | /// The conversion does not do a translation from integer to float, it just | ||||||||
1784 | /// re-interprets the bits as a float. Note that it is valid to do this on | ||||||||
1785 | /// any bit width. Exactly 32 bits will be translated. | ||||||||
1786 | float bitsToFloat() const { | ||||||||
1787 | return BitsToFloat(static_cast<uint32_t>(getWord(0))); | ||||||||
1788 | } | ||||||||
1789 | |||||||||
1790 | /// Converts a double to APInt bits. | ||||||||
1791 | /// | ||||||||
1792 | /// The conversion does not do a translation from double to integer, it just | ||||||||
1793 | /// re-interprets the bits of the double. | ||||||||
1794 | static APInt doubleToBits(double V) { | ||||||||
1795 | return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V)); | ||||||||
1796 | } | ||||||||
1797 | |||||||||
1798 | /// Converts a float to APInt bits. | ||||||||
1799 | /// | ||||||||
1800 | /// The conversion does not do a translation from float to integer, it just | ||||||||
1801 | /// re-interprets the bits of the float. | ||||||||
1802 | static APInt floatToBits(float V) { | ||||||||
1803 | return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V)); | ||||||||
1804 | } | ||||||||
1805 | |||||||||
1806 | /// @} | ||||||||
1807 | /// \name Mathematics Operations | ||||||||
1808 | /// @{ | ||||||||
1809 | |||||||||
1810 | /// \returns the floor log base 2 of this APInt. | ||||||||
1811 | unsigned logBase2() const { return getActiveBits() - 1; } | ||||||||
1812 | |||||||||
1813 | /// \returns the ceil log base 2 of this APInt. | ||||||||
1814 | unsigned ceilLogBase2() const { | ||||||||
1815 | APInt temp(*this); | ||||||||
1816 | --temp; | ||||||||
1817 | return temp.getActiveBits(); | ||||||||
1818 | } | ||||||||
1819 | |||||||||
1820 | /// \returns the nearest log base 2 of this APInt. Ties round up. | ||||||||
1821 | /// | ||||||||
1822 | /// NOTE: When we have a BitWidth of 1, we define: | ||||||||
1823 | /// | ||||||||
1824 | /// log2(0) = UINT32_MAX | ||||||||
1825 | /// log2(1) = 0 | ||||||||
1826 | /// | ||||||||
1827 | /// to get around any mathematical concerns resulting from | ||||||||
1828 | /// referencing 2 in a space where 2 does no exist. | ||||||||
1829 | unsigned nearestLogBase2() const { | ||||||||
1830 | // Special case when we have a bitwidth of 1. If VAL is 1, then we | ||||||||
1831 | // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to | ||||||||
1832 | // UINT32_MAX. | ||||||||
1833 | if (BitWidth == 1) | ||||||||
1834 | return U.VAL - 1; | ||||||||
1835 | |||||||||
1836 | // Handle the zero case. | ||||||||
1837 | if (isNullValue()) | ||||||||
1838 | return UINT32_MAX(4294967295U); | ||||||||
1839 | |||||||||
1840 | // The non-zero case is handled by computing: | ||||||||
1841 | // | ||||||||
1842 | // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1]. | ||||||||
1843 | // | ||||||||
1844 | // where x[i] is referring to the value of the ith bit of x. | ||||||||
1845 | unsigned lg = logBase2(); | ||||||||
1846 | return lg + unsigned((*this)[lg - 1]); | ||||||||
1847 | } | ||||||||
1848 | |||||||||
1849 | /// \returns the log base 2 of this APInt if its an exact power of two, -1 | ||||||||
1850 | /// otherwise | ||||||||
1851 | int32_t exactLogBase2() const { | ||||||||
1852 | if (!isPowerOf2()) | ||||||||
1853 | return -1; | ||||||||
1854 | return logBase2(); | ||||||||
1855 | } | ||||||||
1856 | |||||||||
1857 | /// Compute the square root | ||||||||
1858 | APInt sqrt() const; | ||||||||
1859 | |||||||||
1860 | /// Get the absolute value; | ||||||||
1861 | /// | ||||||||
1862 | /// If *this is < 0 then return -(*this), otherwise *this; | ||||||||
1863 | APInt abs() const { | ||||||||
1864 | if (isNegative()) | ||||||||
1865 | return -(*this); | ||||||||
1866 | return *this; | ||||||||
1867 | } | ||||||||
1868 | |||||||||
1869 | /// \returns the multiplicative inverse for a given modulo. | ||||||||
1870 | APInt multiplicativeInverse(const APInt &modulo) const; | ||||||||
1871 | |||||||||
1872 | /// @} | ||||||||
1873 | /// \name Support for division by constant | ||||||||
1874 | /// @{ | ||||||||
1875 | |||||||||
1876 | /// Calculate the magic number for signed division by a constant. | ||||||||
1877 | struct ms; | ||||||||
1878 | ms magic() const; | ||||||||
1879 | |||||||||
1880 | /// Calculate the magic number for unsigned division by a constant. | ||||||||
1881 | struct mu; | ||||||||
1882 | mu magicu(unsigned LeadingZeros = 0) const; | ||||||||
1883 | |||||||||
1884 | /// @} | ||||||||
1885 | /// \name Building-block Operations for APInt and APFloat | ||||||||
1886 | /// @{ | ||||||||
1887 | |||||||||
1888 | // These building block operations operate on a representation of arbitrary | ||||||||
1889 | // precision, two's-complement, bignum integer values. They should be | ||||||||
1890 | // sufficient to implement APInt and APFloat bignum requirements. Inputs are | ||||||||
1891 | // generally a pointer to the base of an array of integer parts, representing | ||||||||
1892 | // an unsigned bignum, and a count of how many parts there are. | ||||||||
1893 | |||||||||
1894 | /// Sets the least significant part of a bignum to the input value, and zeroes | ||||||||
1895 | /// out higher parts. | ||||||||
1896 | static void tcSet(WordType *, WordType, unsigned); | ||||||||
1897 | |||||||||
1898 | /// Assign one bignum to another. | ||||||||
1899 | static void tcAssign(WordType *, const WordType *, unsigned); | ||||||||
1900 | |||||||||
1901 | /// Returns true if a bignum is zero, false otherwise. | ||||||||
1902 | static bool tcIsZero(const WordType *, unsigned); | ||||||||
1903 | |||||||||
1904 | /// Extract the given bit of a bignum; returns 0 or 1. Zero-based. | ||||||||
1905 | static int tcExtractBit(const WordType *, unsigned bit); | ||||||||
1906 | |||||||||
1907 | /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to | ||||||||
1908 | /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least | ||||||||
1909 | /// significant bit of DST. All high bits above srcBITS in DST are | ||||||||
1910 | /// zero-filled. | ||||||||
1911 | static void tcExtract(WordType *, unsigned dstCount, | ||||||||
1912 | const WordType *, unsigned srcBits, | ||||||||
1913 | unsigned srcLSB); | ||||||||
1914 | |||||||||
1915 | /// Set the given bit of a bignum. Zero-based. | ||||||||
1916 | static void tcSetBit(WordType *, unsigned bit); | ||||||||
1917 | |||||||||
1918 | /// Clear the given bit of a bignum. Zero-based. | ||||||||
1919 | static void tcClearBit(WordType *, unsigned bit); | ||||||||
1920 | |||||||||
1921 | /// Returns the bit number of the least or most significant set bit of a | ||||||||
1922 | /// number. If the input number has no bits set -1U is returned. | ||||||||
1923 | static unsigned tcLSB(const WordType *, unsigned n); | ||||||||
1924 | static unsigned tcMSB(const WordType *parts, unsigned n); | ||||||||
1925 | |||||||||
1926 | /// Negate a bignum in-place. | ||||||||
1927 | static void tcNegate(WordType *, unsigned); | ||||||||
1928 | |||||||||
1929 | /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag. | ||||||||
1930 | static WordType tcAdd(WordType *, const WordType *, | ||||||||
1931 | WordType carry, unsigned); | ||||||||
1932 | /// DST += RHS. Returns the carry flag. | ||||||||
1933 | static WordType tcAddPart(WordType *, WordType, unsigned); | ||||||||
1934 | |||||||||
1935 | /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag. | ||||||||
1936 | static WordType tcSubtract(WordType *, const WordType *, | ||||||||
1937 | WordType carry, unsigned); | ||||||||
1938 | /// DST -= RHS. Returns the carry flag. | ||||||||
1939 | static WordType tcSubtractPart(WordType *, WordType, unsigned); | ||||||||
1940 | |||||||||
1941 | /// DST += SRC * MULTIPLIER + PART if add is true | ||||||||
1942 | /// DST = SRC * MULTIPLIER + PART if add is false | ||||||||
1943 | /// | ||||||||
1944 | /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must | ||||||||
1945 | /// start at the same point, i.e. DST == SRC. | ||||||||
1946 | /// | ||||||||
1947 | /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned. | ||||||||
1948 | /// Otherwise DST is filled with the least significant DSTPARTS parts of the | ||||||||
1949 | /// result, and if all of the omitted higher parts were zero return zero, | ||||||||
1950 | /// otherwise overflow occurred and return one. | ||||||||
1951 | static int tcMultiplyPart(WordType *dst, const WordType *src, | ||||||||
1952 | WordType multiplier, WordType carry, | ||||||||
1953 | unsigned srcParts, unsigned dstParts, | ||||||||
1954 | bool add); | ||||||||
1955 | |||||||||
1956 | /// DST = LHS * RHS, where DST has the same width as the operands and is | ||||||||
1957 | /// filled with the least significant parts of the result. Returns one if | ||||||||
1958 | /// overflow occurred, otherwise zero. DST must be disjoint from both | ||||||||
1959 | /// operands. | ||||||||
1960 | static int tcMultiply(WordType *, const WordType *, const WordType *, | ||||||||
1961 | unsigned); | ||||||||
1962 | |||||||||
1963 | /// DST = LHS * RHS, where DST has width the sum of the widths of the | ||||||||
1964 | /// operands. No overflow occurs. DST must be disjoint from both operands. | ||||||||
1965 | static void tcFullMultiply(WordType *, const WordType *, | ||||||||
1966 | const WordType *, unsigned, unsigned); | ||||||||
1967 | |||||||||
1968 | /// If RHS is zero LHS and REMAINDER are left unchanged, return one. | ||||||||
1969 | /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set | ||||||||
1970 | /// REMAINDER to the remainder, return zero. i.e. | ||||||||
1971 | /// | ||||||||
1972 | /// OLD_LHS = RHS * LHS + REMAINDER | ||||||||
1973 | /// | ||||||||
1974 | /// SCRATCH is a bignum of the same size as the operands and result for use by | ||||||||
1975 | /// the routine; its contents need not be initialized and are destroyed. LHS, | ||||||||
1976 | /// REMAINDER and SCRATCH must be distinct. | ||||||||
1977 | static int tcDivide(WordType *lhs, const WordType *rhs, | ||||||||
1978 | WordType *remainder, WordType *scratch, | ||||||||
1979 | unsigned parts); | ||||||||
1980 | |||||||||
1981 | /// Shift a bignum left Count bits. Shifted in bits are zero. There are no | ||||||||
1982 | /// restrictions on Count. | ||||||||
1983 | static void tcShiftLeft(WordType *, unsigned Words, unsigned Count); | ||||||||
1984 | |||||||||
1985 | /// Shift a bignum right Count bits. Shifted in bits are zero. There are no | ||||||||
1986 | /// restrictions on Count. | ||||||||
1987 | static void tcShiftRight(WordType *, unsigned Words, unsigned Count); | ||||||||
1988 | |||||||||
1989 | /// The obvious AND, OR and XOR and complement operations. | ||||||||
1990 | static void tcAnd(WordType *, const WordType *, unsigned); | ||||||||
1991 | static void tcOr(WordType *, const WordType *, unsigned); | ||||||||
1992 | static void tcXor(WordType *, const WordType *, unsigned); | ||||||||
1993 | static void tcComplement(WordType *, unsigned); | ||||||||
1994 | |||||||||
1995 | /// Comparison (unsigned) of two bignums. | ||||||||
1996 | static int tcCompare(const WordType *, const WordType *, unsigned); | ||||||||
1997 | |||||||||
1998 | /// Increment a bignum in-place. Return the carry flag. | ||||||||
1999 | static WordType tcIncrement(WordType *dst, unsigned parts) { | ||||||||
2000 | return tcAddPart(dst, 1, parts); | ||||||||
2001 | } | ||||||||
2002 | |||||||||
2003 | /// Decrement a bignum in-place. Return the borrow flag. | ||||||||
2004 | static WordType tcDecrement(WordType *dst, unsigned parts) { | ||||||||
2005 | return tcSubtractPart(dst, 1, parts); | ||||||||
2006 | } | ||||||||
2007 | |||||||||
2008 | /// Set the least significant BITS and clear the rest. | ||||||||
2009 | static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits); | ||||||||
2010 | |||||||||
2011 | /// debug method | ||||||||
2012 | void dump() const; | ||||||||
2013 | |||||||||
2014 | /// @} | ||||||||
2015 | }; | ||||||||
2016 | |||||||||
2017 | /// Magic data for optimising signed division by a constant. | ||||||||
2018 | struct APInt::ms { | ||||||||
2019 | APInt m; ///< magic number | ||||||||
2020 | unsigned s; ///< shift amount | ||||||||
2021 | }; | ||||||||
2022 | |||||||||
2023 | /// Magic data for optimising unsigned division by a constant. | ||||||||
2024 | struct APInt::mu { | ||||||||
2025 | APInt m; ///< magic number | ||||||||
2026 | bool a; ///< add indicator | ||||||||
2027 | unsigned s; ///< shift amount | ||||||||
2028 | }; | ||||||||
2029 | |||||||||
2030 | inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; } | ||||||||
2031 | |||||||||
2032 | inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; } | ||||||||
2033 | |||||||||
2034 | /// Unary bitwise complement operator. | ||||||||
2035 | /// | ||||||||
2036 | /// \returns an APInt that is the bitwise complement of \p v. | ||||||||
2037 | inline APInt operator~(APInt v) { | ||||||||
2038 | v.flipAllBits(); | ||||||||
2039 | return v; | ||||||||
2040 | } | ||||||||
2041 | |||||||||
2042 | inline APInt operator&(APInt a, const APInt &b) { | ||||||||
2043 | a &= b; | ||||||||
2044 | return a; | ||||||||
2045 | } | ||||||||
2046 | |||||||||
2047 | inline APInt operator&(const APInt &a, APInt &&b) { | ||||||||
2048 | b &= a; | ||||||||
2049 | return std::move(b); | ||||||||
2050 | } | ||||||||
2051 | |||||||||
2052 | inline APInt operator&(APInt a, uint64_t RHS) { | ||||||||
2053 | a &= RHS; | ||||||||
2054 | return a; | ||||||||
2055 | } | ||||||||
2056 | |||||||||
2057 | inline APInt operator&(uint64_t LHS, APInt b) { | ||||||||
2058 | b &= LHS; | ||||||||
2059 | return b; | ||||||||
2060 | } | ||||||||
2061 | |||||||||
2062 | inline APInt operator|(APInt a, const APInt &b) { | ||||||||
2063 | a |= b; | ||||||||
2064 | return a; | ||||||||
2065 | } | ||||||||
2066 | |||||||||
2067 | inline APInt operator|(const APInt &a, APInt &&b) { | ||||||||
2068 | b |= a; | ||||||||
2069 | return std::move(b); | ||||||||
2070 | } | ||||||||
2071 | |||||||||
2072 | inline APInt operator|(APInt a, uint64_t RHS) { | ||||||||
2073 | a |= RHS; | ||||||||
2074 | return a; | ||||||||
2075 | } | ||||||||
2076 | |||||||||
2077 | inline APInt operator|(uint64_t LHS, APInt b) { | ||||||||
2078 | b |= LHS; | ||||||||
2079 | return b; | ||||||||
2080 | } | ||||||||
2081 | |||||||||
2082 | inline APInt operator^(APInt a, const APInt &b) { | ||||||||
2083 | a ^= b; | ||||||||
2084 | return a; | ||||||||
2085 | } | ||||||||
2086 | |||||||||
2087 | inline APInt operator^(const APInt &a, APInt &&b) { | ||||||||
2088 | b ^= a; | ||||||||
2089 | return std::move(b); | ||||||||
2090 | } | ||||||||
2091 | |||||||||
2092 | inline APInt operator^(APInt a, uint64_t RHS) { | ||||||||
2093 | a ^= RHS; | ||||||||
2094 | return a; | ||||||||
2095 | } | ||||||||
2096 | |||||||||
2097 | inline APInt operator^(uint64_t LHS, APInt b) { | ||||||||
2098 | b ^= LHS; | ||||||||
2099 | return b; | ||||||||
2100 | } | ||||||||
2101 | |||||||||
2102 | inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) { | ||||||||
2103 | I.print(OS, true); | ||||||||
2104 | return OS; | ||||||||
2105 | } | ||||||||
2106 | |||||||||
2107 | inline APInt operator-(APInt v) { | ||||||||
2108 | v.negate(); | ||||||||
2109 | return v; | ||||||||
2110 | } | ||||||||
2111 | |||||||||
2112 | inline APInt operator+(APInt a, const APInt &b) { | ||||||||
2113 | a += b; | ||||||||
2114 | return a; | ||||||||
2115 | } | ||||||||
2116 | |||||||||
2117 | inline APInt operator+(const APInt &a, APInt &&b) { | ||||||||
2118 | b += a; | ||||||||
2119 | return std::move(b); | ||||||||
2120 | } | ||||||||
2121 | |||||||||
2122 | inline APInt operator+(APInt a, uint64_t RHS) { | ||||||||
2123 | a += RHS; | ||||||||
2124 | return a; | ||||||||
2125 | } | ||||||||
2126 | |||||||||
2127 | inline APInt operator+(uint64_t LHS, APInt b) { | ||||||||
2128 | b += LHS; | ||||||||
2129 | return b; | ||||||||
2130 | } | ||||||||
2131 | |||||||||
2132 | inline APInt operator-(APInt a, const APInt &b) { | ||||||||
2133 | a -= b; | ||||||||
2134 | return a; | ||||||||
2135 | } | ||||||||
2136 | |||||||||
2137 | inline APInt operator-(const APInt &a, APInt &&b) { | ||||||||
2138 | b.negate(); | ||||||||
2139 | b += a; | ||||||||
2140 | return std::move(b); | ||||||||
2141 | } | ||||||||
2142 | |||||||||
2143 | inline APInt operator-(APInt a, uint64_t RHS) { | ||||||||
2144 | a -= RHS; | ||||||||
2145 | return a; | ||||||||
2146 | } | ||||||||
2147 | |||||||||
2148 | inline APInt operator-(uint64_t LHS, APInt b) { | ||||||||
2149 | b.negate(); | ||||||||
2150 | b += LHS; | ||||||||
2151 | return b; | ||||||||
2152 | } | ||||||||
2153 | |||||||||
2154 | inline APInt operator*(APInt a, uint64_t RHS) { | ||||||||
2155 | a *= RHS; | ||||||||
2156 | return a; | ||||||||
2157 | } | ||||||||
2158 | |||||||||
2159 | inline APInt operator*(uint64_t LHS, APInt b) { | ||||||||
2160 | b *= LHS; | ||||||||
2161 | return b; | ||||||||
2162 | } | ||||||||
2163 | |||||||||
2164 | |||||||||
2165 | namespace APIntOps { | ||||||||
2166 | |||||||||
2167 | /// Determine the smaller of two APInts considered to be signed. | ||||||||
2168 | inline const APInt &smin(const APInt &A, const APInt &B) { | ||||||||
2169 | return A.slt(B) ? A : B; | ||||||||
2170 | } | ||||||||
2171 | |||||||||
2172 | /// Determine the larger of two APInts considered to be signed. | ||||||||
2173 | inline const APInt &smax(const APInt &A, const APInt &B) { | ||||||||
2174 | return A.sgt(B) ? A : B; | ||||||||
2175 | } | ||||||||
2176 | |||||||||
2177 | /// Determine the smaller of two APInts considered to be unsigned. | ||||||||
2178 | inline const APInt &umin(const APInt &A, const APInt &B) { | ||||||||
2179 | return A.ult(B) ? A : B; | ||||||||
2180 | } | ||||||||
2181 | |||||||||
2182 | /// Determine the larger of two APInts considered to be unsigned. | ||||||||
2183 | inline const APInt &umax(const APInt &A, const APInt &B) { | ||||||||
2184 | return A.ugt(B) ? A : B; | ||||||||
2185 | } | ||||||||
2186 | |||||||||
2187 | /// Compute GCD of two unsigned APInt values. | ||||||||
2188 | /// | ||||||||
2189 | /// This function returns the greatest common divisor of the two APInt values | ||||||||
2190 | /// using Stein's algorithm. | ||||||||
2191 | /// | ||||||||
2192 | /// \returns the greatest common divisor of A and B. | ||||||||
2193 | APInt GreatestCommonDivisor(APInt A, APInt B); | ||||||||
2194 | |||||||||
2195 | /// Converts the given APInt to a double value. | ||||||||
2196 | /// | ||||||||
2197 | /// Treats the APInt as an unsigned value for conversion purposes. | ||||||||
2198 | inline double RoundAPIntToDouble(const APInt &APIVal) { | ||||||||
2199 | return APIVal.roundToDouble(); | ||||||||
2200 | } | ||||||||
2201 | |||||||||
2202 | /// Converts the given APInt to a double value. | ||||||||
2203 | /// | ||||||||
2204 | /// Treats the APInt as a signed value for conversion purposes. | ||||||||
2205 | inline double RoundSignedAPIntToDouble(const APInt &APIVal) { | ||||||||
2206 | return APIVal.signedRoundToDouble(); | ||||||||
2207 | } | ||||||||
2208 | |||||||||
2209 | /// Converts the given APInt to a float value. | ||||||||
2210 | inline float RoundAPIntToFloat(const APInt &APIVal) { | ||||||||
2211 | return float(RoundAPIntToDouble(APIVal)); | ||||||||
2212 | } | ||||||||
2213 | |||||||||
2214 | /// Converts the given APInt to a float value. | ||||||||
2215 | /// | ||||||||
2216 | /// Treats the APInt as a signed value for conversion purposes. | ||||||||
2217 | inline float RoundSignedAPIntToFloat(const APInt &APIVal) { | ||||||||
2218 | return float(APIVal.signedRoundToDouble()); | ||||||||
2219 | } | ||||||||
2220 | |||||||||
2221 | /// Converts the given double value into a APInt. | ||||||||
2222 | /// | ||||||||
2223 | /// This function convert a double value to an APInt value. | ||||||||
2224 | APInt RoundDoubleToAPInt(double Double, unsigned width); | ||||||||
2225 | |||||||||
2226 | /// Converts a float value into a APInt. | ||||||||
2227 | /// | ||||||||
2228 | /// Converts a float value into an APInt value. | ||||||||
2229 | inline APInt RoundFloatToAPInt(float Float, unsigned width) { | ||||||||
2230 | return RoundDoubleToAPInt(double(Float), width); | ||||||||
2231 | } | ||||||||
2232 | |||||||||
2233 | /// Return A unsign-divided by B, rounded by the given rounding mode. | ||||||||
2234 | APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM); | ||||||||
2235 | |||||||||
2236 | /// Return A sign-divided by B, rounded by the given rounding mode. | ||||||||
2237 | APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM); | ||||||||
2238 | |||||||||
2239 | /// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range | ||||||||
2240 | /// (e.g. 32 for i32). | ||||||||
2241 | /// This function finds the smallest number n, such that | ||||||||
2242 | /// (a) n >= 0 and q(n) = 0, or | ||||||||
2243 | /// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all | ||||||||
2244 | /// integers, belong to two different intervals [Rk, Rk+R), | ||||||||
2245 | /// where R = 2^BW, and k is an integer. | ||||||||
2246 | /// The idea here is to find when q(n) "overflows" 2^BW, while at the | ||||||||
2247 | /// same time "allowing" subtraction. In unsigned modulo arithmetic a | ||||||||
2248 | /// subtraction (treated as addition of negated numbers) would always | ||||||||
2249 | /// count as an overflow, but here we want to allow values to decrease | ||||||||
2250 | /// and increase as long as they are within the same interval. | ||||||||
2251 | /// Specifically, adding of two negative numbers should not cause an | ||||||||
2252 | /// overflow (as long as the magnitude does not exceed the bit width). | ||||||||
2253 | /// On the other hand, given a positive number, adding a negative | ||||||||
2254 | /// number to it can give a negative result, which would cause the | ||||||||
2255 | /// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is | ||||||||
2256 | /// treated as a special case of an overflow. | ||||||||
2257 | /// | ||||||||
2258 | /// This function returns None if after finding k that minimizes the | ||||||||
2259 | /// positive solution to q(n) = kR, both solutions are contained between | ||||||||
2260 | /// two consecutive integers. | ||||||||
2261 | /// | ||||||||
2262 | /// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation | ||||||||
2263 | /// in arithmetic modulo 2^BW, and treating the values as signed) by the | ||||||||
2264 | /// virtue of *signed* overflow. This function will *not* find such an n, | ||||||||
2265 | /// however it may find a value of n satisfying the inequalities due to | ||||||||
2266 | /// an *unsigned* overflow (if the values are treated as unsigned). | ||||||||
2267 | /// To find a solution for a signed overflow, treat it as a problem of | ||||||||
2268 | /// finding an unsigned overflow with a range with of BW-1. | ||||||||
2269 | /// | ||||||||
2270 | /// The returned value may have a different bit width from the input | ||||||||
2271 | /// coefficients. | ||||||||
2272 | Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, | ||||||||
2273 | unsigned RangeWidth); | ||||||||
2274 | |||||||||
2275 | /// Compare two values, and if they are different, return the position of the | ||||||||
2276 | /// most significant bit that is different in the values. | ||||||||
2277 | Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A, | ||||||||
2278 | const APInt &B); | ||||||||
2279 | |||||||||
2280 | } // End of APIntOps namespace | ||||||||
2281 | |||||||||
2282 | // See friend declaration above. This additional declaration is required in | ||||||||
2283 | // order to compile LLVM with IBM xlC compiler. | ||||||||
2284 | hash_code hash_value(const APInt &Arg); | ||||||||
2285 | |||||||||
2286 | /// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst | ||||||||
2287 | /// with the integer held in IntVal. | ||||||||
2288 | void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes); | ||||||||
2289 | |||||||||
2290 | /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting | ||||||||
2291 | /// from Src into IntVal, which is assumed to be wide enough and to hold zero. | ||||||||
2292 | void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes); | ||||||||
2293 | |||||||||
2294 | /// Provide DenseMapInfo for APInt. | ||||||||
2295 | template <> struct DenseMapInfo<APInt> { | ||||||||
2296 | static inline APInt getEmptyKey() { | ||||||||
2297 | APInt V(nullptr, 0); | ||||||||
2298 | V.U.VAL = 0; | ||||||||
2299 | return V; | ||||||||
2300 | } | ||||||||
2301 | |||||||||
2302 | static inline APInt getTombstoneKey() { | ||||||||
2303 | APInt V(nullptr, 0); | ||||||||
2304 | V.U.VAL = 1; | ||||||||
2305 | return V; | ||||||||
2306 | } | ||||||||
2307 | |||||||||
2308 | static unsigned getHashValue(const APInt &Key); | ||||||||
2309 | |||||||||
2310 | static bool isEqual(const APInt &LHS, const APInt &RHS) { | ||||||||
2311 | return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS; | ||||||||
2312 | } | ||||||||
2313 | }; | ||||||||
2314 | |||||||||
2315 | } // namespace llvm | ||||||||
2316 | |||||||||
2317 | #endif |