Bug Summary

File:llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1547, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/Hexagon -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/ADT/APInt.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/SetVector.h"
12#include "llvm/ADT/SmallPtrSet.h"
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/ADT/SmallVector.h"
15#include "llvm/ADT/StringRef.h"
16#include "llvm/ADT/Triple.h"
17#include "llvm/Analysis/AliasAnalysis.h"
18#include "llvm/Analysis/InstructionSimplify.h"
19#include "llvm/Analysis/LoopInfo.h"
20#include "llvm/Analysis/LoopPass.h"
21#include "llvm/Analysis/MemoryLocation.h"
22#include "llvm/Analysis/ScalarEvolution.h"
23#include "llvm/Analysis/ScalarEvolutionExpander.h"
24#include "llvm/Analysis/ScalarEvolutionExpressions.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/Analysis/ValueTracking.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DataLayout.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/IntrinsicInst.h"
41#include "llvm/IR/Intrinsics.h"
42#include "llvm/IR/IntrinsicsHexagon.h"
43#include "llvm/IR/Module.h"
44#include "llvm/IR/PatternMatch.h"
45#include "llvm/IR/Type.h"
46#include "llvm/IR/User.h"
47#include "llvm/IR/Value.h"
48#include "llvm/InitializePasses.h"
49#include "llvm/Pass.h"
50#include "llvm/Support/Casting.h"
51#include "llvm/Support/CommandLine.h"
52#include "llvm/Support/Compiler.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/KnownBits.h"
56#include "llvm/Support/raw_ostream.h"
57#include "llvm/Transforms/Scalar.h"
58#include "llvm/Transforms/Utils.h"
59#include "llvm/Transforms/Utils/Local.h"
60#include <algorithm>
61#include <array>
62#include <cassert>
63#include <cstdint>
64#include <cstdlib>
65#include <deque>
66#include <functional>
67#include <iterator>
68#include <map>
69#include <set>
70#include <utility>
71#include <vector>
72
73#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
74
75using namespace llvm;
76
77static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
78 cl::Hidden, cl::init(false),
79 cl::desc("Disable generation of memcpy in loop idiom recognition"));
80
81static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
82 cl::Hidden, cl::init(false),
83 cl::desc("Disable generation of memmove in loop idiom recognition"));
84
85static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
86 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
87 "check guarding the memmove."));
88
89static cl::opt<unsigned> CompileTimeMemSizeThreshold(
90 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
91 cl::desc("Threshold (in bytes) to perform the transformation, if the "
92 "runtime loop count (mem transfer size) is known at compile-time."));
93
94static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
95 cl::Hidden, cl::init(true),
96 cl::desc("Only enable generating memmove in non-nested loops"));
97
98static cl::opt<bool> HexagonVolatileMemcpy(
99 "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
100 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
101
102static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
103 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
104
105static const char *HexagonVolatileMemcpyName
106 = "hexagon_memcpy_forward_vp4cp4n2";
107
108
109namespace llvm {
110
111 void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
112 Pass *createHexagonLoopIdiomPass();
113
114} // end namespace llvm
115
116namespace {
117
118 class HexagonLoopIdiomRecognize : public LoopPass {
119 public:
120 static char ID;
121
122 explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
123 initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
124 }
125
126 StringRef getPassName() const override {
127 return "Recognize Hexagon-specific loop idioms";
128 }
129
130 void getAnalysisUsage(AnalysisUsage &AU) const override {
131 AU.addRequired<LoopInfoWrapperPass>();
132 AU.addRequiredID(LoopSimplifyID);
133 AU.addRequiredID(LCSSAID);
134 AU.addRequired<AAResultsWrapperPass>();
135 AU.addPreserved<AAResultsWrapperPass>();
136 AU.addRequired<ScalarEvolutionWrapperPass>();
137 AU.addRequired<DominatorTreeWrapperPass>();
138 AU.addRequired<TargetLibraryInfoWrapperPass>();
139 AU.addPreserved<TargetLibraryInfoWrapperPass>();
140 }
141
142 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
143
144 private:
145 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
146 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
147 void collectStores(Loop *CurLoop, BasicBlock *BB,
148 SmallVectorImpl<StoreInst*> &Stores);
149 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
150 bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
151 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
152 SmallVectorImpl<BasicBlock*> &ExitBlocks);
153 bool runOnCountableLoop(Loop *L);
154
155 AliasAnalysis *AA;
156 const DataLayout *DL;
157 DominatorTree *DT;
158 LoopInfo *LF;
159 const TargetLibraryInfo *TLI;
160 ScalarEvolution *SE;
161 bool HasMemcpy, HasMemmove;
162 };
163
164 struct Simplifier {
165 struct Rule {
166 using FuncType = std::function<Value* (Instruction*, LLVMContext&)>;
167 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
168 StringRef Name; // For debugging.
169 FuncType Fn;
170 };
171
172 void addRule(StringRef N, const Rule::FuncType &F) {
173 Rules.push_back(Rule(N, F));
174 }
175
176 private:
177 struct WorkListType {
178 WorkListType() = default;
179
180 void push_back(Value* V) {
181 // Do not push back duplicates.
182 if (!S.count(V)) { Q.push_back(V); S.insert(V); }
183 }
184
185 Value *pop_front_val() {
186 Value *V = Q.front(); Q.pop_front(); S.erase(V);
187 return V;
188 }
189
190 bool empty() const { return Q.empty(); }
191
192 private:
193 std::deque<Value*> Q;
194 std::set<Value*> S;
195 };
196
197 using ValueSetType = std::set<Value *>;
198
199 std::vector<Rule> Rules;
200
201 public:
202 struct Context {
203 using ValueMapType = DenseMap<Value *, Value *>;
204
205 Value *Root;
206 ValueSetType Used; // The set of all cloned values used by Root.
207 ValueSetType Clones; // The set of all cloned values.
208 LLVMContext &Ctx;
209
210 Context(Instruction *Exp)
211 : Ctx(Exp->getParent()->getParent()->getContext()) {
212 initialize(Exp);
213 }
214
215 ~Context() { cleanup(); }
216
217 void print(raw_ostream &OS, const Value *V) const;
218 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
219
220 private:
221 friend struct Simplifier;
222
223 void initialize(Instruction *Exp);
224 void cleanup();
225
226 template <typename FuncT> void traverse(Value *V, FuncT F);
227 void record(Value *V);
228 void use(Value *V);
229 void unuse(Value *V);
230
231 bool equal(const Instruction *I, const Instruction *J) const;
232 Value *find(Value *Tree, Value *Sub) const;
233 Value *subst(Value *Tree, Value *OldV, Value *NewV);
234 void replace(Value *OldV, Value *NewV);
235 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
236 };
237
238 Value *simplify(Context &C);
239 };
240
241 struct PE {
242 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
243
244 const Simplifier::Context &C;
245 const Value *V;
246 };
247
248 LLVM_ATTRIBUTE_USED__attribute__((__used__))
249 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
250 P.C.print(OS, P.V ? P.V : P.C.Root);
251 return OS;
252 }
253
254} // end anonymous namespace
255
256char HexagonLoopIdiomRecognize::ID = 0;
257
258INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
259 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
260INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
261INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
262INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
263INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
264INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
265INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
266INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
267INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
268 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
269
270template <typename FuncT>
271void Simplifier::Context::traverse(Value *V, FuncT F) {
272 WorkListType Q;
273 Q.push_back(V);
274
275 while (!Q.empty()) {
276 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
277 if (!U || U->getParent())
278 continue;
279 if (!F(U))
280 continue;
281 for (Value *Op : U->operands())
282 Q.push_back(Op);
283 }
284}
285
286void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
287 const auto *U = dyn_cast<const Instruction>(V);
288 if (!U) {
289 OS << V << '(' << *V << ')';
290 return;
291 }
292
293 if (U->getParent()) {
294 OS << U << '(';
295 U->printAsOperand(OS, true);
296 OS << ')';
297 return;
298 }
299
300 unsigned N = U->getNumOperands();
301 if (N != 0)
302 OS << U << '(';
303 OS << U->getOpcodeName();
304 for (const Value *Op : U->operands()) {
305 OS << ' ';
306 print(OS, Op);
307 }
308 if (N != 0)
309 OS << ')';
310}
311
312void Simplifier::Context::initialize(Instruction *Exp) {
313 // Perform a deep clone of the expression, set Root to the root
314 // of the clone, and build a map from the cloned values to the
315 // original ones.
316 ValueMapType M;
317 BasicBlock *Block = Exp->getParent();
318 WorkListType Q;
319 Q.push_back(Exp);
320
321 while (!Q.empty()) {
322 Value *V = Q.pop_front_val();
323 if (M.find(V) != M.end())
324 continue;
325 if (Instruction *U = dyn_cast<Instruction>(V)) {
326 if (isa<PHINode>(U) || U->getParent() != Block)
327 continue;
328 for (Value *Op : U->operands())
329 Q.push_back(Op);
330 M.insert({U, U->clone()});
331 }
332 }
333
334 for (std::pair<Value*,Value*> P : M) {
335 Instruction *U = cast<Instruction>(P.second);
336 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
337 auto F = M.find(U->getOperand(i));
338 if (F != M.end())
339 U->setOperand(i, F->second);
340 }
341 }
342
343 auto R = M.find(Exp);
344 assert(R != M.end())((R != M.end()) ? static_cast<void> (0) : __assert_fail
("R != M.end()", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 344, __PRETTY_FUNCTION__))
;
345 Root = R->second;
346
347 record(Root);
348 use(Root);
349}
350
351void Simplifier::Context::record(Value *V) {
352 auto Record = [this](Instruction *U) -> bool {
353 Clones.insert(U);
354 return true;
355 };
356 traverse(V, Record);
357}
358
359void Simplifier::Context::use(Value *V) {
360 auto Use = [this](Instruction *U) -> bool {
361 Used.insert(U);
362 return true;
363 };
364 traverse(V, Use);
365}
366
367void Simplifier::Context::unuse(Value *V) {
368 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
369 return;
370
371 auto Unuse = [this](Instruction *U) -> bool {
372 if (!U->use_empty())
373 return false;
374 Used.erase(U);
375 return true;
376 };
377 traverse(V, Unuse);
378}
379
380Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
381 if (Tree == OldV)
382 return NewV;
383 if (OldV == NewV)
384 return Tree;
385
386 WorkListType Q;
387 Q.push_back(Tree);
388 while (!Q.empty()) {
389 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
390 // If U is not an instruction, or it's not a clone, skip it.
391 if (!U || U->getParent())
392 continue;
393 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
394 Value *Op = U->getOperand(i);
395 if (Op == OldV) {
396 U->setOperand(i, NewV);
397 unuse(OldV);
398 } else {
399 Q.push_back(Op);
400 }
401 }
402 }
403 return Tree;
404}
405
406void Simplifier::Context::replace(Value *OldV, Value *NewV) {
407 if (Root == OldV) {
408 Root = NewV;
409 use(Root);
410 return;
411 }
412
413 // NewV may be a complex tree that has just been created by one of the
414 // transformation rules. We need to make sure that it is commoned with
415 // the existing Root to the maximum extent possible.
416 // Identify all subtrees of NewV (including NewV itself) that have
417 // equivalent counterparts in Root, and replace those subtrees with
418 // these counterparts.
419 WorkListType Q;
420 Q.push_back(NewV);
421 while (!Q.empty()) {
422 Value *V = Q.pop_front_val();
423 Instruction *U = dyn_cast<Instruction>(V);
424 if (!U || U->getParent())
425 continue;
426 if (Value *DupV = find(Root, V)) {
427 if (DupV != V)
428 NewV = subst(NewV, V, DupV);
429 } else {
430 for (Value *Op : U->operands())
431 Q.push_back(Op);
432 }
433 }
434
435 // Now, simply replace OldV with NewV in Root.
436 Root = subst(Root, OldV, NewV);
437 use(Root);
438}
439
440void Simplifier::Context::cleanup() {
441 for (Value *V : Clones) {
442 Instruction *U = cast<Instruction>(V);
443 if (!U->getParent())
444 U->dropAllReferences();
445 }
446
447 for (Value *V : Clones) {
448 Instruction *U = cast<Instruction>(V);
449 if (!U->getParent())
450 U->deleteValue();
451 }
452}
453
454bool Simplifier::Context::equal(const Instruction *I,
455 const Instruction *J) const {
456 if (I == J)
457 return true;
458 if (!I->isSameOperationAs(J))
459 return false;
460 if (isa<PHINode>(I))
461 return I->isIdenticalTo(J);
462
463 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
464 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
465 if (OpI == OpJ)
466 continue;
467 auto *InI = dyn_cast<const Instruction>(OpI);
468 auto *InJ = dyn_cast<const Instruction>(OpJ);
469 if (InI && InJ) {
470 if (!equal(InI, InJ))
471 return false;
472 } else if (InI != InJ || !InI)
473 return false;
474 }
475 return true;
476}
477
478Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
479 Instruction *SubI = dyn_cast<Instruction>(Sub);
480 WorkListType Q;
481 Q.push_back(Tree);
482
483 while (!Q.empty()) {
484 Value *V = Q.pop_front_val();
485 if (V == Sub)
486 return V;
487 Instruction *U = dyn_cast<Instruction>(V);
488 if (!U || U->getParent())
489 continue;
490 if (SubI && equal(SubI, U))
491 return U;
492 assert(!isa<PHINode>(U))((!isa<PHINode>(U)) ? static_cast<void> (0) : __assert_fail
("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 492, __PRETTY_FUNCTION__))
;
493 for (Value *Op : U->operands())
494 Q.push_back(Op);
495 }
496 return nullptr;
497}
498
499void Simplifier::Context::link(Instruction *I, BasicBlock *B,
500 BasicBlock::iterator At) {
501 if (I->getParent())
502 return;
503
504 for (Value *Op : I->operands()) {
505 if (Instruction *OpI = dyn_cast<Instruction>(Op))
506 link(OpI, B, At);
507 }
508
509 B->getInstList().insert(At, I);
510}
511
512Value *Simplifier::Context::materialize(BasicBlock *B,
513 BasicBlock::iterator At) {
514 if (Instruction *RootI = dyn_cast<Instruction>(Root))
515 link(RootI, B, At);
516 return Root;
517}
518
519Value *Simplifier::simplify(Context &C) {
520 WorkListType Q;
521 Q.push_back(C.Root);
522 unsigned Count = 0;
523 const unsigned Limit = SimplifyLimit;
524
525 while (!Q.empty()) {
526 if (Count++ >= Limit)
527 break;
528 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
529 if (!U || U->getParent() || !C.Used.count(U))
530 continue;
531 bool Changed = false;
532 for (Rule &R : Rules) {
533 Value *W = R.Fn(U, C.Ctx);
534 if (!W)
535 continue;
536 Changed = true;
537 C.record(W);
538 C.replace(U, W);
539 Q.push_back(C.Root);
540 break;
541 }
542 if (!Changed) {
543 for (Value *Op : U->operands())
544 Q.push_back(Op);
545 }
546 }
547 return Count < Limit ? C.Root : nullptr;
548}
549
550//===----------------------------------------------------------------------===//
551//
552// Implementation of PolynomialMultiplyRecognize
553//
554//===----------------------------------------------------------------------===//
555
556namespace {
557
558 class PolynomialMultiplyRecognize {
559 public:
560 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
561 const DominatorTree &dt, const TargetLibraryInfo &tli,
562 ScalarEvolution &se)
563 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
564
565 bool recognize();
566
567 private:
568 using ValueSeq = SetVector<Value *>;
569
570 IntegerType *getPmpyType() const {
571 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
572 return IntegerType::get(Ctx, 32);
573 }
574
575 bool isPromotableTo(Value *V, IntegerType *Ty);
576 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
577 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
578
579 Value *getCountIV(BasicBlock *BB);
580 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
581 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
582 ValueSeq &Late);
583 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
584 bool commutesWithShift(Instruction *I);
585 bool highBitsAreZero(Value *V, unsigned IterCount);
586 bool keepsHighBitsZero(Value *V, unsigned IterCount);
587 bool isOperandShifted(Instruction *I, Value *Op);
588 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
589 unsigned IterCount);
590 void cleanupLoopBody(BasicBlock *LoopB);
591
592 struct ParsedValues {
593 ParsedValues() = default;
594
595 Value *M = nullptr;
596 Value *P = nullptr;
597 Value *Q = nullptr;
598 Value *R = nullptr;
599 Value *X = nullptr;
600 Instruction *Res = nullptr;
601 unsigned IterCount = 0;
602 bool Left = false;
603 bool Inv = false;
604 };
605
606 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
607 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
608 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
609 Value *CIV, ParsedValues &PV, bool PreScan);
610 unsigned getInverseMxN(unsigned QP);
611 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
612
613 void setupPreSimplifier(Simplifier &S);
614 void setupPostSimplifier(Simplifier &S);
615
616 Loop *CurLoop;
617 const DataLayout &DL;
618 const DominatorTree &DT;
619 const TargetLibraryInfo &TLI;
620 ScalarEvolution &SE;
621 };
622
623} // end anonymous namespace
624
625Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
626 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
627 if (std::distance(PI, PE) != 2)
628 return nullptr;
629 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
630
631 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
632 auto *PN = cast<PHINode>(I);
633 Value *InitV = PN->getIncomingValueForBlock(PB);
634 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
635 continue;
636 Value *IterV = PN->getIncomingValueForBlock(BB);
637 auto *BO = dyn_cast<BinaryOperator>(IterV);
638 if (!BO)
639 continue;
640 if (BO->getOpcode() != Instruction::Add)
641 continue;
642 Value *IncV = nullptr;
643 if (BO->getOperand(0) == PN)
644 IncV = BO->getOperand(1);
645 else if (BO->getOperand(1) == PN)
646 IncV = BO->getOperand(0);
647 if (IncV == nullptr)
648 continue;
649
650 if (auto *T = dyn_cast<ConstantInt>(IncV))
651 if (T->getZExtValue() == 1)
652 return PN;
653 }
654 return nullptr;
655}
656
657static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
658 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
659 Use &TheUse = UI.getUse();
660 ++UI;
661 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
662 if (BB == II->getParent())
663 II->replaceUsesOfWith(I, J);
664 }
665}
666
667bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
668 Value *CIV, ParsedValues &PV) {
669 // Match the following:
670 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
671 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
672 // The condition may also check for equality with the masked value, i.e
673 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
674 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
675
676 Value *CondV = SelI->getCondition();
677 Value *TrueV = SelI->getTrueValue();
678 Value *FalseV = SelI->getFalseValue();
679
680 using namespace PatternMatch;
681
682 CmpInst::Predicate P;
683 Value *A = nullptr, *B = nullptr, *C = nullptr;
684
685 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
686 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
687 return false;
688 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
689 return false;
690 // Matched: select (A & B) == C ? ... : ...
691 // select (A & B) != C ? ... : ...
692
693 Value *X = nullptr, *Sh1 = nullptr;
694 // Check (A & B) for (X & (1 << i)):
695 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
696 Sh1 = A;
697 X = B;
698 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
699 Sh1 = B;
700 X = A;
701 } else {
702 // TODO: Could also check for an induction variable containing single
703 // bit shifted left by 1 in each iteration.
704 return false;
705 }
706
707 bool TrueIfZero;
708
709 // Check C against the possible values for comparison: 0 and (1 << i):
710 if (match(C, m_Zero()))
711 TrueIfZero = (P == CmpInst::ICMP_EQ);
712 else if (C == Sh1)
713 TrueIfZero = (P == CmpInst::ICMP_NE);
714 else
715 return false;
716
717 // So far, matched:
718 // select (X & (1 << i)) ? ... : ...
719 // including variations of the check against zero/non-zero value.
720
721 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
722 if (TrueIfZero) {
723 ShouldSameV = TrueV;
724 ShouldXoredV = FalseV;
725 } else {
726 ShouldSameV = FalseV;
727 ShouldXoredV = TrueV;
728 }
729
730 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
731 Value *T = nullptr;
732 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
733 // Matched: select +++ ? ... : Y ^ Z
734 // select +++ ? Y ^ Z : ...
735 // where +++ denotes previously checked matches.
736 if (ShouldSameV == Y)
737 T = Z;
738 else if (ShouldSameV == Z)
739 T = Y;
740 else
741 return false;
742 R = ShouldSameV;
743 // Matched: select +++ ? R : R ^ T
744 // select +++ ? R ^ T : R
745 // depending on TrueIfZero.
746
747 } else if (match(ShouldSameV, m_Zero())) {
748 // Matched: select +++ ? 0 : ...
749 // select +++ ? ... : 0
750 if (!SelI->hasOneUse())
751 return false;
752 T = ShouldXoredV;
753 // Matched: select +++ ? 0 : T
754 // select +++ ? T : 0
755
756 Value *U = *SelI->user_begin();
757 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
758 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
759 return false;
760 // Matched: xor (select +++ ? 0 : T), R
761 // xor (select +++ ? T : 0), R
762 } else
763 return false;
764
765 // The xor input value T is isolated into its own match so that it could
766 // be checked against an induction variable containing a shifted bit
767 // (todo).
768 // For now, check against (Q << i).
769 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
770 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
771 return false;
772 // Matched: select +++ ? R : R ^ (Q << i)
773 // select +++ ? R ^ (Q << i) : R
774
775 PV.X = X;
776 PV.Q = Q;
777 PV.R = R;
778 PV.Left = true;
779 return true;
780}
781
782bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
783 ParsedValues &PV) {
784 // Match the following:
785 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
786 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
787 // The condition may also check for equality with the masked value, i.e
788 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
789 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
790
791 Value *CondV = SelI->getCondition();
792 Value *TrueV = SelI->getTrueValue();
793 Value *FalseV = SelI->getFalseValue();
794
795 using namespace PatternMatch;
796
797 Value *C = nullptr;
798 CmpInst::Predicate P;
799 bool TrueIfZero;
800
801 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
802 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
803 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
804 return false;
805 // Matched: select C == 0 ? ... : ...
806 // select C != 0 ? ... : ...
807 TrueIfZero = (P == CmpInst::ICMP_EQ);
808 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
809 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
810 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
811 return false;
812 // Matched: select C == 1 ? ... : ...
813 // select C != 1 ? ... : ...
814 TrueIfZero = (P == CmpInst::ICMP_NE);
815 } else
816 return false;
817
818 Value *X = nullptr;
819 if (!match(C, m_And(m_Value(X), m_One())) &&
820 !match(C, m_And(m_One(), m_Value(X))))
821 return false;
822 // Matched: select (X & 1) == +++ ? ... : ...
823 // select (X & 1) != +++ ? ... : ...
824
825 Value *R = nullptr, *Q = nullptr;
826 if (TrueIfZero) {
827 // The select's condition is true if the tested bit is 0.
828 // TrueV must be the shift, FalseV must be the xor.
829 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
830 return false;
831 // Matched: select +++ ? (R >> 1) : ...
832 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
833 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
834 return false;
835 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
836 // with commuting ^.
837 } else {
838 // The select's condition is true if the tested bit is 1.
839 // TrueV must be the xor, FalseV must be the shift.
840 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
841 return false;
842 // Matched: select +++ ? ... : (R >> 1)
843 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
844 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
845 return false;
846 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
847 // with commuting ^.
848 }
849
850 PV.X = X;
851 PV.Q = Q;
852 PV.R = R;
853 PV.Left = false;
854 return true;
855}
856
857bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
858 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
859 bool PreScan) {
860 using namespace PatternMatch;
861
862 // The basic pattern for R = P.Q is:
863 // for i = 0..31
864 // R = phi (0, R')
865 // if (P & (1 << i)) ; test-bit(P, i)
866 // R' = R ^ (Q << i)
867 //
868 // Similarly, the basic pattern for R = (P/Q).Q - P
869 // for i = 0..31
870 // R = phi(P, R')
871 // if (R & (1 << i))
872 // R' = R ^ (Q << i)
873
874 // There exist idioms, where instead of Q being shifted left, P is shifted
875 // right. This produces a result that is shifted right by 32 bits (the
876 // non-shifted result is 64-bit).
877 //
878 // For R = P.Q, this would be:
879 // for i = 0..31
880 // R = phi (0, R')
881 // if ((P >> i) & 1)
882 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
883 // else ; be shifted by 1, not i.
884 // R' = R >> 1
885 //
886 // And for the inverse:
887 // for i = 0..31
888 // R = phi (P, R')
889 // if (R & 1)
890 // R' = (R >> 1) ^ Q
891 // else
892 // R' = R >> 1
893
894 // The left-shifting idioms share the same pattern:
895 // select (X & (1 << i)) ? R ^ (Q << i) : R
896 // Similarly for right-shifting idioms:
897 // select (X & 1) ? (R >> 1) ^ Q
898
899 if (matchLeftShift(SelI, CIV, PV)) {
900 // If this is a pre-scan, getting this far is sufficient.
901 if (PreScan)
902 return true;
903
904 // Need to make sure that the SelI goes back into R.
905 auto *RPhi = dyn_cast<PHINode>(PV.R);
906 if (!RPhi)
907 return false;
908 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
909 return false;
910 PV.Res = SelI;
911
912 // If X is loop invariant, it must be the input polynomial, and the
913 // idiom is the basic polynomial multiply.
914 if (CurLoop->isLoopInvariant(PV.X)) {
915 PV.P = PV.X;
916 PV.Inv = false;
917 } else {
918 // X is not loop invariant. If X == R, this is the inverse pmpy.
919 // Otherwise, check for an xor with an invariant value. If the
920 // variable argument to the xor is R, then this is still a valid
921 // inverse pmpy.
922 PV.Inv = true;
923 if (PV.X != PV.R) {
924 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
925 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
926 return false;
927 auto *I1 = dyn_cast<Instruction>(X1);
928 auto *I2 = dyn_cast<Instruction>(X2);
929 if (!I1 || I1->getParent() != LoopB) {
930 Var = X2;
931 Inv = X1;
932 } else if (!I2 || I2->getParent() != LoopB) {
933 Var = X1;
934 Inv = X2;
935 } else
936 return false;
937 if (Var != PV.R)
938 return false;
939 PV.M = Inv;
940 }
941 // The input polynomial P still needs to be determined. It will be
942 // the entry value of R.
943 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
944 PV.P = EntryP;
945 }
946
947 return true;
948 }
949
950 if (matchRightShift(SelI, PV)) {
951 // If this is an inverse pattern, the Q polynomial must be known at
952 // compile time.
953 if (PV.Inv && !isa<ConstantInt>(PV.Q))
954 return false;
955 if (PreScan)
956 return true;
957 // There is no exact matching of right-shift pmpy.
958 return false;
959 }
960
961 return false;
962}
963
964bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
965 IntegerType *DestTy) {
966 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
967 if (!T || T->getBitWidth() > DestTy->getBitWidth())
968 return false;
969 if (T->getBitWidth() == DestTy->getBitWidth())
970 return true;
971 // Non-instructions are promotable. The reason why an instruction may not
972 // be promotable is that it may produce a different result if its operands
973 // and the result are promoted, for example, it may produce more non-zero
974 // bits. While it would still be possible to represent the proper result
975 // in a wider type, it may require adding additional instructions (which
976 // we don't want to do).
977 Instruction *In = dyn_cast<Instruction>(Val);
978 if (!In)
979 return true;
980 // The bitwidth of the source type is smaller than the destination.
981 // Check if the individual operation can be promoted.
982 switch (In->getOpcode()) {
983 case Instruction::PHI:
984 case Instruction::ZExt:
985 case Instruction::And:
986 case Instruction::Or:
987 case Instruction::Xor:
988 case Instruction::LShr: // Shift right is ok.
989 case Instruction::Select:
990 case Instruction::Trunc:
991 return true;
992 case Instruction::ICmp:
993 if (CmpInst *CI = cast<CmpInst>(In))
994 return CI->isEquality() || CI->isUnsigned();
995 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 995)
;
996 case Instruction::Add:
997 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
998 }
999 return false;
1000}
1001
1002void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1003 IntegerType *DestTy, BasicBlock *LoopB) {
1004 Type *OrigTy = In->getType();
1005 assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")((!OrigTy->isVoidTy() && "Invalid instruction to promote"
) ? static_cast<void> (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1005, __PRETTY_FUNCTION__))
;
1006
1007 // Leave boolean values alone.
1008 if (!In->getType()->isIntegerTy(1))
1009 In->mutateType(DestTy);
1010 unsigned DestBW = DestTy->getBitWidth();
1011
1012 // Handle PHIs.
1013 if (PHINode *P = dyn_cast<PHINode>(In)) {
1014 unsigned N = P->getNumIncomingValues();
1015 for (unsigned i = 0; i != N; ++i) {
1016 BasicBlock *InB = P->getIncomingBlock(i);
1017 if (InB == LoopB)
1018 continue;
1019 Value *InV = P->getIncomingValue(i);
1020 IntegerType *Ty = cast<IntegerType>(InV->getType());
1021 // Do not promote values in PHI nodes of type i1.
1022 if (Ty != P->getType()) {
1023 // If the value type does not match the PHI type, the PHI type
1024 // must have been promoted.
1025 assert(Ty->getBitWidth() < DestBW)((Ty->getBitWidth() < DestBW) ? static_cast<void>
(0) : __assert_fail ("Ty->getBitWidth() < DestBW", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1025, __PRETTY_FUNCTION__))
;
1026 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1027 P->setIncomingValue(i, InV);
1028 }
1029 }
1030 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1031 Value *Op = Z->getOperand(0);
1032 if (Op->getType() == Z->getType())
1033 Z->replaceAllUsesWith(Op);
1034 Z->eraseFromParent();
1035 return;
1036 }
1037 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1038 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1039 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1040 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1041 T->replaceAllUsesWith(And);
1042 T->eraseFromParent();
1043 return;
1044 }
1045
1046 // Promote immediates.
1047 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1048 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1049 if (CI->getType()->getBitWidth() < DestBW)
1050 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1051 }
1052}
1053
1054bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1055 BasicBlock *ExitB) {
1056 assert(LoopB)((LoopB) ? static_cast<void> (0) : __assert_fail ("LoopB"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1056, __PRETTY_FUNCTION__))
;
1057 // Skip loops where the exit block has more than one predecessor. The values
1058 // coming from the loop block will be promoted to another type, and so the
1059 // values coming into the exit block from other predecessors would also have
1060 // to be promoted.
1061 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1062 return false;
1063 IntegerType *DestTy = getPmpyType();
1064 // Check if the exit values have types that are no wider than the type
1065 // that we want to promote to.
1066 unsigned DestBW = DestTy->getBitWidth();
1067 for (PHINode &P : ExitB->phis()) {
1068 if (P.getNumIncomingValues() != 1)
1069 return false;
1070 assert(P.getIncomingBlock(0) == LoopB)((P.getIncomingBlock(0) == LoopB) ? static_cast<void> (
0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1070, __PRETTY_FUNCTION__))
;
1071 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1072 if (!T || T->getBitWidth() > DestBW)
1073 return false;
1074 }
1075
1076 // Check all instructions in the loop.
1077 for (Instruction &In : *LoopB)
1078 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1079 return false;
1080
1081 // Perform the promotion.
1082 std::vector<Instruction*> LoopIns;
1083 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1084 [](Instruction &In) { return &In; });
1085 for (Instruction *In : LoopIns)
1086 if (!In->isTerminator())
1087 promoteTo(In, DestTy, LoopB);
1088
1089 // Fix up the PHI nodes in the exit block.
1090 Instruction *EndI = ExitB->getFirstNonPHI();
1091 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1092 for (auto I = ExitB->begin(); I != End; ++I) {
1093 PHINode *P = dyn_cast<PHINode>(I);
1094 if (!P)
1095 break;
1096 Type *Ty0 = P->getIncomingValue(0)->getType();
1097 Type *PTy = P->getType();
1098 if (PTy != Ty0) {
1099 assert(Ty0 == DestTy)((Ty0 == DestTy) ? static_cast<void> (0) : __assert_fail
("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1099, __PRETTY_FUNCTION__))
;
1100 // In order to create the trunc, P must have the promoted type.
1101 P->mutateType(Ty0);
1102 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1103 // In order for the RAUW to work, the types of P and T must match.
1104 P->mutateType(PTy);
1105 P->replaceAllUsesWith(T);
1106 // Final update of the P's type.
1107 P->mutateType(Ty0);
1108 cast<Instruction>(T)->setOperand(0, P);
1109 }
1110 }
1111
1112 return true;
1113}
1114
1115bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1116 ValueSeq &Cycle) {
1117 // Out = ..., In, ...
1118 if (Out == In)
1119 return true;
1120
1121 auto *BB = cast<Instruction>(Out)->getParent();
1122 bool HadPhi = false;
1123
1124 for (auto U : Out->users()) {
1125 auto *I = dyn_cast<Instruction>(&*U);
1126 if (I == nullptr || I->getParent() != BB)
1127 continue;
1128 // Make sure that there are no multi-iteration cycles, e.g.
1129 // p1 = phi(p2)
1130 // p2 = phi(p1)
1131 // The cycle p1->p2->p1 would span two loop iterations.
1132 // Check that there is only one phi in the cycle.
1133 bool IsPhi = isa<PHINode>(I);
1134 if (IsPhi && HadPhi)
1135 return false;
1136 HadPhi |= IsPhi;
1137 if (Cycle.count(I))
1138 return false;
1139 Cycle.insert(I);
1140 if (findCycle(I, In, Cycle))
1141 break;
1142 Cycle.remove(I);
1143 }
1144 return !Cycle.empty();
1145}
1146
1147void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1148 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1149 // All the values in the cycle that are between the phi node and the
1150 // divider instruction will be classified as "early", all other values
1151 // will be "late".
1152
1153 bool IsE = true;
1154 unsigned I, N = Cycle.size();
1155 for (I = 0; I < N; ++I) {
1156 Value *V = Cycle[I];
1157 if (DivI == V)
1158 IsE = false;
1159 else if (!isa<PHINode>(V))
1160 continue;
1161 // Stop if found either.
1162 break;
1163 }
1164 // "I" is the index of either DivI or the phi node, whichever was first.
1165 // "E" is "false" or "true" respectively.
1166 ValueSeq &First = !IsE ? Early : Late;
1167 for (unsigned J = 0; J < I; ++J)
1168 First.insert(Cycle[J]);
1169
1170 ValueSeq &Second = IsE ? Early : Late;
1171 Second.insert(Cycle[I]);
1172 for (++I; I < N; ++I) {
1173 Value *V = Cycle[I];
1174 if (DivI == V || isa<PHINode>(V))
1175 break;
1176 Second.insert(V);
1177 }
1178
1179 for (; I < N; ++I)
1180 First.insert(Cycle[I]);
1181}
1182
1183bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1184 ValueSeq &Early, ValueSeq &Late) {
1185 // Select is an exception, since the condition value does not have to be
1186 // classified in the same way as the true/false values. The true/false
1187 // values do have to be both early or both late.
1188 if (UseI->getOpcode() == Instruction::Select) {
1189 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1190 if (Early.count(TV) || Early.count(FV)) {
1191 if (Late.count(TV) || Late.count(FV))
1192 return false;
1193 Early.insert(UseI);
1194 } else if (Late.count(TV) || Late.count(FV)) {
1195 if (Early.count(TV) || Early.count(FV))
1196 return false;
1197 Late.insert(UseI);
1198 }
1199 return true;
1200 }
1201
1202 // Not sure what would be the example of this, but the code below relies
1203 // on having at least one operand.
1204 if (UseI->getNumOperands() == 0)
1205 return true;
1206
1207 bool AE = true, AL = true;
1208 for (auto &I : UseI->operands()) {
1209 if (Early.count(&*I))
1210 AL = false;
1211 else if (Late.count(&*I))
1212 AE = false;
1213 }
1214 // If the operands appear "all early" and "all late" at the same time,
1215 // then it means that none of them are actually classified as either.
1216 // This is harmless.
1217 if (AE && AL)
1218 return true;
1219 // Conversely, if they are neither "all early" nor "all late", then
1220 // we have a mixture of early and late operands that is not a known
1221 // exception.
1222 if (!AE && !AL)
1223 return false;
1224
1225 // Check that we have covered the two special cases.
1226 assert(AE != AL)((AE != AL) ? static_cast<void> (0) : __assert_fail ("AE != AL"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1226, __PRETTY_FUNCTION__))
;
1227
1228 if (AE)
1229 Early.insert(UseI);
1230 else
1231 Late.insert(UseI);
1232 return true;
1233}
1234
1235bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1236 switch (I->getOpcode()) {
1237 case Instruction::And:
1238 case Instruction::Or:
1239 case Instruction::Xor:
1240 case Instruction::LShr:
1241 case Instruction::Shl:
1242 case Instruction::Select:
1243 case Instruction::ICmp:
1244 case Instruction::PHI:
1245 break;
1246 default:
1247 return false;
1248 }
1249 return true;
1250}
1251
1252bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1253 unsigned IterCount) {
1254 auto *T = dyn_cast<IntegerType>(V->getType());
1255 if (!T)
1256 return false;
1257
1258 KnownBits Known(T->getBitWidth());
1259 computeKnownBits(V, Known, DL);
1260 return Known.countMinLeadingZeros() >= IterCount;
1261}
1262
1263bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1264 unsigned IterCount) {
1265 // Assume that all inputs to the value have the high bits zero.
1266 // Check if the value itself preserves the zeros in the high bits.
1267 if (auto *C = dyn_cast<ConstantInt>(V))
1268 return C->getValue().countLeadingZeros() >= IterCount;
1269
1270 if (auto *I = dyn_cast<Instruction>(V)) {
1271 switch (I->getOpcode()) {
1272 case Instruction::And:
1273 case Instruction::Or:
1274 case Instruction::Xor:
1275 case Instruction::LShr:
1276 case Instruction::Select:
1277 case Instruction::ICmp:
1278 case Instruction::PHI:
1279 case Instruction::ZExt:
1280 return true;
1281 }
1282 }
1283
1284 return false;
1285}
1286
1287bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1288 unsigned Opc = I->getOpcode();
1289 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1290 return Op != I->getOperand(1);
1291 return true;
1292}
1293
1294bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1295 BasicBlock *ExitB, unsigned IterCount) {
1296 Value *CIV = getCountIV(LoopB);
1297 if (CIV == nullptr)
1298 return false;
1299 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1300 if (CIVTy == nullptr)
1301 return false;
1302
1303 ValueSeq RShifts;
1304 ValueSeq Early, Late, Cycled;
1305
1306 // Find all value cycles that contain logical right shifts by 1.
1307 for (Instruction &I : *LoopB) {
1308 using namespace PatternMatch;
1309
1310 Value *V = nullptr;
1311 if (!match(&I, m_LShr(m_Value(V), m_One())))
1312 continue;
1313 ValueSeq C;
1314 if (!findCycle(&I, V, C))
1315 continue;
1316
1317 // Found a cycle.
1318 C.insert(&I);
1319 classifyCycle(&I, C, Early, Late);
1320 Cycled.insert(C.begin(), C.end());
1321 RShifts.insert(&I);
1322 }
1323
1324 // Find the set of all values affected by the shift cycles, i.e. all
1325 // cycled values, and (recursively) all their users.
1326 ValueSeq Users(Cycled.begin(), Cycled.end());
1327 for (unsigned i = 0; i < Users.size(); ++i) {
1328 Value *V = Users[i];
1329 if (!isa<IntegerType>(V->getType()))
1330 return false;
1331 auto *R = cast<Instruction>(V);
1332 // If the instruction does not commute with shifts, the loop cannot
1333 // be unshifted.
1334 if (!commutesWithShift(R))
1335 return false;
1336 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1337 auto *T = cast<Instruction>(*I);
1338 // Skip users from outside of the loop. They will be handled later.
1339 // Also, skip the right-shifts and phi nodes, since they mix early
1340 // and late values.
1341 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1342 continue;
1343
1344 Users.insert(T);
1345 if (!classifyInst(T, Early, Late))
1346 return false;
1347 }
1348 }
1349
1350 if (Users.empty())
1351 return false;
1352
1353 // Verify that high bits remain zero.
1354 ValueSeq Internal(Users.begin(), Users.end());
1355 ValueSeq Inputs;
1356 for (unsigned i = 0; i < Internal.size(); ++i) {
1357 auto *R = dyn_cast<Instruction>(Internal[i]);
1358 if (!R)
1359 continue;
1360 for (Value *Op : R->operands()) {
1361 auto *T = dyn_cast<Instruction>(Op);
1362 if (T && T->getParent() != LoopB)
1363 Inputs.insert(Op);
1364 else
1365 Internal.insert(Op);
1366 }
1367 }
1368 for (Value *V : Inputs)
1369 if (!highBitsAreZero(V, IterCount))
1370 return false;
1371 for (Value *V : Internal)
1372 if (!keepsHighBitsZero(V, IterCount))
1373 return false;
1374
1375 // Finally, the work can be done. Unshift each user.
1376 IRBuilder<> IRB(LoopB);
1377 std::map<Value*,Value*> ShiftMap;
1378
1379 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1380
1381 CastMapType CastMap;
1382
1383 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1384 IntegerType *Ty) -> Value* {
1385 auto H = CM.find(std::make_pair(V, Ty));
1386 if (H != CM.end())
1387 return H->second;
1388 Value *CV = IRB.CreateIntCast(V, Ty, false);
1389 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1390 return CV;
1391 };
1392
1393 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1394 using namespace PatternMatch;
1395
1396 if (isa<PHINode>(I) || !Users.count(&*I))
1397 continue;
1398
1399 // Match lshr x, 1.
1400 Value *V = nullptr;
1401 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1402 replaceAllUsesOfWithIn(&*I, V, LoopB);
1403 continue;
1404 }
1405 // For each non-cycled operand, replace it with the corresponding
1406 // value shifted left.
1407 for (auto &J : I->operands()) {
1408 Value *Op = J.get();
1409 if (!isOperandShifted(&*I, Op))
1410 continue;
1411 if (Users.count(Op))
1412 continue;
1413 // Skip shifting zeros.
1414 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1415 continue;
1416 // Check if we have already generated a shift for this value.
1417 auto F = ShiftMap.find(Op);
1418 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1419 if (W == nullptr) {
1420 IRB.SetInsertPoint(&*I);
1421 // First, the shift amount will be CIV or CIV+1, depending on
1422 // whether the value is early or late. Instead of creating CIV+1,
1423 // do a single shift of the value.
1424 Value *ShAmt = CIV, *ShVal = Op;
1425 auto *VTy = cast<IntegerType>(ShVal->getType());
1426 auto *ATy = cast<IntegerType>(ShAmt->getType());
1427 if (Late.count(&*I))
1428 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1429 // Second, the types of the shifted value and the shift amount
1430 // must match.
1431 if (VTy != ATy) {
1432 if (VTy->getBitWidth() < ATy->getBitWidth())
1433 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1434 else
1435 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1436 }
1437 // Ready to generate the shift and memoize it.
1438 W = IRB.CreateShl(ShVal, ShAmt);
1439 ShiftMap.insert(std::make_pair(Op, W));
1440 }
1441 I->replaceUsesOfWith(Op, W);
1442 }
1443 }
1444
1445 // Update the users outside of the loop to account for having left
1446 // shifts. They would normally be shifted right in the loop, so shift
1447 // them right after the loop exit.
1448 // Take advantage of the loop-closed SSA form, which has all the post-
1449 // loop values in phi nodes.
1450 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1451 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1452 if (!isa<PHINode>(P))
1453 break;
1454 auto *PN = cast<PHINode>(P);
1455 Value *U = PN->getIncomingValueForBlock(LoopB);
1456 if (!Users.count(U))
1457 continue;
1458 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1459 PN->replaceAllUsesWith(S);
1460 // The above RAUW will create
1461 // S = lshr S, IterCount
1462 // so we need to fix it back into
1463 // S = lshr PN, IterCount
1464 cast<User>(S)->replaceUsesOfWith(S, PN);
1465 }
1466
1467 return true;
1468}
1469
1470void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1471 for (auto &I : *LoopB)
1472 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1473 I.replaceAllUsesWith(SV);
1474
1475 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1476 N = std::next(I);
1477 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1478 }
1479}
1480
1481unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1482 // Arrays of coefficients of Q and the inverse, C.
1483 // Q[i] = coefficient at x^i.
1484 std::array<char,32> Q, C;
1485
1486 for (unsigned i = 0; i < 32; ++i) {
1487 Q[i] = QP & 1;
1488 QP >>= 1;
1489 }
1490 assert(Q[0] == 1)((Q[0] == 1) ? static_cast<void> (0) : __assert_fail ("Q[0] == 1"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1490, __PRETTY_FUNCTION__))
;
1491
1492 // Find C, such that
1493 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1494 //
1495 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1496 // operations * and + are & and ^ respectively.
1497 //
1498 // Find C[i] recursively, by comparing i-th coefficient in the product
1499 // with 0 (or 1 for i=0).
1500 //
1501 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1502 C[0] = 1;
1503 for (unsigned i = 1; i < 32; ++i) {
1504 // Solve for C[i] in:
1505 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1506 // This is equivalent to
1507 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1508 // which is
1509 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1510 unsigned T = 0;
1511 for (unsigned j = 0; j < i; ++j)
1512 T = T ^ (C[j] & Q[i-j]);
1513 C[i] = T;
1514 }
1515
1516 unsigned QV = 0;
1517 for (unsigned i = 0; i < 32; ++i)
1518 if (C[i])
1519 QV |= (1 << i);
1520
1521 return QV;
1522}
1523
1524Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1525 ParsedValues &PV) {
1526 IRBuilder<> B(&*At);
1527 Module *M = At->getParent()->getParent()->getParent();
1528 Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1529
1530 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1531 unsigned IC = PV.IterCount;
1532
1533 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1534 P0 = P = B.CreateXor(P, PV.M);
1535
1536 // Create a bit mask to clear the high bits beyond IterCount.
1537 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
16
Returning from 'APInt::getLowBitsSet'
1538
1539 if (PV.IterCount != 32)
17
Assuming field 'IterCount' is equal to 32
18
Taking false branch
1540 P = B.CreateAnd(P, BMI);
1541
1542 if (PV.Inv) {
19
Assuming field 'Inv' is true
20
Taking true branch
1543 auto *QI = dyn_cast<ConstantInt>(PV.Q);
21
Assuming field 'Q' is a 'ConstantInt'
1544 assert
21.1
'QI' is non-null
21.1
'QI' is non-null
(QI && QI->getBitWidth() <= 32)((QI && QI->getBitWidth() <= 32) ? static_cast<
void> (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1544, __PRETTY_FUNCTION__))
;
22
Assuming the condition is true
23
'?' condition is true
1545
1546 // Again, clearing bits beyond IterCount.
1547 unsigned M = (1 << PV.IterCount) - 1;
24
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1548 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1549 unsigned QV = getInverseMxN(Tmp) & M;
1550 auto *QVI = ConstantInt::get(QI->getType(), QV);
1551 P = B.CreateCall(PMF, {P, QVI});
1552 P = B.CreateTrunc(P, QI->getType());
1553 if (IC != 32)
1554 P = B.CreateAnd(P, BMI);
1555 }
1556
1557 Value *R = B.CreateCall(PMF, {P, Q});
1558
1559 if (PV.M != nullptr)
1560 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1561
1562 return R;
1563}
1564
1565static bool hasZeroSignBit(const Value *V) {
1566 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1567 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1568 const Instruction *I = dyn_cast<const Instruction>(V);
1569 if (!I)
1570 return false;
1571 switch (I->getOpcode()) {
1572 case Instruction::LShr:
1573 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1574 return SI->getZExtValue() > 0;
1575 return false;
1576 case Instruction::Or:
1577 case Instruction::Xor:
1578 return hasZeroSignBit(I->getOperand(0)) &&
1579 hasZeroSignBit(I->getOperand(1));
1580 case Instruction::And:
1581 return hasZeroSignBit(I->getOperand(0)) ||
1582 hasZeroSignBit(I->getOperand(1));
1583 }
1584 return false;
1585}
1586
1587void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1588 S.addRule("sink-zext",
1589 // Sink zext past bitwise operations.
1590 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1591 if (I->getOpcode() != Instruction::ZExt)
1592 return nullptr;
1593 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1594 if (!T)
1595 return nullptr;
1596 switch (T->getOpcode()) {
1597 case Instruction::And:
1598 case Instruction::Or:
1599 case Instruction::Xor:
1600 break;
1601 default:
1602 return nullptr;
1603 }
1604 IRBuilder<> B(Ctx);
1605 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1606 B.CreateZExt(T->getOperand(0), I->getType()),
1607 B.CreateZExt(T->getOperand(1), I->getType()));
1608 });
1609 S.addRule("xor/and -> and/xor",
1610 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1611 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1612 if (I->getOpcode() != Instruction::Xor)
1613 return nullptr;
1614 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1615 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1616 if (!And0 || !And1)
1617 return nullptr;
1618 if (And0->getOpcode() != Instruction::And ||
1619 And1->getOpcode() != Instruction::And)
1620 return nullptr;
1621 if (And0->getOperand(1) != And1->getOperand(1))
1622 return nullptr;
1623 IRBuilder<> B(Ctx);
1624 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1625 And0->getOperand(1));
1626 });
1627 S.addRule("sink binop into select",
1628 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1629 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1630 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1631 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1632 if (!BO)
1633 return nullptr;
1634 Instruction::BinaryOps Op = BO->getOpcode();
1635 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1636 IRBuilder<> B(Ctx);
1637 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1638 Value *Z = BO->getOperand(1);
1639 return B.CreateSelect(Sel->getCondition(),
1640 B.CreateBinOp(Op, X, Z),
1641 B.CreateBinOp(Op, Y, Z));
1642 }
1643 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1644 IRBuilder<> B(Ctx);
1645 Value *X = BO->getOperand(0);
1646 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1647 return B.CreateSelect(Sel->getCondition(),
1648 B.CreateBinOp(Op, X, Y),
1649 B.CreateBinOp(Op, X, Z));
1650 }
1651 return nullptr;
1652 });
1653 S.addRule("fold select-select",
1654 // (select c (select c x y) z) -> (select c x z)
1655 // (select c x (select c y z)) -> (select c x z)
1656 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1657 SelectInst *Sel = dyn_cast<SelectInst>(I);
1658 if (!Sel)
1659 return nullptr;
1660 IRBuilder<> B(Ctx);
1661 Value *C = Sel->getCondition();
1662 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1663 if (Sel0->getCondition() == C)
1664 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1665 }
1666 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1667 if (Sel1->getCondition() == C)
1668 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1669 }
1670 return nullptr;
1671 });
1672 S.addRule("or-signbit -> xor-signbit",
1673 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1674 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1675 if (I->getOpcode() != Instruction::Or)
1676 return nullptr;
1677 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1678 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1679 return nullptr;
1680 if (!hasZeroSignBit(I->getOperand(0)))
1681 return nullptr;
1682 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1683 });
1684 S.addRule("sink lshr into binop",
1685 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1686 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1687 if (I->getOpcode() != Instruction::LShr)
1688 return nullptr;
1689 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1690 if (!BitOp)
1691 return nullptr;
1692 switch (BitOp->getOpcode()) {
1693 case Instruction::And:
1694 case Instruction::Or:
1695 case Instruction::Xor:
1696 break;
1697 default:
1698 return nullptr;
1699 }
1700 IRBuilder<> B(Ctx);
1701 Value *S = I->getOperand(1);
1702 return B.CreateBinOp(BitOp->getOpcode(),
1703 B.CreateLShr(BitOp->getOperand(0), S),
1704 B.CreateLShr(BitOp->getOperand(1), S));
1705 });
1706 S.addRule("expose bitop-const",
1707 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1708 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1709 auto IsBitOp = [](unsigned Op) -> bool {
1710 switch (Op) {
1711 case Instruction::And:
1712 case Instruction::Or:
1713 case Instruction::Xor:
1714 return true;
1715 }
1716 return false;
1717 };
1718 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1719 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1720 return nullptr;
1721 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1722 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1723 return nullptr;
1724 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1725 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1726 if (!CA || !CB)
1727 return nullptr;
1728 IRBuilder<> B(Ctx);
1729 Value *X = BitOp2->getOperand(0);
1730 return B.CreateBinOp(BitOp2->getOpcode(), X,
1731 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1732 });
1733}
1734
1735void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1736 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1737 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1738 if (I->getOpcode() != Instruction::And)
1739 return nullptr;
1740 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1741 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1742 if (!Xor || !C0)
1743 return nullptr;
1744 if (Xor->getOpcode() != Instruction::Xor)
1745 return nullptr;
1746 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1747 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1748 // Pick the first non-null and.
1749 if (!And0 || And0->getOpcode() != Instruction::And)
1750 std::swap(And0, And1);
1751 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1752 if (!C1)
1753 return nullptr;
1754 uint32_t V0 = C0->getZExtValue();
1755 uint32_t V1 = C1->getZExtValue();
1756 if (V0 != (V0 & V1))
1757 return nullptr;
1758 IRBuilder<> B(Ctx);
1759 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1760 });
1761}
1762
1763bool PolynomialMultiplyRecognize::recognize() {
1764 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1765 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1766 // Restrictions:
1767 // - The loop must consist of a single block.
1768 // - The iteration count must be known at compile-time.
1769 // - The loop must have an induction variable starting from 0, and
1770 // incremented in each iteration of the loop.
1771 BasicBlock *LoopB = CurLoop->getHeader();
1772 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1773
1774 if (LoopB != CurLoop->getLoopLatch())
1775 return false;
1776 BasicBlock *ExitB = CurLoop->getExitBlock();
1777 if (ExitB == nullptr)
1778 return false;
1779 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1780 if (EntryB == nullptr)
1781 return false;
1782
1783 unsigned IterCount = 0;
1784 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1785 if (isa<SCEVCouldNotCompute>(CT))
1786 return false;
1787 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1788 IterCount = CV->getValue()->getZExtValue() + 1;
1789
1790 Value *CIV = getCountIV(LoopB);
1791 ParsedValues PV;
1792 Simplifier PreSimp;
1793 PV.IterCount = IterCount;
1794 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1795 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1796
1797 setupPreSimplifier(PreSimp);
1798
1799 // Perform a preliminary scan of select instructions to see if any of them
1800 // looks like a generator of the polynomial multiply steps. Assume that a
1801 // loop can only contain a single transformable operation, so stop the
1802 // traversal after the first reasonable candidate was found.
1803 // XXX: Currently this approach can modify the loop before being 100% sure
1804 // that the transformation can be carried out.
1805 bool FoundPreScan = false;
1806 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1807 for (const Value *U : V->users()) {
1808 if (const auto *P = dyn_cast<const PHINode>(U))
1809 if (P->getParent() == LoopB)
1810 return true;
1811 }
1812 return false;
1813 };
1814 for (Instruction &In : *LoopB) {
1815 SelectInst *SI = dyn_cast<SelectInst>(&In);
1816 if (!SI || !FeedsPHI(SI))
1817 continue;
1818
1819 Simplifier::Context C(SI);
1820 Value *T = PreSimp.simplify(C);
1821 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1822 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1823 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1824 FoundPreScan = true;
1825 if (SelI != SI) {
1826 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1827 SI->replaceAllUsesWith(NewSel);
1828 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1829 }
1830 break;
1831 }
1832 }
1833
1834 if (!FoundPreScan) {
1835 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1836 return false;
1837 }
1838
1839 if (!PV.Left) {
1840 // The right shift version actually only returns the higher bits of
1841 // the result (each iteration discards the LSB). If we want to convert it
1842 // to a left-shifting loop, the working data type must be at least as
1843 // wide as the target's pmpy instruction.
1844 if (!promoteTypes(LoopB, ExitB))
1845 return false;
1846 // Run post-promotion simplifications.
1847 Simplifier PostSimp;
1848 setupPostSimplifier(PostSimp);
1849 for (Instruction &In : *LoopB) {
1850 SelectInst *SI = dyn_cast<SelectInst>(&In);
1851 if (!SI || !FeedsPHI(SI))
1852 continue;
1853 Simplifier::Context C(SI);
1854 Value *T = PostSimp.simplify(C);
1855 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1856 if (SelI != SI) {
1857 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1858 SI->replaceAllUsesWith(NewSel);
1859 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1860 }
1861 break;
1862 }
1863
1864 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1865 return false;
1866 cleanupLoopBody(LoopB);
1867 }
1868
1869 // Scan the loop again, find the generating select instruction.
1870 bool FoundScan = false;
1871 for (Instruction &In : *LoopB) {
1872 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1873 if (!SelI)
1874 continue;
1875 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1876 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1877 if (FoundScan)
1878 break;
1879 }
1880 assert(FoundScan)((FoundScan) ? static_cast<void> (0) : __assert_fail ("FoundScan"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1880, __PRETTY_FUNCTION__))
;
1881
1882 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1883 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1884 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1885 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1886 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1887 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1888 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1889 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1890 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1891 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1892 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1893 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1894 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1895
1896 BasicBlock::iterator At(EntryB->getTerminator());
1897 Value *PM = generate(At, PV);
1898 if (PM == nullptr)
1899 return false;
1900
1901 if (PM->getType() != PV.Res->getType())
1902 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1903
1904 PV.Res->replaceAllUsesWith(PM);
1905 PV.Res->eraseFromParent();
1906 return true;
1907}
1908
1909int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1910 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1911 return SC->getAPInt().getSExtValue();
1912 return 0;
1913}
1914
1915bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1916 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1917 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1918 return false;
1919
1920 Value *StoredVal = SI->getValueOperand();
1921 Value *StorePtr = SI->getPointerOperand();
1922
1923 // Reject stores that are so large that they overflow an unsigned.
1924 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1925 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1926 return false;
1927
1928 // See if the pointer expression is an AddRec like {base,+,1} on the current
1929 // loop, which indicates a strided store. If we have something else, it's a
1930 // random store we can't handle.
1931 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1932 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1933 return false;
1934
1935 // Check to see if the stride matches the size of the store. If so, then we
1936 // know that every byte is touched in the loop.
1937 int Stride = getSCEVStride(StoreEv);
1938 if (Stride == 0)
1939 return false;
1940 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1941 if (StoreSize != unsigned(std::abs(Stride)))
1942 return false;
1943
1944 // The store must be feeding a non-volatile load.
1945 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1946 if (!LI || !LI->isSimple())
1947 return false;
1948
1949 // See if the pointer expression is an AddRec like {base,+,1} on the current
1950 // loop, which indicates a strided load. If we have something else, it's a
1951 // random load we can't handle.
1952 Value *LoadPtr = LI->getPointerOperand();
1953 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1954 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1955 return false;
1956
1957 // The store and load must share the same stride.
1958 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1959 return false;
1960
1961 // Success. This store can be converted into a memcpy.
1962 return true;
1963}
1964
1965/// mayLoopAccessLocation - Return true if the specified loop might access the
1966/// specified pointer location, which is a loop-strided access. The 'Access'
1967/// argument specifies what the verboten forms of access are (read or write).
1968static bool
1969mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1970 const SCEV *BECount, unsigned StoreSize,
1971 AliasAnalysis &AA,
1972 SmallPtrSetImpl<Instruction *> &Ignored) {
1973 // Get the location that may be stored across the loop. Since the access
1974 // is strided positively through memory, we say that the modified location
1975 // starts at the pointer and has infinite size.
1976 LocationSize AccessSize = LocationSize::unknown();
1977
1978 // If the loop iterates a fixed number of times, we can refine the access
1979 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1980 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1981 AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
1982 StoreSize);
1983
1984 // TODO: For this to be really effective, we have to dive into the pointer
1985 // operand in the store. Store to &A[i] of 100 will always return may alias
1986 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1987 // which will then no-alias a store to &A[100].
1988 MemoryLocation StoreLoc(Ptr, AccessSize);
1989
1990 for (auto *B : L->blocks())
1991 for (auto &I : *B)
1992 if (Ignored.count(&I) == 0 &&
1993 isModOrRefSet(
1994 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
1995 return true;
1996
1997 return false;
1998}
1999
2000void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2001 SmallVectorImpl<StoreInst*> &Stores) {
2002 Stores.clear();
2003 for (Instruction &I : *BB)
2004 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2005 if (isLegalStore(CurLoop, SI))
2006 Stores.push_back(SI);
2007}
2008
2009bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2010 StoreInst *SI, const SCEV *BECount) {
2011 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2013, __PRETTY_FUNCTION__))
2012 "Expected only non-volatile stores, or Hexagon-specific memcpy"(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2013, __PRETTY_FUNCTION__))
2013 "to volatile destination.")(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2013, __PRETTY_FUNCTION__))
;
2014
2015 Value *StorePtr = SI->getPointerOperand();
2016 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2017 unsigned Stride = getSCEVStride(StoreEv);
2018 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2019 if (Stride != StoreSize)
2020 return false;
2021
2022 // See if the pointer expression is an AddRec like {base,+,1} on the current
2023 // loop, which indicates a strided load. If we have something else, it's a
2024 // random load we can't handle.
2025 auto *LI = cast<LoadInst>(SI->getValueOperand());
2026 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2027
2028 // The trip count of the loop and the base pointer of the addrec SCEV is
2029 // guaranteed to be loop invariant, which means that it should dominate the
2030 // header. This allows us to insert code for it in the preheader.
2031 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2032 Instruction *ExpPt = Preheader->getTerminator();
2033 IRBuilder<> Builder(ExpPt);
2034 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2035
2036 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2037
2038 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2039 // this into a memcpy/memmove in the loop preheader now if we want. However,
2040 // this would be unsafe to do if there is anything else in the loop that may
2041 // read or write the memory region we're storing to. For memcpy, this
2042 // includes the load that feeds the stores. Check for an alias by generating
2043 // the base address and checking everything.
2044 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2045 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2046 Value *LoadBasePtr = nullptr;
2047
2048 bool Overlap = false;
2049 bool DestVolatile = SI->isVolatile();
2050 Type *BECountTy = BECount->getType();
2051
2052 if (DestVolatile) {
2053 // The trip count must fit in i32, since it is the type of the "num_words"
2054 // argument to hexagon_memcpy_forward_vp4cp4n2.
2055 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2056CleanupAndExit:
2057 // If we generated new code for the base pointer, clean up.
2058 Expander.clear();
2059 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2060 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2061 StoreBasePtr = nullptr;
2062 }
2063 if (LoadBasePtr) {
2064 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2065 LoadBasePtr = nullptr;
2066 }
2067 return false;
2068 }
2069 }
2070
2071 SmallPtrSet<Instruction*, 2> Ignore1;
2072 Ignore1.insert(SI);
2073 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2074 StoreSize, *AA, Ignore1)) {
2075 // Check if the load is the offending instruction.
2076 Ignore1.insert(LI);
2077 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2078 BECount, StoreSize, *AA, Ignore1)) {
2079 // Still bad. Nothing we can do.
2080 goto CleanupAndExit;
2081 }
2082 // It worked with the load ignored.
2083 Overlap = true;
2084 }
2085
2086 if (!Overlap) {
2087 if (DisableMemcpyIdiom || !HasMemcpy)
2088 goto CleanupAndExit;
2089 } else {
2090 // Don't generate memmove if this function will be inlined. This is
2091 // because the caller will undergo this transformation after inlining.
2092 Function *Func = CurLoop->getHeader()->getParent();
2093 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2094 goto CleanupAndExit;
2095
2096 // In case of a memmove, the call to memmove will be executed instead
2097 // of the loop, so we need to make sure that there is nothing else in
2098 // the loop than the load, store and instructions that these two depend
2099 // on.
2100 SmallVector<Instruction*,2> Insts;
2101 Insts.push_back(SI);
2102 Insts.push_back(LI);
2103 if (!coverLoop(CurLoop, Insts))
2104 goto CleanupAndExit;
2105
2106 if (DisableMemmoveIdiom || !HasMemmove)
2107 goto CleanupAndExit;
2108 bool IsNested = CurLoop->getParentLoop() != nullptr;
2109 if (IsNested && OnlyNonNestedMemmove)
2110 goto CleanupAndExit;
2111 }
2112
2113 // For a memcpy, we have to make sure that the input array is not being
2114 // mutated by the loop.
2115 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2116 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2117
2118 SmallPtrSet<Instruction*, 2> Ignore2;
2119 Ignore2.insert(SI);
2120 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2121 StoreSize, *AA, Ignore2))
2122 goto CleanupAndExit;
2123
2124 // Check the stride.
2125 bool StridePos = getSCEVStride(LoadEv) >= 0;
2126
2127 // Currently, the volatile memcpy only emulates traversing memory forward.
2128 if (!StridePos && DestVolatile)
2129 goto CleanupAndExit;
2130
2131 bool RuntimeCheck = (Overlap || DestVolatile);
2132
2133 BasicBlock *ExitB;
2134 if (RuntimeCheck) {
2135 // The runtime check needs a single exit block.
2136 SmallVector<BasicBlock*, 8> ExitBlocks;
2137 CurLoop->getUniqueExitBlocks(ExitBlocks);
2138 if (ExitBlocks.size() != 1)
2139 goto CleanupAndExit;
2140 ExitB = ExitBlocks[0];
2141 }
2142
2143 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2144 // pointer size if it isn't already.
2145 LLVMContext &Ctx = SI->getContext();
2146 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2147 DebugLoc DLoc = SI->getDebugLoc();
2148
2149 const SCEV *NumBytesS =
2150 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2151 if (StoreSize != 1)
2152 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2153 SCEV::FlagNUW);
2154 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2155 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2156 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2157 NumBytes = Simp;
2158
2159 CallInst *NewCall;
2160
2161 if (RuntimeCheck) {
2162 unsigned Threshold = RuntimeMemSizeThreshold;
2163 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2164 uint64_t C = CI->getZExtValue();
2165 if (Threshold != 0 && C < Threshold)
2166 goto CleanupAndExit;
2167 if (C < CompileTimeMemSizeThreshold)
2168 goto CleanupAndExit;
2169 }
2170
2171 BasicBlock *Header = CurLoop->getHeader();
2172 Function *Func = Header->getParent();
2173 Loop *ParentL = LF->getLoopFor(Preheader);
2174 StringRef HeaderName = Header->getName();
2175
2176 // Create a new (empty) preheader, and update the PHI nodes in the
2177 // header to use the new preheader.
2178 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2179 Func, Header);
2180 if (ParentL)
2181 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2182 IRBuilder<>(NewPreheader).CreateBr(Header);
2183 for (auto &In : *Header) {
2184 PHINode *PN = dyn_cast<PHINode>(&In);
2185 if (!PN)
2186 break;
2187 int bx = PN->getBasicBlockIndex(Preheader);
2188 if (bx >= 0)
2189 PN->setIncomingBlock(bx, NewPreheader);
2190 }
2191 DT->addNewBlock(NewPreheader, Preheader);
2192 DT->changeImmediateDominator(Header, NewPreheader);
2193
2194 // Check for safe conditions to execute memmove.
2195 // If stride is positive, copying things from higher to lower addresses
2196 // is equivalent to memmove. For negative stride, it's the other way
2197 // around. Copying forward in memory with positive stride may not be
2198 // same as memmove since we may be copying values that we just stored
2199 // in some previous iteration.
2200 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2201 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2202 Value *LowA = StridePos ? SA : LA;
2203 Value *HighA = StridePos ? LA : SA;
2204 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2205 Value *Cond = CmpA;
2206
2207 // Check for distance between pointers. Since the case LowA < HighA
2208 // is checked for above, assume LowA >= HighA.
2209 Value *Dist = Builder.CreateSub(LowA, HighA);
2210 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2211 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2212 Cond = CmpEither;
2213
2214 if (Threshold != 0) {
2215 Type *Ty = NumBytes->getType();
2216 Value *Thr = ConstantInt::get(Ty, Threshold);
2217 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2218 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2219 Cond = CmpBoth;
2220 }
2221 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2222 Func, NewPreheader);
2223 if (ParentL)
2224 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2225 Instruction *OldT = Preheader->getTerminator();
2226 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2227 OldT->eraseFromParent();
2228 Preheader->setName(Preheader->getName()+".old");
2229 DT->addNewBlock(MemmoveB, Preheader);
2230 // Find the new immediate dominator of the exit block.
2231 BasicBlock *ExitD = Preheader;
2232 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2233 BasicBlock *PB = *PI;
2234 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2235 if (!ExitD)
2236 break;
2237 }
2238 // If the prior immediate dominator of ExitB was dominated by the
2239 // old preheader, then the old preheader becomes the new immediate
2240 // dominator. Otherwise don't change anything (because the newly
2241 // added blocks are dominated by the old preheader).
2242 if (ExitD && DT->dominates(Preheader, ExitD)) {
2243 DomTreeNode *BN = DT->getNode(ExitB);
2244 DomTreeNode *DN = DT->getNode(ExitD);
2245 BN->setIDom(DN);
2246 }
2247
2248 // Add a call to memmove to the conditional block.
2249 IRBuilder<> CondBuilder(MemmoveB);
2250 CondBuilder.CreateBr(ExitB);
2251 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2252
2253 if (DestVolatile) {
2254 Type *Int32Ty = Type::getInt32Ty(Ctx);
2255 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2256 Type *VoidTy = Type::getVoidTy(Ctx);
2257 Module *M = Func->getParent();
2258 FunctionCallee Fn = M->getOrInsertFunction(
2259 HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
2260
2261 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2262 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2263 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2264 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2265 MemmoveB->getTerminator());
2266 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2267 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2268 NumWords = Simp;
2269
2270 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2271 ? StoreBasePtr
2272 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2273 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2274 ? LoadBasePtr
2275 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2276 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2277 } else {
2278 NewCall = CondBuilder.CreateMemMove(
2279 StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
2280 }
2281 } else {
2282 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
2283 LI->getAlign(), NumBytes);
2284 // Okay, the memcpy has been formed. Zap the original store and
2285 // anything that feeds into it.
2286 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2287 }
2288
2289 NewCall->setDebugLoc(DLoc);
2290
2291 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2292 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2293 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2294 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2295 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2296
2297 return true;
2298}
2299
2300// Check if the instructions in Insts, together with their dependencies
2301// cover the loop in the sense that the loop could be safely eliminated once
2302// the instructions in Insts are removed.
2303bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2304 SmallVectorImpl<Instruction*> &Insts) const {
2305 SmallSet<BasicBlock*,8> LoopBlocks;
2306 for (auto *B : L->blocks())
2307 LoopBlocks.insert(B);
2308
2309 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2310
2311 // Collect all instructions from the loop that the instructions in Insts
2312 // depend on (plus their dependencies, etc.). These instructions will
2313 // constitute the expression trees that feed those in Insts, but the trees
2314 // will be limited only to instructions contained in the loop.
2315 for (unsigned i = 0; i < Worklist.size(); ++i) {
2316 Instruction *In = Worklist[i];
2317 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2318 Instruction *OpI = dyn_cast<Instruction>(I);
2319 if (!OpI)
2320 continue;
2321 BasicBlock *PB = OpI->getParent();
2322 if (!LoopBlocks.count(PB))
2323 continue;
2324 Worklist.insert(OpI);
2325 }
2326 }
2327
2328 // Scan all instructions in the loop, if any of them have a user outside
2329 // of the loop, or outside of the expressions collected above, then either
2330 // the loop has a side-effect visible outside of it, or there are
2331 // instructions in it that are not involved in the original set Insts.
2332 for (auto *B : L->blocks()) {
2333 for (auto &In : *B) {
2334 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2335 continue;
2336 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2337 return false;
2338 for (auto K : In.users()) {
2339 Instruction *UseI = dyn_cast<Instruction>(K);
2340 if (!UseI)
2341 continue;
2342 BasicBlock *UseB = UseI->getParent();
2343 if (LF->getLoopFor(UseB) != L)
2344 return false;
2345 }
2346 }
2347 }
2348
2349 return true;
2350}
2351
2352/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2353/// with the specified backedge count. This block is known to be in the current
2354/// loop and not in any subloops.
2355bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2356 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2357 // We can only promote stores in this block if they are unconditionally
2358 // executed in the loop. For a block to be unconditionally executed, it has
2359 // to dominate all the exit blocks of the loop. Verify this now.
2360 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2361 return DT->dominates(BB, EB);
2362 };
2363 if (!all_of(ExitBlocks, DominatedByBB))
2364 return false;
2365
2366 bool MadeChange = false;
2367 // Look for store instructions, which may be optimized to memset/memcpy.
2368 SmallVector<StoreInst*,8> Stores;
2369 collectStores(CurLoop, BB, Stores);
2370
2371 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2372 for (auto &SI : Stores)
2373 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2374
2375 return MadeChange;
2376}
2377
2378bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2379 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2380 if (PMR.recognize())
2381 return true;
2382
2383 if (!HasMemcpy && !HasMemmove)
2384 return false;
2385
2386 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2387 assert(!isa<SCEVCouldNotCompute>(BECount) &&((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
2388 "runOnCountableLoop() called on a loop without a predictable"((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
2389 "backedge-taken count")((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
;
2390
2391 SmallVector<BasicBlock *, 8> ExitBlocks;
2392 L->getUniqueExitBlocks(ExitBlocks);
2393
2394 bool Changed = false;
2395
2396 // Scan all the blocks in the loop that are not in subloops.
2397 for (auto *BB : L->getBlocks()) {
2398 // Ignore blocks in subloops.
2399 if (LF->getLoopFor(BB) != L)
2400 continue;
2401 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2402 }
2403
2404 return Changed;
2405}
2406
2407bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
2408 const Module &M = *L->getHeader()->getParent()->getParent();
2409 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2410 return false;
2411
2412 if (skipLoop(L))
2413 return false;
2414
2415 // If the loop could not be converted to canonical form, it must have an
2416 // indirectbr in it, just give up.
2417 if (!L->getLoopPreheader())
2418 return false;
2419
2420 // Disable loop idiom recognition if the function's name is a common idiom.
2421 StringRef Name = L->getHeader()->getParent()->getName();
2422 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2423 return false;
2424
2425 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2426 DL = &L->getHeader()->getModule()->getDataLayout();
2427 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2428 LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2429 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2430 *L->getHeader()->getParent());
2431 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2432
2433 HasMemcpy = TLI->has(LibFunc_memcpy);
2434 HasMemmove = TLI->has(LibFunc_memmove);
2435
2436 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2437 return runOnCountableLoop(L);
2438 return false;
2439}
2440
2441Pass *llvm::createHexagonLoopIdiomPass() {
2442 return new HexagonLoopIdiomRecognize();
2443}

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <string>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34
35class APInt;
36
37inline APInt operator-(APInt);
38
39//===----------------------------------------------------------------------===//
40// APInt Class
41//===----------------------------------------------------------------------===//
42
43/// Class for arbitrary precision integers.
44///
45/// APInt is a functional replacement for common case unsigned integer type like
46/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
47/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
48/// than 64-bits of precision. APInt provides a variety of arithmetic operators
49/// and methods to manipulate integer values of any bit-width. It supports both
50/// the typical integer arithmetic and comparison operations as well as bitwise
51/// manipulation.
52///
53/// The class has several invariants worth noting:
54/// * All bit, byte, and word positions are zero-based.
55/// * Once the bit width is set, it doesn't change except by the Truncate,
56/// SignExtend, or ZeroExtend operations.
57/// * All binary operators must be on APInt instances of the same bit width.
58/// Attempting to use these operators on instances with different bit
59/// widths will yield an assertion.
60/// * The value is stored canonically as an unsigned value. For operations
61/// where it makes a difference, there are both signed and unsigned variants
62/// of the operation. For example, sdiv and udiv. However, because the bit
63/// widths must be the same, operations such as Mul and Add produce the same
64/// results regardless of whether the values are interpreted as signed or
65/// not.
66/// * In general, the class tries to follow the style of computation that LLVM
67/// uses in its IR. This simplifies its use for LLVM.
68///
69class LLVM_NODISCARD[[clang::warn_unused_result]] APInt {
70public:
71 typedef uint64_t WordType;
72
73 /// This enum is used to hold the constants we needed for APInt.
74 enum : unsigned {
75 /// Byte size of a word.
76 APINT_WORD_SIZE = sizeof(WordType),
77 /// Bits in a word.
78 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
79 };
80
81 enum class Rounding {
82 DOWN,
83 TOWARD_ZERO,
84 UP,
85 };
86
87 static const WordType WORDTYPE_MAX = ~WordType(0);
88
89private:
90 /// This union is used to store the integer value. When the
91 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
92 union {
93 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
94 uint64_t *pVal; ///< Used to store the >64 bits integer value.
95 } U;
96
97 unsigned BitWidth; ///< The number of bits in this APInt.
98
99 friend struct DenseMapAPIntKeyInfo;
100
101 friend class APSInt;
102
103 /// Fast internal constructor
104 ///
105 /// This constructor is used only internally for speed of construction of
106 /// temporaries. It is unsafe for general use so it is not public.
107 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
108 U.pVal = val;
109 }
110
111 /// Determine if this APInt just has one word to store value.
112 ///
113 /// \returns true if the number of bits <= 64, false otherwise.
114 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
115
116 /// Determine which word a bit is in.
117 ///
118 /// \returns the word position for the specified bit position.
119 static unsigned whichWord(unsigned bitPosition) {
120 return bitPosition / APINT_BITS_PER_WORD;
121 }
122
123 /// Determine which bit in a word a bit is in.
124 ///
125 /// \returns the bit position in a word for the specified bit position
126 /// in the APInt.
127 static unsigned whichBit(unsigned bitPosition) {
128 return bitPosition % APINT_BITS_PER_WORD;
129 }
130
131 /// Get a single bit mask.
132 ///
133 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
134 /// This method generates and returns a uint64_t (word) mask for a single
135 /// bit at a specific bit position. This is used to mask the bit in the
136 /// corresponding word.
137 static uint64_t maskBit(unsigned bitPosition) {
138 return 1ULL << whichBit(bitPosition);
139 }
140
141 /// Clear unused high order bits
142 ///
143 /// This method is used internally to clear the top "N" bits in the high order
144 /// word that are not used by the APInt. This is needed after the most
145 /// significant word is assigned a value to ensure that those bits are
146 /// zero'd out.
147 APInt &clearUnusedBits() {
148 // Compute how many bits are used in the final word
149 unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
150
151 // Mask out the high bits.
152 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
153 if (isSingleWord())
154 U.VAL &= mask;
155 else
156 U.pVal[getNumWords() - 1] &= mask;
157 return *this;
158 }
159
160 /// Get the word corresponding to a bit position
161 /// \returns the corresponding word for the specified bit position.
162 uint64_t getWord(unsigned bitPosition) const {
163 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
164 }
165
166 /// Utility method to change the bit width of this APInt to new bit width,
167 /// allocating and/or deallocating as necessary. There is no guarantee on the
168 /// value of any bits upon return. Caller should populate the bits after.
169 void reallocate(unsigned NewBitWidth);
170
171 /// Convert a char array into an APInt
172 ///
173 /// \param radix 2, 8, 10, 16, or 36
174 /// Converts a string into a number. The string must be non-empty
175 /// and well-formed as a number of the given base. The bit-width
176 /// must be sufficient to hold the result.
177 ///
178 /// This is used by the constructors that take string arguments.
179 ///
180 /// StringRef::getAsInteger is superficially similar but (1) does
181 /// not assume that the string is well-formed and (2) grows the
182 /// result to hold the input.
183 void fromString(unsigned numBits, StringRef str, uint8_t radix);
184
185 /// An internal division function for dividing APInts.
186 ///
187 /// This is used by the toString method to divide by the radix. It simply
188 /// provides a more convenient form of divide for internal use since KnuthDiv
189 /// has specific constraints on its inputs. If those constraints are not met
190 /// then it provides a simpler form of divide.
191 static void divide(const WordType *LHS, unsigned lhsWords,
192 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
193 WordType *Remainder);
194
195 /// out-of-line slow case for inline constructor
196 void initSlowCase(uint64_t val, bool isSigned);
197
198 /// shared code between two array constructors
199 void initFromArray(ArrayRef<uint64_t> array);
200
201 /// out-of-line slow case for inline copy constructor
202 void initSlowCase(const APInt &that);
203
204 /// out-of-line slow case for shl
205 void shlSlowCase(unsigned ShiftAmt);
206
207 /// out-of-line slow case for lshr.
208 void lshrSlowCase(unsigned ShiftAmt);
209
210 /// out-of-line slow case for ashr.
211 void ashrSlowCase(unsigned ShiftAmt);
212
213 /// out-of-line slow case for operator=
214 void AssignSlowCase(const APInt &RHS);
215
216 /// out-of-line slow case for operator==
217 bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
218
219 /// out-of-line slow case for countLeadingZeros
220 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
221
222 /// out-of-line slow case for countLeadingOnes.
223 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
224
225 /// out-of-line slow case for countTrailingZeros.
226 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
227
228 /// out-of-line slow case for countTrailingOnes
229 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
230
231 /// out-of-line slow case for countPopulation
232 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
233
234 /// out-of-line slow case for intersects.
235 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
236
237 /// out-of-line slow case for isSubsetOf.
238 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
239
240 /// out-of-line slow case for setBits.
241 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
242
243 /// out-of-line slow case for flipAllBits.
244 void flipAllBitsSlowCase();
245
246 /// out-of-line slow case for operator&=.
247 void AndAssignSlowCase(const APInt& RHS);
248
249 /// out-of-line slow case for operator|=.
250 void OrAssignSlowCase(const APInt& RHS);
251
252 /// out-of-line slow case for operator^=.
253 void XorAssignSlowCase(const APInt& RHS);
254
255 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
256 /// to, or greater than RHS.
257 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
258
259 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
260 /// to, or greater than RHS.
261 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
262
263public:
264 /// \name Constructors
265 /// @{
266
267 /// Create a new APInt of numBits width, initialized as val.
268 ///
269 /// If isSigned is true then val is treated as if it were a signed value
270 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
271 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
272 /// the range of val are zero filled).
273 ///
274 /// \param numBits the bit width of the constructed APInt
275 /// \param val the initial value of the APInt
276 /// \param isSigned how to treat signedness of val
277 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
278 : BitWidth(numBits) {
279 assert(BitWidth && "bitwidth too small")((BitWidth && "bitwidth too small") ? static_cast<
void> (0) : __assert_fail ("BitWidth && \"bitwidth too small\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 279, __PRETTY_FUNCTION__))
;
280 if (isSingleWord()) {
281 U.VAL = val;
282 clearUnusedBits();
283 } else {
284 initSlowCase(val, isSigned);
285 }
286 }
287
288 /// Construct an APInt of numBits width, initialized as bigVal[].
289 ///
290 /// Note that bigVal.size() can be smaller or larger than the corresponding
291 /// bit width but any extraneous bits will be dropped.
292 ///
293 /// \param numBits the bit width of the constructed APInt
294 /// \param bigVal a sequence of words to form the initial value of the APInt
295 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
296
297 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
298 /// deprecated because this constructor is prone to ambiguity with the
299 /// APInt(unsigned, uint64_t, bool) constructor.
300 ///
301 /// If this overload is ever deleted, care should be taken to prevent calls
302 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
303 /// constructor.
304 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
305
306 /// Construct an APInt from a string representation.
307 ///
308 /// This constructor interprets the string \p str in the given radix. The
309 /// interpretation stops when the first character that is not suitable for the
310 /// radix is encountered, or the end of the string. Acceptable radix values
311 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
312 /// string to require more bits than numBits.
313 ///
314 /// \param numBits the bit width of the constructed APInt
315 /// \param str the string to be interpreted
316 /// \param radix the radix to use for the conversion
317 APInt(unsigned numBits, StringRef str, uint8_t radix);
318
319 /// Simply makes *this a copy of that.
320 /// Copy Constructor.
321 APInt(const APInt &that) : BitWidth(that.BitWidth) {
322 if (isSingleWord())
323 U.VAL = that.U.VAL;
324 else
325 initSlowCase(that);
326 }
327
328 /// Move Constructor.
329 APInt(APInt &&that) : BitWidth(that.BitWidth) {
330 memcpy(&U, &that.U, sizeof(U));
331 that.BitWidth = 0;
332 }
333
334 /// Destructor.
335 ~APInt() {
336 if (needsCleanup())
337 delete[] U.pVal;
338 }
339
340 /// Default constructor that creates an uninteresting APInt
341 /// representing a 1-bit zero value.
342 ///
343 /// This is useful for object deserialization (pair this with the static
344 /// method Read).
345 explicit APInt() : BitWidth(1) { U.VAL = 0; }
346
347 /// Returns whether this instance allocated memory.
348 bool needsCleanup() const { return !isSingleWord(); }
349
350 /// Used to insert APInt objects, or objects that contain APInt objects, into
351 /// FoldingSets.
352 void Profile(FoldingSetNodeID &id) const;
353
354 /// @}
355 /// \name Value Tests
356 /// @{
357
358 /// Determine sign of this APInt.
359 ///
360 /// This tests the high bit of this APInt to determine if it is set.
361 ///
362 /// \returns true if this APInt is negative, false otherwise
363 bool isNegative() const { return (*this)[BitWidth - 1]; }
364
365 /// Determine if this APInt Value is non-negative (>= 0)
366 ///
367 /// This tests the high bit of the APInt to determine if it is unset.
368 bool isNonNegative() const { return !isNegative(); }
369
370 /// Determine if sign bit of this APInt is set.
371 ///
372 /// This tests the high bit of this APInt to determine if it is set.
373 ///
374 /// \returns true if this APInt has its sign bit set, false otherwise.
375 bool isSignBitSet() const { return (*this)[BitWidth-1]; }
376
377 /// Determine if sign bit of this APInt is clear.
378 ///
379 /// This tests the high bit of this APInt to determine if it is clear.
380 ///
381 /// \returns true if this APInt has its sign bit clear, false otherwise.
382 bool isSignBitClear() const { return !isSignBitSet(); }
383
384 /// Determine if this APInt Value is positive.
385 ///
386 /// This tests if the value of this APInt is positive (> 0). Note
387 /// that 0 is not a positive value.
388 ///
389 /// \returns true if this APInt is positive.
390 bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
391
392 /// Determine if this APInt Value is non-positive (<= 0).
393 ///
394 /// \returns true if this APInt is non-positive.
395 bool isNonPositive() const { return !isStrictlyPositive(); }
396
397 /// Determine if all bits are set
398 ///
399 /// This checks to see if the value has all bits of the APInt are set or not.
400 bool isAllOnesValue() const {
401 if (isSingleWord())
402 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
403 return countTrailingOnesSlowCase() == BitWidth;
404 }
405
406 /// Determine if all bits are clear
407 ///
408 /// This checks to see if the value has all bits of the APInt are clear or
409 /// not.
410 bool isNullValue() const { return !*this; }
411
412 /// Determine if this is a value of 1.
413 ///
414 /// This checks to see if the value of this APInt is one.
415 bool isOneValue() const {
416 if (isSingleWord())
417 return U.VAL == 1;
418 return countLeadingZerosSlowCase() == BitWidth - 1;
419 }
420
421 /// Determine if this is the largest unsigned value.
422 ///
423 /// This checks to see if the value of this APInt is the maximum unsigned
424 /// value for the APInt's bit width.
425 bool isMaxValue() const { return isAllOnesValue(); }
426
427 /// Determine if this is the largest signed value.
428 ///
429 /// This checks to see if the value of this APInt is the maximum signed
430 /// value for the APInt's bit width.
431 bool isMaxSignedValue() const {
432 if (isSingleWord())
433 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
434 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
435 }
436
437 /// Determine if this is the smallest unsigned value.
438 ///
439 /// This checks to see if the value of this APInt is the minimum unsigned
440 /// value for the APInt's bit width.
441 bool isMinValue() const { return isNullValue(); }
442
443 /// Determine if this is the smallest signed value.
444 ///
445 /// This checks to see if the value of this APInt is the minimum signed
446 /// value for the APInt's bit width.
447 bool isMinSignedValue() const {
448 if (isSingleWord())
449 return U.VAL == (WordType(1) << (BitWidth - 1));
450 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
451 }
452
453 /// Check if this APInt has an N-bits unsigned integer value.
454 bool isIntN(unsigned N) const {
455 assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail
("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 455, __PRETTY_FUNCTION__))
;
456 return getActiveBits() <= N;
457 }
458
459 /// Check if this APInt has an N-bits signed integer value.
460 bool isSignedIntN(unsigned N) const {
461 assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail
("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 461, __PRETTY_FUNCTION__))
;
462 return getMinSignedBits() <= N;
463 }
464
465 /// Check if this APInt's value is a power of two greater than zero.
466 ///
467 /// \returns true if the argument APInt value is a power of two > 0.
468 bool isPowerOf2() const {
469 if (isSingleWord())
470 return isPowerOf2_64(U.VAL);
471 return countPopulationSlowCase() == 1;
472 }
473
474 /// Check if the APInt's value is returned by getSignMask.
475 ///
476 /// \returns true if this is the value returned by getSignMask.
477 bool isSignMask() const { return isMinSignedValue(); }
478
479 /// Convert APInt to a boolean value.
480 ///
481 /// This converts the APInt to a boolean value as a test against zero.
482 bool getBoolValue() const { return !!*this; }
483
484 /// If this value is smaller than the specified limit, return it, otherwise
485 /// return the limit value. This causes the value to saturate to the limit.
486 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
487 return ugt(Limit) ? Limit : getZExtValue();
488 }
489
490 /// Check if the APInt consists of a repeated bit pattern.
491 ///
492 /// e.g. 0x01010101 satisfies isSplat(8).
493 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
494 /// width without remainder.
495 bool isSplat(unsigned SplatSizeInBits) const;
496
497 /// \returns true if this APInt value is a sequence of \param numBits ones
498 /// starting at the least significant bit with the remainder zero.
499 bool isMask(unsigned numBits) const {
500 assert(numBits != 0 && "numBits must be non-zero")((numBits != 0 && "numBits must be non-zero") ? static_cast
<void> (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 500, __PRETTY_FUNCTION__))
;
501 assert(numBits <= BitWidth && "numBits out of range")((numBits <= BitWidth && "numBits out of range") ?
static_cast<void> (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 501, __PRETTY_FUNCTION__))
;
502 if (isSingleWord())
503 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
504 unsigned Ones = countTrailingOnesSlowCase();
505 return (numBits == Ones) &&
506 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
507 }
508
509 /// \returns true if this APInt is a non-empty sequence of ones starting at
510 /// the least significant bit with the remainder zero.
511 /// Ex. isMask(0x0000FFFFU) == true.
512 bool isMask() const {
513 if (isSingleWord())
514 return isMask_64(U.VAL);
515 unsigned Ones = countTrailingOnesSlowCase();
516 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
517 }
518
519 /// Return true if this APInt value contains a sequence of ones with
520 /// the remainder zero.
521 bool isShiftedMask() const {
522 if (isSingleWord())
523 return isShiftedMask_64(U.VAL);
524 unsigned Ones = countPopulationSlowCase();
525 unsigned LeadZ = countLeadingZerosSlowCase();
526 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
527 }
528
529 /// @}
530 /// \name Value Generators
531 /// @{
532
533 /// Gets maximum unsigned value of APInt for specific bit width.
534 static APInt getMaxValue(unsigned numBits) {
535 return getAllOnesValue(numBits);
536 }
537
538 /// Gets maximum signed value of APInt for a specific bit width.
539 static APInt getSignedMaxValue(unsigned numBits) {
540 APInt API = getAllOnesValue(numBits);
541 API.clearBit(numBits - 1);
542 return API;
543 }
544
545 /// Gets minimum unsigned value of APInt for a specific bit width.
546 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
547
548 /// Gets minimum signed value of APInt for a specific bit width.
549 static APInt getSignedMinValue(unsigned numBits) {
550 APInt API(numBits, 0);
551 API.setBit(numBits - 1);
552 return API;
553 }
554
555 /// Get the SignMask for a specific bit width.
556 ///
557 /// This is just a wrapper function of getSignedMinValue(), and it helps code
558 /// readability when we want to get a SignMask.
559 static APInt getSignMask(unsigned BitWidth) {
560 return getSignedMinValue(BitWidth);
561 }
562
563 /// Get the all-ones value.
564 ///
565 /// \returns the all-ones value for an APInt of the specified bit-width.
566 static APInt getAllOnesValue(unsigned numBits) {
567 return APInt(numBits, WORDTYPE_MAX, true);
568 }
569
570 /// Get the '0' value.
571 ///
572 /// \returns the '0' value for an APInt of the specified bit-width.
573 static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
574
575 /// Compute an APInt containing numBits highbits from this APInt.
576 ///
577 /// Get an APInt with the same BitWidth as this APInt, just zero mask
578 /// the low bits and right shift to the least significant bit.
579 ///
580 /// \returns the high "numBits" bits of this APInt.
581 APInt getHiBits(unsigned numBits) const;
582
583 /// Compute an APInt containing numBits lowbits from this APInt.
584 ///
585 /// Get an APInt with the same BitWidth as this APInt, just zero mask
586 /// the high bits.
587 ///
588 /// \returns the low "numBits" bits of this APInt.
589 APInt getLoBits(unsigned numBits) const;
590
591 /// Return an APInt with exactly one bit set in the result.
592 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
593 APInt Res(numBits, 0);
594 Res.setBit(BitNo);
595 return Res;
596 }
597
598 /// Get a value with a block of bits set.
599 ///
600 /// Constructs an APInt value that has a contiguous range of bits set. The
601 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
602 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
603 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
604 /// \p hiBit.
605 ///
606 /// \param numBits the intended bit width of the result
607 /// \param loBit the index of the lowest bit set.
608 /// \param hiBit the index of the highest bit set.
609 ///
610 /// \returns An APInt value with the requested bits set.
611 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
612 assert(loBit <= hiBit && "loBit greater than hiBit")((loBit <= hiBit && "loBit greater than hiBit") ? static_cast
<void> (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 612, __PRETTY_FUNCTION__))
;
613 APInt Res(numBits, 0);
614 Res.setBits(loBit, hiBit);
615 return Res;
616 }
617
618 /// Wrap version of getBitsSet.
619 /// If \p hiBit is no less than \p loBit, this is same with getBitsSet.
620 /// If \p hiBit is less than \p loBit, the set bits "wrap". For example, with
621 /// parameters (32, 28, 4), you would get 0xF000000F.
622 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
623 unsigned hiBit) {
624 APInt Res(numBits, 0);
625 Res.setBitsWithWrap(loBit, hiBit);
626 return Res;
627 }
628
629 /// Get a value with upper bits starting at loBit set.
630 ///
631 /// Constructs an APInt value that has a contiguous range of bits set. The
632 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
633 /// bits will be zero. For example, with parameters(32, 12) you would get
634 /// 0xFFFFF000.
635 ///
636 /// \param numBits the intended bit width of the result
637 /// \param loBit the index of the lowest bit to set.
638 ///
639 /// \returns An APInt value with the requested bits set.
640 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
641 APInt Res(numBits, 0);
642 Res.setBitsFrom(loBit);
643 return Res;
644 }
645
646 /// Get a value with high bits set
647 ///
648 /// Constructs an APInt value that has the top hiBitsSet bits set.
649 ///
650 /// \param numBits the bitwidth of the result
651 /// \param hiBitsSet the number of high-order bits set in the result.
652 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
653 APInt Res(numBits, 0);
654 Res.setHighBits(hiBitsSet);
655 return Res;
656 }
657
658 /// Get a value with low bits set
659 ///
660 /// Constructs an APInt value that has the bottom loBitsSet bits set.
661 ///
662 /// \param numBits the bitwidth of the result
663 /// \param loBitsSet the number of low-order bits set in the result.
664 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
665 APInt Res(numBits, 0);
666 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
15
Returning from 'APInt::setLowBits'
667 return Res;
668 }
669
670 /// Return a value containing V broadcasted over NewLen bits.
671 static APInt getSplat(unsigned NewLen, const APInt &V);
672
673 /// Determine if two APInts have the same value, after zero-extending
674 /// one of them (if needed!) to ensure that the bit-widths match.
675 static bool isSameValue(const APInt &I1, const APInt &I2) {
676 if (I1.getBitWidth() == I2.getBitWidth())
677 return I1 == I2;
678
679 if (I1.getBitWidth() > I2.getBitWidth())
680 return I1 == I2.zext(I1.getBitWidth());
681
682 return I1.zext(I2.getBitWidth()) == I2;
683 }
684
685 /// Overload to compute a hash_code for an APInt value.
686 friend hash_code hash_value(const APInt &Arg);
687
688 /// This function returns a pointer to the internal storage of the APInt.
689 /// This is useful for writing out the APInt in binary form without any
690 /// conversions.
691 const uint64_t *getRawData() const {
692 if (isSingleWord())
693 return &U.VAL;
694 return &U.pVal[0];
695 }
696
697 /// @}
698 /// \name Unary Operators
699 /// @{
700
701 /// Postfix increment operator.
702 ///
703 /// Increments *this by 1.
704 ///
705 /// \returns a new APInt value representing the original value of *this.
706 const APInt operator++(int) {
707 APInt API(*this);
708 ++(*this);
709 return API;
710 }
711
712 /// Prefix increment operator.
713 ///
714 /// \returns *this incremented by one
715 APInt &operator++();
716
717 /// Postfix decrement operator.
718 ///
719 /// Decrements *this by 1.
720 ///
721 /// \returns a new APInt value representing the original value of *this.
722 const APInt operator--(int) {
723 APInt API(*this);
724 --(*this);
725 return API;
726 }
727
728 /// Prefix decrement operator.
729 ///
730 /// \returns *this decremented by one.
731 APInt &operator--();
732
733 /// Logical negation operator.
734 ///
735 /// Performs logical negation operation on this APInt.
736 ///
737 /// \returns true if *this is zero, false otherwise.
738 bool operator!() const {
739 if (isSingleWord())
740 return U.VAL == 0;
741 return countLeadingZerosSlowCase() == BitWidth;
742 }
743
744 /// @}
745 /// \name Assignment Operators
746 /// @{
747
748 /// Copy assignment operator.
749 ///
750 /// \returns *this after assignment of RHS.
751 APInt &operator=(const APInt &RHS) {
752 // If the bitwidths are the same, we can avoid mucking with memory
753 if (isSingleWord() && RHS.isSingleWord()) {
754 U.VAL = RHS.U.VAL;
755 BitWidth = RHS.BitWidth;
756 return clearUnusedBits();
757 }
758
759 AssignSlowCase(RHS);
760 return *this;
761 }
762
763 /// Move assignment operator.
764 APInt &operator=(APInt &&that) {
765#ifdef _MSC_VER
766 // The MSVC std::shuffle implementation still does self-assignment.
767 if (this == &that)
768 return *this;
769#endif
770 assert(this != &that && "Self-move not supported")((this != &that && "Self-move not supported") ? static_cast
<void> (0) : __assert_fail ("this != &that && \"Self-move not supported\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 770, __PRETTY_FUNCTION__))
;
771 if (!isSingleWord())
772 delete[] U.pVal;
773
774 // Use memcpy so that type based alias analysis sees both VAL and pVal
775 // as modified.
776 memcpy(&U, &that.U, sizeof(U));
777
778 BitWidth = that.BitWidth;
779 that.BitWidth = 0;
780
781 return *this;
782 }
783
784 /// Assignment operator.
785 ///
786 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
787 /// the bit width, the excess bits are truncated. If the bit width is larger
788 /// than 64, the value is zero filled in the unspecified high order bits.
789 ///
790 /// \returns *this after assignment of RHS value.
791 APInt &operator=(uint64_t RHS) {
792 if (isSingleWord()) {
793 U.VAL = RHS;
794 clearUnusedBits();
795 } else {
796 U.pVal[0] = RHS;
797 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
798 }
799 return *this;
800 }
801
802 /// Bitwise AND assignment operator.
803 ///
804 /// Performs a bitwise AND operation on this APInt and RHS. The result is
805 /// assigned to *this.
806 ///
807 /// \returns *this after ANDing with RHS.
808 APInt &operator&=(const APInt &RHS) {
809 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 809, __PRETTY_FUNCTION__))
;
810 if (isSingleWord())
811 U.VAL &= RHS.U.VAL;
812 else
813 AndAssignSlowCase(RHS);
814 return *this;
815 }
816
817 /// Bitwise AND assignment operator.
818 ///
819 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
820 /// logically zero-extended or truncated to match the bit-width of
821 /// the LHS.
822 APInt &operator&=(uint64_t RHS) {
823 if (isSingleWord()) {
824 U.VAL &= RHS;
825 return *this;
826 }
827 U.pVal[0] &= RHS;
828 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
829 return *this;
830 }
831
832 /// Bitwise OR assignment operator.
833 ///
834 /// Performs a bitwise OR operation on this APInt and RHS. The result is
835 /// assigned *this;
836 ///
837 /// \returns *this after ORing with RHS.
838 APInt &operator|=(const APInt &RHS) {
839 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 839, __PRETTY_FUNCTION__))
;
840 if (isSingleWord())
841 U.VAL |= RHS.U.VAL;
842 else
843 OrAssignSlowCase(RHS);
844 return *this;
845 }
846
847 /// Bitwise OR assignment operator.
848 ///
849 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
850 /// logically zero-extended or truncated to match the bit-width of
851 /// the LHS.
852 APInt &operator|=(uint64_t RHS) {
853 if (isSingleWord()) {
854 U.VAL |= RHS;
855 clearUnusedBits();
856 } else {
857 U.pVal[0] |= RHS;
858 }
859 return *this;
860 }
861
862 /// Bitwise XOR assignment operator.
863 ///
864 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
865 /// assigned to *this.
866 ///
867 /// \returns *this after XORing with RHS.
868 APInt &operator^=(const APInt &RHS) {
869 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 869, __PRETTY_FUNCTION__))
;
870 if (isSingleWord())
871 U.VAL ^= RHS.U.VAL;
872 else
873 XorAssignSlowCase(RHS);
874 return *this;
875 }
876
877 /// Bitwise XOR assignment operator.
878 ///
879 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
880 /// logically zero-extended or truncated to match the bit-width of
881 /// the LHS.
882 APInt &operator^=(uint64_t RHS) {
883 if (isSingleWord()) {
884 U.VAL ^= RHS;
885 clearUnusedBits();
886 } else {
887 U.pVal[0] ^= RHS;
888 }
889 return *this;
890 }
891
892 /// Multiplication assignment operator.
893 ///
894 /// Multiplies this APInt by RHS and assigns the result to *this.
895 ///
896 /// \returns *this
897 APInt &operator*=(const APInt &RHS);
898 APInt &operator*=(uint64_t RHS);
899
900 /// Addition assignment operator.
901 ///
902 /// Adds RHS to *this and assigns the result to *this.
903 ///
904 /// \returns *this
905 APInt &operator+=(const APInt &RHS);
906 APInt &operator+=(uint64_t RHS);
907
908 /// Subtraction assignment operator.
909 ///
910 /// Subtracts RHS from *this and assigns the result to *this.
911 ///
912 /// \returns *this
913 APInt &operator-=(const APInt &RHS);
914 APInt &operator-=(uint64_t RHS);
915
916 /// Left-shift assignment function.
917 ///
918 /// Shifts *this left by shiftAmt and assigns the result to *this.
919 ///
920 /// \returns *this after shifting left by ShiftAmt
921 APInt &operator<<=(unsigned ShiftAmt) {
922 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 922, __PRETTY_FUNCTION__))
;
923 if (isSingleWord()) {
924 if (ShiftAmt == BitWidth)
925 U.VAL = 0;
926 else
927 U.VAL <<= ShiftAmt;
928 return clearUnusedBits();
929 }
930 shlSlowCase(ShiftAmt);
931 return *this;
932 }
933
934 /// Left-shift assignment function.
935 ///
936 /// Shifts *this left by shiftAmt and assigns the result to *this.
937 ///
938 /// \returns *this after shifting left by ShiftAmt
939 APInt &operator<<=(const APInt &ShiftAmt);
940
941 /// @}
942 /// \name Binary Operators
943 /// @{
944
945 /// Multiplication operator.
946 ///
947 /// Multiplies this APInt by RHS and returns the result.
948 APInt operator*(const APInt &RHS) const;
949
950 /// Left logical shift operator.
951 ///
952 /// Shifts this APInt left by \p Bits and returns the result.
953 APInt operator<<(unsigned Bits) const { return shl(Bits); }
954
955 /// Left logical shift operator.
956 ///
957 /// Shifts this APInt left by \p Bits and returns the result.
958 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
959
960 /// Arithmetic right-shift function.
961 ///
962 /// Arithmetic right-shift this APInt by shiftAmt.
963 APInt ashr(unsigned ShiftAmt) const {
964 APInt R(*this);
965 R.ashrInPlace(ShiftAmt);
966 return R;
967 }
968
969 /// Arithmetic right-shift this APInt by ShiftAmt in place.
970 void ashrInPlace(unsigned ShiftAmt) {
971 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 971, __PRETTY_FUNCTION__))
;
972 if (isSingleWord()) {
973 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
974 if (ShiftAmt == BitWidth)
975 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
976 else
977 U.VAL = SExtVAL >> ShiftAmt;
978 clearUnusedBits();
979 return;
980 }
981 ashrSlowCase(ShiftAmt);
982 }
983
984 /// Logical right-shift function.
985 ///
986 /// Logical right-shift this APInt by shiftAmt.
987 APInt lshr(unsigned shiftAmt) const {
988 APInt R(*this);
989 R.lshrInPlace(shiftAmt);
990 return R;
991 }
992
993 /// Logical right-shift this APInt by ShiftAmt in place.
994 void lshrInPlace(unsigned ShiftAmt) {
995 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 995, __PRETTY_FUNCTION__))
;
996 if (isSingleWord()) {
997 if (ShiftAmt == BitWidth)
998 U.VAL = 0;
999 else
1000 U.VAL >>= ShiftAmt;
1001 return;
1002 }
1003 lshrSlowCase(ShiftAmt);
1004 }
1005
1006 /// Left-shift function.
1007 ///
1008 /// Left-shift this APInt by shiftAmt.
1009 APInt shl(unsigned shiftAmt) const {
1010 APInt R(*this);
1011 R <<= shiftAmt;
1012 return R;
1013 }
1014
1015 /// Rotate left by rotateAmt.
1016 APInt rotl(unsigned rotateAmt) const;
1017
1018 /// Rotate right by rotateAmt.
1019 APInt rotr(unsigned rotateAmt) const;
1020
1021 /// Arithmetic right-shift function.
1022 ///
1023 /// Arithmetic right-shift this APInt by shiftAmt.
1024 APInt ashr(const APInt &ShiftAmt) const {
1025 APInt R(*this);
1026 R.ashrInPlace(ShiftAmt);
1027 return R;
1028 }
1029
1030 /// Arithmetic right-shift this APInt by shiftAmt in place.
1031 void ashrInPlace(const APInt &shiftAmt);
1032
1033 /// Logical right-shift function.
1034 ///
1035 /// Logical right-shift this APInt by shiftAmt.
1036 APInt lshr(const APInt &ShiftAmt) const {
1037 APInt R(*this);
1038 R.lshrInPlace(ShiftAmt);
1039 return R;
1040 }
1041
1042 /// Logical right-shift this APInt by ShiftAmt in place.
1043 void lshrInPlace(const APInt &ShiftAmt);
1044
1045 /// Left-shift function.
1046 ///
1047 /// Left-shift this APInt by shiftAmt.
1048 APInt shl(const APInt &ShiftAmt) const {
1049 APInt R(*this);
1050 R <<= ShiftAmt;
1051 return R;
1052 }
1053
1054 /// Rotate left by rotateAmt.
1055 APInt rotl(const APInt &rotateAmt) const;
1056
1057 /// Rotate right by rotateAmt.
1058 APInt rotr(const APInt &rotateAmt) const;
1059
1060 /// Unsigned division operation.
1061 ///
1062 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
1063 /// RHS are treated as unsigned quantities for purposes of this division.
1064 ///
1065 /// \returns a new APInt value containing the division result, rounded towards
1066 /// zero.
1067 APInt udiv(const APInt &RHS) const;
1068 APInt udiv(uint64_t RHS) const;
1069
1070 /// Signed division function for APInt.
1071 ///
1072 /// Signed divide this APInt by APInt RHS.
1073 ///
1074 /// The result is rounded towards zero.
1075 APInt sdiv(const APInt &RHS) const;
1076 APInt sdiv(int64_t RHS) const;
1077
1078 /// Unsigned remainder operation.
1079 ///
1080 /// Perform an unsigned remainder operation on this APInt with RHS being the
1081 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
1082 /// of this operation. Note that this is a true remainder operation and not a
1083 /// modulo operation because the sign follows the sign of the dividend which
1084 /// is *this.
1085 ///
1086 /// \returns a new APInt value containing the remainder result
1087 APInt urem(const APInt &RHS) const;
1088 uint64_t urem(uint64_t RHS) const;
1089
1090 /// Function for signed remainder operation.
1091 ///
1092 /// Signed remainder operation on APInt.
1093 APInt srem(const APInt &RHS) const;
1094 int64_t srem(int64_t RHS) const;
1095
1096 /// Dual division/remainder interface.
1097 ///
1098 /// Sometimes it is convenient to divide two APInt values and obtain both the
1099 /// quotient and remainder. This function does both operations in the same
1100 /// computation making it a little more efficient. The pair of input arguments
1101 /// may overlap with the pair of output arguments. It is safe to call
1102 /// udivrem(X, Y, X, Y), for example.
1103 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1104 APInt &Remainder);
1105 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1106 uint64_t &Remainder);
1107
1108 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1109 APInt &Remainder);
1110 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
1111 int64_t &Remainder);
1112
1113 // Operations that return overflow indicators.
1114 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
1115 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1116 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1117 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1118 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1119 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1120 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1121 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1122 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
1123
1124 // Operations that saturate
1125 APInt sadd_sat(const APInt &RHS) const;
1126 APInt uadd_sat(const APInt &RHS) const;
1127 APInt ssub_sat(const APInt &RHS) const;
1128 APInt usub_sat(const APInt &RHS) const;
1129 APInt smul_sat(const APInt &RHS) const;
1130 APInt umul_sat(const APInt &RHS) const;
1131 APInt sshl_sat(const APInt &RHS) const;
1132 APInt ushl_sat(const APInt &RHS) const;
1133
1134 /// Array-indexing support.
1135 ///
1136 /// \returns the bit value at bitPosition
1137 bool operator[](unsigned bitPosition) const {
1138 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")((bitPosition < getBitWidth() && "Bit position out of bounds!"
) ? static_cast<void> (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1138, __PRETTY_FUNCTION__))
;
1139 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1140 }
1141
1142 /// @}
1143 /// \name Comparison Operators
1144 /// @{
1145
1146 /// Equality operator.
1147 ///
1148 /// Compares this APInt with RHS for the validity of the equality
1149 /// relationship.
1150 bool operator==(const APInt &RHS) const {
1151 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")((BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1151, __PRETTY_FUNCTION__))
;
1152 if (isSingleWord())
1153 return U.VAL == RHS.U.VAL;
1154 return EqualSlowCase(RHS);
1155 }
1156
1157 /// Equality operator.
1158 ///
1159 /// Compares this APInt with a uint64_t for the validity of the equality
1160 /// relationship.
1161 ///
1162 /// \returns true if *this == Val
1163 bool operator==(uint64_t Val) const {
1164 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1165 }
1166
1167 /// Equality comparison.
1168 ///
1169 /// Compares this APInt with RHS for the validity of the equality
1170 /// relationship.
1171 ///
1172 /// \returns true if *this == Val
1173 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1174
1175 /// Inequality operator.
1176 ///
1177 /// Compares this APInt with RHS for the validity of the inequality
1178 /// relationship.
1179 ///
1180 /// \returns true if *this != Val
1181 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1182
1183 /// Inequality operator.
1184 ///
1185 /// Compares this APInt with a uint64_t for the validity of the inequality
1186 /// relationship.
1187 ///
1188 /// \returns true if *this != Val
1189 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1190
1191 /// Inequality comparison
1192 ///
1193 /// Compares this APInt with RHS for the validity of the inequality
1194 /// relationship.
1195 ///
1196 /// \returns true if *this != Val
1197 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1198
1199 /// Unsigned less than comparison
1200 ///
1201 /// Regards both *this and RHS as unsigned quantities and compares them for
1202 /// the validity of the less-than relationship.
1203 ///
1204 /// \returns true if *this < RHS when both are considered unsigned.
1205 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1206
1207 /// Unsigned less than comparison
1208 ///
1209 /// Regards both *this as an unsigned quantity and compares it with RHS for
1210 /// the validity of the less-than relationship.
1211 ///
1212 /// \returns true if *this < RHS when considered unsigned.
1213 bool ult(uint64_t RHS) const {
1214 // Only need to check active bits if not a single word.
1215 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1216 }
1217
1218 /// Signed less than comparison
1219 ///
1220 /// Regards both *this and RHS as signed quantities and compares them for
1221 /// validity of the less-than relationship.
1222 ///
1223 /// \returns true if *this < RHS when both are considered signed.
1224 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1225
1226 /// Signed less than comparison
1227 ///
1228 /// Regards both *this as a signed quantity and compares it with RHS for
1229 /// the validity of the less-than relationship.
1230 ///
1231 /// \returns true if *this < RHS when considered signed.
1232 bool slt(int64_t RHS) const {
1233 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1234 : getSExtValue() < RHS;
1235 }
1236
1237 /// Unsigned less or equal comparison
1238 ///
1239 /// Regards both *this and RHS as unsigned quantities and compares them for
1240 /// validity of the less-or-equal relationship.
1241 ///
1242 /// \returns true if *this <= RHS when both are considered unsigned.
1243 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1244
1245 /// Unsigned less or equal comparison
1246 ///
1247 /// Regards both *this as an unsigned quantity and compares it with RHS for
1248 /// the validity of the less-or-equal relationship.
1249 ///
1250 /// \returns true if *this <= RHS when considered unsigned.
1251 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1252
1253 /// Signed less or equal comparison
1254 ///
1255 /// Regards both *this and RHS as signed quantities and compares them for
1256 /// validity of the less-or-equal relationship.
1257 ///
1258 /// \returns true if *this <= RHS when both are considered signed.
1259 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1260
1261 /// Signed less or equal comparison
1262 ///
1263 /// Regards both *this as a signed quantity and compares it with RHS for the
1264 /// validity of the less-or-equal relationship.
1265 ///
1266 /// \returns true if *this <= RHS when considered signed.
1267 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1268
1269 /// Unsigned greater than comparison
1270 ///
1271 /// Regards both *this and RHS as unsigned quantities and compares them for
1272 /// the validity of the greater-than relationship.
1273 ///
1274 /// \returns true if *this > RHS when both are considered unsigned.
1275 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1276
1277 /// Unsigned greater than comparison
1278 ///
1279 /// Regards both *this as an unsigned quantity and compares it with RHS for
1280 /// the validity of the greater-than relationship.
1281 ///
1282 /// \returns true if *this > RHS when considered unsigned.
1283 bool ugt(uint64_t RHS) const {
1284 // Only need to check active bits if not a single word.
1285 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1286 }
1287
1288 /// Signed greater than comparison
1289 ///
1290 /// Regards both *this and RHS as signed quantities and compares them for the
1291 /// validity of the greater-than relationship.
1292 ///
1293 /// \returns true if *this > RHS when both are considered signed.
1294 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1295
1296 /// Signed greater than comparison
1297 ///
1298 /// Regards both *this as a signed quantity and compares it with RHS for
1299 /// the validity of the greater-than relationship.
1300 ///
1301 /// \returns true if *this > RHS when considered signed.
1302 bool sgt(int64_t RHS) const {
1303 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1304 : getSExtValue() > RHS;
1305 }
1306
1307 /// Unsigned greater or equal comparison
1308 ///
1309 /// Regards both *this and RHS as unsigned quantities and compares them for
1310 /// validity of the greater-or-equal relationship.
1311 ///
1312 /// \returns true if *this >= RHS when both are considered unsigned.
1313 bool uge(const APInt &RHS) const { return !ult(RHS); }
1314
1315 /// Unsigned greater or equal comparison
1316 ///
1317 /// Regards both *this as an unsigned quantity and compares it with RHS for
1318 /// the validity of the greater-or-equal relationship.
1319 ///
1320 /// \returns true if *this >= RHS when considered unsigned.
1321 bool uge(uint64_t RHS) const { return !ult(RHS); }
1322
1323 /// Signed greater or equal comparison
1324 ///
1325 /// Regards both *this and RHS as signed quantities and compares them for
1326 /// validity of the greater-or-equal relationship.
1327 ///
1328 /// \returns true if *this >= RHS when both are considered signed.
1329 bool sge(const APInt &RHS) const { return !slt(RHS); }
1330
1331 /// Signed greater or equal comparison
1332 ///
1333 /// Regards both *this as a signed quantity and compares it with RHS for
1334 /// the validity of the greater-or-equal relationship.
1335 ///
1336 /// \returns true if *this >= RHS when considered signed.
1337 bool sge(int64_t RHS) const { return !slt(RHS); }
1338
1339 /// This operation tests if there are any pairs of corresponding bits
1340 /// between this APInt and RHS that are both set.
1341 bool intersects(const APInt &RHS) const {
1342 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1342, __PRETTY_FUNCTION__))
;
1343 if (isSingleWord())
1344 return (U.VAL & RHS.U.VAL) != 0;
1345 return intersectsSlowCase(RHS);
1346 }
1347
1348 /// This operation checks that all bits set in this APInt are also set in RHS.
1349 bool isSubsetOf(const APInt &RHS) const {
1350 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1350, __PRETTY_FUNCTION__))
;
1351 if (isSingleWord())
1352 return (U.VAL & ~RHS.U.VAL) == 0;
1353 return isSubsetOfSlowCase(RHS);
1354 }
1355
1356 /// @}
1357 /// \name Resizing Operators
1358 /// @{
1359
1360 /// Truncate to new width.
1361 ///
1362 /// Truncate the APInt to a specified width. It is an error to specify a width
1363 /// that is greater than or equal to the current width.
1364 APInt trunc(unsigned width) const;
1365
1366 /// Truncate to new width with unsigned saturation.
1367 ///
1368 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1369 /// the new bitwidth, then return truncated APInt. Else, return max value.
1370 APInt truncUSat(unsigned width) const;
1371
1372 /// Truncate to new width with signed saturation.
1373 ///
1374 /// If this APInt, treated as signed integer, can be losslessly truncated to
1375 /// the new bitwidth, then return truncated APInt. Else, return either
1376 /// signed min value if the APInt was negative, or signed max value.
1377 APInt truncSSat(unsigned width) const;
1378
1379 /// Sign extend to a new width.
1380 ///
1381 /// This operation sign extends the APInt to a new width. If the high order
1382 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1383 /// It is an error to specify a width that is less than or equal to the
1384 /// current width.
1385 APInt sext(unsigned width) const;
1386
1387 /// Zero extend to a new width.
1388 ///
1389 /// This operation zero extends the APInt to a new width. The high order bits
1390 /// are filled with 0 bits. It is an error to specify a width that is less
1391 /// than or equal to the current width.
1392 APInt zext(unsigned width) const;
1393
1394 /// Sign extend or truncate to width
1395 ///
1396 /// Make this APInt have the bit width given by \p width. The value is sign
1397 /// extended, truncated, or left alone to make it that width.
1398 APInt sextOrTrunc(unsigned width) const;
1399
1400 /// Zero extend or truncate to width
1401 ///
1402 /// Make this APInt have the bit width given by \p width. The value is zero
1403 /// extended, truncated, or left alone to make it that width.
1404 APInt zextOrTrunc(unsigned width) const;
1405
1406 /// Sign extend or truncate to width
1407 ///
1408 /// Make this APInt have the bit width given by \p width. The value is sign
1409 /// extended, or left alone to make it that width.
1410 APInt sextOrSelf(unsigned width) const;
1411
1412 /// Zero extend or truncate to width
1413 ///
1414 /// Make this APInt have the bit width given by \p width. The value is zero
1415 /// extended, or left alone to make it that width.
1416 APInt zextOrSelf(unsigned width) const;
1417
1418 /// @}
1419 /// \name Bit Manipulation Operators
1420 /// @{
1421
1422 /// Set every bit to 1.
1423 void setAllBits() {
1424 if (isSingleWord())
1425 U.VAL = WORDTYPE_MAX;
1426 else
1427 // Set all the bits in all the words.
1428 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1429 // Clear the unused ones
1430 clearUnusedBits();
1431 }
1432
1433 /// Set a given bit to 1.
1434 ///
1435 /// Set the given bit to 1 whose position is given as "bitPosition".
1436 void setBit(unsigned BitPosition) {
1437 assert(BitPosition < BitWidth && "BitPosition out of range")((BitPosition < BitWidth && "BitPosition out of range"
) ? static_cast<void> (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1437, __PRETTY_FUNCTION__))
;
1438 WordType Mask = maskBit(BitPosition);
1439 if (isSingleWord())
1440 U.VAL |= Mask;
1441 else
1442 U.pVal[whichWord(BitPosition)] |= Mask;
1443 }
1444
1445 /// Set the sign bit to 1.
1446 void setSignBit() {
1447 setBit(BitWidth - 1);
1448 }
1449
1450 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1451 /// This function handles "wrap" case when \p loBit > \p hiBit, and calls
1452 /// setBits when \p loBit <= \p hiBit.
1453 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1454 assert(hiBit <= BitWidth && "hiBit out of range")((hiBit <= BitWidth && "hiBit out of range") ? static_cast
<void> (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1454, __PRETTY_FUNCTION__))
;
1455 assert(loBit <= BitWidth && "loBit out of range")((loBit <= BitWidth && "loBit out of range") ? static_cast
<void> (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1455, __PRETTY_FUNCTION__))
;
1456 if (loBit <= hiBit) {
1457 setBits(loBit, hiBit);
1458 return;
1459 }
1460 setLowBits(hiBit);
1461 setHighBits(BitWidth - loBit);
1462 }
1463
1464 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1465 /// This function handles case when \p loBit <= \p hiBit.
1466 void setBits(unsigned loBit, unsigned hiBit) {
1467 assert(hiBit <= BitWidth && "hiBit out of range")((hiBit <= BitWidth && "hiBit out of range") ? static_cast
<void> (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1467, __PRETTY_FUNCTION__))
;
6
Assuming 'hiBit' is <= field 'BitWidth'
7
'?' condition is true
1468 assert
7.1
'loBit' is <= field 'BitWidth'
7.1
'loBit' is <= field 'BitWidth'
(loBit <= BitWidth && "loBit out of range")((loBit <= BitWidth && "loBit out of range") ? static_cast
<void> (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1468, __PRETTY_FUNCTION__))
;
8
'?' condition is true
1469 assert
8.1
'loBit' is <= 'hiBit'
8.1
'loBit' is <= 'hiBit'
(loBit <= hiBit && "loBit greater than hiBit")((loBit <= hiBit && "loBit greater than hiBit") ? static_cast
<void> (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1469, __PRETTY_FUNCTION__))
;
9
'?' condition is true
1470 if (loBit == hiBit)
10
Assuming 'loBit' is not equal to 'hiBit'
11
Taking false branch
1471 return;
1472 if (loBit
11.1
'loBit' is < APINT_BITS_PER_WORD
11.1
'loBit' is < APINT_BITS_PER_WORD
< APINT_BITS_PER_WORD && hiBit
11.2
'hiBit' is <= APINT_BITS_PER_WORD
11.2
'hiBit' is <= APINT_BITS_PER_WORD
<= APINT_BITS_PER_WORD) {
12
Taking true branch
1473 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1474 mask <<= loBit;
1475 if (isSingleWord())
13
Taking true branch
1476 U.VAL |= mask;
1477 else
1478 U.pVal[0] |= mask;
1479 } else {
1480 setBitsSlowCase(loBit, hiBit);
1481 }
1482 }
1483
1484 /// Set the top bits starting from loBit.
1485 void setBitsFrom(unsigned loBit) {
1486 return setBits(loBit, BitWidth);
1487 }
1488
1489 /// Set the bottom loBits bits.
1490 void setLowBits(unsigned loBits) {
1491 return setBits(0, loBits);
5
Calling 'APInt::setBits'
14
Returning from 'APInt::setBits'
1492 }
1493
1494 /// Set the top hiBits bits.
1495 void setHighBits(unsigned hiBits) {
1496 return setBits(BitWidth - hiBits, BitWidth);
1497 }
1498
1499 /// Set every bit to 0.
1500 void clearAllBits() {
1501 if (isSingleWord())
1502 U.VAL = 0;
1503 else
1504 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1505 }
1506
1507 /// Set a given bit to 0.
1508 ///
1509 /// Set the given bit to 0 whose position is given as "bitPosition".
1510 void clearBit(unsigned BitPosition) {
1511 assert(BitPosition < BitWidth && "BitPosition out of range")((BitPosition < BitWidth && "BitPosition out of range"
) ? static_cast<void> (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1511, __PRETTY_FUNCTION__))
;
1512 WordType Mask = ~maskBit(BitPosition);
1513 if (isSingleWord())
1514 U.VAL &= Mask;
1515 else
1516 U.pVal[whichWord(BitPosition)] &= Mask;
1517 }
1518
1519 /// Set bottom loBits bits to 0.
1520 void clearLowBits(unsigned loBits) {
1521 assert(loBits <= BitWidth && "More bits than bitwidth")((loBits <= BitWidth && "More bits than bitwidth")
? static_cast<void> (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1521, __PRETTY_FUNCTION__))
;
1522 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1523 *this &= Keep;
1524 }
1525
1526 /// Set the sign bit to 0.
1527 void clearSignBit() {
1528 clearBit(BitWidth - 1);
1529 }
1530
1531 /// Toggle every bit to its opposite value.
1532 void flipAllBits() {
1533 if (isSingleWord()) {
1534 U.VAL ^= WORDTYPE_MAX;
1535 clearUnusedBits();
1536 } else {
1537 flipAllBitsSlowCase();
1538 }
1539 }
1540
1541 /// Toggles a given bit to its opposite value.
1542 ///
1543 /// Toggle a given bit to its opposite value whose position is given
1544 /// as "bitPosition".
1545 void flipBit(unsigned bitPosition);
1546
1547 /// Negate this APInt in place.
1548 void negate() {
1549 flipAllBits();
1550 ++(*this);
1551 }
1552
1553 /// Insert the bits from a smaller APInt starting at bitPosition.
1554 void insertBits(const APInt &SubBits, unsigned bitPosition);
1555 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1556
1557 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1558 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1559 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1560
1561 /// @}
1562 /// \name Value Characterization Functions
1563 /// @{
1564
1565 /// Return the number of bits in the APInt.
1566 unsigned getBitWidth() const { return BitWidth; }
1567
1568 /// Get the number of words.
1569 ///
1570 /// Here one word's bitwidth equals to that of uint64_t.
1571 ///
1572 /// \returns the number of words to hold the integer value of this APInt.
1573 unsigned getNumWords() const { return getNumWords(BitWidth); }
1574
1575 /// Get the number of words.
1576 ///
1577 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1578 ///
1579 /// \returns the number of words to hold the integer value with a given bit
1580 /// width.
1581 static unsigned getNumWords(unsigned BitWidth) {
1582 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1583 }
1584
1585 /// Compute the number of active bits in the value
1586 ///
1587 /// This function returns the number of active bits which is defined as the
1588 /// bit width minus the number of leading zeros. This is used in several
1589 /// computations to see how "wide" the value is.
1590 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1591
1592 /// Compute the number of active words in the value of this APInt.
1593 ///
1594 /// This is used in conjunction with getActiveData to extract the raw value of
1595 /// the APInt.
1596 unsigned getActiveWords() const {
1597 unsigned numActiveBits = getActiveBits();
1598 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1599 }
1600
1601 /// Get the minimum bit size for this signed APInt
1602 ///
1603 /// Computes the minimum bit width for this APInt while considering it to be a
1604 /// signed (and probably negative) value. If the value is not negative, this
1605 /// function returns the same value as getActiveBits()+1. Otherwise, it
1606 /// returns the smallest bit width that will retain the negative value. For
1607 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1608 /// for -1, this function will always return 1.
1609 unsigned getMinSignedBits() const {
1610 if (isNegative())
1611 return BitWidth - countLeadingOnes() + 1;
1612 return getActiveBits() + 1;
1613 }
1614
1615 /// Get zero extended value
1616 ///
1617 /// This method attempts to return the value of this APInt as a zero extended
1618 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1619 /// uint64_t. Otherwise an assertion will result.
1620 uint64_t getZExtValue() const {
1621 if (isSingleWord())
1622 return U.VAL;
1623 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")((getActiveBits() <= 64 && "Too many bits for uint64_t"
) ? static_cast<void> (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1623, __PRETTY_FUNCTION__))
;
1624 return U.pVal[0];
1625 }
1626
1627 /// Get sign extended value
1628 ///
1629 /// This method attempts to return the value of this APInt as a sign extended
1630 /// int64_t. The bit width must be <= 64 or the value must fit within an
1631 /// int64_t. Otherwise an assertion will result.
1632 int64_t getSExtValue() const {
1633 if (isSingleWord())
1634 return SignExtend64(U.VAL, BitWidth);
1635 assert(getMinSignedBits() <= 64 && "Too many bits for int64_t")((getMinSignedBits() <= 64 && "Too many bits for int64_t"
) ? static_cast<void> (0) : __assert_fail ("getMinSignedBits() <= 64 && \"Too many bits for int64_t\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/ADT/APInt.h"
, 1635, __PRETTY_FUNCTION__))
;
1636 return int64_t(U.pVal[0]);
1637 }
1638
1639 /// Get bits required for string value.
1640 ///
1641 /// This method determines how many bits are required to hold the APInt
1642 /// equivalent of the string given by \p str.
1643 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1644
1645 /// The APInt version of the countLeadingZeros functions in
1646 /// MathExtras.h.
1647 ///
1648 /// It counts the number of zeros from the most significant bit to the first
1649 /// one bit.
1650 ///
1651 /// \returns BitWidth if the value is zero, otherwise returns the number of
1652 /// zeros from the most significant bit to the first one bits.
1653 unsigned countLeadingZeros() const {
1654 if (isSingleWord()) {
1655 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1656 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1657 }
1658 return countLeadingZerosSlowCase();
1659 }
1660
1661 /// Count the number of leading one bits.
1662 ///
1663 /// This function is an APInt version of the countLeadingOnes
1664 /// functions in MathExtras.h. It counts the number of ones from the most
1665 /// significant bit to the first zero bit.
1666 ///
1667 /// \returns 0 if the high order bit is not set, otherwise returns the number
1668 /// of 1 bits from the most significant to the least
1669 unsigned countLeadingOnes() const {
1670 if (isSingleWord())
1671 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1672 return countLeadingOnesSlowCase();
1673 }
1674
1675 /// Computes the number of leading bits of this APInt that are equal to its
1676 /// sign bit.
1677 unsigned getNumSignBits() const {
1678 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1679 }
1680
1681 /// Count the number of trailing zero bits.
1682 ///
1683 /// This function is an APInt version of the countTrailingZeros
1684 /// functions in MathExtras.h. It counts the number of zeros from the least
1685 /// significant bit to the first set bit.
1686 ///
1687 /// \returns BitWidth if the value is zero, otherwise returns the number of
1688 /// zeros from the least significant bit to the first one bit.
1689 unsigned countTrailingZeros() const {
1690 if (isSingleWord())
1691 return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
1692 return countTrailingZerosSlowCase();
1693 }
1694
1695 /// Count the number of trailing one bits.
1696 ///
1697 /// This function is an APInt version of the countTrailingOnes
1698 /// functions in MathExtras.h. It counts the number of ones from the least
1699 /// significant bit to the first zero bit.
1700 ///
1701 /// \returns BitWidth if the value is all ones, otherwise returns the number
1702 /// of ones from the least significant bit to the first zero bit.
1703 unsigned countTrailingOnes() const {
1704 if (isSingleWord())
1705 return llvm::countTrailingOnes(U.VAL);
1706 return countTrailingOnesSlowCase();
1707 }
1708
1709 /// Count the number of bits set.
1710 ///
1711 /// This function is an APInt version of the countPopulation functions
1712 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1713 ///
1714 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1715 unsigned countPopulation() const {
1716 if (isSingleWord())
1717 return llvm::countPopulation(U.VAL);
1718 return countPopulationSlowCase();
1719 }
1720
1721 /// @}
1722 /// \name Conversion Functions
1723 /// @{
1724 void print(raw_ostream &OS, bool isSigned) const;
1725
1726 /// Converts an APInt to a string and append it to Str. Str is commonly a
1727 /// SmallString.
1728 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1729 bool formatAsCLiteral = false) const;
1730
1731 /// Considers the APInt to be unsigned and converts it into a string in the
1732 /// radix given. The radix can be 2, 8, 10 16, or 36.
1733 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1734 toString(Str, Radix, false, false);
1735 }
1736
1737 /// Considers the APInt to be signed and converts it into a string in the
1738 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1739 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1740 toString(Str, Radix, true, false);
1741 }
1742
1743 /// Return the APInt as a std::string.
1744 ///
1745 /// Note that this is an inefficient method. It is better to pass in a
1746 /// SmallVector/SmallString to the methods above to avoid thrashing the heap
1747 /// for the string.
1748 std::string toString(unsigned Radix, bool Signed) const;
1749
1750 /// \returns a byte-swapped representation of this APInt Value.
1751 APInt byteSwap() const;
1752
1753 /// \returns the value with the bit representation reversed of this APInt
1754 /// Value.
1755 APInt reverseBits() const;
1756
1757 /// Converts this APInt to a double value.
1758 double roundToDouble(bool isSigned) const;
1759
1760 /// Converts this unsigned APInt to a double value.
1761 double roundToDouble() const { return roundToDouble(false); }
1762
1763 /// Converts this signed APInt to a double value.
1764 double signedRoundToDouble() const { return roundToDouble(true); }
1765
1766 /// Converts APInt bits to a double
1767 ///
1768 /// The conversion does not do a translation from integer to double, it just
1769 /// re-interprets the bits as a double. Note that it is valid to do this on
1770 /// any bit width. Exactly 64 bits will be translated.
1771 double bitsToDouble() const {
1772 return BitsToDouble(getWord(0));
1773 }
1774
1775 /// Converts APInt bits to a float
1776 ///
1777 /// The conversion does not do a translation from integer to float, it just
1778 /// re-interprets the bits as a float. Note that it is valid to do this on
1779 /// any bit width. Exactly 32 bits will be translated.
1780 float bitsToFloat() const {
1781 return BitsToFloat(static_cast<uint32_t>(getWord(0)));
1782 }
1783
1784 /// Converts a double to APInt bits.
1785 ///
1786 /// The conversion does not do a translation from double to integer, it just
1787 /// re-interprets the bits of the double.
1788 static APInt doubleToBits(double V) {
1789 return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V));
1790 }
1791
1792 /// Converts a float to APInt bits.
1793 ///
1794 /// The conversion does not do a translation from float to integer, it just
1795 /// re-interprets the bits of the float.
1796 static APInt floatToBits(float V) {
1797 return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V));
1798 }
1799
1800 /// @}
1801 /// \name Mathematics Operations
1802 /// @{
1803
1804 /// \returns the floor log base 2 of this APInt.
1805 unsigned logBase2() const { return getActiveBits() - 1; }
1806
1807 /// \returns the ceil log base 2 of this APInt.
1808 unsigned ceilLogBase2() const {
1809 APInt temp(*this);
1810 --temp;
1811 return temp.getActiveBits();
1812 }
1813
1814 /// \returns the nearest log base 2 of this APInt. Ties round up.
1815 ///
1816 /// NOTE: When we have a BitWidth of 1, we define:
1817 ///
1818 /// log2(0) = UINT32_MAX
1819 /// log2(1) = 0
1820 ///
1821 /// to get around any mathematical concerns resulting from
1822 /// referencing 2 in a space where 2 does no exist.
1823 unsigned nearestLogBase2() const {
1824 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1825 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1826 // UINT32_MAX.
1827 if (BitWidth == 1)
1828 return U.VAL - 1;
1829
1830 // Handle the zero case.
1831 if (isNullValue())
1832 return UINT32_MAX(4294967295U);
1833
1834 // The non-zero case is handled by computing:
1835 //
1836 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1837 //
1838 // where x[i] is referring to the value of the ith bit of x.
1839 unsigned lg = logBase2();
1840 return lg + unsigned((*this)[lg - 1]);
1841 }
1842
1843 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1844 /// otherwise
1845 int32_t exactLogBase2() const {
1846 if (!isPowerOf2())
1847 return -1;
1848 return logBase2();
1849 }
1850
1851 /// Compute the square root
1852 APInt sqrt() const;
1853
1854 /// Get the absolute value;
1855 ///
1856 /// If *this is < 0 then return -(*this), otherwise *this;
1857 APInt abs() const {
1858 if (isNegative())
1859 return -(*this);
1860 return *this;
1861 }
1862
1863 /// \returns the multiplicative inverse for a given modulo.
1864 APInt multiplicativeInverse(const APInt &modulo) const;
1865
1866 /// @}
1867 /// \name Support for division by constant
1868 /// @{
1869
1870 /// Calculate the magic number for signed division by a constant.
1871 struct ms;
1872 ms magic() const;
1873
1874 /// Calculate the magic number for unsigned division by a constant.
1875 struct mu;
1876 mu magicu(unsigned LeadingZeros = 0) const;
1877
1878 /// @}
1879 /// \name Building-block Operations for APInt and APFloat
1880 /// @{
1881
1882 // These building block operations operate on a representation of arbitrary
1883 // precision, two's-complement, bignum integer values. They should be
1884 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1885 // generally a pointer to the base of an array of integer parts, representing
1886 // an unsigned bignum, and a count of how many parts there are.
1887
1888 /// Sets the least significant part of a bignum to the input value, and zeroes
1889 /// out higher parts.
1890 static void tcSet(WordType *, WordType, unsigned);
1891
1892 /// Assign one bignum to another.
1893 static void tcAssign(WordType *, const WordType *, unsigned);
1894
1895 /// Returns true if a bignum is zero, false otherwise.
1896 static bool tcIsZero(const WordType *, unsigned);
1897
1898 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1899 static int tcExtractBit(const WordType *, unsigned bit);
1900
1901 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1902 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1903 /// significant bit of DST. All high bits above srcBITS in DST are
1904 /// zero-filled.
1905 static void tcExtract(WordType *, unsigned dstCount,
1906 const WordType *, unsigned srcBits,
1907 unsigned srcLSB);
1908
1909 /// Set the given bit of a bignum. Zero-based.
1910 static void tcSetBit(WordType *, unsigned bit);
1911
1912 /// Clear the given bit of a bignum. Zero-based.
1913 static void tcClearBit(WordType *, unsigned bit);
1914
1915 /// Returns the bit number of the least or most significant set bit of a
1916 /// number. If the input number has no bits set -1U is returned.
1917 static unsigned tcLSB(const WordType *, unsigned n);
1918 static unsigned tcMSB(const WordType *parts, unsigned n);
1919
1920 /// Negate a bignum in-place.
1921 static void tcNegate(WordType *, unsigned);
1922
1923 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1924 static WordType tcAdd(WordType *, const WordType *,
1925 WordType carry, unsigned);
1926 /// DST += RHS. Returns the carry flag.
1927 static WordType tcAddPart(WordType *, WordType, unsigned);
1928
1929 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1930 static WordType tcSubtract(WordType *, const WordType *,
1931 WordType carry, unsigned);
1932 /// DST -= RHS. Returns the carry flag.
1933 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1934
1935 /// DST += SRC * MULTIPLIER + PART if add is true
1936 /// DST = SRC * MULTIPLIER + PART if add is false
1937 ///
1938 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1939 /// start at the same point, i.e. DST == SRC.
1940 ///
1941 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1942 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1943 /// result, and if all of the omitted higher parts were zero return zero,
1944 /// otherwise overflow occurred and return one.
1945 static int tcMultiplyPart(WordType *dst, const WordType *src,
1946 WordType multiplier, WordType carry,
1947 unsigned srcParts, unsigned dstParts,
1948 bool add);
1949
1950 /// DST = LHS * RHS, where DST has the same width as the operands and is
1951 /// filled with the least significant parts of the result. Returns one if
1952 /// overflow occurred, otherwise zero. DST must be disjoint from both
1953 /// operands.
1954 static int tcMultiply(WordType *, const WordType *, const WordType *,
1955 unsigned);
1956
1957 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1958 /// operands. No overflow occurs. DST must be disjoint from both operands.
1959 static void tcFullMultiply(WordType *, const WordType *,
1960 const WordType *, unsigned, unsigned);
1961
1962 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1963 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1964 /// REMAINDER to the remainder, return zero. i.e.
1965 ///
1966 /// OLD_LHS = RHS * LHS + REMAINDER
1967 ///
1968 /// SCRATCH is a bignum of the same size as the operands and result for use by
1969 /// the routine; its contents need not be initialized and are destroyed. LHS,
1970 /// REMAINDER and SCRATCH must be distinct.
1971 static int tcDivide(WordType *lhs, const WordType *rhs,
1972 WordType *remainder, WordType *scratch,
1973 unsigned parts);
1974
1975 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1976 /// restrictions on Count.
1977 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1978
1979 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1980 /// restrictions on Count.
1981 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1982
1983 /// The obvious AND, OR and XOR and complement operations.
1984 static void tcAnd(WordType *, const WordType *, unsigned);
1985 static void tcOr(WordType *, const WordType *, unsigned);
1986 static void tcXor(WordType *, const WordType *, unsigned);
1987 static void tcComplement(WordType *, unsigned);
1988
1989 /// Comparison (unsigned) of two bignums.
1990 static int tcCompare(const WordType *, const WordType *, unsigned);
1991
1992 /// Increment a bignum in-place. Return the carry flag.
1993 static WordType tcIncrement(WordType *dst, unsigned parts) {
1994 return tcAddPart(dst, 1, parts);
1995 }
1996
1997 /// Decrement a bignum in-place. Return the borrow flag.
1998 static WordType tcDecrement(WordType *dst, unsigned parts) {
1999 return tcSubtractPart(dst, 1, parts);
2000 }
2001
2002 /// Set the least significant BITS and clear the rest.
2003 static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
2004
2005 /// debug method
2006 void dump() const;
2007
2008 /// @}
2009};
2010
2011/// Magic data for optimising signed division by a constant.
2012struct APInt::ms {
2013 APInt m; ///< magic number
2014 unsigned s; ///< shift amount
2015};
2016
2017/// Magic data for optimising unsigned division by a constant.
2018struct APInt::mu {
2019 APInt m; ///< magic number
2020 bool a; ///< add indicator
2021 unsigned s; ///< shift amount
2022};
2023
2024inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
2025
2026inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
2027
2028/// Unary bitwise complement operator.
2029///
2030/// \returns an APInt that is the bitwise complement of \p v.
2031inline APInt operator~(APInt v) {
2032 v.flipAllBits();
2033 return v;
2034}
2035
2036inline APInt operator&(APInt a, const APInt &b) {
2037 a &= b;
2038 return a;
2039}
2040
2041inline APInt operator&(const APInt &a, APInt &&b) {
2042 b &= a;
2043 return std::move(b);
2044}
2045
2046inline APInt operator&(APInt a, uint64_t RHS) {
2047 a &= RHS;
2048 return a;
2049}
2050
2051inline APInt operator&(uint64_t LHS, APInt b) {
2052 b &= LHS;
2053 return b;
2054}
2055
2056inline APInt operator|(APInt a, const APInt &b) {
2057 a |= b;
2058 return a;
2059}
2060
2061inline APInt operator|(const APInt &a, APInt &&b) {
2062 b |= a;
2063 return std::move(b);
2064}
2065
2066inline APInt operator|(APInt a, uint64_t RHS) {
2067 a |= RHS;
2068 return a;
2069}
2070
2071inline APInt operator|(uint64_t LHS, APInt b) {
2072 b |= LHS;
2073 return b;
2074}
2075
2076inline APInt operator^(APInt a, const APInt &b) {
2077 a ^= b;
2078 return a;
2079}
2080
2081inline APInt operator^(const APInt &a, APInt &&b) {
2082 b ^= a;
2083 return std::move(b);
2084}
2085
2086inline APInt operator^(APInt a, uint64_t RHS) {
2087 a ^= RHS;
2088 return a;
2089}
2090
2091inline APInt operator^(uint64_t LHS, APInt b) {
2092 b ^= LHS;
2093 return b;
2094}
2095
2096inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2097 I.print(OS, true);
2098 return OS;
2099}
2100
2101inline APInt operator-(APInt v) {
2102 v.negate();
2103 return v;
2104}
2105
2106inline APInt operator+(APInt a, const APInt &b) {
2107 a += b;
2108 return a;
2109}
2110
2111inline APInt operator+(const APInt &a, APInt &&b) {
2112 b += a;
2113 return std::move(b);
2114}
2115
2116inline APInt operator+(APInt a, uint64_t RHS) {
2117 a += RHS;
2118 return a;
2119}
2120
2121inline APInt operator+(uint64_t LHS, APInt b) {
2122 b += LHS;
2123 return b;
2124}
2125
2126inline APInt operator-(APInt a, const APInt &b) {
2127 a -= b;
2128 return a;
2129}
2130
2131inline APInt operator-(const APInt &a, APInt &&b) {
2132 b.negate();
2133 b += a;
2134 return std::move(b);
2135}
2136
2137inline APInt operator-(APInt a, uint64_t RHS) {
2138 a -= RHS;
2139 return a;
2140}
2141
2142inline APInt operator-(uint64_t LHS, APInt b) {
2143 b.negate();
2144 b += LHS;
2145 return b;
2146}
2147
2148inline APInt operator*(APInt a, uint64_t RHS) {
2149 a *= RHS;
2150 return a;
2151}
2152
2153inline APInt operator*(uint64_t LHS, APInt b) {
2154 b *= LHS;
2155 return b;
2156}
2157
2158
2159namespace APIntOps {
2160
2161/// Determine the smaller of two APInts considered to be signed.
2162inline const APInt &smin(const APInt &A, const APInt &B) {
2163 return A.slt(B) ? A : B;
2164}
2165
2166/// Determine the larger of two APInts considered to be signed.
2167inline const APInt &smax(const APInt &A, const APInt &B) {
2168 return A.sgt(B) ? A : B;
2169}
2170
2171/// Determine the smaller of two APInts considered to be signed.
2172inline const APInt &umin(const APInt &A, const APInt &B) {
2173 return A.ult(B) ? A : B;
2174}
2175
2176/// Determine the larger of two APInts considered to be unsigned.
2177inline const APInt &umax(const APInt &A, const APInt &B) {
2178 return A.ugt(B) ? A : B;
2179}
2180
2181/// Compute GCD of two unsigned APInt values.
2182///
2183/// This function returns the greatest common divisor of the two APInt values
2184/// using Stein's algorithm.
2185///
2186/// \returns the greatest common divisor of A and B.
2187APInt GreatestCommonDivisor(APInt A, APInt B);
2188
2189/// Converts the given APInt to a double value.
2190///
2191/// Treats the APInt as an unsigned value for conversion purposes.
2192inline double RoundAPIntToDouble(const APInt &APIVal) {
2193 return APIVal.roundToDouble();
2194}
2195
2196/// Converts the given APInt to a double value.
2197///
2198/// Treats the APInt as a signed value for conversion purposes.
2199inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2200 return APIVal.signedRoundToDouble();
2201}
2202
2203/// Converts the given APInt to a float vlalue.
2204inline float RoundAPIntToFloat(const APInt &APIVal) {
2205 return float(RoundAPIntToDouble(APIVal));
2206}
2207
2208/// Converts the given APInt to a float value.
2209///
2210/// Treats the APInt as a signed value for conversion purposes.
2211inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2212 return float(APIVal.signedRoundToDouble());
2213}
2214
2215/// Converts the given double value into a APInt.
2216///
2217/// This function convert a double value to an APInt value.
2218APInt RoundDoubleToAPInt(double Double, unsigned width);
2219
2220/// Converts a float value into a APInt.
2221///
2222/// Converts a float value into an APInt value.
2223inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2224 return RoundDoubleToAPInt(double(Float), width);
2225}
2226
2227/// Return A unsign-divided by B, rounded by the given rounding mode.
2228APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2229
2230/// Return A sign-divided by B, rounded by the given rounding mode.
2231APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2232
2233/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2234/// (e.g. 32 for i32).
2235/// This function finds the smallest number n, such that
2236/// (a) n >= 0 and q(n) = 0, or
2237/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2238/// integers, belong to two different intervals [Rk, Rk+R),
2239/// where R = 2^BW, and k is an integer.
2240/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2241/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2242/// subtraction (treated as addition of negated numbers) would always
2243/// count as an overflow, but here we want to allow values to decrease
2244/// and increase as long as they are within the same interval.
2245/// Specifically, adding of two negative numbers should not cause an
2246/// overflow (as long as the magnitude does not exceed the bit width).
2247/// On the other hand, given a positive number, adding a negative
2248/// number to it can give a negative result, which would cause the
2249/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2250/// treated as a special case of an overflow.
2251///
2252/// This function returns None if after finding k that minimizes the
2253/// positive solution to q(n) = kR, both solutions are contained between
2254/// two consecutive integers.
2255///
2256/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2257/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2258/// virtue of *signed* overflow. This function will *not* find such an n,
2259/// however it may find a value of n satisfying the inequalities due to
2260/// an *unsigned* overflow (if the values are treated as unsigned).
2261/// To find a solution for a signed overflow, treat it as a problem of
2262/// finding an unsigned overflow with a range with of BW-1.
2263///
2264/// The returned value may have a different bit width from the input
2265/// coefficients.
2266Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2267 unsigned RangeWidth);
2268
2269/// Compare two values, and if they are different, return the position of the
2270/// most significant bit that is different in the values.
2271Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2272 const APInt &B);
2273
2274} // End of APIntOps namespace
2275
2276// See friend declaration above. This additional declaration is required in
2277// order to compile LLVM with IBM xlC compiler.
2278hash_code hash_value(const APInt &Arg);
2279
2280/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2281/// with the integer held in IntVal.
2282void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2283
2284/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2285/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2286void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes);
2287
2288} // namespace llvm
2289
2290#endif