Bug Summary

File:llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1565, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/Hexagon -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/Hexagon -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonLoopIdiomRecognition.h"
10#include "llvm/ADT/APInt.h"
11#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/Triple.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/InstructionSimplify.h"
20#include "llvm/Analysis/LoopAnalysisManager.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopPass.h"
23#include "llvm/Analysis/MemoryLocation.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/ScalarEvolutionExpressions.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/ValueTracking.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/IntrinsicsHexagon.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/User.h"
49#include "llvm/IR/Value.h"
50#include "llvm/InitializePasses.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include "llvm/Transforms/Utils/Local.h"
62#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
63#include <algorithm>
64#include <array>
65#include <cassert>
66#include <cstdint>
67#include <cstdlib>
68#include <deque>
69#include <functional>
70#include <iterator>
71#include <map>
72#include <set>
73#include <utility>
74#include <vector>
75
76#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
77
78using namespace llvm;
79
80static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memcpy in loop idiom recognition"));
83
84static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
85 cl::Hidden, cl::init(false),
86 cl::desc("Disable generation of memmove in loop idiom recognition"));
87
88static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
89 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
90 "check guarding the memmove."));
91
92static cl::opt<unsigned> CompileTimeMemSizeThreshold(
93 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
94 cl::desc("Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
96
97static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
98 cl::Hidden, cl::init(true),
99 cl::desc("Only enable generating memmove in non-nested loops"));
100
101static cl::opt<bool> HexagonVolatileMemcpy(
102 "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
103 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
104
105static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
106 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
107
108static const char *HexagonVolatileMemcpyName
109 = "hexagon_memcpy_forward_vp4cp4n2";
110
111
112namespace llvm {
113
114void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
115Pass *createHexagonLoopIdiomPass();
116
117} // end namespace llvm
118
119namespace {
120
121class HexagonLoopIdiomRecognize {
122public:
123 explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
124 LoopInfo *LF, const TargetLibraryInfo *TLI,
125 ScalarEvolution *SE)
126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
127
128 bool run(Loop *L);
129
130private:
131 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
132 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
133 void collectStores(Loop *CurLoop, BasicBlock *BB,
134 SmallVectorImpl<StoreInst *> &Stores);
135 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
136 bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const;
137 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
138 SmallVectorImpl<BasicBlock *> &ExitBlocks);
139 bool runOnCountableLoop(Loop *L);
140
141 AliasAnalysis *AA;
142 const DataLayout *DL;
143 DominatorTree *DT;
144 LoopInfo *LF;
145 const TargetLibraryInfo *TLI;
146 ScalarEvolution *SE;
147 bool HasMemcpy, HasMemmove;
148};
149
150class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {
151public:
152 static char ID;
153
154 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
155 initializeHexagonLoopIdiomRecognizeLegacyPassPass(
156 *PassRegistry::getPassRegistry());
157 }
158
159 StringRef getPassName() const override {
160 return "Recognize Hexagon-specific loop idioms";
161 }
162
163 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 AU.addRequired<LoopInfoWrapperPass>();
165 AU.addRequiredID(LoopSimplifyID);
166 AU.addRequiredID(LCSSAID);
167 AU.addRequired<AAResultsWrapperPass>();
168 AU.addRequired<ScalarEvolutionWrapperPass>();
169 AU.addRequired<DominatorTreeWrapperPass>();
170 AU.addRequired<TargetLibraryInfoWrapperPass>();
171 AU.addPreserved<TargetLibraryInfoWrapperPass>();
172 }
173
174 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
175};
176
177struct Simplifier {
178 struct Rule {
179 using FuncType = std::function<Value *(Instruction *, LLVMContext &)>;
180 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
181 StringRef Name; // For debugging.
182 FuncType Fn;
183 };
184
185 void addRule(StringRef N, const Rule::FuncType &F) {
186 Rules.push_back(Rule(N, F));
187 }
188
189private:
190 struct WorkListType {
191 WorkListType() = default;
192
193 void push_back(Value *V) {
194 // Do not push back duplicates.
195 if (!S.count(V)) {
196 Q.push_back(V);
197 S.insert(V);
198 }
199 }
200
201 Value *pop_front_val() {
202 Value *V = Q.front();
203 Q.pop_front();
204 S.erase(V);
205 return V;
206 }
207
208 bool empty() const { return Q.empty(); }
209
210 private:
211 std::deque<Value *> Q;
212 std::set<Value *> S;
213 };
214
215 using ValueSetType = std::set<Value *>;
216
217 std::vector<Rule> Rules;
218
219public:
220 struct Context {
221 using ValueMapType = DenseMap<Value *, Value *>;
222
223 Value *Root;
224 ValueSetType Used; // The set of all cloned values used by Root.
225 ValueSetType Clones; // The set of all cloned values.
226 LLVMContext &Ctx;
227
228 Context(Instruction *Exp)
229 : Ctx(Exp->getParent()->getParent()->getContext()) {
230 initialize(Exp);
231 }
232
233 ~Context() { cleanup(); }
234
235 void print(raw_ostream &OS, const Value *V) const;
236 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
237
238 private:
239 friend struct Simplifier;
240
241 void initialize(Instruction *Exp);
242 void cleanup();
243
244 template <typename FuncT> void traverse(Value *V, FuncT F);
245 void record(Value *V);
246 void use(Value *V);
247 void unuse(Value *V);
248
249 bool equal(const Instruction *I, const Instruction *J) const;
250 Value *find(Value *Tree, Value *Sub) const;
251 Value *subst(Value *Tree, Value *OldV, Value *NewV);
252 void replace(Value *OldV, Value *NewV);
253 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
254 };
255
256 Value *simplify(Context &C);
257};
258
259 struct PE {
260 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
261
262 const Simplifier::Context &C;
263 const Value *V;
264 };
265
266 LLVM_ATTRIBUTE_USED__attribute__((__used__))
267 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
268 P.C.print(OS, P.V ? P.V : P.C.Root);
269 return OS;
270 }
271
272} // end anonymous namespace
273
274char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
275
276INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
277 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
278INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
279INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
280INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
281INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
282INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
283INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
284INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
285INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
286 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
287
288template <typename FuncT>
289void Simplifier::Context::traverse(Value *V, FuncT F) {
290 WorkListType Q;
291 Q.push_back(V);
292
293 while (!Q.empty()) {
294 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
295 if (!U || U->getParent())
296 continue;
297 if (!F(U))
298 continue;
299 for (Value *Op : U->operands())
300 Q.push_back(Op);
301 }
302}
303
304void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
305 const auto *U = dyn_cast<const Instruction>(V);
306 if (!U) {
307 OS << V << '(' << *V << ')';
308 return;
309 }
310
311 if (U->getParent()) {
312 OS << U << '(';
313 U->printAsOperand(OS, true);
314 OS << ')';
315 return;
316 }
317
318 unsigned N = U->getNumOperands();
319 if (N != 0)
320 OS << U << '(';
321 OS << U->getOpcodeName();
322 for (const Value *Op : U->operands()) {
323 OS << ' ';
324 print(OS, Op);
325 }
326 if (N != 0)
327 OS << ')';
328}
329
330void Simplifier::Context::initialize(Instruction *Exp) {
331 // Perform a deep clone of the expression, set Root to the root
332 // of the clone, and build a map from the cloned values to the
333 // original ones.
334 ValueMapType M;
335 BasicBlock *Block = Exp->getParent();
336 WorkListType Q;
337 Q.push_back(Exp);
338
339 while (!Q.empty()) {
340 Value *V = Q.pop_front_val();
341 if (M.find(V) != M.end())
342 continue;
343 if (Instruction *U = dyn_cast<Instruction>(V)) {
344 if (isa<PHINode>(U) || U->getParent() != Block)
345 continue;
346 for (Value *Op : U->operands())
347 Q.push_back(Op);
348 M.insert({U, U->clone()});
349 }
350 }
351
352 for (std::pair<Value*,Value*> P : M) {
353 Instruction *U = cast<Instruction>(P.second);
354 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
355 auto F = M.find(U->getOperand(i));
356 if (F != M.end())
357 U->setOperand(i, F->second);
358 }
359 }
360
361 auto R = M.find(Exp);
362 assert(R != M.end())(static_cast<void> (0));
363 Root = R->second;
364
365 record(Root);
366 use(Root);
367}
368
369void Simplifier::Context::record(Value *V) {
370 auto Record = [this](Instruction *U) -> bool {
371 Clones.insert(U);
372 return true;
373 };
374 traverse(V, Record);
375}
376
377void Simplifier::Context::use(Value *V) {
378 auto Use = [this](Instruction *U) -> bool {
379 Used.insert(U);
380 return true;
381 };
382 traverse(V, Use);
383}
384
385void Simplifier::Context::unuse(Value *V) {
386 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
387 return;
388
389 auto Unuse = [this](Instruction *U) -> bool {
390 if (!U->use_empty())
391 return false;
392 Used.erase(U);
393 return true;
394 };
395 traverse(V, Unuse);
396}
397
398Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
399 if (Tree == OldV)
400 return NewV;
401 if (OldV == NewV)
402 return Tree;
403
404 WorkListType Q;
405 Q.push_back(Tree);
406 while (!Q.empty()) {
407 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
408 // If U is not an instruction, or it's not a clone, skip it.
409 if (!U || U->getParent())
410 continue;
411 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
412 Value *Op = U->getOperand(i);
413 if (Op == OldV) {
414 U->setOperand(i, NewV);
415 unuse(OldV);
416 } else {
417 Q.push_back(Op);
418 }
419 }
420 }
421 return Tree;
422}
423
424void Simplifier::Context::replace(Value *OldV, Value *NewV) {
425 if (Root == OldV) {
426 Root = NewV;
427 use(Root);
428 return;
429 }
430
431 // NewV may be a complex tree that has just been created by one of the
432 // transformation rules. We need to make sure that it is commoned with
433 // the existing Root to the maximum extent possible.
434 // Identify all subtrees of NewV (including NewV itself) that have
435 // equivalent counterparts in Root, and replace those subtrees with
436 // these counterparts.
437 WorkListType Q;
438 Q.push_back(NewV);
439 while (!Q.empty()) {
440 Value *V = Q.pop_front_val();
441 Instruction *U = dyn_cast<Instruction>(V);
442 if (!U || U->getParent())
443 continue;
444 if (Value *DupV = find(Root, V)) {
445 if (DupV != V)
446 NewV = subst(NewV, V, DupV);
447 } else {
448 for (Value *Op : U->operands())
449 Q.push_back(Op);
450 }
451 }
452
453 // Now, simply replace OldV with NewV in Root.
454 Root = subst(Root, OldV, NewV);
455 use(Root);
456}
457
458void Simplifier::Context::cleanup() {
459 for (Value *V : Clones) {
460 Instruction *U = cast<Instruction>(V);
461 if (!U->getParent())
462 U->dropAllReferences();
463 }
464
465 for (Value *V : Clones) {
466 Instruction *U = cast<Instruction>(V);
467 if (!U->getParent())
468 U->deleteValue();
469 }
470}
471
472bool Simplifier::Context::equal(const Instruction *I,
473 const Instruction *J) const {
474 if (I == J)
475 return true;
476 if (!I->isSameOperationAs(J))
477 return false;
478 if (isa<PHINode>(I))
479 return I->isIdenticalTo(J);
480
481 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
482 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
483 if (OpI == OpJ)
484 continue;
485 auto *InI = dyn_cast<const Instruction>(OpI);
486 auto *InJ = dyn_cast<const Instruction>(OpJ);
487 if (InI && InJ) {
488 if (!equal(InI, InJ))
489 return false;
490 } else if (InI != InJ || !InI)
491 return false;
492 }
493 return true;
494}
495
496Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
497 Instruction *SubI = dyn_cast<Instruction>(Sub);
498 WorkListType Q;
499 Q.push_back(Tree);
500
501 while (!Q.empty()) {
502 Value *V = Q.pop_front_val();
503 if (V == Sub)
504 return V;
505 Instruction *U = dyn_cast<Instruction>(V);
506 if (!U || U->getParent())
507 continue;
508 if (SubI && equal(SubI, U))
509 return U;
510 assert(!isa<PHINode>(U))(static_cast<void> (0));
511 for (Value *Op : U->operands())
512 Q.push_back(Op);
513 }
514 return nullptr;
515}
516
517void Simplifier::Context::link(Instruction *I, BasicBlock *B,
518 BasicBlock::iterator At) {
519 if (I->getParent())
520 return;
521
522 for (Value *Op : I->operands()) {
523 if (Instruction *OpI = dyn_cast<Instruction>(Op))
524 link(OpI, B, At);
525 }
526
527 B->getInstList().insert(At, I);
528}
529
530Value *Simplifier::Context::materialize(BasicBlock *B,
531 BasicBlock::iterator At) {
532 if (Instruction *RootI = dyn_cast<Instruction>(Root))
533 link(RootI, B, At);
534 return Root;
535}
536
537Value *Simplifier::simplify(Context &C) {
538 WorkListType Q;
539 Q.push_back(C.Root);
540 unsigned Count = 0;
541 const unsigned Limit = SimplifyLimit;
542
543 while (!Q.empty()) {
544 if (Count++ >= Limit)
545 break;
546 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
547 if (!U || U->getParent() || !C.Used.count(U))
548 continue;
549 bool Changed = false;
550 for (Rule &R : Rules) {
551 Value *W = R.Fn(U, C.Ctx);
552 if (!W)
553 continue;
554 Changed = true;
555 C.record(W);
556 C.replace(U, W);
557 Q.push_back(C.Root);
558 break;
559 }
560 if (!Changed) {
561 for (Value *Op : U->operands())
562 Q.push_back(Op);
563 }
564 }
565 return Count < Limit ? C.Root : nullptr;
566}
567
568//===----------------------------------------------------------------------===//
569//
570// Implementation of PolynomialMultiplyRecognize
571//
572//===----------------------------------------------------------------------===//
573
574namespace {
575
576 class PolynomialMultiplyRecognize {
577 public:
578 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
579 const DominatorTree &dt, const TargetLibraryInfo &tli,
580 ScalarEvolution &se)
581 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
582
583 bool recognize();
584
585 private:
586 using ValueSeq = SetVector<Value *>;
587
588 IntegerType *getPmpyType() const {
589 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
590 return IntegerType::get(Ctx, 32);
591 }
592
593 bool isPromotableTo(Value *V, IntegerType *Ty);
594 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
595 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
596
597 Value *getCountIV(BasicBlock *BB);
598 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
599 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
600 ValueSeq &Late);
601 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
602 bool commutesWithShift(Instruction *I);
603 bool highBitsAreZero(Value *V, unsigned IterCount);
604 bool keepsHighBitsZero(Value *V, unsigned IterCount);
605 bool isOperandShifted(Instruction *I, Value *Op);
606 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
607 unsigned IterCount);
608 void cleanupLoopBody(BasicBlock *LoopB);
609
610 struct ParsedValues {
611 ParsedValues() = default;
612
613 Value *M = nullptr;
614 Value *P = nullptr;
615 Value *Q = nullptr;
616 Value *R = nullptr;
617 Value *X = nullptr;
618 Instruction *Res = nullptr;
619 unsigned IterCount = 0;
620 bool Left = false;
621 bool Inv = false;
622 };
623
624 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
625 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
626 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
627 Value *CIV, ParsedValues &PV, bool PreScan);
628 unsigned getInverseMxN(unsigned QP);
629 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
630
631 void setupPreSimplifier(Simplifier &S);
632 void setupPostSimplifier(Simplifier &S);
633
634 Loop *CurLoop;
635 const DataLayout &DL;
636 const DominatorTree &DT;
637 const TargetLibraryInfo &TLI;
638 ScalarEvolution &SE;
639 };
640
641} // end anonymous namespace
642
643Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
644 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
645 if (std::distance(PI, PE) != 2)
646 return nullptr;
647 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
648
649 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
650 auto *PN = cast<PHINode>(I);
651 Value *InitV = PN->getIncomingValueForBlock(PB);
652 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
653 continue;
654 Value *IterV = PN->getIncomingValueForBlock(BB);
655 auto *BO = dyn_cast<BinaryOperator>(IterV);
656 if (!BO)
657 continue;
658 if (BO->getOpcode() != Instruction::Add)
659 continue;
660 Value *IncV = nullptr;
661 if (BO->getOperand(0) == PN)
662 IncV = BO->getOperand(1);
663 else if (BO->getOperand(1) == PN)
664 IncV = BO->getOperand(0);
665 if (IncV == nullptr)
666 continue;
667
668 if (auto *T = dyn_cast<ConstantInt>(IncV))
669 if (T->getZExtValue() == 1)
670 return PN;
671 }
672 return nullptr;
673}
674
675static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
676 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
677 Use &TheUse = UI.getUse();
678 ++UI;
679 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
680 if (BB == II->getParent())
681 II->replaceUsesOfWith(I, J);
682 }
683}
684
685bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
686 Value *CIV, ParsedValues &PV) {
687 // Match the following:
688 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
689 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
690 // The condition may also check for equality with the masked value, i.e
691 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
692 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
693
694 Value *CondV = SelI->getCondition();
695 Value *TrueV = SelI->getTrueValue();
696 Value *FalseV = SelI->getFalseValue();
697
698 using namespace PatternMatch;
699
700 CmpInst::Predicate P;
701 Value *A = nullptr, *B = nullptr, *C = nullptr;
702
703 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
704 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
705 return false;
706 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
707 return false;
708 // Matched: select (A & B) == C ? ... : ...
709 // select (A & B) != C ? ... : ...
710
711 Value *X = nullptr, *Sh1 = nullptr;
712 // Check (A & B) for (X & (1 << i)):
713 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
714 Sh1 = A;
715 X = B;
716 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
717 Sh1 = B;
718 X = A;
719 } else {
720 // TODO: Could also check for an induction variable containing single
721 // bit shifted left by 1 in each iteration.
722 return false;
723 }
724
725 bool TrueIfZero;
726
727 // Check C against the possible values for comparison: 0 and (1 << i):
728 if (match(C, m_Zero()))
729 TrueIfZero = (P == CmpInst::ICMP_EQ);
730 else if (C == Sh1)
731 TrueIfZero = (P == CmpInst::ICMP_NE);
732 else
733 return false;
734
735 // So far, matched:
736 // select (X & (1 << i)) ? ... : ...
737 // including variations of the check against zero/non-zero value.
738
739 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
740 if (TrueIfZero) {
741 ShouldSameV = TrueV;
742 ShouldXoredV = FalseV;
743 } else {
744 ShouldSameV = FalseV;
745 ShouldXoredV = TrueV;
746 }
747
748 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
749 Value *T = nullptr;
750 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
751 // Matched: select +++ ? ... : Y ^ Z
752 // select +++ ? Y ^ Z : ...
753 // where +++ denotes previously checked matches.
754 if (ShouldSameV == Y)
755 T = Z;
756 else if (ShouldSameV == Z)
757 T = Y;
758 else
759 return false;
760 R = ShouldSameV;
761 // Matched: select +++ ? R : R ^ T
762 // select +++ ? R ^ T : R
763 // depending on TrueIfZero.
764
765 } else if (match(ShouldSameV, m_Zero())) {
766 // Matched: select +++ ? 0 : ...
767 // select +++ ? ... : 0
768 if (!SelI->hasOneUse())
769 return false;
770 T = ShouldXoredV;
771 // Matched: select +++ ? 0 : T
772 // select +++ ? T : 0
773
774 Value *U = *SelI->user_begin();
775 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
776 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
777 return false;
778 // Matched: xor (select +++ ? 0 : T), R
779 // xor (select +++ ? T : 0), R
780 } else
781 return false;
782
783 // The xor input value T is isolated into its own match so that it could
784 // be checked against an induction variable containing a shifted bit
785 // (todo).
786 // For now, check against (Q << i).
787 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
788 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
789 return false;
790 // Matched: select +++ ? R : R ^ (Q << i)
791 // select +++ ? R ^ (Q << i) : R
792
793 PV.X = X;
794 PV.Q = Q;
795 PV.R = R;
796 PV.Left = true;
797 return true;
798}
799
800bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
801 ParsedValues &PV) {
802 // Match the following:
803 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
804 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
805 // The condition may also check for equality with the masked value, i.e
806 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
807 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
808
809 Value *CondV = SelI->getCondition();
810 Value *TrueV = SelI->getTrueValue();
811 Value *FalseV = SelI->getFalseValue();
812
813 using namespace PatternMatch;
814
815 Value *C = nullptr;
816 CmpInst::Predicate P;
817 bool TrueIfZero;
818
819 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
820 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
821 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
822 return false;
823 // Matched: select C == 0 ? ... : ...
824 // select C != 0 ? ... : ...
825 TrueIfZero = (P == CmpInst::ICMP_EQ);
826 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
827 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
828 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
829 return false;
830 // Matched: select C == 1 ? ... : ...
831 // select C != 1 ? ... : ...
832 TrueIfZero = (P == CmpInst::ICMP_NE);
833 } else
834 return false;
835
836 Value *X = nullptr;
837 if (!match(C, m_And(m_Value(X), m_One())) &&
838 !match(C, m_And(m_One(), m_Value(X))))
839 return false;
840 // Matched: select (X & 1) == +++ ? ... : ...
841 // select (X & 1) != +++ ? ... : ...
842
843 Value *R = nullptr, *Q = nullptr;
844 if (TrueIfZero) {
845 // The select's condition is true if the tested bit is 0.
846 // TrueV must be the shift, FalseV must be the xor.
847 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
848 return false;
849 // Matched: select +++ ? (R >> 1) : ...
850 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
851 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
852 return false;
853 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
854 // with commuting ^.
855 } else {
856 // The select's condition is true if the tested bit is 1.
857 // TrueV must be the xor, FalseV must be the shift.
858 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
859 return false;
860 // Matched: select +++ ? ... : (R >> 1)
861 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
862 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
863 return false;
864 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
865 // with commuting ^.
866 }
867
868 PV.X = X;
869 PV.Q = Q;
870 PV.R = R;
871 PV.Left = false;
872 return true;
873}
874
875bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
876 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
877 bool PreScan) {
878 using namespace PatternMatch;
879
880 // The basic pattern for R = P.Q is:
881 // for i = 0..31
882 // R = phi (0, R')
883 // if (P & (1 << i)) ; test-bit(P, i)
884 // R' = R ^ (Q << i)
885 //
886 // Similarly, the basic pattern for R = (P/Q).Q - P
887 // for i = 0..31
888 // R = phi(P, R')
889 // if (R & (1 << i))
890 // R' = R ^ (Q << i)
891
892 // There exist idioms, where instead of Q being shifted left, P is shifted
893 // right. This produces a result that is shifted right by 32 bits (the
894 // non-shifted result is 64-bit).
895 //
896 // For R = P.Q, this would be:
897 // for i = 0..31
898 // R = phi (0, R')
899 // if ((P >> i) & 1)
900 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
901 // else ; be shifted by 1, not i.
902 // R' = R >> 1
903 //
904 // And for the inverse:
905 // for i = 0..31
906 // R = phi (P, R')
907 // if (R & 1)
908 // R' = (R >> 1) ^ Q
909 // else
910 // R' = R >> 1
911
912 // The left-shifting idioms share the same pattern:
913 // select (X & (1 << i)) ? R ^ (Q << i) : R
914 // Similarly for right-shifting idioms:
915 // select (X & 1) ? (R >> 1) ^ Q
916
917 if (matchLeftShift(SelI, CIV, PV)) {
918 // If this is a pre-scan, getting this far is sufficient.
919 if (PreScan)
920 return true;
921
922 // Need to make sure that the SelI goes back into R.
923 auto *RPhi = dyn_cast<PHINode>(PV.R);
924 if (!RPhi)
925 return false;
926 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
927 return false;
928 PV.Res = SelI;
929
930 // If X is loop invariant, it must be the input polynomial, and the
931 // idiom is the basic polynomial multiply.
932 if (CurLoop->isLoopInvariant(PV.X)) {
933 PV.P = PV.X;
934 PV.Inv = false;
935 } else {
936 // X is not loop invariant. If X == R, this is the inverse pmpy.
937 // Otherwise, check for an xor with an invariant value. If the
938 // variable argument to the xor is R, then this is still a valid
939 // inverse pmpy.
940 PV.Inv = true;
941 if (PV.X != PV.R) {
942 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
943 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
944 return false;
945 auto *I1 = dyn_cast<Instruction>(X1);
946 auto *I2 = dyn_cast<Instruction>(X2);
947 if (!I1 || I1->getParent() != LoopB) {
948 Var = X2;
949 Inv = X1;
950 } else if (!I2 || I2->getParent() != LoopB) {
951 Var = X1;
952 Inv = X2;
953 } else
954 return false;
955 if (Var != PV.R)
956 return false;
957 PV.M = Inv;
958 }
959 // The input polynomial P still needs to be determined. It will be
960 // the entry value of R.
961 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
962 PV.P = EntryP;
963 }
964
965 return true;
966 }
967
968 if (matchRightShift(SelI, PV)) {
969 // If this is an inverse pattern, the Q polynomial must be known at
970 // compile time.
971 if (PV.Inv && !isa<ConstantInt>(PV.Q))
972 return false;
973 if (PreScan)
974 return true;
975 // There is no exact matching of right-shift pmpy.
976 return false;
977 }
978
979 return false;
980}
981
982bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
983 IntegerType *DestTy) {
984 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
985 if (!T || T->getBitWidth() > DestTy->getBitWidth())
986 return false;
987 if (T->getBitWidth() == DestTy->getBitWidth())
988 return true;
989 // Non-instructions are promotable. The reason why an instruction may not
990 // be promotable is that it may produce a different result if its operands
991 // and the result are promoted, for example, it may produce more non-zero
992 // bits. While it would still be possible to represent the proper result
993 // in a wider type, it may require adding additional instructions (which
994 // we don't want to do).
995 Instruction *In = dyn_cast<Instruction>(Val);
996 if (!In)
997 return true;
998 // The bitwidth of the source type is smaller than the destination.
999 // Check if the individual operation can be promoted.
1000 switch (In->getOpcode()) {
1001 case Instruction::PHI:
1002 case Instruction::ZExt:
1003 case Instruction::And:
1004 case Instruction::Or:
1005 case Instruction::Xor:
1006 case Instruction::LShr: // Shift right is ok.
1007 case Instruction::Select:
1008 case Instruction::Trunc:
1009 return true;
1010 case Instruction::ICmp:
1011 if (CmpInst *CI = cast<CmpInst>(In))
1012 return CI->isEquality() || CI->isUnsigned();
1013 llvm_unreachable("Cast failed unexpectedly")__builtin_unreachable();
1014 case Instruction::Add:
1015 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
1016 }
1017 return false;
1018}
1019
1020void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1021 IntegerType *DestTy, BasicBlock *LoopB) {
1022 Type *OrigTy = In->getType();
1023 assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast<void> (0));
1024
1025 // Leave boolean values alone.
1026 if (!In->getType()->isIntegerTy(1))
1027 In->mutateType(DestTy);
1028 unsigned DestBW = DestTy->getBitWidth();
1029
1030 // Handle PHIs.
1031 if (PHINode *P = dyn_cast<PHINode>(In)) {
1032 unsigned N = P->getNumIncomingValues();
1033 for (unsigned i = 0; i != N; ++i) {
1034 BasicBlock *InB = P->getIncomingBlock(i);
1035 if (InB == LoopB)
1036 continue;
1037 Value *InV = P->getIncomingValue(i);
1038 IntegerType *Ty = cast<IntegerType>(InV->getType());
1039 // Do not promote values in PHI nodes of type i1.
1040 if (Ty != P->getType()) {
1041 // If the value type does not match the PHI type, the PHI type
1042 // must have been promoted.
1043 assert(Ty->getBitWidth() < DestBW)(static_cast<void> (0));
1044 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1045 P->setIncomingValue(i, InV);
1046 }
1047 }
1048 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1049 Value *Op = Z->getOperand(0);
1050 if (Op->getType() == Z->getType())
1051 Z->replaceAllUsesWith(Op);
1052 Z->eraseFromParent();
1053 return;
1054 }
1055 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1056 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1057 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1058 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1059 T->replaceAllUsesWith(And);
1060 T->eraseFromParent();
1061 return;
1062 }
1063
1064 // Promote immediates.
1065 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1066 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1067 if (CI->getType()->getBitWidth() < DestBW)
1068 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1069 }
1070}
1071
1072bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1073 BasicBlock *ExitB) {
1074 assert(LoopB)(static_cast<void> (0));
1075 // Skip loops where the exit block has more than one predecessor. The values
1076 // coming from the loop block will be promoted to another type, and so the
1077 // values coming into the exit block from other predecessors would also have
1078 // to be promoted.
1079 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1080 return false;
1081 IntegerType *DestTy = getPmpyType();
1082 // Check if the exit values have types that are no wider than the type
1083 // that we want to promote to.
1084 unsigned DestBW = DestTy->getBitWidth();
1085 for (PHINode &P : ExitB->phis()) {
1086 if (P.getNumIncomingValues() != 1)
1087 return false;
1088 assert(P.getIncomingBlock(0) == LoopB)(static_cast<void> (0));
1089 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1090 if (!T || T->getBitWidth() > DestBW)
1091 return false;
1092 }
1093
1094 // Check all instructions in the loop.
1095 for (Instruction &In : *LoopB)
1096 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1097 return false;
1098
1099 // Perform the promotion.
1100 std::vector<Instruction*> LoopIns;
1101 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1102 [](Instruction &In) { return &In; });
1103 for (Instruction *In : LoopIns)
1104 if (!In->isTerminator())
1105 promoteTo(In, DestTy, LoopB);
1106
1107 // Fix up the PHI nodes in the exit block.
1108 Instruction *EndI = ExitB->getFirstNonPHI();
1109 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1110 for (auto I = ExitB->begin(); I != End; ++I) {
1111 PHINode *P = dyn_cast<PHINode>(I);
1112 if (!P)
1113 break;
1114 Type *Ty0 = P->getIncomingValue(0)->getType();
1115 Type *PTy = P->getType();
1116 if (PTy != Ty0) {
1117 assert(Ty0 == DestTy)(static_cast<void> (0));
1118 // In order to create the trunc, P must have the promoted type.
1119 P->mutateType(Ty0);
1120 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1121 // In order for the RAUW to work, the types of P and T must match.
1122 P->mutateType(PTy);
1123 P->replaceAllUsesWith(T);
1124 // Final update of the P's type.
1125 P->mutateType(Ty0);
1126 cast<Instruction>(T)->setOperand(0, P);
1127 }
1128 }
1129
1130 return true;
1131}
1132
1133bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1134 ValueSeq &Cycle) {
1135 // Out = ..., In, ...
1136 if (Out == In)
1137 return true;
1138
1139 auto *BB = cast<Instruction>(Out)->getParent();
1140 bool HadPhi = false;
1141
1142 for (auto U : Out->users()) {
1143 auto *I = dyn_cast<Instruction>(&*U);
1144 if (I == nullptr || I->getParent() != BB)
1145 continue;
1146 // Make sure that there are no multi-iteration cycles, e.g.
1147 // p1 = phi(p2)
1148 // p2 = phi(p1)
1149 // The cycle p1->p2->p1 would span two loop iterations.
1150 // Check that there is only one phi in the cycle.
1151 bool IsPhi = isa<PHINode>(I);
1152 if (IsPhi && HadPhi)
1153 return false;
1154 HadPhi |= IsPhi;
1155 if (Cycle.count(I))
1156 return false;
1157 Cycle.insert(I);
1158 if (findCycle(I, In, Cycle))
1159 break;
1160 Cycle.remove(I);
1161 }
1162 return !Cycle.empty();
1163}
1164
1165void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1166 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1167 // All the values in the cycle that are between the phi node and the
1168 // divider instruction will be classified as "early", all other values
1169 // will be "late".
1170
1171 bool IsE = true;
1172 unsigned I, N = Cycle.size();
1173 for (I = 0; I < N; ++I) {
1174 Value *V = Cycle[I];
1175 if (DivI == V)
1176 IsE = false;
1177 else if (!isa<PHINode>(V))
1178 continue;
1179 // Stop if found either.
1180 break;
1181 }
1182 // "I" is the index of either DivI or the phi node, whichever was first.
1183 // "E" is "false" or "true" respectively.
1184 ValueSeq &First = !IsE ? Early : Late;
1185 for (unsigned J = 0; J < I; ++J)
1186 First.insert(Cycle[J]);
1187
1188 ValueSeq &Second = IsE ? Early : Late;
1189 Second.insert(Cycle[I]);
1190 for (++I; I < N; ++I) {
1191 Value *V = Cycle[I];
1192 if (DivI == V || isa<PHINode>(V))
1193 break;
1194 Second.insert(V);
1195 }
1196
1197 for (; I < N; ++I)
1198 First.insert(Cycle[I]);
1199}
1200
1201bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1202 ValueSeq &Early, ValueSeq &Late) {
1203 // Select is an exception, since the condition value does not have to be
1204 // classified in the same way as the true/false values. The true/false
1205 // values do have to be both early or both late.
1206 if (UseI->getOpcode() == Instruction::Select) {
1207 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1208 if (Early.count(TV) || Early.count(FV)) {
1209 if (Late.count(TV) || Late.count(FV))
1210 return false;
1211 Early.insert(UseI);
1212 } else if (Late.count(TV) || Late.count(FV)) {
1213 if (Early.count(TV) || Early.count(FV))
1214 return false;
1215 Late.insert(UseI);
1216 }
1217 return true;
1218 }
1219
1220 // Not sure what would be the example of this, but the code below relies
1221 // on having at least one operand.
1222 if (UseI->getNumOperands() == 0)
1223 return true;
1224
1225 bool AE = true, AL = true;
1226 for (auto &I : UseI->operands()) {
1227 if (Early.count(&*I))
1228 AL = false;
1229 else if (Late.count(&*I))
1230 AE = false;
1231 }
1232 // If the operands appear "all early" and "all late" at the same time,
1233 // then it means that none of them are actually classified as either.
1234 // This is harmless.
1235 if (AE && AL)
1236 return true;
1237 // Conversely, if they are neither "all early" nor "all late", then
1238 // we have a mixture of early and late operands that is not a known
1239 // exception.
1240 if (!AE && !AL)
1241 return false;
1242
1243 // Check that we have covered the two special cases.
1244 assert(AE != AL)(static_cast<void> (0));
1245
1246 if (AE)
1247 Early.insert(UseI);
1248 else
1249 Late.insert(UseI);
1250 return true;
1251}
1252
1253bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1254 switch (I->getOpcode()) {
1255 case Instruction::And:
1256 case Instruction::Or:
1257 case Instruction::Xor:
1258 case Instruction::LShr:
1259 case Instruction::Shl:
1260 case Instruction::Select:
1261 case Instruction::ICmp:
1262 case Instruction::PHI:
1263 break;
1264 default:
1265 return false;
1266 }
1267 return true;
1268}
1269
1270bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1271 unsigned IterCount) {
1272 auto *T = dyn_cast<IntegerType>(V->getType());
1273 if (!T)
1274 return false;
1275
1276 KnownBits Known(T->getBitWidth());
1277 computeKnownBits(V, Known, DL);
1278 return Known.countMinLeadingZeros() >= IterCount;
1279}
1280
1281bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1282 unsigned IterCount) {
1283 // Assume that all inputs to the value have the high bits zero.
1284 // Check if the value itself preserves the zeros in the high bits.
1285 if (auto *C = dyn_cast<ConstantInt>(V))
1286 return C->getValue().countLeadingZeros() >= IterCount;
1287
1288 if (auto *I = dyn_cast<Instruction>(V)) {
1289 switch (I->getOpcode()) {
1290 case Instruction::And:
1291 case Instruction::Or:
1292 case Instruction::Xor:
1293 case Instruction::LShr:
1294 case Instruction::Select:
1295 case Instruction::ICmp:
1296 case Instruction::PHI:
1297 case Instruction::ZExt:
1298 return true;
1299 }
1300 }
1301
1302 return false;
1303}
1304
1305bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1306 unsigned Opc = I->getOpcode();
1307 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1308 return Op != I->getOperand(1);
1309 return true;
1310}
1311
1312bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1313 BasicBlock *ExitB, unsigned IterCount) {
1314 Value *CIV = getCountIV(LoopB);
1315 if (CIV == nullptr)
1316 return false;
1317 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1318 if (CIVTy == nullptr)
1319 return false;
1320
1321 ValueSeq RShifts;
1322 ValueSeq Early, Late, Cycled;
1323
1324 // Find all value cycles that contain logical right shifts by 1.
1325 for (Instruction &I : *LoopB) {
1326 using namespace PatternMatch;
1327
1328 Value *V = nullptr;
1329 if (!match(&I, m_LShr(m_Value(V), m_One())))
1330 continue;
1331 ValueSeq C;
1332 if (!findCycle(&I, V, C))
1333 continue;
1334
1335 // Found a cycle.
1336 C.insert(&I);
1337 classifyCycle(&I, C, Early, Late);
1338 Cycled.insert(C.begin(), C.end());
1339 RShifts.insert(&I);
1340 }
1341
1342 // Find the set of all values affected by the shift cycles, i.e. all
1343 // cycled values, and (recursively) all their users.
1344 ValueSeq Users(Cycled.begin(), Cycled.end());
1345 for (unsigned i = 0; i < Users.size(); ++i) {
1346 Value *V = Users[i];
1347 if (!isa<IntegerType>(V->getType()))
1348 return false;
1349 auto *R = cast<Instruction>(V);
1350 // If the instruction does not commute with shifts, the loop cannot
1351 // be unshifted.
1352 if (!commutesWithShift(R))
1353 return false;
1354 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1355 auto *T = cast<Instruction>(*I);
1356 // Skip users from outside of the loop. They will be handled later.
1357 // Also, skip the right-shifts and phi nodes, since they mix early
1358 // and late values.
1359 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1360 continue;
1361
1362 Users.insert(T);
1363 if (!classifyInst(T, Early, Late))
1364 return false;
1365 }
1366 }
1367
1368 if (Users.empty())
1369 return false;
1370
1371 // Verify that high bits remain zero.
1372 ValueSeq Internal(Users.begin(), Users.end());
1373 ValueSeq Inputs;
1374 for (unsigned i = 0; i < Internal.size(); ++i) {
1375 auto *R = dyn_cast<Instruction>(Internal[i]);
1376 if (!R)
1377 continue;
1378 for (Value *Op : R->operands()) {
1379 auto *T = dyn_cast<Instruction>(Op);
1380 if (T && T->getParent() != LoopB)
1381 Inputs.insert(Op);
1382 else
1383 Internal.insert(Op);
1384 }
1385 }
1386 for (Value *V : Inputs)
1387 if (!highBitsAreZero(V, IterCount))
1388 return false;
1389 for (Value *V : Internal)
1390 if (!keepsHighBitsZero(V, IterCount))
1391 return false;
1392
1393 // Finally, the work can be done. Unshift each user.
1394 IRBuilder<> IRB(LoopB);
1395 std::map<Value*,Value*> ShiftMap;
1396
1397 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1398
1399 CastMapType CastMap;
1400
1401 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1402 IntegerType *Ty) -> Value* {
1403 auto H = CM.find(std::make_pair(V, Ty));
1404 if (H != CM.end())
1405 return H->second;
1406 Value *CV = IRB.CreateIntCast(V, Ty, false);
1407 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1408 return CV;
1409 };
1410
1411 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1412 using namespace PatternMatch;
1413
1414 if (isa<PHINode>(I) || !Users.count(&*I))
1415 continue;
1416
1417 // Match lshr x, 1.
1418 Value *V = nullptr;
1419 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1420 replaceAllUsesOfWithIn(&*I, V, LoopB);
1421 continue;
1422 }
1423 // For each non-cycled operand, replace it with the corresponding
1424 // value shifted left.
1425 for (auto &J : I->operands()) {
1426 Value *Op = J.get();
1427 if (!isOperandShifted(&*I, Op))
1428 continue;
1429 if (Users.count(Op))
1430 continue;
1431 // Skip shifting zeros.
1432 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1433 continue;
1434 // Check if we have already generated a shift for this value.
1435 auto F = ShiftMap.find(Op);
1436 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1437 if (W == nullptr) {
1438 IRB.SetInsertPoint(&*I);
1439 // First, the shift amount will be CIV or CIV+1, depending on
1440 // whether the value is early or late. Instead of creating CIV+1,
1441 // do a single shift of the value.
1442 Value *ShAmt = CIV, *ShVal = Op;
1443 auto *VTy = cast<IntegerType>(ShVal->getType());
1444 auto *ATy = cast<IntegerType>(ShAmt->getType());
1445 if (Late.count(&*I))
1446 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1447 // Second, the types of the shifted value and the shift amount
1448 // must match.
1449 if (VTy != ATy) {
1450 if (VTy->getBitWidth() < ATy->getBitWidth())
1451 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1452 else
1453 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1454 }
1455 // Ready to generate the shift and memoize it.
1456 W = IRB.CreateShl(ShVal, ShAmt);
1457 ShiftMap.insert(std::make_pair(Op, W));
1458 }
1459 I->replaceUsesOfWith(Op, W);
1460 }
1461 }
1462
1463 // Update the users outside of the loop to account for having left
1464 // shifts. They would normally be shifted right in the loop, so shift
1465 // them right after the loop exit.
1466 // Take advantage of the loop-closed SSA form, which has all the post-
1467 // loop values in phi nodes.
1468 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1469 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1470 if (!isa<PHINode>(P))
1471 break;
1472 auto *PN = cast<PHINode>(P);
1473 Value *U = PN->getIncomingValueForBlock(LoopB);
1474 if (!Users.count(U))
1475 continue;
1476 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1477 PN->replaceAllUsesWith(S);
1478 // The above RAUW will create
1479 // S = lshr S, IterCount
1480 // so we need to fix it back into
1481 // S = lshr PN, IterCount
1482 cast<User>(S)->replaceUsesOfWith(S, PN);
1483 }
1484
1485 return true;
1486}
1487
1488void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1489 for (auto &I : *LoopB)
1490 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1491 I.replaceAllUsesWith(SV);
1492
1493 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1494 N = std::next(I);
1495 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1496 }
1497}
1498
1499unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1500 // Arrays of coefficients of Q and the inverse, C.
1501 // Q[i] = coefficient at x^i.
1502 std::array<char,32> Q, C;
1503
1504 for (unsigned i = 0; i < 32; ++i) {
1505 Q[i] = QP & 1;
1506 QP >>= 1;
1507 }
1508 assert(Q[0] == 1)(static_cast<void> (0));
1509
1510 // Find C, such that
1511 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1512 //
1513 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1514 // operations * and + are & and ^ respectively.
1515 //
1516 // Find C[i] recursively, by comparing i-th coefficient in the product
1517 // with 0 (or 1 for i=0).
1518 //
1519 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1520 C[0] = 1;
1521 for (unsigned i = 1; i < 32; ++i) {
1522 // Solve for C[i] in:
1523 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1524 // This is equivalent to
1525 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1526 // which is
1527 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1528 unsigned T = 0;
1529 for (unsigned j = 0; j < i; ++j)
1530 T = T ^ (C[j] & Q[i-j]);
1531 C[i] = T;
1532 }
1533
1534 unsigned QV = 0;
1535 for (unsigned i = 0; i < 32; ++i)
1536 if (C[i])
1537 QV |= (1 << i);
1538
1539 return QV;
1540}
1541
1542Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1543 ParsedValues &PV) {
1544 IRBuilder<> B(&*At);
1545 Module *M = At->getParent()->getParent()->getParent();
1546 Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1547
1548 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1549 unsigned IC = PV.IterCount;
1550
1551 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1552 P0 = P = B.CreateXor(P, PV.M);
1553
1554 // Create a bit mask to clear the high bits beyond IterCount.
1555 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
12
Returning from 'APInt::getLowBitsSet'
1556
1557 if (PV.IterCount != 32)
13
Assuming field 'IterCount' is equal to 32
14
Taking false branch
1558 P = B.CreateAnd(P, BMI);
1559
1560 if (PV.Inv) {
15
Assuming field 'Inv' is true
16
Taking true branch
1561 auto *QI = dyn_cast<ConstantInt>(PV.Q);
17
Assuming field 'Q' is not a 'ConstantInt'
1562 assert(QI && QI->getBitWidth() <= 32)(static_cast<void> (0));
1563
1564 // Again, clearing bits beyond IterCount.
1565 unsigned M = (1 << PV.IterCount) - 1;
18
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1566 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1567 unsigned QV = getInverseMxN(Tmp) & M;
1568 auto *QVI = ConstantInt::get(QI->getType(), QV);
1569 P = B.CreateCall(PMF, {P, QVI});
1570 P = B.CreateTrunc(P, QI->getType());
1571 if (IC != 32)
1572 P = B.CreateAnd(P, BMI);
1573 }
1574
1575 Value *R = B.CreateCall(PMF, {P, Q});
1576
1577 if (PV.M != nullptr)
1578 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1579
1580 return R;
1581}
1582
1583static bool hasZeroSignBit(const Value *V) {
1584 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1585 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1586 const Instruction *I = dyn_cast<const Instruction>(V);
1587 if (!I)
1588 return false;
1589 switch (I->getOpcode()) {
1590 case Instruction::LShr:
1591 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1592 return SI->getZExtValue() > 0;
1593 return false;
1594 case Instruction::Or:
1595 case Instruction::Xor:
1596 return hasZeroSignBit(I->getOperand(0)) &&
1597 hasZeroSignBit(I->getOperand(1));
1598 case Instruction::And:
1599 return hasZeroSignBit(I->getOperand(0)) ||
1600 hasZeroSignBit(I->getOperand(1));
1601 }
1602 return false;
1603}
1604
1605void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1606 S.addRule("sink-zext",
1607 // Sink zext past bitwise operations.
1608 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1609 if (I->getOpcode() != Instruction::ZExt)
1610 return nullptr;
1611 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1612 if (!T)
1613 return nullptr;
1614 switch (T->getOpcode()) {
1615 case Instruction::And:
1616 case Instruction::Or:
1617 case Instruction::Xor:
1618 break;
1619 default:
1620 return nullptr;
1621 }
1622 IRBuilder<> B(Ctx);
1623 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1624 B.CreateZExt(T->getOperand(0), I->getType()),
1625 B.CreateZExt(T->getOperand(1), I->getType()));
1626 });
1627 S.addRule("xor/and -> and/xor",
1628 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1629 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1630 if (I->getOpcode() != Instruction::Xor)
1631 return nullptr;
1632 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1633 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1634 if (!And0 || !And1)
1635 return nullptr;
1636 if (And0->getOpcode() != Instruction::And ||
1637 And1->getOpcode() != Instruction::And)
1638 return nullptr;
1639 if (And0->getOperand(1) != And1->getOperand(1))
1640 return nullptr;
1641 IRBuilder<> B(Ctx);
1642 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1643 And0->getOperand(1));
1644 });
1645 S.addRule("sink binop into select",
1646 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1647 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1648 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1649 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1650 if (!BO)
1651 return nullptr;
1652 Instruction::BinaryOps Op = BO->getOpcode();
1653 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1654 IRBuilder<> B(Ctx);
1655 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1656 Value *Z = BO->getOperand(1);
1657 return B.CreateSelect(Sel->getCondition(),
1658 B.CreateBinOp(Op, X, Z),
1659 B.CreateBinOp(Op, Y, Z));
1660 }
1661 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1662 IRBuilder<> B(Ctx);
1663 Value *X = BO->getOperand(0);
1664 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1665 return B.CreateSelect(Sel->getCondition(),
1666 B.CreateBinOp(Op, X, Y),
1667 B.CreateBinOp(Op, X, Z));
1668 }
1669 return nullptr;
1670 });
1671 S.addRule("fold select-select",
1672 // (select c (select c x y) z) -> (select c x z)
1673 // (select c x (select c y z)) -> (select c x z)
1674 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1675 SelectInst *Sel = dyn_cast<SelectInst>(I);
1676 if (!Sel)
1677 return nullptr;
1678 IRBuilder<> B(Ctx);
1679 Value *C = Sel->getCondition();
1680 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1681 if (Sel0->getCondition() == C)
1682 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1683 }
1684 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1685 if (Sel1->getCondition() == C)
1686 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1687 }
1688 return nullptr;
1689 });
1690 S.addRule("or-signbit -> xor-signbit",
1691 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1692 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1693 if (I->getOpcode() != Instruction::Or)
1694 return nullptr;
1695 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1696 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1697 return nullptr;
1698 if (!hasZeroSignBit(I->getOperand(0)))
1699 return nullptr;
1700 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1701 });
1702 S.addRule("sink lshr into binop",
1703 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1704 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1705 if (I->getOpcode() != Instruction::LShr)
1706 return nullptr;
1707 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1708 if (!BitOp)
1709 return nullptr;
1710 switch (BitOp->getOpcode()) {
1711 case Instruction::And:
1712 case Instruction::Or:
1713 case Instruction::Xor:
1714 break;
1715 default:
1716 return nullptr;
1717 }
1718 IRBuilder<> B(Ctx);
1719 Value *S = I->getOperand(1);
1720 return B.CreateBinOp(BitOp->getOpcode(),
1721 B.CreateLShr(BitOp->getOperand(0), S),
1722 B.CreateLShr(BitOp->getOperand(1), S));
1723 });
1724 S.addRule("expose bitop-const",
1725 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1726 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1727 auto IsBitOp = [](unsigned Op) -> bool {
1728 switch (Op) {
1729 case Instruction::And:
1730 case Instruction::Or:
1731 case Instruction::Xor:
1732 return true;
1733 }
1734 return false;
1735 };
1736 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1737 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1738 return nullptr;
1739 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1740 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1741 return nullptr;
1742 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1743 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1744 if (!CA || !CB)
1745 return nullptr;
1746 IRBuilder<> B(Ctx);
1747 Value *X = BitOp2->getOperand(0);
1748 return B.CreateBinOp(BitOp2->getOpcode(), X,
1749 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1750 });
1751}
1752
1753void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1754 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1755 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1756 if (I->getOpcode() != Instruction::And)
1757 return nullptr;
1758 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1759 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1760 if (!Xor || !C0)
1761 return nullptr;
1762 if (Xor->getOpcode() != Instruction::Xor)
1763 return nullptr;
1764 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1765 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1766 // Pick the first non-null and.
1767 if (!And0 || And0->getOpcode() != Instruction::And)
1768 std::swap(And0, And1);
1769 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1770 if (!C1)
1771 return nullptr;
1772 uint32_t V0 = C0->getZExtValue();
1773 uint32_t V1 = C1->getZExtValue();
1774 if (V0 != (V0 & V1))
1775 return nullptr;
1776 IRBuilder<> B(Ctx);
1777 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1778 });
1779}
1780
1781bool PolynomialMultiplyRecognize::recognize() {
1782 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { } while (false)
1783 << *CurLoop << '\n')do { } while (false);
1784 // Restrictions:
1785 // - The loop must consist of a single block.
1786 // - The iteration count must be known at compile-time.
1787 // - The loop must have an induction variable starting from 0, and
1788 // incremented in each iteration of the loop.
1789 BasicBlock *LoopB = CurLoop->getHeader();
1790 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { } while (false);
1791
1792 if (LoopB != CurLoop->getLoopLatch())
1793 return false;
1794 BasicBlock *ExitB = CurLoop->getExitBlock();
1795 if (ExitB == nullptr)
1796 return false;
1797 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1798 if (EntryB == nullptr)
1799 return false;
1800
1801 unsigned IterCount = 0;
1802 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1803 if (isa<SCEVCouldNotCompute>(CT))
1804 return false;
1805 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1806 IterCount = CV->getValue()->getZExtValue() + 1;
1807
1808 Value *CIV = getCountIV(LoopB);
1809 ParsedValues PV;
1810 Simplifier PreSimp;
1811 PV.IterCount = IterCount;
1812 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { } while (false)
1813 << '\n')do { } while (false);
1814
1815 setupPreSimplifier(PreSimp);
1816
1817 // Perform a preliminary scan of select instructions to see if any of them
1818 // looks like a generator of the polynomial multiply steps. Assume that a
1819 // loop can only contain a single transformable operation, so stop the
1820 // traversal after the first reasonable candidate was found.
1821 // XXX: Currently this approach can modify the loop before being 100% sure
1822 // that the transformation can be carried out.
1823 bool FoundPreScan = false;
1824 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1825 for (const Value *U : V->users()) {
1826 if (const auto *P = dyn_cast<const PHINode>(U))
1827 if (P->getParent() == LoopB)
1828 return true;
1829 }
1830 return false;
1831 };
1832 for (Instruction &In : *LoopB) {
1833 SelectInst *SI = dyn_cast<SelectInst>(&In);
1834 if (!SI || !FeedsPHI(SI))
1835 continue;
1836
1837 Simplifier::Context C(SI);
1838 Value *T = PreSimp.simplify(C);
1839 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1840 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { } while (false);
1841 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1842 FoundPreScan = true;
1843 if (SelI != SI) {
1844 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1845 SI->replaceAllUsesWith(NewSel);
1846 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1847 }
1848 break;
1849 }
1850 }
1851
1852 if (!FoundPreScan) {
1853 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { } while (false);
1854 return false;
1855 }
1856
1857 if (!PV.Left) {
1858 // The right shift version actually only returns the higher bits of
1859 // the result (each iteration discards the LSB). If we want to convert it
1860 // to a left-shifting loop, the working data type must be at least as
1861 // wide as the target's pmpy instruction.
1862 if (!promoteTypes(LoopB, ExitB))
1863 return false;
1864 // Run post-promotion simplifications.
1865 Simplifier PostSimp;
1866 setupPostSimplifier(PostSimp);
1867 for (Instruction &In : *LoopB) {
1868 SelectInst *SI = dyn_cast<SelectInst>(&In);
1869 if (!SI || !FeedsPHI(SI))
1870 continue;
1871 Simplifier::Context C(SI);
1872 Value *T = PostSimp.simplify(C);
1873 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1874 if (SelI != SI) {
1875 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1876 SI->replaceAllUsesWith(NewSel);
1877 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1878 }
1879 break;
1880 }
1881
1882 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1883 return false;
1884 cleanupLoopBody(LoopB);
1885 }
1886
1887 // Scan the loop again, find the generating select instruction.
1888 bool FoundScan = false;
1889 for (Instruction &In : *LoopB) {
1890 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1891 if (!SelI)
1892 continue;
1893 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { } while (false);
1894 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1895 if (FoundScan)
1896 break;
1897 }
1898 assert(FoundScan)(static_cast<void> (0));
1899
1900 LLVM_DEBUG({do { } while (false)
1901 StringRef PP = (PV.M ? "(P+M)" : "P");do { } while (false)
1902 if (!PV.Inv)do { } while (false)
1903 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { } while (false)
1904 elsedo { } while (false)
1905 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { } while (false)
1906 << PP << "\n";do { } while (false)
1907 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { } while (false)
1908 if (PV.M)do { } while (false)
1909 dbgs() << " M:" << *PV.M << "\n";do { } while (false)
1910 dbgs() << " Q:" << *PV.Q << "\n";do { } while (false)
1911 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { } while (false)
1912 })do { } while (false);
1913
1914 BasicBlock::iterator At(EntryB->getTerminator());
1915 Value *PM = generate(At, PV);
1916 if (PM == nullptr)
1917 return false;
1918
1919 if (PM->getType() != PV.Res->getType())
1920 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1921
1922 PV.Res->replaceAllUsesWith(PM);
1923 PV.Res->eraseFromParent();
1924 return true;
1925}
1926
1927int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1928 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1929 return SC->getAPInt().getSExtValue();
1930 return 0;
1931}
1932
1933bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1934 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1935 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1936 return false;
1937
1938 Value *StoredVal = SI->getValueOperand();
1939 Value *StorePtr = SI->getPointerOperand();
1940
1941 // Reject stores that are so large that they overflow an unsigned.
1942 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1943 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1944 return false;
1945
1946 // See if the pointer expression is an AddRec like {base,+,1} on the current
1947 // loop, which indicates a strided store. If we have something else, it's a
1948 // random store we can't handle.
1949 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1950 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1951 return false;
1952
1953 // Check to see if the stride matches the size of the store. If so, then we
1954 // know that every byte is touched in the loop.
1955 int Stride = getSCEVStride(StoreEv);
1956 if (Stride == 0)
1957 return false;
1958 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1959 if (StoreSize != unsigned(std::abs(Stride)))
1960 return false;
1961
1962 // The store must be feeding a non-volatile load.
1963 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1964 if (!LI || !LI->isSimple())
1965 return false;
1966
1967 // See if the pointer expression is an AddRec like {base,+,1} on the current
1968 // loop, which indicates a strided load. If we have something else, it's a
1969 // random load we can't handle.
1970 Value *LoadPtr = LI->getPointerOperand();
1971 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1972 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1973 return false;
1974
1975 // The store and load must share the same stride.
1976 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1977 return false;
1978
1979 // Success. This store can be converted into a memcpy.
1980 return true;
1981}
1982
1983/// mayLoopAccessLocation - Return true if the specified loop might access the
1984/// specified pointer location, which is a loop-strided access. The 'Access'
1985/// argument specifies what the verboten forms of access are (read or write).
1986static bool
1987mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1988 const SCEV *BECount, unsigned StoreSize,
1989 AliasAnalysis &AA,
1990 SmallPtrSetImpl<Instruction *> &Ignored) {
1991 // Get the location that may be stored across the loop. Since the access
1992 // is strided positively through memory, we say that the modified location
1993 // starts at the pointer and has infinite size.
1994 LocationSize AccessSize = LocationSize::afterPointer();
1995
1996 // If the loop iterates a fixed number of times, we can refine the access
1997 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1998 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1999 AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
2000 StoreSize);
2001
2002 // TODO: For this to be really effective, we have to dive into the pointer
2003 // operand in the store. Store to &A[i] of 100 will always return may alias
2004 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
2005 // which will then no-alias a store to &A[100].
2006 MemoryLocation StoreLoc(Ptr, AccessSize);
2007
2008 for (auto *B : L->blocks())
2009 for (auto &I : *B)
2010 if (Ignored.count(&I) == 0 &&
2011 isModOrRefSet(
2012 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
2013 return true;
2014
2015 return false;
2016}
2017
2018void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2019 SmallVectorImpl<StoreInst*> &Stores) {
2020 Stores.clear();
2021 for (Instruction &I : *BB)
2022 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2023 if (isLegalStore(CurLoop, SI))
2024 Stores.push_back(SI);
2025}
2026
2027bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2028 StoreInst *SI, const SCEV *BECount) {
2029 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast<void> (0))
2030 "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast<void> (0))
2031 "to volatile destination.")(static_cast<void> (0));
2032
2033 Value *StorePtr = SI->getPointerOperand();
2034 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2035 unsigned Stride = getSCEVStride(StoreEv);
2036 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2037 if (Stride != StoreSize)
2038 return false;
2039
2040 // See if the pointer expression is an AddRec like {base,+,1} on the current
2041 // loop, which indicates a strided load. If we have something else, it's a
2042 // random load we can't handle.
2043 auto *LI = cast<LoadInst>(SI->getValueOperand());
2044 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2045
2046 // The trip count of the loop and the base pointer of the addrec SCEV is
2047 // guaranteed to be loop invariant, which means that it should dominate the
2048 // header. This allows us to insert code for it in the preheader.
2049 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2050 Instruction *ExpPt = Preheader->getTerminator();
2051 IRBuilder<> Builder(ExpPt);
2052 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2053
2054 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2055
2056 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2057 // this into a memcpy/memmove in the loop preheader now if we want. However,
2058 // this would be unsafe to do if there is anything else in the loop that may
2059 // read or write the memory region we're storing to. For memcpy, this
2060 // includes the load that feeds the stores. Check for an alias by generating
2061 // the base address and checking everything.
2062 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2063 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2064 Value *LoadBasePtr = nullptr;
2065
2066 bool Overlap = false;
2067 bool DestVolatile = SI->isVolatile();
2068 Type *BECountTy = BECount->getType();
2069
2070 if (DestVolatile) {
2071 // The trip count must fit in i32, since it is the type of the "num_words"
2072 // argument to hexagon_memcpy_forward_vp4cp4n2.
2073 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2074CleanupAndExit:
2075 // If we generated new code for the base pointer, clean up.
2076 Expander.clear();
2077 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2078 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2079 StoreBasePtr = nullptr;
2080 }
2081 if (LoadBasePtr) {
2082 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2083 LoadBasePtr = nullptr;
2084 }
2085 return false;
2086 }
2087 }
2088
2089 SmallPtrSet<Instruction*, 2> Ignore1;
2090 Ignore1.insert(SI);
2091 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2092 StoreSize, *AA, Ignore1)) {
2093 // Check if the load is the offending instruction.
2094 Ignore1.insert(LI);
2095 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2096 BECount, StoreSize, *AA, Ignore1)) {
2097 // Still bad. Nothing we can do.
2098 goto CleanupAndExit;
2099 }
2100 // It worked with the load ignored.
2101 Overlap = true;
2102 }
2103
2104 if (!Overlap) {
2105 if (DisableMemcpyIdiom || !HasMemcpy)
2106 goto CleanupAndExit;
2107 } else {
2108 // Don't generate memmove if this function will be inlined. This is
2109 // because the caller will undergo this transformation after inlining.
2110 Function *Func = CurLoop->getHeader()->getParent();
2111 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2112 goto CleanupAndExit;
2113
2114 // In case of a memmove, the call to memmove will be executed instead
2115 // of the loop, so we need to make sure that there is nothing else in
2116 // the loop than the load, store and instructions that these two depend
2117 // on.
2118 SmallVector<Instruction*,2> Insts;
2119 Insts.push_back(SI);
2120 Insts.push_back(LI);
2121 if (!coverLoop(CurLoop, Insts))
2122 goto CleanupAndExit;
2123
2124 if (DisableMemmoveIdiom || !HasMemmove)
2125 goto CleanupAndExit;
2126 bool IsNested = CurLoop->getParentLoop() != nullptr;
2127 if (IsNested && OnlyNonNestedMemmove)
2128 goto CleanupAndExit;
2129 }
2130
2131 // For a memcpy, we have to make sure that the input array is not being
2132 // mutated by the loop.
2133 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2134 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2135
2136 SmallPtrSet<Instruction*, 2> Ignore2;
2137 Ignore2.insert(SI);
2138 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2139 StoreSize, *AA, Ignore2))
2140 goto CleanupAndExit;
2141
2142 // Check the stride.
2143 bool StridePos = getSCEVStride(LoadEv) >= 0;
2144
2145 // Currently, the volatile memcpy only emulates traversing memory forward.
2146 if (!StridePos && DestVolatile)
2147 goto CleanupAndExit;
2148
2149 bool RuntimeCheck = (Overlap || DestVolatile);
2150
2151 BasicBlock *ExitB;
2152 if (RuntimeCheck) {
2153 // The runtime check needs a single exit block.
2154 SmallVector<BasicBlock*, 8> ExitBlocks;
2155 CurLoop->getUniqueExitBlocks(ExitBlocks);
2156 if (ExitBlocks.size() != 1)
2157 goto CleanupAndExit;
2158 ExitB = ExitBlocks[0];
2159 }
2160
2161 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2162 // pointer size if it isn't already.
2163 LLVMContext &Ctx = SI->getContext();
2164 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2165 DebugLoc DLoc = SI->getDebugLoc();
2166
2167 const SCEV *NumBytesS =
2168 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2169 if (StoreSize != 1)
2170 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2171 SCEV::FlagNUW);
2172 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2173 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2174 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2175 NumBytes = Simp;
2176
2177 CallInst *NewCall;
2178
2179 if (RuntimeCheck) {
2180 unsigned Threshold = RuntimeMemSizeThreshold;
2181 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2182 uint64_t C = CI->getZExtValue();
2183 if (Threshold != 0 && C < Threshold)
2184 goto CleanupAndExit;
2185 if (C < CompileTimeMemSizeThreshold)
2186 goto CleanupAndExit;
2187 }
2188
2189 BasicBlock *Header = CurLoop->getHeader();
2190 Function *Func = Header->getParent();
2191 Loop *ParentL = LF->getLoopFor(Preheader);
2192 StringRef HeaderName = Header->getName();
2193
2194 // Create a new (empty) preheader, and update the PHI nodes in the
2195 // header to use the new preheader.
2196 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2197 Func, Header);
2198 if (ParentL)
2199 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2200 IRBuilder<>(NewPreheader).CreateBr(Header);
2201 for (auto &In : *Header) {
2202 PHINode *PN = dyn_cast<PHINode>(&In);
2203 if (!PN)
2204 break;
2205 int bx = PN->getBasicBlockIndex(Preheader);
2206 if (bx >= 0)
2207 PN->setIncomingBlock(bx, NewPreheader);
2208 }
2209 DT->addNewBlock(NewPreheader, Preheader);
2210 DT->changeImmediateDominator(Header, NewPreheader);
2211
2212 // Check for safe conditions to execute memmove.
2213 // If stride is positive, copying things from higher to lower addresses
2214 // is equivalent to memmove. For negative stride, it's the other way
2215 // around. Copying forward in memory with positive stride may not be
2216 // same as memmove since we may be copying values that we just stored
2217 // in some previous iteration.
2218 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2219 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2220 Value *LowA = StridePos ? SA : LA;
2221 Value *HighA = StridePos ? LA : SA;
2222 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2223 Value *Cond = CmpA;
2224
2225 // Check for distance between pointers. Since the case LowA < HighA
2226 // is checked for above, assume LowA >= HighA.
2227 Value *Dist = Builder.CreateSub(LowA, HighA);
2228 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2229 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2230 Cond = CmpEither;
2231
2232 if (Threshold != 0) {
2233 Type *Ty = NumBytes->getType();
2234 Value *Thr = ConstantInt::get(Ty, Threshold);
2235 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2236 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2237 Cond = CmpBoth;
2238 }
2239 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2240 Func, NewPreheader);
2241 if (ParentL)
2242 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2243 Instruction *OldT = Preheader->getTerminator();
2244 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2245 OldT->eraseFromParent();
2246 Preheader->setName(Preheader->getName()+".old");
2247 DT->addNewBlock(MemmoveB, Preheader);
2248 // Find the new immediate dominator of the exit block.
2249 BasicBlock *ExitD = Preheader;
2250 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2251 BasicBlock *PB = *PI;
2252 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2253 if (!ExitD)
2254 break;
2255 }
2256 // If the prior immediate dominator of ExitB was dominated by the
2257 // old preheader, then the old preheader becomes the new immediate
2258 // dominator. Otherwise don't change anything (because the newly
2259 // added blocks are dominated by the old preheader).
2260 if (ExitD && DT->dominates(Preheader, ExitD)) {
2261 DomTreeNode *BN = DT->getNode(ExitB);
2262 DomTreeNode *DN = DT->getNode(ExitD);
2263 BN->setIDom(DN);
2264 }
2265
2266 // Add a call to memmove to the conditional block.
2267 IRBuilder<> CondBuilder(MemmoveB);
2268 CondBuilder.CreateBr(ExitB);
2269 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2270
2271 if (DestVolatile) {
2272 Type *Int32Ty = Type::getInt32Ty(Ctx);
2273 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2274 Type *VoidTy = Type::getVoidTy(Ctx);
2275 Module *M = Func->getParent();
2276 FunctionCallee Fn = M->getOrInsertFunction(
2277 HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
2278
2279 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2280 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2281 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2282 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2283 MemmoveB->getTerminator());
2284 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2285 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2286 NumWords = Simp;
2287
2288 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2289 ? StoreBasePtr
2290 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2291 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2292 ? LoadBasePtr
2293 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2294 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2295 } else {
2296 NewCall = CondBuilder.CreateMemMove(
2297 StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
2298 }
2299 } else {
2300 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
2301 LI->getAlign(), NumBytes);
2302 // Okay, the memcpy has been formed. Zap the original store and
2303 // anything that feeds into it.
2304 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2305 }
2306
2307 NewCall->setDebugLoc(DLoc);
2308
2309 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { } while (false)
2310 << *NewCall << "\n"do { } while (false)
2311 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { } while (false)
2312 << " from store ptr=" << *StoreEv << " at: " << *SIdo { } while (false)
2313 << "\n")do { } while (false);
2314
2315 return true;
2316}
2317
2318// Check if the instructions in Insts, together with their dependencies
2319// cover the loop in the sense that the loop could be safely eliminated once
2320// the instructions in Insts are removed.
2321bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2322 SmallVectorImpl<Instruction*> &Insts) const {
2323 SmallSet<BasicBlock*,8> LoopBlocks;
2324 for (auto *B : L->blocks())
2325 LoopBlocks.insert(B);
2326
2327 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2328
2329 // Collect all instructions from the loop that the instructions in Insts
2330 // depend on (plus their dependencies, etc.). These instructions will
2331 // constitute the expression trees that feed those in Insts, but the trees
2332 // will be limited only to instructions contained in the loop.
2333 for (unsigned i = 0; i < Worklist.size(); ++i) {
2334 Instruction *In = Worklist[i];
2335 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2336 Instruction *OpI = dyn_cast<Instruction>(I);
2337 if (!OpI)
2338 continue;
2339 BasicBlock *PB = OpI->getParent();
2340 if (!LoopBlocks.count(PB))
2341 continue;
2342 Worklist.insert(OpI);
2343 }
2344 }
2345
2346 // Scan all instructions in the loop, if any of them have a user outside
2347 // of the loop, or outside of the expressions collected above, then either
2348 // the loop has a side-effect visible outside of it, or there are
2349 // instructions in it that are not involved in the original set Insts.
2350 for (auto *B : L->blocks()) {
2351 for (auto &In : *B) {
2352 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2353 continue;
2354 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2355 return false;
2356 for (auto K : In.users()) {
2357 Instruction *UseI = dyn_cast<Instruction>(K);
2358 if (!UseI)
2359 continue;
2360 BasicBlock *UseB = UseI->getParent();
2361 if (LF->getLoopFor(UseB) != L)
2362 return false;
2363 }
2364 }
2365 }
2366
2367 return true;
2368}
2369
2370/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2371/// with the specified backedge count. This block is known to be in the current
2372/// loop and not in any subloops.
2373bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2374 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2375 // We can only promote stores in this block if they are unconditionally
2376 // executed in the loop. For a block to be unconditionally executed, it has
2377 // to dominate all the exit blocks of the loop. Verify this now.
2378 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2379 return DT->dominates(BB, EB);
2380 };
2381 if (!all_of(ExitBlocks, DominatedByBB))
2382 return false;
2383
2384 bool MadeChange = false;
2385 // Look for store instructions, which may be optimized to memset/memcpy.
2386 SmallVector<StoreInst*,8> Stores;
2387 collectStores(CurLoop, BB, Stores);
2388
2389 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2390 for (auto &SI : Stores)
2391 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2392
2393 return MadeChange;
2394}
2395
2396bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2397 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2398 if (PMR.recognize())
2399 return true;
2400
2401 if (!HasMemcpy && !HasMemmove)
2402 return false;
2403
2404 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2405 assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast<void> (0))
2406 "runOnCountableLoop() called on a loop without a predictable"(static_cast<void> (0))
2407 "backedge-taken count")(static_cast<void> (0));
2408
2409 SmallVector<BasicBlock *, 8> ExitBlocks;
2410 L->getUniqueExitBlocks(ExitBlocks);
2411
2412 bool Changed = false;
2413
2414 // Scan all the blocks in the loop that are not in subloops.
2415 for (auto *BB : L->getBlocks()) {
2416 // Ignore blocks in subloops.
2417 if (LF->getLoopFor(BB) != L)
2418 continue;
2419 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2420 }
2421
2422 return Changed;
2423}
2424
2425bool HexagonLoopIdiomRecognize::run(Loop *L) {
2426 const Module &M = *L->getHeader()->getParent()->getParent();
2427 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2428 return false;
2429
2430 // If the loop could not be converted to canonical form, it must have an
2431 // indirectbr in it, just give up.
2432 if (!L->getLoopPreheader())
2433 return false;
2434
2435 // Disable loop idiom recognition if the function's name is a common idiom.
2436 StringRef Name = L->getHeader()->getParent()->getName();
2437 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2438 return false;
2439
2440 DL = &L->getHeader()->getModule()->getDataLayout();
2441
2442 HasMemcpy = TLI->has(LibFunc_memcpy);
2443 HasMemmove = TLI->has(LibFunc_memmove);
2444
2445 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2446 return runOnCountableLoop(L);
2447 return false;
2448}
2449
2450bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
2451 LPPassManager &LPM) {
2452 if (skipLoop(L))
2453 return false;
2454
2455 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2456 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2457 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2458 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2459 *L->getHeader()->getParent());
2460 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2461 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2462}
2463
2464Pass *llvm::createHexagonLoopIdiomPass() {
2465 return new HexagonLoopIdiomRecognizeLegacyPass();
2466}
2467
2468PreservedAnalyses
2469HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM,
2470 LoopStandardAnalysisResults &AR,
2471 LPMUpdater &U) {
2472 return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)
2473 .run(&L)
2474 ? getLoopPassPreservedAnalyses()
2475 : PreservedAnalyses::all();
2476}

/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <utility>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34template <typename T> struct DenseMapInfo;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69///
70class LLVM_NODISCARD[[clang::warn_unused_result]] APInt {
71public:
72 typedef uint64_t WordType;
73
74 /// This enum is used to hold the constants we needed for APInt.
75 enum : unsigned {
76 /// Byte size of a word.
77 APINT_WORD_SIZE = sizeof(WordType),
78 /// Bits in a word.
79 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
80 };
81
82 enum class Rounding {
83 DOWN,
84 TOWARD_ZERO,
85 UP,
86 };
87
88 static constexpr WordType WORDTYPE_MAX = ~WordType(0);
89
90private:
91 /// This union is used to store the integer value. When the
92 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
93 union {
94 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
95 uint64_t *pVal; ///< Used to store the >64 bits integer value.
96 } U;
97
98 unsigned BitWidth; ///< The number of bits in this APInt.
99
100 friend struct DenseMapInfo<APInt>;
101
102 friend class APSInt;
103
104 /// Fast internal constructor
105 ///
106 /// This constructor is used only internally for speed of construction of
107 /// temporaries. It is unsafe for general use so it is not public.
108 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
109 U.pVal = val;
110 }
111
112 /// Determine which word a bit is in.
113 ///
114 /// \returns the word position for the specified bit position.
115 static unsigned whichWord(unsigned bitPosition) {
116 return bitPosition / APINT_BITS_PER_WORD;
117 }
118
119 /// Determine which bit in a word a bit is in.
120 ///
121 /// \returns the bit position in a word for the specified bit position
122 /// in the APInt.
123 static unsigned whichBit(unsigned bitPosition) {
124 return bitPosition % APINT_BITS_PER_WORD;
125 }
126
127 /// Get a single bit mask.
128 ///
129 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
130 /// This method generates and returns a uint64_t (word) mask for a single
131 /// bit at a specific bit position. This is used to mask the bit in the
132 /// corresponding word.
133 static uint64_t maskBit(unsigned bitPosition) {
134 return 1ULL << whichBit(bitPosition);
135 }
136
137 /// Clear unused high order bits
138 ///
139 /// This method is used internally to clear the top "N" bits in the high order
140 /// word that are not used by the APInt. This is needed after the most
141 /// significant word is assigned a value to ensure that those bits are
142 /// zero'd out.
143 APInt &clearUnusedBits() {
144 // Compute how many bits are used in the final word
145 unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
146
147 // Mask out the high bits.
148 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
149 if (isSingleWord())
150 U.VAL &= mask;
151 else
152 U.pVal[getNumWords() - 1] &= mask;
153 return *this;
154 }
155
156 /// Get the word corresponding to a bit position
157 /// \returns the corresponding word for the specified bit position.
158 uint64_t getWord(unsigned bitPosition) const {
159 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
160 }
161
162 /// Utility method to change the bit width of this APInt to new bit width,
163 /// allocating and/or deallocating as necessary. There is no guarantee on the
164 /// value of any bits upon return. Caller should populate the bits after.
165 void reallocate(unsigned NewBitWidth);
166
167 /// Convert a char array into an APInt
168 ///
169 /// \param radix 2, 8, 10, 16, or 36
170 /// Converts a string into a number. The string must be non-empty
171 /// and well-formed as a number of the given base. The bit-width
172 /// must be sufficient to hold the result.
173 ///
174 /// This is used by the constructors that take string arguments.
175 ///
176 /// StringRef::getAsInteger is superficially similar but (1) does
177 /// not assume that the string is well-formed and (2) grows the
178 /// result to hold the input.
179 void fromString(unsigned numBits, StringRef str, uint8_t radix);
180
181 /// An internal division function for dividing APInts.
182 ///
183 /// This is used by the toString method to divide by the radix. It simply
184 /// provides a more convenient form of divide for internal use since KnuthDiv
185 /// has specific constraints on its inputs. If those constraints are not met
186 /// then it provides a simpler form of divide.
187 static void divide(const WordType *LHS, unsigned lhsWords,
188 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
189 WordType *Remainder);
190
191 /// out-of-line slow case for inline constructor
192 void initSlowCase(uint64_t val, bool isSigned);
193
194 /// shared code between two array constructors
195 void initFromArray(ArrayRef<uint64_t> array);
196
197 /// out-of-line slow case for inline copy constructor
198 void initSlowCase(const APInt &that);
199
200 /// out-of-line slow case for shl
201 void shlSlowCase(unsigned ShiftAmt);
202
203 /// out-of-line slow case for lshr.
204 void lshrSlowCase(unsigned ShiftAmt);
205
206 /// out-of-line slow case for ashr.
207 void ashrSlowCase(unsigned ShiftAmt);
208
209 /// out-of-line slow case for operator=
210 void AssignSlowCase(const APInt &RHS);
211
212 /// out-of-line slow case for operator==
213 bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
214
215 /// out-of-line slow case for countLeadingZeros
216 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
217
218 /// out-of-line slow case for countLeadingOnes.
219 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
220
221 /// out-of-line slow case for countTrailingZeros.
222 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
223
224 /// out-of-line slow case for countTrailingOnes
225 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
226
227 /// out-of-line slow case for countPopulation
228 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
229
230 /// out-of-line slow case for intersects.
231 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
232
233 /// out-of-line slow case for isSubsetOf.
234 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
235
236 /// out-of-line slow case for setBits.
237 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
238
239 /// out-of-line slow case for flipAllBits.
240 void flipAllBitsSlowCase();
241
242 /// out-of-line slow case for operator&=.
243 void AndAssignSlowCase(const APInt& RHS);
244
245 /// out-of-line slow case for operator|=.
246 void OrAssignSlowCase(const APInt& RHS);
247
248 /// out-of-line slow case for operator^=.
249 void XorAssignSlowCase(const APInt& RHS);
250
251 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
252 /// to, or greater than RHS.
253 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
254
255 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
256 /// to, or greater than RHS.
257 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
258
259public:
260 /// \name Constructors
261 /// @{
262
263 /// Create a new APInt of numBits width, initialized as val.
264 ///
265 /// If isSigned is true then val is treated as if it were a signed value
266 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
267 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
268 /// the range of val are zero filled).
269 ///
270 /// \param numBits the bit width of the constructed APInt
271 /// \param val the initial value of the APInt
272 /// \param isSigned how to treat signedness of val
273 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
274 : BitWidth(numBits) {
275 assert(BitWidth && "bitwidth too small")(static_cast<void> (0));
276 if (isSingleWord()) {
277 U.VAL = val;
278 clearUnusedBits();
279 } else {
280 initSlowCase(val, isSigned);
281 }
282 }
283
284 /// Construct an APInt of numBits width, initialized as bigVal[].
285 ///
286 /// Note that bigVal.size() can be smaller or larger than the corresponding
287 /// bit width but any extraneous bits will be dropped.
288 ///
289 /// \param numBits the bit width of the constructed APInt
290 /// \param bigVal a sequence of words to form the initial value of the APInt
291 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
292
293 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
294 /// deprecated because this constructor is prone to ambiguity with the
295 /// APInt(unsigned, uint64_t, bool) constructor.
296 ///
297 /// If this overload is ever deleted, care should be taken to prevent calls
298 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
299 /// constructor.
300 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
301
302 /// Construct an APInt from a string representation.
303 ///
304 /// This constructor interprets the string \p str in the given radix. The
305 /// interpretation stops when the first character that is not suitable for the
306 /// radix is encountered, or the end of the string. Acceptable radix values
307 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
308 /// string to require more bits than numBits.
309 ///
310 /// \param numBits the bit width of the constructed APInt
311 /// \param str the string to be interpreted
312 /// \param radix the radix to use for the conversion
313 APInt(unsigned numBits, StringRef str, uint8_t radix);
314
315 /// Simply makes *this a copy of that.
316 /// Copy Constructor.
317 APInt(const APInt &that) : BitWidth(that.BitWidth) {
318 if (isSingleWord())
319 U.VAL = that.U.VAL;
320 else
321 initSlowCase(that);
322 }
323
324 /// Move Constructor.
325 APInt(APInt &&that) : BitWidth(that.BitWidth) {
326 memcpy(&U, &that.U, sizeof(U));
327 that.BitWidth = 0;
328 }
329
330 /// Destructor.
331 ~APInt() {
332 if (needsCleanup())
333 delete[] U.pVal;
334 }
335
336 /// Default constructor that creates an uninteresting APInt
337 /// representing a 1-bit zero value.
338 ///
339 /// This is useful for object deserialization (pair this with the static
340 /// method Read).
341 explicit APInt() : BitWidth(1) { U.VAL = 0; }
342
343 /// Returns whether this instance allocated memory.
344 bool needsCleanup() const { return !isSingleWord(); }
345
346 /// Used to insert APInt objects, or objects that contain APInt objects, into
347 /// FoldingSets.
348 void Profile(FoldingSetNodeID &id) const;
349
350 /// @}
351 /// \name Value Tests
352 /// @{
353
354 /// Determine if this APInt just has one word to store value.
355 ///
356 /// \returns true if the number of bits <= 64, false otherwise.
357 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
358
359 /// Determine sign of this APInt.
360 ///
361 /// This tests the high bit of this APInt to determine if it is set.
362 ///
363 /// \returns true if this APInt is negative, false otherwise
364 bool isNegative() const { return (*this)[BitWidth - 1]; }
365
366 /// Determine if this APInt Value is non-negative (>= 0)
367 ///
368 /// This tests the high bit of the APInt to determine if it is unset.
369 bool isNonNegative() const { return !isNegative(); }
370
371 /// Determine if sign bit of this APInt is set.
372 ///
373 /// This tests the high bit of this APInt to determine if it is set.
374 ///
375 /// \returns true if this APInt has its sign bit set, false otherwise.
376 bool isSignBitSet() const { return (*this)[BitWidth-1]; }
377
378 /// Determine if sign bit of this APInt is clear.
379 ///
380 /// This tests the high bit of this APInt to determine if it is clear.
381 ///
382 /// \returns true if this APInt has its sign bit clear, false otherwise.
383 bool isSignBitClear() const { return !isSignBitSet(); }
384
385 /// Determine if this APInt Value is positive.
386 ///
387 /// This tests if the value of this APInt is positive (> 0). Note
388 /// that 0 is not a positive value.
389 ///
390 /// \returns true if this APInt is positive.
391 bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
392
393 /// Determine if this APInt Value is non-positive (<= 0).
394 ///
395 /// \returns true if this APInt is non-positive.
396 bool isNonPositive() const { return !isStrictlyPositive(); }
397
398 /// Determine if all bits are set
399 ///
400 /// This checks to see if the value has all bits of the APInt are set or not.
401 bool isAllOnesValue() const {
402 if (isSingleWord())
403 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
404 return countTrailingOnesSlowCase() == BitWidth;
405 }
406
407 /// Determine if all bits are clear
408 ///
409 /// This checks to see if the value has all bits of the APInt are clear or
410 /// not.
411 bool isNullValue() const { return !*this; }
412
413 /// Determine if this is a value of 1.
414 ///
415 /// This checks to see if the value of this APInt is one.
416 bool isOneValue() const {
417 if (isSingleWord())
418 return U.VAL == 1;
419 return countLeadingZerosSlowCase() == BitWidth - 1;
420 }
421
422 /// Determine if this is the largest unsigned value.
423 ///
424 /// This checks to see if the value of this APInt is the maximum unsigned
425 /// value for the APInt's bit width.
426 bool isMaxValue() const { return isAllOnesValue(); }
427
428 /// Determine if this is the largest signed value.
429 ///
430 /// This checks to see if the value of this APInt is the maximum signed
431 /// value for the APInt's bit width.
432 bool isMaxSignedValue() const {
433 if (isSingleWord())
434 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
435 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
436 }
437
438 /// Determine if this is the smallest unsigned value.
439 ///
440 /// This checks to see if the value of this APInt is the minimum unsigned
441 /// value for the APInt's bit width.
442 bool isMinValue() const { return isNullValue(); }
443
444 /// Determine if this is the smallest signed value.
445 ///
446 /// This checks to see if the value of this APInt is the minimum signed
447 /// value for the APInt's bit width.
448 bool isMinSignedValue() const {
449 if (isSingleWord())
450 return U.VAL == (WordType(1) << (BitWidth - 1));
451 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
452 }
453
454 /// Check if this APInt has an N-bits unsigned integer value.
455 bool isIntN(unsigned N) const {
456 assert(N && "N == 0 ???")(static_cast<void> (0));
457 return getActiveBits() <= N;
458 }
459
460 /// Check if this APInt has an N-bits signed integer value.
461 bool isSignedIntN(unsigned N) const {
462 assert(N && "N == 0 ???")(static_cast<void> (0));
463 return getMinSignedBits() <= N;
464 }
465
466 /// Check if this APInt's value is a power of two greater than zero.
467 ///
468 /// \returns true if the argument APInt value is a power of two > 0.
469 bool isPowerOf2() const {
470 if (isSingleWord())
471 return isPowerOf2_64(U.VAL);
472 return countPopulationSlowCase() == 1;
473 }
474
475 /// Check if the APInt's value is returned by getSignMask.
476 ///
477 /// \returns true if this is the value returned by getSignMask.
478 bool isSignMask() const { return isMinSignedValue(); }
479
480 /// Convert APInt to a boolean value.
481 ///
482 /// This converts the APInt to a boolean value as a test against zero.
483 bool getBoolValue() const { return !!*this; }
484
485 /// If this value is smaller than the specified limit, return it, otherwise
486 /// return the limit value. This causes the value to saturate to the limit.
487 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
488 return ugt(Limit) ? Limit : getZExtValue();
489 }
490
491 /// Check if the APInt consists of a repeated bit pattern.
492 ///
493 /// e.g. 0x01010101 satisfies isSplat(8).
494 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
495 /// width without remainder.
496 bool isSplat(unsigned SplatSizeInBits) const;
497
498 /// \returns true if this APInt value is a sequence of \param numBits ones
499 /// starting at the least significant bit with the remainder zero.
500 bool isMask(unsigned numBits) const {
501 assert(numBits != 0 && "numBits must be non-zero")(static_cast<void> (0));
502 assert(numBits <= BitWidth && "numBits out of range")(static_cast<void> (0));
503 if (isSingleWord())
504 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
505 unsigned Ones = countTrailingOnesSlowCase();
506 return (numBits == Ones) &&
507 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
508 }
509
510 /// \returns true if this APInt is a non-empty sequence of ones starting at
511 /// the least significant bit with the remainder zero.
512 /// Ex. isMask(0x0000FFFFU) == true.
513 bool isMask() const {
514 if (isSingleWord())
515 return isMask_64(U.VAL);
516 unsigned Ones = countTrailingOnesSlowCase();
517 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
518 }
519
520 /// Return true if this APInt value contains a sequence of ones with
521 /// the remainder zero.
522 bool isShiftedMask() const {
523 if (isSingleWord())
524 return isShiftedMask_64(U.VAL);
525 unsigned Ones = countPopulationSlowCase();
526 unsigned LeadZ = countLeadingZerosSlowCase();
527 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
528 }
529
530 /// @}
531 /// \name Value Generators
532 /// @{
533
534 /// Gets maximum unsigned value of APInt for specific bit width.
535 static APInt getMaxValue(unsigned numBits) {
536 return getAllOnesValue(numBits);
537 }
538
539 /// Gets maximum signed value of APInt for a specific bit width.
540 static APInt getSignedMaxValue(unsigned numBits) {
541 APInt API = getAllOnesValue(numBits);
542 API.clearBit(numBits - 1);
543 return API;
544 }
545
546 /// Gets minimum unsigned value of APInt for a specific bit width.
547 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
548
549 /// Gets minimum signed value of APInt for a specific bit width.
550 static APInt getSignedMinValue(unsigned numBits) {
551 APInt API(numBits, 0);
552 API.setBit(numBits - 1);
553 return API;
554 }
555
556 /// Get the SignMask for a specific bit width.
557 ///
558 /// This is just a wrapper function of getSignedMinValue(), and it helps code
559 /// readability when we want to get a SignMask.
560 static APInt getSignMask(unsigned BitWidth) {
561 return getSignedMinValue(BitWidth);
562 }
563
564 /// Get the all-ones value.
565 ///
566 /// \returns the all-ones value for an APInt of the specified bit-width.
567 static APInt getAllOnesValue(unsigned numBits) {
568 return APInt(numBits, WORDTYPE_MAX, true);
569 }
570
571 /// Get the '0' value.
572 ///
573 /// \returns the '0' value for an APInt of the specified bit-width.
574 static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
575
576 /// Compute an APInt containing numBits highbits from this APInt.
577 ///
578 /// Get an APInt with the same BitWidth as this APInt, just zero mask
579 /// the low bits and right shift to the least significant bit.
580 ///
581 /// \returns the high "numBits" bits of this APInt.
582 APInt getHiBits(unsigned numBits) const;
583
584 /// Compute an APInt containing numBits lowbits from this APInt.
585 ///
586 /// Get an APInt with the same BitWidth as this APInt, just zero mask
587 /// the high bits.
588 ///
589 /// \returns the low "numBits" bits of this APInt.
590 APInt getLoBits(unsigned numBits) const;
591
592 /// Return an APInt with exactly one bit set in the result.
593 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
594 APInt Res(numBits, 0);
595 Res.setBit(BitNo);
596 return Res;
597 }
598
599 /// Get a value with a block of bits set.
600 ///
601 /// Constructs an APInt value that has a contiguous range of bits set. The
602 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
603 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
604 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
605 /// \p hiBit.
606 ///
607 /// \param numBits the intended bit width of the result
608 /// \param loBit the index of the lowest bit set.
609 /// \param hiBit the index of the highest bit set.
610 ///
611 /// \returns An APInt value with the requested bits set.
612 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
613 assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast<void> (0));
614 APInt Res(numBits, 0);
615 Res.setBits(loBit, hiBit);
616 return Res;
617 }
618
619 /// Wrap version of getBitsSet.
620 /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
621 /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
622 /// with parameters (32, 28, 4), you would get 0xF000000F.
623 /// If \p hiBit is equal to \p loBit, you would get a result with all bits
624 /// set.
625 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
626 unsigned hiBit) {
627 APInt Res(numBits, 0);
628 Res.setBitsWithWrap(loBit, hiBit);
629 return Res;
630 }
631
632 /// Get a value with upper bits starting at loBit set.
633 ///
634 /// Constructs an APInt value that has a contiguous range of bits set. The
635 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
636 /// bits will be zero. For example, with parameters(32, 12) you would get
637 /// 0xFFFFF000.
638 ///
639 /// \param numBits the intended bit width of the result
640 /// \param loBit the index of the lowest bit to set.
641 ///
642 /// \returns An APInt value with the requested bits set.
643 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
644 APInt Res(numBits, 0);
645 Res.setBitsFrom(loBit);
646 return Res;
647 }
648
649 /// Get a value with high bits set
650 ///
651 /// Constructs an APInt value that has the top hiBitsSet bits set.
652 ///
653 /// \param numBits the bitwidth of the result
654 /// \param hiBitsSet the number of high-order bits set in the result.
655 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
656 APInt Res(numBits, 0);
657 Res.setHighBits(hiBitsSet);
658 return Res;
659 }
660
661 /// Get a value with low bits set
662 ///
663 /// Constructs an APInt value that has the bottom loBitsSet bits set.
664 ///
665 /// \param numBits the bitwidth of the result
666 /// \param loBitsSet the number of low-order bits set in the result.
667 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
668 APInt Res(numBits, 0);
669 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
11
Returning from 'APInt::setLowBits'
670 return Res;
671 }
672
673 /// Return a value containing V broadcasted over NewLen bits.
674 static APInt getSplat(unsigned NewLen, const APInt &V);
675
676 /// Determine if two APInts have the same value, after zero-extending
677 /// one of them (if needed!) to ensure that the bit-widths match.
678 static bool isSameValue(const APInt &I1, const APInt &I2) {
679 if (I1.getBitWidth() == I2.getBitWidth())
680 return I1 == I2;
681
682 if (I1.getBitWidth() > I2.getBitWidth())
683 return I1 == I2.zext(I1.getBitWidth());
684
685 return I1.zext(I2.getBitWidth()) == I2;
686 }
687
688 /// Overload to compute a hash_code for an APInt value.
689 friend hash_code hash_value(const APInt &Arg);
690
691 /// This function returns a pointer to the internal storage of the APInt.
692 /// This is useful for writing out the APInt in binary form without any
693 /// conversions.
694 const uint64_t *getRawData() const {
695 if (isSingleWord())
696 return &U.VAL;
697 return &U.pVal[0];
698 }
699
700 /// @}
701 /// \name Unary Operators
702 /// @{
703
704 /// Postfix increment operator.
705 ///
706 /// Increments *this by 1.
707 ///
708 /// \returns a new APInt value representing the original value of *this.
709 APInt operator++(int) {
710 APInt API(*this);
711 ++(*this);
712 return API;
713 }
714
715 /// Prefix increment operator.
716 ///
717 /// \returns *this incremented by one
718 APInt &operator++();
719
720 /// Postfix decrement operator.
721 ///
722 /// Decrements *this by 1.
723 ///
724 /// \returns a new APInt value representing the original value of *this.
725 APInt operator--(int) {
726 APInt API(*this);
727 --(*this);
728 return API;
729 }
730
731 /// Prefix decrement operator.
732 ///
733 /// \returns *this decremented by one.
734 APInt &operator--();
735
736 /// Logical negation operator.
737 ///
738 /// Performs logical negation operation on this APInt.
739 ///
740 /// \returns true if *this is zero, false otherwise.
741 bool operator!() const {
742 if (isSingleWord())
743 return U.VAL == 0;
744 return countLeadingZerosSlowCase() == BitWidth;
745 }
746
747 /// @}
748 /// \name Assignment Operators
749 /// @{
750
751 /// Copy assignment operator.
752 ///
753 /// \returns *this after assignment of RHS.
754 APInt &operator=(const APInt &RHS) {
755 // If the bitwidths are the same, we can avoid mucking with memory
756 if (isSingleWord() && RHS.isSingleWord()) {
757 U.VAL = RHS.U.VAL;
758 BitWidth = RHS.BitWidth;
759 return clearUnusedBits();
760 }
761
762 AssignSlowCase(RHS);
763 return *this;
764 }
765
766 /// Move assignment operator.
767 APInt &operator=(APInt &&that) {
768#ifdef EXPENSIVE_CHECKS
769 // Some std::shuffle implementations still do self-assignment.
770 if (this == &that)
771 return *this;
772#endif
773 assert(this != &that && "Self-move not supported")(static_cast<void> (0));
774 if (!isSingleWord())
775 delete[] U.pVal;
776
777 // Use memcpy so that type based alias analysis sees both VAL and pVal
778 // as modified.
779 memcpy(&U, &that.U, sizeof(U));
780
781 BitWidth = that.BitWidth;
782 that.BitWidth = 0;
783
784 return *this;
785 }
786
787 /// Assignment operator.
788 ///
789 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
790 /// the bit width, the excess bits are truncated. If the bit width is larger
791 /// than 64, the value is zero filled in the unspecified high order bits.
792 ///
793 /// \returns *this after assignment of RHS value.
794 APInt &operator=(uint64_t RHS) {
795 if (isSingleWord()) {
796 U.VAL = RHS;
797 return clearUnusedBits();
798 }
799 U.pVal[0] = RHS;
800 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
801 return *this;
802 }
803
804 /// Bitwise AND assignment operator.
805 ///
806 /// Performs a bitwise AND operation on this APInt and RHS. The result is
807 /// assigned to *this.
808 ///
809 /// \returns *this after ANDing with RHS.
810 APInt &operator&=(const APInt &RHS) {
811 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast<void> (0));
812 if (isSingleWord())
813 U.VAL &= RHS.U.VAL;
814 else
815 AndAssignSlowCase(RHS);
816 return *this;
817 }
818
819 /// Bitwise AND assignment operator.
820 ///
821 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
822 /// logically zero-extended or truncated to match the bit-width of
823 /// the LHS.
824 APInt &operator&=(uint64_t RHS) {
825 if (isSingleWord()) {
826 U.VAL &= RHS;
827 return *this;
828 }
829 U.pVal[0] &= RHS;
830 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
831 return *this;
832 }
833
834 /// Bitwise OR assignment operator.
835 ///
836 /// Performs a bitwise OR operation on this APInt and RHS. The result is
837 /// assigned *this;
838 ///
839 /// \returns *this after ORing with RHS.
840 APInt &operator|=(const APInt &RHS) {
841 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast<void> (0));
842 if (isSingleWord())
843 U.VAL |= RHS.U.VAL;
844 else
845 OrAssignSlowCase(RHS);
846 return *this;
847 }
848
849 /// Bitwise OR assignment operator.
850 ///
851 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
852 /// logically zero-extended or truncated to match the bit-width of
853 /// the LHS.
854 APInt &operator|=(uint64_t RHS) {
855 if (isSingleWord()) {
856 U.VAL |= RHS;
857 return clearUnusedBits();
858 }
859 U.pVal[0] |= RHS;
860 return *this;
861 }
862
863 /// Bitwise XOR assignment operator.
864 ///
865 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
866 /// assigned to *this.
867 ///
868 /// \returns *this after XORing with RHS.
869 APInt &operator^=(const APInt &RHS) {
870 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast<void> (0));
871 if (isSingleWord())
872 U.VAL ^= RHS.U.VAL;
873 else
874 XorAssignSlowCase(RHS);
875 return *this;
876 }
877
878 /// Bitwise XOR assignment operator.
879 ///
880 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
881 /// logically zero-extended or truncated to match the bit-width of
882 /// the LHS.
883 APInt &operator^=(uint64_t RHS) {
884 if (isSingleWord()) {
885 U.VAL ^= RHS;
886 return clearUnusedBits();
887 }
888 U.pVal[0] ^= RHS;
889 return *this;
890 }
891
892 /// Multiplication assignment operator.
893 ///
894 /// Multiplies this APInt by RHS and assigns the result to *this.
895 ///
896 /// \returns *this
897 APInt &operator*=(const APInt &RHS);
898 APInt &operator*=(uint64_t RHS);
899
900 /// Addition assignment operator.
901 ///
902 /// Adds RHS to *this and assigns the result to *this.
903 ///
904 /// \returns *this
905 APInt &operator+=(const APInt &RHS);
906 APInt &operator+=(uint64_t RHS);
907
908 /// Subtraction assignment operator.
909 ///
910 /// Subtracts RHS from *this and assigns the result to *this.
911 ///
912 /// \returns *this
913 APInt &operator-=(const APInt &RHS);
914 APInt &operator-=(uint64_t RHS);
915
916 /// Left-shift assignment function.
917 ///
918 /// Shifts *this left by shiftAmt and assigns the result to *this.
919 ///
920 /// \returns *this after shifting left by ShiftAmt
921 APInt &operator<<=(unsigned ShiftAmt) {
922 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast<void> (0));
923 if (isSingleWord()) {
924 if (ShiftAmt == BitWidth)
925 U.VAL = 0;
926 else
927 U.VAL <<= ShiftAmt;
928 return clearUnusedBits();
929 }
930 shlSlowCase(ShiftAmt);
931 return *this;
932 }
933
934 /// Left-shift assignment function.
935 ///
936 /// Shifts *this left by shiftAmt and assigns the result to *this.
937 ///
938 /// \returns *this after shifting left by ShiftAmt
939 APInt &operator<<=(const APInt &ShiftAmt);
940
941 /// @}
942 /// \name Binary Operators
943 /// @{
944
945 /// Multiplication operator.
946 ///
947 /// Multiplies this APInt by RHS and returns the result.
948 APInt operator*(const APInt &RHS) const;
949
950 /// Left logical shift operator.
951 ///
952 /// Shifts this APInt left by \p Bits and returns the result.
953 APInt operator<<(unsigned Bits) const { return shl(Bits); }
954
955 /// Left logical shift operator.
956 ///
957 /// Shifts this APInt left by \p Bits and returns the result.
958 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
959
960 /// Arithmetic right-shift function.
961 ///
962 /// Arithmetic right-shift this APInt by shiftAmt.
963 APInt ashr(unsigned ShiftAmt) const {
964 APInt R(*this);
965 R.ashrInPlace(ShiftAmt);
966 return R;
967 }
968
969 /// Arithmetic right-shift this APInt by ShiftAmt in place.
970 void ashrInPlace(unsigned ShiftAmt) {
971 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast<void> (0));
972 if (isSingleWord()) {
973 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
974 if (ShiftAmt == BitWidth)
975 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
976 else
977 U.VAL = SExtVAL >> ShiftAmt;
978 clearUnusedBits();
979 return;
980 }
981 ashrSlowCase(ShiftAmt);
982 }
983
984 /// Logical right-shift function.
985 ///
986 /// Logical right-shift this APInt by shiftAmt.
987 APInt lshr(unsigned shiftAmt) const {
988 APInt R(*this);
989 R.lshrInPlace(shiftAmt);
990 return R;
991 }
992
993 /// Logical right-shift this APInt by ShiftAmt in place.
994 void lshrInPlace(unsigned ShiftAmt) {
995 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast<void> (0));
996 if (isSingleWord()) {
997 if (ShiftAmt == BitWidth)
998 U.VAL = 0;
999 else
1000 U.VAL >>= ShiftAmt;
1001 return;
1002 }
1003 lshrSlowCase(ShiftAmt);
1004 }
1005
1006 /// Left-shift function.
1007 ///
1008 /// Left-shift this APInt by shiftAmt.
1009 APInt shl(unsigned shiftAmt) const {
1010 APInt R(*this);
1011 R <<= shiftAmt;
1012 return R;
1013 }
1014
1015 /// Rotate left by rotateAmt.
1016 APInt rotl(unsigned rotateAmt) const;
1017
1018 /// Rotate right by rotateAmt.
1019 APInt rotr(unsigned rotateAmt) const;
1020
1021 /// Arithmetic right-shift function.
1022 ///
1023 /// Arithmetic right-shift this APInt by shiftAmt.
1024 APInt ashr(const APInt &ShiftAmt) const {
1025 APInt R(*this);
1026 R.ashrInPlace(ShiftAmt);
1027 return R;
1028 }
1029
1030 /// Arithmetic right-shift this APInt by shiftAmt in place.
1031 void ashrInPlace(const APInt &shiftAmt);
1032
1033 /// Logical right-shift function.
1034 ///
1035 /// Logical right-shift this APInt by shiftAmt.
1036 APInt lshr(const APInt &ShiftAmt) const {
1037 APInt R(*this);
1038 R.lshrInPlace(ShiftAmt);
1039 return R;
1040 }
1041
1042 /// Logical right-shift this APInt by ShiftAmt in place.
1043 void lshrInPlace(const APInt &ShiftAmt);
1044
1045 /// Left-shift function.
1046 ///
1047 /// Left-shift this APInt by shiftAmt.
1048 APInt shl(const APInt &ShiftAmt) const {
1049 APInt R(*this);
1050 R <<= ShiftAmt;
1051 return R;
1052 }
1053
1054 /// Rotate left by rotateAmt.
1055 APInt rotl(const APInt &rotateAmt) const;
1056
1057 /// Rotate right by rotateAmt.
1058 APInt rotr(const APInt &rotateAmt) const;
1059
1060 /// Unsigned division operation.
1061 ///
1062 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
1063 /// RHS are treated as unsigned quantities for purposes of this division.
1064 ///
1065 /// \returns a new APInt value containing the division result, rounded towards
1066 /// zero.
1067 APInt udiv(const APInt &RHS) const;
1068 APInt udiv(uint64_t RHS) const;
1069
1070 /// Signed division function for APInt.
1071 ///
1072 /// Signed divide this APInt by APInt RHS.
1073 ///
1074 /// The result is rounded towards zero.
1075 APInt sdiv(const APInt &RHS) const;
1076 APInt sdiv(int64_t RHS) const;
1077
1078 /// Unsigned remainder operation.
1079 ///
1080 /// Perform an unsigned remainder operation on this APInt with RHS being the
1081 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
1082 /// of this operation. Note that this is a true remainder operation and not a
1083 /// modulo operation because the sign follows the sign of the dividend which
1084 /// is *this.
1085 ///
1086 /// \returns a new APInt value containing the remainder result
1087 APInt urem(const APInt &RHS) const;
1088 uint64_t urem(uint64_t RHS) const;
1089
1090 /// Function for signed remainder operation.
1091 ///
1092 /// Signed remainder operation on APInt.
1093 APInt srem(const APInt &RHS) const;
1094 int64_t srem(int64_t RHS) const;
1095
1096 /// Dual division/remainder interface.
1097 ///
1098 /// Sometimes it is convenient to divide two APInt values and obtain both the
1099 /// quotient and remainder. This function does both operations in the same
1100 /// computation making it a little more efficient. The pair of input arguments
1101 /// may overlap with the pair of output arguments. It is safe to call
1102 /// udivrem(X, Y, X, Y), for example.
1103 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1104 APInt &Remainder);
1105 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1106 uint64_t &Remainder);
1107
1108 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1109 APInt &Remainder);
1110 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
1111 int64_t &Remainder);
1112
1113 // Operations that return overflow indicators.
1114 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
1115 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1116 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1117 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1118 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1119 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1120 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1121 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1122 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
1123
1124 // Operations that saturate
1125 APInt sadd_sat(const APInt &RHS) const;
1126 APInt uadd_sat(const APInt &RHS) const;
1127 APInt ssub_sat(const APInt &RHS) const;
1128 APInt usub_sat(const APInt &RHS) const;
1129 APInt smul_sat(const APInt &RHS) const;
1130 APInt umul_sat(const APInt &RHS) const;
1131 APInt sshl_sat(const APInt &RHS) const;
1132 APInt ushl_sat(const APInt &RHS) const;
1133
1134 /// Array-indexing support.
1135 ///
1136 /// \returns the bit value at bitPosition
1137 bool operator[](unsigned bitPosition) const {
1138 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast<void> (0));
1139 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1140 }
1141
1142 /// @}
1143 /// \name Comparison Operators
1144 /// @{
1145
1146 /// Equality operator.
1147 ///
1148 /// Compares this APInt with RHS for the validity of the equality
1149 /// relationship.
1150 bool operator==(const APInt &RHS) const {
1151 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast<void> (0));
1152 if (isSingleWord())
1153 return U.VAL == RHS.U.VAL;
1154 return EqualSlowCase(RHS);
1155 }
1156
1157 /// Equality operator.
1158 ///
1159 /// Compares this APInt with a uint64_t for the validity of the equality
1160 /// relationship.
1161 ///
1162 /// \returns true if *this == Val
1163 bool operator==(uint64_t Val) const {
1164 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1165 }
1166
1167 /// Equality comparison.
1168 ///
1169 /// Compares this APInt with RHS for the validity of the equality
1170 /// relationship.
1171 ///
1172 /// \returns true if *this == Val
1173 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1174
1175 /// Inequality operator.
1176 ///
1177 /// Compares this APInt with RHS for the validity of the inequality
1178 /// relationship.
1179 ///
1180 /// \returns true if *this != Val
1181 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1182
1183 /// Inequality operator.
1184 ///
1185 /// Compares this APInt with a uint64_t for the validity of the inequality
1186 /// relationship.
1187 ///
1188 /// \returns true if *this != Val
1189 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1190
1191 /// Inequality comparison
1192 ///
1193 /// Compares this APInt with RHS for the validity of the inequality
1194 /// relationship.
1195 ///
1196 /// \returns true if *this != Val
1197 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1198
1199 /// Unsigned less than comparison
1200 ///
1201 /// Regards both *this and RHS as unsigned quantities and compares them for
1202 /// the validity of the less-than relationship.
1203 ///
1204 /// \returns true if *this < RHS when both are considered unsigned.
1205 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1206
1207 /// Unsigned less than comparison
1208 ///
1209 /// Regards both *this as an unsigned quantity and compares it with RHS for
1210 /// the validity of the less-than relationship.
1211 ///
1212 /// \returns true if *this < RHS when considered unsigned.
1213 bool ult(uint64_t RHS) const {
1214 // Only need to check active bits if not a single word.
1215 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1216 }
1217
1218 /// Signed less than comparison
1219 ///
1220 /// Regards both *this and RHS as signed quantities and compares them for
1221 /// validity of the less-than relationship.
1222 ///
1223 /// \returns true if *this < RHS when both are considered signed.
1224 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1225
1226 /// Signed less than comparison
1227 ///
1228 /// Regards both *this as a signed quantity and compares it with RHS for
1229 /// the validity of the less-than relationship.
1230 ///
1231 /// \returns true if *this < RHS when considered signed.
1232 bool slt(int64_t RHS) const {
1233 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1234 : getSExtValue() < RHS;
1235 }
1236
1237 /// Unsigned less or equal comparison
1238 ///
1239 /// Regards both *this and RHS as unsigned quantities and compares them for
1240 /// validity of the less-or-equal relationship.
1241 ///
1242 /// \returns true if *this <= RHS when both are considered unsigned.
1243 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1244
1245 /// Unsigned less or equal comparison
1246 ///
1247 /// Regards both *this as an unsigned quantity and compares it with RHS for
1248 /// the validity of the less-or-equal relationship.
1249 ///
1250 /// \returns true if *this <= RHS when considered unsigned.
1251 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1252
1253 /// Signed less or equal comparison
1254 ///
1255 /// Regards both *this and RHS as signed quantities and compares them for
1256 /// validity of the less-or-equal relationship.
1257 ///
1258 /// \returns true if *this <= RHS when both are considered signed.
1259 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1260
1261 /// Signed less or equal comparison
1262 ///
1263 /// Regards both *this as a signed quantity and compares it with RHS for the
1264 /// validity of the less-or-equal relationship.
1265 ///
1266 /// \returns true if *this <= RHS when considered signed.
1267 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1268
1269 /// Unsigned greater than comparison
1270 ///
1271 /// Regards both *this and RHS as unsigned quantities and compares them for
1272 /// the validity of the greater-than relationship.
1273 ///
1274 /// \returns true if *this > RHS when both are considered unsigned.
1275 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1276
1277 /// Unsigned greater than comparison
1278 ///
1279 /// Regards both *this as an unsigned quantity and compares it with RHS for
1280 /// the validity of the greater-than relationship.
1281 ///
1282 /// \returns true if *this > RHS when considered unsigned.
1283 bool ugt(uint64_t RHS) const {
1284 // Only need to check active bits if not a single word.
1285 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1286 }
1287
1288 /// Signed greater than comparison
1289 ///
1290 /// Regards both *this and RHS as signed quantities and compares them for the
1291 /// validity of the greater-than relationship.
1292 ///
1293 /// \returns true if *this > RHS when both are considered signed.
1294 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1295
1296 /// Signed greater than comparison
1297 ///
1298 /// Regards both *this as a signed quantity and compares it with RHS for
1299 /// the validity of the greater-than relationship.
1300 ///
1301 /// \returns true if *this > RHS when considered signed.
1302 bool sgt(int64_t RHS) const {
1303 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1304 : getSExtValue() > RHS;
1305 }
1306
1307 /// Unsigned greater or equal comparison
1308 ///
1309 /// Regards both *this and RHS as unsigned quantities and compares them for
1310 /// validity of the greater-or-equal relationship.
1311 ///
1312 /// \returns true if *this >= RHS when both are considered unsigned.
1313 bool uge(const APInt &RHS) const { return !ult(RHS); }
1314
1315 /// Unsigned greater or equal comparison
1316 ///
1317 /// Regards both *this as an unsigned quantity and compares it with RHS for
1318 /// the validity of the greater-or-equal relationship.
1319 ///
1320 /// \returns true if *this >= RHS when considered unsigned.
1321 bool uge(uint64_t RHS) const { return !ult(RHS); }
1322
1323 /// Signed greater or equal comparison
1324 ///
1325 /// Regards both *this and RHS as signed quantities and compares them for
1326 /// validity of the greater-or-equal relationship.
1327 ///
1328 /// \returns true if *this >= RHS when both are considered signed.
1329 bool sge(const APInt &RHS) const { return !slt(RHS); }
1330
1331 /// Signed greater or equal comparison
1332 ///
1333 /// Regards both *this as a signed quantity and compares it with RHS for
1334 /// the validity of the greater-or-equal relationship.
1335 ///
1336 /// \returns true if *this >= RHS when considered signed.
1337 bool sge(int64_t RHS) const { return !slt(RHS); }
1338
1339 /// This operation tests if there are any pairs of corresponding bits
1340 /// between this APInt and RHS that are both set.
1341 bool intersects(const APInt &RHS) const {
1342 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast<void> (0));
1343 if (isSingleWord())
1344 return (U.VAL & RHS.U.VAL) != 0;
1345 return intersectsSlowCase(RHS);
1346 }
1347
1348 /// This operation checks that all bits set in this APInt are also set in RHS.
1349 bool isSubsetOf(const APInt &RHS) const {
1350 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast<void> (0));
1351 if (isSingleWord())
1352 return (U.VAL & ~RHS.U.VAL) == 0;
1353 return isSubsetOfSlowCase(RHS);
1354 }
1355
1356 /// @}
1357 /// \name Resizing Operators
1358 /// @{
1359
1360 /// Truncate to new width.
1361 ///
1362 /// Truncate the APInt to a specified width. It is an error to specify a width
1363 /// that is greater than or equal to the current width.
1364 APInt trunc(unsigned width) const;
1365
1366 /// Truncate to new width with unsigned saturation.
1367 ///
1368 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1369 /// the new bitwidth, then return truncated APInt. Else, return max value.
1370 APInt truncUSat(unsigned width) const;
1371
1372 /// Truncate to new width with signed saturation.
1373 ///
1374 /// If this APInt, treated as signed integer, can be losslessly truncated to
1375 /// the new bitwidth, then return truncated APInt. Else, return either
1376 /// signed min value if the APInt was negative, or signed max value.
1377 APInt truncSSat(unsigned width) const;
1378
1379 /// Sign extend to a new width.
1380 ///
1381 /// This operation sign extends the APInt to a new width. If the high order
1382 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1383 /// It is an error to specify a width that is less than or equal to the
1384 /// current width.
1385 APInt sext(unsigned width) const;
1386
1387 /// Zero extend to a new width.
1388 ///
1389 /// This operation zero extends the APInt to a new width. The high order bits
1390 /// are filled with 0 bits. It is an error to specify a width that is less
1391 /// than or equal to the current width.
1392 APInt zext(unsigned width) const;
1393
1394 /// Sign extend or truncate to width
1395 ///
1396 /// Make this APInt have the bit width given by \p width. The value is sign
1397 /// extended, truncated, or left alone to make it that width.
1398 APInt sextOrTrunc(unsigned width) const;
1399
1400 /// Zero extend or truncate to width
1401 ///
1402 /// Make this APInt have the bit width given by \p width. The value is zero
1403 /// extended, truncated, or left alone to make it that width.
1404 APInt zextOrTrunc(unsigned width) const;
1405
1406 /// Truncate to width
1407 ///
1408 /// Make this APInt have the bit width given by \p width. The value is
1409 /// truncated or left alone to make it that width.
1410 APInt truncOrSelf(unsigned width) const;
1411
1412 /// Sign extend or truncate to width
1413 ///
1414 /// Make this APInt have the bit width given by \p width. The value is sign
1415 /// extended, or left alone to make it that width.
1416 APInt sextOrSelf(unsigned width) const;
1417
1418 /// Zero extend or truncate to width
1419 ///
1420 /// Make this APInt have the bit width given by \p width. The value is zero
1421 /// extended, or left alone to make it that width.
1422 APInt zextOrSelf(unsigned width) const;
1423
1424 /// @}
1425 /// \name Bit Manipulation Operators
1426 /// @{
1427
1428 /// Set every bit to 1.
1429 void setAllBits() {
1430 if (isSingleWord())
1431 U.VAL = WORDTYPE_MAX;
1432 else
1433 // Set all the bits in all the words.
1434 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1435 // Clear the unused ones
1436 clearUnusedBits();
1437 }
1438
1439 /// Set a given bit to 1.
1440 ///
1441 /// Set the given bit to 1 whose position is given as "bitPosition".
1442 void setBit(unsigned BitPosition) {
1443 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast<void> (0));
1444 WordType Mask = maskBit(BitPosition);
1445 if (isSingleWord())
1446 U.VAL |= Mask;
1447 else
1448 U.pVal[whichWord(BitPosition)] |= Mask;
1449 }
1450
1451 /// Set the sign bit to 1.
1452 void setSignBit() {
1453 setBit(BitWidth - 1);
1454 }
1455
1456 /// Set a given bit to a given value.
1457 void setBitVal(unsigned BitPosition, bool BitValue) {
1458 if (BitValue)
1459 setBit(BitPosition);
1460 else
1461 clearBit(BitPosition);
1462 }
1463
1464 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1465 /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
1466 /// setBits when \p loBit < \p hiBit.
1467 /// For \p loBit == \p hiBit wrap case, set every bit to 1.
1468 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1469 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast<void> (0));
1470 assert(loBit <= BitWidth && "loBit out of range")(static_cast<void> (0));
1471 if (loBit < hiBit) {
1472 setBits(loBit, hiBit);
1473 return;
1474 }
1475 setLowBits(hiBit);
1476 setHighBits(BitWidth - loBit);
1477 }
1478
1479 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1480 /// This function handles case when \p loBit <= \p hiBit.
1481 void setBits(unsigned loBit, unsigned hiBit) {
1482 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast<void> (0));
1483 assert(loBit <= BitWidth && "loBit out of range")(static_cast<void> (0));
1484 assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast<void> (0));
1485 if (loBit == hiBit)
6
Assuming 'loBit' is not equal to 'hiBit'
1486 return;
1487 if (loBit
6.1
'loBit' is < APINT_BITS_PER_WORD
6.1
'loBit' is < APINT_BITS_PER_WORD
< APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
7
Assuming 'hiBit' is <= APINT_BITS_PER_WORD
8
Taking true branch
1488 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1489 mask <<= loBit;
1490 if (isSingleWord())
9
Taking true branch
1491 U.VAL |= mask;
1492 else
1493 U.pVal[0] |= mask;
1494 } else {
1495 setBitsSlowCase(loBit, hiBit);
1496 }
1497 }
1498
1499 /// Set the top bits starting from loBit.
1500 void setBitsFrom(unsigned loBit) {
1501 return setBits(loBit, BitWidth);
1502 }
1503
1504 /// Set the bottom loBits bits.
1505 void setLowBits(unsigned loBits) {
1506 return setBits(0, loBits);
5
Calling 'APInt::setBits'
10
Returning from 'APInt::setBits'
1507 }
1508
1509 /// Set the top hiBits bits.
1510 void setHighBits(unsigned hiBits) {
1511 return setBits(BitWidth - hiBits, BitWidth);
1512 }
1513
1514 /// Set every bit to 0.
1515 void clearAllBits() {
1516 if (isSingleWord())
1517 U.VAL = 0;
1518 else
1519 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1520 }
1521
1522 /// Set a given bit to 0.
1523 ///
1524 /// Set the given bit to 0 whose position is given as "bitPosition".
1525 void clearBit(unsigned BitPosition) {
1526 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast<void> (0));
1527 WordType Mask = ~maskBit(BitPosition);
1528 if (isSingleWord())
1529 U.VAL &= Mask;
1530 else
1531 U.pVal[whichWord(BitPosition)] &= Mask;
1532 }
1533
1534 /// Set bottom loBits bits to 0.
1535 void clearLowBits(unsigned loBits) {
1536 assert(loBits <= BitWidth && "More bits than bitwidth")(static_cast<void> (0));
1537 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1538 *this &= Keep;
1539 }
1540
1541 /// Set the sign bit to 0.
1542 void clearSignBit() {
1543 clearBit(BitWidth - 1);
1544 }
1545
1546 /// Toggle every bit to its opposite value.
1547 void flipAllBits() {
1548 if (isSingleWord()) {
1549 U.VAL ^= WORDTYPE_MAX;
1550 clearUnusedBits();
1551 } else {
1552 flipAllBitsSlowCase();
1553 }
1554 }
1555
1556 /// Toggles a given bit to its opposite value.
1557 ///
1558 /// Toggle a given bit to its opposite value whose position is given
1559 /// as "bitPosition".
1560 void flipBit(unsigned bitPosition);
1561
1562 /// Negate this APInt in place.
1563 void negate() {
1564 flipAllBits();
1565 ++(*this);
1566 }
1567
1568 /// Insert the bits from a smaller APInt starting at bitPosition.
1569 void insertBits(const APInt &SubBits, unsigned bitPosition);
1570 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1571
1572 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1573 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1574 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1575
1576 /// @}
1577 /// \name Value Characterization Functions
1578 /// @{
1579
1580 /// Return the number of bits in the APInt.
1581 unsigned getBitWidth() const { return BitWidth; }
1582
1583 /// Get the number of words.
1584 ///
1585 /// Here one word's bitwidth equals to that of uint64_t.
1586 ///
1587 /// \returns the number of words to hold the integer value of this APInt.
1588 unsigned getNumWords() const { return getNumWords(BitWidth); }
1589
1590 /// Get the number of words.
1591 ///
1592 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1593 ///
1594 /// \returns the number of words to hold the integer value with a given bit
1595 /// width.
1596 static unsigned getNumWords(unsigned BitWidth) {
1597 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1598 }
1599
1600 /// Compute the number of active bits in the value
1601 ///
1602 /// This function returns the number of active bits which is defined as the
1603 /// bit width minus the number of leading zeros. This is used in several
1604 /// computations to see how "wide" the value is.
1605 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1606
1607 /// Compute the number of active words in the value of this APInt.
1608 ///
1609 /// This is used in conjunction with getActiveData to extract the raw value of
1610 /// the APInt.
1611 unsigned getActiveWords() const {
1612 unsigned numActiveBits = getActiveBits();
1613 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1614 }
1615
1616 /// Get the minimum bit size for this signed APInt
1617 ///
1618 /// Computes the minimum bit width for this APInt while considering it to be a
1619 /// signed (and probably negative) value. If the value is not negative, this
1620 /// function returns the same value as getActiveBits()+1. Otherwise, it
1621 /// returns the smallest bit width that will retain the negative value. For
1622 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1623 /// for -1, this function will always return 1.
1624 unsigned getMinSignedBits() const { return BitWidth - getNumSignBits() + 1; }
1625
1626 /// Get zero extended value
1627 ///
1628 /// This method attempts to return the value of this APInt as a zero extended
1629 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1630 /// uint64_t. Otherwise an assertion will result.
1631 uint64_t getZExtValue() const {
1632 if (isSingleWord())
1633 return U.VAL;
1634 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")(static_cast<void> (0));
1635 return U.pVal[0];
1636 }
1637
1638 /// Get sign extended value
1639 ///
1640 /// This method attempts to return the value of this APInt as a sign extended
1641 /// int64_t. The bit width must be <= 64 or the value must fit within an
1642 /// int64_t. Otherwise an assertion will result.
1643 int64_t getSExtValue() const {
1644 if (isSingleWord())
1645 return SignExtend64(U.VAL, BitWidth);
1646 assert(getMinSignedBits() <= 64 && "Too many bits for int64_t")(static_cast<void> (0));
1647 return int64_t(U.pVal[0]);
1648 }
1649
1650 /// Get bits required for string value.
1651 ///
1652 /// This method determines how many bits are required to hold the APInt
1653 /// equivalent of the string given by \p str.
1654 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1655
1656 /// The APInt version of the countLeadingZeros functions in
1657 /// MathExtras.h.
1658 ///
1659 /// It counts the number of zeros from the most significant bit to the first
1660 /// one bit.
1661 ///
1662 /// \returns BitWidth if the value is zero, otherwise returns the number of
1663 /// zeros from the most significant bit to the first one bits.
1664 unsigned countLeadingZeros() const {
1665 if (isSingleWord()) {
1666 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1667 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1668 }
1669 return countLeadingZerosSlowCase();
1670 }
1671
1672 /// Count the number of leading one bits.
1673 ///
1674 /// This function is an APInt version of the countLeadingOnes
1675 /// functions in MathExtras.h. It counts the number of ones from the most
1676 /// significant bit to the first zero bit.
1677 ///
1678 /// \returns 0 if the high order bit is not set, otherwise returns the number
1679 /// of 1 bits from the most significant to the least
1680 unsigned countLeadingOnes() const {
1681 if (isSingleWord())
1682 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1683 return countLeadingOnesSlowCase();
1684 }
1685
1686 /// Computes the number of leading bits of this APInt that are equal to its
1687 /// sign bit.
1688 unsigned getNumSignBits() const {
1689 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1690 }
1691
1692 /// Count the number of trailing zero bits.
1693 ///
1694 /// This function is an APInt version of the countTrailingZeros
1695 /// functions in MathExtras.h. It counts the number of zeros from the least
1696 /// significant bit to the first set bit.
1697 ///
1698 /// \returns BitWidth if the value is zero, otherwise returns the number of
1699 /// zeros from the least significant bit to the first one bit.
1700 unsigned countTrailingZeros() const {
1701 if (isSingleWord()) {
1702 unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL);
1703 return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
1704 }
1705 return countTrailingZerosSlowCase();
1706 }
1707
1708 /// Count the number of trailing one bits.
1709 ///
1710 /// This function is an APInt version of the countTrailingOnes
1711 /// functions in MathExtras.h. It counts the number of ones from the least
1712 /// significant bit to the first zero bit.
1713 ///
1714 /// \returns BitWidth if the value is all ones, otherwise returns the number
1715 /// of ones from the least significant bit to the first zero bit.
1716 unsigned countTrailingOnes() const {
1717 if (isSingleWord())
1718 return llvm::countTrailingOnes(U.VAL);
1719 return countTrailingOnesSlowCase();
1720 }
1721
1722 /// Count the number of bits set.
1723 ///
1724 /// This function is an APInt version of the countPopulation functions
1725 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1726 ///
1727 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1728 unsigned countPopulation() const {
1729 if (isSingleWord())
1730 return llvm::countPopulation(U.VAL);
1731 return countPopulationSlowCase();
1732 }
1733
1734 /// @}
1735 /// \name Conversion Functions
1736 /// @{
1737 void print(raw_ostream &OS, bool isSigned) const;
1738
1739 /// Converts an APInt to a string and append it to Str. Str is commonly a
1740 /// SmallString.
1741 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1742 bool formatAsCLiteral = false) const;
1743
1744 /// Considers the APInt to be unsigned and converts it into a string in the
1745 /// radix given. The radix can be 2, 8, 10 16, or 36.
1746 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1747 toString(Str, Radix, false, false);
1748 }
1749
1750 /// Considers the APInt to be signed and converts it into a string in the
1751 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1752 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1753 toString(Str, Radix, true, false);
1754 }
1755
1756 /// \returns a byte-swapped representation of this APInt Value.
1757 APInt byteSwap() const;
1758
1759 /// \returns the value with the bit representation reversed of this APInt
1760 /// Value.
1761 APInt reverseBits() const;
1762
1763 /// Converts this APInt to a double value.
1764 double roundToDouble(bool isSigned) const;
1765
1766 /// Converts this unsigned APInt to a double value.
1767 double roundToDouble() const { return roundToDouble(false); }
1768
1769 /// Converts this signed APInt to a double value.
1770 double signedRoundToDouble() const { return roundToDouble(true); }
1771
1772 /// Converts APInt bits to a double
1773 ///
1774 /// The conversion does not do a translation from integer to double, it just
1775 /// re-interprets the bits as a double. Note that it is valid to do this on
1776 /// any bit width. Exactly 64 bits will be translated.
1777 double bitsToDouble() const {
1778 return BitsToDouble(getWord(0));
1779 }
1780
1781 /// Converts APInt bits to a float
1782 ///
1783 /// The conversion does not do a translation from integer to float, it just
1784 /// re-interprets the bits as a float. Note that it is valid to do this on
1785 /// any bit width. Exactly 32 bits will be translated.
1786 float bitsToFloat() const {
1787 return BitsToFloat(static_cast<uint32_t>(getWord(0)));
1788 }
1789
1790 /// Converts a double to APInt bits.
1791 ///
1792 /// The conversion does not do a translation from double to integer, it just
1793 /// re-interprets the bits of the double.
1794 static APInt doubleToBits(double V) {
1795 return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V));
1796 }
1797
1798 /// Converts a float to APInt bits.
1799 ///
1800 /// The conversion does not do a translation from float to integer, it just
1801 /// re-interprets the bits of the float.
1802 static APInt floatToBits(float V) {
1803 return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V));
1804 }
1805
1806 /// @}
1807 /// \name Mathematics Operations
1808 /// @{
1809
1810 /// \returns the floor log base 2 of this APInt.
1811 unsigned logBase2() const { return getActiveBits() - 1; }
1812
1813 /// \returns the ceil log base 2 of this APInt.
1814 unsigned ceilLogBase2() const {
1815 APInt temp(*this);
1816 --temp;
1817 return temp.getActiveBits();
1818 }
1819
1820 /// \returns the nearest log base 2 of this APInt. Ties round up.
1821 ///
1822 /// NOTE: When we have a BitWidth of 1, we define:
1823 ///
1824 /// log2(0) = UINT32_MAX
1825 /// log2(1) = 0
1826 ///
1827 /// to get around any mathematical concerns resulting from
1828 /// referencing 2 in a space where 2 does no exist.
1829 unsigned nearestLogBase2() const {
1830 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1831 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1832 // UINT32_MAX.
1833 if (BitWidth == 1)
1834 return U.VAL - 1;
1835
1836 // Handle the zero case.
1837 if (isNullValue())
1838 return UINT32_MAX(4294967295U);
1839
1840 // The non-zero case is handled by computing:
1841 //
1842 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1843 //
1844 // where x[i] is referring to the value of the ith bit of x.
1845 unsigned lg = logBase2();
1846 return lg + unsigned((*this)[lg - 1]);
1847 }
1848
1849 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1850 /// otherwise
1851 int32_t exactLogBase2() const {
1852 if (!isPowerOf2())
1853 return -1;
1854 return logBase2();
1855 }
1856
1857 /// Compute the square root
1858 APInt sqrt() const;
1859
1860 /// Get the absolute value;
1861 ///
1862 /// If *this is < 0 then return -(*this), otherwise *this;
1863 APInt abs() const {
1864 if (isNegative())
1865 return -(*this);
1866 return *this;
1867 }
1868
1869 /// \returns the multiplicative inverse for a given modulo.
1870 APInt multiplicativeInverse(const APInt &modulo) const;
1871
1872 /// @}
1873 /// \name Support for division by constant
1874 /// @{
1875
1876 /// Calculate the magic number for signed division by a constant.
1877 struct ms;
1878 ms magic() const;
1879
1880 /// Calculate the magic number for unsigned division by a constant.
1881 struct mu;
1882 mu magicu(unsigned LeadingZeros = 0) const;
1883
1884 /// @}
1885 /// \name Building-block Operations for APInt and APFloat
1886 /// @{
1887
1888 // These building block operations operate on a representation of arbitrary
1889 // precision, two's-complement, bignum integer values. They should be
1890 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1891 // generally a pointer to the base of an array of integer parts, representing
1892 // an unsigned bignum, and a count of how many parts there are.
1893
1894 /// Sets the least significant part of a bignum to the input value, and zeroes
1895 /// out higher parts.
1896 static void tcSet(WordType *, WordType, unsigned);
1897
1898 /// Assign one bignum to another.
1899 static void tcAssign(WordType *, const WordType *, unsigned);
1900
1901 /// Returns true if a bignum is zero, false otherwise.
1902 static bool tcIsZero(const WordType *, unsigned);
1903
1904 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1905 static int tcExtractBit(const WordType *, unsigned bit);
1906
1907 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1908 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1909 /// significant bit of DST. All high bits above srcBITS in DST are
1910 /// zero-filled.
1911 static void tcExtract(WordType *, unsigned dstCount,
1912 const WordType *, unsigned srcBits,
1913 unsigned srcLSB);
1914
1915 /// Set the given bit of a bignum. Zero-based.
1916 static void tcSetBit(WordType *, unsigned bit);
1917
1918 /// Clear the given bit of a bignum. Zero-based.
1919 static void tcClearBit(WordType *, unsigned bit);
1920
1921 /// Returns the bit number of the least or most significant set bit of a
1922 /// number. If the input number has no bits set -1U is returned.
1923 static unsigned tcLSB(const WordType *, unsigned n);
1924 static unsigned tcMSB(const WordType *parts, unsigned n);
1925
1926 /// Negate a bignum in-place.
1927 static void tcNegate(WordType *, unsigned);
1928
1929 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1930 static WordType tcAdd(WordType *, const WordType *,
1931 WordType carry, unsigned);
1932 /// DST += RHS. Returns the carry flag.
1933 static WordType tcAddPart(WordType *, WordType, unsigned);
1934
1935 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1936 static WordType tcSubtract(WordType *, const WordType *,
1937 WordType carry, unsigned);
1938 /// DST -= RHS. Returns the carry flag.
1939 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1940
1941 /// DST += SRC * MULTIPLIER + PART if add is true
1942 /// DST = SRC * MULTIPLIER + PART if add is false
1943 ///
1944 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1945 /// start at the same point, i.e. DST == SRC.
1946 ///
1947 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1948 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1949 /// result, and if all of the omitted higher parts were zero return zero,
1950 /// otherwise overflow occurred and return one.
1951 static int tcMultiplyPart(WordType *dst, const WordType *src,
1952 WordType multiplier, WordType carry,
1953 unsigned srcParts, unsigned dstParts,
1954 bool add);
1955
1956 /// DST = LHS * RHS, where DST has the same width as the operands and is
1957 /// filled with the least significant parts of the result. Returns one if
1958 /// overflow occurred, otherwise zero. DST must be disjoint from both
1959 /// operands.
1960 static int tcMultiply(WordType *, const WordType *, const WordType *,
1961 unsigned);
1962
1963 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1964 /// operands. No overflow occurs. DST must be disjoint from both operands.
1965 static void tcFullMultiply(WordType *, const WordType *,
1966 const WordType *, unsigned, unsigned);
1967
1968 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1969 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1970 /// REMAINDER to the remainder, return zero. i.e.
1971 ///
1972 /// OLD_LHS = RHS * LHS + REMAINDER
1973 ///
1974 /// SCRATCH is a bignum of the same size as the operands and result for use by
1975 /// the routine; its contents need not be initialized and are destroyed. LHS,
1976 /// REMAINDER and SCRATCH must be distinct.
1977 static int tcDivide(WordType *lhs, const WordType *rhs,
1978 WordType *remainder, WordType *scratch,
1979 unsigned parts);
1980
1981 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1982 /// restrictions on Count.
1983 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1984
1985 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1986 /// restrictions on Count.
1987 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1988
1989 /// The obvious AND, OR and XOR and complement operations.
1990 static void tcAnd(WordType *, const WordType *, unsigned);
1991 static void tcOr(WordType *, const WordType *, unsigned);
1992 static void tcXor(WordType *, const WordType *, unsigned);
1993 static void tcComplement(WordType *, unsigned);
1994
1995 /// Comparison (unsigned) of two bignums.
1996 static int tcCompare(const WordType *, const WordType *, unsigned);
1997
1998 /// Increment a bignum in-place. Return the carry flag.
1999 static WordType tcIncrement(WordType *dst, unsigned parts) {
2000 return tcAddPart(dst, 1, parts);
2001 }
2002
2003 /// Decrement a bignum in-place. Return the borrow flag.
2004 static WordType tcDecrement(WordType *dst, unsigned parts) {
2005 return tcSubtractPart(dst, 1, parts);
2006 }
2007
2008 /// Set the least significant BITS and clear the rest.
2009 static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
2010
2011 /// debug method
2012 void dump() const;
2013
2014 /// @}
2015};
2016
2017/// Magic data for optimising signed division by a constant.
2018struct APInt::ms {
2019 APInt m; ///< magic number
2020 unsigned s; ///< shift amount
2021};
2022
2023/// Magic data for optimising unsigned division by a constant.
2024struct APInt::mu {
2025 APInt m; ///< magic number
2026 bool a; ///< add indicator
2027 unsigned s; ///< shift amount
2028};
2029
2030inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
2031
2032inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
2033
2034/// Unary bitwise complement operator.
2035///
2036/// \returns an APInt that is the bitwise complement of \p v.
2037inline APInt operator~(APInt v) {
2038 v.flipAllBits();
2039 return v;
2040}
2041
2042inline APInt operator&(APInt a, const APInt &b) {
2043 a &= b;
2044 return a;
2045}
2046
2047inline APInt operator&(const APInt &a, APInt &&b) {
2048 b &= a;
2049 return std::move(b);
2050}
2051
2052inline APInt operator&(APInt a, uint64_t RHS) {
2053 a &= RHS;
2054 return a;
2055}
2056
2057inline APInt operator&(uint64_t LHS, APInt b) {
2058 b &= LHS;
2059 return b;
2060}
2061
2062inline APInt operator|(APInt a, const APInt &b) {
2063 a |= b;
2064 return a;
2065}
2066
2067inline APInt operator|(const APInt &a, APInt &&b) {
2068 b |= a;
2069 return std::move(b);
2070}
2071
2072inline APInt operator|(APInt a, uint64_t RHS) {
2073 a |= RHS;
2074 return a;
2075}
2076
2077inline APInt operator|(uint64_t LHS, APInt b) {
2078 b |= LHS;
2079 return b;
2080}
2081
2082inline APInt operator^(APInt a, const APInt &b) {
2083 a ^= b;
2084 return a;
2085}
2086
2087inline APInt operator^(const APInt &a, APInt &&b) {
2088 b ^= a;
2089 return std::move(b);
2090}
2091
2092inline APInt operator^(APInt a, uint64_t RHS) {
2093 a ^= RHS;
2094 return a;
2095}
2096
2097inline APInt operator^(uint64_t LHS, APInt b) {
2098 b ^= LHS;
2099 return b;
2100}
2101
2102inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2103 I.print(OS, true);
2104 return OS;
2105}
2106
2107inline APInt operator-(APInt v) {
2108 v.negate();
2109 return v;
2110}
2111
2112inline APInt operator+(APInt a, const APInt &b) {
2113 a += b;
2114 return a;
2115}
2116
2117inline APInt operator+(const APInt &a, APInt &&b) {
2118 b += a;
2119 return std::move(b);
2120}
2121
2122inline APInt operator+(APInt a, uint64_t RHS) {
2123 a += RHS;
2124 return a;
2125}
2126
2127inline APInt operator+(uint64_t LHS, APInt b) {
2128 b += LHS;
2129 return b;
2130}
2131
2132inline APInt operator-(APInt a, const APInt &b) {
2133 a -= b;
2134 return a;
2135}
2136
2137inline APInt operator-(const APInt &a, APInt &&b) {
2138 b.negate();
2139 b += a;
2140 return std::move(b);
2141}
2142
2143inline APInt operator-(APInt a, uint64_t RHS) {
2144 a -= RHS;
2145 return a;
2146}
2147
2148inline APInt operator-(uint64_t LHS, APInt b) {
2149 b.negate();
2150 b += LHS;
2151 return b;
2152}
2153
2154inline APInt operator*(APInt a, uint64_t RHS) {
2155 a *= RHS;
2156 return a;
2157}
2158
2159inline APInt operator*(uint64_t LHS, APInt b) {
2160 b *= LHS;
2161 return b;
2162}
2163
2164
2165namespace APIntOps {
2166
2167/// Determine the smaller of two APInts considered to be signed.
2168inline const APInt &smin(const APInt &A, const APInt &B) {
2169 return A.slt(B) ? A : B;
2170}
2171
2172/// Determine the larger of two APInts considered to be signed.
2173inline const APInt &smax(const APInt &A, const APInt &B) {
2174 return A.sgt(B) ? A : B;
2175}
2176
2177/// Determine the smaller of two APInts considered to be unsigned.
2178inline const APInt &umin(const APInt &A, const APInt &B) {
2179 return A.ult(B) ? A : B;
2180}
2181
2182/// Determine the larger of two APInts considered to be unsigned.
2183inline const APInt &umax(const APInt &A, const APInt &B) {
2184 return A.ugt(B) ? A : B;
2185}
2186
2187/// Compute GCD of two unsigned APInt values.
2188///
2189/// This function returns the greatest common divisor of the two APInt values
2190/// using Stein's algorithm.
2191///
2192/// \returns the greatest common divisor of A and B.
2193APInt GreatestCommonDivisor(APInt A, APInt B);
2194
2195/// Converts the given APInt to a double value.
2196///
2197/// Treats the APInt as an unsigned value for conversion purposes.
2198inline double RoundAPIntToDouble(const APInt &APIVal) {
2199 return APIVal.roundToDouble();
2200}
2201
2202/// Converts the given APInt to a double value.
2203///
2204/// Treats the APInt as a signed value for conversion purposes.
2205inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2206 return APIVal.signedRoundToDouble();
2207}
2208
2209/// Converts the given APInt to a float value.
2210inline float RoundAPIntToFloat(const APInt &APIVal) {
2211 return float(RoundAPIntToDouble(APIVal));
2212}
2213
2214/// Converts the given APInt to a float value.
2215///
2216/// Treats the APInt as a signed value for conversion purposes.
2217inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2218 return float(APIVal.signedRoundToDouble());
2219}
2220
2221/// Converts the given double value into a APInt.
2222///
2223/// This function convert a double value to an APInt value.
2224APInt RoundDoubleToAPInt(double Double, unsigned width);
2225
2226/// Converts a float value into a APInt.
2227///
2228/// Converts a float value into an APInt value.
2229inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2230 return RoundDoubleToAPInt(double(Float), width);
2231}
2232
2233/// Return A unsign-divided by B, rounded by the given rounding mode.
2234APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2235
2236/// Return A sign-divided by B, rounded by the given rounding mode.
2237APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2238
2239/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2240/// (e.g. 32 for i32).
2241/// This function finds the smallest number n, such that
2242/// (a) n >= 0 and q(n) = 0, or
2243/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2244/// integers, belong to two different intervals [Rk, Rk+R),
2245/// where R = 2^BW, and k is an integer.
2246/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2247/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2248/// subtraction (treated as addition of negated numbers) would always
2249/// count as an overflow, but here we want to allow values to decrease
2250/// and increase as long as they are within the same interval.
2251/// Specifically, adding of two negative numbers should not cause an
2252/// overflow (as long as the magnitude does not exceed the bit width).
2253/// On the other hand, given a positive number, adding a negative
2254/// number to it can give a negative result, which would cause the
2255/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2256/// treated as a special case of an overflow.
2257///
2258/// This function returns None if after finding k that minimizes the
2259/// positive solution to q(n) = kR, both solutions are contained between
2260/// two consecutive integers.
2261///
2262/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2263/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2264/// virtue of *signed* overflow. This function will *not* find such an n,
2265/// however it may find a value of n satisfying the inequalities due to
2266/// an *unsigned* overflow (if the values are treated as unsigned).
2267/// To find a solution for a signed overflow, treat it as a problem of
2268/// finding an unsigned overflow with a range with of BW-1.
2269///
2270/// The returned value may have a different bit width from the input
2271/// coefficients.
2272Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2273 unsigned RangeWidth);
2274
2275/// Compare two values, and if they are different, return the position of the
2276/// most significant bit that is different in the values.
2277Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2278 const APInt &B);
2279
2280} // End of APIntOps namespace
2281
2282// See friend declaration above. This additional declaration is required in
2283// order to compile LLVM with IBM xlC compiler.
2284hash_code hash_value(const APInt &Arg);
2285
2286/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2287/// with the integer held in IntVal.
2288void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2289
2290/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2291/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2292void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
2293
2294/// Provide DenseMapInfo for APInt.
2295template <> struct DenseMapInfo<APInt> {
2296 static inline APInt getEmptyKey() {
2297 APInt V(nullptr, 0);
2298 V.U.VAL = 0;
2299 return V;
2300 }
2301
2302 static inline APInt getTombstoneKey() {
2303 APInt V(nullptr, 0);
2304 V.U.VAL = 1;
2305 return V;
2306 }
2307
2308 static unsigned getHashValue(const APInt &Key);
2309
2310 static bool isEqual(const APInt &LHS, const APInt &RHS) {
2311 return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
2312 }
2313};
2314
2315} // namespace llvm
2316
2317#endif