Bug Summary

File:build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1560, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-16/lib/clang/16.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/Hexagon -I include -I /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-16/lib/clang/16.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-09-04-125545-48738-1 -x c++ /build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonLoopIdiomRecognition.h"
10#include "llvm/ADT/APInt.h"
11#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/Triple.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/InstructionSimplify.h"
20#include "llvm/Analysis/LoopAnalysisManager.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopPass.h"
23#include "llvm/Analysis/MemoryLocation.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/ScalarEvolutionExpressions.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/ValueTracking.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/IntrinsicsHexagon.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/User.h"
49#include "llvm/IR/Value.h"
50#include "llvm/InitializePasses.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include "llvm/Transforms/Utils/Local.h"
62#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
63#include <algorithm>
64#include <array>
65#include <cassert>
66#include <cstdint>
67#include <cstdlib>
68#include <deque>
69#include <functional>
70#include <iterator>
71#include <map>
72#include <set>
73#include <utility>
74#include <vector>
75
76#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
77
78using namespace llvm;
79
80static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memcpy in loop idiom recognition"));
83
84static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
85 cl::Hidden, cl::init(false),
86 cl::desc("Disable generation of memmove in loop idiom recognition"));
87
88static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
89 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
90 "check guarding the memmove."));
91
92static cl::opt<unsigned> CompileTimeMemSizeThreshold(
93 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
94 cl::desc("Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
96
97static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
98 cl::Hidden, cl::init(true),
99 cl::desc("Only enable generating memmove in non-nested loops"));
100
101static cl::opt<bool> HexagonVolatileMemcpy(
102 "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
103 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
104
105static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
106 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
107
108static const char *HexagonVolatileMemcpyName
109 = "hexagon_memcpy_forward_vp4cp4n2";
110
111
112namespace llvm {
113
114void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
115Pass *createHexagonLoopIdiomPass();
116
117} // end namespace llvm
118
119namespace {
120
121class HexagonLoopIdiomRecognize {
122public:
123 explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
124 LoopInfo *LF, const TargetLibraryInfo *TLI,
125 ScalarEvolution *SE)
126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
127
128 bool run(Loop *L);
129
130private:
131 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
132 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
133 void collectStores(Loop *CurLoop, BasicBlock *BB,
134 SmallVectorImpl<StoreInst *> &Stores);
135 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
136 bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const;
137 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
138 SmallVectorImpl<BasicBlock *> &ExitBlocks);
139 bool runOnCountableLoop(Loop *L);
140
141 AliasAnalysis *AA;
142 const DataLayout *DL;
143 DominatorTree *DT;
144 LoopInfo *LF;
145 const TargetLibraryInfo *TLI;
146 ScalarEvolution *SE;
147 bool HasMemcpy, HasMemmove;
148};
149
150class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {
151public:
152 static char ID;
153
154 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
155 initializeHexagonLoopIdiomRecognizeLegacyPassPass(
156 *PassRegistry::getPassRegistry());
157 }
158
159 StringRef getPassName() const override {
160 return "Recognize Hexagon-specific loop idioms";
161 }
162
163 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 AU.addRequired<LoopInfoWrapperPass>();
165 AU.addRequiredID(LoopSimplifyID);
166 AU.addRequiredID(LCSSAID);
167 AU.addRequired<AAResultsWrapperPass>();
168 AU.addRequired<ScalarEvolutionWrapperPass>();
169 AU.addRequired<DominatorTreeWrapperPass>();
170 AU.addRequired<TargetLibraryInfoWrapperPass>();
171 AU.addPreserved<TargetLibraryInfoWrapperPass>();
172 }
173
174 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
175};
176
177struct Simplifier {
178 struct Rule {
179 using FuncType = std::function<Value *(Instruction *, LLVMContext &)>;
180 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
181 StringRef Name; // For debugging.
182 FuncType Fn;
183 };
184
185 void addRule(StringRef N, const Rule::FuncType &F) {
186 Rules.push_back(Rule(N, F));
187 }
188
189private:
190 struct WorkListType {
191 WorkListType() = default;
192
193 void push_back(Value *V) {
194 // Do not push back duplicates.
195 if (S.insert(V).second)
196 Q.push_back(V);
197 }
198
199 Value *pop_front_val() {
200 Value *V = Q.front();
201 Q.pop_front();
202 S.erase(V);
203 return V;
204 }
205
206 bool empty() const { return Q.empty(); }
207
208 private:
209 std::deque<Value *> Q;
210 std::set<Value *> S;
211 };
212
213 using ValueSetType = std::set<Value *>;
214
215 std::vector<Rule> Rules;
216
217public:
218 struct Context {
219 using ValueMapType = DenseMap<Value *, Value *>;
220
221 Value *Root;
222 ValueSetType Used; // The set of all cloned values used by Root.
223 ValueSetType Clones; // The set of all cloned values.
224 LLVMContext &Ctx;
225
226 Context(Instruction *Exp)
227 : Ctx(Exp->getParent()->getParent()->getContext()) {
228 initialize(Exp);
229 }
230
231 ~Context() { cleanup(); }
232
233 void print(raw_ostream &OS, const Value *V) const;
234 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
235
236 private:
237 friend struct Simplifier;
238
239 void initialize(Instruction *Exp);
240 void cleanup();
241
242 template <typename FuncT> void traverse(Value *V, FuncT F);
243 void record(Value *V);
244 void use(Value *V);
245 void unuse(Value *V);
246
247 bool equal(const Instruction *I, const Instruction *J) const;
248 Value *find(Value *Tree, Value *Sub) const;
249 Value *subst(Value *Tree, Value *OldV, Value *NewV);
250 void replace(Value *OldV, Value *NewV);
251 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
252 };
253
254 Value *simplify(Context &C);
255};
256
257 struct PE {
258 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
259
260 const Simplifier::Context &C;
261 const Value *V;
262 };
263
264 LLVM_ATTRIBUTE_USED__attribute__((__used__))
265 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
266 P.C.print(OS, P.V ? P.V : P.C.Root);
267 return OS;
268 }
269
270} // end anonymous namespace
271
272char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
273
274INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
275 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
276INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
277INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
278INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
279INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
280INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
281INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
282INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
283INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
284 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
285
286template <typename FuncT>
287void Simplifier::Context::traverse(Value *V, FuncT F) {
288 WorkListType Q;
289 Q.push_back(V);
290
291 while (!Q.empty()) {
292 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
293 if (!U || U->getParent())
294 continue;
295 if (!F(U))
296 continue;
297 for (Value *Op : U->operands())
298 Q.push_back(Op);
299 }
300}
301
302void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
303 const auto *U = dyn_cast<const Instruction>(V);
304 if (!U) {
305 OS << V << '(' << *V << ')';
306 return;
307 }
308
309 if (U->getParent()) {
310 OS << U << '(';
311 U->printAsOperand(OS, true);
312 OS << ')';
313 return;
314 }
315
316 unsigned N = U->getNumOperands();
317 if (N != 0)
318 OS << U << '(';
319 OS << U->getOpcodeName();
320 for (const Value *Op : U->operands()) {
321 OS << ' ';
322 print(OS, Op);
323 }
324 if (N != 0)
325 OS << ')';
326}
327
328void Simplifier::Context::initialize(Instruction *Exp) {
329 // Perform a deep clone of the expression, set Root to the root
330 // of the clone, and build a map from the cloned values to the
331 // original ones.
332 ValueMapType M;
333 BasicBlock *Block = Exp->getParent();
334 WorkListType Q;
335 Q.push_back(Exp);
336
337 while (!Q.empty()) {
338 Value *V = Q.pop_front_val();
339 if (M.find(V) != M.end())
340 continue;
341 if (Instruction *U = dyn_cast<Instruction>(V)) {
342 if (isa<PHINode>(U) || U->getParent() != Block)
343 continue;
344 for (Value *Op : U->operands())
345 Q.push_back(Op);
346 M.insert({U, U->clone()});
347 }
348 }
349
350 for (std::pair<Value*,Value*> P : M) {
351 Instruction *U = cast<Instruction>(P.second);
352 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
353 auto F = M.find(U->getOperand(i));
354 if (F != M.end())
355 U->setOperand(i, F->second);
356 }
357 }
358
359 auto R = M.find(Exp);
360 assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail
("R != M.end()", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 360, __extension__ __PRETTY_FUNCTION__))
;
361 Root = R->second;
362
363 record(Root);
364 use(Root);
365}
366
367void Simplifier::Context::record(Value *V) {
368 auto Record = [this](Instruction *U) -> bool {
369 Clones.insert(U);
370 return true;
371 };
372 traverse(V, Record);
373}
374
375void Simplifier::Context::use(Value *V) {
376 auto Use = [this](Instruction *U) -> bool {
377 Used.insert(U);
378 return true;
379 };
380 traverse(V, Use);
381}
382
383void Simplifier::Context::unuse(Value *V) {
384 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
385 return;
386
387 auto Unuse = [this](Instruction *U) -> bool {
388 if (!U->use_empty())
389 return false;
390 Used.erase(U);
391 return true;
392 };
393 traverse(V, Unuse);
394}
395
396Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
397 if (Tree == OldV)
398 return NewV;
399 if (OldV == NewV)
400 return Tree;
401
402 WorkListType Q;
403 Q.push_back(Tree);
404 while (!Q.empty()) {
405 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
406 // If U is not an instruction, or it's not a clone, skip it.
407 if (!U || U->getParent())
408 continue;
409 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
410 Value *Op = U->getOperand(i);
411 if (Op == OldV) {
412 U->setOperand(i, NewV);
413 unuse(OldV);
414 } else {
415 Q.push_back(Op);
416 }
417 }
418 }
419 return Tree;
420}
421
422void Simplifier::Context::replace(Value *OldV, Value *NewV) {
423 if (Root == OldV) {
424 Root = NewV;
425 use(Root);
426 return;
427 }
428
429 // NewV may be a complex tree that has just been created by one of the
430 // transformation rules. We need to make sure that it is commoned with
431 // the existing Root to the maximum extent possible.
432 // Identify all subtrees of NewV (including NewV itself) that have
433 // equivalent counterparts in Root, and replace those subtrees with
434 // these counterparts.
435 WorkListType Q;
436 Q.push_back(NewV);
437 while (!Q.empty()) {
438 Value *V = Q.pop_front_val();
439 Instruction *U = dyn_cast<Instruction>(V);
440 if (!U || U->getParent())
441 continue;
442 if (Value *DupV = find(Root, V)) {
443 if (DupV != V)
444 NewV = subst(NewV, V, DupV);
445 } else {
446 for (Value *Op : U->operands())
447 Q.push_back(Op);
448 }
449 }
450
451 // Now, simply replace OldV with NewV in Root.
452 Root = subst(Root, OldV, NewV);
453 use(Root);
454}
455
456void Simplifier::Context::cleanup() {
457 for (Value *V : Clones) {
458 Instruction *U = cast<Instruction>(V);
459 if (!U->getParent())
460 U->dropAllReferences();
461 }
462
463 for (Value *V : Clones) {
464 Instruction *U = cast<Instruction>(V);
465 if (!U->getParent())
466 U->deleteValue();
467 }
468}
469
470bool Simplifier::Context::equal(const Instruction *I,
471 const Instruction *J) const {
472 if (I == J)
473 return true;
474 if (!I->isSameOperationAs(J))
475 return false;
476 if (isa<PHINode>(I))
477 return I->isIdenticalTo(J);
478
479 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
480 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
481 if (OpI == OpJ)
482 continue;
483 auto *InI = dyn_cast<const Instruction>(OpI);
484 auto *InJ = dyn_cast<const Instruction>(OpJ);
485 if (InI && InJ) {
486 if (!equal(InI, InJ))
487 return false;
488 } else if (InI != InJ || !InI)
489 return false;
490 }
491 return true;
492}
493
494Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
495 Instruction *SubI = dyn_cast<Instruction>(Sub);
496 WorkListType Q;
497 Q.push_back(Tree);
498
499 while (!Q.empty()) {
500 Value *V = Q.pop_front_val();
501 if (V == Sub)
502 return V;
503 Instruction *U = dyn_cast<Instruction>(V);
504 if (!U || U->getParent())
505 continue;
506 if (SubI && equal(SubI, U))
507 return U;
508 assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0)
: __assert_fail ("!isa<PHINode>(U)", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 508, __extension__ __PRETTY_FUNCTION__))
;
509 for (Value *Op : U->operands())
510 Q.push_back(Op);
511 }
512 return nullptr;
513}
514
515void Simplifier::Context::link(Instruction *I, BasicBlock *B,
516 BasicBlock::iterator At) {
517 if (I->getParent())
518 return;
519
520 for (Value *Op : I->operands()) {
521 if (Instruction *OpI = dyn_cast<Instruction>(Op))
522 link(OpI, B, At);
523 }
524
525 B->getInstList().insert(At, I);
526}
527
528Value *Simplifier::Context::materialize(BasicBlock *B,
529 BasicBlock::iterator At) {
530 if (Instruction *RootI = dyn_cast<Instruction>(Root))
531 link(RootI, B, At);
532 return Root;
533}
534
535Value *Simplifier::simplify(Context &C) {
536 WorkListType Q;
537 Q.push_back(C.Root);
538 unsigned Count = 0;
539 const unsigned Limit = SimplifyLimit;
540
541 while (!Q.empty()) {
542 if (Count++ >= Limit)
543 break;
544 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
545 if (!U || U->getParent() || !C.Used.count(U))
546 continue;
547 bool Changed = false;
548 for (Rule &R : Rules) {
549 Value *W = R.Fn(U, C.Ctx);
550 if (!W)
551 continue;
552 Changed = true;
553 C.record(W);
554 C.replace(U, W);
555 Q.push_back(C.Root);
556 break;
557 }
558 if (!Changed) {
559 for (Value *Op : U->operands())
560 Q.push_back(Op);
561 }
562 }
563 return Count < Limit ? C.Root : nullptr;
564}
565
566//===----------------------------------------------------------------------===//
567//
568// Implementation of PolynomialMultiplyRecognize
569//
570//===----------------------------------------------------------------------===//
571
572namespace {
573
574 class PolynomialMultiplyRecognize {
575 public:
576 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
577 const DominatorTree &dt, const TargetLibraryInfo &tli,
578 ScalarEvolution &se)
579 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
580
581 bool recognize();
582
583 private:
584 using ValueSeq = SetVector<Value *>;
585
586 IntegerType *getPmpyType() const {
587 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
588 return IntegerType::get(Ctx, 32);
589 }
590
591 bool isPromotableTo(Value *V, IntegerType *Ty);
592 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
593 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
594
595 Value *getCountIV(BasicBlock *BB);
596 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
597 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
598 ValueSeq &Late);
599 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
600 bool commutesWithShift(Instruction *I);
601 bool highBitsAreZero(Value *V, unsigned IterCount);
602 bool keepsHighBitsZero(Value *V, unsigned IterCount);
603 bool isOperandShifted(Instruction *I, Value *Op);
604 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
605 unsigned IterCount);
606 void cleanupLoopBody(BasicBlock *LoopB);
607
608 struct ParsedValues {
609 ParsedValues() = default;
610
611 Value *M = nullptr;
612 Value *P = nullptr;
613 Value *Q = nullptr;
614 Value *R = nullptr;
615 Value *X = nullptr;
616 Instruction *Res = nullptr;
617 unsigned IterCount = 0;
618 bool Left = false;
619 bool Inv = false;
620 };
621
622 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
623 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
624 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
625 Value *CIV, ParsedValues &PV, bool PreScan);
626 unsigned getInverseMxN(unsigned QP);
627 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
628
629 void setupPreSimplifier(Simplifier &S);
630 void setupPostSimplifier(Simplifier &S);
631
632 Loop *CurLoop;
633 const DataLayout &DL;
634 const DominatorTree &DT;
635 const TargetLibraryInfo &TLI;
636 ScalarEvolution &SE;
637 };
638
639} // end anonymous namespace
640
641Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
642 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
643 if (std::distance(PI, PE) != 2)
644 return nullptr;
645 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
646
647 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
648 auto *PN = cast<PHINode>(I);
649 Value *InitV = PN->getIncomingValueForBlock(PB);
650 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
651 continue;
652 Value *IterV = PN->getIncomingValueForBlock(BB);
653 auto *BO = dyn_cast<BinaryOperator>(IterV);
654 if (!BO)
655 continue;
656 if (BO->getOpcode() != Instruction::Add)
657 continue;
658 Value *IncV = nullptr;
659 if (BO->getOperand(0) == PN)
660 IncV = BO->getOperand(1);
661 else if (BO->getOperand(1) == PN)
662 IncV = BO->getOperand(0);
663 if (IncV == nullptr)
664 continue;
665
666 if (auto *T = dyn_cast<ConstantInt>(IncV))
667 if (T->getZExtValue() == 1)
668 return PN;
669 }
670 return nullptr;
671}
672
673static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
674 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
675 Use &TheUse = UI.getUse();
676 ++UI;
677 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
678 if (BB == II->getParent())
679 II->replaceUsesOfWith(I, J);
680 }
681}
682
683bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
684 Value *CIV, ParsedValues &PV) {
685 // Match the following:
686 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
687 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
688 // The condition may also check for equality with the masked value, i.e
689 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
690 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
691
692 Value *CondV = SelI->getCondition();
693 Value *TrueV = SelI->getTrueValue();
694 Value *FalseV = SelI->getFalseValue();
695
696 using namespace PatternMatch;
697
698 CmpInst::Predicate P;
699 Value *A = nullptr, *B = nullptr, *C = nullptr;
700
701 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
702 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
703 return false;
704 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
705 return false;
706 // Matched: select (A & B) == C ? ... : ...
707 // select (A & B) != C ? ... : ...
708
709 Value *X = nullptr, *Sh1 = nullptr;
710 // Check (A & B) for (X & (1 << i)):
711 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
712 Sh1 = A;
713 X = B;
714 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
715 Sh1 = B;
716 X = A;
717 } else {
718 // TODO: Could also check for an induction variable containing single
719 // bit shifted left by 1 in each iteration.
720 return false;
721 }
722
723 bool TrueIfZero;
724
725 // Check C against the possible values for comparison: 0 and (1 << i):
726 if (match(C, m_Zero()))
727 TrueIfZero = (P == CmpInst::ICMP_EQ);
728 else if (C == Sh1)
729 TrueIfZero = (P == CmpInst::ICMP_NE);
730 else
731 return false;
732
733 // So far, matched:
734 // select (X & (1 << i)) ? ... : ...
735 // including variations of the check against zero/non-zero value.
736
737 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
738 if (TrueIfZero) {
739 ShouldSameV = TrueV;
740 ShouldXoredV = FalseV;
741 } else {
742 ShouldSameV = FalseV;
743 ShouldXoredV = TrueV;
744 }
745
746 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
747 Value *T = nullptr;
748 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
749 // Matched: select +++ ? ... : Y ^ Z
750 // select +++ ? Y ^ Z : ...
751 // where +++ denotes previously checked matches.
752 if (ShouldSameV == Y)
753 T = Z;
754 else if (ShouldSameV == Z)
755 T = Y;
756 else
757 return false;
758 R = ShouldSameV;
759 // Matched: select +++ ? R : R ^ T
760 // select +++ ? R ^ T : R
761 // depending on TrueIfZero.
762
763 } else if (match(ShouldSameV, m_Zero())) {
764 // Matched: select +++ ? 0 : ...
765 // select +++ ? ... : 0
766 if (!SelI->hasOneUse())
767 return false;
768 T = ShouldXoredV;
769 // Matched: select +++ ? 0 : T
770 // select +++ ? T : 0
771
772 Value *U = *SelI->user_begin();
773 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
774 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
775 return false;
776 // Matched: xor (select +++ ? 0 : T), R
777 // xor (select +++ ? T : 0), R
778 } else
779 return false;
780
781 // The xor input value T is isolated into its own match so that it could
782 // be checked against an induction variable containing a shifted bit
783 // (todo).
784 // For now, check against (Q << i).
785 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
786 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
787 return false;
788 // Matched: select +++ ? R : R ^ (Q << i)
789 // select +++ ? R ^ (Q << i) : R
790
791 PV.X = X;
792 PV.Q = Q;
793 PV.R = R;
794 PV.Left = true;
795 return true;
796}
797
798bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
799 ParsedValues &PV) {
800 // Match the following:
801 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
802 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
803 // The condition may also check for equality with the masked value, i.e
804 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
805 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
806
807 Value *CondV = SelI->getCondition();
808 Value *TrueV = SelI->getTrueValue();
809 Value *FalseV = SelI->getFalseValue();
810
811 using namespace PatternMatch;
812
813 Value *C = nullptr;
814 CmpInst::Predicate P;
815 bool TrueIfZero;
816
817 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
818 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
819 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
820 return false;
821 // Matched: select C == 0 ? ... : ...
822 // select C != 0 ? ... : ...
823 TrueIfZero = (P == CmpInst::ICMP_EQ);
824 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
825 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
826 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
827 return false;
828 // Matched: select C == 1 ? ... : ...
829 // select C != 1 ? ... : ...
830 TrueIfZero = (P == CmpInst::ICMP_NE);
831 } else
832 return false;
833
834 Value *X = nullptr;
835 if (!match(C, m_And(m_Value(X), m_One())) &&
836 !match(C, m_And(m_One(), m_Value(X))))
837 return false;
838 // Matched: select (X & 1) == +++ ? ... : ...
839 // select (X & 1) != +++ ? ... : ...
840
841 Value *R = nullptr, *Q = nullptr;
842 if (TrueIfZero) {
843 // The select's condition is true if the tested bit is 0.
844 // TrueV must be the shift, FalseV must be the xor.
845 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
846 return false;
847 // Matched: select +++ ? (R >> 1) : ...
848 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
849 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
850 return false;
851 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
852 // with commuting ^.
853 } else {
854 // The select's condition is true if the tested bit is 1.
855 // TrueV must be the xor, FalseV must be the shift.
856 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
857 return false;
858 // Matched: select +++ ? ... : (R >> 1)
859 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
860 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
861 return false;
862 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
863 // with commuting ^.
864 }
865
866 PV.X = X;
867 PV.Q = Q;
868 PV.R = R;
869 PV.Left = false;
870 return true;
871}
872
873bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
874 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
875 bool PreScan) {
876 using namespace PatternMatch;
877
878 // The basic pattern for R = P.Q is:
879 // for i = 0..31
880 // R = phi (0, R')
881 // if (P & (1 << i)) ; test-bit(P, i)
882 // R' = R ^ (Q << i)
883 //
884 // Similarly, the basic pattern for R = (P/Q).Q - P
885 // for i = 0..31
886 // R = phi(P, R')
887 // if (R & (1 << i))
888 // R' = R ^ (Q << i)
889
890 // There exist idioms, where instead of Q being shifted left, P is shifted
891 // right. This produces a result that is shifted right by 32 bits (the
892 // non-shifted result is 64-bit).
893 //
894 // For R = P.Q, this would be:
895 // for i = 0..31
896 // R = phi (0, R')
897 // if ((P >> i) & 1)
898 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
899 // else ; be shifted by 1, not i.
900 // R' = R >> 1
901 //
902 // And for the inverse:
903 // for i = 0..31
904 // R = phi (P, R')
905 // if (R & 1)
906 // R' = (R >> 1) ^ Q
907 // else
908 // R' = R >> 1
909
910 // The left-shifting idioms share the same pattern:
911 // select (X & (1 << i)) ? R ^ (Q << i) : R
912 // Similarly for right-shifting idioms:
913 // select (X & 1) ? (R >> 1) ^ Q
914
915 if (matchLeftShift(SelI, CIV, PV)) {
916 // If this is a pre-scan, getting this far is sufficient.
917 if (PreScan)
918 return true;
919
920 // Need to make sure that the SelI goes back into R.
921 auto *RPhi = dyn_cast<PHINode>(PV.R);
922 if (!RPhi)
923 return false;
924 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
925 return false;
926 PV.Res = SelI;
927
928 // If X is loop invariant, it must be the input polynomial, and the
929 // idiom is the basic polynomial multiply.
930 if (CurLoop->isLoopInvariant(PV.X)) {
931 PV.P = PV.X;
932 PV.Inv = false;
933 } else {
934 // X is not loop invariant. If X == R, this is the inverse pmpy.
935 // Otherwise, check for an xor with an invariant value. If the
936 // variable argument to the xor is R, then this is still a valid
937 // inverse pmpy.
938 PV.Inv = true;
939 if (PV.X != PV.R) {
940 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
941 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
942 return false;
943 auto *I1 = dyn_cast<Instruction>(X1);
944 auto *I2 = dyn_cast<Instruction>(X2);
945 if (!I1 || I1->getParent() != LoopB) {
946 Var = X2;
947 Inv = X1;
948 } else if (!I2 || I2->getParent() != LoopB) {
949 Var = X1;
950 Inv = X2;
951 } else
952 return false;
953 if (Var != PV.R)
954 return false;
955 PV.M = Inv;
956 }
957 // The input polynomial P still needs to be determined. It will be
958 // the entry value of R.
959 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
960 PV.P = EntryP;
961 }
962
963 return true;
964 }
965
966 if (matchRightShift(SelI, PV)) {
967 // If this is an inverse pattern, the Q polynomial must be known at
968 // compile time.
969 if (PV.Inv && !isa<ConstantInt>(PV.Q))
970 return false;
971 if (PreScan)
972 return true;
973 // There is no exact matching of right-shift pmpy.
974 return false;
975 }
976
977 return false;
978}
979
980bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
981 IntegerType *DestTy) {
982 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
983 if (!T || T->getBitWidth() > DestTy->getBitWidth())
984 return false;
985 if (T->getBitWidth() == DestTy->getBitWidth())
986 return true;
987 // Non-instructions are promotable. The reason why an instruction may not
988 // be promotable is that it may produce a different result if its operands
989 // and the result are promoted, for example, it may produce more non-zero
990 // bits. While it would still be possible to represent the proper result
991 // in a wider type, it may require adding additional instructions (which
992 // we don't want to do).
993 Instruction *In = dyn_cast<Instruction>(Val);
994 if (!In)
995 return true;
996 // The bitwidth of the source type is smaller than the destination.
997 // Check if the individual operation can be promoted.
998 switch (In->getOpcode()) {
999 case Instruction::PHI:
1000 case Instruction::ZExt:
1001 case Instruction::And:
1002 case Instruction::Or:
1003 case Instruction::Xor:
1004 case Instruction::LShr: // Shift right is ok.
1005 case Instruction::Select:
1006 case Instruction::Trunc:
1007 return true;
1008 case Instruction::ICmp:
1009 if (CmpInst *CI = cast<CmpInst>(In))
1010 return CI->isEquality() || CI->isUnsigned();
1011 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1011
)
;
1012 case Instruction::Add:
1013 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
1014 }
1015 return false;
1016}
1017
1018void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1019 IntegerType *DestTy, BasicBlock *LoopB) {
1020 Type *OrigTy = In->getType();
1021 assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote"
) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1021
, __extension__ __PRETTY_FUNCTION__))
;
1022
1023 // Leave boolean values alone.
1024 if (!In->getType()->isIntegerTy(1))
1025 In->mutateType(DestTy);
1026 unsigned DestBW = DestTy->getBitWidth();
1027
1028 // Handle PHIs.
1029 if (PHINode *P = dyn_cast<PHINode>(In)) {
1030 unsigned N = P->getNumIncomingValues();
1031 for (unsigned i = 0; i != N; ++i) {
1032 BasicBlock *InB = P->getIncomingBlock(i);
1033 if (InB == LoopB)
1034 continue;
1035 Value *InV = P->getIncomingValue(i);
1036 IntegerType *Ty = cast<IntegerType>(InV->getType());
1037 // Do not promote values in PHI nodes of type i1.
1038 if (Ty != P->getType()) {
1039 // If the value type does not match the PHI type, the PHI type
1040 // must have been promoted.
1041 assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ?
void (0) : __assert_fail ("Ty->getBitWidth() < DestBW"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1041
, __extension__ __PRETTY_FUNCTION__))
;
1042 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1043 P->setIncomingValue(i, InV);
1044 }
1045 }
1046 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1047 Value *Op = Z->getOperand(0);
1048 if (Op->getType() == Z->getType())
1049 Z->replaceAllUsesWith(Op);
1050 Z->eraseFromParent();
1051 return;
1052 }
1053 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1054 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1055 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1056 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1057 T->replaceAllUsesWith(And);
1058 T->eraseFromParent();
1059 return;
1060 }
1061
1062 // Promote immediates.
1063 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1064 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1065 if (CI->getType()->getBitWidth() < DestBW)
1066 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1067 }
1068}
1069
1070bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1071 BasicBlock *ExitB) {
1072 assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail (
"LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1072, __extension__ __PRETTY_FUNCTION__))
;
1073 // Skip loops where the exit block has more than one predecessor. The values
1074 // coming from the loop block will be promoted to another type, and so the
1075 // values coming into the exit block from other predecessors would also have
1076 // to be promoted.
1077 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1078 return false;
1079 IntegerType *DestTy = getPmpyType();
1080 // Check if the exit values have types that are no wider than the type
1081 // that we want to promote to.
1082 unsigned DestBW = DestTy->getBitWidth();
1083 for (PHINode &P : ExitB->phis()) {
1084 if (P.getNumIncomingValues() != 1)
1085 return false;
1086 assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void
(0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1086, __extension__ __PRETTY_FUNCTION__))
;
1087 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1088 if (!T || T->getBitWidth() > DestBW)
1089 return false;
1090 }
1091
1092 // Check all instructions in the loop.
1093 for (Instruction &In : *LoopB)
1094 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1095 return false;
1096
1097 // Perform the promotion.
1098 std::vector<Instruction*> LoopIns;
1099 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1100 [](Instruction &In) { return &In; });
1101 for (Instruction *In : LoopIns)
1102 if (!In->isTerminator())
1103 promoteTo(In, DestTy, LoopB);
1104
1105 // Fix up the PHI nodes in the exit block.
1106 Instruction *EndI = ExitB->getFirstNonPHI();
1107 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1108 for (auto I = ExitB->begin(); I != End; ++I) {
1109 PHINode *P = dyn_cast<PHINode>(I);
1110 if (!P)
1111 break;
1112 Type *Ty0 = P->getIncomingValue(0)->getType();
1113 Type *PTy = P->getType();
1114 if (PTy != Ty0) {
1115 assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail
("Ty0 == DestTy", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1115, __extension__ __PRETTY_FUNCTION__))
;
1116 // In order to create the trunc, P must have the promoted type.
1117 P->mutateType(Ty0);
1118 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1119 // In order for the RAUW to work, the types of P and T must match.
1120 P->mutateType(PTy);
1121 P->replaceAllUsesWith(T);
1122 // Final update of the P's type.
1123 P->mutateType(Ty0);
1124 cast<Instruction>(T)->setOperand(0, P);
1125 }
1126 }
1127
1128 return true;
1129}
1130
1131bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1132 ValueSeq &Cycle) {
1133 // Out = ..., In, ...
1134 if (Out == In)
1135 return true;
1136
1137 auto *BB = cast<Instruction>(Out)->getParent();
1138 bool HadPhi = false;
1139
1140 for (auto *U : Out->users()) {
1141 auto *I = dyn_cast<Instruction>(&*U);
1142 if (I == nullptr || I->getParent() != BB)
1143 continue;
1144 // Make sure that there are no multi-iteration cycles, e.g.
1145 // p1 = phi(p2)
1146 // p2 = phi(p1)
1147 // The cycle p1->p2->p1 would span two loop iterations.
1148 // Check that there is only one phi in the cycle.
1149 bool IsPhi = isa<PHINode>(I);
1150 if (IsPhi && HadPhi)
1151 return false;
1152 HadPhi |= IsPhi;
1153 if (!Cycle.insert(I))
1154 return false;
1155 if (findCycle(I, In, Cycle))
1156 break;
1157 Cycle.remove(I);
1158 }
1159 return !Cycle.empty();
1160}
1161
1162void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1163 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1164 // All the values in the cycle that are between the phi node and the
1165 // divider instruction will be classified as "early", all other values
1166 // will be "late".
1167
1168 bool IsE = true;
1169 unsigned I, N = Cycle.size();
1170 for (I = 0; I < N; ++I) {
1171 Value *V = Cycle[I];
1172 if (DivI == V)
1173 IsE = false;
1174 else if (!isa<PHINode>(V))
1175 continue;
1176 // Stop if found either.
1177 break;
1178 }
1179 // "I" is the index of either DivI or the phi node, whichever was first.
1180 // "E" is "false" or "true" respectively.
1181 ValueSeq &First = !IsE ? Early : Late;
1182 for (unsigned J = 0; J < I; ++J)
1183 First.insert(Cycle[J]);
1184
1185 ValueSeq &Second = IsE ? Early : Late;
1186 Second.insert(Cycle[I]);
1187 for (++I; I < N; ++I) {
1188 Value *V = Cycle[I];
1189 if (DivI == V || isa<PHINode>(V))
1190 break;
1191 Second.insert(V);
1192 }
1193
1194 for (; I < N; ++I)
1195 First.insert(Cycle[I]);
1196}
1197
1198bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1199 ValueSeq &Early, ValueSeq &Late) {
1200 // Select is an exception, since the condition value does not have to be
1201 // classified in the same way as the true/false values. The true/false
1202 // values do have to be both early or both late.
1203 if (UseI->getOpcode() == Instruction::Select) {
1204 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1205 if (Early.count(TV) || Early.count(FV)) {
1206 if (Late.count(TV) || Late.count(FV))
1207 return false;
1208 Early.insert(UseI);
1209 } else if (Late.count(TV) || Late.count(FV)) {
1210 if (Early.count(TV) || Early.count(FV))
1211 return false;
1212 Late.insert(UseI);
1213 }
1214 return true;
1215 }
1216
1217 // Not sure what would be the example of this, but the code below relies
1218 // on having at least one operand.
1219 if (UseI->getNumOperands() == 0)
1220 return true;
1221
1222 bool AE = true, AL = true;
1223 for (auto &I : UseI->operands()) {
1224 if (Early.count(&*I))
1225 AL = false;
1226 else if (Late.count(&*I))
1227 AE = false;
1228 }
1229 // If the operands appear "all early" and "all late" at the same time,
1230 // then it means that none of them are actually classified as either.
1231 // This is harmless.
1232 if (AE && AL)
1233 return true;
1234 // Conversely, if they are neither "all early" nor "all late", then
1235 // we have a mixture of early and late operands that is not a known
1236 // exception.
1237 if (!AE && !AL)
1238 return false;
1239
1240 // Check that we have covered the two special cases.
1241 assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail
("AE != AL", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1241, __extension__ __PRETTY_FUNCTION__))
;
1242
1243 if (AE)
1244 Early.insert(UseI);
1245 else
1246 Late.insert(UseI);
1247 return true;
1248}
1249
1250bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1251 switch (I->getOpcode()) {
1252 case Instruction::And:
1253 case Instruction::Or:
1254 case Instruction::Xor:
1255 case Instruction::LShr:
1256 case Instruction::Shl:
1257 case Instruction::Select:
1258 case Instruction::ICmp:
1259 case Instruction::PHI:
1260 break;
1261 default:
1262 return false;
1263 }
1264 return true;
1265}
1266
1267bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1268 unsigned IterCount) {
1269 auto *T = dyn_cast<IntegerType>(V->getType());
1270 if (!T)
1271 return false;
1272
1273 KnownBits Known(T->getBitWidth());
1274 computeKnownBits(V, Known, DL);
1275 return Known.countMinLeadingZeros() >= IterCount;
1276}
1277
1278bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1279 unsigned IterCount) {
1280 // Assume that all inputs to the value have the high bits zero.
1281 // Check if the value itself preserves the zeros in the high bits.
1282 if (auto *C = dyn_cast<ConstantInt>(V))
1283 return C->getValue().countLeadingZeros() >= IterCount;
1284
1285 if (auto *I = dyn_cast<Instruction>(V)) {
1286 switch (I->getOpcode()) {
1287 case Instruction::And:
1288 case Instruction::Or:
1289 case Instruction::Xor:
1290 case Instruction::LShr:
1291 case Instruction::Select:
1292 case Instruction::ICmp:
1293 case Instruction::PHI:
1294 case Instruction::ZExt:
1295 return true;
1296 }
1297 }
1298
1299 return false;
1300}
1301
1302bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1303 unsigned Opc = I->getOpcode();
1304 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1305 return Op != I->getOperand(1);
1306 return true;
1307}
1308
1309bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1310 BasicBlock *ExitB, unsigned IterCount) {
1311 Value *CIV = getCountIV(LoopB);
1312 if (CIV == nullptr)
1313 return false;
1314 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1315 if (CIVTy == nullptr)
1316 return false;
1317
1318 ValueSeq RShifts;
1319 ValueSeq Early, Late, Cycled;
1320
1321 // Find all value cycles that contain logical right shifts by 1.
1322 for (Instruction &I : *LoopB) {
1323 using namespace PatternMatch;
1324
1325 Value *V = nullptr;
1326 if (!match(&I, m_LShr(m_Value(V), m_One())))
1327 continue;
1328 ValueSeq C;
1329 if (!findCycle(&I, V, C))
1330 continue;
1331
1332 // Found a cycle.
1333 C.insert(&I);
1334 classifyCycle(&I, C, Early, Late);
1335 Cycled.insert(C.begin(), C.end());
1336 RShifts.insert(&I);
1337 }
1338
1339 // Find the set of all values affected by the shift cycles, i.e. all
1340 // cycled values, and (recursively) all their users.
1341 ValueSeq Users(Cycled.begin(), Cycled.end());
1342 for (unsigned i = 0; i < Users.size(); ++i) {
1343 Value *V = Users[i];
1344 if (!isa<IntegerType>(V->getType()))
1345 return false;
1346 auto *R = cast<Instruction>(V);
1347 // If the instruction does not commute with shifts, the loop cannot
1348 // be unshifted.
1349 if (!commutesWithShift(R))
1350 return false;
1351 for (User *U : R->users()) {
1352 auto *T = cast<Instruction>(U);
1353 // Skip users from outside of the loop. They will be handled later.
1354 // Also, skip the right-shifts and phi nodes, since they mix early
1355 // and late values.
1356 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1357 continue;
1358
1359 Users.insert(T);
1360 if (!classifyInst(T, Early, Late))
1361 return false;
1362 }
1363 }
1364
1365 if (Users.empty())
1366 return false;
1367
1368 // Verify that high bits remain zero.
1369 ValueSeq Internal(Users.begin(), Users.end());
1370 ValueSeq Inputs;
1371 for (unsigned i = 0; i < Internal.size(); ++i) {
1372 auto *R = dyn_cast<Instruction>(Internal[i]);
1373 if (!R)
1374 continue;
1375 for (Value *Op : R->operands()) {
1376 auto *T = dyn_cast<Instruction>(Op);
1377 if (T && T->getParent() != LoopB)
1378 Inputs.insert(Op);
1379 else
1380 Internal.insert(Op);
1381 }
1382 }
1383 for (Value *V : Inputs)
1384 if (!highBitsAreZero(V, IterCount))
1385 return false;
1386 for (Value *V : Internal)
1387 if (!keepsHighBitsZero(V, IterCount))
1388 return false;
1389
1390 // Finally, the work can be done. Unshift each user.
1391 IRBuilder<> IRB(LoopB);
1392 std::map<Value*,Value*> ShiftMap;
1393
1394 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1395
1396 CastMapType CastMap;
1397
1398 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1399 IntegerType *Ty) -> Value* {
1400 auto H = CM.find(std::make_pair(V, Ty));
1401 if (H != CM.end())
1402 return H->second;
1403 Value *CV = IRB.CreateIntCast(V, Ty, false);
1404 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1405 return CV;
1406 };
1407
1408 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1409 using namespace PatternMatch;
1410
1411 if (isa<PHINode>(I) || !Users.count(&*I))
1412 continue;
1413
1414 // Match lshr x, 1.
1415 Value *V = nullptr;
1416 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1417 replaceAllUsesOfWithIn(&*I, V, LoopB);
1418 continue;
1419 }
1420 // For each non-cycled operand, replace it with the corresponding
1421 // value shifted left.
1422 for (auto &J : I->operands()) {
1423 Value *Op = J.get();
1424 if (!isOperandShifted(&*I, Op))
1425 continue;
1426 if (Users.count(Op))
1427 continue;
1428 // Skip shifting zeros.
1429 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1430 continue;
1431 // Check if we have already generated a shift for this value.
1432 auto F = ShiftMap.find(Op);
1433 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1434 if (W == nullptr) {
1435 IRB.SetInsertPoint(&*I);
1436 // First, the shift amount will be CIV or CIV+1, depending on
1437 // whether the value is early or late. Instead of creating CIV+1,
1438 // do a single shift of the value.
1439 Value *ShAmt = CIV, *ShVal = Op;
1440 auto *VTy = cast<IntegerType>(ShVal->getType());
1441 auto *ATy = cast<IntegerType>(ShAmt->getType());
1442 if (Late.count(&*I))
1443 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1444 // Second, the types of the shifted value and the shift amount
1445 // must match.
1446 if (VTy != ATy) {
1447 if (VTy->getBitWidth() < ATy->getBitWidth())
1448 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1449 else
1450 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1451 }
1452 // Ready to generate the shift and memoize it.
1453 W = IRB.CreateShl(ShVal, ShAmt);
1454 ShiftMap.insert(std::make_pair(Op, W));
1455 }
1456 I->replaceUsesOfWith(Op, W);
1457 }
1458 }
1459
1460 // Update the users outside of the loop to account for having left
1461 // shifts. They would normally be shifted right in the loop, so shift
1462 // them right after the loop exit.
1463 // Take advantage of the loop-closed SSA form, which has all the post-
1464 // loop values in phi nodes.
1465 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1466 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1467 if (!isa<PHINode>(P))
1468 break;
1469 auto *PN = cast<PHINode>(P);
1470 Value *U = PN->getIncomingValueForBlock(LoopB);
1471 if (!Users.count(U))
1472 continue;
1473 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1474 PN->replaceAllUsesWith(S);
1475 // The above RAUW will create
1476 // S = lshr S, IterCount
1477 // so we need to fix it back into
1478 // S = lshr PN, IterCount
1479 cast<User>(S)->replaceUsesOfWith(S, PN);
1480 }
1481
1482 return true;
1483}
1484
1485void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1486 for (auto &I : *LoopB)
1487 if (Value *SV = simplifyInstruction(&I, {DL, &TLI, &DT}))
1488 I.replaceAllUsesWith(SV);
1489
1490 for (Instruction &I : llvm::make_early_inc_range(*LoopB))
1491 RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI);
1492}
1493
1494unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1495 // Arrays of coefficients of Q and the inverse, C.
1496 // Q[i] = coefficient at x^i.
1497 std::array<char,32> Q, C;
1498
1499 for (unsigned i = 0; i < 32; ++i) {
1500 Q[i] = QP & 1;
1501 QP >>= 1;
1502 }
1503 assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail
("Q[0] == 1", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1503, __extension__ __PRETTY_FUNCTION__))
;
1504
1505 // Find C, such that
1506 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1507 //
1508 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1509 // operations * and + are & and ^ respectively.
1510 //
1511 // Find C[i] recursively, by comparing i-th coefficient in the product
1512 // with 0 (or 1 for i=0).
1513 //
1514 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1515 C[0] = 1;
1516 for (unsigned i = 1; i < 32; ++i) {
1517 // Solve for C[i] in:
1518 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1519 // This is equivalent to
1520 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1521 // which is
1522 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1523 unsigned T = 0;
1524 for (unsigned j = 0; j < i; ++j)
1525 T = T ^ (C[j] & Q[i-j]);
1526 C[i] = T;
1527 }
1528
1529 unsigned QV = 0;
1530 for (unsigned i = 0; i < 32; ++i)
1531 if (C[i])
1532 QV |= (1 << i);
1533
1534 return QV;
1535}
1536
1537Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1538 ParsedValues &PV) {
1539 IRBuilder<> B(&*At);
1540 Module *M = At->getParent()->getParent()->getParent();
1541 Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1542
1543 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1544 unsigned IC = PV.IterCount;
1545
1546 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1547 P0 = P = B.CreateXor(P, PV.M);
1548
1549 // Create a bit mask to clear the high bits beyond IterCount.
1550 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
15
Returning from 'APInt::getLowBitsSet'
1551
1552 if (PV.IterCount != 32)
16
Assuming field 'IterCount' is equal to 32
17
Taking false branch
1553 P = B.CreateAnd(P, BMI);
1554
1555 if (PV.Inv) {
18
Assuming field 'Inv' is true
19
Taking true branch
1556 auto *QI = dyn_cast<ConstantInt>(PV.Q);
20
Assuming field 'Q' is a 'CastReturnType'
1557 assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth()
<= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1557
, __extension__ __PRETTY_FUNCTION__))
;
21
Assuming the condition is true
22
'?' condition is true
1558
1559 // Again, clearing bits beyond IterCount.
1560 unsigned M = (1 << PV.IterCount) - 1;
23
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1561 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1562 unsigned QV = getInverseMxN(Tmp) & M;
1563 auto *QVI = ConstantInt::get(QI->getType(), QV);
1564 P = B.CreateCall(PMF, {P, QVI});
1565 P = B.CreateTrunc(P, QI->getType());
1566 if (IC != 32)
1567 P = B.CreateAnd(P, BMI);
1568 }
1569
1570 Value *R = B.CreateCall(PMF, {P, Q});
1571
1572 if (PV.M != nullptr)
1573 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1574
1575 return R;
1576}
1577
1578static bool hasZeroSignBit(const Value *V) {
1579 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1580 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1581 const Instruction *I = dyn_cast<const Instruction>(V);
1582 if (!I)
1583 return false;
1584 switch (I->getOpcode()) {
1585 case Instruction::LShr:
1586 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1587 return SI->getZExtValue() > 0;
1588 return false;
1589 case Instruction::Or:
1590 case Instruction::Xor:
1591 return hasZeroSignBit(I->getOperand(0)) &&
1592 hasZeroSignBit(I->getOperand(1));
1593 case Instruction::And:
1594 return hasZeroSignBit(I->getOperand(0)) ||
1595 hasZeroSignBit(I->getOperand(1));
1596 }
1597 return false;
1598}
1599
1600void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1601 S.addRule("sink-zext",
1602 // Sink zext past bitwise operations.
1603 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1604 if (I->getOpcode() != Instruction::ZExt)
1605 return nullptr;
1606 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1607 if (!T)
1608 return nullptr;
1609 switch (T->getOpcode()) {
1610 case Instruction::And:
1611 case Instruction::Or:
1612 case Instruction::Xor:
1613 break;
1614 default:
1615 return nullptr;
1616 }
1617 IRBuilder<> B(Ctx);
1618 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1619 B.CreateZExt(T->getOperand(0), I->getType()),
1620 B.CreateZExt(T->getOperand(1), I->getType()));
1621 });
1622 S.addRule("xor/and -> and/xor",
1623 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1624 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1625 if (I->getOpcode() != Instruction::Xor)
1626 return nullptr;
1627 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1628 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1629 if (!And0 || !And1)
1630 return nullptr;
1631 if (And0->getOpcode() != Instruction::And ||
1632 And1->getOpcode() != Instruction::And)
1633 return nullptr;
1634 if (And0->getOperand(1) != And1->getOperand(1))
1635 return nullptr;
1636 IRBuilder<> B(Ctx);
1637 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1638 And0->getOperand(1));
1639 });
1640 S.addRule("sink binop into select",
1641 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1642 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1643 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1644 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1645 if (!BO)
1646 return nullptr;
1647 Instruction::BinaryOps Op = BO->getOpcode();
1648 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1649 IRBuilder<> B(Ctx);
1650 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1651 Value *Z = BO->getOperand(1);
1652 return B.CreateSelect(Sel->getCondition(),
1653 B.CreateBinOp(Op, X, Z),
1654 B.CreateBinOp(Op, Y, Z));
1655 }
1656 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1657 IRBuilder<> B(Ctx);
1658 Value *X = BO->getOperand(0);
1659 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1660 return B.CreateSelect(Sel->getCondition(),
1661 B.CreateBinOp(Op, X, Y),
1662 B.CreateBinOp(Op, X, Z));
1663 }
1664 return nullptr;
1665 });
1666 S.addRule("fold select-select",
1667 // (select c (select c x y) z) -> (select c x z)
1668 // (select c x (select c y z)) -> (select c x z)
1669 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1670 SelectInst *Sel = dyn_cast<SelectInst>(I);
1671 if (!Sel)
1672 return nullptr;
1673 IRBuilder<> B(Ctx);
1674 Value *C = Sel->getCondition();
1675 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1676 if (Sel0->getCondition() == C)
1677 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1678 }
1679 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1680 if (Sel1->getCondition() == C)
1681 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1682 }
1683 return nullptr;
1684 });
1685 S.addRule("or-signbit -> xor-signbit",
1686 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1687 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1688 if (I->getOpcode() != Instruction::Or)
1689 return nullptr;
1690 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1691 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1692 return nullptr;
1693 if (!hasZeroSignBit(I->getOperand(0)))
1694 return nullptr;
1695 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1696 });
1697 S.addRule("sink lshr into binop",
1698 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1699 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1700 if (I->getOpcode() != Instruction::LShr)
1701 return nullptr;
1702 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1703 if (!BitOp)
1704 return nullptr;
1705 switch (BitOp->getOpcode()) {
1706 case Instruction::And:
1707 case Instruction::Or:
1708 case Instruction::Xor:
1709 break;
1710 default:
1711 return nullptr;
1712 }
1713 IRBuilder<> B(Ctx);
1714 Value *S = I->getOperand(1);
1715 return B.CreateBinOp(BitOp->getOpcode(),
1716 B.CreateLShr(BitOp->getOperand(0), S),
1717 B.CreateLShr(BitOp->getOperand(1), S));
1718 });
1719 S.addRule("expose bitop-const",
1720 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1721 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1722 auto IsBitOp = [](unsigned Op) -> bool {
1723 switch (Op) {
1724 case Instruction::And:
1725 case Instruction::Or:
1726 case Instruction::Xor:
1727 return true;
1728 }
1729 return false;
1730 };
1731 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1732 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1733 return nullptr;
1734 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1735 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1736 return nullptr;
1737 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1738 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1739 if (!CA || !CB)
1740 return nullptr;
1741 IRBuilder<> B(Ctx);
1742 Value *X = BitOp2->getOperand(0);
1743 return B.CreateBinOp(BitOp2->getOpcode(), X,
1744 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1745 });
1746}
1747
1748void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1749 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1750 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1751 if (I->getOpcode() != Instruction::And)
1752 return nullptr;
1753 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1754 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1755 if (!Xor || !C0)
1756 return nullptr;
1757 if (Xor->getOpcode() != Instruction::Xor)
1758 return nullptr;
1759 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1760 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1761 // Pick the first non-null and.
1762 if (!And0 || And0->getOpcode() != Instruction::And)
1763 std::swap(And0, And1);
1764 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1765 if (!C1)
1766 return nullptr;
1767 uint32_t V0 = C0->getZExtValue();
1768 uint32_t V1 = C1->getZExtValue();
1769 if (V0 != (V0 & V1))
1770 return nullptr;
1771 IRBuilder<> B(Ctx);
1772 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1773 });
1774}
1775
1776bool PolynomialMultiplyRecognize::recognize() {
1777 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1778 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1779 // Restrictions:
1780 // - The loop must consist of a single block.
1781 // - The iteration count must be known at compile-time.
1782 // - The loop must have an induction variable starting from 0, and
1783 // incremented in each iteration of the loop.
1784 BasicBlock *LoopB = CurLoop->getHeader();
1785 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1786
1787 if (LoopB != CurLoop->getLoopLatch())
1788 return false;
1789 BasicBlock *ExitB = CurLoop->getExitBlock();
1790 if (ExitB == nullptr)
1791 return false;
1792 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1793 if (EntryB == nullptr)
1794 return false;
1795
1796 unsigned IterCount = 0;
1797 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1798 if (isa<SCEVCouldNotCompute>(CT))
1799 return false;
1800 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1801 IterCount = CV->getValue()->getZExtValue() + 1;
1802
1803 Value *CIV = getCountIV(LoopB);
1804 ParsedValues PV;
1805 Simplifier PreSimp;
1806 PV.IterCount = IterCount;
1807 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1808 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1809
1810 setupPreSimplifier(PreSimp);
1811
1812 // Perform a preliminary scan of select instructions to see if any of them
1813 // looks like a generator of the polynomial multiply steps. Assume that a
1814 // loop can only contain a single transformable operation, so stop the
1815 // traversal after the first reasonable candidate was found.
1816 // XXX: Currently this approach can modify the loop before being 100% sure
1817 // that the transformation can be carried out.
1818 bool FoundPreScan = false;
1819 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1820 for (const Value *U : V->users()) {
1821 if (const auto *P = dyn_cast<const PHINode>(U))
1822 if (P->getParent() == LoopB)
1823 return true;
1824 }
1825 return false;
1826 };
1827 for (Instruction &In : *LoopB) {
1828 SelectInst *SI = dyn_cast<SelectInst>(&In);
1829 if (!SI || !FeedsPHI(SI))
1830 continue;
1831
1832 Simplifier::Context C(SI);
1833 Value *T = PreSimp.simplify(C);
1834 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1835 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1836 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1837 FoundPreScan = true;
1838 if (SelI != SI) {
1839 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1840 SI->replaceAllUsesWith(NewSel);
1841 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1842 }
1843 break;
1844 }
1845 }
1846
1847 if (!FoundPreScan) {
1848 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1849 return false;
1850 }
1851
1852 if (!PV.Left) {
1853 // The right shift version actually only returns the higher bits of
1854 // the result (each iteration discards the LSB). If we want to convert it
1855 // to a left-shifting loop, the working data type must be at least as
1856 // wide as the target's pmpy instruction.
1857 if (!promoteTypes(LoopB, ExitB))
1858 return false;
1859 // Run post-promotion simplifications.
1860 Simplifier PostSimp;
1861 setupPostSimplifier(PostSimp);
1862 for (Instruction &In : *LoopB) {
1863 SelectInst *SI = dyn_cast<SelectInst>(&In);
1864 if (!SI || !FeedsPHI(SI))
1865 continue;
1866 Simplifier::Context C(SI);
1867 Value *T = PostSimp.simplify(C);
1868 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1869 if (SelI != SI) {
1870 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1871 SI->replaceAllUsesWith(NewSel);
1872 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1873 }
1874 break;
1875 }
1876
1877 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1878 return false;
1879 cleanupLoopBody(LoopB);
1880 }
1881
1882 // Scan the loop again, find the generating select instruction.
1883 bool FoundScan = false;
1884 for (Instruction &In : *LoopB) {
1885 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1886 if (!SelI)
1887 continue;
1888 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1889 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1890 if (FoundScan)
1891 break;
1892 }
1893 assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail
("FoundScan", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1893, __extension__ __PRETTY_FUNCTION__))
;
1894
1895 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1896 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1897 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1898 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1899 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1900 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1901 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1902 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1903 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1904 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1905 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1906 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1907 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1908
1909 BasicBlock::iterator At(EntryB->getTerminator());
1910 Value *PM = generate(At, PV);
1911 if (PM == nullptr)
1912 return false;
1913
1914 if (PM->getType() != PV.Res->getType())
1915 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1916
1917 PV.Res->replaceAllUsesWith(PM);
1918 PV.Res->eraseFromParent();
1919 return true;
1920}
1921
1922int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1923 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1924 return SC->getAPInt().getSExtValue();
1925 return 0;
1926}
1927
1928bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1929 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1930 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1931 return false;
1932
1933 Value *StoredVal = SI->getValueOperand();
1934 Value *StorePtr = SI->getPointerOperand();
1935
1936 // Reject stores that are so large that they overflow an unsigned.
1937 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1938 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1939 return false;
1940
1941 // See if the pointer expression is an AddRec like {base,+,1} on the current
1942 // loop, which indicates a strided store. If we have something else, it's a
1943 // random store we can't handle.
1944 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1945 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1946 return false;
1947
1948 // Check to see if the stride matches the size of the store. If so, then we
1949 // know that every byte is touched in the loop.
1950 int Stride = getSCEVStride(StoreEv);
1951 if (Stride == 0)
1952 return false;
1953 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1954 if (StoreSize != unsigned(std::abs(Stride)))
1955 return false;
1956
1957 // The store must be feeding a non-volatile load.
1958 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1959 if (!LI || !LI->isSimple())
1960 return false;
1961
1962 // See if the pointer expression is an AddRec like {base,+,1} on the current
1963 // loop, which indicates a strided load. If we have something else, it's a
1964 // random load we can't handle.
1965 Value *LoadPtr = LI->getPointerOperand();
1966 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1967 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1968 return false;
1969
1970 // The store and load must share the same stride.
1971 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1972 return false;
1973
1974 // Success. This store can be converted into a memcpy.
1975 return true;
1976}
1977
1978/// mayLoopAccessLocation - Return true if the specified loop might access the
1979/// specified pointer location, which is a loop-strided access. The 'Access'
1980/// argument specifies what the verboten forms of access are (read or write).
1981static bool
1982mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1983 const SCEV *BECount, unsigned StoreSize,
1984 AliasAnalysis &AA,
1985 SmallPtrSetImpl<Instruction *> &Ignored) {
1986 // Get the location that may be stored across the loop. Since the access
1987 // is strided positively through memory, we say that the modified location
1988 // starts at the pointer and has infinite size.
1989 LocationSize AccessSize = LocationSize::afterPointer();
1990
1991 // If the loop iterates a fixed number of times, we can refine the access
1992 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1993 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1994 AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
1995 StoreSize);
1996
1997 // TODO: For this to be really effective, we have to dive into the pointer
1998 // operand in the store. Store to &A[i] of 100 will always return may alias
1999 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
2000 // which will then no-alias a store to &A[100].
2001 MemoryLocation StoreLoc(Ptr, AccessSize);
2002
2003 for (auto *B : L->blocks())
2004 for (auto &I : *B)
2005 if (Ignored.count(&I) == 0 &&
2006 isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access))
2007 return true;
2008
2009 return false;
2010}
2011
2012void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2013 SmallVectorImpl<StoreInst*> &Stores) {
2014 Stores.clear();
2015 for (Instruction &I : *BB)
2016 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2017 if (isLegalStore(CurLoop, SI))
2018 Stores.push_back(SI);
2019}
2020
2021bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2022 StoreInst *SI, const SCEV *BECount) {
2023 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025
, __extension__ __PRETTY_FUNCTION__))
2024 "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025
, __extension__ __PRETTY_FUNCTION__))
2025 "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025
, __extension__ __PRETTY_FUNCTION__))
;
2026
2027 Value *StorePtr = SI->getPointerOperand();
2028 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2029 unsigned Stride = getSCEVStride(StoreEv);
2030 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2031 if (Stride != StoreSize)
2032 return false;
2033
2034 // See if the pointer expression is an AddRec like {base,+,1} on the current
2035 // loop, which indicates a strided load. If we have something else, it's a
2036 // random load we can't handle.
2037 auto *LI = cast<LoadInst>(SI->getValueOperand());
2038 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2039
2040 // The trip count of the loop and the base pointer of the addrec SCEV is
2041 // guaranteed to be loop invariant, which means that it should dominate the
2042 // header. This allows us to insert code for it in the preheader.
2043 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2044 Instruction *ExpPt = Preheader->getTerminator();
2045 IRBuilder<> Builder(ExpPt);
2046 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2047
2048 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2049
2050 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2051 // this into a memcpy/memmove in the loop preheader now if we want. However,
2052 // this would be unsafe to do if there is anything else in the loop that may
2053 // read or write the memory region we're storing to. For memcpy, this
2054 // includes the load that feeds the stores. Check for an alias by generating
2055 // the base address and checking everything.
2056 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2057 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2058 Value *LoadBasePtr = nullptr;
2059
2060 bool Overlap = false;
2061 bool DestVolatile = SI->isVolatile();
2062 Type *BECountTy = BECount->getType();
2063
2064 if (DestVolatile) {
2065 // The trip count must fit in i32, since it is the type of the "num_words"
2066 // argument to hexagon_memcpy_forward_vp4cp4n2.
2067 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2068CleanupAndExit:
2069 // If we generated new code for the base pointer, clean up.
2070 Expander.clear();
2071 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2072 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2073 StoreBasePtr = nullptr;
2074 }
2075 if (LoadBasePtr) {
2076 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2077 LoadBasePtr = nullptr;
2078 }
2079 return false;
2080 }
2081 }
2082
2083 SmallPtrSet<Instruction*, 2> Ignore1;
2084 Ignore1.insert(SI);
2085 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2086 StoreSize, *AA, Ignore1)) {
2087 // Check if the load is the offending instruction.
2088 Ignore1.insert(LI);
2089 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2090 BECount, StoreSize, *AA, Ignore1)) {
2091 // Still bad. Nothing we can do.
2092 goto CleanupAndExit;
2093 }
2094 // It worked with the load ignored.
2095 Overlap = true;
2096 }
2097
2098 if (!Overlap) {
2099 if (DisableMemcpyIdiom || !HasMemcpy)
2100 goto CleanupAndExit;
2101 } else {
2102 // Don't generate memmove if this function will be inlined. This is
2103 // because the caller will undergo this transformation after inlining.
2104 Function *Func = CurLoop->getHeader()->getParent();
2105 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2106 goto CleanupAndExit;
2107
2108 // In case of a memmove, the call to memmove will be executed instead
2109 // of the loop, so we need to make sure that there is nothing else in
2110 // the loop than the load, store and instructions that these two depend
2111 // on.
2112 SmallVector<Instruction*,2> Insts;
2113 Insts.push_back(SI);
2114 Insts.push_back(LI);
2115 if (!coverLoop(CurLoop, Insts))
2116 goto CleanupAndExit;
2117
2118 if (DisableMemmoveIdiom || !HasMemmove)
2119 goto CleanupAndExit;
2120 bool IsNested = CurLoop->getParentLoop() != nullptr;
2121 if (IsNested && OnlyNonNestedMemmove)
2122 goto CleanupAndExit;
2123 }
2124
2125 // For a memcpy, we have to make sure that the input array is not being
2126 // mutated by the loop.
2127 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2128 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2129
2130 SmallPtrSet<Instruction*, 2> Ignore2;
2131 Ignore2.insert(SI);
2132 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2133 StoreSize, *AA, Ignore2))
2134 goto CleanupAndExit;
2135
2136 // Check the stride.
2137 bool StridePos = getSCEVStride(LoadEv) >= 0;
2138
2139 // Currently, the volatile memcpy only emulates traversing memory forward.
2140 if (!StridePos && DestVolatile)
2141 goto CleanupAndExit;
2142
2143 bool RuntimeCheck = (Overlap || DestVolatile);
2144
2145 BasicBlock *ExitB;
2146 if (RuntimeCheck) {
2147 // The runtime check needs a single exit block.
2148 SmallVector<BasicBlock*, 8> ExitBlocks;
2149 CurLoop->getUniqueExitBlocks(ExitBlocks);
2150 if (ExitBlocks.size() != 1)
2151 goto CleanupAndExit;
2152 ExitB = ExitBlocks[0];
2153 }
2154
2155 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2156 // pointer size if it isn't already.
2157 LLVMContext &Ctx = SI->getContext();
2158 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2159 DebugLoc DLoc = SI->getDebugLoc();
2160
2161 const SCEV *NumBytesS =
2162 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2163 if (StoreSize != 1)
2164 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2165 SCEV::FlagNUW);
2166 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2167 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2168 if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT}))
2169 NumBytes = Simp;
2170
2171 CallInst *NewCall;
2172
2173 if (RuntimeCheck) {
2174 unsigned Threshold = RuntimeMemSizeThreshold;
2175 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2176 uint64_t C = CI->getZExtValue();
2177 if (Threshold != 0 && C < Threshold)
2178 goto CleanupAndExit;
2179 if (C < CompileTimeMemSizeThreshold)
2180 goto CleanupAndExit;
2181 }
2182
2183 BasicBlock *Header = CurLoop->getHeader();
2184 Function *Func = Header->getParent();
2185 Loop *ParentL = LF->getLoopFor(Preheader);
2186 StringRef HeaderName = Header->getName();
2187
2188 // Create a new (empty) preheader, and update the PHI nodes in the
2189 // header to use the new preheader.
2190 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2191 Func, Header);
2192 if (ParentL)
2193 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2194 IRBuilder<>(NewPreheader).CreateBr(Header);
2195 for (auto &In : *Header) {
2196 PHINode *PN = dyn_cast<PHINode>(&In);
2197 if (!PN)
2198 break;
2199 int bx = PN->getBasicBlockIndex(Preheader);
2200 if (bx >= 0)
2201 PN->setIncomingBlock(bx, NewPreheader);
2202 }
2203 DT->addNewBlock(NewPreheader, Preheader);
2204 DT->changeImmediateDominator(Header, NewPreheader);
2205
2206 // Check for safe conditions to execute memmove.
2207 // If stride is positive, copying things from higher to lower addresses
2208 // is equivalent to memmove. For negative stride, it's the other way
2209 // around. Copying forward in memory with positive stride may not be
2210 // same as memmove since we may be copying values that we just stored
2211 // in some previous iteration.
2212 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2213 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2214 Value *LowA = StridePos ? SA : LA;
2215 Value *HighA = StridePos ? LA : SA;
2216 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2217 Value *Cond = CmpA;
2218
2219 // Check for distance between pointers. Since the case LowA < HighA
2220 // is checked for above, assume LowA >= HighA.
2221 Value *Dist = Builder.CreateSub(LowA, HighA);
2222 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2223 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2224 Cond = CmpEither;
2225
2226 if (Threshold != 0) {
2227 Type *Ty = NumBytes->getType();
2228 Value *Thr = ConstantInt::get(Ty, Threshold);
2229 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2230 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2231 Cond = CmpBoth;
2232 }
2233 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2234 Func, NewPreheader);
2235 if (ParentL)
2236 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2237 Instruction *OldT = Preheader->getTerminator();
2238 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2239 OldT->eraseFromParent();
2240 Preheader->setName(Preheader->getName()+".old");
2241 DT->addNewBlock(MemmoveB, Preheader);
2242 // Find the new immediate dominator of the exit block.
2243 BasicBlock *ExitD = Preheader;
2244 for (BasicBlock *PB : predecessors(ExitB)) {
2245 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2246 if (!ExitD)
2247 break;
2248 }
2249 // If the prior immediate dominator of ExitB was dominated by the
2250 // old preheader, then the old preheader becomes the new immediate
2251 // dominator. Otherwise don't change anything (because the newly
2252 // added blocks are dominated by the old preheader).
2253 if (ExitD && DT->dominates(Preheader, ExitD)) {
2254 DomTreeNode *BN = DT->getNode(ExitB);
2255 DomTreeNode *DN = DT->getNode(ExitD);
2256 BN->setIDom(DN);
2257 }
2258
2259 // Add a call to memmove to the conditional block.
2260 IRBuilder<> CondBuilder(MemmoveB);
2261 CondBuilder.CreateBr(ExitB);
2262 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2263
2264 if (DestVolatile) {
2265 Type *Int32Ty = Type::getInt32Ty(Ctx);
2266 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2267 Type *VoidTy = Type::getVoidTy(Ctx);
2268 Module *M = Func->getParent();
2269 FunctionCallee Fn = M->getOrInsertFunction(
2270 HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
2271
2272 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2273 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2274 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2275 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2276 MemmoveB->getTerminator());
2277 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2278 if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT}))
2279 NumWords = Simp;
2280
2281 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2282 ? StoreBasePtr
2283 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2284 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2285 ? LoadBasePtr
2286 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2287 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2288 } else {
2289 NewCall = CondBuilder.CreateMemMove(
2290 StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
2291 }
2292 } else {
2293 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
2294 LI->getAlign(), NumBytes);
2295 // Okay, the memcpy has been formed. Zap the original store and
2296 // anything that feeds into it.
2297 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2298 }
2299
2300 NewCall->setDebugLoc(DLoc);
2301
2302 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2303 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2304 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2305 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2306 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2307
2308 return true;
2309}
2310
2311// Check if the instructions in Insts, together with their dependencies
2312// cover the loop in the sense that the loop could be safely eliminated once
2313// the instructions in Insts are removed.
2314bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2315 SmallVectorImpl<Instruction*> &Insts) const {
2316 SmallSet<BasicBlock*,8> LoopBlocks;
2317 for (auto *B : L->blocks())
2318 LoopBlocks.insert(B);
2319
2320 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2321
2322 // Collect all instructions from the loop that the instructions in Insts
2323 // depend on (plus their dependencies, etc.). These instructions will
2324 // constitute the expression trees that feed those in Insts, but the trees
2325 // will be limited only to instructions contained in the loop.
2326 for (unsigned i = 0; i < Worklist.size(); ++i) {
2327 Instruction *In = Worklist[i];
2328 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2329 Instruction *OpI = dyn_cast<Instruction>(I);
2330 if (!OpI)
2331 continue;
2332 BasicBlock *PB = OpI->getParent();
2333 if (!LoopBlocks.count(PB))
2334 continue;
2335 Worklist.insert(OpI);
2336 }
2337 }
2338
2339 // Scan all instructions in the loop, if any of them have a user outside
2340 // of the loop, or outside of the expressions collected above, then either
2341 // the loop has a side-effect visible outside of it, or there are
2342 // instructions in it that are not involved in the original set Insts.
2343 for (auto *B : L->blocks()) {
2344 for (auto &In : *B) {
2345 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2346 continue;
2347 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2348 return false;
2349 for (auto *K : In.users()) {
2350 Instruction *UseI = dyn_cast<Instruction>(K);
2351 if (!UseI)
2352 continue;
2353 BasicBlock *UseB = UseI->getParent();
2354 if (LF->getLoopFor(UseB) != L)
2355 return false;
2356 }
2357 }
2358 }
2359
2360 return true;
2361}
2362
2363/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2364/// with the specified backedge count. This block is known to be in the current
2365/// loop and not in any subloops.
2366bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2367 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2368 // We can only promote stores in this block if they are unconditionally
2369 // executed in the loop. For a block to be unconditionally executed, it has
2370 // to dominate all the exit blocks of the loop. Verify this now.
2371 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2372 return DT->dominates(BB, EB);
2373 };
2374 if (!all_of(ExitBlocks, DominatedByBB))
2375 return false;
2376
2377 bool MadeChange = false;
2378 // Look for store instructions, which may be optimized to memset/memcpy.
2379 SmallVector<StoreInst*,8> Stores;
2380 collectStores(CurLoop, BB, Stores);
2381
2382 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2383 for (auto &SI : Stores)
2384 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2385
2386 return MadeChange;
2387}
2388
2389bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2390 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2391 if (PMR.recognize())
2392 return true;
2393
2394 if (!HasMemcpy && !HasMemmove)
2395 return false;
2396
2397 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2398 assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400
, __extension__ __PRETTY_FUNCTION__))
2399 "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400
, __extension__ __PRETTY_FUNCTION__))
2400 "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400
, __extension__ __PRETTY_FUNCTION__))
;
2401
2402 SmallVector<BasicBlock *, 8> ExitBlocks;
2403 L->getUniqueExitBlocks(ExitBlocks);
2404
2405 bool Changed = false;
2406
2407 // Scan all the blocks in the loop that are not in subloops.
2408 for (auto *BB : L->getBlocks()) {
2409 // Ignore blocks in subloops.
2410 if (LF->getLoopFor(BB) != L)
2411 continue;
2412 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2413 }
2414
2415 return Changed;
2416}
2417
2418bool HexagonLoopIdiomRecognize::run(Loop *L) {
2419 const Module &M = *L->getHeader()->getParent()->getParent();
2420 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2421 return false;
2422
2423 // If the loop could not be converted to canonical form, it must have an
2424 // indirectbr in it, just give up.
2425 if (!L->getLoopPreheader())
2426 return false;
2427
2428 // Disable loop idiom recognition if the function's name is a common idiom.
2429 StringRef Name = L->getHeader()->getParent()->getName();
2430 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2431 return false;
2432
2433 DL = &L->getHeader()->getModule()->getDataLayout();
2434
2435 HasMemcpy = TLI->has(LibFunc_memcpy);
2436 HasMemmove = TLI->has(LibFunc_memmove);
2437
2438 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2439 return runOnCountableLoop(L);
2440 return false;
2441}
2442
2443bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
2444 LPPassManager &LPM) {
2445 if (skipLoop(L))
2446 return false;
2447
2448 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2449 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2450 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2451 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2452 *L->getHeader()->getParent());
2453 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2454 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2455}
2456
2457Pass *llvm::createHexagonLoopIdiomPass() {
2458 return new HexagonLoopIdiomRecognizeLegacyPass();
2459}
2460
2461PreservedAnalyses
2462HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM,
2463 LoopStandardAnalysisResults &AR,
2464 LPMUpdater &U) {
2465 return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)
2466 .run(&L)
2467 ? getLoopPassPreservedAnalyses()
2468 : PreservedAnalyses::all();
2469}

/build/llvm-toolchain-snapshot-16~++20220904122748+c444af1c20b3/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <utility>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34template <typename T, typename Enable> struct DenseMapInfo;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69/// * APInt supports zero-bit-width values, but operations that require bits
70/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit
71/// integer). This means that operations like zero extension and logical
72/// shifts are defined, but sign extension and ashr is not. Zero bit values
73/// compare and hash equal to themselves, and countLeadingZeros returns 0.
74///
75class [[nodiscard]] APInt {
76public:
77 typedef uint64_t WordType;
78
79 /// This enum is used to hold the constants we needed for APInt.
80 enum : unsigned {
81 /// Byte size of a word.
82 APINT_WORD_SIZE = sizeof(WordType),
83 /// Bits in a word.
84 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
85 };
86
87 enum class Rounding {
88 DOWN,
89 TOWARD_ZERO,
90 UP,
91 };
92
93 static constexpr WordType WORDTYPE_MAX = ~WordType(0);
94
95 /// \name Constructors
96 /// @{
97
98 /// Create a new APInt of numBits width, initialized as val.
99 ///
100 /// If isSigned is true then val is treated as if it were a signed value
101 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
102 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
103 /// the range of val are zero filled).
104 ///
105 /// \param numBits the bit width of the constructed APInt
106 /// \param val the initial value of the APInt
107 /// \param isSigned how to treat signedness of val
108 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
109 : BitWidth(numBits) {
110 if (isSingleWord()) {
111 U.VAL = val;
112 clearUnusedBits();
113 } else {
114 initSlowCase(val, isSigned);
115 }
116 }
117
118 /// Construct an APInt of numBits width, initialized as bigVal[].
119 ///
120 /// Note that bigVal.size() can be smaller or larger than the corresponding
121 /// bit width but any extraneous bits will be dropped.
122 ///
123 /// \param numBits the bit width of the constructed APInt
124 /// \param bigVal a sequence of words to form the initial value of the APInt
125 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
126
127 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
128 /// deprecated because this constructor is prone to ambiguity with the
129 /// APInt(unsigned, uint64_t, bool) constructor.
130 ///
131 /// If this overload is ever deleted, care should be taken to prevent calls
132 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
133 /// constructor.
134 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
135
136 /// Construct an APInt from a string representation.
137 ///
138 /// This constructor interprets the string \p str in the given radix. The
139 /// interpretation stops when the first character that is not suitable for the
140 /// radix is encountered, or the end of the string. Acceptable radix values
141 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
142 /// string to require more bits than numBits.
143 ///
144 /// \param numBits the bit width of the constructed APInt
145 /// \param str the string to be interpreted
146 /// \param radix the radix to use for the conversion
147 APInt(unsigned numBits, StringRef str, uint8_t radix);
148
149 /// Default constructor that creates an APInt with a 1-bit zero value.
150 explicit APInt() { U.VAL = 0; }
151
152 /// Copy Constructor.
153 APInt(const APInt &that) : BitWidth(that.BitWidth) {
154 if (isSingleWord())
155 U.VAL = that.U.VAL;
156 else
157 initSlowCase(that);
158 }
159
160 /// Move Constructor.
161 APInt(APInt &&that) : BitWidth(that.BitWidth) {
162 memcpy(&U, &that.U, sizeof(U));
163 that.BitWidth = 0;
164 }
165
166 /// Destructor.
167 ~APInt() {
168 if (needsCleanup())
169 delete[] U.pVal;
170 }
171
172 /// @}
173 /// \name Value Generators
174 /// @{
175
176 /// Get the '0' value for the specified bit-width.
177 static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
178
179 /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
180 static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
181
182 /// Return an APInt zero bits wide.
183 static APInt getZeroWidth() { return getZero(0); }
184
185 /// Gets maximum unsigned value of APInt for specific bit width.
186 static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); }
187
188 /// Gets maximum signed value of APInt for a specific bit width.
189 static APInt getSignedMaxValue(unsigned numBits) {
190 APInt API = getAllOnes(numBits);
191 API.clearBit(numBits - 1);
192 return API;
193 }
194
195 /// Gets minimum unsigned value of APInt for a specific bit width.
196 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
197
198 /// Gets minimum signed value of APInt for a specific bit width.
199 static APInt getSignedMinValue(unsigned numBits) {
200 APInt API(numBits, 0);
201 API.setBit(numBits - 1);
202 return API;
203 }
204
205 /// Get the SignMask for a specific bit width.
206 ///
207 /// This is just a wrapper function of getSignedMinValue(), and it helps code
208 /// readability when we want to get a SignMask.
209 static APInt getSignMask(unsigned BitWidth) {
210 return getSignedMinValue(BitWidth);
211 }
212
213 /// Return an APInt of a specified width with all bits set.
214 static APInt getAllOnes(unsigned numBits) {
215 return APInt(numBits, WORDTYPE_MAX, true);
216 }
217
218 /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
219 static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
220
221 /// Return an APInt with exactly one bit set in the result.
222 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
223 APInt Res(numBits, 0);
224 Res.setBit(BitNo);
225 return Res;
226 }
227
228 /// Get a value with a block of bits set.
229 ///
230 /// Constructs an APInt value that has a contiguous range of bits set. The
231 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
232 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
233 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
234 /// \p hiBit.
235 ///
236 /// \param numBits the intended bit width of the result
237 /// \param loBit the index of the lowest bit set.
238 /// \param hiBit the index of the highest bit set.
239 ///
240 /// \returns An APInt value with the requested bits set.
241 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
242 APInt Res(numBits, 0);
243 Res.setBits(loBit, hiBit);
244 return Res;
245 }
246
247 /// Wrap version of getBitsSet.
248 /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
249 /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
250 /// with parameters (32, 28, 4), you would get 0xF000000F.
251 /// If \p hiBit is equal to \p loBit, you would get a result with all bits
252 /// set.
253 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
254 unsigned hiBit) {
255 APInt Res(numBits, 0);
256 Res.setBitsWithWrap(loBit, hiBit);
257 return Res;
258 }
259
260 /// Constructs an APInt value that has a contiguous range of bits set. The
261 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
262 /// bits will be zero. For example, with parameters(32, 12) you would get
263 /// 0xFFFFF000.
264 ///
265 /// \param numBits the intended bit width of the result
266 /// \param loBit the index of the lowest bit to set.
267 ///
268 /// \returns An APInt value with the requested bits set.
269 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
270 APInt Res(numBits, 0);
271 Res.setBitsFrom(loBit);
272 return Res;
273 }
274
275 /// Constructs an APInt value that has the top hiBitsSet bits set.
276 ///
277 /// \param numBits the bitwidth of the result
278 /// \param hiBitsSet the number of high-order bits set in the result.
279 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
280 APInt Res(numBits, 0);
281 Res.setHighBits(hiBitsSet);
282 return Res;
283 }
284
285 /// Constructs an APInt value that has the bottom loBitsSet bits set.
286 ///
287 /// \param numBits the bitwidth of the result
288 /// \param loBitsSet the number of low-order bits set in the result.
289 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
290 APInt Res(numBits, 0);
291 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
14
Returning from 'APInt::setLowBits'
292 return Res;
293 }
294
295 /// Return a value containing V broadcasted over NewLen bits.
296 static APInt getSplat(unsigned NewLen, const APInt &V);
297
298 /// @}
299 /// \name Value Tests
300 /// @{
301
302 /// Determine if this APInt just has one word to store value.
303 ///
304 /// \returns true if the number of bits <= 64, false otherwise.
305 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
306
307 /// Determine sign of this APInt.
308 ///
309 /// This tests the high bit of this APInt to determine if it is set.
310 ///
311 /// \returns true if this APInt is negative, false otherwise
312 bool isNegative() const { return (*this)[BitWidth - 1]; }
313
314 /// Determine if this APInt Value is non-negative (>= 0)
315 ///
316 /// This tests the high bit of the APInt to determine if it is unset.
317 bool isNonNegative() const { return !isNegative(); }
318
319 /// Determine if sign bit of this APInt is set.
320 ///
321 /// This tests the high bit of this APInt to determine if it is set.
322 ///
323 /// \returns true if this APInt has its sign bit set, false otherwise.
324 bool isSignBitSet() const { return (*this)[BitWidth - 1]; }
325
326 /// Determine if sign bit of this APInt is clear.
327 ///
328 /// This tests the high bit of this APInt to determine if it is clear.
329 ///
330 /// \returns true if this APInt has its sign bit clear, false otherwise.
331 bool isSignBitClear() const { return !isSignBitSet(); }
332
333 /// Determine if this APInt Value is positive.
334 ///
335 /// This tests if the value of this APInt is positive (> 0). Note
336 /// that 0 is not a positive value.
337 ///
338 /// \returns true if this APInt is positive.
339 bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
340
341 /// Determine if this APInt Value is non-positive (<= 0).
342 ///
343 /// \returns true if this APInt is non-positive.
344 bool isNonPositive() const { return !isStrictlyPositive(); }
345
346 /// Determine if all bits are set. This is true for zero-width values.
347 bool isAllOnes() const {
348 if (BitWidth == 0)
349 return true;
350 if (isSingleWord())
351 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
352 return countTrailingOnesSlowCase() == BitWidth;
353 }
354
355 /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
356 bool isAllOnesValue() const { return isAllOnes(); }
357
358 /// Determine if this value is zero, i.e. all bits are clear.
359 bool isZero() const {
360 if (isSingleWord())
361 return U.VAL == 0;
362 return countLeadingZerosSlowCase() == BitWidth;
363 }
364
365 /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
366 bool isNullValue() const { return isZero(); }
367
368 /// Determine if this is a value of 1.
369 ///
370 /// This checks to see if the value of this APInt is one.
371 bool isOne() const {
372 if (isSingleWord())
373 return U.VAL == 1;
374 return countLeadingZerosSlowCase() == BitWidth - 1;
375 }
376
377 /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
378 bool isOneValue() const { return isOne(); }
379
380 /// Determine if this is the largest unsigned value.
381 ///
382 /// This checks to see if the value of this APInt is the maximum unsigned
383 /// value for the APInt's bit width.
384 bool isMaxValue() const { return isAllOnes(); }
385
386 /// Determine if this is the largest signed value.
387 ///
388 /// This checks to see if the value of this APInt is the maximum signed
389 /// value for the APInt's bit width.
390 bool isMaxSignedValue() const {
391 if (isSingleWord()) {
392 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 392, __extension__ __PRETTY_FUNCTION__
))
;
393 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
394 }
395 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
396 }
397
398 /// Determine if this is the smallest unsigned value.
399 ///
400 /// This checks to see if the value of this APInt is the minimum unsigned
401 /// value for the APInt's bit width.
402 bool isMinValue() const { return isZero(); }
403
404 /// Determine if this is the smallest signed value.
405 ///
406 /// This checks to see if the value of this APInt is the minimum signed
407 /// value for the APInt's bit width.
408 bool isMinSignedValue() const {
409 if (isSingleWord()) {
410 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 410, __extension__ __PRETTY_FUNCTION__
))
;
411 return U.VAL == (WordType(1) << (BitWidth - 1));
412 }
413 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
414 }
415
416 /// Check if this APInt has an N-bits unsigned integer value.
417 bool isIntN(unsigned N) const { return getActiveBits() <= N; }
418
419 /// Check if this APInt has an N-bits signed integer value.
420 bool isSignedIntN(unsigned N) const { return getSignificantBits() <= N; }
421
422 /// Check if this APInt's value is a power of two greater than zero.
423 ///
424 /// \returns true if the argument APInt value is a power of two > 0.
425 bool isPowerOf2() const {
426 if (isSingleWord()) {
427 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 427, __extension__ __PRETTY_FUNCTION__
))
;
428 return isPowerOf2_64(U.VAL);
429 }
430 return countPopulationSlowCase() == 1;
431 }
432
433 /// Check if this APInt's negated value is a power of two greater than zero.
434 bool isNegatedPowerOf2() const {
435 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 435, __extension__ __PRETTY_FUNCTION__
))
;
436 if (isNonNegative())
437 return false;
438 // NegatedPowerOf2 - shifted mask in the top bits.
439 unsigned LO = countLeadingOnes();
440 unsigned TZ = countTrailingZeros();
441 return (LO + TZ) == BitWidth;
442 }
443
444 /// Check if the APInt's value is returned by getSignMask.
445 ///
446 /// \returns true if this is the value returned by getSignMask.
447 bool isSignMask() const { return isMinSignedValue(); }
448
449 /// Convert APInt to a boolean value.
450 ///
451 /// This converts the APInt to a boolean value as a test against zero.
452 bool getBoolValue() const { return !isZero(); }
453
454 /// If this value is smaller than the specified limit, return it, otherwise
455 /// return the limit value. This causes the value to saturate to the limit.
456 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
457 return ugt(Limit) ? Limit : getZExtValue();
458 }
459
460 /// Check if the APInt consists of a repeated bit pattern.
461 ///
462 /// e.g. 0x01010101 satisfies isSplat(8).
463 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
464 /// width without remainder.
465 bool isSplat(unsigned SplatSizeInBits) const;
466
467 /// \returns true if this APInt value is a sequence of \param numBits ones
468 /// starting at the least significant bit with the remainder zero.
469 bool isMask(unsigned numBits) const {
470 assert(numBits != 0 && "numBits must be non-zero")(static_cast <bool> (numBits != 0 && "numBits must be non-zero"
) ? void (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\""
, "llvm/include/llvm/ADT/APInt.h", 470, __extension__ __PRETTY_FUNCTION__
))
;
471 assert(numBits <= BitWidth && "numBits out of range")(static_cast <bool> (numBits <= BitWidth && "numBits out of range"
) ? void (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\""
, "llvm/include/llvm/ADT/APInt.h", 471, __extension__ __PRETTY_FUNCTION__
))
;
472 if (isSingleWord())
473 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
474 unsigned Ones = countTrailingOnesSlowCase();
475 return (numBits == Ones) &&
476 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
477 }
478
479 /// \returns true if this APInt is a non-empty sequence of ones starting at
480 /// the least significant bit with the remainder zero.
481 /// Ex. isMask(0x0000FFFFU) == true.
482 bool isMask() const {
483 if (isSingleWord())
484 return isMask_64(U.VAL);
485 unsigned Ones = countTrailingOnesSlowCase();
486 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
487 }
488
489 /// Return true if this APInt value contains a non-empty sequence of ones with
490 /// the remainder zero.
491 bool isShiftedMask() const {
492 if (isSingleWord())
493 return isShiftedMask_64(U.VAL);
494 unsigned Ones = countPopulationSlowCase();
495 unsigned LeadZ = countLeadingZerosSlowCase();
496 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
497 }
498
499 /// Return true if this APInt value contains a non-empty sequence of ones with
500 /// the remainder zero. If true, \p MaskIdx will specify the index of the
501 /// lowest set bit and \p MaskLen is updated to specify the length of the
502 /// mask, else neither are updated.
503 bool isShiftedMask(unsigned &MaskIdx, unsigned &MaskLen) const {
504 if (isSingleWord())
505 return isShiftedMask_64(U.VAL, MaskIdx, MaskLen);
506 unsigned Ones = countPopulationSlowCase();
507 unsigned LeadZ = countLeadingZerosSlowCase();
508 unsigned TrailZ = countTrailingZerosSlowCase();
509 if ((Ones + LeadZ + TrailZ) != BitWidth)
510 return false;
511 MaskLen = Ones;
512 MaskIdx = TrailZ;
513 return true;
514 }
515
516 /// Compute an APInt containing numBits highbits from this APInt.
517 ///
518 /// Get an APInt with the same BitWidth as this APInt, just zero mask the low
519 /// bits and right shift to the least significant bit.
520 ///
521 /// \returns the high "numBits" bits of this APInt.
522 APInt getHiBits(unsigned numBits) const;
523
524 /// Compute an APInt containing numBits lowbits from this APInt.
525 ///
526 /// Get an APInt with the same BitWidth as this APInt, just zero mask the high
527 /// bits.
528 ///
529 /// \returns the low "numBits" bits of this APInt.
530 APInt getLoBits(unsigned numBits) const;
531
532 /// Determine if two APInts have the same value, after zero-extending
533 /// one of them (if needed!) to ensure that the bit-widths match.
534 static bool isSameValue(const APInt &I1, const APInt &I2) {
535 if (I1.getBitWidth() == I2.getBitWidth())
536 return I1 == I2;
537
538 if (I1.getBitWidth() > I2.getBitWidth())
539 return I1 == I2.zext(I1.getBitWidth());
540
541 return I1.zext(I2.getBitWidth()) == I2;
542 }
543
544 /// Overload to compute a hash_code for an APInt value.
545 friend hash_code hash_value(const APInt &Arg);
546
547 /// This function returns a pointer to the internal storage of the APInt.
548 /// This is useful for writing out the APInt in binary form without any
549 /// conversions.
550 const uint64_t *getRawData() const {
551 if (isSingleWord())
552 return &U.VAL;
553 return &U.pVal[0];
554 }
555
556 /// @}
557 /// \name Unary Operators
558 /// @{
559
560 /// Postfix increment operator. Increment *this by 1.
561 ///
562 /// \returns a new APInt value representing the original value of *this.
563 APInt operator++(int) {
564 APInt API(*this);
565 ++(*this);
566 return API;
567 }
568
569 /// Prefix increment operator.
570 ///
571 /// \returns *this incremented by one
572 APInt &operator++();
573
574 /// Postfix decrement operator. Decrement *this by 1.
575 ///
576 /// \returns a new APInt value representing the original value of *this.
577 APInt operator--(int) {
578 APInt API(*this);
579 --(*this);
580 return API;
581 }
582
583 /// Prefix decrement operator.
584 ///
585 /// \returns *this decremented by one.
586 APInt &operator--();
587
588 /// Logical negation operation on this APInt returns true if zero, like normal
589 /// integers.
590 bool operator!() const { return isZero(); }
591
592 /// @}
593 /// \name Assignment Operators
594 /// @{
595
596 /// Copy assignment operator.
597 ///
598 /// \returns *this after assignment of RHS.
599 APInt &operator=(const APInt &RHS) {
600 // The common case (both source or dest being inline) doesn't require
601 // allocation or deallocation.
602 if (isSingleWord() && RHS.isSingleWord()) {
603 U.VAL = RHS.U.VAL;
604 BitWidth = RHS.BitWidth;
605 return *this;
606 }
607
608 assignSlowCase(RHS);
609 return *this;
610 }
611
612 /// Move assignment operator.
613 APInt &operator=(APInt &&that) {
614#ifdef EXPENSIVE_CHECKS
615 // Some std::shuffle implementations still do self-assignment.
616 if (this == &that)
617 return *this;
618#endif
619 assert(this != &that && "Self-move not supported")(static_cast <bool> (this != &that && "Self-move not supported"
) ? void (0) : __assert_fail ("this != &that && \"Self-move not supported\""
, "llvm/include/llvm/ADT/APInt.h", 619, __extension__ __PRETTY_FUNCTION__
))
;
620 if (!isSingleWord())
621 delete[] U.pVal;
622
623 // Use memcpy so that type based alias analysis sees both VAL and pVal
624 // as modified.
625 memcpy(&U, &that.U, sizeof(U));
626
627 BitWidth = that.BitWidth;
628 that.BitWidth = 0;
629 return *this;
630 }
631
632 /// Assignment operator.
633 ///
634 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
635 /// the bit width, the excess bits are truncated. If the bit width is larger
636 /// than 64, the value is zero filled in the unspecified high order bits.
637 ///
638 /// \returns *this after assignment of RHS value.
639 APInt &operator=(uint64_t RHS) {
640 if (isSingleWord()) {
641 U.VAL = RHS;
642 return clearUnusedBits();
643 }
644 U.pVal[0] = RHS;
645 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
646 return *this;
647 }
648
649 /// Bitwise AND assignment operator.
650 ///
651 /// Performs a bitwise AND operation on this APInt and RHS. The result is
652 /// assigned to *this.
653 ///
654 /// \returns *this after ANDing with RHS.
655 APInt &operator&=(const APInt &RHS) {
656 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 656, __extension__ __PRETTY_FUNCTION__
))
;
657 if (isSingleWord())
658 U.VAL &= RHS.U.VAL;
659 else
660 andAssignSlowCase(RHS);
661 return *this;
662 }
663
664 /// Bitwise AND assignment operator.
665 ///
666 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
667 /// logically zero-extended or truncated to match the bit-width of
668 /// the LHS.
669 APInt &operator&=(uint64_t RHS) {
670 if (isSingleWord()) {
671 U.VAL &= RHS;
672 return *this;
673 }
674 U.pVal[0] &= RHS;
675 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
676 return *this;
677 }
678
679 /// Bitwise OR assignment operator.
680 ///
681 /// Performs a bitwise OR operation on this APInt and RHS. The result is
682 /// assigned *this;
683 ///
684 /// \returns *this after ORing with RHS.
685 APInt &operator|=(const APInt &RHS) {
686 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 686, __extension__ __PRETTY_FUNCTION__
))
;
687 if (isSingleWord())
688 U.VAL |= RHS.U.VAL;
689 else
690 orAssignSlowCase(RHS);
691 return *this;
692 }
693
694 /// Bitwise OR assignment operator.
695 ///
696 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
697 /// logically zero-extended or truncated to match the bit-width of
698 /// the LHS.
699 APInt &operator|=(uint64_t RHS) {
700 if (isSingleWord()) {
701 U.VAL |= RHS;
702 return clearUnusedBits();
703 }
704 U.pVal[0] |= RHS;
705 return *this;
706 }
707
708 /// Bitwise XOR assignment operator.
709 ///
710 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
711 /// assigned to *this.
712 ///
713 /// \returns *this after XORing with RHS.
714 APInt &operator^=(const APInt &RHS) {
715 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 715, __extension__ __PRETTY_FUNCTION__
))
;
716 if (isSingleWord())
717 U.VAL ^= RHS.U.VAL;
718 else
719 xorAssignSlowCase(RHS);
720 return *this;
721 }
722
723 /// Bitwise XOR assignment operator.
724 ///
725 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
726 /// logically zero-extended or truncated to match the bit-width of
727 /// the LHS.
728 APInt &operator^=(uint64_t RHS) {
729 if (isSingleWord()) {
730 U.VAL ^= RHS;
731 return clearUnusedBits();
732 }
733 U.pVal[0] ^= RHS;
734 return *this;
735 }
736
737 /// Multiplication assignment operator.
738 ///
739 /// Multiplies this APInt by RHS and assigns the result to *this.
740 ///
741 /// \returns *this
742 APInt &operator*=(const APInt &RHS);
743 APInt &operator*=(uint64_t RHS);
744
745 /// Addition assignment operator.
746 ///
747 /// Adds RHS to *this and assigns the result to *this.
748 ///
749 /// \returns *this
750 APInt &operator+=(const APInt &RHS);
751 APInt &operator+=(uint64_t RHS);
752
753 /// Subtraction assignment operator.
754 ///
755 /// Subtracts RHS from *this and assigns the result to *this.
756 ///
757 /// \returns *this
758 APInt &operator-=(const APInt &RHS);
759 APInt &operator-=(uint64_t RHS);
760
761 /// Left-shift assignment function.
762 ///
763 /// Shifts *this left by shiftAmt and assigns the result to *this.
764 ///
765 /// \returns *this after shifting left by ShiftAmt
766 APInt &operator<<=(unsigned ShiftAmt) {
767 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 767, __extension__ __PRETTY_FUNCTION__
))
;
768 if (isSingleWord()) {
769 if (ShiftAmt == BitWidth)
770 U.VAL = 0;
771 else
772 U.VAL <<= ShiftAmt;
773 return clearUnusedBits();
774 }
775 shlSlowCase(ShiftAmt);
776 return *this;
777 }
778
779 /// Left-shift assignment function.
780 ///
781 /// Shifts *this left by shiftAmt and assigns the result to *this.
782 ///
783 /// \returns *this after shifting left by ShiftAmt
784 APInt &operator<<=(const APInt &ShiftAmt);
785
786 /// @}
787 /// \name Binary Operators
788 /// @{
789
790 /// Multiplication operator.
791 ///
792 /// Multiplies this APInt by RHS and returns the result.
793 APInt operator*(const APInt &RHS) const;
794
795 /// Left logical shift operator.
796 ///
797 /// Shifts this APInt left by \p Bits and returns the result.
798 APInt operator<<(unsigned Bits) const { return shl(Bits); }
799
800 /// Left logical shift operator.
801 ///
802 /// Shifts this APInt left by \p Bits and returns the result.
803 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
804
805 /// Arithmetic right-shift function.
806 ///
807 /// Arithmetic right-shift this APInt by shiftAmt.
808 APInt ashr(unsigned ShiftAmt) const {
809 APInt R(*this);
810 R.ashrInPlace(ShiftAmt);
811 return R;
812 }
813
814 /// Arithmetic right-shift this APInt by ShiftAmt in place.
815 void ashrInPlace(unsigned ShiftAmt) {
816 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 816, __extension__ __PRETTY_FUNCTION__
))
;
817 if (isSingleWord()) {
818 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
819 if (ShiftAmt == BitWidth)
820 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
821 else
822 U.VAL = SExtVAL >> ShiftAmt;
823 clearUnusedBits();
824 return;
825 }
826 ashrSlowCase(ShiftAmt);
827 }
828
829 /// Logical right-shift function.
830 ///
831 /// Logical right-shift this APInt by shiftAmt.
832 APInt lshr(unsigned shiftAmt) const {
833 APInt R(*this);
834 R.lshrInPlace(shiftAmt);
835 return R;
836 }
837
838 /// Logical right-shift this APInt by ShiftAmt in place.
839 void lshrInPlace(unsigned ShiftAmt) {
840 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 840, __extension__ __PRETTY_FUNCTION__
))
;
841 if (isSingleWord()) {
842 if (ShiftAmt == BitWidth)
843 U.VAL = 0;
844 else
845 U.VAL >>= ShiftAmt;
846 return;
847 }
848 lshrSlowCase(ShiftAmt);
849 }
850
851 /// Left-shift function.
852 ///
853 /// Left-shift this APInt by shiftAmt.
854 APInt shl(unsigned shiftAmt) const {
855 APInt R(*this);
856 R <<= shiftAmt;
857 return R;
858 }
859
860 /// Rotate left by rotateAmt.
861 APInt rotl(unsigned rotateAmt) const;
862
863 /// Rotate right by rotateAmt.
864 APInt rotr(unsigned rotateAmt) const;
865
866 /// Arithmetic right-shift function.
867 ///
868 /// Arithmetic right-shift this APInt by shiftAmt.
869 APInt ashr(const APInt &ShiftAmt) const {
870 APInt R(*this);
871 R.ashrInPlace(ShiftAmt);
872 return R;
873 }
874
875 /// Arithmetic right-shift this APInt by shiftAmt in place.
876 void ashrInPlace(const APInt &shiftAmt);
877
878 /// Logical right-shift function.
879 ///
880 /// Logical right-shift this APInt by shiftAmt.
881 APInt lshr(const APInt &ShiftAmt) const {
882 APInt R(*this);
883 R.lshrInPlace(ShiftAmt);
884 return R;
885 }
886
887 /// Logical right-shift this APInt by ShiftAmt in place.
888 void lshrInPlace(const APInt &ShiftAmt);
889
890 /// Left-shift function.
891 ///
892 /// Left-shift this APInt by shiftAmt.
893 APInt shl(const APInt &ShiftAmt) const {
894 APInt R(*this);
895 R <<= ShiftAmt;
896 return R;
897 }
898
899 /// Rotate left by rotateAmt.
900 APInt rotl(const APInt &rotateAmt) const;
901
902 /// Rotate right by rotateAmt.
903 APInt rotr(const APInt &rotateAmt) const;
904
905 /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
906 /// equivalent to:
907 /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
908 APInt concat(const APInt &NewLSB) const {
909 /// If the result will be small, then both the merged values are small.
910 unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
911 if (NewWidth <= APINT_BITS_PER_WORD)
912 return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL);
913 return concatSlowCase(NewLSB);
914 }
915
916 /// Unsigned division operation.
917 ///
918 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
919 /// RHS are treated as unsigned quantities for purposes of this division.
920 ///
921 /// \returns a new APInt value containing the division result, rounded towards
922 /// zero.
923 APInt udiv(const APInt &RHS) const;
924 APInt udiv(uint64_t RHS) const;
925
926 /// Signed division function for APInt.
927 ///
928 /// Signed divide this APInt by APInt RHS.
929 ///
930 /// The result is rounded towards zero.
931 APInt sdiv(const APInt &RHS) const;
932 APInt sdiv(int64_t RHS) const;
933
934 /// Unsigned remainder operation.
935 ///
936 /// Perform an unsigned remainder operation on this APInt with RHS being the
937 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
938 /// of this operation. Note that this is a true remainder operation and not a
939 /// modulo operation because the sign follows the sign of the dividend which
940 /// is *this.
941 ///
942 /// \returns a new APInt value containing the remainder result
943 APInt urem(const APInt &RHS) const;
944 uint64_t urem(uint64_t RHS) const;
945
946 /// Function for signed remainder operation.
947 ///
948 /// Signed remainder operation on APInt.
949 APInt srem(const APInt &RHS) const;
950 int64_t srem(int64_t RHS) const;
951
952 /// Dual division/remainder interface.
953 ///
954 /// Sometimes it is convenient to divide two APInt values and obtain both the
955 /// quotient and remainder. This function does both operations in the same
956 /// computation making it a little more efficient. The pair of input arguments
957 /// may overlap with the pair of output arguments. It is safe to call
958 /// udivrem(X, Y, X, Y), for example.
959 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
960 APInt &Remainder);
961 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
962 uint64_t &Remainder);
963
964 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
965 APInt &Remainder);
966 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
967 int64_t &Remainder);
968
969 // Operations that return overflow indicators.
970 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
971 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
972 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
973 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
974 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
975 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
976 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
977 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
978 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
979
980 // Operations that saturate
981 APInt sadd_sat(const APInt &RHS) const;
982 APInt uadd_sat(const APInt &RHS) const;
983 APInt ssub_sat(const APInt &RHS) const;
984 APInt usub_sat(const APInt &RHS) const;
985 APInt smul_sat(const APInt &RHS) const;
986 APInt umul_sat(const APInt &RHS) const;
987 APInt sshl_sat(const APInt &RHS) const;
988 APInt ushl_sat(const APInt &RHS) const;
989
990 /// Array-indexing support.
991 ///
992 /// \returns the bit value at bitPosition
993 bool operator[](unsigned bitPosition) const {
994 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast <bool> (bitPosition < getBitWidth() &&
"Bit position out of bounds!") ? void (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\""
, "llvm/include/llvm/ADT/APInt.h", 994, __extension__ __PRETTY_FUNCTION__
))
;
995 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
996 }
997
998 /// @}
999 /// \name Comparison Operators
1000 /// @{
1001
1002 /// Equality operator.
1003 ///
1004 /// Compares this APInt with RHS for the validity of the equality
1005 /// relationship.
1006 bool operator==(const APInt &RHS) const {
1007 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Comparison requires equal bit widths") ? void (0) : __assert_fail
("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\""
, "llvm/include/llvm/ADT/APInt.h", 1007, __extension__ __PRETTY_FUNCTION__
))
;
1008 if (isSingleWord())
1009 return U.VAL == RHS.U.VAL;
1010 return equalSlowCase(RHS);
1011 }
1012
1013 /// Equality operator.
1014 ///
1015 /// Compares this APInt with a uint64_t for the validity of the equality
1016 /// relationship.
1017 ///
1018 /// \returns true if *this == Val
1019 bool operator==(uint64_t Val) const {
1020 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1021 }
1022
1023 /// Equality comparison.
1024 ///
1025 /// Compares this APInt with RHS for the validity of the equality
1026 /// relationship.
1027 ///
1028 /// \returns true if *this == Val
1029 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1030
1031 /// Inequality operator.
1032 ///
1033 /// Compares this APInt with RHS for the validity of the inequality
1034 /// relationship.
1035 ///
1036 /// \returns true if *this != Val
1037 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1038
1039 /// Inequality operator.
1040 ///
1041 /// Compares this APInt with a uint64_t for the validity of the inequality
1042 /// relationship.
1043 ///
1044 /// \returns true if *this != Val
1045 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1046
1047 /// Inequality comparison
1048 ///
1049 /// Compares this APInt with RHS for the validity of the inequality
1050 /// relationship.
1051 ///
1052 /// \returns true if *this != Val
1053 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1054
1055 /// Unsigned less than comparison
1056 ///
1057 /// Regards both *this and RHS as unsigned quantities and compares them for
1058 /// the validity of the less-than relationship.
1059 ///
1060 /// \returns true if *this < RHS when both are considered unsigned.
1061 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1062
1063 /// Unsigned less than comparison
1064 ///
1065 /// Regards both *this as an unsigned quantity and compares it with RHS for
1066 /// the validity of the less-than relationship.
1067 ///
1068 /// \returns true if *this < RHS when considered unsigned.
1069 bool ult(uint64_t RHS) const {
1070 // Only need to check active bits if not a single word.
1071 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1072 }
1073
1074 /// Signed less than comparison
1075 ///
1076 /// Regards both *this and RHS as signed quantities and compares them for
1077 /// validity of the less-than relationship.
1078 ///
1079 /// \returns true if *this < RHS when both are considered signed.
1080 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1081
1082 /// Signed less than comparison
1083 ///
1084 /// Regards both *this as a signed quantity and compares it with RHS for
1085 /// the validity of the less-than relationship.
1086 ///
1087 /// \returns true if *this < RHS when considered signed.
1088 bool slt(int64_t RHS) const {
1089 return (!isSingleWord() && getSignificantBits() > 64)
1090 ? isNegative()
1091 : getSExtValue() < RHS;
1092 }
1093
1094 /// Unsigned less or equal comparison
1095 ///
1096 /// Regards both *this and RHS as unsigned quantities and compares them for
1097 /// validity of the less-or-equal relationship.
1098 ///
1099 /// \returns true if *this <= RHS when both are considered unsigned.
1100 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1101
1102 /// Unsigned less or equal comparison
1103 ///
1104 /// Regards both *this as an unsigned quantity and compares it with RHS for
1105 /// the validity of the less-or-equal relationship.
1106 ///
1107 /// \returns true if *this <= RHS when considered unsigned.
1108 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1109
1110 /// Signed less or equal comparison
1111 ///
1112 /// Regards both *this and RHS as signed quantities and compares them for
1113 /// validity of the less-or-equal relationship.
1114 ///
1115 /// \returns true if *this <= RHS when both are considered signed.
1116 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1117
1118 /// Signed less or equal comparison
1119 ///
1120 /// Regards both *this as a signed quantity and compares it with RHS for the
1121 /// validity of the less-or-equal relationship.
1122 ///
1123 /// \returns true if *this <= RHS when considered signed.
1124 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1125
1126 /// Unsigned greater than comparison
1127 ///
1128 /// Regards both *this and RHS as unsigned quantities and compares them for
1129 /// the validity of the greater-than relationship.
1130 ///
1131 /// \returns true if *this > RHS when both are considered unsigned.
1132 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1133
1134 /// Unsigned greater than comparison
1135 ///
1136 /// Regards both *this as an unsigned quantity and compares it with RHS for
1137 /// the validity of the greater-than relationship.
1138 ///
1139 /// \returns true if *this > RHS when considered unsigned.
1140 bool ugt(uint64_t RHS) const {
1141 // Only need to check active bits if not a single word.
1142 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1143 }
1144
1145 /// Signed greater than comparison
1146 ///
1147 /// Regards both *this and RHS as signed quantities and compares them for the
1148 /// validity of the greater-than relationship.
1149 ///
1150 /// \returns true if *this > RHS when both are considered signed.
1151 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1152
1153 /// Signed greater than comparison
1154 ///
1155 /// Regards both *this as a signed quantity and compares it with RHS for
1156 /// the validity of the greater-than relationship.
1157 ///
1158 /// \returns true if *this > RHS when considered signed.
1159 bool sgt(int64_t RHS) const {
1160 return (!isSingleWord() && getSignificantBits() > 64)
1161 ? !isNegative()
1162 : getSExtValue() > RHS;
1163 }
1164
1165 /// Unsigned greater or equal comparison
1166 ///
1167 /// Regards both *this and RHS as unsigned quantities and compares them for
1168 /// validity of the greater-or-equal relationship.
1169 ///
1170 /// \returns true if *this >= RHS when both are considered unsigned.
1171 bool uge(const APInt &RHS) const { return !ult(RHS); }
1172
1173 /// Unsigned greater or equal comparison
1174 ///
1175 /// Regards both *this as an unsigned quantity and compares it with RHS for
1176 /// the validity of the greater-or-equal relationship.
1177 ///
1178 /// \returns true if *this >= RHS when considered unsigned.
1179 bool uge(uint64_t RHS) const { return !ult(RHS); }
1180
1181 /// Signed greater or equal comparison
1182 ///
1183 /// Regards both *this and RHS as signed quantities and compares them for
1184 /// validity of the greater-or-equal relationship.
1185 ///
1186 /// \returns true if *this >= RHS when both are considered signed.
1187 bool sge(const APInt &RHS) const { return !slt(RHS); }
1188
1189 /// Signed greater or equal comparison
1190 ///
1191 /// Regards both *this as a signed quantity and compares it with RHS for
1192 /// the validity of the greater-or-equal relationship.
1193 ///
1194 /// \returns true if *this >= RHS when considered signed.
1195 bool sge(int64_t RHS) const { return !slt(RHS); }
1196
1197 /// This operation tests if there are any pairs of corresponding bits
1198 /// between this APInt and RHS that are both set.
1199 bool intersects(const APInt &RHS) const {
1200 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1200, __extension__ __PRETTY_FUNCTION__
))
;
1201 if (isSingleWord())
1202 return (U.VAL & RHS.U.VAL) != 0;
1203 return intersectsSlowCase(RHS);
1204 }
1205
1206 /// This operation checks that all bits set in this APInt are also set in RHS.
1207 bool isSubsetOf(const APInt &RHS) const {
1208 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1208, __extension__ __PRETTY_FUNCTION__
))
;
1209 if (isSingleWord())
1210 return (U.VAL & ~RHS.U.VAL) == 0;
1211 return isSubsetOfSlowCase(RHS);
1212 }
1213
1214 /// @}
1215 /// \name Resizing Operators
1216 /// @{
1217
1218 /// Truncate to new width.
1219 ///
1220 /// Truncate the APInt to a specified width. It is an error to specify a width
1221 /// that is greater than the current width.
1222 APInt trunc(unsigned width) const;
1223
1224 /// Truncate to new width with unsigned saturation.
1225 ///
1226 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1227 /// the new bitwidth, then return truncated APInt. Else, return max value.
1228 APInt truncUSat(unsigned width) const;
1229
1230 /// Truncate to new width with signed saturation.
1231 ///
1232 /// If this APInt, treated as signed integer, can be losslessly truncated to
1233 /// the new bitwidth, then return truncated APInt. Else, return either
1234 /// signed min value if the APInt was negative, or signed max value.
1235 APInt truncSSat(unsigned width) const;
1236
1237 /// Sign extend to a new width.
1238 ///
1239 /// This operation sign extends the APInt to a new width. If the high order
1240 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1241 /// It is an error to specify a width that is less than the
1242 /// current width.
1243 APInt sext(unsigned width) const;
1244
1245 /// Zero extend to a new width.
1246 ///
1247 /// This operation zero extends the APInt to a new width. The high order bits
1248 /// are filled with 0 bits. It is an error to specify a width that is less
1249 /// than the current width.
1250 APInt zext(unsigned width) const;
1251
1252 /// Sign extend or truncate to width
1253 ///
1254 /// Make this APInt have the bit width given by \p width. The value is sign
1255 /// extended, truncated, or left alone to make it that width.
1256 APInt sextOrTrunc(unsigned width) const;
1257
1258 /// Zero extend or truncate to width
1259 ///
1260 /// Make this APInt have the bit width given by \p width. The value is zero
1261 /// extended, truncated, or left alone to make it that width.
1262 APInt zextOrTrunc(unsigned width) const;
1263
1264 /// @}
1265 /// \name Bit Manipulation Operators
1266 /// @{
1267
1268 /// Set every bit to 1.
1269 void setAllBits() {
1270 if (isSingleWord())
1271 U.VAL = WORDTYPE_MAX;
1272 else
1273 // Set all the bits in all the words.
1274 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1275 // Clear the unused ones
1276 clearUnusedBits();
1277 }
1278
1279 /// Set the given bit to 1 whose position is given as "bitPosition".
1280 void setBit(unsigned BitPosition) {
1281 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1281, __extension__ __PRETTY_FUNCTION__
))
;
1282 WordType Mask = maskBit(BitPosition);
1283 if (isSingleWord())
1284 U.VAL |= Mask;
1285 else
1286 U.pVal[whichWord(BitPosition)] |= Mask;
1287 }
1288
1289 /// Set the sign bit to 1.
1290 void setSignBit() { setBit(BitWidth - 1); }
1291
1292 /// Set a given bit to a given value.
1293 void setBitVal(unsigned BitPosition, bool BitValue) {
1294 if (BitValue)
1295 setBit(BitPosition);
1296 else
1297 clearBit(BitPosition);
1298 }
1299
1300 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1301 /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
1302 /// setBits when \p loBit < \p hiBit.
1303 /// For \p loBit == \p hiBit wrap case, set every bit to 1.
1304 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1305 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1305, __extension__ __PRETTY_FUNCTION__
))
;
1306 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1306, __extension__ __PRETTY_FUNCTION__
))
;
1307 if (loBit < hiBit) {
1308 setBits(loBit, hiBit);
1309 return;
1310 }
1311 setLowBits(hiBit);
1312 setHighBits(BitWidth - loBit);
1313 }
1314
1315 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1316 /// This function handles case when \p loBit <= \p hiBit.
1317 void setBits(unsigned loBit, unsigned hiBit) {
1318 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1318, __extension__ __PRETTY_FUNCTION__
))
;
6
Assuming 'hiBit' is <= field 'BitWidth'
7
'?' condition is true
1319 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1319, __extension__ __PRETTY_FUNCTION__
))
;
8
'?' condition is true
1320 assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit"
) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "llvm/include/llvm/ADT/APInt.h", 1320, __extension__ __PRETTY_FUNCTION__
))
;
9
'?' condition is true
1321 if (loBit == hiBit)
10
Assuming 'loBit' is not equal to 'hiBit'
1322 return;
1323 if (loBit
10.1
'loBit' is < APINT_BITS_PER_WORD
10.1
'loBit' is < APINT_BITS_PER_WORD
< APINT_BITS_PER_WORD && hiBit
10.2
'hiBit' is <= APINT_BITS_PER_WORD
10.2
'hiBit' is <= APINT_BITS_PER_WORD
<= APINT_BITS_PER_WORD) {
11
Taking true branch
1324 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1325 mask <<= loBit;
1326 if (isSingleWord())
12
Taking true branch
1327 U.VAL |= mask;
1328 else
1329 U.pVal[0] |= mask;
1330 } else {
1331 setBitsSlowCase(loBit, hiBit);
1332 }
1333 }
1334
1335 /// Set the top bits starting from loBit.
1336 void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); }
1337
1338 /// Set the bottom loBits bits.
1339 void setLowBits(unsigned loBits) { return setBits(0, loBits); }
5
Calling 'APInt::setBits'
13
Returning from 'APInt::setBits'
1340
1341 /// Set the top hiBits bits.
1342 void setHighBits(unsigned hiBits) {
1343 return setBits(BitWidth - hiBits, BitWidth);
1344 }
1345
1346 /// Set every bit to 0.
1347 void clearAllBits() {
1348 if (isSingleWord())
1349 U.VAL = 0;
1350 else
1351 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1352 }
1353
1354 /// Set a given bit to 0.
1355 ///
1356 /// Set the given bit to 0 whose position is given as "bitPosition".
1357 void clearBit(unsigned BitPosition) {
1358 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1358, __extension__ __PRETTY_FUNCTION__
))
;
1359 WordType Mask = ~maskBit(BitPosition);
1360 if (isSingleWord())
1361 U.VAL &= Mask;
1362 else
1363 U.pVal[whichWord(BitPosition)] &= Mask;
1364 }
1365
1366 /// Set bottom loBits bits to 0.
1367 void clearLowBits(unsigned loBits) {
1368 assert(loBits <= BitWidth && "More bits than bitwidth")(static_cast <bool> (loBits <= BitWidth && "More bits than bitwidth"
) ? void (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\""
, "llvm/include/llvm/ADT/APInt.h", 1368, __extension__ __PRETTY_FUNCTION__
))
;
1369 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1370 *this &= Keep;
1371 }
1372
1373 /// Set the sign bit to 0.
1374 void clearSignBit() { clearBit(BitWidth - 1); }
1375
1376 /// Toggle every bit to its opposite value.
1377 void flipAllBits() {
1378 if (isSingleWord()) {
1379 U.VAL ^= WORDTYPE_MAX;
1380 clearUnusedBits();
1381 } else {
1382 flipAllBitsSlowCase();
1383 }
1384 }
1385
1386 /// Toggles a given bit to its opposite value.
1387 ///
1388 /// Toggle a given bit to its opposite value whose position is given
1389 /// as "bitPosition".
1390 void flipBit(unsigned bitPosition);
1391
1392 /// Negate this APInt in place.
1393 void negate() {
1394 flipAllBits();
1395 ++(*this);
1396 }
1397
1398 /// Insert the bits from a smaller APInt starting at bitPosition.
1399 void insertBits(const APInt &SubBits, unsigned bitPosition);
1400 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1401
1402 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1403 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1404 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1405
1406 /// @}
1407 /// \name Value Characterization Functions
1408 /// @{
1409
1410 /// Return the number of bits in the APInt.
1411 unsigned getBitWidth() const { return BitWidth; }
1412
1413 /// Get the number of words.
1414 ///
1415 /// Here one word's bitwidth equals to that of uint64_t.
1416 ///
1417 /// \returns the number of words to hold the integer value of this APInt.
1418 unsigned getNumWords() const { return getNumWords(BitWidth); }
1419
1420 /// Get the number of words.
1421 ///
1422 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1423 ///
1424 /// \returns the number of words to hold the integer value with a given bit
1425 /// width.
1426 static unsigned getNumWords(unsigned BitWidth) {
1427 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1428 }
1429
1430 /// Compute the number of active bits in the value
1431 ///
1432 /// This function returns the number of active bits which is defined as the
1433 /// bit width minus the number of leading zeros. This is used in several
1434 /// computations to see how "wide" the value is.
1435 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1436
1437 /// Compute the number of active words in the value of this APInt.
1438 ///
1439 /// This is used in conjunction with getActiveData to extract the raw value of
1440 /// the APInt.
1441 unsigned getActiveWords() const {
1442 unsigned numActiveBits = getActiveBits();
1443 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1444 }
1445
1446 /// Get the minimum bit size for this signed APInt
1447 ///
1448 /// Computes the minimum bit width for this APInt while considering it to be a
1449 /// signed (and probably negative) value. If the value is not negative, this
1450 /// function returns the same value as getActiveBits()+1. Otherwise, it
1451 /// returns the smallest bit width that will retain the negative value. For
1452 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1453 /// for -1, this function will always return 1.
1454 unsigned getSignificantBits() const {
1455 return BitWidth - getNumSignBits() + 1;
1456 }
1457
1458 /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead.
1459 unsigned getMinSignedBits() const { return getSignificantBits(); }
1460
1461 /// Get zero extended value
1462 ///
1463 /// This method attempts to return the value of this APInt as a zero extended
1464 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1465 /// uint64_t. Otherwise an assertion will result.
1466 uint64_t getZExtValue() const {
1467 if (isSingleWord())
1468 return U.VAL;
1469 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")(static_cast <bool> (getActiveBits() <= 64 &&
"Too many bits for uint64_t") ? void (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1469, __extension__ __PRETTY_FUNCTION__
))
;
1470 return U.pVal[0];
1471 }
1472
1473 /// Get sign extended value
1474 ///
1475 /// This method attempts to return the value of this APInt as a sign extended
1476 /// int64_t. The bit width must be <= 64 or the value must fit within an
1477 /// int64_t. Otherwise an assertion will result.
1478 int64_t getSExtValue() const {
1479 if (isSingleWord())
1480 return SignExtend64(U.VAL, BitWidth);
1481 assert(getSignificantBits() <= 64 && "Too many bits for int64_t")(static_cast <bool> (getSignificantBits() <= 64 &&
"Too many bits for int64_t") ? void (0) : __assert_fail ("getSignificantBits() <= 64 && \"Too many bits for int64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1481, __extension__ __PRETTY_FUNCTION__
))
;
1482 return int64_t(U.pVal[0]);
1483 }
1484
1485 /// Get bits required for string value.
1486 ///
1487 /// This method determines how many bits are required to hold the APInt
1488 /// equivalent of the string given by \p str.
1489 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1490
1491 /// Get the bits that are sufficient to represent the string value. This may
1492 /// over estimate the amount of bits required, but it does not require
1493 /// parsing the value in the string.
1494 static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
1495
1496 /// The APInt version of the countLeadingZeros functions in
1497 /// MathExtras.h.
1498 ///
1499 /// It counts the number of zeros from the most significant bit to the first
1500 /// one bit.
1501 ///
1502 /// \returns BitWidth if the value is zero, otherwise returns the number of
1503 /// zeros from the most significant bit to the first one bits.
1504 unsigned countLeadingZeros() const {
1505 if (isSingleWord()) {
1506 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1507 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1508 }
1509 return countLeadingZerosSlowCase();
1510 }
1511
1512 /// Count the number of leading one bits.
1513 ///
1514 /// This function is an APInt version of the countLeadingOnes
1515 /// functions in MathExtras.h. It counts the number of ones from the most
1516 /// significant bit to the first zero bit.
1517 ///
1518 /// \returns 0 if the high order bit is not set, otherwise returns the number
1519 /// of 1 bits from the most significant to the least
1520 unsigned countLeadingOnes() const {
1521 if (isSingleWord()) {
1522 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1523 return 0;
1524 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1525 }
1526 return countLeadingOnesSlowCase();
1527 }
1528
1529 /// Computes the number of leading bits of this APInt that are equal to its
1530 /// sign bit.
1531 unsigned getNumSignBits() const {
1532 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1533 }
1534
1535 /// Count the number of trailing zero bits.
1536 ///
1537 /// This function is an APInt version of the countTrailingZeros
1538 /// functions in MathExtras.h. It counts the number of zeros from the least
1539 /// significant bit to the first set bit.
1540 ///
1541 /// \returns BitWidth if the value is zero, otherwise returns the number of
1542 /// zeros from the least significant bit to the first one bit.
1543 unsigned countTrailingZeros() const {
1544 if (isSingleWord()) {
1545 unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL);
1546 return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
1547 }
1548 return countTrailingZerosSlowCase();
1549 }
1550
1551 /// Count the number of trailing one bits.
1552 ///
1553 /// This function is an APInt version of the countTrailingOnes
1554 /// functions in MathExtras.h. It counts the number of ones from the least
1555 /// significant bit to the first zero bit.
1556 ///
1557 /// \returns BitWidth if the value is all ones, otherwise returns the number
1558 /// of ones from the least significant bit to the first zero bit.
1559 unsigned countTrailingOnes() const {
1560 if (isSingleWord())
1561 return llvm::countTrailingOnes(U.VAL);
1562 return countTrailingOnesSlowCase();
1563 }
1564
1565 /// Count the number of bits set.
1566 ///
1567 /// This function is an APInt version of the countPopulation functions
1568 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1569 ///
1570 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1571 unsigned countPopulation() const {
1572 if (isSingleWord())
1573 return llvm::countPopulation(U.VAL);
1574 return countPopulationSlowCase();
1575 }
1576
1577 /// @}
1578 /// \name Conversion Functions
1579 /// @{
1580 void print(raw_ostream &OS, bool isSigned) const;
1581
1582 /// Converts an APInt to a string and append it to Str. Str is commonly a
1583 /// SmallString.
1584 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1585 bool formatAsCLiteral = false) const;
1586
1587 /// Considers the APInt to be unsigned and converts it into a string in the
1588 /// radix given. The radix can be 2, 8, 10 16, or 36.
1589 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1590 toString(Str, Radix, false, false);
1591 }
1592
1593 /// Considers the APInt to be signed and converts it into a string in the
1594 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1595 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1596 toString(Str, Radix, true, false);
1597 }
1598
1599 /// \returns a byte-swapped representation of this APInt Value.
1600 APInt byteSwap() const;
1601
1602 /// \returns the value with the bit representation reversed of this APInt
1603 /// Value.
1604 APInt reverseBits() const;
1605
1606 /// Converts this APInt to a double value.
1607 double roundToDouble(bool isSigned) const;
1608
1609 /// Converts this unsigned APInt to a double value.
1610 double roundToDouble() const { return roundToDouble(false); }
1611
1612 /// Converts this signed APInt to a double value.
1613 double signedRoundToDouble() const { return roundToDouble(true); }
1614
1615 /// Converts APInt bits to a double
1616 ///
1617 /// The conversion does not do a translation from integer to double, it just
1618 /// re-interprets the bits as a double. Note that it is valid to do this on
1619 /// any bit width. Exactly 64 bits will be translated.
1620 double bitsToDouble() const { return BitsToDouble(getWord(0)); }
1621
1622 /// Converts APInt bits to a float
1623 ///
1624 /// The conversion does not do a translation from integer to float, it just
1625 /// re-interprets the bits as a float. Note that it is valid to do this on
1626 /// any bit width. Exactly 32 bits will be translated.
1627 float bitsToFloat() const {
1628 return BitsToFloat(static_cast<uint32_t>(getWord(0)));
1629 }
1630
1631 /// Converts a double to APInt bits.
1632 ///
1633 /// The conversion does not do a translation from double to integer, it just
1634 /// re-interprets the bits of the double.
1635 static APInt doubleToBits(double V) {
1636 return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V));
1637 }
1638
1639 /// Converts a float to APInt bits.
1640 ///
1641 /// The conversion does not do a translation from float to integer, it just
1642 /// re-interprets the bits of the float.
1643 static APInt floatToBits(float V) {
1644 return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V));
1645 }
1646
1647 /// @}
1648 /// \name Mathematics Operations
1649 /// @{
1650
1651 /// \returns the floor log base 2 of this APInt.
1652 unsigned logBase2() const { return getActiveBits() - 1; }
1653
1654 /// \returns the ceil log base 2 of this APInt.
1655 unsigned ceilLogBase2() const {
1656 APInt temp(*this);
1657 --temp;
1658 return temp.getActiveBits();
1659 }
1660
1661 /// \returns the nearest log base 2 of this APInt. Ties round up.
1662 ///
1663 /// NOTE: When we have a BitWidth of 1, we define:
1664 ///
1665 /// log2(0) = UINT32_MAX
1666 /// log2(1) = 0
1667 ///
1668 /// to get around any mathematical concerns resulting from
1669 /// referencing 2 in a space where 2 does no exist.
1670 unsigned nearestLogBase2() const;
1671
1672 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1673 /// otherwise
1674 int32_t exactLogBase2() const {
1675 if (!isPowerOf2())
1676 return -1;
1677 return logBase2();
1678 }
1679
1680 /// Compute the square root.
1681 APInt sqrt() const;
1682
1683 /// Get the absolute value. If *this is < 0 then return -(*this), otherwise
1684 /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit
1685 /// wide APInt) is unchanged due to how negation works.
1686 APInt abs() const {
1687 if (isNegative())
1688 return -(*this);
1689 return *this;
1690 }
1691
1692 /// \returns the multiplicative inverse for a given modulo.
1693 APInt multiplicativeInverse(const APInt &modulo) const;
1694
1695 /// @}
1696 /// \name Building-block Operations for APInt and APFloat
1697 /// @{
1698
1699 // These building block operations operate on a representation of arbitrary
1700 // precision, two's-complement, bignum integer values. They should be
1701 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1702 // generally a pointer to the base of an array of integer parts, representing
1703 // an unsigned bignum, and a count of how many parts there are.
1704
1705 /// Sets the least significant part of a bignum to the input value, and zeroes
1706 /// out higher parts.
1707 static void tcSet(WordType *, WordType, unsigned);
1708
1709 /// Assign one bignum to another.
1710 static void tcAssign(WordType *, const WordType *, unsigned);
1711
1712 /// Returns true if a bignum is zero, false otherwise.
1713 static bool tcIsZero(const WordType *, unsigned);
1714
1715 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1716 static int tcExtractBit(const WordType *, unsigned bit);
1717
1718 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1719 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1720 /// significant bit of DST. All high bits above srcBITS in DST are
1721 /// zero-filled.
1722 static void tcExtract(WordType *, unsigned dstCount, const WordType *,
1723 unsigned srcBits, unsigned srcLSB);
1724
1725 /// Set the given bit of a bignum. Zero-based.
1726 static void tcSetBit(WordType *, unsigned bit);
1727
1728 /// Clear the given bit of a bignum. Zero-based.
1729 static void tcClearBit(WordType *, unsigned bit);
1730
1731 /// Returns the bit number of the least or most significant set bit of a
1732 /// number. If the input number has no bits set -1U is returned.
1733 static unsigned tcLSB(const WordType *, unsigned n);
1734 static unsigned tcMSB(const WordType *parts, unsigned n);
1735
1736 /// Negate a bignum in-place.
1737 static void tcNegate(WordType *, unsigned);
1738
1739 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1740 static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned);
1741 /// DST += RHS. Returns the carry flag.
1742 static WordType tcAddPart(WordType *, WordType, unsigned);
1743
1744 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1745 static WordType tcSubtract(WordType *, const WordType *, WordType carry,
1746 unsigned);
1747 /// DST -= RHS. Returns the carry flag.
1748 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1749
1750 /// DST += SRC * MULTIPLIER + PART if add is true
1751 /// DST = SRC * MULTIPLIER + PART if add is false
1752 ///
1753 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1754 /// start at the same point, i.e. DST == SRC.
1755 ///
1756 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1757 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1758 /// result, and if all of the omitted higher parts were zero return zero,
1759 /// otherwise overflow occurred and return one.
1760 static int tcMultiplyPart(WordType *dst, const WordType *src,
1761 WordType multiplier, WordType carry,
1762 unsigned srcParts, unsigned dstParts, bool add);
1763
1764 /// DST = LHS * RHS, where DST has the same width as the operands and is
1765 /// filled with the least significant parts of the result. Returns one if
1766 /// overflow occurred, otherwise zero. DST must be disjoint from both
1767 /// operands.
1768 static int tcMultiply(WordType *, const WordType *, const WordType *,
1769 unsigned);
1770
1771 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1772 /// operands. No overflow occurs. DST must be disjoint from both operands.
1773 static void tcFullMultiply(WordType *, const WordType *, const WordType *,
1774 unsigned, unsigned);
1775
1776 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1777 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1778 /// REMAINDER to the remainder, return zero. i.e.
1779 ///
1780 /// OLD_LHS = RHS * LHS + REMAINDER
1781 ///
1782 /// SCRATCH is a bignum of the same size as the operands and result for use by
1783 /// the routine; its contents need not be initialized and are destroyed. LHS,
1784 /// REMAINDER and SCRATCH must be distinct.
1785 static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder,
1786 WordType *scratch, unsigned parts);
1787
1788 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1789 /// restrictions on Count.
1790 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1791
1792 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1793 /// restrictions on Count.
1794 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1795
1796 /// Comparison (unsigned) of two bignums.
1797 static int tcCompare(const WordType *, const WordType *, unsigned);
1798
1799 /// Increment a bignum in-place. Return the carry flag.
1800 static WordType tcIncrement(WordType *dst, unsigned parts) {
1801 return tcAddPart(dst, 1, parts);
1802 }
1803
1804 /// Decrement a bignum in-place. Return the borrow flag.
1805 static WordType tcDecrement(WordType *dst, unsigned parts) {
1806 return tcSubtractPart(dst, 1, parts);
1807 }
1808
1809 /// Used to insert APInt objects, or objects that contain APInt objects, into
1810 /// FoldingSets.
1811 void Profile(FoldingSetNodeID &id) const;
1812
1813 /// debug method
1814 void dump() const;
1815
1816 /// Returns whether this instance allocated memory.
1817 bool needsCleanup() const { return !isSingleWord(); }
1818
1819private:
1820 /// This union is used to store the integer value. When the
1821 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
1822 union {
1823 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
1824 uint64_t *pVal; ///< Used to store the >64 bits integer value.
1825 } U;
1826
1827 unsigned BitWidth = 1; ///< The number of bits in this APInt.
1828
1829 friend struct DenseMapInfo<APInt, void>;
1830 friend class APSInt;
1831
1832 /// This constructor is used only internally for speed of construction of
1833 /// temporaries. It is unsafe since it takes ownership of the pointer, so it
1834 /// is not public.
1835 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }
1836
1837 /// Determine which word a bit is in.
1838 ///
1839 /// \returns the word position for the specified bit position.
1840 static unsigned whichWord(unsigned bitPosition) {
1841 return bitPosition / APINT_BITS_PER_WORD;
1842 }
1843
1844 /// Determine which bit in a word the specified bit position is in.
1845 static unsigned whichBit(unsigned bitPosition) {
1846 return bitPosition % APINT_BITS_PER_WORD;
1847 }
1848
1849 /// Get a single bit mask.
1850 ///
1851 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
1852 /// This method generates and returns a uint64_t (word) mask for a single
1853 /// bit at a specific bit position. This is used to mask the bit in the
1854 /// corresponding word.
1855 static uint64_t maskBit(unsigned bitPosition) {
1856 return 1ULL << whichBit(bitPosition);
1857 }
1858
1859 /// Clear unused high order bits
1860 ///
1861 /// This method is used internally to clear the top "N" bits in the high order
1862 /// word that are not used by the APInt. This is needed after the most
1863 /// significant word is assigned a value to ensure that those bits are
1864 /// zero'd out.
1865 APInt &clearUnusedBits() {
1866 // Compute how many bits are used in the final word.
1867 unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;
1868
1869 // Mask out the high bits.
1870 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
1871 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1872 mask = 0;
1873
1874 if (isSingleWord())
1875 U.VAL &= mask;
1876 else
1877 U.pVal[getNumWords() - 1] &= mask;
1878 return *this;
1879 }
1880
1881 /// Get the word corresponding to a bit position
1882 /// \returns the corresponding word for the specified bit position.
1883 uint64_t getWord(unsigned bitPosition) const {
1884 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
1885 }
1886
1887 /// Utility method to change the bit width of this APInt to new bit width,
1888 /// allocating and/or deallocating as necessary. There is no guarantee on the
1889 /// value of any bits upon return. Caller should populate the bits after.
1890 void reallocate(unsigned NewBitWidth);
1891
1892 /// Convert a char array into an APInt
1893 ///
1894 /// \param radix 2, 8, 10, 16, or 36
1895 /// Converts a string into a number. The string must be non-empty
1896 /// and well-formed as a number of the given base. The bit-width
1897 /// must be sufficient to hold the result.
1898 ///
1899 /// This is used by the constructors that take string arguments.
1900 ///
1901 /// StringRef::getAsInteger is superficially similar but (1) does
1902 /// not assume that the string is well-formed and (2) grows the
1903 /// result to hold the input.
1904 void fromString(unsigned numBits, StringRef str, uint8_t radix);
1905
1906 /// An internal division function for dividing APInts.
1907 ///
1908 /// This is used by the toString method to divide by the radix. It simply
1909 /// provides a more convenient form of divide for internal use since KnuthDiv
1910 /// has specific constraints on its inputs. If those constraints are not met
1911 /// then it provides a simpler form of divide.
1912 static void divide(const WordType *LHS, unsigned lhsWords,
1913 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
1914 WordType *Remainder);
1915
1916 /// out-of-line slow case for inline constructor
1917 void initSlowCase(uint64_t val, bool isSigned);
1918
1919 /// shared code between two array constructors
1920 void initFromArray(ArrayRef<uint64_t> array);
1921
1922 /// out-of-line slow case for inline copy constructor
1923 void initSlowCase(const APInt &that);
1924
1925 /// out-of-line slow case for shl
1926 void shlSlowCase(unsigned ShiftAmt);
1927
1928 /// out-of-line slow case for lshr.
1929 void lshrSlowCase(unsigned ShiftAmt);
1930
1931 /// out-of-line slow case for ashr.
1932 void ashrSlowCase(unsigned ShiftAmt);
1933
1934 /// out-of-line slow case for operator=
1935 void assignSlowCase(const APInt &RHS);
1936
1937 /// out-of-line slow case for operator==
1938 bool equalSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1939
1940 /// out-of-line slow case for countLeadingZeros
1941 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
1942
1943 /// out-of-line slow case for countLeadingOnes.
1944 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
1945
1946 /// out-of-line slow case for countTrailingZeros.
1947 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
1948
1949 /// out-of-line slow case for countTrailingOnes
1950 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
1951
1952 /// out-of-line slow case for countPopulation
1953 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
1954
1955 /// out-of-line slow case for intersects.
1956 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1957
1958 /// out-of-line slow case for isSubsetOf.
1959 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1960
1961 /// out-of-line slow case for setBits.
1962 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
1963
1964 /// out-of-line slow case for flipAllBits.
1965 void flipAllBitsSlowCase();
1966
1967 /// out-of-line slow case for concat.
1968 APInt concatSlowCase(const APInt &NewLSB) const;
1969
1970 /// out-of-line slow case for operator&=.
1971 void andAssignSlowCase(const APInt &RHS);
1972
1973 /// out-of-line slow case for operator|=.
1974 void orAssignSlowCase(const APInt &RHS);
1975
1976 /// out-of-line slow case for operator^=.
1977 void xorAssignSlowCase(const APInt &RHS);
1978
1979 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
1980 /// to, or greater than RHS.
1981 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1982
1983 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
1984 /// to, or greater than RHS.
1985 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1986
1987 /// @}
1988};
1989
1990inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
1991
1992inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1993
1994/// Unary bitwise complement operator.
1995///
1996/// \returns an APInt that is the bitwise complement of \p v.
1997inline APInt operator~(APInt v) {
1998 v.flipAllBits();
1999 return v;
2000}
2001
2002inline APInt operator&(APInt a, const APInt &b) {
2003 a &= b;
2004 return a;
2005}
2006
2007inline APInt operator&(const APInt &a, APInt &&b) {
2008 b &= a;
2009 return std::move(b);
2010}
2011
2012inline APInt operator&(APInt a, uint64_t RHS) {
2013 a &= RHS;
2014 return a;
2015}
2016
2017inline APInt operator&(uint64_t LHS, APInt b) {
2018 b &= LHS;
2019 return b;
2020}
2021
2022inline APInt operator|(APInt a, const APInt &b) {
2023 a |= b;
2024 return a;
2025}
2026
2027inline APInt operator|(const APInt &a, APInt &&b) {
2028 b |= a;
2029 return std::move(b);
2030}
2031
2032inline APInt operator|(APInt a, uint64_t RHS) {
2033 a |= RHS;
2034 return a;
2035}
2036
2037inline APInt operator|(uint64_t LHS, APInt b) {
2038 b |= LHS;
2039 return b;
2040}
2041
2042inline APInt operator^(APInt a, const APInt &b) {
2043 a ^= b;
2044 return a;
2045}
2046
2047inline APInt operator^(const APInt &a, APInt &&b) {
2048 b ^= a;
2049 return std::move(b);
2050}
2051
2052inline APInt operator^(APInt a, uint64_t RHS) {
2053 a ^= RHS;
2054 return a;
2055}
2056
2057inline APInt operator^(uint64_t LHS, APInt b) {
2058 b ^= LHS;
2059 return b;
2060}
2061
2062inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2063 I.print(OS, true);
2064 return OS;
2065}
2066
2067inline APInt operator-(APInt v) {
2068 v.negate();
2069 return v;
2070}
2071
2072inline APInt operator+(APInt a, const APInt &b) {
2073 a += b;
2074 return a;
2075}
2076
2077inline APInt operator+(const APInt &a, APInt &&b) {
2078 b += a;
2079 return std::move(b);
2080}
2081
2082inline APInt operator+(APInt a, uint64_t RHS) {
2083 a += RHS;
2084 return a;
2085}
2086
2087inline APInt operator+(uint64_t LHS, APInt b) {
2088 b += LHS;
2089 return b;
2090}
2091
2092inline APInt operator-(APInt a, const APInt &b) {
2093 a -= b;
2094 return a;
2095}
2096
2097inline APInt operator-(const APInt &a, APInt &&b) {
2098 b.negate();
2099 b += a;
2100 return std::move(b);
2101}
2102
2103inline APInt operator-(APInt a, uint64_t RHS) {
2104 a -= RHS;
2105 return a;
2106}
2107
2108inline APInt operator-(uint64_t LHS, APInt b) {
2109 b.negate();
2110 b += LHS;
2111 return b;
2112}
2113
2114inline APInt operator*(APInt a, uint64_t RHS) {
2115 a *= RHS;
2116 return a;
2117}
2118
2119inline APInt operator*(uint64_t LHS, APInt b) {
2120 b *= LHS;
2121 return b;
2122}
2123
2124namespace APIntOps {
2125
2126/// Determine the smaller of two APInts considered to be signed.
2127inline const APInt &smin(const APInt &A, const APInt &B) {
2128 return A.slt(B) ? A : B;
2129}
2130
2131/// Determine the larger of two APInts considered to be signed.
2132inline const APInt &smax(const APInt &A, const APInt &B) {
2133 return A.sgt(B) ? A : B;
2134}
2135
2136/// Determine the smaller of two APInts considered to be unsigned.
2137inline const APInt &umin(const APInt &A, const APInt &B) {
2138 return A.ult(B) ? A : B;
2139}
2140
2141/// Determine the larger of two APInts considered to be unsigned.
2142inline const APInt &umax(const APInt &A, const APInt &B) {
2143 return A.ugt(B) ? A : B;
2144}
2145
2146/// Compute GCD of two unsigned APInt values.
2147///
2148/// This function returns the greatest common divisor of the two APInt values
2149/// using Stein's algorithm.
2150///
2151/// \returns the greatest common divisor of A and B.
2152APInt GreatestCommonDivisor(APInt A, APInt B);
2153
2154/// Converts the given APInt to a double value.
2155///
2156/// Treats the APInt as an unsigned value for conversion purposes.
2157inline double RoundAPIntToDouble(const APInt &APIVal) {
2158 return APIVal.roundToDouble();
2159}
2160
2161/// Converts the given APInt to a double value.
2162///
2163/// Treats the APInt as a signed value for conversion purposes.
2164inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2165 return APIVal.signedRoundToDouble();
2166}
2167
2168/// Converts the given APInt to a float value.
2169inline float RoundAPIntToFloat(const APInt &APIVal) {
2170 return float(RoundAPIntToDouble(APIVal));
2171}
2172
2173/// Converts the given APInt to a float value.
2174///
2175/// Treats the APInt as a signed value for conversion purposes.
2176inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2177 return float(APIVal.signedRoundToDouble());
2178}
2179
2180/// Converts the given double value into a APInt.
2181///
2182/// This function convert a double value to an APInt value.
2183APInt RoundDoubleToAPInt(double Double, unsigned width);
2184
2185/// Converts a float value into a APInt.
2186///
2187/// Converts a float value into an APInt value.
2188inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2189 return RoundDoubleToAPInt(double(Float), width);
2190}
2191
2192/// Return A unsign-divided by B, rounded by the given rounding mode.
2193APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2194
2195/// Return A sign-divided by B, rounded by the given rounding mode.
2196APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2197
2198/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2199/// (e.g. 32 for i32).
2200/// This function finds the smallest number n, such that
2201/// (a) n >= 0 and q(n) = 0, or
2202/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2203/// integers, belong to two different intervals [Rk, Rk+R),
2204/// where R = 2^BW, and k is an integer.
2205/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2206/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2207/// subtraction (treated as addition of negated numbers) would always
2208/// count as an overflow, but here we want to allow values to decrease
2209/// and increase as long as they are within the same interval.
2210/// Specifically, adding of two negative numbers should not cause an
2211/// overflow (as long as the magnitude does not exceed the bit width).
2212/// On the other hand, given a positive number, adding a negative
2213/// number to it can give a negative result, which would cause the
2214/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2215/// treated as a special case of an overflow.
2216///
2217/// This function returns None if after finding k that minimizes the
2218/// positive solution to q(n) = kR, both solutions are contained between
2219/// two consecutive integers.
2220///
2221/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2222/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2223/// virtue of *signed* overflow. This function will *not* find such an n,
2224/// however it may find a value of n satisfying the inequalities due to
2225/// an *unsigned* overflow (if the values are treated as unsigned).
2226/// To find a solution for a signed overflow, treat it as a problem of
2227/// finding an unsigned overflow with a range with of BW-1.
2228///
2229/// The returned value may have a different bit width from the input
2230/// coefficients.
2231Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2232 unsigned RangeWidth);
2233
2234/// Compare two values, and if they are different, return the position of the
2235/// most significant bit that is different in the values.
2236Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2237 const APInt &B);
2238
2239/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
2240/// by \param A to \param NewBitWidth bits.
2241///
2242/// MatchAnyBits: (Default)
2243/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
2244/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
2245///
2246/// MatchAllBits:
2247/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
2248/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0001
2249/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other.
2250APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth,
2251 bool MatchAllBits = false);
2252} // namespace APIntOps
2253
2254// See friend declaration above. This additional declaration is required in
2255// order to compile LLVM with IBM xlC compiler.
2256hash_code hash_value(const APInt &Arg);
2257
2258/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2259/// with the integer held in IntVal.
2260void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2261
2262/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2263/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2264void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
2265
2266/// Provide DenseMapInfo for APInt.
2267template <> struct DenseMapInfo<APInt, void> {
2268 static inline APInt getEmptyKey() {
2269 APInt V(nullptr, 0);
2270 V.U.VAL = 0;
2271 return V;
2272 }
2273
2274 static inline APInt getTombstoneKey() {
2275 APInt V(nullptr, 0);
2276 V.U.VAL = 1;
2277 return V;
2278 }
2279
2280 static unsigned getHashValue(const APInt &Key);
2281
2282 static bool isEqual(const APInt &LHS, const APInt &RHS) {
2283 return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
2284 }
2285};
2286
2287} // namespace llvm
2288
2289#endif