Bug Summary

File:llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1563, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/Target/Hexagon -I include -I /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-01-16-232930-107970-1 -x c++ /build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonLoopIdiomRecognition.h"
10#include "llvm/ADT/APInt.h"
11#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/Triple.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/InstructionSimplify.h"
20#include "llvm/Analysis/LoopAnalysisManager.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopPass.h"
23#include "llvm/Analysis/MemoryLocation.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/ScalarEvolutionExpressions.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/ValueTracking.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/IntrinsicsHexagon.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/User.h"
49#include "llvm/IR/Value.h"
50#include "llvm/InitializePasses.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include "llvm/Transforms/Utils/Local.h"
62#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
63#include <algorithm>
64#include <array>
65#include <cassert>
66#include <cstdint>
67#include <cstdlib>
68#include <deque>
69#include <functional>
70#include <iterator>
71#include <map>
72#include <set>
73#include <utility>
74#include <vector>
75
76#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
77
78using namespace llvm;
79
80static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memcpy in loop idiom recognition"));
83
84static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
85 cl::Hidden, cl::init(false),
86 cl::desc("Disable generation of memmove in loop idiom recognition"));
87
88static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
89 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
90 "check guarding the memmove."));
91
92static cl::opt<unsigned> CompileTimeMemSizeThreshold(
93 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
94 cl::desc("Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
96
97static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
98 cl::Hidden, cl::init(true),
99 cl::desc("Only enable generating memmove in non-nested loops"));
100
101static cl::opt<bool> HexagonVolatileMemcpy(
102 "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
103 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
104
105static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
106 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
107
108static const char *HexagonVolatileMemcpyName
109 = "hexagon_memcpy_forward_vp4cp4n2";
110
111
112namespace llvm {
113
114void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
115Pass *createHexagonLoopIdiomPass();
116
117} // end namespace llvm
118
119namespace {
120
121class HexagonLoopIdiomRecognize {
122public:
123 explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
124 LoopInfo *LF, const TargetLibraryInfo *TLI,
125 ScalarEvolution *SE)
126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
127
128 bool run(Loop *L);
129
130private:
131 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
132 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
133 void collectStores(Loop *CurLoop, BasicBlock *BB,
134 SmallVectorImpl<StoreInst *> &Stores);
135 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
136 bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const;
137 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
138 SmallVectorImpl<BasicBlock *> &ExitBlocks);
139 bool runOnCountableLoop(Loop *L);
140
141 AliasAnalysis *AA;
142 const DataLayout *DL;
143 DominatorTree *DT;
144 LoopInfo *LF;
145 const TargetLibraryInfo *TLI;
146 ScalarEvolution *SE;
147 bool HasMemcpy, HasMemmove;
148};
149
150class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {
151public:
152 static char ID;
153
154 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
155 initializeHexagonLoopIdiomRecognizeLegacyPassPass(
156 *PassRegistry::getPassRegistry());
157 }
158
159 StringRef getPassName() const override {
160 return "Recognize Hexagon-specific loop idioms";
161 }
162
163 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 AU.addRequired<LoopInfoWrapperPass>();
165 AU.addRequiredID(LoopSimplifyID);
166 AU.addRequiredID(LCSSAID);
167 AU.addRequired<AAResultsWrapperPass>();
168 AU.addRequired<ScalarEvolutionWrapperPass>();
169 AU.addRequired<DominatorTreeWrapperPass>();
170 AU.addRequired<TargetLibraryInfoWrapperPass>();
171 AU.addPreserved<TargetLibraryInfoWrapperPass>();
172 }
173
174 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
175};
176
177struct Simplifier {
178 struct Rule {
179 using FuncType = std::function<Value *(Instruction *, LLVMContext &)>;
180 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
181 StringRef Name; // For debugging.
182 FuncType Fn;
183 };
184
185 void addRule(StringRef N, const Rule::FuncType &F) {
186 Rules.push_back(Rule(N, F));
187 }
188
189private:
190 struct WorkListType {
191 WorkListType() = default;
192
193 void push_back(Value *V) {
194 // Do not push back duplicates.
195 if (!S.count(V)) {
196 Q.push_back(V);
197 S.insert(V);
198 }
199 }
200
201 Value *pop_front_val() {
202 Value *V = Q.front();
203 Q.pop_front();
204 S.erase(V);
205 return V;
206 }
207
208 bool empty() const { return Q.empty(); }
209
210 private:
211 std::deque<Value *> Q;
212 std::set<Value *> S;
213 };
214
215 using ValueSetType = std::set<Value *>;
216
217 std::vector<Rule> Rules;
218
219public:
220 struct Context {
221 using ValueMapType = DenseMap<Value *, Value *>;
222
223 Value *Root;
224 ValueSetType Used; // The set of all cloned values used by Root.
225 ValueSetType Clones; // The set of all cloned values.
226 LLVMContext &Ctx;
227
228 Context(Instruction *Exp)
229 : Ctx(Exp->getParent()->getParent()->getContext()) {
230 initialize(Exp);
231 }
232
233 ~Context() { cleanup(); }
234
235 void print(raw_ostream &OS, const Value *V) const;
236 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
237
238 private:
239 friend struct Simplifier;
240
241 void initialize(Instruction *Exp);
242 void cleanup();
243
244 template <typename FuncT> void traverse(Value *V, FuncT F);
245 void record(Value *V);
246 void use(Value *V);
247 void unuse(Value *V);
248
249 bool equal(const Instruction *I, const Instruction *J) const;
250 Value *find(Value *Tree, Value *Sub) const;
251 Value *subst(Value *Tree, Value *OldV, Value *NewV);
252 void replace(Value *OldV, Value *NewV);
253 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
254 };
255
256 Value *simplify(Context &C);
257};
258
259 struct PE {
260 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
261
262 const Simplifier::Context &C;
263 const Value *V;
264 };
265
266 LLVM_ATTRIBUTE_USED__attribute__((__used__))
267 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
268 P.C.print(OS, P.V ? P.V : P.C.Root);
269 return OS;
270 }
271
272} // end anonymous namespace
273
274char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
275
276INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
277 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
278INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
279INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
280INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
281INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
282INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
283INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
284INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
285INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
286 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
287
288template <typename FuncT>
289void Simplifier::Context::traverse(Value *V, FuncT F) {
290 WorkListType Q;
291 Q.push_back(V);
292
293 while (!Q.empty()) {
294 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
295 if (!U || U->getParent())
296 continue;
297 if (!F(U))
298 continue;
299 for (Value *Op : U->operands())
300 Q.push_back(Op);
301 }
302}
303
304void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
305 const auto *U = dyn_cast<const Instruction>(V);
306 if (!U) {
307 OS << V << '(' << *V << ')';
308 return;
309 }
310
311 if (U->getParent()) {
312 OS << U << '(';
313 U->printAsOperand(OS, true);
314 OS << ')';
315 return;
316 }
317
318 unsigned N = U->getNumOperands();
319 if (N != 0)
320 OS << U << '(';
321 OS << U->getOpcodeName();
322 for (const Value *Op : U->operands()) {
323 OS << ' ';
324 print(OS, Op);
325 }
326 if (N != 0)
327 OS << ')';
328}
329
330void Simplifier::Context::initialize(Instruction *Exp) {
331 // Perform a deep clone of the expression, set Root to the root
332 // of the clone, and build a map from the cloned values to the
333 // original ones.
334 ValueMapType M;
335 BasicBlock *Block = Exp->getParent();
336 WorkListType Q;
337 Q.push_back(Exp);
338
339 while (!Q.empty()) {
340 Value *V = Q.pop_front_val();
341 if (M.find(V) != M.end())
342 continue;
343 if (Instruction *U = dyn_cast<Instruction>(V)) {
344 if (isa<PHINode>(U) || U->getParent() != Block)
345 continue;
346 for (Value *Op : U->operands())
347 Q.push_back(Op);
348 M.insert({U, U->clone()});
349 }
350 }
351
352 for (std::pair<Value*,Value*> P : M) {
353 Instruction *U = cast<Instruction>(P.second);
354 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
355 auto F = M.find(U->getOperand(i));
356 if (F != M.end())
357 U->setOperand(i, F->second);
358 }
359 }
360
361 auto R = M.find(Exp);
362 assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail
("R != M.end()", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 362, __extension__ __PRETTY_FUNCTION__))
;
363 Root = R->second;
364
365 record(Root);
366 use(Root);
367}
368
369void Simplifier::Context::record(Value *V) {
370 auto Record = [this](Instruction *U) -> bool {
371 Clones.insert(U);
372 return true;
373 };
374 traverse(V, Record);
375}
376
377void Simplifier::Context::use(Value *V) {
378 auto Use = [this](Instruction *U) -> bool {
379 Used.insert(U);
380 return true;
381 };
382 traverse(V, Use);
383}
384
385void Simplifier::Context::unuse(Value *V) {
386 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
387 return;
388
389 auto Unuse = [this](Instruction *U) -> bool {
390 if (!U->use_empty())
391 return false;
392 Used.erase(U);
393 return true;
394 };
395 traverse(V, Unuse);
396}
397
398Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
399 if (Tree == OldV)
400 return NewV;
401 if (OldV == NewV)
402 return Tree;
403
404 WorkListType Q;
405 Q.push_back(Tree);
406 while (!Q.empty()) {
407 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
408 // If U is not an instruction, or it's not a clone, skip it.
409 if (!U || U->getParent())
410 continue;
411 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
412 Value *Op = U->getOperand(i);
413 if (Op == OldV) {
414 U->setOperand(i, NewV);
415 unuse(OldV);
416 } else {
417 Q.push_back(Op);
418 }
419 }
420 }
421 return Tree;
422}
423
424void Simplifier::Context::replace(Value *OldV, Value *NewV) {
425 if (Root == OldV) {
426 Root = NewV;
427 use(Root);
428 return;
429 }
430
431 // NewV may be a complex tree that has just been created by one of the
432 // transformation rules. We need to make sure that it is commoned with
433 // the existing Root to the maximum extent possible.
434 // Identify all subtrees of NewV (including NewV itself) that have
435 // equivalent counterparts in Root, and replace those subtrees with
436 // these counterparts.
437 WorkListType Q;
438 Q.push_back(NewV);
439 while (!Q.empty()) {
440 Value *V = Q.pop_front_val();
441 Instruction *U = dyn_cast<Instruction>(V);
442 if (!U || U->getParent())
443 continue;
444 if (Value *DupV = find(Root, V)) {
445 if (DupV != V)
446 NewV = subst(NewV, V, DupV);
447 } else {
448 for (Value *Op : U->operands())
449 Q.push_back(Op);
450 }
451 }
452
453 // Now, simply replace OldV with NewV in Root.
454 Root = subst(Root, OldV, NewV);
455 use(Root);
456}
457
458void Simplifier::Context::cleanup() {
459 for (Value *V : Clones) {
460 Instruction *U = cast<Instruction>(V);
461 if (!U->getParent())
462 U->dropAllReferences();
463 }
464
465 for (Value *V : Clones) {
466 Instruction *U = cast<Instruction>(V);
467 if (!U->getParent())
468 U->deleteValue();
469 }
470}
471
472bool Simplifier::Context::equal(const Instruction *I,
473 const Instruction *J) const {
474 if (I == J)
475 return true;
476 if (!I->isSameOperationAs(J))
477 return false;
478 if (isa<PHINode>(I))
479 return I->isIdenticalTo(J);
480
481 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
482 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
483 if (OpI == OpJ)
484 continue;
485 auto *InI = dyn_cast<const Instruction>(OpI);
486 auto *InJ = dyn_cast<const Instruction>(OpJ);
487 if (InI && InJ) {
488 if (!equal(InI, InJ))
489 return false;
490 } else if (InI != InJ || !InI)
491 return false;
492 }
493 return true;
494}
495
496Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
497 Instruction *SubI = dyn_cast<Instruction>(Sub);
498 WorkListType Q;
499 Q.push_back(Tree);
500
501 while (!Q.empty()) {
502 Value *V = Q.pop_front_val();
503 if (V == Sub)
504 return V;
505 Instruction *U = dyn_cast<Instruction>(V);
506 if (!U || U->getParent())
507 continue;
508 if (SubI && equal(SubI, U))
509 return U;
510 assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0)
: __assert_fail ("!isa<PHINode>(U)", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 510, __extension__ __PRETTY_FUNCTION__))
;
511 for (Value *Op : U->operands())
512 Q.push_back(Op);
513 }
514 return nullptr;
515}
516
517void Simplifier::Context::link(Instruction *I, BasicBlock *B,
518 BasicBlock::iterator At) {
519 if (I->getParent())
520 return;
521
522 for (Value *Op : I->operands()) {
523 if (Instruction *OpI = dyn_cast<Instruction>(Op))
524 link(OpI, B, At);
525 }
526
527 B->getInstList().insert(At, I);
528}
529
530Value *Simplifier::Context::materialize(BasicBlock *B,
531 BasicBlock::iterator At) {
532 if (Instruction *RootI = dyn_cast<Instruction>(Root))
533 link(RootI, B, At);
534 return Root;
535}
536
537Value *Simplifier::simplify(Context &C) {
538 WorkListType Q;
539 Q.push_back(C.Root);
540 unsigned Count = 0;
541 const unsigned Limit = SimplifyLimit;
542
543 while (!Q.empty()) {
544 if (Count++ >= Limit)
545 break;
546 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
547 if (!U || U->getParent() || !C.Used.count(U))
548 continue;
549 bool Changed = false;
550 for (Rule &R : Rules) {
551 Value *W = R.Fn(U, C.Ctx);
552 if (!W)
553 continue;
554 Changed = true;
555 C.record(W);
556 C.replace(U, W);
557 Q.push_back(C.Root);
558 break;
559 }
560 if (!Changed) {
561 for (Value *Op : U->operands())
562 Q.push_back(Op);
563 }
564 }
565 return Count < Limit ? C.Root : nullptr;
566}
567
568//===----------------------------------------------------------------------===//
569//
570// Implementation of PolynomialMultiplyRecognize
571//
572//===----------------------------------------------------------------------===//
573
574namespace {
575
576 class PolynomialMultiplyRecognize {
577 public:
578 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
579 const DominatorTree &dt, const TargetLibraryInfo &tli,
580 ScalarEvolution &se)
581 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
582
583 bool recognize();
584
585 private:
586 using ValueSeq = SetVector<Value *>;
587
588 IntegerType *getPmpyType() const {
589 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
590 return IntegerType::get(Ctx, 32);
591 }
592
593 bool isPromotableTo(Value *V, IntegerType *Ty);
594 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
595 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
596
597 Value *getCountIV(BasicBlock *BB);
598 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
599 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
600 ValueSeq &Late);
601 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
602 bool commutesWithShift(Instruction *I);
603 bool highBitsAreZero(Value *V, unsigned IterCount);
604 bool keepsHighBitsZero(Value *V, unsigned IterCount);
605 bool isOperandShifted(Instruction *I, Value *Op);
606 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
607 unsigned IterCount);
608 void cleanupLoopBody(BasicBlock *LoopB);
609
610 struct ParsedValues {
611 ParsedValues() = default;
612
613 Value *M = nullptr;
614 Value *P = nullptr;
615 Value *Q = nullptr;
616 Value *R = nullptr;
617 Value *X = nullptr;
618 Instruction *Res = nullptr;
619 unsigned IterCount = 0;
620 bool Left = false;
621 bool Inv = false;
622 };
623
624 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
625 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
626 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
627 Value *CIV, ParsedValues &PV, bool PreScan);
628 unsigned getInverseMxN(unsigned QP);
629 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
630
631 void setupPreSimplifier(Simplifier &S);
632 void setupPostSimplifier(Simplifier &S);
633
634 Loop *CurLoop;
635 const DataLayout &DL;
636 const DominatorTree &DT;
637 const TargetLibraryInfo &TLI;
638 ScalarEvolution &SE;
639 };
640
641} // end anonymous namespace
642
643Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
644 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
645 if (std::distance(PI, PE) != 2)
646 return nullptr;
647 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
648
649 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
650 auto *PN = cast<PHINode>(I);
651 Value *InitV = PN->getIncomingValueForBlock(PB);
652 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
653 continue;
654 Value *IterV = PN->getIncomingValueForBlock(BB);
655 auto *BO = dyn_cast<BinaryOperator>(IterV);
656 if (!BO)
657 continue;
658 if (BO->getOpcode() != Instruction::Add)
659 continue;
660 Value *IncV = nullptr;
661 if (BO->getOperand(0) == PN)
662 IncV = BO->getOperand(1);
663 else if (BO->getOperand(1) == PN)
664 IncV = BO->getOperand(0);
665 if (IncV == nullptr)
666 continue;
667
668 if (auto *T = dyn_cast<ConstantInt>(IncV))
669 if (T->getZExtValue() == 1)
670 return PN;
671 }
672 return nullptr;
673}
674
675static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
676 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
677 Use &TheUse = UI.getUse();
678 ++UI;
679 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
680 if (BB == II->getParent())
681 II->replaceUsesOfWith(I, J);
682 }
683}
684
685bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
686 Value *CIV, ParsedValues &PV) {
687 // Match the following:
688 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
689 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
690 // The condition may also check for equality with the masked value, i.e
691 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
692 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
693
694 Value *CondV = SelI->getCondition();
695 Value *TrueV = SelI->getTrueValue();
696 Value *FalseV = SelI->getFalseValue();
697
698 using namespace PatternMatch;
699
700 CmpInst::Predicate P;
701 Value *A = nullptr, *B = nullptr, *C = nullptr;
702
703 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
704 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
705 return false;
706 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
707 return false;
708 // Matched: select (A & B) == C ? ... : ...
709 // select (A & B) != C ? ... : ...
710
711 Value *X = nullptr, *Sh1 = nullptr;
712 // Check (A & B) for (X & (1 << i)):
713 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
714 Sh1 = A;
715 X = B;
716 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
717 Sh1 = B;
718 X = A;
719 } else {
720 // TODO: Could also check for an induction variable containing single
721 // bit shifted left by 1 in each iteration.
722 return false;
723 }
724
725 bool TrueIfZero;
726
727 // Check C against the possible values for comparison: 0 and (1 << i):
728 if (match(C, m_Zero()))
729 TrueIfZero = (P == CmpInst::ICMP_EQ);
730 else if (C == Sh1)
731 TrueIfZero = (P == CmpInst::ICMP_NE);
732 else
733 return false;
734
735 // So far, matched:
736 // select (X & (1 << i)) ? ... : ...
737 // including variations of the check against zero/non-zero value.
738
739 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
740 if (TrueIfZero) {
741 ShouldSameV = TrueV;
742 ShouldXoredV = FalseV;
743 } else {
744 ShouldSameV = FalseV;
745 ShouldXoredV = TrueV;
746 }
747
748 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
749 Value *T = nullptr;
750 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
751 // Matched: select +++ ? ... : Y ^ Z
752 // select +++ ? Y ^ Z : ...
753 // where +++ denotes previously checked matches.
754 if (ShouldSameV == Y)
755 T = Z;
756 else if (ShouldSameV == Z)
757 T = Y;
758 else
759 return false;
760 R = ShouldSameV;
761 // Matched: select +++ ? R : R ^ T
762 // select +++ ? R ^ T : R
763 // depending on TrueIfZero.
764
765 } else if (match(ShouldSameV, m_Zero())) {
766 // Matched: select +++ ? 0 : ...
767 // select +++ ? ... : 0
768 if (!SelI->hasOneUse())
769 return false;
770 T = ShouldXoredV;
771 // Matched: select +++ ? 0 : T
772 // select +++ ? T : 0
773
774 Value *U = *SelI->user_begin();
775 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
776 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
777 return false;
778 // Matched: xor (select +++ ? 0 : T), R
779 // xor (select +++ ? T : 0), R
780 } else
781 return false;
782
783 // The xor input value T is isolated into its own match so that it could
784 // be checked against an induction variable containing a shifted bit
785 // (todo).
786 // For now, check against (Q << i).
787 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
788 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
789 return false;
790 // Matched: select +++ ? R : R ^ (Q << i)
791 // select +++ ? R ^ (Q << i) : R
792
793 PV.X = X;
794 PV.Q = Q;
795 PV.R = R;
796 PV.Left = true;
797 return true;
798}
799
800bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
801 ParsedValues &PV) {
802 // Match the following:
803 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
804 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
805 // The condition may also check for equality with the masked value, i.e
806 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
807 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
808
809 Value *CondV = SelI->getCondition();
810 Value *TrueV = SelI->getTrueValue();
811 Value *FalseV = SelI->getFalseValue();
812
813 using namespace PatternMatch;
814
815 Value *C = nullptr;
816 CmpInst::Predicate P;
817 bool TrueIfZero;
818
819 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
820 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
821 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
822 return false;
823 // Matched: select C == 0 ? ... : ...
824 // select C != 0 ? ... : ...
825 TrueIfZero = (P == CmpInst::ICMP_EQ);
826 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
827 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
828 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
829 return false;
830 // Matched: select C == 1 ? ... : ...
831 // select C != 1 ? ... : ...
832 TrueIfZero = (P == CmpInst::ICMP_NE);
833 } else
834 return false;
835
836 Value *X = nullptr;
837 if (!match(C, m_And(m_Value(X), m_One())) &&
838 !match(C, m_And(m_One(), m_Value(X))))
839 return false;
840 // Matched: select (X & 1) == +++ ? ... : ...
841 // select (X & 1) != +++ ? ... : ...
842
843 Value *R = nullptr, *Q = nullptr;
844 if (TrueIfZero) {
845 // The select's condition is true if the tested bit is 0.
846 // TrueV must be the shift, FalseV must be the xor.
847 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
848 return false;
849 // Matched: select +++ ? (R >> 1) : ...
850 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
851 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
852 return false;
853 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
854 // with commuting ^.
855 } else {
856 // The select's condition is true if the tested bit is 1.
857 // TrueV must be the xor, FalseV must be the shift.
858 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
859 return false;
860 // Matched: select +++ ? ... : (R >> 1)
861 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
862 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
863 return false;
864 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
865 // with commuting ^.
866 }
867
868 PV.X = X;
869 PV.Q = Q;
870 PV.R = R;
871 PV.Left = false;
872 return true;
873}
874
875bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
876 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
877 bool PreScan) {
878 using namespace PatternMatch;
879
880 // The basic pattern for R = P.Q is:
881 // for i = 0..31
882 // R = phi (0, R')
883 // if (P & (1 << i)) ; test-bit(P, i)
884 // R' = R ^ (Q << i)
885 //
886 // Similarly, the basic pattern for R = (P/Q).Q - P
887 // for i = 0..31
888 // R = phi(P, R')
889 // if (R & (1 << i))
890 // R' = R ^ (Q << i)
891
892 // There exist idioms, where instead of Q being shifted left, P is shifted
893 // right. This produces a result that is shifted right by 32 bits (the
894 // non-shifted result is 64-bit).
895 //
896 // For R = P.Q, this would be:
897 // for i = 0..31
898 // R = phi (0, R')
899 // if ((P >> i) & 1)
900 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
901 // else ; be shifted by 1, not i.
902 // R' = R >> 1
903 //
904 // And for the inverse:
905 // for i = 0..31
906 // R = phi (P, R')
907 // if (R & 1)
908 // R' = (R >> 1) ^ Q
909 // else
910 // R' = R >> 1
911
912 // The left-shifting idioms share the same pattern:
913 // select (X & (1 << i)) ? R ^ (Q << i) : R
914 // Similarly for right-shifting idioms:
915 // select (X & 1) ? (R >> 1) ^ Q
916
917 if (matchLeftShift(SelI, CIV, PV)) {
918 // If this is a pre-scan, getting this far is sufficient.
919 if (PreScan)
920 return true;
921
922 // Need to make sure that the SelI goes back into R.
923 auto *RPhi = dyn_cast<PHINode>(PV.R);
924 if (!RPhi)
925 return false;
926 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
927 return false;
928 PV.Res = SelI;
929
930 // If X is loop invariant, it must be the input polynomial, and the
931 // idiom is the basic polynomial multiply.
932 if (CurLoop->isLoopInvariant(PV.X)) {
933 PV.P = PV.X;
934 PV.Inv = false;
935 } else {
936 // X is not loop invariant. If X == R, this is the inverse pmpy.
937 // Otherwise, check for an xor with an invariant value. If the
938 // variable argument to the xor is R, then this is still a valid
939 // inverse pmpy.
940 PV.Inv = true;
941 if (PV.X != PV.R) {
942 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
943 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
944 return false;
945 auto *I1 = dyn_cast<Instruction>(X1);
946 auto *I2 = dyn_cast<Instruction>(X2);
947 if (!I1 || I1->getParent() != LoopB) {
948 Var = X2;
949 Inv = X1;
950 } else if (!I2 || I2->getParent() != LoopB) {
951 Var = X1;
952 Inv = X2;
953 } else
954 return false;
955 if (Var != PV.R)
956 return false;
957 PV.M = Inv;
958 }
959 // The input polynomial P still needs to be determined. It will be
960 // the entry value of R.
961 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
962 PV.P = EntryP;
963 }
964
965 return true;
966 }
967
968 if (matchRightShift(SelI, PV)) {
969 // If this is an inverse pattern, the Q polynomial must be known at
970 // compile time.
971 if (PV.Inv && !isa<ConstantInt>(PV.Q))
972 return false;
973 if (PreScan)
974 return true;
975 // There is no exact matching of right-shift pmpy.
976 return false;
977 }
978
979 return false;
980}
981
982bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
983 IntegerType *DestTy) {
984 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
985 if (!T || T->getBitWidth() > DestTy->getBitWidth())
986 return false;
987 if (T->getBitWidth() == DestTy->getBitWidth())
988 return true;
989 // Non-instructions are promotable. The reason why an instruction may not
990 // be promotable is that it may produce a different result if its operands
991 // and the result are promoted, for example, it may produce more non-zero
992 // bits. While it would still be possible to represent the proper result
993 // in a wider type, it may require adding additional instructions (which
994 // we don't want to do).
995 Instruction *In = dyn_cast<Instruction>(Val);
996 if (!In)
997 return true;
998 // The bitwidth of the source type is smaller than the destination.
999 // Check if the individual operation can be promoted.
1000 switch (In->getOpcode()) {
1001 case Instruction::PHI:
1002 case Instruction::ZExt:
1003 case Instruction::And:
1004 case Instruction::Or:
1005 case Instruction::Xor:
1006 case Instruction::LShr: // Shift right is ok.
1007 case Instruction::Select:
1008 case Instruction::Trunc:
1009 return true;
1010 case Instruction::ICmp:
1011 if (CmpInst *CI = cast<CmpInst>(In))
1012 return CI->isEquality() || CI->isUnsigned();
1013 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1013
)
;
1014 case Instruction::Add:
1015 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
1016 }
1017 return false;
1018}
1019
1020void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1021 IntegerType *DestTy, BasicBlock *LoopB) {
1022 Type *OrigTy = In->getType();
1023 assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote"
) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1023
, __extension__ __PRETTY_FUNCTION__))
;
1024
1025 // Leave boolean values alone.
1026 if (!In->getType()->isIntegerTy(1))
1027 In->mutateType(DestTy);
1028 unsigned DestBW = DestTy->getBitWidth();
1029
1030 // Handle PHIs.
1031 if (PHINode *P = dyn_cast<PHINode>(In)) {
1032 unsigned N = P->getNumIncomingValues();
1033 for (unsigned i = 0; i != N; ++i) {
1034 BasicBlock *InB = P->getIncomingBlock(i);
1035 if (InB == LoopB)
1036 continue;
1037 Value *InV = P->getIncomingValue(i);
1038 IntegerType *Ty = cast<IntegerType>(InV->getType());
1039 // Do not promote values in PHI nodes of type i1.
1040 if (Ty != P->getType()) {
1041 // If the value type does not match the PHI type, the PHI type
1042 // must have been promoted.
1043 assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ?
void (0) : __assert_fail ("Ty->getBitWidth() < DestBW"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1043
, __extension__ __PRETTY_FUNCTION__))
;
1044 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1045 P->setIncomingValue(i, InV);
1046 }
1047 }
1048 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1049 Value *Op = Z->getOperand(0);
1050 if (Op->getType() == Z->getType())
1051 Z->replaceAllUsesWith(Op);
1052 Z->eraseFromParent();
1053 return;
1054 }
1055 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1056 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1057 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1058 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1059 T->replaceAllUsesWith(And);
1060 T->eraseFromParent();
1061 return;
1062 }
1063
1064 // Promote immediates.
1065 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1066 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1067 if (CI->getType()->getBitWidth() < DestBW)
1068 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1069 }
1070}
1071
1072bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1073 BasicBlock *ExitB) {
1074 assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail (
"LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1074, __extension__ __PRETTY_FUNCTION__))
;
1075 // Skip loops where the exit block has more than one predecessor. The values
1076 // coming from the loop block will be promoted to another type, and so the
1077 // values coming into the exit block from other predecessors would also have
1078 // to be promoted.
1079 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1080 return false;
1081 IntegerType *DestTy = getPmpyType();
1082 // Check if the exit values have types that are no wider than the type
1083 // that we want to promote to.
1084 unsigned DestBW = DestTy->getBitWidth();
1085 for (PHINode &P : ExitB->phis()) {
1086 if (P.getNumIncomingValues() != 1)
1087 return false;
1088 assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void
(0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1088, __extension__ __PRETTY_FUNCTION__))
;
1089 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1090 if (!T || T->getBitWidth() > DestBW)
1091 return false;
1092 }
1093
1094 // Check all instructions in the loop.
1095 for (Instruction &In : *LoopB)
1096 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1097 return false;
1098
1099 // Perform the promotion.
1100 std::vector<Instruction*> LoopIns;
1101 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1102 [](Instruction &In) { return &In; });
1103 for (Instruction *In : LoopIns)
1104 if (!In->isTerminator())
1105 promoteTo(In, DestTy, LoopB);
1106
1107 // Fix up the PHI nodes in the exit block.
1108 Instruction *EndI = ExitB->getFirstNonPHI();
1109 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1110 for (auto I = ExitB->begin(); I != End; ++I) {
1111 PHINode *P = dyn_cast<PHINode>(I);
1112 if (!P)
1113 break;
1114 Type *Ty0 = P->getIncomingValue(0)->getType();
1115 Type *PTy = P->getType();
1116 if (PTy != Ty0) {
1117 assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail
("Ty0 == DestTy", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1117, __extension__ __PRETTY_FUNCTION__))
;
1118 // In order to create the trunc, P must have the promoted type.
1119 P->mutateType(Ty0);
1120 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1121 // In order for the RAUW to work, the types of P and T must match.
1122 P->mutateType(PTy);
1123 P->replaceAllUsesWith(T);
1124 // Final update of the P's type.
1125 P->mutateType(Ty0);
1126 cast<Instruction>(T)->setOperand(0, P);
1127 }
1128 }
1129
1130 return true;
1131}
1132
1133bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1134 ValueSeq &Cycle) {
1135 // Out = ..., In, ...
1136 if (Out == In)
1137 return true;
1138
1139 auto *BB = cast<Instruction>(Out)->getParent();
1140 bool HadPhi = false;
1141
1142 for (auto U : Out->users()) {
1143 auto *I = dyn_cast<Instruction>(&*U);
1144 if (I == nullptr || I->getParent() != BB)
1145 continue;
1146 // Make sure that there are no multi-iteration cycles, e.g.
1147 // p1 = phi(p2)
1148 // p2 = phi(p1)
1149 // The cycle p1->p2->p1 would span two loop iterations.
1150 // Check that there is only one phi in the cycle.
1151 bool IsPhi = isa<PHINode>(I);
1152 if (IsPhi && HadPhi)
1153 return false;
1154 HadPhi |= IsPhi;
1155 if (Cycle.count(I))
1156 return false;
1157 Cycle.insert(I);
1158 if (findCycle(I, In, Cycle))
1159 break;
1160 Cycle.remove(I);
1161 }
1162 return !Cycle.empty();
1163}
1164
1165void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1166 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1167 // All the values in the cycle that are between the phi node and the
1168 // divider instruction will be classified as "early", all other values
1169 // will be "late".
1170
1171 bool IsE = true;
1172 unsigned I, N = Cycle.size();
1173 for (I = 0; I < N; ++I) {
1174 Value *V = Cycle[I];
1175 if (DivI == V)
1176 IsE = false;
1177 else if (!isa<PHINode>(V))
1178 continue;
1179 // Stop if found either.
1180 break;
1181 }
1182 // "I" is the index of either DivI or the phi node, whichever was first.
1183 // "E" is "false" or "true" respectively.
1184 ValueSeq &First = !IsE ? Early : Late;
1185 for (unsigned J = 0; J < I; ++J)
1186 First.insert(Cycle[J]);
1187
1188 ValueSeq &Second = IsE ? Early : Late;
1189 Second.insert(Cycle[I]);
1190 for (++I; I < N; ++I) {
1191 Value *V = Cycle[I];
1192 if (DivI == V || isa<PHINode>(V))
1193 break;
1194 Second.insert(V);
1195 }
1196
1197 for (; I < N; ++I)
1198 First.insert(Cycle[I]);
1199}
1200
1201bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1202 ValueSeq &Early, ValueSeq &Late) {
1203 // Select is an exception, since the condition value does not have to be
1204 // classified in the same way as the true/false values. The true/false
1205 // values do have to be both early or both late.
1206 if (UseI->getOpcode() == Instruction::Select) {
1207 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1208 if (Early.count(TV) || Early.count(FV)) {
1209 if (Late.count(TV) || Late.count(FV))
1210 return false;
1211 Early.insert(UseI);
1212 } else if (Late.count(TV) || Late.count(FV)) {
1213 if (Early.count(TV) || Early.count(FV))
1214 return false;
1215 Late.insert(UseI);
1216 }
1217 return true;
1218 }
1219
1220 // Not sure what would be the example of this, but the code below relies
1221 // on having at least one operand.
1222 if (UseI->getNumOperands() == 0)
1223 return true;
1224
1225 bool AE = true, AL = true;
1226 for (auto &I : UseI->operands()) {
1227 if (Early.count(&*I))
1228 AL = false;
1229 else if (Late.count(&*I))
1230 AE = false;
1231 }
1232 // If the operands appear "all early" and "all late" at the same time,
1233 // then it means that none of them are actually classified as either.
1234 // This is harmless.
1235 if (AE && AL)
1236 return true;
1237 // Conversely, if they are neither "all early" nor "all late", then
1238 // we have a mixture of early and late operands that is not a known
1239 // exception.
1240 if (!AE && !AL)
1241 return false;
1242
1243 // Check that we have covered the two special cases.
1244 assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail
("AE != AL", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1244, __extension__ __PRETTY_FUNCTION__))
;
1245
1246 if (AE)
1247 Early.insert(UseI);
1248 else
1249 Late.insert(UseI);
1250 return true;
1251}
1252
1253bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1254 switch (I->getOpcode()) {
1255 case Instruction::And:
1256 case Instruction::Or:
1257 case Instruction::Xor:
1258 case Instruction::LShr:
1259 case Instruction::Shl:
1260 case Instruction::Select:
1261 case Instruction::ICmp:
1262 case Instruction::PHI:
1263 break;
1264 default:
1265 return false;
1266 }
1267 return true;
1268}
1269
1270bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1271 unsigned IterCount) {
1272 auto *T = dyn_cast<IntegerType>(V->getType());
1273 if (!T)
1274 return false;
1275
1276 KnownBits Known(T->getBitWidth());
1277 computeKnownBits(V, Known, DL);
1278 return Known.countMinLeadingZeros() >= IterCount;
1279}
1280
1281bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1282 unsigned IterCount) {
1283 // Assume that all inputs to the value have the high bits zero.
1284 // Check if the value itself preserves the zeros in the high bits.
1285 if (auto *C = dyn_cast<ConstantInt>(V))
1286 return C->getValue().countLeadingZeros() >= IterCount;
1287
1288 if (auto *I = dyn_cast<Instruction>(V)) {
1289 switch (I->getOpcode()) {
1290 case Instruction::And:
1291 case Instruction::Or:
1292 case Instruction::Xor:
1293 case Instruction::LShr:
1294 case Instruction::Select:
1295 case Instruction::ICmp:
1296 case Instruction::PHI:
1297 case Instruction::ZExt:
1298 return true;
1299 }
1300 }
1301
1302 return false;
1303}
1304
1305bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1306 unsigned Opc = I->getOpcode();
1307 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1308 return Op != I->getOperand(1);
1309 return true;
1310}
1311
1312bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1313 BasicBlock *ExitB, unsigned IterCount) {
1314 Value *CIV = getCountIV(LoopB);
1315 if (CIV == nullptr)
1316 return false;
1317 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1318 if (CIVTy == nullptr)
1319 return false;
1320
1321 ValueSeq RShifts;
1322 ValueSeq Early, Late, Cycled;
1323
1324 // Find all value cycles that contain logical right shifts by 1.
1325 for (Instruction &I : *LoopB) {
1326 using namespace PatternMatch;
1327
1328 Value *V = nullptr;
1329 if (!match(&I, m_LShr(m_Value(V), m_One())))
1330 continue;
1331 ValueSeq C;
1332 if (!findCycle(&I, V, C))
1333 continue;
1334
1335 // Found a cycle.
1336 C.insert(&I);
1337 classifyCycle(&I, C, Early, Late);
1338 Cycled.insert(C.begin(), C.end());
1339 RShifts.insert(&I);
1340 }
1341
1342 // Find the set of all values affected by the shift cycles, i.e. all
1343 // cycled values, and (recursively) all their users.
1344 ValueSeq Users(Cycled.begin(), Cycled.end());
1345 for (unsigned i = 0; i < Users.size(); ++i) {
1346 Value *V = Users[i];
1347 if (!isa<IntegerType>(V->getType()))
1348 return false;
1349 auto *R = cast<Instruction>(V);
1350 // If the instruction does not commute with shifts, the loop cannot
1351 // be unshifted.
1352 if (!commutesWithShift(R))
1353 return false;
1354 for (User *U : R->users()) {
1355 auto *T = cast<Instruction>(U);
1356 // Skip users from outside of the loop. They will be handled later.
1357 // Also, skip the right-shifts and phi nodes, since they mix early
1358 // and late values.
1359 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1360 continue;
1361
1362 Users.insert(T);
1363 if (!classifyInst(T, Early, Late))
1364 return false;
1365 }
1366 }
1367
1368 if (Users.empty())
1369 return false;
1370
1371 // Verify that high bits remain zero.
1372 ValueSeq Internal(Users.begin(), Users.end());
1373 ValueSeq Inputs;
1374 for (unsigned i = 0; i < Internal.size(); ++i) {
1375 auto *R = dyn_cast<Instruction>(Internal[i]);
1376 if (!R)
1377 continue;
1378 for (Value *Op : R->operands()) {
1379 auto *T = dyn_cast<Instruction>(Op);
1380 if (T && T->getParent() != LoopB)
1381 Inputs.insert(Op);
1382 else
1383 Internal.insert(Op);
1384 }
1385 }
1386 for (Value *V : Inputs)
1387 if (!highBitsAreZero(V, IterCount))
1388 return false;
1389 for (Value *V : Internal)
1390 if (!keepsHighBitsZero(V, IterCount))
1391 return false;
1392
1393 // Finally, the work can be done. Unshift each user.
1394 IRBuilder<> IRB(LoopB);
1395 std::map<Value*,Value*> ShiftMap;
1396
1397 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1398
1399 CastMapType CastMap;
1400
1401 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1402 IntegerType *Ty) -> Value* {
1403 auto H = CM.find(std::make_pair(V, Ty));
1404 if (H != CM.end())
1405 return H->second;
1406 Value *CV = IRB.CreateIntCast(V, Ty, false);
1407 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1408 return CV;
1409 };
1410
1411 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1412 using namespace PatternMatch;
1413
1414 if (isa<PHINode>(I) || !Users.count(&*I))
1415 continue;
1416
1417 // Match lshr x, 1.
1418 Value *V = nullptr;
1419 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1420 replaceAllUsesOfWithIn(&*I, V, LoopB);
1421 continue;
1422 }
1423 // For each non-cycled operand, replace it with the corresponding
1424 // value shifted left.
1425 for (auto &J : I->operands()) {
1426 Value *Op = J.get();
1427 if (!isOperandShifted(&*I, Op))
1428 continue;
1429 if (Users.count(Op))
1430 continue;
1431 // Skip shifting zeros.
1432 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1433 continue;
1434 // Check if we have already generated a shift for this value.
1435 auto F = ShiftMap.find(Op);
1436 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1437 if (W == nullptr) {
1438 IRB.SetInsertPoint(&*I);
1439 // First, the shift amount will be CIV or CIV+1, depending on
1440 // whether the value is early or late. Instead of creating CIV+1,
1441 // do a single shift of the value.
1442 Value *ShAmt = CIV, *ShVal = Op;
1443 auto *VTy = cast<IntegerType>(ShVal->getType());
1444 auto *ATy = cast<IntegerType>(ShAmt->getType());
1445 if (Late.count(&*I))
1446 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1447 // Second, the types of the shifted value and the shift amount
1448 // must match.
1449 if (VTy != ATy) {
1450 if (VTy->getBitWidth() < ATy->getBitWidth())
1451 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1452 else
1453 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1454 }
1455 // Ready to generate the shift and memoize it.
1456 W = IRB.CreateShl(ShVal, ShAmt);
1457 ShiftMap.insert(std::make_pair(Op, W));
1458 }
1459 I->replaceUsesOfWith(Op, W);
1460 }
1461 }
1462
1463 // Update the users outside of the loop to account for having left
1464 // shifts. They would normally be shifted right in the loop, so shift
1465 // them right after the loop exit.
1466 // Take advantage of the loop-closed SSA form, which has all the post-
1467 // loop values in phi nodes.
1468 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1469 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1470 if (!isa<PHINode>(P))
1471 break;
1472 auto *PN = cast<PHINode>(P);
1473 Value *U = PN->getIncomingValueForBlock(LoopB);
1474 if (!Users.count(U))
1475 continue;
1476 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1477 PN->replaceAllUsesWith(S);
1478 // The above RAUW will create
1479 // S = lshr S, IterCount
1480 // so we need to fix it back into
1481 // S = lshr PN, IterCount
1482 cast<User>(S)->replaceUsesOfWith(S, PN);
1483 }
1484
1485 return true;
1486}
1487
1488void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1489 for (auto &I : *LoopB)
1490 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1491 I.replaceAllUsesWith(SV);
1492
1493 for (Instruction &I : llvm::make_early_inc_range(*LoopB))
1494 RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI);
1495}
1496
1497unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1498 // Arrays of coefficients of Q and the inverse, C.
1499 // Q[i] = coefficient at x^i.
1500 std::array<char,32> Q, C;
1501
1502 for (unsigned i = 0; i < 32; ++i) {
1503 Q[i] = QP & 1;
1504 QP >>= 1;
1505 }
1506 assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail
("Q[0] == 1", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1506, __extension__ __PRETTY_FUNCTION__))
;
1507
1508 // Find C, such that
1509 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1510 //
1511 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1512 // operations * and + are & and ^ respectively.
1513 //
1514 // Find C[i] recursively, by comparing i-th coefficient in the product
1515 // with 0 (or 1 for i=0).
1516 //
1517 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1518 C[0] = 1;
1519 for (unsigned i = 1; i < 32; ++i) {
1520 // Solve for C[i] in:
1521 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1522 // This is equivalent to
1523 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1524 // which is
1525 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1526 unsigned T = 0;
1527 for (unsigned j = 0; j < i; ++j)
1528 T = T ^ (C[j] & Q[i-j]);
1529 C[i] = T;
1530 }
1531
1532 unsigned QV = 0;
1533 for (unsigned i = 0; i < 32; ++i)
1534 if (C[i])
1535 QV |= (1 << i);
1536
1537 return QV;
1538}
1539
1540Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1541 ParsedValues &PV) {
1542 IRBuilder<> B(&*At);
1543 Module *M = At->getParent()->getParent()->getParent();
1544 Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1545
1546 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1547 unsigned IC = PV.IterCount;
1548
1549 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1550 P0 = P = B.CreateXor(P, PV.M);
1551
1552 // Create a bit mask to clear the high bits beyond IterCount.
1553 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
15
Returning from 'APInt::getLowBitsSet'
1554
1555 if (PV.IterCount != 32)
16
Assuming field 'IterCount' is equal to 32
17
Taking false branch
1556 P = B.CreateAnd(P, BMI);
1557
1558 if (PV.Inv) {
18
Assuming field 'Inv' is true
19
Taking true branch
1559 auto *QI = dyn_cast<ConstantInt>(PV.Q);
20
Assuming field 'Q' is a 'ConstantInt'
1560 assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth()
<= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1560
, __extension__ __PRETTY_FUNCTION__))
;
21
Assuming the condition is true
22
'?' condition is true
1561
1562 // Again, clearing bits beyond IterCount.
1563 unsigned M = (1 << PV.IterCount) - 1;
23
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1564 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1565 unsigned QV = getInverseMxN(Tmp) & M;
1566 auto *QVI = ConstantInt::get(QI->getType(), QV);
1567 P = B.CreateCall(PMF, {P, QVI});
1568 P = B.CreateTrunc(P, QI->getType());
1569 if (IC != 32)
1570 P = B.CreateAnd(P, BMI);
1571 }
1572
1573 Value *R = B.CreateCall(PMF, {P, Q});
1574
1575 if (PV.M != nullptr)
1576 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1577
1578 return R;
1579}
1580
1581static bool hasZeroSignBit(const Value *V) {
1582 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1583 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1584 const Instruction *I = dyn_cast<const Instruction>(V);
1585 if (!I)
1586 return false;
1587 switch (I->getOpcode()) {
1588 case Instruction::LShr:
1589 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1590 return SI->getZExtValue() > 0;
1591 return false;
1592 case Instruction::Or:
1593 case Instruction::Xor:
1594 return hasZeroSignBit(I->getOperand(0)) &&
1595 hasZeroSignBit(I->getOperand(1));
1596 case Instruction::And:
1597 return hasZeroSignBit(I->getOperand(0)) ||
1598 hasZeroSignBit(I->getOperand(1));
1599 }
1600 return false;
1601}
1602
1603void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1604 S.addRule("sink-zext",
1605 // Sink zext past bitwise operations.
1606 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1607 if (I->getOpcode() != Instruction::ZExt)
1608 return nullptr;
1609 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1610 if (!T)
1611 return nullptr;
1612 switch (T->getOpcode()) {
1613 case Instruction::And:
1614 case Instruction::Or:
1615 case Instruction::Xor:
1616 break;
1617 default:
1618 return nullptr;
1619 }
1620 IRBuilder<> B(Ctx);
1621 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1622 B.CreateZExt(T->getOperand(0), I->getType()),
1623 B.CreateZExt(T->getOperand(1), I->getType()));
1624 });
1625 S.addRule("xor/and -> and/xor",
1626 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1627 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1628 if (I->getOpcode() != Instruction::Xor)
1629 return nullptr;
1630 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1631 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1632 if (!And0 || !And1)
1633 return nullptr;
1634 if (And0->getOpcode() != Instruction::And ||
1635 And1->getOpcode() != Instruction::And)
1636 return nullptr;
1637 if (And0->getOperand(1) != And1->getOperand(1))
1638 return nullptr;
1639 IRBuilder<> B(Ctx);
1640 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1641 And0->getOperand(1));
1642 });
1643 S.addRule("sink binop into select",
1644 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1645 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1646 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1647 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1648 if (!BO)
1649 return nullptr;
1650 Instruction::BinaryOps Op = BO->getOpcode();
1651 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1652 IRBuilder<> B(Ctx);
1653 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1654 Value *Z = BO->getOperand(1);
1655 return B.CreateSelect(Sel->getCondition(),
1656 B.CreateBinOp(Op, X, Z),
1657 B.CreateBinOp(Op, Y, Z));
1658 }
1659 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1660 IRBuilder<> B(Ctx);
1661 Value *X = BO->getOperand(0);
1662 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1663 return B.CreateSelect(Sel->getCondition(),
1664 B.CreateBinOp(Op, X, Y),
1665 B.CreateBinOp(Op, X, Z));
1666 }
1667 return nullptr;
1668 });
1669 S.addRule("fold select-select",
1670 // (select c (select c x y) z) -> (select c x z)
1671 // (select c x (select c y z)) -> (select c x z)
1672 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1673 SelectInst *Sel = dyn_cast<SelectInst>(I);
1674 if (!Sel)
1675 return nullptr;
1676 IRBuilder<> B(Ctx);
1677 Value *C = Sel->getCondition();
1678 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1679 if (Sel0->getCondition() == C)
1680 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1681 }
1682 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1683 if (Sel1->getCondition() == C)
1684 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1685 }
1686 return nullptr;
1687 });
1688 S.addRule("or-signbit -> xor-signbit",
1689 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1690 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1691 if (I->getOpcode() != Instruction::Or)
1692 return nullptr;
1693 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1694 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1695 return nullptr;
1696 if (!hasZeroSignBit(I->getOperand(0)))
1697 return nullptr;
1698 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1699 });
1700 S.addRule("sink lshr into binop",
1701 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1702 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1703 if (I->getOpcode() != Instruction::LShr)
1704 return nullptr;
1705 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1706 if (!BitOp)
1707 return nullptr;
1708 switch (BitOp->getOpcode()) {
1709 case Instruction::And:
1710 case Instruction::Or:
1711 case Instruction::Xor:
1712 break;
1713 default:
1714 return nullptr;
1715 }
1716 IRBuilder<> B(Ctx);
1717 Value *S = I->getOperand(1);
1718 return B.CreateBinOp(BitOp->getOpcode(),
1719 B.CreateLShr(BitOp->getOperand(0), S),
1720 B.CreateLShr(BitOp->getOperand(1), S));
1721 });
1722 S.addRule("expose bitop-const",
1723 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1724 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1725 auto IsBitOp = [](unsigned Op) -> bool {
1726 switch (Op) {
1727 case Instruction::And:
1728 case Instruction::Or:
1729 case Instruction::Xor:
1730 return true;
1731 }
1732 return false;
1733 };
1734 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1735 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1736 return nullptr;
1737 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1738 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1739 return nullptr;
1740 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1741 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1742 if (!CA || !CB)
1743 return nullptr;
1744 IRBuilder<> B(Ctx);
1745 Value *X = BitOp2->getOperand(0);
1746 return B.CreateBinOp(BitOp2->getOpcode(), X,
1747 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1748 });
1749}
1750
1751void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1752 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1753 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1754 if (I->getOpcode() != Instruction::And)
1755 return nullptr;
1756 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1757 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1758 if (!Xor || !C0)
1759 return nullptr;
1760 if (Xor->getOpcode() != Instruction::Xor)
1761 return nullptr;
1762 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1763 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1764 // Pick the first non-null and.
1765 if (!And0 || And0->getOpcode() != Instruction::And)
1766 std::swap(And0, And1);
1767 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1768 if (!C1)
1769 return nullptr;
1770 uint32_t V0 = C0->getZExtValue();
1771 uint32_t V1 = C1->getZExtValue();
1772 if (V0 != (V0 & V1))
1773 return nullptr;
1774 IRBuilder<> B(Ctx);
1775 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1776 });
1777}
1778
1779bool PolynomialMultiplyRecognize::recognize() {
1780 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1781 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1782 // Restrictions:
1783 // - The loop must consist of a single block.
1784 // - The iteration count must be known at compile-time.
1785 // - The loop must have an induction variable starting from 0, and
1786 // incremented in each iteration of the loop.
1787 BasicBlock *LoopB = CurLoop->getHeader();
1788 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1789
1790 if (LoopB != CurLoop->getLoopLatch())
1791 return false;
1792 BasicBlock *ExitB = CurLoop->getExitBlock();
1793 if (ExitB == nullptr)
1794 return false;
1795 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1796 if (EntryB == nullptr)
1797 return false;
1798
1799 unsigned IterCount = 0;
1800 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1801 if (isa<SCEVCouldNotCompute>(CT))
1802 return false;
1803 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1804 IterCount = CV->getValue()->getZExtValue() + 1;
1805
1806 Value *CIV = getCountIV(LoopB);
1807 ParsedValues PV;
1808 Simplifier PreSimp;
1809 PV.IterCount = IterCount;
1810 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1811 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1812
1813 setupPreSimplifier(PreSimp);
1814
1815 // Perform a preliminary scan of select instructions to see if any of them
1816 // looks like a generator of the polynomial multiply steps. Assume that a
1817 // loop can only contain a single transformable operation, so stop the
1818 // traversal after the first reasonable candidate was found.
1819 // XXX: Currently this approach can modify the loop before being 100% sure
1820 // that the transformation can be carried out.
1821 bool FoundPreScan = false;
1822 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1823 for (const Value *U : V->users()) {
1824 if (const auto *P = dyn_cast<const PHINode>(U))
1825 if (P->getParent() == LoopB)
1826 return true;
1827 }
1828 return false;
1829 };
1830 for (Instruction &In : *LoopB) {
1831 SelectInst *SI = dyn_cast<SelectInst>(&In);
1832 if (!SI || !FeedsPHI(SI))
1833 continue;
1834
1835 Simplifier::Context C(SI);
1836 Value *T = PreSimp.simplify(C);
1837 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1838 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1839 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1840 FoundPreScan = true;
1841 if (SelI != SI) {
1842 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1843 SI->replaceAllUsesWith(NewSel);
1844 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1845 }
1846 break;
1847 }
1848 }
1849
1850 if (!FoundPreScan) {
1851 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1852 return false;
1853 }
1854
1855 if (!PV.Left) {
1856 // The right shift version actually only returns the higher bits of
1857 // the result (each iteration discards the LSB). If we want to convert it
1858 // to a left-shifting loop, the working data type must be at least as
1859 // wide as the target's pmpy instruction.
1860 if (!promoteTypes(LoopB, ExitB))
1861 return false;
1862 // Run post-promotion simplifications.
1863 Simplifier PostSimp;
1864 setupPostSimplifier(PostSimp);
1865 for (Instruction &In : *LoopB) {
1866 SelectInst *SI = dyn_cast<SelectInst>(&In);
1867 if (!SI || !FeedsPHI(SI))
1868 continue;
1869 Simplifier::Context C(SI);
1870 Value *T = PostSimp.simplify(C);
1871 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1872 if (SelI != SI) {
1873 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1874 SI->replaceAllUsesWith(NewSel);
1875 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1876 }
1877 break;
1878 }
1879
1880 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1881 return false;
1882 cleanupLoopBody(LoopB);
1883 }
1884
1885 // Scan the loop again, find the generating select instruction.
1886 bool FoundScan = false;
1887 for (Instruction &In : *LoopB) {
1888 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1889 if (!SelI)
1890 continue;
1891 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1892 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1893 if (FoundScan)
1894 break;
1895 }
1896 assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail
("FoundScan", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1896, __extension__ __PRETTY_FUNCTION__))
;
1897
1898 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1899 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1900 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1901 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1902 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1903 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1904 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1905 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1906 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1907 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1908 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1909 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1910 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1911
1912 BasicBlock::iterator At(EntryB->getTerminator());
1913 Value *PM = generate(At, PV);
1914 if (PM == nullptr)
1915 return false;
1916
1917 if (PM->getType() != PV.Res->getType())
1918 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1919
1920 PV.Res->replaceAllUsesWith(PM);
1921 PV.Res->eraseFromParent();
1922 return true;
1923}
1924
1925int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1926 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1927 return SC->getAPInt().getSExtValue();
1928 return 0;
1929}
1930
1931bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1932 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1933 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1934 return false;
1935
1936 Value *StoredVal = SI->getValueOperand();
1937 Value *StorePtr = SI->getPointerOperand();
1938
1939 // Reject stores that are so large that they overflow an unsigned.
1940 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1941 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1942 return false;
1943
1944 // See if the pointer expression is an AddRec like {base,+,1} on the current
1945 // loop, which indicates a strided store. If we have something else, it's a
1946 // random store we can't handle.
1947 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1948 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1949 return false;
1950
1951 // Check to see if the stride matches the size of the store. If so, then we
1952 // know that every byte is touched in the loop.
1953 int Stride = getSCEVStride(StoreEv);
1954 if (Stride == 0)
1955 return false;
1956 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1957 if (StoreSize != unsigned(std::abs(Stride)))
1958 return false;
1959
1960 // The store must be feeding a non-volatile load.
1961 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1962 if (!LI || !LI->isSimple())
1963 return false;
1964
1965 // See if the pointer expression is an AddRec like {base,+,1} on the current
1966 // loop, which indicates a strided load. If we have something else, it's a
1967 // random load we can't handle.
1968 Value *LoadPtr = LI->getPointerOperand();
1969 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1970 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1971 return false;
1972
1973 // The store and load must share the same stride.
1974 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1975 return false;
1976
1977 // Success. This store can be converted into a memcpy.
1978 return true;
1979}
1980
1981/// mayLoopAccessLocation - Return true if the specified loop might access the
1982/// specified pointer location, which is a loop-strided access. The 'Access'
1983/// argument specifies what the verboten forms of access are (read or write).
1984static bool
1985mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1986 const SCEV *BECount, unsigned StoreSize,
1987 AliasAnalysis &AA,
1988 SmallPtrSetImpl<Instruction *> &Ignored) {
1989 // Get the location that may be stored across the loop. Since the access
1990 // is strided positively through memory, we say that the modified location
1991 // starts at the pointer and has infinite size.
1992 LocationSize AccessSize = LocationSize::afterPointer();
1993
1994 // If the loop iterates a fixed number of times, we can refine the access
1995 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1996 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1997 AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
1998 StoreSize);
1999
2000 // TODO: For this to be really effective, we have to dive into the pointer
2001 // operand in the store. Store to &A[i] of 100 will always return may alias
2002 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
2003 // which will then no-alias a store to &A[100].
2004 MemoryLocation StoreLoc(Ptr, AccessSize);
2005
2006 for (auto *B : L->blocks())
2007 for (auto &I : *B)
2008 if (Ignored.count(&I) == 0 &&
2009 isModOrRefSet(
2010 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
2011 return true;
2012
2013 return false;
2014}
2015
2016void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2017 SmallVectorImpl<StoreInst*> &Stores) {
2018 Stores.clear();
2019 for (Instruction &I : *BB)
2020 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2021 if (isLegalStore(CurLoop, SI))
2022 Stores.push_back(SI);
2023}
2024
2025bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2026 StoreInst *SI, const SCEV *BECount) {
2027 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2029
, __extension__ __PRETTY_FUNCTION__))
2028 "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2029
, __extension__ __PRETTY_FUNCTION__))
2029 "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2029
, __extension__ __PRETTY_FUNCTION__))
;
2030
2031 Value *StorePtr = SI->getPointerOperand();
2032 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2033 unsigned Stride = getSCEVStride(StoreEv);
2034 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2035 if (Stride != StoreSize)
2036 return false;
2037
2038 // See if the pointer expression is an AddRec like {base,+,1} on the current
2039 // loop, which indicates a strided load. If we have something else, it's a
2040 // random load we can't handle.
2041 auto *LI = cast<LoadInst>(SI->getValueOperand());
2042 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2043
2044 // The trip count of the loop and the base pointer of the addrec SCEV is
2045 // guaranteed to be loop invariant, which means that it should dominate the
2046 // header. This allows us to insert code for it in the preheader.
2047 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2048 Instruction *ExpPt = Preheader->getTerminator();
2049 IRBuilder<> Builder(ExpPt);
2050 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2051
2052 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2053
2054 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2055 // this into a memcpy/memmove in the loop preheader now if we want. However,
2056 // this would be unsafe to do if there is anything else in the loop that may
2057 // read or write the memory region we're storing to. For memcpy, this
2058 // includes the load that feeds the stores. Check for an alias by generating
2059 // the base address and checking everything.
2060 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2061 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2062 Value *LoadBasePtr = nullptr;
2063
2064 bool Overlap = false;
2065 bool DestVolatile = SI->isVolatile();
2066 Type *BECountTy = BECount->getType();
2067
2068 if (DestVolatile) {
2069 // The trip count must fit in i32, since it is the type of the "num_words"
2070 // argument to hexagon_memcpy_forward_vp4cp4n2.
2071 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2072CleanupAndExit:
2073 // If we generated new code for the base pointer, clean up.
2074 Expander.clear();
2075 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2076 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2077 StoreBasePtr = nullptr;
2078 }
2079 if (LoadBasePtr) {
2080 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2081 LoadBasePtr = nullptr;
2082 }
2083 return false;
2084 }
2085 }
2086
2087 SmallPtrSet<Instruction*, 2> Ignore1;
2088 Ignore1.insert(SI);
2089 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2090 StoreSize, *AA, Ignore1)) {
2091 // Check if the load is the offending instruction.
2092 Ignore1.insert(LI);
2093 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2094 BECount, StoreSize, *AA, Ignore1)) {
2095 // Still bad. Nothing we can do.
2096 goto CleanupAndExit;
2097 }
2098 // It worked with the load ignored.
2099 Overlap = true;
2100 }
2101
2102 if (!Overlap) {
2103 if (DisableMemcpyIdiom || !HasMemcpy)
2104 goto CleanupAndExit;
2105 } else {
2106 // Don't generate memmove if this function will be inlined. This is
2107 // because the caller will undergo this transformation after inlining.
2108 Function *Func = CurLoop->getHeader()->getParent();
2109 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2110 goto CleanupAndExit;
2111
2112 // In case of a memmove, the call to memmove will be executed instead
2113 // of the loop, so we need to make sure that there is nothing else in
2114 // the loop than the load, store and instructions that these two depend
2115 // on.
2116 SmallVector<Instruction*,2> Insts;
2117 Insts.push_back(SI);
2118 Insts.push_back(LI);
2119 if (!coverLoop(CurLoop, Insts))
2120 goto CleanupAndExit;
2121
2122 if (DisableMemmoveIdiom || !HasMemmove)
2123 goto CleanupAndExit;
2124 bool IsNested = CurLoop->getParentLoop() != nullptr;
2125 if (IsNested && OnlyNonNestedMemmove)
2126 goto CleanupAndExit;
2127 }
2128
2129 // For a memcpy, we have to make sure that the input array is not being
2130 // mutated by the loop.
2131 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2132 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2133
2134 SmallPtrSet<Instruction*, 2> Ignore2;
2135 Ignore2.insert(SI);
2136 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2137 StoreSize, *AA, Ignore2))
2138 goto CleanupAndExit;
2139
2140 // Check the stride.
2141 bool StridePos = getSCEVStride(LoadEv) >= 0;
2142
2143 // Currently, the volatile memcpy only emulates traversing memory forward.
2144 if (!StridePos && DestVolatile)
2145 goto CleanupAndExit;
2146
2147 bool RuntimeCheck = (Overlap || DestVolatile);
2148
2149 BasicBlock *ExitB;
2150 if (RuntimeCheck) {
2151 // The runtime check needs a single exit block.
2152 SmallVector<BasicBlock*, 8> ExitBlocks;
2153 CurLoop->getUniqueExitBlocks(ExitBlocks);
2154 if (ExitBlocks.size() != 1)
2155 goto CleanupAndExit;
2156 ExitB = ExitBlocks[0];
2157 }
2158
2159 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2160 // pointer size if it isn't already.
2161 LLVMContext &Ctx = SI->getContext();
2162 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2163 DebugLoc DLoc = SI->getDebugLoc();
2164
2165 const SCEV *NumBytesS =
2166 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2167 if (StoreSize != 1)
2168 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2169 SCEV::FlagNUW);
2170 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2171 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2172 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2173 NumBytes = Simp;
2174
2175 CallInst *NewCall;
2176
2177 if (RuntimeCheck) {
2178 unsigned Threshold = RuntimeMemSizeThreshold;
2179 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2180 uint64_t C = CI->getZExtValue();
2181 if (Threshold != 0 && C < Threshold)
2182 goto CleanupAndExit;
2183 if (C < CompileTimeMemSizeThreshold)
2184 goto CleanupAndExit;
2185 }
2186
2187 BasicBlock *Header = CurLoop->getHeader();
2188 Function *Func = Header->getParent();
2189 Loop *ParentL = LF->getLoopFor(Preheader);
2190 StringRef HeaderName = Header->getName();
2191
2192 // Create a new (empty) preheader, and update the PHI nodes in the
2193 // header to use the new preheader.
2194 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2195 Func, Header);
2196 if (ParentL)
2197 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2198 IRBuilder<>(NewPreheader).CreateBr(Header);
2199 for (auto &In : *Header) {
2200 PHINode *PN = dyn_cast<PHINode>(&In);
2201 if (!PN)
2202 break;
2203 int bx = PN->getBasicBlockIndex(Preheader);
2204 if (bx >= 0)
2205 PN->setIncomingBlock(bx, NewPreheader);
2206 }
2207 DT->addNewBlock(NewPreheader, Preheader);
2208 DT->changeImmediateDominator(Header, NewPreheader);
2209
2210 // Check for safe conditions to execute memmove.
2211 // If stride is positive, copying things from higher to lower addresses
2212 // is equivalent to memmove. For negative stride, it's the other way
2213 // around. Copying forward in memory with positive stride may not be
2214 // same as memmove since we may be copying values that we just stored
2215 // in some previous iteration.
2216 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2217 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2218 Value *LowA = StridePos ? SA : LA;
2219 Value *HighA = StridePos ? LA : SA;
2220 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2221 Value *Cond = CmpA;
2222
2223 // Check for distance between pointers. Since the case LowA < HighA
2224 // is checked for above, assume LowA >= HighA.
2225 Value *Dist = Builder.CreateSub(LowA, HighA);
2226 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2227 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2228 Cond = CmpEither;
2229
2230 if (Threshold != 0) {
2231 Type *Ty = NumBytes->getType();
2232 Value *Thr = ConstantInt::get(Ty, Threshold);
2233 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2234 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2235 Cond = CmpBoth;
2236 }
2237 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2238 Func, NewPreheader);
2239 if (ParentL)
2240 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2241 Instruction *OldT = Preheader->getTerminator();
2242 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2243 OldT->eraseFromParent();
2244 Preheader->setName(Preheader->getName()+".old");
2245 DT->addNewBlock(MemmoveB, Preheader);
2246 // Find the new immediate dominator of the exit block.
2247 BasicBlock *ExitD = Preheader;
2248 for (BasicBlock *PB : predecessors(ExitB)) {
2249 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2250 if (!ExitD)
2251 break;
2252 }
2253 // If the prior immediate dominator of ExitB was dominated by the
2254 // old preheader, then the old preheader becomes the new immediate
2255 // dominator. Otherwise don't change anything (because the newly
2256 // added blocks are dominated by the old preheader).
2257 if (ExitD && DT->dominates(Preheader, ExitD)) {
2258 DomTreeNode *BN = DT->getNode(ExitB);
2259 DomTreeNode *DN = DT->getNode(ExitD);
2260 BN->setIDom(DN);
2261 }
2262
2263 // Add a call to memmove to the conditional block.
2264 IRBuilder<> CondBuilder(MemmoveB);
2265 CondBuilder.CreateBr(ExitB);
2266 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2267
2268 if (DestVolatile) {
2269 Type *Int32Ty = Type::getInt32Ty(Ctx);
2270 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2271 Type *VoidTy = Type::getVoidTy(Ctx);
2272 Module *M = Func->getParent();
2273 FunctionCallee Fn = M->getOrInsertFunction(
2274 HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
2275
2276 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2277 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2278 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2279 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2280 MemmoveB->getTerminator());
2281 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2282 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2283 NumWords = Simp;
2284
2285 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2286 ? StoreBasePtr
2287 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2288 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2289 ? LoadBasePtr
2290 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2291 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2292 } else {
2293 NewCall = CondBuilder.CreateMemMove(
2294 StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
2295 }
2296 } else {
2297 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
2298 LI->getAlign(), NumBytes);
2299 // Okay, the memcpy has been formed. Zap the original store and
2300 // anything that feeds into it.
2301 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2302 }
2303
2304 NewCall->setDebugLoc(DLoc);
2305
2306 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2307 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2308 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2309 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2310 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2311
2312 return true;
2313}
2314
2315// Check if the instructions in Insts, together with their dependencies
2316// cover the loop in the sense that the loop could be safely eliminated once
2317// the instructions in Insts are removed.
2318bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2319 SmallVectorImpl<Instruction*> &Insts) const {
2320 SmallSet<BasicBlock*,8> LoopBlocks;
2321 for (auto *B : L->blocks())
2322 LoopBlocks.insert(B);
2323
2324 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2325
2326 // Collect all instructions from the loop that the instructions in Insts
2327 // depend on (plus their dependencies, etc.). These instructions will
2328 // constitute the expression trees that feed those in Insts, but the trees
2329 // will be limited only to instructions contained in the loop.
2330 for (unsigned i = 0; i < Worklist.size(); ++i) {
2331 Instruction *In = Worklist[i];
2332 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2333 Instruction *OpI = dyn_cast<Instruction>(I);
2334 if (!OpI)
2335 continue;
2336 BasicBlock *PB = OpI->getParent();
2337 if (!LoopBlocks.count(PB))
2338 continue;
2339 Worklist.insert(OpI);
2340 }
2341 }
2342
2343 // Scan all instructions in the loop, if any of them have a user outside
2344 // of the loop, or outside of the expressions collected above, then either
2345 // the loop has a side-effect visible outside of it, or there are
2346 // instructions in it that are not involved in the original set Insts.
2347 for (auto *B : L->blocks()) {
2348 for (auto &In : *B) {
2349 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2350 continue;
2351 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2352 return false;
2353 for (auto K : In.users()) {
2354 Instruction *UseI = dyn_cast<Instruction>(K);
2355 if (!UseI)
2356 continue;
2357 BasicBlock *UseB = UseI->getParent();
2358 if (LF->getLoopFor(UseB) != L)
2359 return false;
2360 }
2361 }
2362 }
2363
2364 return true;
2365}
2366
2367/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2368/// with the specified backedge count. This block is known to be in the current
2369/// loop and not in any subloops.
2370bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2371 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2372 // We can only promote stores in this block if they are unconditionally
2373 // executed in the loop. For a block to be unconditionally executed, it has
2374 // to dominate all the exit blocks of the loop. Verify this now.
2375 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2376 return DT->dominates(BB, EB);
2377 };
2378 if (!all_of(ExitBlocks, DominatedByBB))
2379 return false;
2380
2381 bool MadeChange = false;
2382 // Look for store instructions, which may be optimized to memset/memcpy.
2383 SmallVector<StoreInst*,8> Stores;
2384 collectStores(CurLoop, BB, Stores);
2385
2386 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2387 for (auto &SI : Stores)
2388 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2389
2390 return MadeChange;
2391}
2392
2393bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2394 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2395 if (PMR.recognize())
2396 return true;
2397
2398 if (!HasMemcpy && !HasMemmove)
2399 return false;
2400
2401 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2402 assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2404
, __extension__ __PRETTY_FUNCTION__))
2403 "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2404
, __extension__ __PRETTY_FUNCTION__))
2404 "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2404
, __extension__ __PRETTY_FUNCTION__))
;
2405
2406 SmallVector<BasicBlock *, 8> ExitBlocks;
2407 L->getUniqueExitBlocks(ExitBlocks);
2408
2409 bool Changed = false;
2410
2411 // Scan all the blocks in the loop that are not in subloops.
2412 for (auto *BB : L->getBlocks()) {
2413 // Ignore blocks in subloops.
2414 if (LF->getLoopFor(BB) != L)
2415 continue;
2416 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2417 }
2418
2419 return Changed;
2420}
2421
2422bool HexagonLoopIdiomRecognize::run(Loop *L) {
2423 const Module &M = *L->getHeader()->getParent()->getParent();
2424 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2425 return false;
2426
2427 // If the loop could not be converted to canonical form, it must have an
2428 // indirectbr in it, just give up.
2429 if (!L->getLoopPreheader())
2430 return false;
2431
2432 // Disable loop idiom recognition if the function's name is a common idiom.
2433 StringRef Name = L->getHeader()->getParent()->getName();
2434 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2435 return false;
2436
2437 DL = &L->getHeader()->getModule()->getDataLayout();
2438
2439 HasMemcpy = TLI->has(LibFunc_memcpy);
2440 HasMemmove = TLI->has(LibFunc_memmove);
2441
2442 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2443 return runOnCountableLoop(L);
2444 return false;
2445}
2446
2447bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
2448 LPPassManager &LPM) {
2449 if (skipLoop(L))
2450 return false;
2451
2452 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2453 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2454 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2455 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2456 *L->getHeader()->getParent());
2457 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2458 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2459}
2460
2461Pass *llvm::createHexagonLoopIdiomPass() {
2462 return new HexagonLoopIdiomRecognizeLegacyPass();
2463}
2464
2465PreservedAnalyses
2466HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM,
2467 LoopStandardAnalysisResults &AR,
2468 LPMUpdater &U) {
2469 return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)
2470 .run(&L)
2471 ? getLoopPassPreservedAnalyses()
2472 : PreservedAnalyses::all();
2473}

/build/llvm-toolchain-snapshot-14~++20220116100644+5f782d25a742/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <utility>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34template <typename T, typename Enable> struct DenseMapInfo;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69/// * APInt supports zero-bit-width values, but operations that require bits
70/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit
71/// integer). This means that operations like zero extension and logical
72/// shifts are defined, but sign extension and ashr is not. Zero bit values
73/// compare and hash equal to themselves, and countLeadingZeros returns 0.
74///
75class LLVM_NODISCARD[[clang::warn_unused_result]] APInt {
76public:
77 typedef uint64_t WordType;
78
79 /// This enum is used to hold the constants we needed for APInt.
80 enum : unsigned {
81 /// Byte size of a word.
82 APINT_WORD_SIZE = sizeof(WordType),
83 /// Bits in a word.
84 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
85 };
86
87 enum class Rounding {
88 DOWN,
89 TOWARD_ZERO,
90 UP,
91 };
92
93 static constexpr WordType WORDTYPE_MAX = ~WordType(0);
94
95 /// \name Constructors
96 /// @{
97
98 /// Create a new APInt of numBits width, initialized as val.
99 ///
100 /// If isSigned is true then val is treated as if it were a signed value
101 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
102 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
103 /// the range of val are zero filled).
104 ///
105 /// \param numBits the bit width of the constructed APInt
106 /// \param val the initial value of the APInt
107 /// \param isSigned how to treat signedness of val
108 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
109 : BitWidth(numBits) {
110 if (isSingleWord()) {
111 U.VAL = val;
112 clearUnusedBits();
113 } else {
114 initSlowCase(val, isSigned);
115 }
116 }
117
118 /// Construct an APInt of numBits width, initialized as bigVal[].
119 ///
120 /// Note that bigVal.size() can be smaller or larger than the corresponding
121 /// bit width but any extraneous bits will be dropped.
122 ///
123 /// \param numBits the bit width of the constructed APInt
124 /// \param bigVal a sequence of words to form the initial value of the APInt
125 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
126
127 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
128 /// deprecated because this constructor is prone to ambiguity with the
129 /// APInt(unsigned, uint64_t, bool) constructor.
130 ///
131 /// If this overload is ever deleted, care should be taken to prevent calls
132 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
133 /// constructor.
134 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
135
136 /// Construct an APInt from a string representation.
137 ///
138 /// This constructor interprets the string \p str in the given radix. The
139 /// interpretation stops when the first character that is not suitable for the
140 /// radix is encountered, or the end of the string. Acceptable radix values
141 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
142 /// string to require more bits than numBits.
143 ///
144 /// \param numBits the bit width of the constructed APInt
145 /// \param str the string to be interpreted
146 /// \param radix the radix to use for the conversion
147 APInt(unsigned numBits, StringRef str, uint8_t radix);
148
149 /// Default constructor that creates an APInt with a 1-bit zero value.
150 explicit APInt() : BitWidth(1) { U.VAL = 0; }
151
152 /// Copy Constructor.
153 APInt(const APInt &that) : BitWidth(that.BitWidth) {
154 if (isSingleWord())
155 U.VAL = that.U.VAL;
156 else
157 initSlowCase(that);
158 }
159
160 /// Move Constructor.
161 APInt(APInt &&that) : BitWidth(that.BitWidth) {
162 memcpy(&U, &that.U, sizeof(U));
163 that.BitWidth = 0;
164 }
165
166 /// Destructor.
167 ~APInt() {
168 if (needsCleanup())
169 delete[] U.pVal;
170 }
171
172 /// @}
173 /// \name Value Generators
174 /// @{
175
176 /// Get the '0' value for the specified bit-width.
177 static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
178
179 /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
180 static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
181
182 /// Return an APInt zero bits wide.
183 static APInt getZeroWidth() { return getZero(0); }
184
185 /// Gets maximum unsigned value of APInt for specific bit width.
186 static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); }
187
188 /// Gets maximum signed value of APInt for a specific bit width.
189 static APInt getSignedMaxValue(unsigned numBits) {
190 APInt API = getAllOnes(numBits);
191 API.clearBit(numBits - 1);
192 return API;
193 }
194
195 /// Gets minimum unsigned value of APInt for a specific bit width.
196 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
197
198 /// Gets minimum signed value of APInt for a specific bit width.
199 static APInt getSignedMinValue(unsigned numBits) {
200 APInt API(numBits, 0);
201 API.setBit(numBits - 1);
202 return API;
203 }
204
205 /// Get the SignMask for a specific bit width.
206 ///
207 /// This is just a wrapper function of getSignedMinValue(), and it helps code
208 /// readability when we want to get a SignMask.
209 static APInt getSignMask(unsigned BitWidth) {
210 return getSignedMinValue(BitWidth);
211 }
212
213 /// Return an APInt of a specified width with all bits set.
214 static APInt getAllOnes(unsigned numBits) {
215 return APInt(numBits, WORDTYPE_MAX, true);
216 }
217
218 /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
219 static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
220
221 /// Return an APInt with exactly one bit set in the result.
222 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
223 APInt Res(numBits, 0);
224 Res.setBit(BitNo);
225 return Res;
226 }
227
228 /// Get a value with a block of bits set.
229 ///
230 /// Constructs an APInt value that has a contiguous range of bits set. The
231 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
232 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
233 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
234 /// \p hiBit.
235 ///
236 /// \param numBits the intended bit width of the result
237 /// \param loBit the index of the lowest bit set.
238 /// \param hiBit the index of the highest bit set.
239 ///
240 /// \returns An APInt value with the requested bits set.
241 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
242 APInt Res(numBits, 0);
243 Res.setBits(loBit, hiBit);
244 return Res;
245 }
246
247 /// Wrap version of getBitsSet.
248 /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
249 /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
250 /// with parameters (32, 28, 4), you would get 0xF000000F.
251 /// If \p hiBit is equal to \p loBit, you would get a result with all bits
252 /// set.
253 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
254 unsigned hiBit) {
255 APInt Res(numBits, 0);
256 Res.setBitsWithWrap(loBit, hiBit);
257 return Res;
258 }
259
260 /// Constructs an APInt value that has a contiguous range of bits set. The
261 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
262 /// bits will be zero. For example, with parameters(32, 12) you would get
263 /// 0xFFFFF000.
264 ///
265 /// \param numBits the intended bit width of the result
266 /// \param loBit the index of the lowest bit to set.
267 ///
268 /// \returns An APInt value with the requested bits set.
269 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
270 APInt Res(numBits, 0);
271 Res.setBitsFrom(loBit);
272 return Res;
273 }
274
275 /// Constructs an APInt value that has the top hiBitsSet bits set.
276 ///
277 /// \param numBits the bitwidth of the result
278 /// \param hiBitsSet the number of high-order bits set in the result.
279 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
280 APInt Res(numBits, 0);
281 Res.setHighBits(hiBitsSet);
282 return Res;
283 }
284
285 /// Constructs an APInt value that has the bottom loBitsSet bits set.
286 ///
287 /// \param numBits the bitwidth of the result
288 /// \param loBitsSet the number of low-order bits set in the result.
289 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
290 APInt Res(numBits, 0);
291 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
14
Returning from 'APInt::setLowBits'
292 return Res;
293 }
294
295 /// Return a value containing V broadcasted over NewLen bits.
296 static APInt getSplat(unsigned NewLen, const APInt &V);
297
298 /// @}
299 /// \name Value Tests
300 /// @{
301
302 /// Determine if this APInt just has one word to store value.
303 ///
304 /// \returns true if the number of bits <= 64, false otherwise.
305 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
306
307 /// Determine sign of this APInt.
308 ///
309 /// This tests the high bit of this APInt to determine if it is set.
310 ///
311 /// \returns true if this APInt is negative, false otherwise
312 bool isNegative() const { return (*this)[BitWidth - 1]; }
313
314 /// Determine if this APInt Value is non-negative (>= 0)
315 ///
316 /// This tests the high bit of the APInt to determine if it is unset.
317 bool isNonNegative() const { return !isNegative(); }
318
319 /// Determine if sign bit of this APInt is set.
320 ///
321 /// This tests the high bit of this APInt to determine if it is set.
322 ///
323 /// \returns true if this APInt has its sign bit set, false otherwise.
324 bool isSignBitSet() const { return (*this)[BitWidth - 1]; }
325
326 /// Determine if sign bit of this APInt is clear.
327 ///
328 /// This tests the high bit of this APInt to determine if it is clear.
329 ///
330 /// \returns true if this APInt has its sign bit clear, false otherwise.
331 bool isSignBitClear() const { return !isSignBitSet(); }
332
333 /// Determine if this APInt Value is positive.
334 ///
335 /// This tests if the value of this APInt is positive (> 0). Note
336 /// that 0 is not a positive value.
337 ///
338 /// \returns true if this APInt is positive.
339 bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
340
341 /// Determine if this APInt Value is non-positive (<= 0).
342 ///
343 /// \returns true if this APInt is non-positive.
344 bool isNonPositive() const { return !isStrictlyPositive(); }
345
346 /// Determine if all bits are set. This is true for zero-width values.
347 bool isAllOnes() const {
348 if (BitWidth == 0)
349 return true;
350 if (isSingleWord())
351 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
352 return countTrailingOnesSlowCase() == BitWidth;
353 }
354
355 /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
356 bool isAllOnesValue() const { return isAllOnes(); }
357
358 /// Determine if this value is zero, i.e. all bits are clear.
359 bool isZero() const {
360 if (isSingleWord())
361 return U.VAL == 0;
362 return countLeadingZerosSlowCase() == BitWidth;
363 }
364
365 /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
366 bool isNullValue() const { return isZero(); }
367
368 /// Determine if this is a value of 1.
369 ///
370 /// This checks to see if the value of this APInt is one.
371 bool isOne() const {
372 if (isSingleWord())
373 return U.VAL == 1;
374 return countLeadingZerosSlowCase() == BitWidth - 1;
375 }
376
377 /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
378 bool isOneValue() const { return isOne(); }
379
380 /// Determine if this is the largest unsigned value.
381 ///
382 /// This checks to see if the value of this APInt is the maximum unsigned
383 /// value for the APInt's bit width.
384 bool isMaxValue() const { return isAllOnes(); }
385
386 /// Determine if this is the largest signed value.
387 ///
388 /// This checks to see if the value of this APInt is the maximum signed
389 /// value for the APInt's bit width.
390 bool isMaxSignedValue() const {
391 if (isSingleWord()) {
392 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 392, __extension__ __PRETTY_FUNCTION__
))
;
393 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
394 }
395 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
396 }
397
398 /// Determine if this is the smallest unsigned value.
399 ///
400 /// This checks to see if the value of this APInt is the minimum unsigned
401 /// value for the APInt's bit width.
402 bool isMinValue() const { return isZero(); }
403
404 /// Determine if this is the smallest signed value.
405 ///
406 /// This checks to see if the value of this APInt is the minimum signed
407 /// value for the APInt's bit width.
408 bool isMinSignedValue() const {
409 if (isSingleWord()) {
410 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 410, __extension__ __PRETTY_FUNCTION__
))
;
411 return U.VAL == (WordType(1) << (BitWidth - 1));
412 }
413 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
414 }
415
416 /// Check if this APInt has an N-bits unsigned integer value.
417 bool isIntN(unsigned N) const { return getActiveBits() <= N; }
418
419 /// Check if this APInt has an N-bits signed integer value.
420 bool isSignedIntN(unsigned N) const { return getSignificantBits() <= N; }
421
422 /// Check if this APInt's value is a power of two greater than zero.
423 ///
424 /// \returns true if the argument APInt value is a power of two > 0.
425 bool isPowerOf2() const {
426 if (isSingleWord()) {
427 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 427, __extension__ __PRETTY_FUNCTION__
))
;
428 return isPowerOf2_64(U.VAL);
429 }
430 return countPopulationSlowCase() == 1;
431 }
432
433 /// Check if this APInt's negated value is a power of two greater than zero.
434 bool isNegatedPowerOf2() const {
435 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 435, __extension__ __PRETTY_FUNCTION__
))
;
436 if (isNonNegative())
437 return false;
438 // NegatedPowerOf2 - shifted mask in the top bits.
439 unsigned LO = countLeadingOnes();
440 unsigned TZ = countTrailingZeros();
441 return (LO + TZ) == BitWidth;
442 }
443
444 /// Check if the APInt's value is returned by getSignMask.
445 ///
446 /// \returns true if this is the value returned by getSignMask.
447 bool isSignMask() const { return isMinSignedValue(); }
448
449 /// Convert APInt to a boolean value.
450 ///
451 /// This converts the APInt to a boolean value as a test against zero.
452 bool getBoolValue() const { return !isZero(); }
453
454 /// If this value is smaller than the specified limit, return it, otherwise
455 /// return the limit value. This causes the value to saturate to the limit.
456 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
457 return ugt(Limit) ? Limit : getZExtValue();
458 }
459
460 /// Check if the APInt consists of a repeated bit pattern.
461 ///
462 /// e.g. 0x01010101 satisfies isSplat(8).
463 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
464 /// width without remainder.
465 bool isSplat(unsigned SplatSizeInBits) const;
466
467 /// \returns true if this APInt value is a sequence of \param numBits ones
468 /// starting at the least significant bit with the remainder zero.
469 bool isMask(unsigned numBits) const {
470 assert(numBits != 0 && "numBits must be non-zero")(static_cast <bool> (numBits != 0 && "numBits must be non-zero"
) ? void (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\""
, "llvm/include/llvm/ADT/APInt.h", 470, __extension__ __PRETTY_FUNCTION__
))
;
471 assert(numBits <= BitWidth && "numBits out of range")(static_cast <bool> (numBits <= BitWidth && "numBits out of range"
) ? void (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\""
, "llvm/include/llvm/ADT/APInt.h", 471, __extension__ __PRETTY_FUNCTION__
))
;
472 if (isSingleWord())
473 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
474 unsigned Ones = countTrailingOnesSlowCase();
475 return (numBits == Ones) &&
476 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
477 }
478
479 /// \returns true if this APInt is a non-empty sequence of ones starting at
480 /// the least significant bit with the remainder zero.
481 /// Ex. isMask(0x0000FFFFU) == true.
482 bool isMask() const {
483 if (isSingleWord())
484 return isMask_64(U.VAL);
485 unsigned Ones = countTrailingOnesSlowCase();
486 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
487 }
488
489 /// Return true if this APInt value contains a sequence of ones with
490 /// the remainder zero.
491 bool isShiftedMask() const {
492 if (isSingleWord())
493 return isShiftedMask_64(U.VAL);
494 unsigned Ones = countPopulationSlowCase();
495 unsigned LeadZ = countLeadingZerosSlowCase();
496 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
497 }
498
499 /// Compute an APInt containing numBits highbits from this APInt.
500 ///
501 /// Get an APInt with the same BitWidth as this APInt, just zero mask the low
502 /// bits and right shift to the least significant bit.
503 ///
504 /// \returns the high "numBits" bits of this APInt.
505 APInt getHiBits(unsigned numBits) const;
506
507 /// Compute an APInt containing numBits lowbits from this APInt.
508 ///
509 /// Get an APInt with the same BitWidth as this APInt, just zero mask the high
510 /// bits.
511 ///
512 /// \returns the low "numBits" bits of this APInt.
513 APInt getLoBits(unsigned numBits) const;
514
515 /// Determine if two APInts have the same value, after zero-extending
516 /// one of them (if needed!) to ensure that the bit-widths match.
517 static bool isSameValue(const APInt &I1, const APInt &I2) {
518 if (I1.getBitWidth() == I2.getBitWidth())
519 return I1 == I2;
520
521 if (I1.getBitWidth() > I2.getBitWidth())
522 return I1 == I2.zext(I1.getBitWidth());
523
524 return I1.zext(I2.getBitWidth()) == I2;
525 }
526
527 /// Overload to compute a hash_code for an APInt value.
528 friend hash_code hash_value(const APInt &Arg);
529
530 /// This function returns a pointer to the internal storage of the APInt.
531 /// This is useful for writing out the APInt in binary form without any
532 /// conversions.
533 const uint64_t *getRawData() const {
534 if (isSingleWord())
535 return &U.VAL;
536 return &U.pVal[0];
537 }
538
539 /// @}
540 /// \name Unary Operators
541 /// @{
542
543 /// Postfix increment operator. Increment *this by 1.
544 ///
545 /// \returns a new APInt value representing the original value of *this.
546 APInt operator++(int) {
547 APInt API(*this);
548 ++(*this);
549 return API;
550 }
551
552 /// Prefix increment operator.
553 ///
554 /// \returns *this incremented by one
555 APInt &operator++();
556
557 /// Postfix decrement operator. Decrement *this by 1.
558 ///
559 /// \returns a new APInt value representing the original value of *this.
560 APInt operator--(int) {
561 APInt API(*this);
562 --(*this);
563 return API;
564 }
565
566 /// Prefix decrement operator.
567 ///
568 /// \returns *this decremented by one.
569 APInt &operator--();
570
571 /// Logical negation operation on this APInt returns true if zero, like normal
572 /// integers.
573 bool operator!() const { return isZero(); }
574
575 /// @}
576 /// \name Assignment Operators
577 /// @{
578
579 /// Copy assignment operator.
580 ///
581 /// \returns *this after assignment of RHS.
582 APInt &operator=(const APInt &RHS) {
583 // The common case (both source or dest being inline) doesn't require
584 // allocation or deallocation.
585 if (isSingleWord() && RHS.isSingleWord()) {
586 U.VAL = RHS.U.VAL;
587 BitWidth = RHS.BitWidth;
588 return *this;
589 }
590
591 assignSlowCase(RHS);
592 return *this;
593 }
594
595 /// Move assignment operator.
596 APInt &operator=(APInt &&that) {
597#ifdef EXPENSIVE_CHECKS
598 // Some std::shuffle implementations still do self-assignment.
599 if (this == &that)
600 return *this;
601#endif
602 assert(this != &that && "Self-move not supported")(static_cast <bool> (this != &that && "Self-move not supported"
) ? void (0) : __assert_fail ("this != &that && \"Self-move not supported\""
, "llvm/include/llvm/ADT/APInt.h", 602, __extension__ __PRETTY_FUNCTION__
))
;
603 if (!isSingleWord())
604 delete[] U.pVal;
605
606 // Use memcpy so that type based alias analysis sees both VAL and pVal
607 // as modified.
608 memcpy(&U, &that.U, sizeof(U));
609
610 BitWidth = that.BitWidth;
611 that.BitWidth = 0;
612 return *this;
613 }
614
615 /// Assignment operator.
616 ///
617 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
618 /// the bit width, the excess bits are truncated. If the bit width is larger
619 /// than 64, the value is zero filled in the unspecified high order bits.
620 ///
621 /// \returns *this after assignment of RHS value.
622 APInt &operator=(uint64_t RHS) {
623 if (isSingleWord()) {
624 U.VAL = RHS;
625 return clearUnusedBits();
626 }
627 U.pVal[0] = RHS;
628 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
629 return *this;
630 }
631
632 /// Bitwise AND assignment operator.
633 ///
634 /// Performs a bitwise AND operation on this APInt and RHS. The result is
635 /// assigned to *this.
636 ///
637 /// \returns *this after ANDing with RHS.
638 APInt &operator&=(const APInt &RHS) {
639 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 639, __extension__ __PRETTY_FUNCTION__
))
;
640 if (isSingleWord())
641 U.VAL &= RHS.U.VAL;
642 else
643 andAssignSlowCase(RHS);
644 return *this;
645 }
646
647 /// Bitwise AND assignment operator.
648 ///
649 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
650 /// logically zero-extended or truncated to match the bit-width of
651 /// the LHS.
652 APInt &operator&=(uint64_t RHS) {
653 if (isSingleWord()) {
654 U.VAL &= RHS;
655 return *this;
656 }
657 U.pVal[0] &= RHS;
658 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
659 return *this;
660 }
661
662 /// Bitwise OR assignment operator.
663 ///
664 /// Performs a bitwise OR operation on this APInt and RHS. The result is
665 /// assigned *this;
666 ///
667 /// \returns *this after ORing with RHS.
668 APInt &operator|=(const APInt &RHS) {
669 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 669, __extension__ __PRETTY_FUNCTION__
))
;
670 if (isSingleWord())
671 U.VAL |= RHS.U.VAL;
672 else
673 orAssignSlowCase(RHS);
674 return *this;
675 }
676
677 /// Bitwise OR assignment operator.
678 ///
679 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
680 /// logically zero-extended or truncated to match the bit-width of
681 /// the LHS.
682 APInt &operator|=(uint64_t RHS) {
683 if (isSingleWord()) {
684 U.VAL |= RHS;
685 return clearUnusedBits();
686 }
687 U.pVal[0] |= RHS;
688 return *this;
689 }
690
691 /// Bitwise XOR assignment operator.
692 ///
693 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
694 /// assigned to *this.
695 ///
696 /// \returns *this after XORing with RHS.
697 APInt &operator^=(const APInt &RHS) {
698 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 698, __extension__ __PRETTY_FUNCTION__
))
;
699 if (isSingleWord())
700 U.VAL ^= RHS.U.VAL;
701 else
702 xorAssignSlowCase(RHS);
703 return *this;
704 }
705
706 /// Bitwise XOR assignment operator.
707 ///
708 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
709 /// logically zero-extended or truncated to match the bit-width of
710 /// the LHS.
711 APInt &operator^=(uint64_t RHS) {
712 if (isSingleWord()) {
713 U.VAL ^= RHS;
714 return clearUnusedBits();
715 }
716 U.pVal[0] ^= RHS;
717 return *this;
718 }
719
720 /// Multiplication assignment operator.
721 ///
722 /// Multiplies this APInt by RHS and assigns the result to *this.
723 ///
724 /// \returns *this
725 APInt &operator*=(const APInt &RHS);
726 APInt &operator*=(uint64_t RHS);
727
728 /// Addition assignment operator.
729 ///
730 /// Adds RHS to *this and assigns the result to *this.
731 ///
732 /// \returns *this
733 APInt &operator+=(const APInt &RHS);
734 APInt &operator+=(uint64_t RHS);
735
736 /// Subtraction assignment operator.
737 ///
738 /// Subtracts RHS from *this and assigns the result to *this.
739 ///
740 /// \returns *this
741 APInt &operator-=(const APInt &RHS);
742 APInt &operator-=(uint64_t RHS);
743
744 /// Left-shift assignment function.
745 ///
746 /// Shifts *this left by shiftAmt and assigns the result to *this.
747 ///
748 /// \returns *this after shifting left by ShiftAmt
749 APInt &operator<<=(unsigned ShiftAmt) {
750 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 750, __extension__ __PRETTY_FUNCTION__
))
;
751 if (isSingleWord()) {
752 if (ShiftAmt == BitWidth)
753 U.VAL = 0;
754 else
755 U.VAL <<= ShiftAmt;
756 return clearUnusedBits();
757 }
758 shlSlowCase(ShiftAmt);
759 return *this;
760 }
761
762 /// Left-shift assignment function.
763 ///
764 /// Shifts *this left by shiftAmt and assigns the result to *this.
765 ///
766 /// \returns *this after shifting left by ShiftAmt
767 APInt &operator<<=(const APInt &ShiftAmt);
768
769 /// @}
770 /// \name Binary Operators
771 /// @{
772
773 /// Multiplication operator.
774 ///
775 /// Multiplies this APInt by RHS and returns the result.
776 APInt operator*(const APInt &RHS) const;
777
778 /// Left logical shift operator.
779 ///
780 /// Shifts this APInt left by \p Bits and returns the result.
781 APInt operator<<(unsigned Bits) const { return shl(Bits); }
782
783 /// Left logical shift operator.
784 ///
785 /// Shifts this APInt left by \p Bits and returns the result.
786 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
787
788 /// Arithmetic right-shift function.
789 ///
790 /// Arithmetic right-shift this APInt by shiftAmt.
791 APInt ashr(unsigned ShiftAmt) const {
792 APInt R(*this);
793 R.ashrInPlace(ShiftAmt);
794 return R;
795 }
796
797 /// Arithmetic right-shift this APInt by ShiftAmt in place.
798 void ashrInPlace(unsigned ShiftAmt) {
799 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 799, __extension__ __PRETTY_FUNCTION__
))
;
800 if (isSingleWord()) {
801 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
802 if (ShiftAmt == BitWidth)
803 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
804 else
805 U.VAL = SExtVAL >> ShiftAmt;
806 clearUnusedBits();
807 return;
808 }
809 ashrSlowCase(ShiftAmt);
810 }
811
812 /// Logical right-shift function.
813 ///
814 /// Logical right-shift this APInt by shiftAmt.
815 APInt lshr(unsigned shiftAmt) const {
816 APInt R(*this);
817 R.lshrInPlace(shiftAmt);
818 return R;
819 }
820
821 /// Logical right-shift this APInt by ShiftAmt in place.
822 void lshrInPlace(unsigned ShiftAmt) {
823 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 823, __extension__ __PRETTY_FUNCTION__
))
;
824 if (isSingleWord()) {
825 if (ShiftAmt == BitWidth)
826 U.VAL = 0;
827 else
828 U.VAL >>= ShiftAmt;
829 return;
830 }
831 lshrSlowCase(ShiftAmt);
832 }
833
834 /// Left-shift function.
835 ///
836 /// Left-shift this APInt by shiftAmt.
837 APInt shl(unsigned shiftAmt) const {
838 APInt R(*this);
839 R <<= shiftAmt;
840 return R;
841 }
842
843 /// Rotate left by rotateAmt.
844 APInt rotl(unsigned rotateAmt) const;
845
846 /// Rotate right by rotateAmt.
847 APInt rotr(unsigned rotateAmt) const;
848
849 /// Arithmetic right-shift function.
850 ///
851 /// Arithmetic right-shift this APInt by shiftAmt.
852 APInt ashr(const APInt &ShiftAmt) const {
853 APInt R(*this);
854 R.ashrInPlace(ShiftAmt);
855 return R;
856 }
857
858 /// Arithmetic right-shift this APInt by shiftAmt in place.
859 void ashrInPlace(const APInt &shiftAmt);
860
861 /// Logical right-shift function.
862 ///
863 /// Logical right-shift this APInt by shiftAmt.
864 APInt lshr(const APInt &ShiftAmt) const {
865 APInt R(*this);
866 R.lshrInPlace(ShiftAmt);
867 return R;
868 }
869
870 /// Logical right-shift this APInt by ShiftAmt in place.
871 void lshrInPlace(const APInt &ShiftAmt);
872
873 /// Left-shift function.
874 ///
875 /// Left-shift this APInt by shiftAmt.
876 APInt shl(const APInt &ShiftAmt) const {
877 APInt R(*this);
878 R <<= ShiftAmt;
879 return R;
880 }
881
882 /// Rotate left by rotateAmt.
883 APInt rotl(const APInt &rotateAmt) const;
884
885 /// Rotate right by rotateAmt.
886 APInt rotr(const APInt &rotateAmt) const;
887
888 /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
889 /// equivalent to:
890 /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
891 APInt concat(const APInt &NewLSB) const {
892 /// If the result will be small, then both the merged values are small.
893 unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
894 if (NewWidth <= APINT_BITS_PER_WORD)
895 return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL);
896 return concatSlowCase(NewLSB);
897 }
898
899 /// Unsigned division operation.
900 ///
901 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
902 /// RHS are treated as unsigned quantities for purposes of this division.
903 ///
904 /// \returns a new APInt value containing the division result, rounded towards
905 /// zero.
906 APInt udiv(const APInt &RHS) const;
907 APInt udiv(uint64_t RHS) const;
908
909 /// Signed division function for APInt.
910 ///
911 /// Signed divide this APInt by APInt RHS.
912 ///
913 /// The result is rounded towards zero.
914 APInt sdiv(const APInt &RHS) const;
915 APInt sdiv(int64_t RHS) const;
916
917 /// Unsigned remainder operation.
918 ///
919 /// Perform an unsigned remainder operation on this APInt with RHS being the
920 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
921 /// of this operation. Note that this is a true remainder operation and not a
922 /// modulo operation because the sign follows the sign of the dividend which
923 /// is *this.
924 ///
925 /// \returns a new APInt value containing the remainder result
926 APInt urem(const APInt &RHS) const;
927 uint64_t urem(uint64_t RHS) const;
928
929 /// Function for signed remainder operation.
930 ///
931 /// Signed remainder operation on APInt.
932 APInt srem(const APInt &RHS) const;
933 int64_t srem(int64_t RHS) const;
934
935 /// Dual division/remainder interface.
936 ///
937 /// Sometimes it is convenient to divide two APInt values and obtain both the
938 /// quotient and remainder. This function does both operations in the same
939 /// computation making it a little more efficient. The pair of input arguments
940 /// may overlap with the pair of output arguments. It is safe to call
941 /// udivrem(X, Y, X, Y), for example.
942 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
943 APInt &Remainder);
944 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
945 uint64_t &Remainder);
946
947 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
948 APInt &Remainder);
949 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
950 int64_t &Remainder);
951
952 // Operations that return overflow indicators.
953 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
954 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
955 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
956 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
957 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
958 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
959 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
960 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
961 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
962
963 // Operations that saturate
964 APInt sadd_sat(const APInt &RHS) const;
965 APInt uadd_sat(const APInt &RHS) const;
966 APInt ssub_sat(const APInt &RHS) const;
967 APInt usub_sat(const APInt &RHS) const;
968 APInt smul_sat(const APInt &RHS) const;
969 APInt umul_sat(const APInt &RHS) const;
970 APInt sshl_sat(const APInt &RHS) const;
971 APInt ushl_sat(const APInt &RHS) const;
972
973 /// Array-indexing support.
974 ///
975 /// \returns the bit value at bitPosition
976 bool operator[](unsigned bitPosition) const {
977 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast <bool> (bitPosition < getBitWidth() &&
"Bit position out of bounds!") ? void (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\""
, "llvm/include/llvm/ADT/APInt.h", 977, __extension__ __PRETTY_FUNCTION__
))
;
978 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
979 }
980
981 /// @}
982 /// \name Comparison Operators
983 /// @{
984
985 /// Equality operator.
986 ///
987 /// Compares this APInt with RHS for the validity of the equality
988 /// relationship.
989 bool operator==(const APInt &RHS) const {
990 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Comparison requires equal bit widths") ? void (0) : __assert_fail
("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\""
, "llvm/include/llvm/ADT/APInt.h", 990, __extension__ __PRETTY_FUNCTION__
))
;
991 if (isSingleWord())
992 return U.VAL == RHS.U.VAL;
993 return equalSlowCase(RHS);
994 }
995
996 /// Equality operator.
997 ///
998 /// Compares this APInt with a uint64_t for the validity of the equality
999 /// relationship.
1000 ///
1001 /// \returns true if *this == Val
1002 bool operator==(uint64_t Val) const {
1003 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1004 }
1005
1006 /// Equality comparison.
1007 ///
1008 /// Compares this APInt with RHS for the validity of the equality
1009 /// relationship.
1010 ///
1011 /// \returns true if *this == Val
1012 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1013
1014 /// Inequality operator.
1015 ///
1016 /// Compares this APInt with RHS for the validity of the inequality
1017 /// relationship.
1018 ///
1019 /// \returns true if *this != Val
1020 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1021
1022 /// Inequality operator.
1023 ///
1024 /// Compares this APInt with a uint64_t for the validity of the inequality
1025 /// relationship.
1026 ///
1027 /// \returns true if *this != Val
1028 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1029
1030 /// Inequality comparison
1031 ///
1032 /// Compares this APInt with RHS for the validity of the inequality
1033 /// relationship.
1034 ///
1035 /// \returns true if *this != Val
1036 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1037
1038 /// Unsigned less than comparison
1039 ///
1040 /// Regards both *this and RHS as unsigned quantities and compares them for
1041 /// the validity of the less-than relationship.
1042 ///
1043 /// \returns true if *this < RHS when both are considered unsigned.
1044 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1045
1046 /// Unsigned less than comparison
1047 ///
1048 /// Regards both *this as an unsigned quantity and compares it with RHS for
1049 /// the validity of the less-than relationship.
1050 ///
1051 /// \returns true if *this < RHS when considered unsigned.
1052 bool ult(uint64_t RHS) const {
1053 // Only need to check active bits if not a single word.
1054 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1055 }
1056
1057 /// Signed less than comparison
1058 ///
1059 /// Regards both *this and RHS as signed quantities and compares them for
1060 /// validity of the less-than relationship.
1061 ///
1062 /// \returns true if *this < RHS when both are considered signed.
1063 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1064
1065 /// Signed less than comparison
1066 ///
1067 /// Regards both *this as a signed quantity and compares it with RHS for
1068 /// the validity of the less-than relationship.
1069 ///
1070 /// \returns true if *this < RHS when considered signed.
1071 bool slt(int64_t RHS) const {
1072 return (!isSingleWord() && getSignificantBits() > 64)
1073 ? isNegative()
1074 : getSExtValue() < RHS;
1075 }
1076
1077 /// Unsigned less or equal comparison
1078 ///
1079 /// Regards both *this and RHS as unsigned quantities and compares them for
1080 /// validity of the less-or-equal relationship.
1081 ///
1082 /// \returns true if *this <= RHS when both are considered unsigned.
1083 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1084
1085 /// Unsigned less or equal comparison
1086 ///
1087 /// Regards both *this as an unsigned quantity and compares it with RHS for
1088 /// the validity of the less-or-equal relationship.
1089 ///
1090 /// \returns true if *this <= RHS when considered unsigned.
1091 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1092
1093 /// Signed less or equal comparison
1094 ///
1095 /// Regards both *this and RHS as signed quantities and compares them for
1096 /// validity of the less-or-equal relationship.
1097 ///
1098 /// \returns true if *this <= RHS when both are considered signed.
1099 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1100
1101 /// Signed less or equal comparison
1102 ///
1103 /// Regards both *this as a signed quantity and compares it with RHS for the
1104 /// validity of the less-or-equal relationship.
1105 ///
1106 /// \returns true if *this <= RHS when considered signed.
1107 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1108
1109 /// Unsigned greater than comparison
1110 ///
1111 /// Regards both *this and RHS as unsigned quantities and compares them for
1112 /// the validity of the greater-than relationship.
1113 ///
1114 /// \returns true if *this > RHS when both are considered unsigned.
1115 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1116
1117 /// Unsigned greater than comparison
1118 ///
1119 /// Regards both *this as an unsigned quantity and compares it with RHS for
1120 /// the validity of the greater-than relationship.
1121 ///
1122 /// \returns true if *this > RHS when considered unsigned.
1123 bool ugt(uint64_t RHS) const {
1124 // Only need to check active bits if not a single word.
1125 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1126 }
1127
1128 /// Signed greater than comparison
1129 ///
1130 /// Regards both *this and RHS as signed quantities and compares them for the
1131 /// validity of the greater-than relationship.
1132 ///
1133 /// \returns true if *this > RHS when both are considered signed.
1134 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1135
1136 /// Signed greater than comparison
1137 ///
1138 /// Regards both *this as a signed quantity and compares it with RHS for
1139 /// the validity of the greater-than relationship.
1140 ///
1141 /// \returns true if *this > RHS when considered signed.
1142 bool sgt(int64_t RHS) const {
1143 return (!isSingleWord() && getSignificantBits() > 64)
1144 ? !isNegative()
1145 : getSExtValue() > RHS;
1146 }
1147
1148 /// Unsigned greater or equal comparison
1149 ///
1150 /// Regards both *this and RHS as unsigned quantities and compares them for
1151 /// validity of the greater-or-equal relationship.
1152 ///
1153 /// \returns true if *this >= RHS when both are considered unsigned.
1154 bool uge(const APInt &RHS) const { return !ult(RHS); }
1155
1156 /// Unsigned greater or equal comparison
1157 ///
1158 /// Regards both *this as an unsigned quantity and compares it with RHS for
1159 /// the validity of the greater-or-equal relationship.
1160 ///
1161 /// \returns true if *this >= RHS when considered unsigned.
1162 bool uge(uint64_t RHS) const { return !ult(RHS); }
1163
1164 /// Signed greater or equal comparison
1165 ///
1166 /// Regards both *this and RHS as signed quantities and compares them for
1167 /// validity of the greater-or-equal relationship.
1168 ///
1169 /// \returns true if *this >= RHS when both are considered signed.
1170 bool sge(const APInt &RHS) const { return !slt(RHS); }
1171
1172 /// Signed greater or equal comparison
1173 ///
1174 /// Regards both *this as a signed quantity and compares it with RHS for
1175 /// the validity of the greater-or-equal relationship.
1176 ///
1177 /// \returns true if *this >= RHS when considered signed.
1178 bool sge(int64_t RHS) const { return !slt(RHS); }
1179
1180 /// This operation tests if there are any pairs of corresponding bits
1181 /// between this APInt and RHS that are both set.
1182 bool intersects(const APInt &RHS) const {
1183 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1183, __extension__ __PRETTY_FUNCTION__
))
;
1184 if (isSingleWord())
1185 return (U.VAL & RHS.U.VAL) != 0;
1186 return intersectsSlowCase(RHS);
1187 }
1188
1189 /// This operation checks that all bits set in this APInt are also set in RHS.
1190 bool isSubsetOf(const APInt &RHS) const {
1191 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1191, __extension__ __PRETTY_FUNCTION__
))
;
1192 if (isSingleWord())
1193 return (U.VAL & ~RHS.U.VAL) == 0;
1194 return isSubsetOfSlowCase(RHS);
1195 }
1196
1197 /// @}
1198 /// \name Resizing Operators
1199 /// @{
1200
1201 /// Truncate to new width.
1202 ///
1203 /// Truncate the APInt to a specified width. It is an error to specify a width
1204 /// that is greater than or equal to the current width.
1205 APInt trunc(unsigned width) const;
1206
1207 /// Truncate to new width with unsigned saturation.
1208 ///
1209 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1210 /// the new bitwidth, then return truncated APInt. Else, return max value.
1211 APInt truncUSat(unsigned width) const;
1212
1213 /// Truncate to new width with signed saturation.
1214 ///
1215 /// If this APInt, treated as signed integer, can be losslessly truncated to
1216 /// the new bitwidth, then return truncated APInt. Else, return either
1217 /// signed min value if the APInt was negative, or signed max value.
1218 APInt truncSSat(unsigned width) const;
1219
1220 /// Sign extend to a new width.
1221 ///
1222 /// This operation sign extends the APInt to a new width. If the high order
1223 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1224 /// It is an error to specify a width that is less than or equal to the
1225 /// current width.
1226 APInt sext(unsigned width) const;
1227
1228 /// Zero extend to a new width.
1229 ///
1230 /// This operation zero extends the APInt to a new width. The high order bits
1231 /// are filled with 0 bits. It is an error to specify a width that is less
1232 /// than or equal to the current width.
1233 APInt zext(unsigned width) const;
1234
1235 /// Sign extend or truncate to width
1236 ///
1237 /// Make this APInt have the bit width given by \p width. The value is sign
1238 /// extended, truncated, or left alone to make it that width.
1239 APInt sextOrTrunc(unsigned width) const;
1240
1241 /// Zero extend or truncate to width
1242 ///
1243 /// Make this APInt have the bit width given by \p width. The value is zero
1244 /// extended, truncated, or left alone to make it that width.
1245 APInt zextOrTrunc(unsigned width) const;
1246
1247 /// Truncate to width
1248 ///
1249 /// Make this APInt have the bit width given by \p width. The value is
1250 /// truncated or left alone to make it that width.
1251 APInt truncOrSelf(unsigned width) const;
1252
1253 /// Sign extend or truncate to width
1254 ///
1255 /// Make this APInt have the bit width given by \p width. The value is sign
1256 /// extended, or left alone to make it that width.
1257 APInt sextOrSelf(unsigned width) const;
1258
1259 /// Zero extend or truncate to width
1260 ///
1261 /// Make this APInt have the bit width given by \p width. The value is zero
1262 /// extended, or left alone to make it that width.
1263 APInt zextOrSelf(unsigned width) const;
1264
1265 /// @}
1266 /// \name Bit Manipulation Operators
1267 /// @{
1268
1269 /// Set every bit to 1.
1270 void setAllBits() {
1271 if (isSingleWord())
1272 U.VAL = WORDTYPE_MAX;
1273 else
1274 // Set all the bits in all the words.
1275 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1276 // Clear the unused ones
1277 clearUnusedBits();
1278 }
1279
1280 /// Set the given bit to 1 whose position is given as "bitPosition".
1281 void setBit(unsigned BitPosition) {
1282 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1282, __extension__ __PRETTY_FUNCTION__
))
;
1283 WordType Mask = maskBit(BitPosition);
1284 if (isSingleWord())
1285 U.VAL |= Mask;
1286 else
1287 U.pVal[whichWord(BitPosition)] |= Mask;
1288 }
1289
1290 /// Set the sign bit to 1.
1291 void setSignBit() { setBit(BitWidth - 1); }
1292
1293 /// Set a given bit to a given value.
1294 void setBitVal(unsigned BitPosition, bool BitValue) {
1295 if (BitValue)
1296 setBit(BitPosition);
1297 else
1298 clearBit(BitPosition);
1299 }
1300
1301 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1302 /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
1303 /// setBits when \p loBit < \p hiBit.
1304 /// For \p loBit == \p hiBit wrap case, set every bit to 1.
1305 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1306 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1306, __extension__ __PRETTY_FUNCTION__
))
;
1307 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1307, __extension__ __PRETTY_FUNCTION__
))
;
1308 if (loBit < hiBit) {
1309 setBits(loBit, hiBit);
1310 return;
1311 }
1312 setLowBits(hiBit);
1313 setHighBits(BitWidth - loBit);
1314 }
1315
1316 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1317 /// This function handles case when \p loBit <= \p hiBit.
1318 void setBits(unsigned loBit, unsigned hiBit) {
1319 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1319, __extension__ __PRETTY_FUNCTION__
))
;
6
Assuming 'hiBit' is <= field 'BitWidth'
7
'?' condition is true
1320 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1320, __extension__ __PRETTY_FUNCTION__
))
;
8
'?' condition is true
1321 assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit"
) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "llvm/include/llvm/ADT/APInt.h", 1321, __extension__ __PRETTY_FUNCTION__
))
;
9
'?' condition is true
1322 if (loBit == hiBit)
10
Assuming 'loBit' is not equal to 'hiBit'
1323 return;
1324 if (loBit
10.1
'loBit' is < APINT_BITS_PER_WORD
10.1
'loBit' is < APINT_BITS_PER_WORD
< APINT_BITS_PER_WORD && hiBit
10.2
'hiBit' is <= APINT_BITS_PER_WORD
10.2
'hiBit' is <= APINT_BITS_PER_WORD
<= APINT_BITS_PER_WORD) {
11
Taking true branch
1325 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1326 mask <<= loBit;
1327 if (isSingleWord())
12
Taking true branch
1328 U.VAL |= mask;
1329 else
1330 U.pVal[0] |= mask;
1331 } else {
1332 setBitsSlowCase(loBit, hiBit);
1333 }
1334 }
1335
1336 /// Set the top bits starting from loBit.
1337 void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); }
1338
1339 /// Set the bottom loBits bits.
1340 void setLowBits(unsigned loBits) { return setBits(0, loBits); }
5
Calling 'APInt::setBits'
13
Returning from 'APInt::setBits'
1341
1342 /// Set the top hiBits bits.
1343 void setHighBits(unsigned hiBits) {
1344 return setBits(BitWidth - hiBits, BitWidth);
1345 }
1346
1347 /// Set every bit to 0.
1348 void clearAllBits() {
1349 if (isSingleWord())
1350 U.VAL = 0;
1351 else
1352 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1353 }
1354
1355 /// Set a given bit to 0.
1356 ///
1357 /// Set the given bit to 0 whose position is given as "bitPosition".
1358 void clearBit(unsigned BitPosition) {
1359 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1359, __extension__ __PRETTY_FUNCTION__
))
;
1360 WordType Mask = ~maskBit(BitPosition);
1361 if (isSingleWord())
1362 U.VAL &= Mask;
1363 else
1364 U.pVal[whichWord(BitPosition)] &= Mask;
1365 }
1366
1367 /// Set bottom loBits bits to 0.
1368 void clearLowBits(unsigned loBits) {
1369 assert(loBits <= BitWidth && "More bits than bitwidth")(static_cast <bool> (loBits <= BitWidth && "More bits than bitwidth"
) ? void (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\""
, "llvm/include/llvm/ADT/APInt.h", 1369, __extension__ __PRETTY_FUNCTION__
))
;
1370 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1371 *this &= Keep;
1372 }
1373
1374 /// Set the sign bit to 0.
1375 void clearSignBit() { clearBit(BitWidth - 1); }
1376
1377 /// Toggle every bit to its opposite value.
1378 void flipAllBits() {
1379 if (isSingleWord()) {
1380 U.VAL ^= WORDTYPE_MAX;
1381 clearUnusedBits();
1382 } else {
1383 flipAllBitsSlowCase();
1384 }
1385 }
1386
1387 /// Toggles a given bit to its opposite value.
1388 ///
1389 /// Toggle a given bit to its opposite value whose position is given
1390 /// as "bitPosition".
1391 void flipBit(unsigned bitPosition);
1392
1393 /// Negate this APInt in place.
1394 void negate() {
1395 flipAllBits();
1396 ++(*this);
1397 }
1398
1399 /// Insert the bits from a smaller APInt starting at bitPosition.
1400 void insertBits(const APInt &SubBits, unsigned bitPosition);
1401 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1402
1403 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1404 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1405 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1406
1407 /// @}
1408 /// \name Value Characterization Functions
1409 /// @{
1410
1411 /// Return the number of bits in the APInt.
1412 unsigned getBitWidth() const { return BitWidth; }
1413
1414 /// Get the number of words.
1415 ///
1416 /// Here one word's bitwidth equals to that of uint64_t.
1417 ///
1418 /// \returns the number of words to hold the integer value of this APInt.
1419 unsigned getNumWords() const { return getNumWords(BitWidth); }
1420
1421 /// Get the number of words.
1422 ///
1423 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1424 ///
1425 /// \returns the number of words to hold the integer value with a given bit
1426 /// width.
1427 static unsigned getNumWords(unsigned BitWidth) {
1428 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1429 }
1430
1431 /// Compute the number of active bits in the value
1432 ///
1433 /// This function returns the number of active bits which is defined as the
1434 /// bit width minus the number of leading zeros. This is used in several
1435 /// computations to see how "wide" the value is.
1436 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1437
1438 /// Compute the number of active words in the value of this APInt.
1439 ///
1440 /// This is used in conjunction with getActiveData to extract the raw value of
1441 /// the APInt.
1442 unsigned getActiveWords() const {
1443 unsigned numActiveBits = getActiveBits();
1444 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1445 }
1446
1447 /// Get the minimum bit size for this signed APInt
1448 ///
1449 /// Computes the minimum bit width for this APInt while considering it to be a
1450 /// signed (and probably negative) value. If the value is not negative, this
1451 /// function returns the same value as getActiveBits()+1. Otherwise, it
1452 /// returns the smallest bit width that will retain the negative value. For
1453 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1454 /// for -1, this function will always return 1.
1455 unsigned getSignificantBits() const {
1456 return BitWidth - getNumSignBits() + 1;
1457 }
1458
1459 /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead.
1460 unsigned getMinSignedBits() const { return getSignificantBits(); }
1461
1462 /// Get zero extended value
1463 ///
1464 /// This method attempts to return the value of this APInt as a zero extended
1465 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1466 /// uint64_t. Otherwise an assertion will result.
1467 uint64_t getZExtValue() const {
1468 if (isSingleWord())
1469 return U.VAL;
1470 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")(static_cast <bool> (getActiveBits() <= 64 &&
"Too many bits for uint64_t") ? void (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1470, __extension__ __PRETTY_FUNCTION__
))
;
1471 return U.pVal[0];
1472 }
1473
1474 /// Get sign extended value
1475 ///
1476 /// This method attempts to return the value of this APInt as a sign extended
1477 /// int64_t. The bit width must be <= 64 or the value must fit within an
1478 /// int64_t. Otherwise an assertion will result.
1479 int64_t getSExtValue() const {
1480 if (isSingleWord())
1481 return SignExtend64(U.VAL, BitWidth);
1482 assert(getSignificantBits() <= 64 && "Too many bits for int64_t")(static_cast <bool> (getSignificantBits() <= 64 &&
"Too many bits for int64_t") ? void (0) : __assert_fail ("getSignificantBits() <= 64 && \"Too many bits for int64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1482, __extension__ __PRETTY_FUNCTION__
))
;
1483 return int64_t(U.pVal[0]);
1484 }
1485
1486 /// Get bits required for string value.
1487 ///
1488 /// This method determines how many bits are required to hold the APInt
1489 /// equivalent of the string given by \p str.
1490 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1491
1492 /// The APInt version of the countLeadingZeros functions in
1493 /// MathExtras.h.
1494 ///
1495 /// It counts the number of zeros from the most significant bit to the first
1496 /// one bit.
1497 ///
1498 /// \returns BitWidth if the value is zero, otherwise returns the number of
1499 /// zeros from the most significant bit to the first one bits.
1500 unsigned countLeadingZeros() const {
1501 if (isSingleWord()) {
1502 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1503 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1504 }
1505 return countLeadingZerosSlowCase();
1506 }
1507
1508 /// Count the number of leading one bits.
1509 ///
1510 /// This function is an APInt version of the countLeadingOnes
1511 /// functions in MathExtras.h. It counts the number of ones from the most
1512 /// significant bit to the first zero bit.
1513 ///
1514 /// \returns 0 if the high order bit is not set, otherwise returns the number
1515 /// of 1 bits from the most significant to the least
1516 unsigned countLeadingOnes() const {
1517 if (isSingleWord()) {
1518 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1519 return 0;
1520 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1521 }
1522 return countLeadingOnesSlowCase();
1523 }
1524
1525 /// Computes the number of leading bits of this APInt that are equal to its
1526 /// sign bit.
1527 unsigned getNumSignBits() const {
1528 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1529 }
1530
1531 /// Count the number of trailing zero bits.
1532 ///
1533 /// This function is an APInt version of the countTrailingZeros
1534 /// functions in MathExtras.h. It counts the number of zeros from the least
1535 /// significant bit to the first set bit.
1536 ///
1537 /// \returns BitWidth if the value is zero, otherwise returns the number of
1538 /// zeros from the least significant bit to the first one bit.
1539 unsigned countTrailingZeros() const {
1540 if (isSingleWord()) {
1541 unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL);
1542 return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
1543 }
1544 return countTrailingZerosSlowCase();
1545 }
1546
1547 /// Count the number of trailing one bits.
1548 ///
1549 /// This function is an APInt version of the countTrailingOnes
1550 /// functions in MathExtras.h. It counts the number of ones from the least
1551 /// significant bit to the first zero bit.
1552 ///
1553 /// \returns BitWidth if the value is all ones, otherwise returns the number
1554 /// of ones from the least significant bit to the first zero bit.
1555 unsigned countTrailingOnes() const {
1556 if (isSingleWord())
1557 return llvm::countTrailingOnes(U.VAL);
1558 return countTrailingOnesSlowCase();
1559 }
1560
1561 /// Count the number of bits set.
1562 ///
1563 /// This function is an APInt version of the countPopulation functions
1564 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1565 ///
1566 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1567 unsigned countPopulation() const {
1568 if (isSingleWord())
1569 return llvm::countPopulation(U.VAL);
1570 return countPopulationSlowCase();
1571 }
1572
1573 /// @}
1574 /// \name Conversion Functions
1575 /// @{
1576 void print(raw_ostream &OS, bool isSigned) const;
1577
1578 /// Converts an APInt to a string and append it to Str. Str is commonly a
1579 /// SmallString.
1580 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1581 bool formatAsCLiteral = false) const;
1582
1583 /// Considers the APInt to be unsigned and converts it into a string in the
1584 /// radix given. The radix can be 2, 8, 10 16, or 36.
1585 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1586 toString(Str, Radix, false, false);
1587 }
1588
1589 /// Considers the APInt to be signed and converts it into a string in the
1590 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1591 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1592 toString(Str, Radix, true, false);
1593 }
1594
1595 /// \returns a byte-swapped representation of this APInt Value.
1596 APInt byteSwap() const;
1597
1598 /// \returns the value with the bit representation reversed of this APInt
1599 /// Value.
1600 APInt reverseBits() const;
1601
1602 /// Converts this APInt to a double value.
1603 double roundToDouble(bool isSigned) const;
1604
1605 /// Converts this unsigned APInt to a double value.
1606 double roundToDouble() const { return roundToDouble(false); }
1607
1608 /// Converts this signed APInt to a double value.
1609 double signedRoundToDouble() const { return roundToDouble(true); }
1610
1611 /// Converts APInt bits to a double
1612 ///
1613 /// The conversion does not do a translation from integer to double, it just
1614 /// re-interprets the bits as a double. Note that it is valid to do this on
1615 /// any bit width. Exactly 64 bits will be translated.
1616 double bitsToDouble() const { return BitsToDouble(getWord(0)); }
1617
1618 /// Converts APInt bits to a float
1619 ///
1620 /// The conversion does not do a translation from integer to float, it just
1621 /// re-interprets the bits as a float. Note that it is valid to do this on
1622 /// any bit width. Exactly 32 bits will be translated.
1623 float bitsToFloat() const {
1624 return BitsToFloat(static_cast<uint32_t>(getWord(0)));
1625 }
1626
1627 /// Converts a double to APInt bits.
1628 ///
1629 /// The conversion does not do a translation from double to integer, it just
1630 /// re-interprets the bits of the double.
1631 static APInt doubleToBits(double V) {
1632 return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V));
1633 }
1634
1635 /// Converts a float to APInt bits.
1636 ///
1637 /// The conversion does not do a translation from float to integer, it just
1638 /// re-interprets the bits of the float.
1639 static APInt floatToBits(float V) {
1640 return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V));
1641 }
1642
1643 /// @}
1644 /// \name Mathematics Operations
1645 /// @{
1646
1647 /// \returns the floor log base 2 of this APInt.
1648 unsigned logBase2() const { return getActiveBits() - 1; }
1649
1650 /// \returns the ceil log base 2 of this APInt.
1651 unsigned ceilLogBase2() const {
1652 APInt temp(*this);
1653 --temp;
1654 return temp.getActiveBits();
1655 }
1656
1657 /// \returns the nearest log base 2 of this APInt. Ties round up.
1658 ///
1659 /// NOTE: When we have a BitWidth of 1, we define:
1660 ///
1661 /// log2(0) = UINT32_MAX
1662 /// log2(1) = 0
1663 ///
1664 /// to get around any mathematical concerns resulting from
1665 /// referencing 2 in a space where 2 does no exist.
1666 unsigned nearestLogBase2() const;
1667
1668 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1669 /// otherwise
1670 int32_t exactLogBase2() const {
1671 if (!isPowerOf2())
1672 return -1;
1673 return logBase2();
1674 }
1675
1676 /// Compute the square root.
1677 APInt sqrt() const;
1678
1679 /// Get the absolute value. If *this is < 0 then return -(*this), otherwise
1680 /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit
1681 /// wide APInt) is unchanged due to how negation works.
1682 APInt abs() const {
1683 if (isNegative())
1684 return -(*this);
1685 return *this;
1686 }
1687
1688 /// \returns the multiplicative inverse for a given modulo.
1689 APInt multiplicativeInverse(const APInt &modulo) const;
1690
1691 /// @}
1692 /// \name Building-block Operations for APInt and APFloat
1693 /// @{
1694
1695 // These building block operations operate on a representation of arbitrary
1696 // precision, two's-complement, bignum integer values. They should be
1697 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1698 // generally a pointer to the base of an array of integer parts, representing
1699 // an unsigned bignum, and a count of how many parts there are.
1700
1701 /// Sets the least significant part of a bignum to the input value, and zeroes
1702 /// out higher parts.
1703 static void tcSet(WordType *, WordType, unsigned);
1704
1705 /// Assign one bignum to another.
1706 static void tcAssign(WordType *, const WordType *, unsigned);
1707
1708 /// Returns true if a bignum is zero, false otherwise.
1709 static bool tcIsZero(const WordType *, unsigned);
1710
1711 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1712 static int tcExtractBit(const WordType *, unsigned bit);
1713
1714 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1715 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1716 /// significant bit of DST. All high bits above srcBITS in DST are
1717 /// zero-filled.
1718 static void tcExtract(WordType *, unsigned dstCount, const WordType *,
1719 unsigned srcBits, unsigned srcLSB);
1720
1721 /// Set the given bit of a bignum. Zero-based.
1722 static void tcSetBit(WordType *, unsigned bit);
1723
1724 /// Clear the given bit of a bignum. Zero-based.
1725 static void tcClearBit(WordType *, unsigned bit);
1726
1727 /// Returns the bit number of the least or most significant set bit of a
1728 /// number. If the input number has no bits set -1U is returned.
1729 static unsigned tcLSB(const WordType *, unsigned n);
1730 static unsigned tcMSB(const WordType *parts, unsigned n);
1731
1732 /// Negate a bignum in-place.
1733 static void tcNegate(WordType *, unsigned);
1734
1735 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1736 static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned);
1737 /// DST += RHS. Returns the carry flag.
1738 static WordType tcAddPart(WordType *, WordType, unsigned);
1739
1740 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1741 static WordType tcSubtract(WordType *, const WordType *, WordType carry,
1742 unsigned);
1743 /// DST -= RHS. Returns the carry flag.
1744 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1745
1746 /// DST += SRC * MULTIPLIER + PART if add is true
1747 /// DST = SRC * MULTIPLIER + PART if add is false
1748 ///
1749 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1750 /// start at the same point, i.e. DST == SRC.
1751 ///
1752 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1753 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1754 /// result, and if all of the omitted higher parts were zero return zero,
1755 /// otherwise overflow occurred and return one.
1756 static int tcMultiplyPart(WordType *dst, const WordType *src,
1757 WordType multiplier, WordType carry,
1758 unsigned srcParts, unsigned dstParts, bool add);
1759
1760 /// DST = LHS * RHS, where DST has the same width as the operands and is
1761 /// filled with the least significant parts of the result. Returns one if
1762 /// overflow occurred, otherwise zero. DST must be disjoint from both
1763 /// operands.
1764 static int tcMultiply(WordType *, const WordType *, const WordType *,
1765 unsigned);
1766
1767 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1768 /// operands. No overflow occurs. DST must be disjoint from both operands.
1769 static void tcFullMultiply(WordType *, const WordType *, const WordType *,
1770 unsigned, unsigned);
1771
1772 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1773 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1774 /// REMAINDER to the remainder, return zero. i.e.
1775 ///
1776 /// OLD_LHS = RHS * LHS + REMAINDER
1777 ///
1778 /// SCRATCH is a bignum of the same size as the operands and result for use by
1779 /// the routine; its contents need not be initialized and are destroyed. LHS,
1780 /// REMAINDER and SCRATCH must be distinct.
1781 static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder,
1782 WordType *scratch, unsigned parts);
1783
1784 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1785 /// restrictions on Count.
1786 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1787
1788 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1789 /// restrictions on Count.
1790 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1791
1792 /// Comparison (unsigned) of two bignums.
1793 static int tcCompare(const WordType *, const WordType *, unsigned);
1794
1795 /// Increment a bignum in-place. Return the carry flag.
1796 static WordType tcIncrement(WordType *dst, unsigned parts) {
1797 return tcAddPart(dst, 1, parts);
1798 }
1799
1800 /// Decrement a bignum in-place. Return the borrow flag.
1801 static WordType tcDecrement(WordType *dst, unsigned parts) {
1802 return tcSubtractPart(dst, 1, parts);
1803 }
1804
1805 /// Used to insert APInt objects, or objects that contain APInt objects, into
1806 /// FoldingSets.
1807 void Profile(FoldingSetNodeID &id) const;
1808
1809 /// debug method
1810 void dump() const;
1811
1812 /// Returns whether this instance allocated memory.
1813 bool needsCleanup() const { return !isSingleWord(); }
1814
1815private:
1816 /// This union is used to store the integer value. When the
1817 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
1818 union {
1819 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
1820 uint64_t *pVal; ///< Used to store the >64 bits integer value.
1821 } U;
1822
1823 unsigned BitWidth; ///< The number of bits in this APInt.
1824
1825 friend struct DenseMapInfo<APInt, void>;
1826 friend class APSInt;
1827
1828 /// This constructor is used only internally for speed of construction of
1829 /// temporaries. It is unsafe since it takes ownership of the pointer, so it
1830 /// is not public.
1831 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }
1832
1833 /// Determine which word a bit is in.
1834 ///
1835 /// \returns the word position for the specified bit position.
1836 static unsigned whichWord(unsigned bitPosition) {
1837 return bitPosition / APINT_BITS_PER_WORD;
1838 }
1839
1840 /// Determine which bit in a word the specified bit position is in.
1841 static unsigned whichBit(unsigned bitPosition) {
1842 return bitPosition % APINT_BITS_PER_WORD;
1843 }
1844
1845 /// Get a single bit mask.
1846 ///
1847 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
1848 /// This method generates and returns a uint64_t (word) mask for a single
1849 /// bit at a specific bit position. This is used to mask the bit in the
1850 /// corresponding word.
1851 static uint64_t maskBit(unsigned bitPosition) {
1852 return 1ULL << whichBit(bitPosition);
1853 }
1854
1855 /// Clear unused high order bits
1856 ///
1857 /// This method is used internally to clear the top "N" bits in the high order
1858 /// word that are not used by the APInt. This is needed after the most
1859 /// significant word is assigned a value to ensure that those bits are
1860 /// zero'd out.
1861 APInt &clearUnusedBits() {
1862 // Compute how many bits are used in the final word.
1863 unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;
1864
1865 // Mask out the high bits.
1866 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
1867 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1868 mask = 0;
1869
1870 if (isSingleWord())
1871 U.VAL &= mask;
1872 else
1873 U.pVal[getNumWords() - 1] &= mask;
1874 return *this;
1875 }
1876
1877 /// Get the word corresponding to a bit position
1878 /// \returns the corresponding word for the specified bit position.
1879 uint64_t getWord(unsigned bitPosition) const {
1880 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
1881 }
1882
1883 /// Utility method to change the bit width of this APInt to new bit width,
1884 /// allocating and/or deallocating as necessary. There is no guarantee on the
1885 /// value of any bits upon return. Caller should populate the bits after.
1886 void reallocate(unsigned NewBitWidth);
1887
1888 /// Convert a char array into an APInt
1889 ///
1890 /// \param radix 2, 8, 10, 16, or 36
1891 /// Converts a string into a number. The string must be non-empty
1892 /// and well-formed as a number of the given base. The bit-width
1893 /// must be sufficient to hold the result.
1894 ///
1895 /// This is used by the constructors that take string arguments.
1896 ///
1897 /// StringRef::getAsInteger is superficially similar but (1) does
1898 /// not assume that the string is well-formed and (2) grows the
1899 /// result to hold the input.
1900 void fromString(unsigned numBits, StringRef str, uint8_t radix);
1901
1902 /// An internal division function for dividing APInts.
1903 ///
1904 /// This is used by the toString method to divide by the radix. It simply
1905 /// provides a more convenient form of divide for internal use since KnuthDiv
1906 /// has specific constraints on its inputs. If those constraints are not met
1907 /// then it provides a simpler form of divide.
1908 static void divide(const WordType *LHS, unsigned lhsWords,
1909 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
1910 WordType *Remainder);
1911
1912 /// out-of-line slow case for inline constructor
1913 void initSlowCase(uint64_t val, bool isSigned);
1914
1915 /// shared code between two array constructors
1916 void initFromArray(ArrayRef<uint64_t> array);
1917
1918 /// out-of-line slow case for inline copy constructor
1919 void initSlowCase(const APInt &that);
1920
1921 /// out-of-line slow case for shl
1922 void shlSlowCase(unsigned ShiftAmt);
1923
1924 /// out-of-line slow case for lshr.
1925 void lshrSlowCase(unsigned ShiftAmt);
1926
1927 /// out-of-line slow case for ashr.
1928 void ashrSlowCase(unsigned ShiftAmt);
1929
1930 /// out-of-line slow case for operator=
1931 void assignSlowCase(const APInt &RHS);
1932
1933 /// out-of-line slow case for operator==
1934 bool equalSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1935
1936 /// out-of-line slow case for countLeadingZeros
1937 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
1938
1939 /// out-of-line slow case for countLeadingOnes.
1940 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
1941
1942 /// out-of-line slow case for countTrailingZeros.
1943 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
1944
1945 /// out-of-line slow case for countTrailingOnes
1946 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
1947
1948 /// out-of-line slow case for countPopulation
1949 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
1950
1951 /// out-of-line slow case for intersects.
1952 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1953
1954 /// out-of-line slow case for isSubsetOf.
1955 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1956
1957 /// out-of-line slow case for setBits.
1958 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
1959
1960 /// out-of-line slow case for flipAllBits.
1961 void flipAllBitsSlowCase();
1962
1963 /// out-of-line slow case for concat.
1964 APInt concatSlowCase(const APInt &NewLSB) const;
1965
1966 /// out-of-line slow case for operator&=.
1967 void andAssignSlowCase(const APInt &RHS);
1968
1969 /// out-of-line slow case for operator|=.
1970 void orAssignSlowCase(const APInt &RHS);
1971
1972 /// out-of-line slow case for operator^=.
1973 void xorAssignSlowCase(const APInt &RHS);
1974
1975 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
1976 /// to, or greater than RHS.
1977 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1978
1979 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
1980 /// to, or greater than RHS.
1981 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1982
1983 /// @}
1984};
1985
1986inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
1987
1988inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1989
1990/// Unary bitwise complement operator.
1991///
1992/// \returns an APInt that is the bitwise complement of \p v.
1993inline APInt operator~(APInt v) {
1994 v.flipAllBits();
1995 return v;
1996}
1997
1998inline APInt operator&(APInt a, const APInt &b) {
1999 a &= b;
2000 return a;
2001}
2002
2003inline APInt operator&(const APInt &a, APInt &&b) {
2004 b &= a;
2005 return std::move(b);
2006}
2007
2008inline APInt operator&(APInt a, uint64_t RHS) {
2009 a &= RHS;
2010 return a;
2011}
2012
2013inline APInt operator&(uint64_t LHS, APInt b) {
2014 b &= LHS;
2015 return b;
2016}
2017
2018inline APInt operator|(APInt a, const APInt &b) {
2019 a |= b;
2020 return a;
2021}
2022
2023inline APInt operator|(const APInt &a, APInt &&b) {
2024 b |= a;
2025 return std::move(b);
2026}
2027
2028inline APInt operator|(APInt a, uint64_t RHS) {
2029 a |= RHS;
2030 return a;
2031}
2032
2033inline APInt operator|(uint64_t LHS, APInt b) {
2034 b |= LHS;
2035 return b;
2036}
2037
2038inline APInt operator^(APInt a, const APInt &b) {
2039 a ^= b;
2040 return a;
2041}
2042
2043inline APInt operator^(const APInt &a, APInt &&b) {
2044 b ^= a;
2045 return std::move(b);
2046}
2047
2048inline APInt operator^(APInt a, uint64_t RHS) {
2049 a ^= RHS;
2050 return a;
2051}
2052
2053inline APInt operator^(uint64_t LHS, APInt b) {
2054 b ^= LHS;
2055 return b;
2056}
2057
2058inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2059 I.print(OS, true);
2060 return OS;
2061}
2062
2063inline APInt operator-(APInt v) {
2064 v.negate();
2065 return v;
2066}
2067
2068inline APInt operator+(APInt a, const APInt &b) {
2069 a += b;
2070 return a;
2071}
2072
2073inline APInt operator+(const APInt &a, APInt &&b) {
2074 b += a;
2075 return std::move(b);
2076}
2077
2078inline APInt operator+(APInt a, uint64_t RHS) {
2079 a += RHS;
2080 return a;
2081}
2082
2083inline APInt operator+(uint64_t LHS, APInt b) {
2084 b += LHS;
2085 return b;
2086}
2087
2088inline APInt operator-(APInt a, const APInt &b) {
2089 a -= b;
2090 return a;
2091}
2092
2093inline APInt operator-(const APInt &a, APInt &&b) {
2094 b.negate();
2095 b += a;
2096 return std::move(b);
2097}
2098
2099inline APInt operator-(APInt a, uint64_t RHS) {
2100 a -= RHS;
2101 return a;
2102}
2103
2104inline APInt operator-(uint64_t LHS, APInt b) {
2105 b.negate();
2106 b += LHS;
2107 return b;
2108}
2109
2110inline APInt operator*(APInt a, uint64_t RHS) {
2111 a *= RHS;
2112 return a;
2113}
2114
2115inline APInt operator*(uint64_t LHS, APInt b) {
2116 b *= LHS;
2117 return b;
2118}
2119
2120namespace APIntOps {
2121
2122/// Determine the smaller of two APInts considered to be signed.
2123inline const APInt &smin(const APInt &A, const APInt &B) {
2124 return A.slt(B) ? A : B;
2125}
2126
2127/// Determine the larger of two APInts considered to be signed.
2128inline const APInt &smax(const APInt &A, const APInt &B) {
2129 return A.sgt(B) ? A : B;
2130}
2131
2132/// Determine the smaller of two APInts considered to be unsigned.
2133inline const APInt &umin(const APInt &A, const APInt &B) {
2134 return A.ult(B) ? A : B;
2135}
2136
2137/// Determine the larger of two APInts considered to be unsigned.
2138inline const APInt &umax(const APInt &A, const APInt &B) {
2139 return A.ugt(B) ? A : B;
2140}
2141
2142/// Compute GCD of two unsigned APInt values.
2143///
2144/// This function returns the greatest common divisor of the two APInt values
2145/// using Stein's algorithm.
2146///
2147/// \returns the greatest common divisor of A and B.
2148APInt GreatestCommonDivisor(APInt A, APInt B);
2149
2150/// Converts the given APInt to a double value.
2151///
2152/// Treats the APInt as an unsigned value for conversion purposes.
2153inline double RoundAPIntToDouble(const APInt &APIVal) {
2154 return APIVal.roundToDouble();
2155}
2156
2157/// Converts the given APInt to a double value.
2158///
2159/// Treats the APInt as a signed value for conversion purposes.
2160inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2161 return APIVal.signedRoundToDouble();
2162}
2163
2164/// Converts the given APInt to a float value.
2165inline float RoundAPIntToFloat(const APInt &APIVal) {
2166 return float(RoundAPIntToDouble(APIVal));
2167}
2168
2169/// Converts the given APInt to a float value.
2170///
2171/// Treats the APInt as a signed value for conversion purposes.
2172inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2173 return float(APIVal.signedRoundToDouble());
2174}
2175
2176/// Converts the given double value into a APInt.
2177///
2178/// This function convert a double value to an APInt value.
2179APInt RoundDoubleToAPInt(double Double, unsigned width);
2180
2181/// Converts a float value into a APInt.
2182///
2183/// Converts a float value into an APInt value.
2184inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2185 return RoundDoubleToAPInt(double(Float), width);
2186}
2187
2188/// Return A unsign-divided by B, rounded by the given rounding mode.
2189APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2190
2191/// Return A sign-divided by B, rounded by the given rounding mode.
2192APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2193
2194/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2195/// (e.g. 32 for i32).
2196/// This function finds the smallest number n, such that
2197/// (a) n >= 0 and q(n) = 0, or
2198/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2199/// integers, belong to two different intervals [Rk, Rk+R),
2200/// where R = 2^BW, and k is an integer.
2201/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2202/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2203/// subtraction (treated as addition of negated numbers) would always
2204/// count as an overflow, but here we want to allow values to decrease
2205/// and increase as long as they are within the same interval.
2206/// Specifically, adding of two negative numbers should not cause an
2207/// overflow (as long as the magnitude does not exceed the bit width).
2208/// On the other hand, given a positive number, adding a negative
2209/// number to it can give a negative result, which would cause the
2210/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2211/// treated as a special case of an overflow.
2212///
2213/// This function returns None if after finding k that minimizes the
2214/// positive solution to q(n) = kR, both solutions are contained between
2215/// two consecutive integers.
2216///
2217/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2218/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2219/// virtue of *signed* overflow. This function will *not* find such an n,
2220/// however it may find a value of n satisfying the inequalities due to
2221/// an *unsigned* overflow (if the values are treated as unsigned).
2222/// To find a solution for a signed overflow, treat it as a problem of
2223/// finding an unsigned overflow with a range with of BW-1.
2224///
2225/// The returned value may have a different bit width from the input
2226/// coefficients.
2227Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2228 unsigned RangeWidth);
2229
2230/// Compare two values, and if they are different, return the position of the
2231/// most significant bit that is different in the values.
2232Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2233 const APInt &B);
2234
2235/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
2236/// by \param A to \param NewBitWidth bits.
2237///
2238/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
2239/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
2240/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other.
2241///
2242/// TODO: Do we need a mode where all bits must be set when merging down?
2243APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
2244} // namespace APIntOps
2245
2246// See friend declaration above. This additional declaration is required in
2247// order to compile LLVM with IBM xlC compiler.
2248hash_code hash_value(const APInt &Arg);
2249
2250/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2251/// with the integer held in IntVal.
2252void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2253
2254/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2255/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2256void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
2257
2258/// Provide DenseMapInfo for APInt.
2259template <> struct DenseMapInfo<APInt, void> {
2260 static inline APInt getEmptyKey() {
2261 APInt V(nullptr, 0);
2262 V.U.VAL = 0;
2263 return V;
2264 }
2265
2266 static inline APInt getTombstoneKey() {
2267 APInt V(nullptr, 0);
2268 V.U.VAL = 1;
2269 return V;
2270 }
2271
2272 static unsigned getHashValue(const APInt &Key);
2273
2274 static bool isEqual(const APInt &LHS, const APInt &RHS) {
2275 return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
2276 }
2277};
2278
2279} // namespace llvm
2280
2281#endif