Bug Summary

File:build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1563, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-15/lib/clang/15.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/Hexagon -I include -I /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-15/lib/clang/15.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -O3 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/= -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2022-04-20-140412-16051-1 -x c++ /build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonLoopIdiomRecognition.h"
10#include "llvm/ADT/APInt.h"
11#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/ADT/Triple.h"
18#include "llvm/Analysis/AliasAnalysis.h"
19#include "llvm/Analysis/InstructionSimplify.h"
20#include "llvm/Analysis/LoopAnalysisManager.h"
21#include "llvm/Analysis/LoopInfo.h"
22#include "llvm/Analysis/LoopPass.h"
23#include "llvm/Analysis/MemoryLocation.h"
24#include "llvm/Analysis/ScalarEvolution.h"
25#include "llvm/Analysis/ScalarEvolutionExpressions.h"
26#include "llvm/Analysis/TargetLibraryInfo.h"
27#include "llvm/Analysis/ValueTracking.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/DerivedTypes.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/InstrTypes.h"
39#include "llvm/IR/Instruction.h"
40#include "llvm/IR/Instructions.h"
41#include "llvm/IR/IntrinsicInst.h"
42#include "llvm/IR/Intrinsics.h"
43#include "llvm/IR/IntrinsicsHexagon.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/PassManager.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/User.h"
49#include "llvm/IR/Value.h"
50#include "llvm/InitializePasses.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include "llvm/Transforms/Utils/Local.h"
62#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
63#include <algorithm>
64#include <array>
65#include <cassert>
66#include <cstdint>
67#include <cstdlib>
68#include <deque>
69#include <functional>
70#include <iterator>
71#include <map>
72#include <set>
73#include <utility>
74#include <vector>
75
76#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
77
78using namespace llvm;
79
80static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memcpy in loop idiom recognition"));
83
84static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
85 cl::Hidden, cl::init(false),
86 cl::desc("Disable generation of memmove in loop idiom recognition"));
87
88static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
89 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
90 "check guarding the memmove."));
91
92static cl::opt<unsigned> CompileTimeMemSizeThreshold(
93 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
94 cl::desc("Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
96
97static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
98 cl::Hidden, cl::init(true),
99 cl::desc("Only enable generating memmove in non-nested loops"));
100
101static cl::opt<bool> HexagonVolatileMemcpy(
102 "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
103 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
104
105static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
106 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
107
108static const char *HexagonVolatileMemcpyName
109 = "hexagon_memcpy_forward_vp4cp4n2";
110
111
112namespace llvm {
113
114void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
115Pass *createHexagonLoopIdiomPass();
116
117} // end namespace llvm
118
119namespace {
120
121class HexagonLoopIdiomRecognize {
122public:
123 explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
124 LoopInfo *LF, const TargetLibraryInfo *TLI,
125 ScalarEvolution *SE)
126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
127
128 bool run(Loop *L);
129
130private:
131 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
132 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
133 void collectStores(Loop *CurLoop, BasicBlock *BB,
134 SmallVectorImpl<StoreInst *> &Stores);
135 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
136 bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const;
137 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
138 SmallVectorImpl<BasicBlock *> &ExitBlocks);
139 bool runOnCountableLoop(Loop *L);
140
141 AliasAnalysis *AA;
142 const DataLayout *DL;
143 DominatorTree *DT;
144 LoopInfo *LF;
145 const TargetLibraryInfo *TLI;
146 ScalarEvolution *SE;
147 bool HasMemcpy, HasMemmove;
148};
149
150class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {
151public:
152 static char ID;
153
154 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
155 initializeHexagonLoopIdiomRecognizeLegacyPassPass(
156 *PassRegistry::getPassRegistry());
157 }
158
159 StringRef getPassName() const override {
160 return "Recognize Hexagon-specific loop idioms";
161 }
162
163 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 AU.addRequired<LoopInfoWrapperPass>();
165 AU.addRequiredID(LoopSimplifyID);
166 AU.addRequiredID(LCSSAID);
167 AU.addRequired<AAResultsWrapperPass>();
168 AU.addRequired<ScalarEvolutionWrapperPass>();
169 AU.addRequired<DominatorTreeWrapperPass>();
170 AU.addRequired<TargetLibraryInfoWrapperPass>();
171 AU.addPreserved<TargetLibraryInfoWrapperPass>();
172 }
173
174 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
175};
176
177struct Simplifier {
178 struct Rule {
179 using FuncType = std::function<Value *(Instruction *, LLVMContext &)>;
180 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
181 StringRef Name; // For debugging.
182 FuncType Fn;
183 };
184
185 void addRule(StringRef N, const Rule::FuncType &F) {
186 Rules.push_back(Rule(N, F));
187 }
188
189private:
190 struct WorkListType {
191 WorkListType() = default;
192
193 void push_back(Value *V) {
194 // Do not push back duplicates.
195 if (!S.count(V)) {
196 Q.push_back(V);
197 S.insert(V);
198 }
199 }
200
201 Value *pop_front_val() {
202 Value *V = Q.front();
203 Q.pop_front();
204 S.erase(V);
205 return V;
206 }
207
208 bool empty() const { return Q.empty(); }
209
210 private:
211 std::deque<Value *> Q;
212 std::set<Value *> S;
213 };
214
215 using ValueSetType = std::set<Value *>;
216
217 std::vector<Rule> Rules;
218
219public:
220 struct Context {
221 using ValueMapType = DenseMap<Value *, Value *>;
222
223 Value *Root;
224 ValueSetType Used; // The set of all cloned values used by Root.
225 ValueSetType Clones; // The set of all cloned values.
226 LLVMContext &Ctx;
227
228 Context(Instruction *Exp)
229 : Ctx(Exp->getParent()->getParent()->getContext()) {
230 initialize(Exp);
231 }
232
233 ~Context() { cleanup(); }
234
235 void print(raw_ostream &OS, const Value *V) const;
236 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
237
238 private:
239 friend struct Simplifier;
240
241 void initialize(Instruction *Exp);
242 void cleanup();
243
244 template <typename FuncT> void traverse(Value *V, FuncT F);
245 void record(Value *V);
246 void use(Value *V);
247 void unuse(Value *V);
248
249 bool equal(const Instruction *I, const Instruction *J) const;
250 Value *find(Value *Tree, Value *Sub) const;
251 Value *subst(Value *Tree, Value *OldV, Value *NewV);
252 void replace(Value *OldV, Value *NewV);
253 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
254 };
255
256 Value *simplify(Context &C);
257};
258
259 struct PE {
260 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
261
262 const Simplifier::Context &C;
263 const Value *V;
264 };
265
266 LLVM_ATTRIBUTE_USED__attribute__((__used__))
267 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
268 P.C.print(OS, P.V ? P.V : P.C.Root);
269 return OS;
270 }
271
272} // end anonymous namespace
273
274char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
275
276INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
277 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
278INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
279INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
280INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
281INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
282INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
283INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
284INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
285INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
286 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
287
288template <typename FuncT>
289void Simplifier::Context::traverse(Value *V, FuncT F) {
290 WorkListType Q;
291 Q.push_back(V);
292
293 while (!Q.empty()) {
294 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
295 if (!U || U->getParent())
296 continue;
297 if (!F(U))
298 continue;
299 for (Value *Op : U->operands())
300 Q.push_back(Op);
301 }
302}
303
304void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
305 const auto *U = dyn_cast<const Instruction>(V);
306 if (!U) {
307 OS << V << '(' << *V << ')';
308 return;
309 }
310
311 if (U->getParent()) {
312 OS << U << '(';
313 U->printAsOperand(OS, true);
314 OS << ')';
315 return;
316 }
317
318 unsigned N = U->getNumOperands();
319 if (N != 0)
320 OS << U << '(';
321 OS << U->getOpcodeName();
322 for (const Value *Op : U->operands()) {
323 OS << ' ';
324 print(OS, Op);
325 }
326 if (N != 0)
327 OS << ')';
328}
329
330void Simplifier::Context::initialize(Instruction *Exp) {
331 // Perform a deep clone of the expression, set Root to the root
332 // of the clone, and build a map from the cloned values to the
333 // original ones.
334 ValueMapType M;
335 BasicBlock *Block = Exp->getParent();
336 WorkListType Q;
337 Q.push_back(Exp);
338
339 while (!Q.empty()) {
340 Value *V = Q.pop_front_val();
341 if (M.find(V) != M.end())
342 continue;
343 if (Instruction *U = dyn_cast<Instruction>(V)) {
344 if (isa<PHINode>(U) || U->getParent() != Block)
345 continue;
346 for (Value *Op : U->operands())
347 Q.push_back(Op);
348 M.insert({U, U->clone()});
349 }
350 }
351
352 for (std::pair<Value*,Value*> P : M) {
353 Instruction *U = cast<Instruction>(P.second);
354 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
355 auto F = M.find(U->getOperand(i));
356 if (F != M.end())
357 U->setOperand(i, F->second);
358 }
359 }
360
361 auto R = M.find(Exp);
362 assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail
("R != M.end()", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 362, __extension__ __PRETTY_FUNCTION__))
;
363 Root = R->second;
364
365 record(Root);
366 use(Root);
367}
368
369void Simplifier::Context::record(Value *V) {
370 auto Record = [this](Instruction *U) -> bool {
371 Clones.insert(U);
372 return true;
373 };
374 traverse(V, Record);
375}
376
377void Simplifier::Context::use(Value *V) {
378 auto Use = [this](Instruction *U) -> bool {
379 Used.insert(U);
380 return true;
381 };
382 traverse(V, Use);
383}
384
385void Simplifier::Context::unuse(Value *V) {
386 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
387 return;
388
389 auto Unuse = [this](Instruction *U) -> bool {
390 if (!U->use_empty())
391 return false;
392 Used.erase(U);
393 return true;
394 };
395 traverse(V, Unuse);
396}
397
398Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
399 if (Tree == OldV)
400 return NewV;
401 if (OldV == NewV)
402 return Tree;
403
404 WorkListType Q;
405 Q.push_back(Tree);
406 while (!Q.empty()) {
407 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
408 // If U is not an instruction, or it's not a clone, skip it.
409 if (!U || U->getParent())
410 continue;
411 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
412 Value *Op = U->getOperand(i);
413 if (Op == OldV) {
414 U->setOperand(i, NewV);
415 unuse(OldV);
416 } else {
417 Q.push_back(Op);
418 }
419 }
420 }
421 return Tree;
422}
423
424void Simplifier::Context::replace(Value *OldV, Value *NewV) {
425 if (Root == OldV) {
426 Root = NewV;
427 use(Root);
428 return;
429 }
430
431 // NewV may be a complex tree that has just been created by one of the
432 // transformation rules. We need to make sure that it is commoned with
433 // the existing Root to the maximum extent possible.
434 // Identify all subtrees of NewV (including NewV itself) that have
435 // equivalent counterparts in Root, and replace those subtrees with
436 // these counterparts.
437 WorkListType Q;
438 Q.push_back(NewV);
439 while (!Q.empty()) {
440 Value *V = Q.pop_front_val();
441 Instruction *U = dyn_cast<Instruction>(V);
442 if (!U || U->getParent())
443 continue;
444 if (Value *DupV = find(Root, V)) {
445 if (DupV != V)
446 NewV = subst(NewV, V, DupV);
447 } else {
448 for (Value *Op : U->operands())
449 Q.push_back(Op);
450 }
451 }
452
453 // Now, simply replace OldV with NewV in Root.
454 Root = subst(Root, OldV, NewV);
455 use(Root);
456}
457
458void Simplifier::Context::cleanup() {
459 for (Value *V : Clones) {
460 Instruction *U = cast<Instruction>(V);
461 if (!U->getParent())
462 U->dropAllReferences();
463 }
464
465 for (Value *V : Clones) {
466 Instruction *U = cast<Instruction>(V);
467 if (!U->getParent())
468 U->deleteValue();
469 }
470}
471
472bool Simplifier::Context::equal(const Instruction *I,
473 const Instruction *J) const {
474 if (I == J)
475 return true;
476 if (!I->isSameOperationAs(J))
477 return false;
478 if (isa<PHINode>(I))
479 return I->isIdenticalTo(J);
480
481 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
482 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
483 if (OpI == OpJ)
484 continue;
485 auto *InI = dyn_cast<const Instruction>(OpI);
486 auto *InJ = dyn_cast<const Instruction>(OpJ);
487 if (InI && InJ) {
488 if (!equal(InI, InJ))
489 return false;
490 } else if (InI != InJ || !InI)
491 return false;
492 }
493 return true;
494}
495
496Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
497 Instruction *SubI = dyn_cast<Instruction>(Sub);
498 WorkListType Q;
499 Q.push_back(Tree);
500
501 while (!Q.empty()) {
502 Value *V = Q.pop_front_val();
503 if (V == Sub)
504 return V;
505 Instruction *U = dyn_cast<Instruction>(V);
506 if (!U || U->getParent())
507 continue;
508 if (SubI && equal(SubI, U))
509 return U;
510 assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0)
: __assert_fail ("!isa<PHINode>(U)", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 510, __extension__ __PRETTY_FUNCTION__))
;
511 for (Value *Op : U->operands())
512 Q.push_back(Op);
513 }
514 return nullptr;
515}
516
517void Simplifier::Context::link(Instruction *I, BasicBlock *B,
518 BasicBlock::iterator At) {
519 if (I->getParent())
520 return;
521
522 for (Value *Op : I->operands()) {
523 if (Instruction *OpI = dyn_cast<Instruction>(Op))
524 link(OpI, B, At);
525 }
526
527 B->getInstList().insert(At, I);
528}
529
530Value *Simplifier::Context::materialize(BasicBlock *B,
531 BasicBlock::iterator At) {
532 if (Instruction *RootI = dyn_cast<Instruction>(Root))
533 link(RootI, B, At);
534 return Root;
535}
536
537Value *Simplifier::simplify(Context &C) {
538 WorkListType Q;
539 Q.push_back(C.Root);
540 unsigned Count = 0;
541 const unsigned Limit = SimplifyLimit;
542
543 while (!Q.empty()) {
544 if (Count++ >= Limit)
545 break;
546 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
547 if (!U || U->getParent() || !C.Used.count(U))
548 continue;
549 bool Changed = false;
550 for (Rule &R : Rules) {
551 Value *W = R.Fn(U, C.Ctx);
552 if (!W)
553 continue;
554 Changed = true;
555 C.record(W);
556 C.replace(U, W);
557 Q.push_back(C.Root);
558 break;
559 }
560 if (!Changed) {
561 for (Value *Op : U->operands())
562 Q.push_back(Op);
563 }
564 }
565 return Count < Limit ? C.Root : nullptr;
566}
567
568//===----------------------------------------------------------------------===//
569//
570// Implementation of PolynomialMultiplyRecognize
571//
572//===----------------------------------------------------------------------===//
573
574namespace {
575
576 class PolynomialMultiplyRecognize {
577 public:
578 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
579 const DominatorTree &dt, const TargetLibraryInfo &tli,
580 ScalarEvolution &se)
581 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
582
583 bool recognize();
584
585 private:
586 using ValueSeq = SetVector<Value *>;
587
588 IntegerType *getPmpyType() const {
589 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
590 return IntegerType::get(Ctx, 32);
591 }
592
593 bool isPromotableTo(Value *V, IntegerType *Ty);
594 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
595 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
596
597 Value *getCountIV(BasicBlock *BB);
598 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
599 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
600 ValueSeq &Late);
601 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
602 bool commutesWithShift(Instruction *I);
603 bool highBitsAreZero(Value *V, unsigned IterCount);
604 bool keepsHighBitsZero(Value *V, unsigned IterCount);
605 bool isOperandShifted(Instruction *I, Value *Op);
606 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
607 unsigned IterCount);
608 void cleanupLoopBody(BasicBlock *LoopB);
609
610 struct ParsedValues {
611 ParsedValues() = default;
612
613 Value *M = nullptr;
614 Value *P = nullptr;
615 Value *Q = nullptr;
616 Value *R = nullptr;
617 Value *X = nullptr;
618 Instruction *Res = nullptr;
619 unsigned IterCount = 0;
620 bool Left = false;
621 bool Inv = false;
622 };
623
624 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
625 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
626 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
627 Value *CIV, ParsedValues &PV, bool PreScan);
628 unsigned getInverseMxN(unsigned QP);
629 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
630
631 void setupPreSimplifier(Simplifier &S);
632 void setupPostSimplifier(Simplifier &S);
633
634 Loop *CurLoop;
635 const DataLayout &DL;
636 const DominatorTree &DT;
637 const TargetLibraryInfo &TLI;
638 ScalarEvolution &SE;
639 };
640
641} // end anonymous namespace
642
643Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
644 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
645 if (std::distance(PI, PE) != 2)
646 return nullptr;
647 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
648
649 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
650 auto *PN = cast<PHINode>(I);
651 Value *InitV = PN->getIncomingValueForBlock(PB);
652 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
653 continue;
654 Value *IterV = PN->getIncomingValueForBlock(BB);
655 auto *BO = dyn_cast<BinaryOperator>(IterV);
656 if (!BO)
657 continue;
658 if (BO->getOpcode() != Instruction::Add)
659 continue;
660 Value *IncV = nullptr;
661 if (BO->getOperand(0) == PN)
662 IncV = BO->getOperand(1);
663 else if (BO->getOperand(1) == PN)
664 IncV = BO->getOperand(0);
665 if (IncV == nullptr)
666 continue;
667
668 if (auto *T = dyn_cast<ConstantInt>(IncV))
669 if (T->getZExtValue() == 1)
670 return PN;
671 }
672 return nullptr;
673}
674
675static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
676 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
677 Use &TheUse = UI.getUse();
678 ++UI;
679 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
680 if (BB == II->getParent())
681 II->replaceUsesOfWith(I, J);
682 }
683}
684
685bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
686 Value *CIV, ParsedValues &PV) {
687 // Match the following:
688 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
689 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
690 // The condition may also check for equality with the masked value, i.e
691 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
692 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
693
694 Value *CondV = SelI->getCondition();
695 Value *TrueV = SelI->getTrueValue();
696 Value *FalseV = SelI->getFalseValue();
697
698 using namespace PatternMatch;
699
700 CmpInst::Predicate P;
701 Value *A = nullptr, *B = nullptr, *C = nullptr;
702
703 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
704 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
705 return false;
706 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
707 return false;
708 // Matched: select (A & B) == C ? ... : ...
709 // select (A & B) != C ? ... : ...
710
711 Value *X = nullptr, *Sh1 = nullptr;
712 // Check (A & B) for (X & (1 << i)):
713 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
714 Sh1 = A;
715 X = B;
716 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
717 Sh1 = B;
718 X = A;
719 } else {
720 // TODO: Could also check for an induction variable containing single
721 // bit shifted left by 1 in each iteration.
722 return false;
723 }
724
725 bool TrueIfZero;
726
727 // Check C against the possible values for comparison: 0 and (1 << i):
728 if (match(C, m_Zero()))
729 TrueIfZero = (P == CmpInst::ICMP_EQ);
730 else if (C == Sh1)
731 TrueIfZero = (P == CmpInst::ICMP_NE);
732 else
733 return false;
734
735 // So far, matched:
736 // select (X & (1 << i)) ? ... : ...
737 // including variations of the check against zero/non-zero value.
738
739 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
740 if (TrueIfZero) {
741 ShouldSameV = TrueV;
742 ShouldXoredV = FalseV;
743 } else {
744 ShouldSameV = FalseV;
745 ShouldXoredV = TrueV;
746 }
747
748 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
749 Value *T = nullptr;
750 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
751 // Matched: select +++ ? ... : Y ^ Z
752 // select +++ ? Y ^ Z : ...
753 // where +++ denotes previously checked matches.
754 if (ShouldSameV == Y)
755 T = Z;
756 else if (ShouldSameV == Z)
757 T = Y;
758 else
759 return false;
760 R = ShouldSameV;
761 // Matched: select +++ ? R : R ^ T
762 // select +++ ? R ^ T : R
763 // depending on TrueIfZero.
764
765 } else if (match(ShouldSameV, m_Zero())) {
766 // Matched: select +++ ? 0 : ...
767 // select +++ ? ... : 0
768 if (!SelI->hasOneUse())
769 return false;
770 T = ShouldXoredV;
771 // Matched: select +++ ? 0 : T
772 // select +++ ? T : 0
773
774 Value *U = *SelI->user_begin();
775 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
776 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
777 return false;
778 // Matched: xor (select +++ ? 0 : T), R
779 // xor (select +++ ? T : 0), R
780 } else
781 return false;
782
783 // The xor input value T is isolated into its own match so that it could
784 // be checked against an induction variable containing a shifted bit
785 // (todo).
786 // For now, check against (Q << i).
787 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
788 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
789 return false;
790 // Matched: select +++ ? R : R ^ (Q << i)
791 // select +++ ? R ^ (Q << i) : R
792
793 PV.X = X;
794 PV.Q = Q;
795 PV.R = R;
796 PV.Left = true;
797 return true;
798}
799
800bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
801 ParsedValues &PV) {
802 // Match the following:
803 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
804 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
805 // The condition may also check for equality with the masked value, i.e
806 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
807 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
808
809 Value *CondV = SelI->getCondition();
810 Value *TrueV = SelI->getTrueValue();
811 Value *FalseV = SelI->getFalseValue();
812
813 using namespace PatternMatch;
814
815 Value *C = nullptr;
816 CmpInst::Predicate P;
817 bool TrueIfZero;
818
819 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
820 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
821 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
822 return false;
823 // Matched: select C == 0 ? ... : ...
824 // select C != 0 ? ... : ...
825 TrueIfZero = (P == CmpInst::ICMP_EQ);
826 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
827 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
828 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
829 return false;
830 // Matched: select C == 1 ? ... : ...
831 // select C != 1 ? ... : ...
832 TrueIfZero = (P == CmpInst::ICMP_NE);
833 } else
834 return false;
835
836 Value *X = nullptr;
837 if (!match(C, m_And(m_Value(X), m_One())) &&
838 !match(C, m_And(m_One(), m_Value(X))))
839 return false;
840 // Matched: select (X & 1) == +++ ? ... : ...
841 // select (X & 1) != +++ ? ... : ...
842
843 Value *R = nullptr, *Q = nullptr;
844 if (TrueIfZero) {
845 // The select's condition is true if the tested bit is 0.
846 // TrueV must be the shift, FalseV must be the xor.
847 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
848 return false;
849 // Matched: select +++ ? (R >> 1) : ...
850 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
851 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
852 return false;
853 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
854 // with commuting ^.
855 } else {
856 // The select's condition is true if the tested bit is 1.
857 // TrueV must be the xor, FalseV must be the shift.
858 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
859 return false;
860 // Matched: select +++ ? ... : (R >> 1)
861 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
862 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
863 return false;
864 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
865 // with commuting ^.
866 }
867
868 PV.X = X;
869 PV.Q = Q;
870 PV.R = R;
871 PV.Left = false;
872 return true;
873}
874
875bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
876 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
877 bool PreScan) {
878 using namespace PatternMatch;
879
880 // The basic pattern for R = P.Q is:
881 // for i = 0..31
882 // R = phi (0, R')
883 // if (P & (1 << i)) ; test-bit(P, i)
884 // R' = R ^ (Q << i)
885 //
886 // Similarly, the basic pattern for R = (P/Q).Q - P
887 // for i = 0..31
888 // R = phi(P, R')
889 // if (R & (1 << i))
890 // R' = R ^ (Q << i)
891
892 // There exist idioms, where instead of Q being shifted left, P is shifted
893 // right. This produces a result that is shifted right by 32 bits (the
894 // non-shifted result is 64-bit).
895 //
896 // For R = P.Q, this would be:
897 // for i = 0..31
898 // R = phi (0, R')
899 // if ((P >> i) & 1)
900 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
901 // else ; be shifted by 1, not i.
902 // R' = R >> 1
903 //
904 // And for the inverse:
905 // for i = 0..31
906 // R = phi (P, R')
907 // if (R & 1)
908 // R' = (R >> 1) ^ Q
909 // else
910 // R' = R >> 1
911
912 // The left-shifting idioms share the same pattern:
913 // select (X & (1 << i)) ? R ^ (Q << i) : R
914 // Similarly for right-shifting idioms:
915 // select (X & 1) ? (R >> 1) ^ Q
916
917 if (matchLeftShift(SelI, CIV, PV)) {
918 // If this is a pre-scan, getting this far is sufficient.
919 if (PreScan)
920 return true;
921
922 // Need to make sure that the SelI goes back into R.
923 auto *RPhi = dyn_cast<PHINode>(PV.R);
924 if (!RPhi)
925 return false;
926 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
927 return false;
928 PV.Res = SelI;
929
930 // If X is loop invariant, it must be the input polynomial, and the
931 // idiom is the basic polynomial multiply.
932 if (CurLoop->isLoopInvariant(PV.X)) {
933 PV.P = PV.X;
934 PV.Inv = false;
935 } else {
936 // X is not loop invariant. If X == R, this is the inverse pmpy.
937 // Otherwise, check for an xor with an invariant value. If the
938 // variable argument to the xor is R, then this is still a valid
939 // inverse pmpy.
940 PV.Inv = true;
941 if (PV.X != PV.R) {
942 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
943 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
944 return false;
945 auto *I1 = dyn_cast<Instruction>(X1);
946 auto *I2 = dyn_cast<Instruction>(X2);
947 if (!I1 || I1->getParent() != LoopB) {
948 Var = X2;
949 Inv = X1;
950 } else if (!I2 || I2->getParent() != LoopB) {
951 Var = X1;
952 Inv = X2;
953 } else
954 return false;
955 if (Var != PV.R)
956 return false;
957 PV.M = Inv;
958 }
959 // The input polynomial P still needs to be determined. It will be
960 // the entry value of R.
961 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
962 PV.P = EntryP;
963 }
964
965 return true;
966 }
967
968 if (matchRightShift(SelI, PV)) {
969 // If this is an inverse pattern, the Q polynomial must be known at
970 // compile time.
971 if (PV.Inv && !isa<ConstantInt>(PV.Q))
972 return false;
973 if (PreScan)
974 return true;
975 // There is no exact matching of right-shift pmpy.
976 return false;
977 }
978
979 return false;
980}
981
982bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
983 IntegerType *DestTy) {
984 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
985 if (!T || T->getBitWidth() > DestTy->getBitWidth())
986 return false;
987 if (T->getBitWidth() == DestTy->getBitWidth())
988 return true;
989 // Non-instructions are promotable. The reason why an instruction may not
990 // be promotable is that it may produce a different result if its operands
991 // and the result are promoted, for example, it may produce more non-zero
992 // bits. While it would still be possible to represent the proper result
993 // in a wider type, it may require adding additional instructions (which
994 // we don't want to do).
995 Instruction *In = dyn_cast<Instruction>(Val);
996 if (!In)
997 return true;
998 // The bitwidth of the source type is smaller than the destination.
999 // Check if the individual operation can be promoted.
1000 switch (In->getOpcode()) {
1001 case Instruction::PHI:
1002 case Instruction::ZExt:
1003 case Instruction::And:
1004 case Instruction::Or:
1005 case Instruction::Xor:
1006 case Instruction::LShr: // Shift right is ok.
1007 case Instruction::Select:
1008 case Instruction::Trunc:
1009 return true;
1010 case Instruction::ICmp:
1011 if (CmpInst *CI = cast<CmpInst>(In))
1012 return CI->isEquality() || CI->isUnsigned();
1013 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1013
)
;
1014 case Instruction::Add:
1015 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
1016 }
1017 return false;
1018}
1019
1020void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1021 IntegerType *DestTy, BasicBlock *LoopB) {
1022 Type *OrigTy = In->getType();
1023 assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote"
) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1023
, __extension__ __PRETTY_FUNCTION__))
;
1024
1025 // Leave boolean values alone.
1026 if (!In->getType()->isIntegerTy(1))
1027 In->mutateType(DestTy);
1028 unsigned DestBW = DestTy->getBitWidth();
1029
1030 // Handle PHIs.
1031 if (PHINode *P = dyn_cast<PHINode>(In)) {
1032 unsigned N = P->getNumIncomingValues();
1033 for (unsigned i = 0; i != N; ++i) {
1034 BasicBlock *InB = P->getIncomingBlock(i);
1035 if (InB == LoopB)
1036 continue;
1037 Value *InV = P->getIncomingValue(i);
1038 IntegerType *Ty = cast<IntegerType>(InV->getType());
1039 // Do not promote values in PHI nodes of type i1.
1040 if (Ty != P->getType()) {
1041 // If the value type does not match the PHI type, the PHI type
1042 // must have been promoted.
1043 assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ?
void (0) : __assert_fail ("Ty->getBitWidth() < DestBW"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1043
, __extension__ __PRETTY_FUNCTION__))
;
1044 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1045 P->setIncomingValue(i, InV);
1046 }
1047 }
1048 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1049 Value *Op = Z->getOperand(0);
1050 if (Op->getType() == Z->getType())
1051 Z->replaceAllUsesWith(Op);
1052 Z->eraseFromParent();
1053 return;
1054 }
1055 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1056 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1057 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1058 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1059 T->replaceAllUsesWith(And);
1060 T->eraseFromParent();
1061 return;
1062 }
1063
1064 // Promote immediates.
1065 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1066 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1067 if (CI->getType()->getBitWidth() < DestBW)
1068 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1069 }
1070}
1071
1072bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1073 BasicBlock *ExitB) {
1074 assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail (
"LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1074, __extension__ __PRETTY_FUNCTION__))
;
1075 // Skip loops where the exit block has more than one predecessor. The values
1076 // coming from the loop block will be promoted to another type, and so the
1077 // values coming into the exit block from other predecessors would also have
1078 // to be promoted.
1079 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1080 return false;
1081 IntegerType *DestTy = getPmpyType();
1082 // Check if the exit values have types that are no wider than the type
1083 // that we want to promote to.
1084 unsigned DestBW = DestTy->getBitWidth();
1085 for (PHINode &P : ExitB->phis()) {
1086 if (P.getNumIncomingValues() != 1)
1087 return false;
1088 assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void
(0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1088, __extension__ __PRETTY_FUNCTION__))
;
1089 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1090 if (!T || T->getBitWidth() > DestBW)
1091 return false;
1092 }
1093
1094 // Check all instructions in the loop.
1095 for (Instruction &In : *LoopB)
1096 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1097 return false;
1098
1099 // Perform the promotion.
1100 std::vector<Instruction*> LoopIns;
1101 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1102 [](Instruction &In) { return &In; });
1103 for (Instruction *In : LoopIns)
1104 if (!In->isTerminator())
1105 promoteTo(In, DestTy, LoopB);
1106
1107 // Fix up the PHI nodes in the exit block.
1108 Instruction *EndI = ExitB->getFirstNonPHI();
1109 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1110 for (auto I = ExitB->begin(); I != End; ++I) {
1111 PHINode *P = dyn_cast<PHINode>(I);
1112 if (!P)
1113 break;
1114 Type *Ty0 = P->getIncomingValue(0)->getType();
1115 Type *PTy = P->getType();
1116 if (PTy != Ty0) {
1117 assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail
("Ty0 == DestTy", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1117, __extension__ __PRETTY_FUNCTION__))
;
1118 // In order to create the trunc, P must have the promoted type.
1119 P->mutateType(Ty0);
1120 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1121 // In order for the RAUW to work, the types of P and T must match.
1122 P->mutateType(PTy);
1123 P->replaceAllUsesWith(T);
1124 // Final update of the P's type.
1125 P->mutateType(Ty0);
1126 cast<Instruction>(T)->setOperand(0, P);
1127 }
1128 }
1129
1130 return true;
1131}
1132
1133bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1134 ValueSeq &Cycle) {
1135 // Out = ..., In, ...
1136 if (Out == In)
1137 return true;
1138
1139 auto *BB = cast<Instruction>(Out)->getParent();
1140 bool HadPhi = false;
1141
1142 for (auto U : Out->users()) {
1143 auto *I = dyn_cast<Instruction>(&*U);
1144 if (I == nullptr || I->getParent() != BB)
1145 continue;
1146 // Make sure that there are no multi-iteration cycles, e.g.
1147 // p1 = phi(p2)
1148 // p2 = phi(p1)
1149 // The cycle p1->p2->p1 would span two loop iterations.
1150 // Check that there is only one phi in the cycle.
1151 bool IsPhi = isa<PHINode>(I);
1152 if (IsPhi && HadPhi)
1153 return false;
1154 HadPhi |= IsPhi;
1155 if (Cycle.count(I))
1156 return false;
1157 Cycle.insert(I);
1158 if (findCycle(I, In, Cycle))
1159 break;
1160 Cycle.remove(I);
1161 }
1162 return !Cycle.empty();
1163}
1164
1165void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1166 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1167 // All the values in the cycle that are between the phi node and the
1168 // divider instruction will be classified as "early", all other values
1169 // will be "late".
1170
1171 bool IsE = true;
1172 unsigned I, N = Cycle.size();
1173 for (I = 0; I < N; ++I) {
1174 Value *V = Cycle[I];
1175 if (DivI == V)
1176 IsE = false;
1177 else if (!isa<PHINode>(V))
1178 continue;
1179 // Stop if found either.
1180 break;
1181 }
1182 // "I" is the index of either DivI or the phi node, whichever was first.
1183 // "E" is "false" or "true" respectively.
1184 ValueSeq &First = !IsE ? Early : Late;
1185 for (unsigned J = 0; J < I; ++J)
1186 First.insert(Cycle[J]);
1187
1188 ValueSeq &Second = IsE ? Early : Late;
1189 Second.insert(Cycle[I]);
1190 for (++I; I < N; ++I) {
1191 Value *V = Cycle[I];
1192 if (DivI == V || isa<PHINode>(V))
1193 break;
1194 Second.insert(V);
1195 }
1196
1197 for (; I < N; ++I)
1198 First.insert(Cycle[I]);
1199}
1200
1201bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1202 ValueSeq &Early, ValueSeq &Late) {
1203 // Select is an exception, since the condition value does not have to be
1204 // classified in the same way as the true/false values. The true/false
1205 // values do have to be both early or both late.
1206 if (UseI->getOpcode() == Instruction::Select) {
1207 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1208 if (Early.count(TV) || Early.count(FV)) {
1209 if (Late.count(TV) || Late.count(FV))
1210 return false;
1211 Early.insert(UseI);
1212 } else if (Late.count(TV) || Late.count(FV)) {
1213 if (Early.count(TV) || Early.count(FV))
1214 return false;
1215 Late.insert(UseI);
1216 }
1217 return true;
1218 }
1219
1220 // Not sure what would be the example of this, but the code below relies
1221 // on having at least one operand.
1222 if (UseI->getNumOperands() == 0)
1223 return true;
1224
1225 bool AE = true, AL = true;
1226 for (auto &I : UseI->operands()) {
1227 if (Early.count(&*I))
1228 AL = false;
1229 else if (Late.count(&*I))
1230 AE = false;
1231 }
1232 // If the operands appear "all early" and "all late" at the same time,
1233 // then it means that none of them are actually classified as either.
1234 // This is harmless.
1235 if (AE && AL)
1236 return true;
1237 // Conversely, if they are neither "all early" nor "all late", then
1238 // we have a mixture of early and late operands that is not a known
1239 // exception.
1240 if (!AE && !AL)
1241 return false;
1242
1243 // Check that we have covered the two special cases.
1244 assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail
("AE != AL", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1244, __extension__ __PRETTY_FUNCTION__))
;
1245
1246 if (AE)
1247 Early.insert(UseI);
1248 else
1249 Late.insert(UseI);
1250 return true;
1251}
1252
1253bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1254 switch (I->getOpcode()) {
1255 case Instruction::And:
1256 case Instruction::Or:
1257 case Instruction::Xor:
1258 case Instruction::LShr:
1259 case Instruction::Shl:
1260 case Instruction::Select:
1261 case Instruction::ICmp:
1262 case Instruction::PHI:
1263 break;
1264 default:
1265 return false;
1266 }
1267 return true;
1268}
1269
1270bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1271 unsigned IterCount) {
1272 auto *T = dyn_cast<IntegerType>(V->getType());
1273 if (!T)
1274 return false;
1275
1276 KnownBits Known(T->getBitWidth());
1277 computeKnownBits(V, Known, DL);
1278 return Known.countMinLeadingZeros() >= IterCount;
1279}
1280
1281bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1282 unsigned IterCount) {
1283 // Assume that all inputs to the value have the high bits zero.
1284 // Check if the value itself preserves the zeros in the high bits.
1285 if (auto *C = dyn_cast<ConstantInt>(V))
1286 return C->getValue().countLeadingZeros() >= IterCount;
1287
1288 if (auto *I = dyn_cast<Instruction>(V)) {
1289 switch (I->getOpcode()) {
1290 case Instruction::And:
1291 case Instruction::Or:
1292 case Instruction::Xor:
1293 case Instruction::LShr:
1294 case Instruction::Select:
1295 case Instruction::ICmp:
1296 case Instruction::PHI:
1297 case Instruction::ZExt:
1298 return true;
1299 }
1300 }
1301
1302 return false;
1303}
1304
1305bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1306 unsigned Opc = I->getOpcode();
1307 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1308 return Op != I->getOperand(1);
1309 return true;
1310}
1311
1312bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1313 BasicBlock *ExitB, unsigned IterCount) {
1314 Value *CIV = getCountIV(LoopB);
1315 if (CIV == nullptr)
1316 return false;
1317 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1318 if (CIVTy == nullptr)
1319 return false;
1320
1321 ValueSeq RShifts;
1322 ValueSeq Early, Late, Cycled;
1323
1324 // Find all value cycles that contain logical right shifts by 1.
1325 for (Instruction &I : *LoopB) {
1326 using namespace PatternMatch;
1327
1328 Value *V = nullptr;
1329 if (!match(&I, m_LShr(m_Value(V), m_One())))
1330 continue;
1331 ValueSeq C;
1332 if (!findCycle(&I, V, C))
1333 continue;
1334
1335 // Found a cycle.
1336 C.insert(&I);
1337 classifyCycle(&I, C, Early, Late);
1338 Cycled.insert(C.begin(), C.end());
1339 RShifts.insert(&I);
1340 }
1341
1342 // Find the set of all values affected by the shift cycles, i.e. all
1343 // cycled values, and (recursively) all their users.
1344 ValueSeq Users(Cycled.begin(), Cycled.end());
1345 for (unsigned i = 0; i < Users.size(); ++i) {
1346 Value *V = Users[i];
1347 if (!isa<IntegerType>(V->getType()))
1348 return false;
1349 auto *R = cast<Instruction>(V);
1350 // If the instruction does not commute with shifts, the loop cannot
1351 // be unshifted.
1352 if (!commutesWithShift(R))
1353 return false;
1354 for (User *U : R->users()) {
1355 auto *T = cast<Instruction>(U);
1356 // Skip users from outside of the loop. They will be handled later.
1357 // Also, skip the right-shifts and phi nodes, since they mix early
1358 // and late values.
1359 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1360 continue;
1361
1362 Users.insert(T);
1363 if (!classifyInst(T, Early, Late))
1364 return false;
1365 }
1366 }
1367
1368 if (Users.empty())
1369 return false;
1370
1371 // Verify that high bits remain zero.
1372 ValueSeq Internal(Users.begin(), Users.end());
1373 ValueSeq Inputs;
1374 for (unsigned i = 0; i < Internal.size(); ++i) {
1375 auto *R = dyn_cast<Instruction>(Internal[i]);
1376 if (!R)
1377 continue;
1378 for (Value *Op : R->operands()) {
1379 auto *T = dyn_cast<Instruction>(Op);
1380 if (T && T->getParent() != LoopB)
1381 Inputs.insert(Op);
1382 else
1383 Internal.insert(Op);
1384 }
1385 }
1386 for (Value *V : Inputs)
1387 if (!highBitsAreZero(V, IterCount))
1388 return false;
1389 for (Value *V : Internal)
1390 if (!keepsHighBitsZero(V, IterCount))
1391 return false;
1392
1393 // Finally, the work can be done. Unshift each user.
1394 IRBuilder<> IRB(LoopB);
1395 std::map<Value*,Value*> ShiftMap;
1396
1397 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1398
1399 CastMapType CastMap;
1400
1401 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1402 IntegerType *Ty) -> Value* {
1403 auto H = CM.find(std::make_pair(V, Ty));
1404 if (H != CM.end())
1405 return H->second;
1406 Value *CV = IRB.CreateIntCast(V, Ty, false);
1407 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1408 return CV;
1409 };
1410
1411 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1412 using namespace PatternMatch;
1413
1414 if (isa<PHINode>(I) || !Users.count(&*I))
1415 continue;
1416
1417 // Match lshr x, 1.
1418 Value *V = nullptr;
1419 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1420 replaceAllUsesOfWithIn(&*I, V, LoopB);
1421 continue;
1422 }
1423 // For each non-cycled operand, replace it with the corresponding
1424 // value shifted left.
1425 for (auto &J : I->operands()) {
1426 Value *Op = J.get();
1427 if (!isOperandShifted(&*I, Op))
1428 continue;
1429 if (Users.count(Op))
1430 continue;
1431 // Skip shifting zeros.
1432 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1433 continue;
1434 // Check if we have already generated a shift for this value.
1435 auto F = ShiftMap.find(Op);
1436 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1437 if (W == nullptr) {
1438 IRB.SetInsertPoint(&*I);
1439 // First, the shift amount will be CIV or CIV+1, depending on
1440 // whether the value is early or late. Instead of creating CIV+1,
1441 // do a single shift of the value.
1442 Value *ShAmt = CIV, *ShVal = Op;
1443 auto *VTy = cast<IntegerType>(ShVal->getType());
1444 auto *ATy = cast<IntegerType>(ShAmt->getType());
1445 if (Late.count(&*I))
1446 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1447 // Second, the types of the shifted value and the shift amount
1448 // must match.
1449 if (VTy != ATy) {
1450 if (VTy->getBitWidth() < ATy->getBitWidth())
1451 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1452 else
1453 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1454 }
1455 // Ready to generate the shift and memoize it.
1456 W = IRB.CreateShl(ShVal, ShAmt);
1457 ShiftMap.insert(std::make_pair(Op, W));
1458 }
1459 I->replaceUsesOfWith(Op, W);
1460 }
1461 }
1462
1463 // Update the users outside of the loop to account for having left
1464 // shifts. They would normally be shifted right in the loop, so shift
1465 // them right after the loop exit.
1466 // Take advantage of the loop-closed SSA form, which has all the post-
1467 // loop values in phi nodes.
1468 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1469 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1470 if (!isa<PHINode>(P))
1471 break;
1472 auto *PN = cast<PHINode>(P);
1473 Value *U = PN->getIncomingValueForBlock(LoopB);
1474 if (!Users.count(U))
1475 continue;
1476 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1477 PN->replaceAllUsesWith(S);
1478 // The above RAUW will create
1479 // S = lshr S, IterCount
1480 // so we need to fix it back into
1481 // S = lshr PN, IterCount
1482 cast<User>(S)->replaceUsesOfWith(S, PN);
1483 }
1484
1485 return true;
1486}
1487
1488void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1489 for (auto &I : *LoopB)
1490 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1491 I.replaceAllUsesWith(SV);
1492
1493 for (Instruction &I : llvm::make_early_inc_range(*LoopB))
1494 RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI);
1495}
1496
1497unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1498 // Arrays of coefficients of Q and the inverse, C.
1499 // Q[i] = coefficient at x^i.
1500 std::array<char,32> Q, C;
1501
1502 for (unsigned i = 0; i < 32; ++i) {
1503 Q[i] = QP & 1;
1504 QP >>= 1;
1505 }
1506 assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail
("Q[0] == 1", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1506, __extension__ __PRETTY_FUNCTION__))
;
1507
1508 // Find C, such that
1509 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1510 //
1511 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1512 // operations * and + are & and ^ respectively.
1513 //
1514 // Find C[i] recursively, by comparing i-th coefficient in the product
1515 // with 0 (or 1 for i=0).
1516 //
1517 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1518 C[0] = 1;
1519 for (unsigned i = 1; i < 32; ++i) {
1520 // Solve for C[i] in:
1521 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1522 // This is equivalent to
1523 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1524 // which is
1525 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1526 unsigned T = 0;
1527 for (unsigned j = 0; j < i; ++j)
1528 T = T ^ (C[j] & Q[i-j]);
1529 C[i] = T;
1530 }
1531
1532 unsigned QV = 0;
1533 for (unsigned i = 0; i < 32; ++i)
1534 if (C[i])
1535 QV |= (1 << i);
1536
1537 return QV;
1538}
1539
1540Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1541 ParsedValues &PV) {
1542 IRBuilder<> B(&*At);
1543 Module *M = At->getParent()->getParent()->getParent();
1544 Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1545
1546 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1547 unsigned IC = PV.IterCount;
1548
1549 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1550 P0 = P = B.CreateXor(P, PV.M);
1551
1552 // Create a bit mask to clear the high bits beyond IterCount.
1553 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
15
Returning from 'APInt::getLowBitsSet'
1554
1555 if (PV.IterCount != 32)
16
Assuming field 'IterCount' is equal to 32
17
Taking false branch
1556 P = B.CreateAnd(P, BMI);
1557
1558 if (PV.Inv) {
18
Assuming field 'Inv' is true
19
Taking true branch
1559 auto *QI = dyn_cast<ConstantInt>(PV.Q);
20
Assuming field 'Q' is a 'ConstantInt'
1560 assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth()
<= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1560
, __extension__ __PRETTY_FUNCTION__))
;
21
Assuming the condition is true
22
'?' condition is true
1561
1562 // Again, clearing bits beyond IterCount.
1563 unsigned M = (1 << PV.IterCount) - 1;
23
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1564 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1565 unsigned QV = getInverseMxN(Tmp) & M;
1566 auto *QVI = ConstantInt::get(QI->getType(), QV);
1567 P = B.CreateCall(PMF, {P, QVI});
1568 P = B.CreateTrunc(P, QI->getType());
1569 if (IC != 32)
1570 P = B.CreateAnd(P, BMI);
1571 }
1572
1573 Value *R = B.CreateCall(PMF, {P, Q});
1574
1575 if (PV.M != nullptr)
1576 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1577
1578 return R;
1579}
1580
1581static bool hasZeroSignBit(const Value *V) {
1582 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1583 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1584 const Instruction *I = dyn_cast<const Instruction>(V);
1585 if (!I)
1586 return false;
1587 switch (I->getOpcode()) {
1588 case Instruction::LShr:
1589 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1590 return SI->getZExtValue() > 0;
1591 return false;
1592 case Instruction::Or:
1593 case Instruction::Xor:
1594 return hasZeroSignBit(I->getOperand(0)) &&
1595 hasZeroSignBit(I->getOperand(1));
1596 case Instruction::And:
1597 return hasZeroSignBit(I->getOperand(0)) ||
1598 hasZeroSignBit(I->getOperand(1));
1599 }
1600 return false;
1601}
1602
1603void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1604 S.addRule("sink-zext",
1605 // Sink zext past bitwise operations.
1606 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1607 if (I->getOpcode() != Instruction::ZExt)
1608 return nullptr;
1609 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1610 if (!T)
1611 return nullptr;
1612 switch (T->getOpcode()) {
1613 case Instruction::And:
1614 case Instruction::Or:
1615 case Instruction::Xor:
1616 break;
1617 default:
1618 return nullptr;
1619 }
1620 IRBuilder<> B(Ctx);
1621 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1622 B.CreateZExt(T->getOperand(0), I->getType()),
1623 B.CreateZExt(T->getOperand(1), I->getType()));
1624 });
1625 S.addRule("xor/and -> and/xor",
1626 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1627 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1628 if (I->getOpcode() != Instruction::Xor)
1629 return nullptr;
1630 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1631 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1632 if (!And0 || !And1)
1633 return nullptr;
1634 if (And0->getOpcode() != Instruction::And ||
1635 And1->getOpcode() != Instruction::And)
1636 return nullptr;
1637 if (And0->getOperand(1) != And1->getOperand(1))
1638 return nullptr;
1639 IRBuilder<> B(Ctx);
1640 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1641 And0->getOperand(1));
1642 });
1643 S.addRule("sink binop into select",
1644 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1645 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1646 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1647 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1648 if (!BO)
1649 return nullptr;
1650 Instruction::BinaryOps Op = BO->getOpcode();
1651 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1652 IRBuilder<> B(Ctx);
1653 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1654 Value *Z = BO->getOperand(1);
1655 return B.CreateSelect(Sel->getCondition(),
1656 B.CreateBinOp(Op, X, Z),
1657 B.CreateBinOp(Op, Y, Z));
1658 }
1659 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1660 IRBuilder<> B(Ctx);
1661 Value *X = BO->getOperand(0);
1662 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1663 return B.CreateSelect(Sel->getCondition(),
1664 B.CreateBinOp(Op, X, Y),
1665 B.CreateBinOp(Op, X, Z));
1666 }
1667 return nullptr;
1668 });
1669 S.addRule("fold select-select",
1670 // (select c (select c x y) z) -> (select c x z)
1671 // (select c x (select c y z)) -> (select c x z)
1672 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1673 SelectInst *Sel = dyn_cast<SelectInst>(I);
1674 if (!Sel)
1675 return nullptr;
1676 IRBuilder<> B(Ctx);
1677 Value *C = Sel->getCondition();
1678 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1679 if (Sel0->getCondition() == C)
1680 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1681 }
1682 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1683 if (Sel1->getCondition() == C)
1684 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1685 }
1686 return nullptr;
1687 });
1688 S.addRule("or-signbit -> xor-signbit",
1689 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1690 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1691 if (I->getOpcode() != Instruction::Or)
1692 return nullptr;
1693 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1694 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1695 return nullptr;
1696 if (!hasZeroSignBit(I->getOperand(0)))
1697 return nullptr;
1698 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1699 });
1700 S.addRule("sink lshr into binop",
1701 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1702 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1703 if (I->getOpcode() != Instruction::LShr)
1704 return nullptr;
1705 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1706 if (!BitOp)
1707 return nullptr;
1708 switch (BitOp->getOpcode()) {
1709 case Instruction::And:
1710 case Instruction::Or:
1711 case Instruction::Xor:
1712 break;
1713 default:
1714 return nullptr;
1715 }
1716 IRBuilder<> B(Ctx);
1717 Value *S = I->getOperand(1);
1718 return B.CreateBinOp(BitOp->getOpcode(),
1719 B.CreateLShr(BitOp->getOperand(0), S),
1720 B.CreateLShr(BitOp->getOperand(1), S));
1721 });
1722 S.addRule("expose bitop-const",
1723 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1724 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1725 auto IsBitOp = [](unsigned Op) -> bool {
1726 switch (Op) {
1727 case Instruction::And:
1728 case Instruction::Or:
1729 case Instruction::Xor:
1730 return true;
1731 }
1732 return false;
1733 };
1734 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1735 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1736 return nullptr;
1737 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1738 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1739 return nullptr;
1740 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1741 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1742 if (!CA || !CB)
1743 return nullptr;
1744 IRBuilder<> B(Ctx);
1745 Value *X = BitOp2->getOperand(0);
1746 return B.CreateBinOp(BitOp2->getOpcode(), X,
1747 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1748 });
1749}
1750
1751void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1752 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1753 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1754 if (I->getOpcode() != Instruction::And)
1755 return nullptr;
1756 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1757 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1758 if (!Xor || !C0)
1759 return nullptr;
1760 if (Xor->getOpcode() != Instruction::Xor)
1761 return nullptr;
1762 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1763 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1764 // Pick the first non-null and.
1765 if (!And0 || And0->getOpcode() != Instruction::And)
1766 std::swap(And0, And1);
1767 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1768 if (!C1)
1769 return nullptr;
1770 uint32_t V0 = C0->getZExtValue();
1771 uint32_t V1 = C1->getZExtValue();
1772 if (V0 != (V0 & V1))
1773 return nullptr;
1774 IRBuilder<> B(Ctx);
1775 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1776 });
1777}
1778
1779bool PolynomialMultiplyRecognize::recognize() {
1780 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1781 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1782 // Restrictions:
1783 // - The loop must consist of a single block.
1784 // - The iteration count must be known at compile-time.
1785 // - The loop must have an induction variable starting from 0, and
1786 // incremented in each iteration of the loop.
1787 BasicBlock *LoopB = CurLoop->getHeader();
1788 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1789
1790 if (LoopB != CurLoop->getLoopLatch())
1791 return false;
1792 BasicBlock *ExitB = CurLoop->getExitBlock();
1793 if (ExitB == nullptr)
1794 return false;
1795 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1796 if (EntryB == nullptr)
1797 return false;
1798
1799 unsigned IterCount = 0;
1800 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1801 if (isa<SCEVCouldNotCompute>(CT))
1802 return false;
1803 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1804 IterCount = CV->getValue()->getZExtValue() + 1;
1805
1806 Value *CIV = getCountIV(LoopB);
1807 ParsedValues PV;
1808 Simplifier PreSimp;
1809 PV.IterCount = IterCount;
1810 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1811 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1812
1813 setupPreSimplifier(PreSimp);
1814
1815 // Perform a preliminary scan of select instructions to see if any of them
1816 // looks like a generator of the polynomial multiply steps. Assume that a
1817 // loop can only contain a single transformable operation, so stop the
1818 // traversal after the first reasonable candidate was found.
1819 // XXX: Currently this approach can modify the loop before being 100% sure
1820 // that the transformation can be carried out.
1821 bool FoundPreScan = false;
1822 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1823 for (const Value *U : V->users()) {
1824 if (const auto *P = dyn_cast<const PHINode>(U))
1825 if (P->getParent() == LoopB)
1826 return true;
1827 }
1828 return false;
1829 };
1830 for (Instruction &In : *LoopB) {
1831 SelectInst *SI = dyn_cast<SelectInst>(&In);
1832 if (!SI || !FeedsPHI(SI))
1833 continue;
1834
1835 Simplifier::Context C(SI);
1836 Value *T = PreSimp.simplify(C);
1837 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1838 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1839 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1840 FoundPreScan = true;
1841 if (SelI != SI) {
1842 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1843 SI->replaceAllUsesWith(NewSel);
1844 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1845 }
1846 break;
1847 }
1848 }
1849
1850 if (!FoundPreScan) {
1851 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1852 return false;
1853 }
1854
1855 if (!PV.Left) {
1856 // The right shift version actually only returns the higher bits of
1857 // the result (each iteration discards the LSB). If we want to convert it
1858 // to a left-shifting loop, the working data type must be at least as
1859 // wide as the target's pmpy instruction.
1860 if (!promoteTypes(LoopB, ExitB))
1861 return false;
1862 // Run post-promotion simplifications.
1863 Simplifier PostSimp;
1864 setupPostSimplifier(PostSimp);
1865 for (Instruction &In : *LoopB) {
1866 SelectInst *SI = dyn_cast<SelectInst>(&In);
1867 if (!SI || !FeedsPHI(SI))
1868 continue;
1869 Simplifier::Context C(SI);
1870 Value *T = PostSimp.simplify(C);
1871 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1872 if (SelI != SI) {
1873 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1874 SI->replaceAllUsesWith(NewSel);
1875 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1876 }
1877 break;
1878 }
1879
1880 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1881 return false;
1882 cleanupLoopBody(LoopB);
1883 }
1884
1885 // Scan the loop again, find the generating select instruction.
1886 bool FoundScan = false;
1887 for (Instruction &In : *LoopB) {
1888 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1889 if (!SelI)
1890 continue;
1891 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1892 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1893 if (FoundScan)
1894 break;
1895 }
1896 assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail
("FoundScan", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1896, __extension__ __PRETTY_FUNCTION__))
;
1897
1898 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1899 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1900 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1901 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1902 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1903 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1904 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1905 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1906 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1907 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1908 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1909 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1910 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1911
1912 BasicBlock::iterator At(EntryB->getTerminator());
1913 Value *PM = generate(At, PV);
1914 if (PM == nullptr)
1915 return false;
1916
1917 if (PM->getType() != PV.Res->getType())
1918 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1919
1920 PV.Res->replaceAllUsesWith(PM);
1921 PV.Res->eraseFromParent();
1922 return true;
1923}
1924
1925int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1926 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1927 return SC->getAPInt().getSExtValue();
1928 return 0;
1929}
1930
1931bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1932 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1933 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1934 return false;
1935
1936 Value *StoredVal = SI->getValueOperand();
1937 Value *StorePtr = SI->getPointerOperand();
1938
1939 // Reject stores that are so large that they overflow an unsigned.
1940 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1941 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1942 return false;
1943
1944 // See if the pointer expression is an AddRec like {base,+,1} on the current
1945 // loop, which indicates a strided store. If we have something else, it's a
1946 // random store we can't handle.
1947 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1948 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1949 return false;
1950
1951 // Check to see if the stride matches the size of the store. If so, then we
1952 // know that every byte is touched in the loop.
1953 int Stride = getSCEVStride(StoreEv);
1954 if (Stride == 0)
1955 return false;
1956 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1957 if (StoreSize != unsigned(std::abs(Stride)))
1958 return false;
1959
1960 // The store must be feeding a non-volatile load.
1961 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1962 if (!LI || !LI->isSimple())
1963 return false;
1964
1965 // See if the pointer expression is an AddRec like {base,+,1} on the current
1966 // loop, which indicates a strided load. If we have something else, it's a
1967 // random load we can't handle.
1968 Value *LoadPtr = LI->getPointerOperand();
1969 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1970 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1971 return false;
1972
1973 // The store and load must share the same stride.
1974 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1975 return false;
1976
1977 // Success. This store can be converted into a memcpy.
1978 return true;
1979}
1980
1981/// mayLoopAccessLocation - Return true if the specified loop might access the
1982/// specified pointer location, which is a loop-strided access. The 'Access'
1983/// argument specifies what the verboten forms of access are (read or write).
1984static bool
1985mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1986 const SCEV *BECount, unsigned StoreSize,
1987 AliasAnalysis &AA,
1988 SmallPtrSetImpl<Instruction *> &Ignored) {
1989 // Get the location that may be stored across the loop. Since the access
1990 // is strided positively through memory, we say that the modified location
1991 // starts at the pointer and has infinite size.
1992 LocationSize AccessSize = LocationSize::afterPointer();
1993
1994 // If the loop iterates a fixed number of times, we can refine the access
1995 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1996 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1997 AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
1998 StoreSize);
1999
2000 // TODO: For this to be really effective, we have to dive into the pointer
2001 // operand in the store. Store to &A[i] of 100 will always return may alias
2002 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
2003 // which will then no-alias a store to &A[100].
2004 MemoryLocation StoreLoc(Ptr, AccessSize);
2005
2006 for (auto *B : L->blocks())
2007 for (auto &I : *B)
2008 if (Ignored.count(&I) == 0 &&
2009 isModOrRefSet(
2010 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
2011 return true;
2012
2013 return false;
2014}
2015
2016void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2017 SmallVectorImpl<StoreInst*> &Stores) {
2018 Stores.clear();
2019 for (Instruction &I : *BB)
2020 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2021 if (isLegalStore(CurLoop, SI))
2022 Stores.push_back(SI);
2023}
2024
2025bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2026 StoreInst *SI, const SCEV *BECount) {
2027 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2029
, __extension__ __PRETTY_FUNCTION__))
2028 "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2029
, __extension__ __PRETTY_FUNCTION__))
2029 "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2029
, __extension__ __PRETTY_FUNCTION__))
;
2030
2031 Value *StorePtr = SI->getPointerOperand();
2032 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2033 unsigned Stride = getSCEVStride(StoreEv);
2034 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2035 if (Stride != StoreSize)
2036 return false;
2037
2038 // See if the pointer expression is an AddRec like {base,+,1} on the current
2039 // loop, which indicates a strided load. If we have something else, it's a
2040 // random load we can't handle.
2041 auto *LI = cast<LoadInst>(SI->getValueOperand());
2042 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2043
2044 // The trip count of the loop and the base pointer of the addrec SCEV is
2045 // guaranteed to be loop invariant, which means that it should dominate the
2046 // header. This allows us to insert code for it in the preheader.
2047 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2048 Instruction *ExpPt = Preheader->getTerminator();
2049 IRBuilder<> Builder(ExpPt);
2050 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2051
2052 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2053
2054 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2055 // this into a memcpy/memmove in the loop preheader now if we want. However,
2056 // this would be unsafe to do if there is anything else in the loop that may
2057 // read or write the memory region we're storing to. For memcpy, this
2058 // includes the load that feeds the stores. Check for an alias by generating
2059 // the base address and checking everything.
2060 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2061 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2062 Value *LoadBasePtr = nullptr;
2063
2064 bool Overlap = false;
2065 bool DestVolatile = SI->isVolatile();
2066 Type *BECountTy = BECount->getType();
2067
2068 if (DestVolatile) {
2069 // The trip count must fit in i32, since it is the type of the "num_words"
2070 // argument to hexagon_memcpy_forward_vp4cp4n2.
2071 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2072CleanupAndExit:
2073 // If we generated new code for the base pointer, clean up.
2074 Expander.clear();
2075 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2076 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2077 StoreBasePtr = nullptr;
2078 }
2079 if (LoadBasePtr) {
2080 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2081 LoadBasePtr = nullptr;
2082 }
2083 return false;
2084 }
2085 }
2086
2087 SmallPtrSet<Instruction*, 2> Ignore1;
2088 Ignore1.insert(SI);
2089 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2090 StoreSize, *AA, Ignore1)) {
2091 // Check if the load is the offending instruction.
2092 Ignore1.insert(LI);
2093 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2094 BECount, StoreSize, *AA, Ignore1)) {
2095 // Still bad. Nothing we can do.
2096 goto CleanupAndExit;
2097 }
2098 // It worked with the load ignored.
2099 Overlap = true;
2100 }
2101
2102 if (!Overlap) {
2103 if (DisableMemcpyIdiom || !HasMemcpy)
2104 goto CleanupAndExit;
2105 } else {
2106 // Don't generate memmove if this function will be inlined. This is
2107 // because the caller will undergo this transformation after inlining.
2108 Function *Func = CurLoop->getHeader()->getParent();
2109 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2110 goto CleanupAndExit;
2111
2112 // In case of a memmove, the call to memmove will be executed instead
2113 // of the loop, so we need to make sure that there is nothing else in
2114 // the loop than the load, store and instructions that these two depend
2115 // on.
2116 SmallVector<Instruction*,2> Insts;
2117 Insts.push_back(SI);
2118 Insts.push_back(LI);
2119 if (!coverLoop(CurLoop, Insts))
2120 goto CleanupAndExit;
2121
2122 if (DisableMemmoveIdiom || !HasMemmove)
2123 goto CleanupAndExit;
2124 bool IsNested = CurLoop->getParentLoop() != nullptr;
2125 if (IsNested && OnlyNonNestedMemmove)
2126 goto CleanupAndExit;
2127 }
2128
2129 // For a memcpy, we have to make sure that the input array is not being
2130 // mutated by the loop.
2131 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2132 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2133
2134 SmallPtrSet<Instruction*, 2> Ignore2;
2135 Ignore2.insert(SI);
2136 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2137 StoreSize, *AA, Ignore2))
2138 goto CleanupAndExit;
2139
2140 // Check the stride.
2141 bool StridePos = getSCEVStride(LoadEv) >= 0;
2142
2143 // Currently, the volatile memcpy only emulates traversing memory forward.
2144 if (!StridePos && DestVolatile)
2145 goto CleanupAndExit;
2146
2147 bool RuntimeCheck = (Overlap || DestVolatile);
2148
2149 BasicBlock *ExitB;
2150 if (RuntimeCheck) {
2151 // The runtime check needs a single exit block.
2152 SmallVector<BasicBlock*, 8> ExitBlocks;
2153 CurLoop->getUniqueExitBlocks(ExitBlocks);
2154 if (ExitBlocks.size() != 1)
2155 goto CleanupAndExit;
2156 ExitB = ExitBlocks[0];
2157 }
2158
2159 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2160 // pointer size if it isn't already.
2161 LLVMContext &Ctx = SI->getContext();
2162 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2163 DebugLoc DLoc = SI->getDebugLoc();
2164
2165 const SCEV *NumBytesS =
2166 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2167 if (StoreSize != 1)
2168 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2169 SCEV::FlagNUW);
2170 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2171 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2172 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2173 NumBytes = Simp;
2174
2175 CallInst *NewCall;
2176
2177 if (RuntimeCheck) {
2178 unsigned Threshold = RuntimeMemSizeThreshold;
2179 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2180 uint64_t C = CI->getZExtValue();
2181 if (Threshold != 0 && C < Threshold)
2182 goto CleanupAndExit;
2183 if (C < CompileTimeMemSizeThreshold)
2184 goto CleanupAndExit;
2185 }
2186
2187 BasicBlock *Header = CurLoop->getHeader();
2188 Function *Func = Header->getParent();
2189 Loop *ParentL = LF->getLoopFor(Preheader);
2190 StringRef HeaderName = Header->getName();
2191
2192 // Create a new (empty) preheader, and update the PHI nodes in the
2193 // header to use the new preheader.
2194 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2195 Func, Header);
2196 if (ParentL)
2197 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2198 IRBuilder<>(NewPreheader).CreateBr(Header);
2199 for (auto &In : *Header) {
2200 PHINode *PN = dyn_cast<PHINode>(&In);
2201 if (!PN)
2202 break;
2203 int bx = PN->getBasicBlockIndex(Preheader);
2204 if (bx >= 0)
2205 PN->setIncomingBlock(bx, NewPreheader);
2206 }
2207 DT->addNewBlock(NewPreheader, Preheader);
2208 DT->changeImmediateDominator(Header, NewPreheader);
2209
2210 // Check for safe conditions to execute memmove.
2211 // If stride is positive, copying things from higher to lower addresses
2212 // is equivalent to memmove. For negative stride, it's the other way
2213 // around. Copying forward in memory with positive stride may not be
2214 // same as memmove since we may be copying values that we just stored
2215 // in some previous iteration.
2216 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2217 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2218 Value *LowA = StridePos ? SA : LA;
2219 Value *HighA = StridePos ? LA : SA;
2220 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2221 Value *Cond = CmpA;
2222
2223 // Check for distance between pointers. Since the case LowA < HighA
2224 // is checked for above, assume LowA >= HighA.
2225 Value *Dist = Builder.CreateSub(LowA, HighA);
2226 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2227 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2228 Cond = CmpEither;
2229
2230 if (Threshold != 0) {
2231 Type *Ty = NumBytes->getType();
2232 Value *Thr = ConstantInt::get(Ty, Threshold);
2233 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2234 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2235 Cond = CmpBoth;
2236 }
2237 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2238 Func, NewPreheader);
2239 if (ParentL)
2240 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2241 Instruction *OldT = Preheader->getTerminator();
2242 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2243 OldT->eraseFromParent();
2244 Preheader->setName(Preheader->getName()+".old");
2245 DT->addNewBlock(MemmoveB, Preheader);
2246 // Find the new immediate dominator of the exit block.
2247 BasicBlock *ExitD = Preheader;
2248 for (BasicBlock *PB : predecessors(ExitB)) {
2249 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2250 if (!ExitD)
2251 break;
2252 }
2253 // If the prior immediate dominator of ExitB was dominated by the
2254 // old preheader, then the old preheader becomes the new immediate
2255 // dominator. Otherwise don't change anything (because the newly
2256 // added blocks are dominated by the old preheader).
2257 if (ExitD && DT->dominates(Preheader, ExitD)) {
2258 DomTreeNode *BN = DT->getNode(ExitB);
2259 DomTreeNode *DN = DT->getNode(ExitD);
2260 BN->setIDom(DN);
2261 }
2262
2263 // Add a call to memmove to the conditional block.
2264 IRBuilder<> CondBuilder(MemmoveB);
2265 CondBuilder.CreateBr(ExitB);
2266 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2267
2268 if (DestVolatile) {
2269 Type *Int32Ty = Type::getInt32Ty(Ctx);
2270 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2271 Type *VoidTy = Type::getVoidTy(Ctx);
2272 Module *M = Func->getParent();
2273 FunctionCallee Fn = M->getOrInsertFunction(
2274 HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
2275
2276 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2277 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2278 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2279 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2280 MemmoveB->getTerminator());
2281 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2282 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2283 NumWords = Simp;
2284
2285 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2286 ? StoreBasePtr
2287 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2288 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2289 ? LoadBasePtr
2290 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2291 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2292 } else {
2293 NewCall = CondBuilder.CreateMemMove(
2294 StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
2295 }
2296 } else {
2297 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
2298 LI->getAlign(), NumBytes);
2299 // Okay, the memcpy has been formed. Zap the original store and
2300 // anything that feeds into it.
2301 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2302 }
2303
2304 NewCall->setDebugLoc(DLoc);
2305
2306 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2307 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2308 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2309 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2310 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2311
2312 return true;
2313}
2314
2315// Check if the instructions in Insts, together with their dependencies
2316// cover the loop in the sense that the loop could be safely eliminated once
2317// the instructions in Insts are removed.
2318bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2319 SmallVectorImpl<Instruction*> &Insts) const {
2320 SmallSet<BasicBlock*,8> LoopBlocks;
2321 for (auto *B : L->blocks())
2322 LoopBlocks.insert(B);
2323
2324 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2325
2326 // Collect all instructions from the loop that the instructions in Insts
2327 // depend on (plus their dependencies, etc.). These instructions will
2328 // constitute the expression trees that feed those in Insts, but the trees
2329 // will be limited only to instructions contained in the loop.
2330 for (unsigned i = 0; i < Worklist.size(); ++i) {
2331 Instruction *In = Worklist[i];
2332 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2333 Instruction *OpI = dyn_cast<Instruction>(I);
2334 if (!OpI)
2335 continue;
2336 BasicBlock *PB = OpI->getParent();
2337 if (!LoopBlocks.count(PB))
2338 continue;
2339 Worklist.insert(OpI);
2340 }
2341 }
2342
2343 // Scan all instructions in the loop, if any of them have a user outside
2344 // of the loop, or outside of the expressions collected above, then either
2345 // the loop has a side-effect visible outside of it, or there are
2346 // instructions in it that are not involved in the original set Insts.
2347 for (auto *B : L->blocks()) {
2348 for (auto &In : *B) {
2349 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2350 continue;
2351 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2352 return false;
2353 for (auto K : In.users()) {
2354 Instruction *UseI = dyn_cast<Instruction>(K);
2355 if (!UseI)
2356 continue;
2357 BasicBlock *UseB = UseI->getParent();
2358 if (LF->getLoopFor(UseB) != L)
2359 return false;
2360 }
2361 }
2362 }
2363
2364 return true;
2365}
2366
2367/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2368/// with the specified backedge count. This block is known to be in the current
2369/// loop and not in any subloops.
2370bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2371 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2372 // We can only promote stores in this block if they are unconditionally
2373 // executed in the loop. For a block to be unconditionally executed, it has
2374 // to dominate all the exit blocks of the loop. Verify this now.
2375 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2376 return DT->dominates(BB, EB);
2377 };
2378 if (!all_of(ExitBlocks, DominatedByBB))
2379 return false;
2380
2381 bool MadeChange = false;
2382 // Look for store instructions, which may be optimized to memset/memcpy.
2383 SmallVector<StoreInst*,8> Stores;
2384 collectStores(CurLoop, BB, Stores);
2385
2386 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2387 for (auto &SI : Stores)
2388 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2389
2390 return MadeChange;
2391}
2392
2393bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2394 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2395 if (PMR.recognize())
2396 return true;
2397
2398 if (!HasMemcpy && !HasMemmove)
2399 return false;
2400
2401 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2402 assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2404
, __extension__ __PRETTY_FUNCTION__))
2403 "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2404
, __extension__ __PRETTY_FUNCTION__))
2404 "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2404
, __extension__ __PRETTY_FUNCTION__))
;
2405
2406 SmallVector<BasicBlock *, 8> ExitBlocks;
2407 L->getUniqueExitBlocks(ExitBlocks);
2408
2409 bool Changed = false;
2410
2411 // Scan all the blocks in the loop that are not in subloops.
2412 for (auto *BB : L->getBlocks()) {
2413 // Ignore blocks in subloops.
2414 if (LF->getLoopFor(BB) != L)
2415 continue;
2416 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2417 }
2418
2419 return Changed;
2420}
2421
2422bool HexagonLoopIdiomRecognize::run(Loop *L) {
2423 const Module &M = *L->getHeader()->getParent()->getParent();
2424 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2425 return false;
2426
2427 // If the loop could not be converted to canonical form, it must have an
2428 // indirectbr in it, just give up.
2429 if (!L->getLoopPreheader())
2430 return false;
2431
2432 // Disable loop idiom recognition if the function's name is a common idiom.
2433 StringRef Name = L->getHeader()->getParent()->getName();
2434 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2435 return false;
2436
2437 DL = &L->getHeader()->getModule()->getDataLayout();
2438
2439 HasMemcpy = TLI->has(LibFunc_memcpy);
2440 HasMemmove = TLI->has(LibFunc_memmove);
2441
2442 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2443 return runOnCountableLoop(L);
2444 return false;
2445}
2446
2447bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
2448 LPPassManager &LPM) {
2449 if (skipLoop(L))
2450 return false;
2451
2452 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2453 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2454 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2455 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2456 *L->getHeader()->getParent());
2457 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2458 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2459}
2460
2461Pass *llvm::createHexagonLoopIdiomPass() {
2462 return new HexagonLoopIdiomRecognizeLegacyPass();
2463}
2464
2465PreservedAnalyses
2466HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM,
2467 LoopStandardAnalysisResults &AR,
2468 LPMUpdater &U) {
2469 return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)
2470 .run(&L)
2471 ? getLoopPassPreservedAnalyses()
2472 : PreservedAnalyses::all();
2473}

/build/llvm-toolchain-snapshot-15~++20220420111733+e13d2efed663/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <utility>
24
25namespace llvm {
26class FoldingSetNodeID;
27class StringRef;
28class hash_code;
29class raw_ostream;
30
31template <typename T> class SmallVectorImpl;
32template <typename T> class ArrayRef;
33template <typename T> class Optional;
34template <typename T, typename Enable> struct DenseMapInfo;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69/// * APInt supports zero-bit-width values, but operations that require bits
70/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit
71/// integer). This means that operations like zero extension and logical
72/// shifts are defined, but sign extension and ashr is not. Zero bit values
73/// compare and hash equal to themselves, and countLeadingZeros returns 0.
74///
75class LLVM_NODISCARD[[clang::warn_unused_result]] APInt {
76public:
77 typedef uint64_t WordType;
78
79 /// This enum is used to hold the constants we needed for APInt.
80 enum : unsigned {
81 /// Byte size of a word.
82 APINT_WORD_SIZE = sizeof(WordType),
83 /// Bits in a word.
84 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
85 };
86
87 enum class Rounding {
88 DOWN,
89 TOWARD_ZERO,
90 UP,
91 };
92
93 static constexpr WordType WORDTYPE_MAX = ~WordType(0);
94
95 /// \name Constructors
96 /// @{
97
98 /// Create a new APInt of numBits width, initialized as val.
99 ///
100 /// If isSigned is true then val is treated as if it were a signed value
101 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
102 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
103 /// the range of val are zero filled).
104 ///
105 /// \param numBits the bit width of the constructed APInt
106 /// \param val the initial value of the APInt
107 /// \param isSigned how to treat signedness of val
108 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
109 : BitWidth(numBits) {
110 if (isSingleWord()) {
111 U.VAL = val;
112 clearUnusedBits();
113 } else {
114 initSlowCase(val, isSigned);
115 }
116 }
117
118 /// Construct an APInt of numBits width, initialized as bigVal[].
119 ///
120 /// Note that bigVal.size() can be smaller or larger than the corresponding
121 /// bit width but any extraneous bits will be dropped.
122 ///
123 /// \param numBits the bit width of the constructed APInt
124 /// \param bigVal a sequence of words to form the initial value of the APInt
125 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
126
127 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
128 /// deprecated because this constructor is prone to ambiguity with the
129 /// APInt(unsigned, uint64_t, bool) constructor.
130 ///
131 /// If this overload is ever deleted, care should be taken to prevent calls
132 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
133 /// constructor.
134 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
135
136 /// Construct an APInt from a string representation.
137 ///
138 /// This constructor interprets the string \p str in the given radix. The
139 /// interpretation stops when the first character that is not suitable for the
140 /// radix is encountered, or the end of the string. Acceptable radix values
141 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
142 /// string to require more bits than numBits.
143 ///
144 /// \param numBits the bit width of the constructed APInt
145 /// \param str the string to be interpreted
146 /// \param radix the radix to use for the conversion
147 APInt(unsigned numBits, StringRef str, uint8_t radix);
148
149 /// Default constructor that creates an APInt with a 1-bit zero value.
150 explicit APInt() : BitWidth(1) { U.VAL = 0; }
151
152 /// Copy Constructor.
153 APInt(const APInt &that) : BitWidth(that.BitWidth) {
154 if (isSingleWord())
155 U.VAL = that.U.VAL;
156 else
157 initSlowCase(that);
158 }
159
160 /// Move Constructor.
161 APInt(APInt &&that) : BitWidth(that.BitWidth) {
162 memcpy(&U, &that.U, sizeof(U));
163 that.BitWidth = 0;
164 }
165
166 /// Destructor.
167 ~APInt() {
168 if (needsCleanup())
169 delete[] U.pVal;
170 }
171
172 /// @}
173 /// \name Value Generators
174 /// @{
175
176 /// Get the '0' value for the specified bit-width.
177 static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
178
179 /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
180 static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
181
182 /// Return an APInt zero bits wide.
183 static APInt getZeroWidth() { return getZero(0); }
184
185 /// Gets maximum unsigned value of APInt for specific bit width.
186 static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); }
187
188 /// Gets maximum signed value of APInt for a specific bit width.
189 static APInt getSignedMaxValue(unsigned numBits) {
190 APInt API = getAllOnes(numBits);
191 API.clearBit(numBits - 1);
192 return API;
193 }
194
195 /// Gets minimum unsigned value of APInt for a specific bit width.
196 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
197
198 /// Gets minimum signed value of APInt for a specific bit width.
199 static APInt getSignedMinValue(unsigned numBits) {
200 APInt API(numBits, 0);
201 API.setBit(numBits - 1);
202 return API;
203 }
204
205 /// Get the SignMask for a specific bit width.
206 ///
207 /// This is just a wrapper function of getSignedMinValue(), and it helps code
208 /// readability when we want to get a SignMask.
209 static APInt getSignMask(unsigned BitWidth) {
210 return getSignedMinValue(BitWidth);
211 }
212
213 /// Return an APInt of a specified width with all bits set.
214 static APInt getAllOnes(unsigned numBits) {
215 return APInt(numBits, WORDTYPE_MAX, true);
216 }
217
218 /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
219 static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
220
221 /// Return an APInt with exactly one bit set in the result.
222 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
223 APInt Res(numBits, 0);
224 Res.setBit(BitNo);
225 return Res;
226 }
227
228 /// Get a value with a block of bits set.
229 ///
230 /// Constructs an APInt value that has a contiguous range of bits set. The
231 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
232 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
233 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
234 /// \p hiBit.
235 ///
236 /// \param numBits the intended bit width of the result
237 /// \param loBit the index of the lowest bit set.
238 /// \param hiBit the index of the highest bit set.
239 ///
240 /// \returns An APInt value with the requested bits set.
241 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
242 APInt Res(numBits, 0);
243 Res.setBits(loBit, hiBit);
244 return Res;
245 }
246
247 /// Wrap version of getBitsSet.
248 /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
249 /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
250 /// with parameters (32, 28, 4), you would get 0xF000000F.
251 /// If \p hiBit is equal to \p loBit, you would get a result with all bits
252 /// set.
253 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
254 unsigned hiBit) {
255 APInt Res(numBits, 0);
256 Res.setBitsWithWrap(loBit, hiBit);
257 return Res;
258 }
259
260 /// Constructs an APInt value that has a contiguous range of bits set. The
261 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
262 /// bits will be zero. For example, with parameters(32, 12) you would get
263 /// 0xFFFFF000.
264 ///
265 /// \param numBits the intended bit width of the result
266 /// \param loBit the index of the lowest bit to set.
267 ///
268 /// \returns An APInt value with the requested bits set.
269 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
270 APInt Res(numBits, 0);
271 Res.setBitsFrom(loBit);
272 return Res;
273 }
274
275 /// Constructs an APInt value that has the top hiBitsSet bits set.
276 ///
277 /// \param numBits the bitwidth of the result
278 /// \param hiBitsSet the number of high-order bits set in the result.
279 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
280 APInt Res(numBits, 0);
281 Res.setHighBits(hiBitsSet);
282 return Res;
283 }
284
285 /// Constructs an APInt value that has the bottom loBitsSet bits set.
286 ///
287 /// \param numBits the bitwidth of the result
288 /// \param loBitsSet the number of low-order bits set in the result.
289 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
290 APInt Res(numBits, 0);
291 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
14
Returning from 'APInt::setLowBits'
292 return Res;
293 }
294
295 /// Return a value containing V broadcasted over NewLen bits.
296 static APInt getSplat(unsigned NewLen, const APInt &V);
297
298 /// @}
299 /// \name Value Tests
300 /// @{
301
302 /// Determine if this APInt just has one word to store value.
303 ///
304 /// \returns true if the number of bits <= 64, false otherwise.
305 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
306
307 /// Determine sign of this APInt.
308 ///
309 /// This tests the high bit of this APInt to determine if it is set.
310 ///
311 /// \returns true if this APInt is negative, false otherwise
312 bool isNegative() const { return (*this)[BitWidth - 1]; }
313
314 /// Determine if this APInt Value is non-negative (>= 0)
315 ///
316 /// This tests the high bit of the APInt to determine if it is unset.
317 bool isNonNegative() const { return !isNegative(); }
318
319 /// Determine if sign bit of this APInt is set.
320 ///
321 /// This tests the high bit of this APInt to determine if it is set.
322 ///
323 /// \returns true if this APInt has its sign bit set, false otherwise.
324 bool isSignBitSet() const { return (*this)[BitWidth - 1]; }
325
326 /// Determine if sign bit of this APInt is clear.
327 ///
328 /// This tests the high bit of this APInt to determine if it is clear.
329 ///
330 /// \returns true if this APInt has its sign bit clear, false otherwise.
331 bool isSignBitClear() const { return !isSignBitSet(); }
332
333 /// Determine if this APInt Value is positive.
334 ///
335 /// This tests if the value of this APInt is positive (> 0). Note
336 /// that 0 is not a positive value.
337 ///
338 /// \returns true if this APInt is positive.
339 bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
340
341 /// Determine if this APInt Value is non-positive (<= 0).
342 ///
343 /// \returns true if this APInt is non-positive.
344 bool isNonPositive() const { return !isStrictlyPositive(); }
345
346 /// Determine if all bits are set. This is true for zero-width values.
347 bool isAllOnes() const {
348 if (BitWidth == 0)
349 return true;
350 if (isSingleWord())
351 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
352 return countTrailingOnesSlowCase() == BitWidth;
353 }
354
355 /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
356 bool isAllOnesValue() const { return isAllOnes(); }
357
358 /// Determine if this value is zero, i.e. all bits are clear.
359 bool isZero() const {
360 if (isSingleWord())
361 return U.VAL == 0;
362 return countLeadingZerosSlowCase() == BitWidth;
363 }
364
365 /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
366 bool isNullValue() const { return isZero(); }
367
368 /// Determine if this is a value of 1.
369 ///
370 /// This checks to see if the value of this APInt is one.
371 bool isOne() const {
372 if (isSingleWord())
373 return U.VAL == 1;
374 return countLeadingZerosSlowCase() == BitWidth - 1;
375 }
376
377 /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
378 bool isOneValue() const { return isOne(); }
379
380 /// Determine if this is the largest unsigned value.
381 ///
382 /// This checks to see if the value of this APInt is the maximum unsigned
383 /// value for the APInt's bit width.
384 bool isMaxValue() const { return isAllOnes(); }
385
386 /// Determine if this is the largest signed value.
387 ///
388 /// This checks to see if the value of this APInt is the maximum signed
389 /// value for the APInt's bit width.
390 bool isMaxSignedValue() const {
391 if (isSingleWord()) {
392 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 392, __extension__ __PRETTY_FUNCTION__
))
;
393 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
394 }
395 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
396 }
397
398 /// Determine if this is the smallest unsigned value.
399 ///
400 /// This checks to see if the value of this APInt is the minimum unsigned
401 /// value for the APInt's bit width.
402 bool isMinValue() const { return isZero(); }
403
404 /// Determine if this is the smallest signed value.
405 ///
406 /// This checks to see if the value of this APInt is the minimum signed
407 /// value for the APInt's bit width.
408 bool isMinSignedValue() const {
409 if (isSingleWord()) {
410 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 410, __extension__ __PRETTY_FUNCTION__
))
;
411 return U.VAL == (WordType(1) << (BitWidth - 1));
412 }
413 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
414 }
415
416 /// Check if this APInt has an N-bits unsigned integer value.
417 bool isIntN(unsigned N) const { return getActiveBits() <= N; }
418
419 /// Check if this APInt has an N-bits signed integer value.
420 bool isSignedIntN(unsigned N) const { return getSignificantBits() <= N; }
421
422 /// Check if this APInt's value is a power of two greater than zero.
423 ///
424 /// \returns true if the argument APInt value is a power of two > 0.
425 bool isPowerOf2() const {
426 if (isSingleWord()) {
427 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 427, __extension__ __PRETTY_FUNCTION__
))
;
428 return isPowerOf2_64(U.VAL);
429 }
430 return countPopulationSlowCase() == 1;
431 }
432
433 /// Check if this APInt's negated value is a power of two greater than zero.
434 bool isNegatedPowerOf2() const {
435 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 435, __extension__ __PRETTY_FUNCTION__
))
;
436 if (isNonNegative())
437 return false;
438 // NegatedPowerOf2 - shifted mask in the top bits.
439 unsigned LO = countLeadingOnes();
440 unsigned TZ = countTrailingZeros();
441 return (LO + TZ) == BitWidth;
442 }
443
444 /// Check if the APInt's value is returned by getSignMask.
445 ///
446 /// \returns true if this is the value returned by getSignMask.
447 bool isSignMask() const { return isMinSignedValue(); }
448
449 /// Convert APInt to a boolean value.
450 ///
451 /// This converts the APInt to a boolean value as a test against zero.
452 bool getBoolValue() const { return !isZero(); }
453
454 /// If this value is smaller than the specified limit, return it, otherwise
455 /// return the limit value. This causes the value to saturate to the limit.
456 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
457 return ugt(Limit) ? Limit : getZExtValue();
458 }
459
460 /// Check if the APInt consists of a repeated bit pattern.
461 ///
462 /// e.g. 0x01010101 satisfies isSplat(8).
463 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
464 /// width without remainder.
465 bool isSplat(unsigned SplatSizeInBits) const;
466
467 /// \returns true if this APInt value is a sequence of \param numBits ones
468 /// starting at the least significant bit with the remainder zero.
469 bool isMask(unsigned numBits) const {
470 assert(numBits != 0 && "numBits must be non-zero")(static_cast <bool> (numBits != 0 && "numBits must be non-zero"
) ? void (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\""
, "llvm/include/llvm/ADT/APInt.h", 470, __extension__ __PRETTY_FUNCTION__
))
;
471 assert(numBits <= BitWidth && "numBits out of range")(static_cast <bool> (numBits <= BitWidth && "numBits out of range"
) ? void (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\""
, "llvm/include/llvm/ADT/APInt.h", 471, __extension__ __PRETTY_FUNCTION__
))
;
472 if (isSingleWord())
473 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
474 unsigned Ones = countTrailingOnesSlowCase();
475 return (numBits == Ones) &&
476 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
477 }
478
479 /// \returns true if this APInt is a non-empty sequence of ones starting at
480 /// the least significant bit with the remainder zero.
481 /// Ex. isMask(0x0000FFFFU) == true.
482 bool isMask() const {
483 if (isSingleWord())
484 return isMask_64(U.VAL);
485 unsigned Ones = countTrailingOnesSlowCase();
486 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
487 }
488
489 /// Return true if this APInt value contains a non-empty sequence of ones with
490 /// the remainder zero.
491 bool isShiftedMask() const {
492 if (isSingleWord())
493 return isShiftedMask_64(U.VAL);
494 unsigned Ones = countPopulationSlowCase();
495 unsigned LeadZ = countLeadingZerosSlowCase();
496 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
497 }
498
499 /// Return true if this APInt value contains a non-empty sequence of ones with
500 /// the remainder zero. If true, \p MaskIdx will specify the index of the
501 /// lowest set bit and \p MaskLen is updated to specify the length of the
502 /// mask, else neither are updated.
503 bool isShiftedMask(unsigned &MaskIdx, unsigned &MaskLen) const {
504 if (isSingleWord())
505 return isShiftedMask_64(U.VAL, MaskIdx, MaskLen);
506 unsigned Ones = countPopulationSlowCase();
507 unsigned LeadZ = countLeadingZerosSlowCase();
508 unsigned TrailZ = countTrailingZerosSlowCase();
509 if ((Ones + LeadZ + TrailZ) != BitWidth)
510 return false;
511 MaskLen = Ones;
512 MaskIdx = TrailZ;
513 return true;
514 }
515
516 /// Compute an APInt containing numBits highbits from this APInt.
517 ///
518 /// Get an APInt with the same BitWidth as this APInt, just zero mask the low
519 /// bits and right shift to the least significant bit.
520 ///
521 /// \returns the high "numBits" bits of this APInt.
522 APInt getHiBits(unsigned numBits) const;
523
524 /// Compute an APInt containing numBits lowbits from this APInt.
525 ///
526 /// Get an APInt with the same BitWidth as this APInt, just zero mask the high
527 /// bits.
528 ///
529 /// \returns the low "numBits" bits of this APInt.
530 APInt getLoBits(unsigned numBits) const;
531
532 /// Determine if two APInts have the same value, after zero-extending
533 /// one of them (if needed!) to ensure that the bit-widths match.
534 static bool isSameValue(const APInt &I1, const APInt &I2) {
535 if (I1.getBitWidth() == I2.getBitWidth())
536 return I1 == I2;
537
538 if (I1.getBitWidth() > I2.getBitWidth())
539 return I1 == I2.zext(I1.getBitWidth());
540
541 return I1.zext(I2.getBitWidth()) == I2;
542 }
543
544 /// Overload to compute a hash_code for an APInt value.
545 friend hash_code hash_value(const APInt &Arg);
546
547 /// This function returns a pointer to the internal storage of the APInt.
548 /// This is useful for writing out the APInt in binary form without any
549 /// conversions.
550 const uint64_t *getRawData() const {
551 if (isSingleWord())
552 return &U.VAL;
553 return &U.pVal[0];
554 }
555
556 /// @}
557 /// \name Unary Operators
558 /// @{
559
560 /// Postfix increment operator. Increment *this by 1.
561 ///
562 /// \returns a new APInt value representing the original value of *this.
563 APInt operator++(int) {
564 APInt API(*this);
565 ++(*this);
566 return API;
567 }
568
569 /// Prefix increment operator.
570 ///
571 /// \returns *this incremented by one
572 APInt &operator++();
573
574 /// Postfix decrement operator. Decrement *this by 1.
575 ///
576 /// \returns a new APInt value representing the original value of *this.
577 APInt operator--(int) {
578 APInt API(*this);
579 --(*this);
580 return API;
581 }
582
583 /// Prefix decrement operator.
584 ///
585 /// \returns *this decremented by one.
586 APInt &operator--();
587
588 /// Logical negation operation on this APInt returns true if zero, like normal
589 /// integers.
590 bool operator!() const { return isZero(); }
591
592 /// @}
593 /// \name Assignment Operators
594 /// @{
595
596 /// Copy assignment operator.
597 ///
598 /// \returns *this after assignment of RHS.
599 APInt &operator=(const APInt &RHS) {
600 // The common case (both source or dest being inline) doesn't require
601 // allocation or deallocation.
602 if (isSingleWord() && RHS.isSingleWord()) {
603 U.VAL = RHS.U.VAL;
604 BitWidth = RHS.BitWidth;
605 return *this;
606 }
607
608 assignSlowCase(RHS);
609 return *this;
610 }
611
612 /// Move assignment operator.
613 APInt &operator=(APInt &&that) {
614#ifdef EXPENSIVE_CHECKS
615 // Some std::shuffle implementations still do self-assignment.
616 if (this == &that)
617 return *this;
618#endif
619 assert(this != &that && "Self-move not supported")(static_cast <bool> (this != &that && "Self-move not supported"
) ? void (0) : __assert_fail ("this != &that && \"Self-move not supported\""
, "llvm/include/llvm/ADT/APInt.h", 619, __extension__ __PRETTY_FUNCTION__
))
;
620 if (!isSingleWord())
621 delete[] U.pVal;
622
623 // Use memcpy so that type based alias analysis sees both VAL and pVal
624 // as modified.
625 memcpy(&U, &that.U, sizeof(U));
626
627 BitWidth = that.BitWidth;
628 that.BitWidth = 0;
629 return *this;
630 }
631
632 /// Assignment operator.
633 ///
634 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
635 /// the bit width, the excess bits are truncated. If the bit width is larger
636 /// than 64, the value is zero filled in the unspecified high order bits.
637 ///
638 /// \returns *this after assignment of RHS value.
639 APInt &operator=(uint64_t RHS) {
640 if (isSingleWord()) {
641 U.VAL = RHS;
642 return clearUnusedBits();
643 }
644 U.pVal[0] = RHS;
645 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
646 return *this;
647 }
648
649 /// Bitwise AND assignment operator.
650 ///
651 /// Performs a bitwise AND operation on this APInt and RHS. The result is
652 /// assigned to *this.
653 ///
654 /// \returns *this after ANDing with RHS.
655 APInt &operator&=(const APInt &RHS) {
656 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 656, __extension__ __PRETTY_FUNCTION__
))
;
657 if (isSingleWord())
658 U.VAL &= RHS.U.VAL;
659 else
660 andAssignSlowCase(RHS);
661 return *this;
662 }
663
664 /// Bitwise AND assignment operator.
665 ///
666 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
667 /// logically zero-extended or truncated to match the bit-width of
668 /// the LHS.
669 APInt &operator&=(uint64_t RHS) {
670 if (isSingleWord()) {
671 U.VAL &= RHS;
672 return *this;
673 }
674 U.pVal[0] &= RHS;
675 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
676 return *this;
677 }
678
679 /// Bitwise OR assignment operator.
680 ///
681 /// Performs a bitwise OR operation on this APInt and RHS. The result is
682 /// assigned *this;
683 ///
684 /// \returns *this after ORing with RHS.
685 APInt &operator|=(const APInt &RHS) {
686 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 686, __extension__ __PRETTY_FUNCTION__
))
;
687 if (isSingleWord())
688 U.VAL |= RHS.U.VAL;
689 else
690 orAssignSlowCase(RHS);
691 return *this;
692 }
693
694 /// Bitwise OR assignment operator.
695 ///
696 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
697 /// logically zero-extended or truncated to match the bit-width of
698 /// the LHS.
699 APInt &operator|=(uint64_t RHS) {
700 if (isSingleWord()) {
701 U.VAL |= RHS;
702 return clearUnusedBits();
703 }
704 U.pVal[0] |= RHS;
705 return *this;
706 }
707
708 /// Bitwise XOR assignment operator.
709 ///
710 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
711 /// assigned to *this.
712 ///
713 /// \returns *this after XORing with RHS.
714 APInt &operator^=(const APInt &RHS) {
715 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 715, __extension__ __PRETTY_FUNCTION__
))
;
716 if (isSingleWord())
717 U.VAL ^= RHS.U.VAL;
718 else
719 xorAssignSlowCase(RHS);
720 return *this;
721 }
722
723 /// Bitwise XOR assignment operator.
724 ///
725 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
726 /// logically zero-extended or truncated to match the bit-width of
727 /// the LHS.
728 APInt &operator^=(uint64_t RHS) {
729 if (isSingleWord()) {
730 U.VAL ^= RHS;
731 return clearUnusedBits();
732 }
733 U.pVal[0] ^= RHS;
734 return *this;
735 }
736
737 /// Multiplication assignment operator.
738 ///
739 /// Multiplies this APInt by RHS and assigns the result to *this.
740 ///
741 /// \returns *this
742 APInt &operator*=(const APInt &RHS);
743 APInt &operator*=(uint64_t RHS);
744
745 /// Addition assignment operator.
746 ///
747 /// Adds RHS to *this and assigns the result to *this.
748 ///
749 /// \returns *this
750 APInt &operator+=(const APInt &RHS);
751 APInt &operator+=(uint64_t RHS);
752
753 /// Subtraction assignment operator.
754 ///
755 /// Subtracts RHS from *this and assigns the result to *this.
756 ///
757 /// \returns *this
758 APInt &operator-=(const APInt &RHS);
759 APInt &operator-=(uint64_t RHS);
760
761 /// Left-shift assignment function.
762 ///
763 /// Shifts *this left by shiftAmt and assigns the result to *this.
764 ///
765 /// \returns *this after shifting left by ShiftAmt
766 APInt &operator<<=(unsigned ShiftAmt) {
767 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 767, __extension__ __PRETTY_FUNCTION__
))
;
768 if (isSingleWord()) {
769 if (ShiftAmt == BitWidth)
770 U.VAL = 0;
771 else
772 U.VAL <<= ShiftAmt;
773 return clearUnusedBits();
774 }
775 shlSlowCase(ShiftAmt);
776 return *this;
777 }
778
779 /// Left-shift assignment function.
780 ///
781 /// Shifts *this left by shiftAmt and assigns the result to *this.
782 ///
783 /// \returns *this after shifting left by ShiftAmt
784 APInt &operator<<=(const APInt &ShiftAmt);
785
786 /// @}
787 /// \name Binary Operators
788 /// @{
789
790 /// Multiplication operator.
791 ///
792 /// Multiplies this APInt by RHS and returns the result.
793 APInt operator*(const APInt &RHS) const;
794
795 /// Left logical shift operator.
796 ///
797 /// Shifts this APInt left by \p Bits and returns the result.
798 APInt operator<<(unsigned Bits) const { return shl(Bits); }
799
800 /// Left logical shift operator.
801 ///
802 /// Shifts this APInt left by \p Bits and returns the result.
803 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
804
805 /// Arithmetic right-shift function.
806 ///
807 /// Arithmetic right-shift this APInt by shiftAmt.
808 APInt ashr(unsigned ShiftAmt) const {
809 APInt R(*this);
810 R.ashrInPlace(ShiftAmt);
811 return R;
812 }
813
814 /// Arithmetic right-shift this APInt by ShiftAmt in place.
815 void ashrInPlace(unsigned ShiftAmt) {
816 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 816, __extension__ __PRETTY_FUNCTION__
))
;
817 if (isSingleWord()) {
818 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
819 if (ShiftAmt == BitWidth)
820 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
821 else
822 U.VAL = SExtVAL >> ShiftAmt;
823 clearUnusedBits();
824 return;
825 }
826 ashrSlowCase(ShiftAmt);
827 }
828
829 /// Logical right-shift function.
830 ///
831 /// Logical right-shift this APInt by shiftAmt.
832 APInt lshr(unsigned shiftAmt) const {
833 APInt R(*this);
834 R.lshrInPlace(shiftAmt);
835 return R;
836 }
837
838 /// Logical right-shift this APInt by ShiftAmt in place.
839 void lshrInPlace(unsigned ShiftAmt) {
840 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 840, __extension__ __PRETTY_FUNCTION__
))
;
841 if (isSingleWord()) {
842 if (ShiftAmt == BitWidth)
843 U.VAL = 0;
844 else
845 U.VAL >>= ShiftAmt;
846 return;
847 }
848 lshrSlowCase(ShiftAmt);
849 }
850
851 /// Left-shift function.
852 ///
853 /// Left-shift this APInt by shiftAmt.
854 APInt shl(unsigned shiftAmt) const {
855 APInt R(*this);
856 R <<= shiftAmt;
857 return R;
858 }
859
860 /// Rotate left by rotateAmt.
861 APInt rotl(unsigned rotateAmt) const;
862
863 /// Rotate right by rotateAmt.
864 APInt rotr(unsigned rotateAmt) const;
865
866 /// Arithmetic right-shift function.
867 ///
868 /// Arithmetic right-shift this APInt by shiftAmt.
869 APInt ashr(const APInt &ShiftAmt) const {
870 APInt R(*this);
871 R.ashrInPlace(ShiftAmt);
872 return R;
873 }
874
875 /// Arithmetic right-shift this APInt by shiftAmt in place.
876 void ashrInPlace(const APInt &shiftAmt);
877
878 /// Logical right-shift function.
879 ///
880 /// Logical right-shift this APInt by shiftAmt.
881 APInt lshr(const APInt &ShiftAmt) const {
882 APInt R(*this);
883 R.lshrInPlace(ShiftAmt);
884 return R;
885 }
886
887 /// Logical right-shift this APInt by ShiftAmt in place.
888 void lshrInPlace(const APInt &ShiftAmt);
889
890 /// Left-shift function.
891 ///
892 /// Left-shift this APInt by shiftAmt.
893 APInt shl(const APInt &ShiftAmt) const {
894 APInt R(*this);
895 R <<= ShiftAmt;
896 return R;
897 }
898
899 /// Rotate left by rotateAmt.
900 APInt rotl(const APInt &rotateAmt) const;
901
902 /// Rotate right by rotateAmt.
903 APInt rotr(const APInt &rotateAmt) const;
904
905 /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
906 /// equivalent to:
907 /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
908 APInt concat(const APInt &NewLSB) const {
909 /// If the result will be small, then both the merged values are small.
910 unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
911 if (NewWidth <= APINT_BITS_PER_WORD)
912 return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL);
913 return concatSlowCase(NewLSB);
914 }
915
916 /// Unsigned division operation.
917 ///
918 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
919 /// RHS are treated as unsigned quantities for purposes of this division.
920 ///
921 /// \returns a new APInt value containing the division result, rounded towards
922 /// zero.
923 APInt udiv(const APInt &RHS) const;
924 APInt udiv(uint64_t RHS) const;
925
926 /// Signed division function for APInt.
927 ///
928 /// Signed divide this APInt by APInt RHS.
929 ///
930 /// The result is rounded towards zero.
931 APInt sdiv(const APInt &RHS) const;
932 APInt sdiv(int64_t RHS) const;
933
934 /// Unsigned remainder operation.
935 ///
936 /// Perform an unsigned remainder operation on this APInt with RHS being the
937 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
938 /// of this operation. Note that this is a true remainder operation and not a
939 /// modulo operation because the sign follows the sign of the dividend which
940 /// is *this.
941 ///
942 /// \returns a new APInt value containing the remainder result
943 APInt urem(const APInt &RHS) const;
944 uint64_t urem(uint64_t RHS) const;
945
946 /// Function for signed remainder operation.
947 ///
948 /// Signed remainder operation on APInt.
949 APInt srem(const APInt &RHS) const;
950 int64_t srem(int64_t RHS) const;
951
952 /// Dual division/remainder interface.
953 ///
954 /// Sometimes it is convenient to divide two APInt values and obtain both the
955 /// quotient and remainder. This function does both operations in the same
956 /// computation making it a little more efficient. The pair of input arguments
957 /// may overlap with the pair of output arguments. It is safe to call
958 /// udivrem(X, Y, X, Y), for example.
959 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
960 APInt &Remainder);
961 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
962 uint64_t &Remainder);
963
964 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
965 APInt &Remainder);
966 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
967 int64_t &Remainder);
968
969 // Operations that return overflow indicators.
970 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
971 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
972 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
973 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
974 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
975 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
976 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
977 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
978 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
979
980 // Operations that saturate
981 APInt sadd_sat(const APInt &RHS) const;
982 APInt uadd_sat(const APInt &RHS) const;
983 APInt ssub_sat(const APInt &RHS) const;
984 APInt usub_sat(const APInt &RHS) const;
985 APInt smul_sat(const APInt &RHS) const;
986 APInt umul_sat(const APInt &RHS) const;
987 APInt sshl_sat(const APInt &RHS) const;
988 APInt ushl_sat(const APInt &RHS) const;
989
990 /// Array-indexing support.
991 ///
992 /// \returns the bit value at bitPosition
993 bool operator[](unsigned bitPosition) const {
994 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast <bool> (bitPosition < getBitWidth() &&
"Bit position out of bounds!") ? void (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\""
, "llvm/include/llvm/ADT/APInt.h", 994, __extension__ __PRETTY_FUNCTION__
))
;
995 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
996 }
997
998 /// @}
999 /// \name Comparison Operators
1000 /// @{
1001
1002 /// Equality operator.
1003 ///
1004 /// Compares this APInt with RHS for the validity of the equality
1005 /// relationship.
1006 bool operator==(const APInt &RHS) const {
1007 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Comparison requires equal bit widths") ? void (0) : __assert_fail
("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\""
, "llvm/include/llvm/ADT/APInt.h", 1007, __extension__ __PRETTY_FUNCTION__
))
;
1008 if (isSingleWord())
1009 return U.VAL == RHS.U.VAL;
1010 return equalSlowCase(RHS);
1011 }
1012
1013 /// Equality operator.
1014 ///
1015 /// Compares this APInt with a uint64_t for the validity of the equality
1016 /// relationship.
1017 ///
1018 /// \returns true if *this == Val
1019 bool operator==(uint64_t Val) const {
1020 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1021 }
1022
1023 /// Equality comparison.
1024 ///
1025 /// Compares this APInt with RHS for the validity of the equality
1026 /// relationship.
1027 ///
1028 /// \returns true if *this == Val
1029 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1030
1031 /// Inequality operator.
1032 ///
1033 /// Compares this APInt with RHS for the validity of the inequality
1034 /// relationship.
1035 ///
1036 /// \returns true if *this != Val
1037 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1038
1039 /// Inequality operator.
1040 ///
1041 /// Compares this APInt with a uint64_t for the validity of the inequality
1042 /// relationship.
1043 ///
1044 /// \returns true if *this != Val
1045 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1046
1047 /// Inequality comparison
1048 ///
1049 /// Compares this APInt with RHS for the validity of the inequality
1050 /// relationship.
1051 ///
1052 /// \returns true if *this != Val
1053 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1054
1055 /// Unsigned less than comparison
1056 ///
1057 /// Regards both *this and RHS as unsigned quantities and compares them for
1058 /// the validity of the less-than relationship.
1059 ///
1060 /// \returns true if *this < RHS when both are considered unsigned.
1061 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1062
1063 /// Unsigned less than comparison
1064 ///
1065 /// Regards both *this as an unsigned quantity and compares it with RHS for
1066 /// the validity of the less-than relationship.
1067 ///
1068 /// \returns true if *this < RHS when considered unsigned.
1069 bool ult(uint64_t RHS) const {
1070 // Only need to check active bits if not a single word.
1071 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1072 }
1073
1074 /// Signed less than comparison
1075 ///
1076 /// Regards both *this and RHS as signed quantities and compares them for
1077 /// validity of the less-than relationship.
1078 ///
1079 /// \returns true if *this < RHS when both are considered signed.
1080 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1081
1082 /// Signed less than comparison
1083 ///
1084 /// Regards both *this as a signed quantity and compares it with RHS for
1085 /// the validity of the less-than relationship.
1086 ///
1087 /// \returns true if *this < RHS when considered signed.
1088 bool slt(int64_t RHS) const {
1089 return (!isSingleWord() && getSignificantBits() > 64)
1090 ? isNegative()
1091 : getSExtValue() < RHS;
1092 }
1093
1094 /// Unsigned less or equal comparison
1095 ///
1096 /// Regards both *this and RHS as unsigned quantities and compares them for
1097 /// validity of the less-or-equal relationship.
1098 ///
1099 /// \returns true if *this <= RHS when both are considered unsigned.
1100 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1101
1102 /// Unsigned less or equal comparison
1103 ///
1104 /// Regards both *this as an unsigned quantity and compares it with RHS for
1105 /// the validity of the less-or-equal relationship.
1106 ///
1107 /// \returns true if *this <= RHS when considered unsigned.
1108 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1109
1110 /// Signed less or equal comparison
1111 ///
1112 /// Regards both *this and RHS as signed quantities and compares them for
1113 /// validity of the less-or-equal relationship.
1114 ///
1115 /// \returns true if *this <= RHS when both are considered signed.
1116 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1117
1118 /// Signed less or equal comparison
1119 ///
1120 /// Regards both *this as a signed quantity and compares it with RHS for the
1121 /// validity of the less-or-equal relationship.
1122 ///
1123 /// \returns true if *this <= RHS when considered signed.
1124 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1125
1126 /// Unsigned greater than comparison
1127 ///
1128 /// Regards both *this and RHS as unsigned quantities and compares them for
1129 /// the validity of the greater-than relationship.
1130 ///
1131 /// \returns true if *this > RHS when both are considered unsigned.
1132 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1133
1134 /// Unsigned greater than comparison
1135 ///
1136 /// Regards both *this as an unsigned quantity and compares it with RHS for
1137 /// the validity of the greater-than relationship.
1138 ///
1139 /// \returns true if *this > RHS when considered unsigned.
1140 bool ugt(uint64_t RHS) const {
1141 // Only need to check active bits if not a single word.
1142 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1143 }
1144
1145 /// Signed greater than comparison
1146 ///
1147 /// Regards both *this and RHS as signed quantities and compares them for the
1148 /// validity of the greater-than relationship.
1149 ///
1150 /// \returns true if *this > RHS when both are considered signed.
1151 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1152
1153 /// Signed greater than comparison
1154 ///
1155 /// Regards both *this as a signed quantity and compares it with RHS for
1156 /// the validity of the greater-than relationship.
1157 ///
1158 /// \returns true if *this > RHS when considered signed.
1159 bool sgt(int64_t RHS) const {
1160 return (!isSingleWord() && getSignificantBits() > 64)
1161 ? !isNegative()
1162 : getSExtValue() > RHS;
1163 }
1164
1165 /// Unsigned greater or equal comparison
1166 ///
1167 /// Regards both *this and RHS as unsigned quantities and compares them for
1168 /// validity of the greater-or-equal relationship.
1169 ///
1170 /// \returns true if *this >= RHS when both are considered unsigned.
1171 bool uge(const APInt &RHS) const { return !ult(RHS); }
1172
1173 /// Unsigned greater or equal comparison
1174 ///
1175 /// Regards both *this as an unsigned quantity and compares it with RHS for
1176 /// the validity of the greater-or-equal relationship.
1177 ///
1178 /// \returns true if *this >= RHS when considered unsigned.
1179 bool uge(uint64_t RHS) const { return !ult(RHS); }
1180
1181 /// Signed greater or equal comparison
1182 ///
1183 /// Regards both *this and RHS as signed quantities and compares them for
1184 /// validity of the greater-or-equal relationship.
1185 ///
1186 /// \returns true if *this >= RHS when both are considered signed.
1187 bool sge(const APInt &RHS) const { return !slt(RHS); }
1188
1189 /// Signed greater or equal comparison
1190 ///
1191 /// Regards both *this as a signed quantity and compares it with RHS for
1192 /// the validity of the greater-or-equal relationship.
1193 ///
1194 /// \returns true if *this >= RHS when considered signed.
1195 bool sge(int64_t RHS) const { return !slt(RHS); }
1196
1197 /// This operation tests if there are any pairs of corresponding bits
1198 /// between this APInt and RHS that are both set.
1199 bool intersects(const APInt &RHS) const {
1200 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1200, __extension__ __PRETTY_FUNCTION__
))
;
1201 if (isSingleWord())
1202 return (U.VAL & RHS.U.VAL) != 0;
1203 return intersectsSlowCase(RHS);
1204 }
1205
1206 /// This operation checks that all bits set in this APInt are also set in RHS.
1207 bool isSubsetOf(const APInt &RHS) const {
1208 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1208, __extension__ __PRETTY_FUNCTION__
))
;
1209 if (isSingleWord())
1210 return (U.VAL & ~RHS.U.VAL) == 0;
1211 return isSubsetOfSlowCase(RHS);
1212 }
1213
1214 /// @}
1215 /// \name Resizing Operators
1216 /// @{
1217
1218 /// Truncate to new width.
1219 ///
1220 /// Truncate the APInt to a specified width. It is an error to specify a width
1221 /// that is greater than or equal to the current width.
1222 APInt trunc(unsigned width) const;
1223
1224 /// Truncate to new width with unsigned saturation.
1225 ///
1226 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1227 /// the new bitwidth, then return truncated APInt. Else, return max value.
1228 APInt truncUSat(unsigned width) const;
1229
1230 /// Truncate to new width with signed saturation.
1231 ///
1232 /// If this APInt, treated as signed integer, can be losslessly truncated to
1233 /// the new bitwidth, then return truncated APInt. Else, return either
1234 /// signed min value if the APInt was negative, or signed max value.
1235 APInt truncSSat(unsigned width) const;
1236
1237 /// Sign extend to a new width.
1238 ///
1239 /// This operation sign extends the APInt to a new width. If the high order
1240 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1241 /// It is an error to specify a width that is less than or equal to the
1242 /// current width.
1243 APInt sext(unsigned width) const;
1244
1245 /// Zero extend to a new width.
1246 ///
1247 /// This operation zero extends the APInt to a new width. The high order bits
1248 /// are filled with 0 bits. It is an error to specify a width that is less
1249 /// than or equal to the current width.
1250 APInt zext(unsigned width) const;
1251
1252 /// Sign extend or truncate to width
1253 ///
1254 /// Make this APInt have the bit width given by \p width. The value is sign
1255 /// extended, truncated, or left alone to make it that width.
1256 APInt sextOrTrunc(unsigned width) const;
1257
1258 /// Zero extend or truncate to width
1259 ///
1260 /// Make this APInt have the bit width given by \p width. The value is zero
1261 /// extended, truncated, or left alone to make it that width.
1262 APInt zextOrTrunc(unsigned width) const;
1263
1264 /// Truncate to width
1265 ///
1266 /// Make this APInt have the bit width given by \p width. The value is
1267 /// truncated or left alone to make it that width.
1268 APInt truncOrSelf(unsigned width) const;
1269
1270 /// Sign extend or truncate to width
1271 ///
1272 /// Make this APInt have the bit width given by \p width. The value is sign
1273 /// extended, or left alone to make it that width.
1274 APInt sextOrSelf(unsigned width) const;
1275
1276 /// Zero extend or truncate to width
1277 ///
1278 /// Make this APInt have the bit width given by \p width. The value is zero
1279 /// extended, or left alone to make it that width.
1280 APInt zextOrSelf(unsigned width) const;
1281
1282 /// @}
1283 /// \name Bit Manipulation Operators
1284 /// @{
1285
1286 /// Set every bit to 1.
1287 void setAllBits() {
1288 if (isSingleWord())
1289 U.VAL = WORDTYPE_MAX;
1290 else
1291 // Set all the bits in all the words.
1292 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1293 // Clear the unused ones
1294 clearUnusedBits();
1295 }
1296
1297 /// Set the given bit to 1 whose position is given as "bitPosition".
1298 void setBit(unsigned BitPosition) {
1299 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1299, __extension__ __PRETTY_FUNCTION__
))
;
1300 WordType Mask = maskBit(BitPosition);
1301 if (isSingleWord())
1302 U.VAL |= Mask;
1303 else
1304 U.pVal[whichWord(BitPosition)] |= Mask;
1305 }
1306
1307 /// Set the sign bit to 1.
1308 void setSignBit() { setBit(BitWidth - 1); }
1309
1310 /// Set a given bit to a given value.
1311 void setBitVal(unsigned BitPosition, bool BitValue) {
1312 if (BitValue)
1313 setBit(BitPosition);
1314 else
1315 clearBit(BitPosition);
1316 }
1317
1318 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1319 /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
1320 /// setBits when \p loBit < \p hiBit.
1321 /// For \p loBit == \p hiBit wrap case, set every bit to 1.
1322 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1323 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1323, __extension__ __PRETTY_FUNCTION__
))
;
1324 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1324, __extension__ __PRETTY_FUNCTION__
))
;
1325 if (loBit < hiBit) {
1326 setBits(loBit, hiBit);
1327 return;
1328 }
1329 setLowBits(hiBit);
1330 setHighBits(BitWidth - loBit);
1331 }
1332
1333 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1334 /// This function handles case when \p loBit <= \p hiBit.
1335 void setBits(unsigned loBit, unsigned hiBit) {
1336 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1336, __extension__ __PRETTY_FUNCTION__
))
;
6
Assuming 'hiBit' is <= field 'BitWidth'
7
'?' condition is true
1337 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1337, __extension__ __PRETTY_FUNCTION__
))
;
8
'?' condition is true
1338 assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit"
) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "llvm/include/llvm/ADT/APInt.h", 1338, __extension__ __PRETTY_FUNCTION__
))
;
9
'?' condition is true
1339 if (loBit == hiBit)
10
Assuming 'loBit' is not equal to 'hiBit'
1340 return;
1341 if (loBit
10.1
'loBit' is < APINT_BITS_PER_WORD
10.1
'loBit' is < APINT_BITS_PER_WORD
< APINT_BITS_PER_WORD && hiBit
10.2
'hiBit' is <= APINT_BITS_PER_WORD
10.2
'hiBit' is <= APINT_BITS_PER_WORD
<= APINT_BITS_PER_WORD) {
11
Taking true branch
1342 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1343 mask <<= loBit;
1344 if (isSingleWord())
12
Taking true branch
1345 U.VAL |= mask;
1346 else
1347 U.pVal[0] |= mask;
1348 } else {
1349 setBitsSlowCase(loBit, hiBit);
1350 }
1351 }
1352
1353 /// Set the top bits starting from loBit.
1354 void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); }
1355
1356 /// Set the bottom loBits bits.
1357 void setLowBits(unsigned loBits) { return setBits(0, loBits); }
5
Calling 'APInt::setBits'
13
Returning from 'APInt::setBits'
1358
1359 /// Set the top hiBits bits.
1360 void setHighBits(unsigned hiBits) {
1361 return setBits(BitWidth - hiBits, BitWidth);
1362 }
1363
1364 /// Set every bit to 0.
1365 void clearAllBits() {
1366 if (isSingleWord())
1367 U.VAL = 0;
1368 else
1369 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1370 }
1371
1372 /// Set a given bit to 0.
1373 ///
1374 /// Set the given bit to 0 whose position is given as "bitPosition".
1375 void clearBit(unsigned BitPosition) {
1376 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1376, __extension__ __PRETTY_FUNCTION__
))
;
1377 WordType Mask = ~maskBit(BitPosition);
1378 if (isSingleWord())
1379 U.VAL &= Mask;
1380 else
1381 U.pVal[whichWord(BitPosition)] &= Mask;
1382 }
1383
1384 /// Set bottom loBits bits to 0.
1385 void clearLowBits(unsigned loBits) {
1386 assert(loBits <= BitWidth && "More bits than bitwidth")(static_cast <bool> (loBits <= BitWidth && "More bits than bitwidth"
) ? void (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\""
, "llvm/include/llvm/ADT/APInt.h", 1386, __extension__ __PRETTY_FUNCTION__
))
;
1387 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1388 *this &= Keep;
1389 }
1390
1391 /// Set the sign bit to 0.
1392 void clearSignBit() { clearBit(BitWidth - 1); }
1393
1394 /// Toggle every bit to its opposite value.
1395 void flipAllBits() {
1396 if (isSingleWord()) {
1397 U.VAL ^= WORDTYPE_MAX;
1398 clearUnusedBits();
1399 } else {
1400 flipAllBitsSlowCase();
1401 }
1402 }
1403
1404 /// Toggles a given bit to its opposite value.
1405 ///
1406 /// Toggle a given bit to its opposite value whose position is given
1407 /// as "bitPosition".
1408 void flipBit(unsigned bitPosition);
1409
1410 /// Negate this APInt in place.
1411 void negate() {
1412 flipAllBits();
1413 ++(*this);
1414 }
1415
1416 /// Insert the bits from a smaller APInt starting at bitPosition.
1417 void insertBits(const APInt &SubBits, unsigned bitPosition);
1418 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1419
1420 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1421 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1422 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1423
1424 /// @}
1425 /// \name Value Characterization Functions
1426 /// @{
1427
1428 /// Return the number of bits in the APInt.
1429 unsigned getBitWidth() const { return BitWidth; }
1430
1431 /// Get the number of words.
1432 ///
1433 /// Here one word's bitwidth equals to that of uint64_t.
1434 ///
1435 /// \returns the number of words to hold the integer value of this APInt.
1436 unsigned getNumWords() const { return getNumWords(BitWidth); }
1437
1438 /// Get the number of words.
1439 ///
1440 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1441 ///
1442 /// \returns the number of words to hold the integer value with a given bit
1443 /// width.
1444 static unsigned getNumWords(unsigned BitWidth) {
1445 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1446 }
1447
1448 /// Compute the number of active bits in the value
1449 ///
1450 /// This function returns the number of active bits which is defined as the
1451 /// bit width minus the number of leading zeros. This is used in several
1452 /// computations to see how "wide" the value is.
1453 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1454
1455 /// Compute the number of active words in the value of this APInt.
1456 ///
1457 /// This is used in conjunction with getActiveData to extract the raw value of
1458 /// the APInt.
1459 unsigned getActiveWords() const {
1460 unsigned numActiveBits = getActiveBits();
1461 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1462 }
1463
1464 /// Get the minimum bit size for this signed APInt
1465 ///
1466 /// Computes the minimum bit width for this APInt while considering it to be a
1467 /// signed (and probably negative) value. If the value is not negative, this
1468 /// function returns the same value as getActiveBits()+1. Otherwise, it
1469 /// returns the smallest bit width that will retain the negative value. For
1470 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1471 /// for -1, this function will always return 1.
1472 unsigned getSignificantBits() const {
1473 return BitWidth - getNumSignBits() + 1;
1474 }
1475
1476 /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead.
1477 unsigned getMinSignedBits() const { return getSignificantBits(); }
1478
1479 /// Get zero extended value
1480 ///
1481 /// This method attempts to return the value of this APInt as a zero extended
1482 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1483 /// uint64_t. Otherwise an assertion will result.
1484 uint64_t getZExtValue() const {
1485 if (isSingleWord())
1486 return U.VAL;
1487 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")(static_cast <bool> (getActiveBits() <= 64 &&
"Too many bits for uint64_t") ? void (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1487, __extension__ __PRETTY_FUNCTION__
))
;
1488 return U.pVal[0];
1489 }
1490
1491 /// Get sign extended value
1492 ///
1493 /// This method attempts to return the value of this APInt as a sign extended
1494 /// int64_t. The bit width must be <= 64 or the value must fit within an
1495 /// int64_t. Otherwise an assertion will result.
1496 int64_t getSExtValue() const {
1497 if (isSingleWord())
1498 return SignExtend64(U.VAL, BitWidth);
1499 assert(getSignificantBits() <= 64 && "Too many bits for int64_t")(static_cast <bool> (getSignificantBits() <= 64 &&
"Too many bits for int64_t") ? void (0) : __assert_fail ("getSignificantBits() <= 64 && \"Too many bits for int64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1499, __extension__ __PRETTY_FUNCTION__
))
;
1500 return int64_t(U.pVal[0]);
1501 }
1502
1503 /// Get bits required for string value.
1504 ///
1505 /// This method determines how many bits are required to hold the APInt
1506 /// equivalent of the string given by \p str.
1507 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1508
1509 /// Get the bits that are sufficient to represent the string value. This may
1510 /// over estimate the amount of bits required, but it does not require
1511 /// parsing the value in the string.
1512 static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
1513
1514 /// The APInt version of the countLeadingZeros functions in
1515 /// MathExtras.h.
1516 ///
1517 /// It counts the number of zeros from the most significant bit to the first
1518 /// one bit.
1519 ///
1520 /// \returns BitWidth if the value is zero, otherwise returns the number of
1521 /// zeros from the most significant bit to the first one bits.
1522 unsigned countLeadingZeros() const {
1523 if (isSingleWord()) {
1524 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1525 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1526 }
1527 return countLeadingZerosSlowCase();
1528 }
1529
1530 /// Count the number of leading one bits.
1531 ///
1532 /// This function is an APInt version of the countLeadingOnes
1533 /// functions in MathExtras.h. It counts the number of ones from the most
1534 /// significant bit to the first zero bit.
1535 ///
1536 /// \returns 0 if the high order bit is not set, otherwise returns the number
1537 /// of 1 bits from the most significant to the least
1538 unsigned countLeadingOnes() const {
1539 if (isSingleWord()) {
1540 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1541 return 0;
1542 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1543 }
1544 return countLeadingOnesSlowCase();
1545 }
1546
1547 /// Computes the number of leading bits of this APInt that are equal to its
1548 /// sign bit.
1549 unsigned getNumSignBits() const {
1550 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1551 }
1552
1553 /// Count the number of trailing zero bits.
1554 ///
1555 /// This function is an APInt version of the countTrailingZeros
1556 /// functions in MathExtras.h. It counts the number of zeros from the least
1557 /// significant bit to the first set bit.
1558 ///
1559 /// \returns BitWidth if the value is zero, otherwise returns the number of
1560 /// zeros from the least significant bit to the first one bit.
1561 unsigned countTrailingZeros() const {
1562 if (isSingleWord()) {
1563 unsigned TrailingZeros = llvm::countTrailingZeros(U.VAL);
1564 return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
1565 }
1566 return countTrailingZerosSlowCase();
1567 }
1568
1569 /// Count the number of trailing one bits.
1570 ///
1571 /// This function is an APInt version of the countTrailingOnes
1572 /// functions in MathExtras.h. It counts the number of ones from the least
1573 /// significant bit to the first zero bit.
1574 ///
1575 /// \returns BitWidth if the value is all ones, otherwise returns the number
1576 /// of ones from the least significant bit to the first zero bit.
1577 unsigned countTrailingOnes() const {
1578 if (isSingleWord())
1579 return llvm::countTrailingOnes(U.VAL);
1580 return countTrailingOnesSlowCase();
1581 }
1582
1583 /// Count the number of bits set.
1584 ///
1585 /// This function is an APInt version of the countPopulation functions
1586 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1587 ///
1588 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1589 unsigned countPopulation() const {
1590 if (isSingleWord())
1591 return llvm::countPopulation(U.VAL);
1592 return countPopulationSlowCase();
1593 }
1594
1595 /// @}
1596 /// \name Conversion Functions
1597 /// @{
1598 void print(raw_ostream &OS, bool isSigned) const;
1599
1600 /// Converts an APInt to a string and append it to Str. Str is commonly a
1601 /// SmallString.
1602 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1603 bool formatAsCLiteral = false) const;
1604
1605 /// Considers the APInt to be unsigned and converts it into a string in the
1606 /// radix given. The radix can be 2, 8, 10 16, or 36.
1607 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1608 toString(Str, Radix, false, false);
1609 }
1610
1611 /// Considers the APInt to be signed and converts it into a string in the
1612 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1613 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1614 toString(Str, Radix, true, false);
1615 }
1616
1617 /// \returns a byte-swapped representation of this APInt Value.
1618 APInt byteSwap() const;
1619
1620 /// \returns the value with the bit representation reversed of this APInt
1621 /// Value.
1622 APInt reverseBits() const;
1623
1624 /// Converts this APInt to a double value.
1625 double roundToDouble(bool isSigned) const;
1626
1627 /// Converts this unsigned APInt to a double value.
1628 double roundToDouble() const { return roundToDouble(false); }
1629
1630 /// Converts this signed APInt to a double value.
1631 double signedRoundToDouble() const { return roundToDouble(true); }
1632
1633 /// Converts APInt bits to a double
1634 ///
1635 /// The conversion does not do a translation from integer to double, it just
1636 /// re-interprets the bits as a double. Note that it is valid to do this on
1637 /// any bit width. Exactly 64 bits will be translated.
1638 double bitsToDouble() const { return BitsToDouble(getWord(0)); }
1639
1640 /// Converts APInt bits to a float
1641 ///
1642 /// The conversion does not do a translation from integer to float, it just
1643 /// re-interprets the bits as a float. Note that it is valid to do this on
1644 /// any bit width. Exactly 32 bits will be translated.
1645 float bitsToFloat() const {
1646 return BitsToFloat(static_cast<uint32_t>(getWord(0)));
1647 }
1648
1649 /// Converts a double to APInt bits.
1650 ///
1651 /// The conversion does not do a translation from double to integer, it just
1652 /// re-interprets the bits of the double.
1653 static APInt doubleToBits(double V) {
1654 return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V));
1655 }
1656
1657 /// Converts a float to APInt bits.
1658 ///
1659 /// The conversion does not do a translation from float to integer, it just
1660 /// re-interprets the bits of the float.
1661 static APInt floatToBits(float V) {
1662 return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V));
1663 }
1664
1665 /// @}
1666 /// \name Mathematics Operations
1667 /// @{
1668
1669 /// \returns the floor log base 2 of this APInt.
1670 unsigned logBase2() const { return getActiveBits() - 1; }
1671
1672 /// \returns the ceil log base 2 of this APInt.
1673 unsigned ceilLogBase2() const {
1674 APInt temp(*this);
1675 --temp;
1676 return temp.getActiveBits();
1677 }
1678
1679 /// \returns the nearest log base 2 of this APInt. Ties round up.
1680 ///
1681 /// NOTE: When we have a BitWidth of 1, we define:
1682 ///
1683 /// log2(0) = UINT32_MAX
1684 /// log2(1) = 0
1685 ///
1686 /// to get around any mathematical concerns resulting from
1687 /// referencing 2 in a space where 2 does no exist.
1688 unsigned nearestLogBase2() const;
1689
1690 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1691 /// otherwise
1692 int32_t exactLogBase2() const {
1693 if (!isPowerOf2())
1694 return -1;
1695 return logBase2();
1696 }
1697
1698 /// Compute the square root.
1699 APInt sqrt() const;
1700
1701 /// Get the absolute value. If *this is < 0 then return -(*this), otherwise
1702 /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit
1703 /// wide APInt) is unchanged due to how negation works.
1704 APInt abs() const {
1705 if (isNegative())
1706 return -(*this);
1707 return *this;
1708 }
1709
1710 /// \returns the multiplicative inverse for a given modulo.
1711 APInt multiplicativeInverse(const APInt &modulo) const;
1712
1713 /// @}
1714 /// \name Building-block Operations for APInt and APFloat
1715 /// @{
1716
1717 // These building block operations operate on a representation of arbitrary
1718 // precision, two's-complement, bignum integer values. They should be
1719 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1720 // generally a pointer to the base of an array of integer parts, representing
1721 // an unsigned bignum, and a count of how many parts there are.
1722
1723 /// Sets the least significant part of a bignum to the input value, and zeroes
1724 /// out higher parts.
1725 static void tcSet(WordType *, WordType, unsigned);
1726
1727 /// Assign one bignum to another.
1728 static void tcAssign(WordType *, const WordType *, unsigned);
1729
1730 /// Returns true if a bignum is zero, false otherwise.
1731 static bool tcIsZero(const WordType *, unsigned);
1732
1733 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1734 static int tcExtractBit(const WordType *, unsigned bit);
1735
1736 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1737 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1738 /// significant bit of DST. All high bits above srcBITS in DST are
1739 /// zero-filled.
1740 static void tcExtract(WordType *, unsigned dstCount, const WordType *,
1741 unsigned srcBits, unsigned srcLSB);
1742
1743 /// Set the given bit of a bignum. Zero-based.
1744 static void tcSetBit(WordType *, unsigned bit);
1745
1746 /// Clear the given bit of a bignum. Zero-based.
1747 static void tcClearBit(WordType *, unsigned bit);
1748
1749 /// Returns the bit number of the least or most significant set bit of a
1750 /// number. If the input number has no bits set -1U is returned.
1751 static unsigned tcLSB(const WordType *, unsigned n);
1752 static unsigned tcMSB(const WordType *parts, unsigned n);
1753
1754 /// Negate a bignum in-place.
1755 static void tcNegate(WordType *, unsigned);
1756
1757 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1758 static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned);
1759 /// DST += RHS. Returns the carry flag.
1760 static WordType tcAddPart(WordType *, WordType, unsigned);
1761
1762 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1763 static WordType tcSubtract(WordType *, const WordType *, WordType carry,
1764 unsigned);
1765 /// DST -= RHS. Returns the carry flag.
1766 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1767
1768 /// DST += SRC * MULTIPLIER + PART if add is true
1769 /// DST = SRC * MULTIPLIER + PART if add is false
1770 ///
1771 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1772 /// start at the same point, i.e. DST == SRC.
1773 ///
1774 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1775 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1776 /// result, and if all of the omitted higher parts were zero return zero,
1777 /// otherwise overflow occurred and return one.
1778 static int tcMultiplyPart(WordType *dst, const WordType *src,
1779 WordType multiplier, WordType carry,
1780 unsigned srcParts, unsigned dstParts, bool add);
1781
1782 /// DST = LHS * RHS, where DST has the same width as the operands and is
1783 /// filled with the least significant parts of the result. Returns one if
1784 /// overflow occurred, otherwise zero. DST must be disjoint from both
1785 /// operands.
1786 static int tcMultiply(WordType *, const WordType *, const WordType *,
1787 unsigned);
1788
1789 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1790 /// operands. No overflow occurs. DST must be disjoint from both operands.
1791 static void tcFullMultiply(WordType *, const WordType *, const WordType *,
1792 unsigned, unsigned);
1793
1794 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1795 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1796 /// REMAINDER to the remainder, return zero. i.e.
1797 ///
1798 /// OLD_LHS = RHS * LHS + REMAINDER
1799 ///
1800 /// SCRATCH is a bignum of the same size as the operands and result for use by
1801 /// the routine; its contents need not be initialized and are destroyed. LHS,
1802 /// REMAINDER and SCRATCH must be distinct.
1803 static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder,
1804 WordType *scratch, unsigned parts);
1805
1806 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1807 /// restrictions on Count.
1808 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1809
1810 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1811 /// restrictions on Count.
1812 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1813
1814 /// Comparison (unsigned) of two bignums.
1815 static int tcCompare(const WordType *, const WordType *, unsigned);
1816
1817 /// Increment a bignum in-place. Return the carry flag.
1818 static WordType tcIncrement(WordType *dst, unsigned parts) {
1819 return tcAddPart(dst, 1, parts);
1820 }
1821
1822 /// Decrement a bignum in-place. Return the borrow flag.
1823 static WordType tcDecrement(WordType *dst, unsigned parts) {
1824 return tcSubtractPart(dst, 1, parts);
1825 }
1826
1827 /// Used to insert APInt objects, or objects that contain APInt objects, into
1828 /// FoldingSets.
1829 void Profile(FoldingSetNodeID &id) const;
1830
1831 /// debug method
1832 void dump() const;
1833
1834 /// Returns whether this instance allocated memory.
1835 bool needsCleanup() const { return !isSingleWord(); }
1836
1837private:
1838 /// This union is used to store the integer value. When the
1839 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
1840 union {
1841 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
1842 uint64_t *pVal; ///< Used to store the >64 bits integer value.
1843 } U;
1844
1845 unsigned BitWidth; ///< The number of bits in this APInt.
1846
1847 friend struct DenseMapInfo<APInt, void>;
1848 friend class APSInt;
1849
1850 /// This constructor is used only internally for speed of construction of
1851 /// temporaries. It is unsafe since it takes ownership of the pointer, so it
1852 /// is not public.
1853 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }
1854
1855 /// Determine which word a bit is in.
1856 ///
1857 /// \returns the word position for the specified bit position.
1858 static unsigned whichWord(unsigned bitPosition) {
1859 return bitPosition / APINT_BITS_PER_WORD;
1860 }
1861
1862 /// Determine which bit in a word the specified bit position is in.
1863 static unsigned whichBit(unsigned bitPosition) {
1864 return bitPosition % APINT_BITS_PER_WORD;
1865 }
1866
1867 /// Get a single bit mask.
1868 ///
1869 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
1870 /// This method generates and returns a uint64_t (word) mask for a single
1871 /// bit at a specific bit position. This is used to mask the bit in the
1872 /// corresponding word.
1873 static uint64_t maskBit(unsigned bitPosition) {
1874 return 1ULL << whichBit(bitPosition);
1875 }
1876
1877 /// Clear unused high order bits
1878 ///
1879 /// This method is used internally to clear the top "N" bits in the high order
1880 /// word that are not used by the APInt. This is needed after the most
1881 /// significant word is assigned a value to ensure that those bits are
1882 /// zero'd out.
1883 APInt &clearUnusedBits() {
1884 // Compute how many bits are used in the final word.
1885 unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;
1886
1887 // Mask out the high bits.
1888 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
1889 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1890 mask = 0;
1891
1892 if (isSingleWord())
1893 U.VAL &= mask;
1894 else
1895 U.pVal[getNumWords() - 1] &= mask;
1896 return *this;
1897 }
1898
1899 /// Get the word corresponding to a bit position
1900 /// \returns the corresponding word for the specified bit position.
1901 uint64_t getWord(unsigned bitPosition) const {
1902 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
1903 }
1904
1905 /// Utility method to change the bit width of this APInt to new bit width,
1906 /// allocating and/or deallocating as necessary. There is no guarantee on the
1907 /// value of any bits upon return. Caller should populate the bits after.
1908 void reallocate(unsigned NewBitWidth);
1909
1910 /// Convert a char array into an APInt
1911 ///
1912 /// \param radix 2, 8, 10, 16, or 36
1913 /// Converts a string into a number. The string must be non-empty
1914 /// and well-formed as a number of the given base. The bit-width
1915 /// must be sufficient to hold the result.
1916 ///
1917 /// This is used by the constructors that take string arguments.
1918 ///
1919 /// StringRef::getAsInteger is superficially similar but (1) does
1920 /// not assume that the string is well-formed and (2) grows the
1921 /// result to hold the input.
1922 void fromString(unsigned numBits, StringRef str, uint8_t radix);
1923
1924 /// An internal division function for dividing APInts.
1925 ///
1926 /// This is used by the toString method to divide by the radix. It simply
1927 /// provides a more convenient form of divide for internal use since KnuthDiv
1928 /// has specific constraints on its inputs. If those constraints are not met
1929 /// then it provides a simpler form of divide.
1930 static void divide(const WordType *LHS, unsigned lhsWords,
1931 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
1932 WordType *Remainder);
1933
1934 /// out-of-line slow case for inline constructor
1935 void initSlowCase(uint64_t val, bool isSigned);
1936
1937 /// shared code between two array constructors
1938 void initFromArray(ArrayRef<uint64_t> array);
1939
1940 /// out-of-line slow case for inline copy constructor
1941 void initSlowCase(const APInt &that);
1942
1943 /// out-of-line slow case for shl
1944 void shlSlowCase(unsigned ShiftAmt);
1945
1946 /// out-of-line slow case for lshr.
1947 void lshrSlowCase(unsigned ShiftAmt);
1948
1949 /// out-of-line slow case for ashr.
1950 void ashrSlowCase(unsigned ShiftAmt);
1951
1952 /// out-of-line slow case for operator=
1953 void assignSlowCase(const APInt &RHS);
1954
1955 /// out-of-line slow case for operator==
1956 bool equalSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1957
1958 /// out-of-line slow case for countLeadingZeros
1959 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
1960
1961 /// out-of-line slow case for countLeadingOnes.
1962 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
1963
1964 /// out-of-line slow case for countTrailingZeros.
1965 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
1966
1967 /// out-of-line slow case for countTrailingOnes
1968 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
1969
1970 /// out-of-line slow case for countPopulation
1971 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
1972
1973 /// out-of-line slow case for intersects.
1974 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1975
1976 /// out-of-line slow case for isSubsetOf.
1977 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1978
1979 /// out-of-line slow case for setBits.
1980 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
1981
1982 /// out-of-line slow case for flipAllBits.
1983 void flipAllBitsSlowCase();
1984
1985 /// out-of-line slow case for concat.
1986 APInt concatSlowCase(const APInt &NewLSB) const;
1987
1988 /// out-of-line slow case for operator&=.
1989 void andAssignSlowCase(const APInt &RHS);
1990
1991 /// out-of-line slow case for operator|=.
1992 void orAssignSlowCase(const APInt &RHS);
1993
1994 /// out-of-line slow case for operator^=.
1995 void xorAssignSlowCase(const APInt &RHS);
1996
1997 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
1998 /// to, or greater than RHS.
1999 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
2000
2001 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
2002 /// to, or greater than RHS.
2003 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
2004
2005 /// @}
2006};
2007
2008inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
2009
2010inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
2011
2012/// Unary bitwise complement operator.
2013///
2014/// \returns an APInt that is the bitwise complement of \p v.
2015inline APInt operator~(APInt v) {
2016 v.flipAllBits();
2017 return v;
2018}
2019
2020inline APInt operator&(APInt a, const APInt &b) {
2021 a &= b;
2022 return a;
2023}
2024
2025inline APInt operator&(const APInt &a, APInt &&b) {
2026 b &= a;
2027 return std::move(b);
2028}
2029
2030inline APInt operator&(APInt a, uint64_t RHS) {
2031 a &= RHS;
2032 return a;
2033}
2034
2035inline APInt operator&(uint64_t LHS, APInt b) {
2036 b &= LHS;
2037 return b;
2038}
2039
2040inline APInt operator|(APInt a, const APInt &b) {
2041 a |= b;
2042 return a;
2043}
2044
2045inline APInt operator|(const APInt &a, APInt &&b) {
2046 b |= a;
2047 return std::move(b);
2048}
2049
2050inline APInt operator|(APInt a, uint64_t RHS) {
2051 a |= RHS;
2052 return a;
2053}
2054
2055inline APInt operator|(uint64_t LHS, APInt b) {
2056 b |= LHS;
2057 return b;
2058}
2059
2060inline APInt operator^(APInt a, const APInt &b) {
2061 a ^= b;
2062 return a;
2063}
2064
2065inline APInt operator^(const APInt &a, APInt &&b) {
2066 b ^= a;
2067 return std::move(b);
2068}
2069
2070inline APInt operator^(APInt a, uint64_t RHS) {
2071 a ^= RHS;
2072 return a;
2073}
2074
2075inline APInt operator^(uint64_t LHS, APInt b) {
2076 b ^= LHS;
2077 return b;
2078}
2079
2080inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2081 I.print(OS, true);
2082 return OS;
2083}
2084
2085inline APInt operator-(APInt v) {
2086 v.negate();
2087 return v;
2088}
2089
2090inline APInt operator+(APInt a, const APInt &b) {
2091 a += b;
2092 return a;
2093}
2094
2095inline APInt operator+(const APInt &a, APInt &&b) {
2096 b += a;
2097 return std::move(b);
2098}
2099
2100inline APInt operator+(APInt a, uint64_t RHS) {
2101 a += RHS;
2102 return a;
2103}
2104
2105inline APInt operator+(uint64_t LHS, APInt b) {
2106 b += LHS;
2107 return b;
2108}
2109
2110inline APInt operator-(APInt a, const APInt &b) {
2111 a -= b;
2112 return a;
2113}
2114
2115inline APInt operator-(const APInt &a, APInt &&b) {
2116 b.negate();
2117 b += a;
2118 return std::move(b);
2119}
2120
2121inline APInt operator-(APInt a, uint64_t RHS) {
2122 a -= RHS;
2123 return a;
2124}
2125
2126inline APInt operator-(uint64_t LHS, APInt b) {
2127 b.negate();
2128 b += LHS;
2129 return b;
2130}
2131
2132inline APInt operator*(APInt a, uint64_t RHS) {
2133 a *= RHS;
2134 return a;
2135}
2136
2137inline APInt operator*(uint64_t LHS, APInt b) {
2138 b *= LHS;
2139 return b;
2140}
2141
2142namespace APIntOps {
2143
2144/// Determine the smaller of two APInts considered to be signed.
2145inline const APInt &smin(const APInt &A, const APInt &B) {
2146 return A.slt(B) ? A : B;
2147}
2148
2149/// Determine the larger of two APInts considered to be signed.
2150inline const APInt &smax(const APInt &A, const APInt &B) {
2151 return A.sgt(B) ? A : B;
2152}
2153
2154/// Determine the smaller of two APInts considered to be unsigned.
2155inline const APInt &umin(const APInt &A, const APInt &B) {
2156 return A.ult(B) ? A : B;
2157}
2158
2159/// Determine the larger of two APInts considered to be unsigned.
2160inline const APInt &umax(const APInt &A, const APInt &B) {
2161 return A.ugt(B) ? A : B;
2162}
2163
2164/// Compute GCD of two unsigned APInt values.
2165///
2166/// This function returns the greatest common divisor of the two APInt values
2167/// using Stein's algorithm.
2168///
2169/// \returns the greatest common divisor of A and B.
2170APInt GreatestCommonDivisor(APInt A, APInt B);
2171
2172/// Converts the given APInt to a double value.
2173///
2174/// Treats the APInt as an unsigned value for conversion purposes.
2175inline double RoundAPIntToDouble(const APInt &APIVal) {
2176 return APIVal.roundToDouble();
2177}
2178
2179/// Converts the given APInt to a double value.
2180///
2181/// Treats the APInt as a signed value for conversion purposes.
2182inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2183 return APIVal.signedRoundToDouble();
2184}
2185
2186/// Converts the given APInt to a float value.
2187inline float RoundAPIntToFloat(const APInt &APIVal) {
2188 return float(RoundAPIntToDouble(APIVal));
2189}
2190
2191/// Converts the given APInt to a float value.
2192///
2193/// Treats the APInt as a signed value for conversion purposes.
2194inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2195 return float(APIVal.signedRoundToDouble());
2196}
2197
2198/// Converts the given double value into a APInt.
2199///
2200/// This function convert a double value to an APInt value.
2201APInt RoundDoubleToAPInt(double Double, unsigned width);
2202
2203/// Converts a float value into a APInt.
2204///
2205/// Converts a float value into an APInt value.
2206inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2207 return RoundDoubleToAPInt(double(Float), width);
2208}
2209
2210/// Return A unsign-divided by B, rounded by the given rounding mode.
2211APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2212
2213/// Return A sign-divided by B, rounded by the given rounding mode.
2214APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2215
2216/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2217/// (e.g. 32 for i32).
2218/// This function finds the smallest number n, such that
2219/// (a) n >= 0 and q(n) = 0, or
2220/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2221/// integers, belong to two different intervals [Rk, Rk+R),
2222/// where R = 2^BW, and k is an integer.
2223/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2224/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2225/// subtraction (treated as addition of negated numbers) would always
2226/// count as an overflow, but here we want to allow values to decrease
2227/// and increase as long as they are within the same interval.
2228/// Specifically, adding of two negative numbers should not cause an
2229/// overflow (as long as the magnitude does not exceed the bit width).
2230/// On the other hand, given a positive number, adding a negative
2231/// number to it can give a negative result, which would cause the
2232/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2233/// treated as a special case of an overflow.
2234///
2235/// This function returns None if after finding k that minimizes the
2236/// positive solution to q(n) = kR, both solutions are contained between
2237/// two consecutive integers.
2238///
2239/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2240/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2241/// virtue of *signed* overflow. This function will *not* find such an n,
2242/// however it may find a value of n satisfying the inequalities due to
2243/// an *unsigned* overflow (if the values are treated as unsigned).
2244/// To find a solution for a signed overflow, treat it as a problem of
2245/// finding an unsigned overflow with a range with of BW-1.
2246///
2247/// The returned value may have a different bit width from the input
2248/// coefficients.
2249Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2250 unsigned RangeWidth);
2251
2252/// Compare two values, and if they are different, return the position of the
2253/// most significant bit that is different in the values.
2254Optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2255 const APInt &B);
2256
2257/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
2258/// by \param A to \param NewBitWidth bits.
2259///
2260/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
2261/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
2262/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other.
2263///
2264/// TODO: Do we need a mode where all bits must be set when merging down?
2265APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth);
2266} // namespace APIntOps
2267
2268// See friend declaration above. This additional declaration is required in
2269// order to compile LLVM with IBM xlC compiler.
2270hash_code hash_value(const APInt &Arg);
2271
2272/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2273/// with the integer held in IntVal.
2274void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2275
2276/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2277/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2278void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
2279
2280/// Provide DenseMapInfo for APInt.
2281template <> struct DenseMapInfo<APInt, void> {
2282 static inline APInt getEmptyKey() {
2283 APInt V(nullptr, 0);
2284 V.U.VAL = 0;
2285 return V;
2286 }
2287
2288 static inline APInt getTombstoneKey() {
2289 APInt V(nullptr, 0);
2290 V.U.VAL = 1;
2291 return V;
2292 }
2293
2294 static unsigned getHashValue(const APInt &Key);
2295
2296 static bool isEqual(const APInt &LHS, const APInt &RHS) {
2297 return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
2298 }
2299};
2300
2301} // namespace llvm
2302
2303#endif