Bug Summary

File:build/source/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1560, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -ffp-contract=on -fno-rounding-math -mconstructor-aliases -funwind-tables=2 -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -resource-dir /usr/lib/llvm-17/lib/clang/17 -I lib/Target/Hexagon -I /build/source/llvm/lib/Target/Hexagon -I include -I /build/source/llvm/include -D _DEBUG -D _GLIBCXX_ASSERTIONS -D _GNU_SOURCE -D _LIBCPP_ENABLE_ASSERTIONS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -D _FORTIFY_SOURCE=2 -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-17/lib/clang/17/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -fmacro-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fmacro-prefix-map=/build/source/= -fcoverage-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fcoverage-prefix-map=/build/source/= -source-date-epoch 1677881322 -O2 -Wno-unused-command-line-argument -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -Wno-misleading-indentation -std=c++17 -fdeprecated-macro -fdebug-compilation-dir=/build/source/build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/build-llvm/tools/clang/stage2-bins=build-llvm/tools/clang/stage2-bins -fdebug-prefix-map=/build/source/= -ferror-limit 19 -fvisibility=hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fcolor-diagnostics -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2023-03-04-000721-16503-1 -x c++ /build/source/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

/build/source/llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonLoopIdiomRecognition.h"
10#include "llvm/ADT/APInt.h"
11#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallPtrSet.h"
14#include "llvm/ADT/SmallSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Analysis/AliasAnalysis.h"
18#include "llvm/Analysis/InstructionSimplify.h"
19#include "llvm/Analysis/LoopAnalysisManager.h"
20#include "llvm/Analysis/LoopInfo.h"
21#include "llvm/Analysis/LoopPass.h"
22#include "llvm/Analysis/MemoryLocation.h"
23#include "llvm/Analysis/ScalarEvolution.h"
24#include "llvm/Analysis/ScalarEvolutionExpressions.h"
25#include "llvm/Analysis/TargetLibraryInfo.h"
26#include "llvm/Analysis/ValueTracking.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DataLayout.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/IR/DerivedTypes.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/IntrinsicInst.h"
41#include "llvm/IR/Intrinsics.h"
42#include "llvm/IR/IntrinsicsHexagon.h"
43#include "llvm/IR/Module.h"
44#include "llvm/IR/PassManager.h"
45#include "llvm/IR/PatternMatch.h"
46#include "llvm/IR/Type.h"
47#include "llvm/IR/User.h"
48#include "llvm/IR/Value.h"
49#include "llvm/InitializePasses.h"
50#include "llvm/Pass.h"
51#include "llvm/Support/Casting.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Compiler.h"
54#include "llvm/Support/Debug.h"
55#include "llvm/Support/ErrorHandling.h"
56#include "llvm/Support/KnownBits.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/TargetParser/Triple.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include "llvm/Transforms/Utils/Local.h"
62#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
63#include <algorithm>
64#include <array>
65#include <cassert>
66#include <cstdint>
67#include <cstdlib>
68#include <deque>
69#include <functional>
70#include <iterator>
71#include <map>
72#include <set>
73#include <utility>
74#include <vector>
75
76#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
77
78using namespace llvm;
79
80static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memcpy in loop idiom recognition"));
83
84static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
85 cl::Hidden, cl::init(false),
86 cl::desc("Disable generation of memmove in loop idiom recognition"));
87
88static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
89 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
90 "check guarding the memmove."));
91
92static cl::opt<unsigned> CompileTimeMemSizeThreshold(
93 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
94 cl::desc("Threshold (in bytes) to perform the transformation, if the "
95 "runtime loop count (mem transfer size) is known at compile-time."));
96
97static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
98 cl::Hidden, cl::init(true),
99 cl::desc("Only enable generating memmove in non-nested loops"));
100
101static cl::opt<bool> HexagonVolatileMemcpy(
102 "disable-hexagon-volatile-memcpy", cl::Hidden, cl::init(false),
103 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
104
105static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
106 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
107
108static const char *HexagonVolatileMemcpyName
109 = "hexagon_memcpy_forward_vp4cp4n2";
110
111
112namespace llvm {
113
114void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
115Pass *createHexagonLoopIdiomPass();
116
117} // end namespace llvm
118
119namespace {
120
121class HexagonLoopIdiomRecognize {
122public:
123 explicit HexagonLoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
124 LoopInfo *LF, const TargetLibraryInfo *TLI,
125 ScalarEvolution *SE)
126 : AA(AA), DT(DT), LF(LF), TLI(TLI), SE(SE) {}
127
128 bool run(Loop *L);
129
130private:
131 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
132 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
133 void collectStores(Loop *CurLoop, BasicBlock *BB,
134 SmallVectorImpl<StoreInst *> &Stores);
135 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
136 bool coverLoop(Loop *L, SmallVectorImpl<Instruction *> &Insts) const;
137 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
138 SmallVectorImpl<BasicBlock *> &ExitBlocks);
139 bool runOnCountableLoop(Loop *L);
140
141 AliasAnalysis *AA;
142 const DataLayout *DL;
143 DominatorTree *DT;
144 LoopInfo *LF;
145 const TargetLibraryInfo *TLI;
146 ScalarEvolution *SE;
147 bool HasMemcpy, HasMemmove;
148};
149
150class HexagonLoopIdiomRecognizeLegacyPass : public LoopPass {
151public:
152 static char ID;
153
154 explicit HexagonLoopIdiomRecognizeLegacyPass() : LoopPass(ID) {
155 initializeHexagonLoopIdiomRecognizeLegacyPassPass(
156 *PassRegistry::getPassRegistry());
157 }
158
159 StringRef getPassName() const override {
160 return "Recognize Hexagon-specific loop idioms";
161 }
162
163 void getAnalysisUsage(AnalysisUsage &AU) const override {
164 AU.addRequired<LoopInfoWrapperPass>();
165 AU.addRequiredID(LoopSimplifyID);
166 AU.addRequiredID(LCSSAID);
167 AU.addRequired<AAResultsWrapperPass>();
168 AU.addRequired<ScalarEvolutionWrapperPass>();
169 AU.addRequired<DominatorTreeWrapperPass>();
170 AU.addRequired<TargetLibraryInfoWrapperPass>();
171 AU.addPreserved<TargetLibraryInfoWrapperPass>();
172 }
173
174 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
175};
176
177struct Simplifier {
178 struct Rule {
179 using FuncType = std::function<Value *(Instruction *, LLVMContext &)>;
180 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
181 StringRef Name; // For debugging.
182 FuncType Fn;
183 };
184
185 void addRule(StringRef N, const Rule::FuncType &F) {
186 Rules.push_back(Rule(N, F));
187 }
188
189private:
190 struct WorkListType {
191 WorkListType() = default;
192
193 void push_back(Value *V) {
194 // Do not push back duplicates.
195 if (S.insert(V).second)
196 Q.push_back(V);
197 }
198
199 Value *pop_front_val() {
200 Value *V = Q.front();
201 Q.pop_front();
202 S.erase(V);
203 return V;
204 }
205
206 bool empty() const { return Q.empty(); }
207
208 private:
209 std::deque<Value *> Q;
210 std::set<Value *> S;
211 };
212
213 using ValueSetType = std::set<Value *>;
214
215 std::vector<Rule> Rules;
216
217public:
218 struct Context {
219 using ValueMapType = DenseMap<Value *, Value *>;
220
221 Value *Root;
222 ValueSetType Used; // The set of all cloned values used by Root.
223 ValueSetType Clones; // The set of all cloned values.
224 LLVMContext &Ctx;
225
226 Context(Instruction *Exp)
227 : Ctx(Exp->getParent()->getParent()->getContext()) {
228 initialize(Exp);
229 }
230
231 ~Context() { cleanup(); }
232
233 void print(raw_ostream &OS, const Value *V) const;
234 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
235
236 private:
237 friend struct Simplifier;
238
239 void initialize(Instruction *Exp);
240 void cleanup();
241
242 template <typename FuncT> void traverse(Value *V, FuncT F);
243 void record(Value *V);
244 void use(Value *V);
245 void unuse(Value *V);
246
247 bool equal(const Instruction *I, const Instruction *J) const;
248 Value *find(Value *Tree, Value *Sub) const;
249 Value *subst(Value *Tree, Value *OldV, Value *NewV);
250 void replace(Value *OldV, Value *NewV);
251 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
252 };
253
254 Value *simplify(Context &C);
255};
256
257 struct PE {
258 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
259
260 const Simplifier::Context &C;
261 const Value *V;
262 };
263
264 LLVM_ATTRIBUTE_USED__attribute__((__used__))
265 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
266 P.C.print(OS, P.V ? P.V : P.C.Root);
267 return OS;
268 }
269
270} // end anonymous namespace
271
272char HexagonLoopIdiomRecognizeLegacyPass::ID = 0;
273
274INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
275 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce
(PassRegistry &Registry) {
276INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
277INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
278INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
279INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
280INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
281INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
282INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
283INITIALIZE_PASS_END(HexagonLoopIdiomRecognizeLegacyPass, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
284 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognizeLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognizeLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
; void llvm::initializeHexagonLoopIdiomRecognizeLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizeLegacyPassPassFlag
, initializeHexagonLoopIdiomRecognizeLegacyPassPassOnce, std::
ref(Registry)); }
285
286template <typename FuncT>
287void Simplifier::Context::traverse(Value *V, FuncT F) {
288 WorkListType Q;
289 Q.push_back(V);
290
291 while (!Q.empty()) {
292 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
293 if (!U || U->getParent())
294 continue;
295 if (!F(U))
296 continue;
297 for (Value *Op : U->operands())
298 Q.push_back(Op);
299 }
300}
301
302void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
303 const auto *U = dyn_cast<const Instruction>(V);
304 if (!U) {
305 OS << V << '(' << *V << ')';
306 return;
307 }
308
309 if (U->getParent()) {
310 OS << U << '(';
311 U->printAsOperand(OS, true);
312 OS << ')';
313 return;
314 }
315
316 unsigned N = U->getNumOperands();
317 if (N != 0)
318 OS << U << '(';
319 OS << U->getOpcodeName();
320 for (const Value *Op : U->operands()) {
321 OS << ' ';
322 print(OS, Op);
323 }
324 if (N != 0)
325 OS << ')';
326}
327
328void Simplifier::Context::initialize(Instruction *Exp) {
329 // Perform a deep clone of the expression, set Root to the root
330 // of the clone, and build a map from the cloned values to the
331 // original ones.
332 ValueMapType M;
333 BasicBlock *Block = Exp->getParent();
334 WorkListType Q;
335 Q.push_back(Exp);
336
337 while (!Q.empty()) {
338 Value *V = Q.pop_front_val();
339 if (M.find(V) != M.end())
340 continue;
341 if (Instruction *U = dyn_cast<Instruction>(V)) {
342 if (isa<PHINode>(U) || U->getParent() != Block)
343 continue;
344 for (Value *Op : U->operands())
345 Q.push_back(Op);
346 M.insert({U, U->clone()});
347 }
348 }
349
350 for (std::pair<Value*,Value*> P : M) {
351 Instruction *U = cast<Instruction>(P.second);
352 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
353 auto F = M.find(U->getOperand(i));
354 if (F != M.end())
355 U->setOperand(i, F->second);
356 }
357 }
358
359 auto R = M.find(Exp);
360 assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail
("R != M.end()", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 360, __extension__ __PRETTY_FUNCTION__))
;
361 Root = R->second;
362
363 record(Root);
364 use(Root);
365}
366
367void Simplifier::Context::record(Value *V) {
368 auto Record = [this](Instruction *U) -> bool {
369 Clones.insert(U);
370 return true;
371 };
372 traverse(V, Record);
373}
374
375void Simplifier::Context::use(Value *V) {
376 auto Use = [this](Instruction *U) -> bool {
377 Used.insert(U);
378 return true;
379 };
380 traverse(V, Use);
381}
382
383void Simplifier::Context::unuse(Value *V) {
384 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
385 return;
386
387 auto Unuse = [this](Instruction *U) -> bool {
388 if (!U->use_empty())
389 return false;
390 Used.erase(U);
391 return true;
392 };
393 traverse(V, Unuse);
394}
395
396Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
397 if (Tree == OldV)
398 return NewV;
399 if (OldV == NewV)
400 return Tree;
401
402 WorkListType Q;
403 Q.push_back(Tree);
404 while (!Q.empty()) {
405 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
406 // If U is not an instruction, or it's not a clone, skip it.
407 if (!U || U->getParent())
408 continue;
409 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
410 Value *Op = U->getOperand(i);
411 if (Op == OldV) {
412 U->setOperand(i, NewV);
413 unuse(OldV);
414 } else {
415 Q.push_back(Op);
416 }
417 }
418 }
419 return Tree;
420}
421
422void Simplifier::Context::replace(Value *OldV, Value *NewV) {
423 if (Root == OldV) {
424 Root = NewV;
425 use(Root);
426 return;
427 }
428
429 // NewV may be a complex tree that has just been created by one of the
430 // transformation rules. We need to make sure that it is commoned with
431 // the existing Root to the maximum extent possible.
432 // Identify all subtrees of NewV (including NewV itself) that have
433 // equivalent counterparts in Root, and replace those subtrees with
434 // these counterparts.
435 WorkListType Q;
436 Q.push_back(NewV);
437 while (!Q.empty()) {
438 Value *V = Q.pop_front_val();
439 Instruction *U = dyn_cast<Instruction>(V);
440 if (!U || U->getParent())
441 continue;
442 if (Value *DupV = find(Root, V)) {
443 if (DupV != V)
444 NewV = subst(NewV, V, DupV);
445 } else {
446 for (Value *Op : U->operands())
447 Q.push_back(Op);
448 }
449 }
450
451 // Now, simply replace OldV with NewV in Root.
452 Root = subst(Root, OldV, NewV);
453 use(Root);
454}
455
456void Simplifier::Context::cleanup() {
457 for (Value *V : Clones) {
458 Instruction *U = cast<Instruction>(V);
459 if (!U->getParent())
460 U->dropAllReferences();
461 }
462
463 for (Value *V : Clones) {
464 Instruction *U = cast<Instruction>(V);
465 if (!U->getParent())
466 U->deleteValue();
467 }
468}
469
470bool Simplifier::Context::equal(const Instruction *I,
471 const Instruction *J) const {
472 if (I == J)
473 return true;
474 if (!I->isSameOperationAs(J))
475 return false;
476 if (isa<PHINode>(I))
477 return I->isIdenticalTo(J);
478
479 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
480 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
481 if (OpI == OpJ)
482 continue;
483 auto *InI = dyn_cast<const Instruction>(OpI);
484 auto *InJ = dyn_cast<const Instruction>(OpJ);
485 if (InI && InJ) {
486 if (!equal(InI, InJ))
487 return false;
488 } else if (InI != InJ || !InI)
489 return false;
490 }
491 return true;
492}
493
494Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
495 Instruction *SubI = dyn_cast<Instruction>(Sub);
496 WorkListType Q;
497 Q.push_back(Tree);
498
499 while (!Q.empty()) {
500 Value *V = Q.pop_front_val();
501 if (V == Sub)
502 return V;
503 Instruction *U = dyn_cast<Instruction>(V);
504 if (!U || U->getParent())
505 continue;
506 if (SubI && equal(SubI, U))
507 return U;
508 assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0)
: __assert_fail ("!isa<PHINode>(U)", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 508, __extension__ __PRETTY_FUNCTION__))
;
509 for (Value *Op : U->operands())
510 Q.push_back(Op);
511 }
512 return nullptr;
513}
514
515void Simplifier::Context::link(Instruction *I, BasicBlock *B,
516 BasicBlock::iterator At) {
517 if (I->getParent())
518 return;
519
520 for (Value *Op : I->operands()) {
521 if (Instruction *OpI = dyn_cast<Instruction>(Op))
522 link(OpI, B, At);
523 }
524
525 I->insertInto(B, At);
526}
527
528Value *Simplifier::Context::materialize(BasicBlock *B,
529 BasicBlock::iterator At) {
530 if (Instruction *RootI = dyn_cast<Instruction>(Root))
531 link(RootI, B, At);
532 return Root;
533}
534
535Value *Simplifier::simplify(Context &C) {
536 WorkListType Q;
537 Q.push_back(C.Root);
538 unsigned Count = 0;
539 const unsigned Limit = SimplifyLimit;
540
541 while (!Q.empty()) {
542 if (Count++ >= Limit)
543 break;
544 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
545 if (!U || U->getParent() || !C.Used.count(U))
546 continue;
547 bool Changed = false;
548 for (Rule &R : Rules) {
549 Value *W = R.Fn(U, C.Ctx);
550 if (!W)
551 continue;
552 Changed = true;
553 C.record(W);
554 C.replace(U, W);
555 Q.push_back(C.Root);
556 break;
557 }
558 if (!Changed) {
559 for (Value *Op : U->operands())
560 Q.push_back(Op);
561 }
562 }
563 return Count < Limit ? C.Root : nullptr;
564}
565
566//===----------------------------------------------------------------------===//
567//
568// Implementation of PolynomialMultiplyRecognize
569//
570//===----------------------------------------------------------------------===//
571
572namespace {
573
574 class PolynomialMultiplyRecognize {
575 public:
576 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
577 const DominatorTree &dt, const TargetLibraryInfo &tli,
578 ScalarEvolution &se)
579 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
580
581 bool recognize();
582
583 private:
584 using ValueSeq = SetVector<Value *>;
585
586 IntegerType *getPmpyType() const {
587 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
588 return IntegerType::get(Ctx, 32);
589 }
590
591 bool isPromotableTo(Value *V, IntegerType *Ty);
592 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
593 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
594
595 Value *getCountIV(BasicBlock *BB);
596 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
597 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
598 ValueSeq &Late);
599 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
600 bool commutesWithShift(Instruction *I);
601 bool highBitsAreZero(Value *V, unsigned IterCount);
602 bool keepsHighBitsZero(Value *V, unsigned IterCount);
603 bool isOperandShifted(Instruction *I, Value *Op);
604 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
605 unsigned IterCount);
606 void cleanupLoopBody(BasicBlock *LoopB);
607
608 struct ParsedValues {
609 ParsedValues() = default;
610
611 Value *M = nullptr;
612 Value *P = nullptr;
613 Value *Q = nullptr;
614 Value *R = nullptr;
615 Value *X = nullptr;
616 Instruction *Res = nullptr;
617 unsigned IterCount = 0;
618 bool Left = false;
619 bool Inv = false;
620 };
621
622 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
623 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
624 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
625 Value *CIV, ParsedValues &PV, bool PreScan);
626 unsigned getInverseMxN(unsigned QP);
627 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
628
629 void setupPreSimplifier(Simplifier &S);
630 void setupPostSimplifier(Simplifier &S);
631
632 Loop *CurLoop;
633 const DataLayout &DL;
634 const DominatorTree &DT;
635 const TargetLibraryInfo &TLI;
636 ScalarEvolution &SE;
637 };
638
639} // end anonymous namespace
640
641Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
642 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
643 if (std::distance(PI, PE) != 2)
644 return nullptr;
645 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
646
647 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
648 auto *PN = cast<PHINode>(I);
649 Value *InitV = PN->getIncomingValueForBlock(PB);
650 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
651 continue;
652 Value *IterV = PN->getIncomingValueForBlock(BB);
653 auto *BO = dyn_cast<BinaryOperator>(IterV);
654 if (!BO)
655 continue;
656 if (BO->getOpcode() != Instruction::Add)
657 continue;
658 Value *IncV = nullptr;
659 if (BO->getOperand(0) == PN)
660 IncV = BO->getOperand(1);
661 else if (BO->getOperand(1) == PN)
662 IncV = BO->getOperand(0);
663 if (IncV == nullptr)
664 continue;
665
666 if (auto *T = dyn_cast<ConstantInt>(IncV))
667 if (T->getZExtValue() == 1)
668 return PN;
669 }
670 return nullptr;
671}
672
673static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
674 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
675 Use &TheUse = UI.getUse();
676 ++UI;
677 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
678 if (BB == II->getParent())
679 II->replaceUsesOfWith(I, J);
680 }
681}
682
683bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
684 Value *CIV, ParsedValues &PV) {
685 // Match the following:
686 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
687 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
688 // The condition may also check for equality with the masked value, i.e
689 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
690 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
691
692 Value *CondV = SelI->getCondition();
693 Value *TrueV = SelI->getTrueValue();
694 Value *FalseV = SelI->getFalseValue();
695
696 using namespace PatternMatch;
697
698 CmpInst::Predicate P;
699 Value *A = nullptr, *B = nullptr, *C = nullptr;
700
701 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
702 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
703 return false;
704 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
705 return false;
706 // Matched: select (A & B) == C ? ... : ...
707 // select (A & B) != C ? ... : ...
708
709 Value *X = nullptr, *Sh1 = nullptr;
710 // Check (A & B) for (X & (1 << i)):
711 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
712 Sh1 = A;
713 X = B;
714 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
715 Sh1 = B;
716 X = A;
717 } else {
718 // TODO: Could also check for an induction variable containing single
719 // bit shifted left by 1 in each iteration.
720 return false;
721 }
722
723 bool TrueIfZero;
724
725 // Check C against the possible values for comparison: 0 and (1 << i):
726 if (match(C, m_Zero()))
727 TrueIfZero = (P == CmpInst::ICMP_EQ);
728 else if (C == Sh1)
729 TrueIfZero = (P == CmpInst::ICMP_NE);
730 else
731 return false;
732
733 // So far, matched:
734 // select (X & (1 << i)) ? ... : ...
735 // including variations of the check against zero/non-zero value.
736
737 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
738 if (TrueIfZero) {
739 ShouldSameV = TrueV;
740 ShouldXoredV = FalseV;
741 } else {
742 ShouldSameV = FalseV;
743 ShouldXoredV = TrueV;
744 }
745
746 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
747 Value *T = nullptr;
748 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
749 // Matched: select +++ ? ... : Y ^ Z
750 // select +++ ? Y ^ Z : ...
751 // where +++ denotes previously checked matches.
752 if (ShouldSameV == Y)
753 T = Z;
754 else if (ShouldSameV == Z)
755 T = Y;
756 else
757 return false;
758 R = ShouldSameV;
759 // Matched: select +++ ? R : R ^ T
760 // select +++ ? R ^ T : R
761 // depending on TrueIfZero.
762
763 } else if (match(ShouldSameV, m_Zero())) {
764 // Matched: select +++ ? 0 : ...
765 // select +++ ? ... : 0
766 if (!SelI->hasOneUse())
767 return false;
768 T = ShouldXoredV;
769 // Matched: select +++ ? 0 : T
770 // select +++ ? T : 0
771
772 Value *U = *SelI->user_begin();
773 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
774 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
775 return false;
776 // Matched: xor (select +++ ? 0 : T), R
777 // xor (select +++ ? T : 0), R
778 } else
779 return false;
780
781 // The xor input value T is isolated into its own match so that it could
782 // be checked against an induction variable containing a shifted bit
783 // (todo).
784 // For now, check against (Q << i).
785 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
786 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
787 return false;
788 // Matched: select +++ ? R : R ^ (Q << i)
789 // select +++ ? R ^ (Q << i) : R
790
791 PV.X = X;
792 PV.Q = Q;
793 PV.R = R;
794 PV.Left = true;
795 return true;
796}
797
798bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
799 ParsedValues &PV) {
800 // Match the following:
801 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
802 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
803 // The condition may also check for equality with the masked value, i.e
804 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
805 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
806
807 Value *CondV = SelI->getCondition();
808 Value *TrueV = SelI->getTrueValue();
809 Value *FalseV = SelI->getFalseValue();
810
811 using namespace PatternMatch;
812
813 Value *C = nullptr;
814 CmpInst::Predicate P;
815 bool TrueIfZero;
816
817 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
818 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
819 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
820 return false;
821 // Matched: select C == 0 ? ... : ...
822 // select C != 0 ? ... : ...
823 TrueIfZero = (P == CmpInst::ICMP_EQ);
824 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
825 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
826 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
827 return false;
828 // Matched: select C == 1 ? ... : ...
829 // select C != 1 ? ... : ...
830 TrueIfZero = (P == CmpInst::ICMP_NE);
831 } else
832 return false;
833
834 Value *X = nullptr;
835 if (!match(C, m_And(m_Value(X), m_One())) &&
836 !match(C, m_And(m_One(), m_Value(X))))
837 return false;
838 // Matched: select (X & 1) == +++ ? ... : ...
839 // select (X & 1) != +++ ? ... : ...
840
841 Value *R = nullptr, *Q = nullptr;
842 if (TrueIfZero) {
843 // The select's condition is true if the tested bit is 0.
844 // TrueV must be the shift, FalseV must be the xor.
845 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
846 return false;
847 // Matched: select +++ ? (R >> 1) : ...
848 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
849 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
850 return false;
851 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
852 // with commuting ^.
853 } else {
854 // The select's condition is true if the tested bit is 1.
855 // TrueV must be the xor, FalseV must be the shift.
856 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
857 return false;
858 // Matched: select +++ ? ... : (R >> 1)
859 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
860 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
861 return false;
862 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
863 // with commuting ^.
864 }
865
866 PV.X = X;
867 PV.Q = Q;
868 PV.R = R;
869 PV.Left = false;
870 return true;
871}
872
873bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
874 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
875 bool PreScan) {
876 using namespace PatternMatch;
877
878 // The basic pattern for R = P.Q is:
879 // for i = 0..31
880 // R = phi (0, R')
881 // if (P & (1 << i)) ; test-bit(P, i)
882 // R' = R ^ (Q << i)
883 //
884 // Similarly, the basic pattern for R = (P/Q).Q - P
885 // for i = 0..31
886 // R = phi(P, R')
887 // if (R & (1 << i))
888 // R' = R ^ (Q << i)
889
890 // There exist idioms, where instead of Q being shifted left, P is shifted
891 // right. This produces a result that is shifted right by 32 bits (the
892 // non-shifted result is 64-bit).
893 //
894 // For R = P.Q, this would be:
895 // for i = 0..31
896 // R = phi (0, R')
897 // if ((P >> i) & 1)
898 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
899 // else ; be shifted by 1, not i.
900 // R' = R >> 1
901 //
902 // And for the inverse:
903 // for i = 0..31
904 // R = phi (P, R')
905 // if (R & 1)
906 // R' = (R >> 1) ^ Q
907 // else
908 // R' = R >> 1
909
910 // The left-shifting idioms share the same pattern:
911 // select (X & (1 << i)) ? R ^ (Q << i) : R
912 // Similarly for right-shifting idioms:
913 // select (X & 1) ? (R >> 1) ^ Q
914
915 if (matchLeftShift(SelI, CIV, PV)) {
916 // If this is a pre-scan, getting this far is sufficient.
917 if (PreScan)
918 return true;
919
920 // Need to make sure that the SelI goes back into R.
921 auto *RPhi = dyn_cast<PHINode>(PV.R);
922 if (!RPhi)
923 return false;
924 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
925 return false;
926 PV.Res = SelI;
927
928 // If X is loop invariant, it must be the input polynomial, and the
929 // idiom is the basic polynomial multiply.
930 if (CurLoop->isLoopInvariant(PV.X)) {
931 PV.P = PV.X;
932 PV.Inv = false;
933 } else {
934 // X is not loop invariant. If X == R, this is the inverse pmpy.
935 // Otherwise, check for an xor with an invariant value. If the
936 // variable argument to the xor is R, then this is still a valid
937 // inverse pmpy.
938 PV.Inv = true;
939 if (PV.X != PV.R) {
940 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
941 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
942 return false;
943 auto *I1 = dyn_cast<Instruction>(X1);
944 auto *I2 = dyn_cast<Instruction>(X2);
945 if (!I1 || I1->getParent() != LoopB) {
946 Var = X2;
947 Inv = X1;
948 } else if (!I2 || I2->getParent() != LoopB) {
949 Var = X1;
950 Inv = X2;
951 } else
952 return false;
953 if (Var != PV.R)
954 return false;
955 PV.M = Inv;
956 }
957 // The input polynomial P still needs to be determined. It will be
958 // the entry value of R.
959 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
960 PV.P = EntryP;
961 }
962
963 return true;
964 }
965
966 if (matchRightShift(SelI, PV)) {
967 // If this is an inverse pattern, the Q polynomial must be known at
968 // compile time.
969 if (PV.Inv && !isa<ConstantInt>(PV.Q))
970 return false;
971 if (PreScan)
972 return true;
973 // There is no exact matching of right-shift pmpy.
974 return false;
975 }
976
977 return false;
978}
979
980bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
981 IntegerType *DestTy) {
982 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
983 if (!T || T->getBitWidth() > DestTy->getBitWidth())
984 return false;
985 if (T->getBitWidth() == DestTy->getBitWidth())
986 return true;
987 // Non-instructions are promotable. The reason why an instruction may not
988 // be promotable is that it may produce a different result if its operands
989 // and the result are promoted, for example, it may produce more non-zero
990 // bits. While it would still be possible to represent the proper result
991 // in a wider type, it may require adding additional instructions (which
992 // we don't want to do).
993 Instruction *In = dyn_cast<Instruction>(Val);
994 if (!In)
995 return true;
996 // The bitwidth of the source type is smaller than the destination.
997 // Check if the individual operation can be promoted.
998 switch (In->getOpcode()) {
999 case Instruction::PHI:
1000 case Instruction::ZExt:
1001 case Instruction::And:
1002 case Instruction::Or:
1003 case Instruction::Xor:
1004 case Instruction::LShr: // Shift right is ok.
1005 case Instruction::Select:
1006 case Instruction::Trunc:
1007 return true;
1008 case Instruction::ICmp:
1009 if (CmpInst *CI = cast<CmpInst>(In))
1010 return CI->isEquality() || CI->isUnsigned();
1011 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1011
)
;
1012 case Instruction::Add:
1013 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
1014 }
1015 return false;
1016}
1017
1018void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1019 IntegerType *DestTy, BasicBlock *LoopB) {
1020 Type *OrigTy = In->getType();
1021 assert(!OrigTy->isVoidTy() && "Invalid instruction to promote")(static_cast <bool> (!OrigTy->isVoidTy() && "Invalid instruction to promote"
) ? void (0) : __assert_fail ("!OrigTy->isVoidTy() && \"Invalid instruction to promote\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1021
, __extension__ __PRETTY_FUNCTION__))
;
1022
1023 // Leave boolean values alone.
1024 if (!In->getType()->isIntegerTy(1))
1025 In->mutateType(DestTy);
1026 unsigned DestBW = DestTy->getBitWidth();
1027
1028 // Handle PHIs.
1029 if (PHINode *P = dyn_cast<PHINode>(In)) {
1030 unsigned N = P->getNumIncomingValues();
1031 for (unsigned i = 0; i != N; ++i) {
1032 BasicBlock *InB = P->getIncomingBlock(i);
1033 if (InB == LoopB)
1034 continue;
1035 Value *InV = P->getIncomingValue(i);
1036 IntegerType *Ty = cast<IntegerType>(InV->getType());
1037 // Do not promote values in PHI nodes of type i1.
1038 if (Ty != P->getType()) {
1039 // If the value type does not match the PHI type, the PHI type
1040 // must have been promoted.
1041 assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ?
void (0) : __assert_fail ("Ty->getBitWidth() < DestBW"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1041
, __extension__ __PRETTY_FUNCTION__))
;
1042 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1043 P->setIncomingValue(i, InV);
1044 }
1045 }
1046 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1047 Value *Op = Z->getOperand(0);
1048 if (Op->getType() == Z->getType())
1049 Z->replaceAllUsesWith(Op);
1050 Z->eraseFromParent();
1051 return;
1052 }
1053 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1054 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1055 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1056 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1057 T->replaceAllUsesWith(And);
1058 T->eraseFromParent();
1059 return;
1060 }
1061
1062 // Promote immediates.
1063 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1064 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1065 if (CI->getType()->getBitWidth() < DestBW)
1066 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1067 }
1068}
1069
1070bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1071 BasicBlock *ExitB) {
1072 assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail (
"LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1072, __extension__ __PRETTY_FUNCTION__))
;
1073 // Skip loops where the exit block has more than one predecessor. The values
1074 // coming from the loop block will be promoted to another type, and so the
1075 // values coming into the exit block from other predecessors would also have
1076 // to be promoted.
1077 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1078 return false;
1079 IntegerType *DestTy = getPmpyType();
1080 // Check if the exit values have types that are no wider than the type
1081 // that we want to promote to.
1082 unsigned DestBW = DestTy->getBitWidth();
1083 for (PHINode &P : ExitB->phis()) {
1084 if (P.getNumIncomingValues() != 1)
1085 return false;
1086 assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void
(0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1086, __extension__ __PRETTY_FUNCTION__))
;
1087 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1088 if (!T || T->getBitWidth() > DestBW)
1089 return false;
1090 }
1091
1092 // Check all instructions in the loop.
1093 for (Instruction &In : *LoopB)
1094 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1095 return false;
1096
1097 // Perform the promotion.
1098 std::vector<Instruction*> LoopIns;
1099 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1100 [](Instruction &In) { return &In; });
1101 for (Instruction *In : LoopIns)
1102 if (!In->isTerminator())
1103 promoteTo(In, DestTy, LoopB);
1104
1105 // Fix up the PHI nodes in the exit block.
1106 Instruction *EndI = ExitB->getFirstNonPHI();
1107 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1108 for (auto I = ExitB->begin(); I != End; ++I) {
1109 PHINode *P = dyn_cast<PHINode>(I);
1110 if (!P)
1111 break;
1112 Type *Ty0 = P->getIncomingValue(0)->getType();
1113 Type *PTy = P->getType();
1114 if (PTy != Ty0) {
1115 assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail
("Ty0 == DestTy", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1115, __extension__ __PRETTY_FUNCTION__))
;
1116 // In order to create the trunc, P must have the promoted type.
1117 P->mutateType(Ty0);
1118 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1119 // In order for the RAUW to work, the types of P and T must match.
1120 P->mutateType(PTy);
1121 P->replaceAllUsesWith(T);
1122 // Final update of the P's type.
1123 P->mutateType(Ty0);
1124 cast<Instruction>(T)->setOperand(0, P);
1125 }
1126 }
1127
1128 return true;
1129}
1130
1131bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1132 ValueSeq &Cycle) {
1133 // Out = ..., In, ...
1134 if (Out == In)
1135 return true;
1136
1137 auto *BB = cast<Instruction>(Out)->getParent();
1138 bool HadPhi = false;
1139
1140 for (auto *U : Out->users()) {
1141 auto *I = dyn_cast<Instruction>(&*U);
1142 if (I == nullptr || I->getParent() != BB)
1143 continue;
1144 // Make sure that there are no multi-iteration cycles, e.g.
1145 // p1 = phi(p2)
1146 // p2 = phi(p1)
1147 // The cycle p1->p2->p1 would span two loop iterations.
1148 // Check that there is only one phi in the cycle.
1149 bool IsPhi = isa<PHINode>(I);
1150 if (IsPhi && HadPhi)
1151 return false;
1152 HadPhi |= IsPhi;
1153 if (!Cycle.insert(I))
1154 return false;
1155 if (findCycle(I, In, Cycle))
1156 break;
1157 Cycle.remove(I);
1158 }
1159 return !Cycle.empty();
1160}
1161
1162void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1163 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1164 // All the values in the cycle that are between the phi node and the
1165 // divider instruction will be classified as "early", all other values
1166 // will be "late".
1167
1168 bool IsE = true;
1169 unsigned I, N = Cycle.size();
1170 for (I = 0; I < N; ++I) {
1171 Value *V = Cycle[I];
1172 if (DivI == V)
1173 IsE = false;
1174 else if (!isa<PHINode>(V))
1175 continue;
1176 // Stop if found either.
1177 break;
1178 }
1179 // "I" is the index of either DivI or the phi node, whichever was first.
1180 // "E" is "false" or "true" respectively.
1181 ValueSeq &First = !IsE ? Early : Late;
1182 for (unsigned J = 0; J < I; ++J)
1183 First.insert(Cycle[J]);
1184
1185 ValueSeq &Second = IsE ? Early : Late;
1186 Second.insert(Cycle[I]);
1187 for (++I; I < N; ++I) {
1188 Value *V = Cycle[I];
1189 if (DivI == V || isa<PHINode>(V))
1190 break;
1191 Second.insert(V);
1192 }
1193
1194 for (; I < N; ++I)
1195 First.insert(Cycle[I]);
1196}
1197
1198bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1199 ValueSeq &Early, ValueSeq &Late) {
1200 // Select is an exception, since the condition value does not have to be
1201 // classified in the same way as the true/false values. The true/false
1202 // values do have to be both early or both late.
1203 if (UseI->getOpcode() == Instruction::Select) {
1204 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1205 if (Early.count(TV) || Early.count(FV)) {
1206 if (Late.count(TV) || Late.count(FV))
1207 return false;
1208 Early.insert(UseI);
1209 } else if (Late.count(TV) || Late.count(FV)) {
1210 if (Early.count(TV) || Early.count(FV))
1211 return false;
1212 Late.insert(UseI);
1213 }
1214 return true;
1215 }
1216
1217 // Not sure what would be the example of this, but the code below relies
1218 // on having at least one operand.
1219 if (UseI->getNumOperands() == 0)
1220 return true;
1221
1222 bool AE = true, AL = true;
1223 for (auto &I : UseI->operands()) {
1224 if (Early.count(&*I))
1225 AL = false;
1226 else if (Late.count(&*I))
1227 AE = false;
1228 }
1229 // If the operands appear "all early" and "all late" at the same time,
1230 // then it means that none of them are actually classified as either.
1231 // This is harmless.
1232 if (AE && AL)
1233 return true;
1234 // Conversely, if they are neither "all early" nor "all late", then
1235 // we have a mixture of early and late operands that is not a known
1236 // exception.
1237 if (!AE && !AL)
1238 return false;
1239
1240 // Check that we have covered the two special cases.
1241 assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail
("AE != AL", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1241, __extension__ __PRETTY_FUNCTION__))
;
1242
1243 if (AE)
1244 Early.insert(UseI);
1245 else
1246 Late.insert(UseI);
1247 return true;
1248}
1249
1250bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1251 switch (I->getOpcode()) {
1252 case Instruction::And:
1253 case Instruction::Or:
1254 case Instruction::Xor:
1255 case Instruction::LShr:
1256 case Instruction::Shl:
1257 case Instruction::Select:
1258 case Instruction::ICmp:
1259 case Instruction::PHI:
1260 break;
1261 default:
1262 return false;
1263 }
1264 return true;
1265}
1266
1267bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1268 unsigned IterCount) {
1269 auto *T = dyn_cast<IntegerType>(V->getType());
1270 if (!T)
1271 return false;
1272
1273 KnownBits Known(T->getBitWidth());
1274 computeKnownBits(V, Known, DL);
1275 return Known.countMinLeadingZeros() >= IterCount;
1276}
1277
1278bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1279 unsigned IterCount) {
1280 // Assume that all inputs to the value have the high bits zero.
1281 // Check if the value itself preserves the zeros in the high bits.
1282 if (auto *C = dyn_cast<ConstantInt>(V))
1283 return C->getValue().countl_zero() >= IterCount;
1284
1285 if (auto *I = dyn_cast<Instruction>(V)) {
1286 switch (I->getOpcode()) {
1287 case Instruction::And:
1288 case Instruction::Or:
1289 case Instruction::Xor:
1290 case Instruction::LShr:
1291 case Instruction::Select:
1292 case Instruction::ICmp:
1293 case Instruction::PHI:
1294 case Instruction::ZExt:
1295 return true;
1296 }
1297 }
1298
1299 return false;
1300}
1301
1302bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1303 unsigned Opc = I->getOpcode();
1304 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1305 return Op != I->getOperand(1);
1306 return true;
1307}
1308
1309bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1310 BasicBlock *ExitB, unsigned IterCount) {
1311 Value *CIV = getCountIV(LoopB);
1312 if (CIV == nullptr)
1313 return false;
1314 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1315 if (CIVTy == nullptr)
1316 return false;
1317
1318 ValueSeq RShifts;
1319 ValueSeq Early, Late, Cycled;
1320
1321 // Find all value cycles that contain logical right shifts by 1.
1322 for (Instruction &I : *LoopB) {
1323 using namespace PatternMatch;
1324
1325 Value *V = nullptr;
1326 if (!match(&I, m_LShr(m_Value(V), m_One())))
1327 continue;
1328 ValueSeq C;
1329 if (!findCycle(&I, V, C))
1330 continue;
1331
1332 // Found a cycle.
1333 C.insert(&I);
1334 classifyCycle(&I, C, Early, Late);
1335 Cycled.insert(C.begin(), C.end());
1336 RShifts.insert(&I);
1337 }
1338
1339 // Find the set of all values affected by the shift cycles, i.e. all
1340 // cycled values, and (recursively) all their users.
1341 ValueSeq Users(Cycled.begin(), Cycled.end());
1342 for (unsigned i = 0; i < Users.size(); ++i) {
1343 Value *V = Users[i];
1344 if (!isa<IntegerType>(V->getType()))
1345 return false;
1346 auto *R = cast<Instruction>(V);
1347 // If the instruction does not commute with shifts, the loop cannot
1348 // be unshifted.
1349 if (!commutesWithShift(R))
1350 return false;
1351 for (User *U : R->users()) {
1352 auto *T = cast<Instruction>(U);
1353 // Skip users from outside of the loop. They will be handled later.
1354 // Also, skip the right-shifts and phi nodes, since they mix early
1355 // and late values.
1356 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1357 continue;
1358
1359 Users.insert(T);
1360 if (!classifyInst(T, Early, Late))
1361 return false;
1362 }
1363 }
1364
1365 if (Users.empty())
1366 return false;
1367
1368 // Verify that high bits remain zero.
1369 ValueSeq Internal(Users.begin(), Users.end());
1370 ValueSeq Inputs;
1371 for (unsigned i = 0; i < Internal.size(); ++i) {
1372 auto *R = dyn_cast<Instruction>(Internal[i]);
1373 if (!R)
1374 continue;
1375 for (Value *Op : R->operands()) {
1376 auto *T = dyn_cast<Instruction>(Op);
1377 if (T && T->getParent() != LoopB)
1378 Inputs.insert(Op);
1379 else
1380 Internal.insert(Op);
1381 }
1382 }
1383 for (Value *V : Inputs)
1384 if (!highBitsAreZero(V, IterCount))
1385 return false;
1386 for (Value *V : Internal)
1387 if (!keepsHighBitsZero(V, IterCount))
1388 return false;
1389
1390 // Finally, the work can be done. Unshift each user.
1391 IRBuilder<> IRB(LoopB);
1392 std::map<Value*,Value*> ShiftMap;
1393
1394 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1395
1396 CastMapType CastMap;
1397
1398 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1399 IntegerType *Ty) -> Value* {
1400 auto H = CM.find(std::make_pair(V, Ty));
1401 if (H != CM.end())
1402 return H->second;
1403 Value *CV = IRB.CreateIntCast(V, Ty, false);
1404 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1405 return CV;
1406 };
1407
1408 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1409 using namespace PatternMatch;
1410
1411 if (isa<PHINode>(I) || !Users.count(&*I))
1412 continue;
1413
1414 // Match lshr x, 1.
1415 Value *V = nullptr;
1416 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1417 replaceAllUsesOfWithIn(&*I, V, LoopB);
1418 continue;
1419 }
1420 // For each non-cycled operand, replace it with the corresponding
1421 // value shifted left.
1422 for (auto &J : I->operands()) {
1423 Value *Op = J.get();
1424 if (!isOperandShifted(&*I, Op))
1425 continue;
1426 if (Users.count(Op))
1427 continue;
1428 // Skip shifting zeros.
1429 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1430 continue;
1431 // Check if we have already generated a shift for this value.
1432 auto F = ShiftMap.find(Op);
1433 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1434 if (W == nullptr) {
1435 IRB.SetInsertPoint(&*I);
1436 // First, the shift amount will be CIV or CIV+1, depending on
1437 // whether the value is early or late. Instead of creating CIV+1,
1438 // do a single shift of the value.
1439 Value *ShAmt = CIV, *ShVal = Op;
1440 auto *VTy = cast<IntegerType>(ShVal->getType());
1441 auto *ATy = cast<IntegerType>(ShAmt->getType());
1442 if (Late.count(&*I))
1443 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1444 // Second, the types of the shifted value and the shift amount
1445 // must match.
1446 if (VTy != ATy) {
1447 if (VTy->getBitWidth() < ATy->getBitWidth())
1448 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1449 else
1450 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1451 }
1452 // Ready to generate the shift and memoize it.
1453 W = IRB.CreateShl(ShVal, ShAmt);
1454 ShiftMap.insert(std::make_pair(Op, W));
1455 }
1456 I->replaceUsesOfWith(Op, W);
1457 }
1458 }
1459
1460 // Update the users outside of the loop to account for having left
1461 // shifts. They would normally be shifted right in the loop, so shift
1462 // them right after the loop exit.
1463 // Take advantage of the loop-closed SSA form, which has all the post-
1464 // loop values in phi nodes.
1465 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1466 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1467 if (!isa<PHINode>(P))
1468 break;
1469 auto *PN = cast<PHINode>(P);
1470 Value *U = PN->getIncomingValueForBlock(LoopB);
1471 if (!Users.count(U))
1472 continue;
1473 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1474 PN->replaceAllUsesWith(S);
1475 // The above RAUW will create
1476 // S = lshr S, IterCount
1477 // so we need to fix it back into
1478 // S = lshr PN, IterCount
1479 cast<User>(S)->replaceUsesOfWith(S, PN);
1480 }
1481
1482 return true;
1483}
1484
1485void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1486 for (auto &I : *LoopB)
1487 if (Value *SV = simplifyInstruction(&I, {DL, &TLI, &DT}))
1488 I.replaceAllUsesWith(SV);
1489
1490 for (Instruction &I : llvm::make_early_inc_range(*LoopB))
1491 RecursivelyDeleteTriviallyDeadInstructions(&I, &TLI);
1492}
1493
1494unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1495 // Arrays of coefficients of Q and the inverse, C.
1496 // Q[i] = coefficient at x^i.
1497 std::array<char,32> Q, C;
1498
1499 for (unsigned i = 0; i < 32; ++i) {
1500 Q[i] = QP & 1;
1501 QP >>= 1;
1502 }
1503 assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail
("Q[0] == 1", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1503, __extension__ __PRETTY_FUNCTION__))
;
1504
1505 // Find C, such that
1506 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1507 //
1508 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1509 // operations * and + are & and ^ respectively.
1510 //
1511 // Find C[i] recursively, by comparing i-th coefficient in the product
1512 // with 0 (or 1 for i=0).
1513 //
1514 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1515 C[0] = 1;
1516 for (unsigned i = 1; i < 32; ++i) {
1517 // Solve for C[i] in:
1518 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1519 // This is equivalent to
1520 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1521 // which is
1522 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1523 unsigned T = 0;
1524 for (unsigned j = 0; j < i; ++j)
1525 T = T ^ (C[j] & Q[i-j]);
1526 C[i] = T;
1527 }
1528
1529 unsigned QV = 0;
1530 for (unsigned i = 0; i < 32; ++i)
1531 if (C[i])
1532 QV |= (1 << i);
1533
1534 return QV;
1535}
1536
1537Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1538 ParsedValues &PV) {
1539 IRBuilder<> B(&*At);
1540 Module *M = At->getParent()->getParent()->getParent();
1541 Function *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1542
1543 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1544 unsigned IC = PV.IterCount;
1545
1546 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1547 P0 = P = B.CreateXor(P, PV.M);
1548
1549 // Create a bit mask to clear the high bits beyond IterCount.
1550 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
15
Returning from 'APInt::getLowBitsSet'
1551
1552 if (PV.IterCount != 32)
16
Assuming field 'IterCount' is equal to 32
17
Taking false branch
1553 P = B.CreateAnd(P, BMI);
1554
1555 if (PV.Inv) {
18
Assuming field 'Inv' is true
19
Taking true branch
1556 auto *QI = dyn_cast<ConstantInt>(PV.Q);
20
Assuming field 'Q' is a 'CastReturnType'
1557 assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth()
<= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 1557
, __extension__ __PRETTY_FUNCTION__))
;
21
Assuming the condition is true
22
'?' condition is true
1558
1559 // Again, clearing bits beyond IterCount.
1560 unsigned M = (1 << PV.IterCount) - 1;
23
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1561 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1562 unsigned QV = getInverseMxN(Tmp) & M;
1563 auto *QVI = ConstantInt::get(QI->getType(), QV);
1564 P = B.CreateCall(PMF, {P, QVI});
1565 P = B.CreateTrunc(P, QI->getType());
1566 if (IC != 32)
1567 P = B.CreateAnd(P, BMI);
1568 }
1569
1570 Value *R = B.CreateCall(PMF, {P, Q});
1571
1572 if (PV.M != nullptr)
1573 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1574
1575 return R;
1576}
1577
1578static bool hasZeroSignBit(const Value *V) {
1579 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1580 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1581 const Instruction *I = dyn_cast<const Instruction>(V);
1582 if (!I)
1583 return false;
1584 switch (I->getOpcode()) {
1585 case Instruction::LShr:
1586 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1587 return SI->getZExtValue() > 0;
1588 return false;
1589 case Instruction::Or:
1590 case Instruction::Xor:
1591 return hasZeroSignBit(I->getOperand(0)) &&
1592 hasZeroSignBit(I->getOperand(1));
1593 case Instruction::And:
1594 return hasZeroSignBit(I->getOperand(0)) ||
1595 hasZeroSignBit(I->getOperand(1));
1596 }
1597 return false;
1598}
1599
1600void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1601 S.addRule("sink-zext",
1602 // Sink zext past bitwise operations.
1603 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1604 if (I->getOpcode() != Instruction::ZExt)
1605 return nullptr;
1606 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1607 if (!T)
1608 return nullptr;
1609 switch (T->getOpcode()) {
1610 case Instruction::And:
1611 case Instruction::Or:
1612 case Instruction::Xor:
1613 break;
1614 default:
1615 return nullptr;
1616 }
1617 IRBuilder<> B(Ctx);
1618 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1619 B.CreateZExt(T->getOperand(0), I->getType()),
1620 B.CreateZExt(T->getOperand(1), I->getType()));
1621 });
1622 S.addRule("xor/and -> and/xor",
1623 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1624 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1625 if (I->getOpcode() != Instruction::Xor)
1626 return nullptr;
1627 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1628 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1629 if (!And0 || !And1)
1630 return nullptr;
1631 if (And0->getOpcode() != Instruction::And ||
1632 And1->getOpcode() != Instruction::And)
1633 return nullptr;
1634 if (And0->getOperand(1) != And1->getOperand(1))
1635 return nullptr;
1636 IRBuilder<> B(Ctx);
1637 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1638 And0->getOperand(1));
1639 });
1640 S.addRule("sink binop into select",
1641 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1642 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1643 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1644 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1645 if (!BO)
1646 return nullptr;
1647 Instruction::BinaryOps Op = BO->getOpcode();
1648 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1649 IRBuilder<> B(Ctx);
1650 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1651 Value *Z = BO->getOperand(1);
1652 return B.CreateSelect(Sel->getCondition(),
1653 B.CreateBinOp(Op, X, Z),
1654 B.CreateBinOp(Op, Y, Z));
1655 }
1656 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1657 IRBuilder<> B(Ctx);
1658 Value *X = BO->getOperand(0);
1659 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1660 return B.CreateSelect(Sel->getCondition(),
1661 B.CreateBinOp(Op, X, Y),
1662 B.CreateBinOp(Op, X, Z));
1663 }
1664 return nullptr;
1665 });
1666 S.addRule("fold select-select",
1667 // (select c (select c x y) z) -> (select c x z)
1668 // (select c x (select c y z)) -> (select c x z)
1669 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1670 SelectInst *Sel = dyn_cast<SelectInst>(I);
1671 if (!Sel)
1672 return nullptr;
1673 IRBuilder<> B(Ctx);
1674 Value *C = Sel->getCondition();
1675 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1676 if (Sel0->getCondition() == C)
1677 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1678 }
1679 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1680 if (Sel1->getCondition() == C)
1681 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1682 }
1683 return nullptr;
1684 });
1685 S.addRule("or-signbit -> xor-signbit",
1686 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1687 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1688 if (I->getOpcode() != Instruction::Or)
1689 return nullptr;
1690 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1691 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1692 return nullptr;
1693 if (!hasZeroSignBit(I->getOperand(0)))
1694 return nullptr;
1695 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1696 });
1697 S.addRule("sink lshr into binop",
1698 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1699 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1700 if (I->getOpcode() != Instruction::LShr)
1701 return nullptr;
1702 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1703 if (!BitOp)
1704 return nullptr;
1705 switch (BitOp->getOpcode()) {
1706 case Instruction::And:
1707 case Instruction::Or:
1708 case Instruction::Xor:
1709 break;
1710 default:
1711 return nullptr;
1712 }
1713 IRBuilder<> B(Ctx);
1714 Value *S = I->getOperand(1);
1715 return B.CreateBinOp(BitOp->getOpcode(),
1716 B.CreateLShr(BitOp->getOperand(0), S),
1717 B.CreateLShr(BitOp->getOperand(1), S));
1718 });
1719 S.addRule("expose bitop-const",
1720 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1721 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1722 auto IsBitOp = [](unsigned Op) -> bool {
1723 switch (Op) {
1724 case Instruction::And:
1725 case Instruction::Or:
1726 case Instruction::Xor:
1727 return true;
1728 }
1729 return false;
1730 };
1731 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1732 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1733 return nullptr;
1734 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1735 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1736 return nullptr;
1737 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1738 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1739 if (!CA || !CB)
1740 return nullptr;
1741 IRBuilder<> B(Ctx);
1742 Value *X = BitOp2->getOperand(0);
1743 return B.CreateBinOp(BitOp2->getOpcode(), X,
1744 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1745 });
1746}
1747
1748void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1749 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1750 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1751 if (I->getOpcode() != Instruction::And)
1752 return nullptr;
1753 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1754 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1755 if (!Xor || !C0)
1756 return nullptr;
1757 if (Xor->getOpcode() != Instruction::Xor)
1758 return nullptr;
1759 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1760 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1761 // Pick the first non-null and.
1762 if (!And0 || And0->getOpcode() != Instruction::And)
1763 std::swap(And0, And1);
1764 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1765 if (!C1)
1766 return nullptr;
1767 uint32_t V0 = C0->getZExtValue();
1768 uint32_t V1 = C1->getZExtValue();
1769 if (V0 != (V0 & V1))
1770 return nullptr;
1771 IRBuilder<> B(Ctx);
1772 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1773 });
1774}
1775
1776bool PolynomialMultiplyRecognize::recognize() {
1777 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1778 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1779 // Restrictions:
1780 // - The loop must consist of a single block.
1781 // - The iteration count must be known at compile-time.
1782 // - The loop must have an induction variable starting from 0, and
1783 // incremented in each iteration of the loop.
1784 BasicBlock *LoopB = CurLoop->getHeader();
1785 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1786
1787 if (LoopB != CurLoop->getLoopLatch())
1788 return false;
1789 BasicBlock *ExitB = CurLoop->getExitBlock();
1790 if (ExitB == nullptr)
1791 return false;
1792 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1793 if (EntryB == nullptr)
1794 return false;
1795
1796 unsigned IterCount = 0;
1797 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1798 if (isa<SCEVCouldNotCompute>(CT))
1799 return false;
1800 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1801 IterCount = CV->getValue()->getZExtValue() + 1;
1802
1803 Value *CIV = getCountIV(LoopB);
1804 ParsedValues PV;
1805 Simplifier PreSimp;
1806 PV.IterCount = IterCount;
1807 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1808 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1809
1810 setupPreSimplifier(PreSimp);
1811
1812 // Perform a preliminary scan of select instructions to see if any of them
1813 // looks like a generator of the polynomial multiply steps. Assume that a
1814 // loop can only contain a single transformable operation, so stop the
1815 // traversal after the first reasonable candidate was found.
1816 // XXX: Currently this approach can modify the loop before being 100% sure
1817 // that the transformation can be carried out.
1818 bool FoundPreScan = false;
1819 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1820 for (const Value *U : V->users()) {
1821 if (const auto *P = dyn_cast<const PHINode>(U))
1822 if (P->getParent() == LoopB)
1823 return true;
1824 }
1825 return false;
1826 };
1827 for (Instruction &In : *LoopB) {
1828 SelectInst *SI = dyn_cast<SelectInst>(&In);
1829 if (!SI || !FeedsPHI(SI))
1830 continue;
1831
1832 Simplifier::Context C(SI);
1833 Value *T = PreSimp.simplify(C);
1834 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1835 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1836 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1837 FoundPreScan = true;
1838 if (SelI != SI) {
1839 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1840 SI->replaceAllUsesWith(NewSel);
1841 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1842 }
1843 break;
1844 }
1845 }
1846
1847 if (!FoundPreScan) {
1848 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1849 return false;
1850 }
1851
1852 if (!PV.Left) {
1853 // The right shift version actually only returns the higher bits of
1854 // the result (each iteration discards the LSB). If we want to convert it
1855 // to a left-shifting loop, the working data type must be at least as
1856 // wide as the target's pmpy instruction.
1857 if (!promoteTypes(LoopB, ExitB))
1858 return false;
1859 // Run post-promotion simplifications.
1860 Simplifier PostSimp;
1861 setupPostSimplifier(PostSimp);
1862 for (Instruction &In : *LoopB) {
1863 SelectInst *SI = dyn_cast<SelectInst>(&In);
1864 if (!SI || !FeedsPHI(SI))
1865 continue;
1866 Simplifier::Context C(SI);
1867 Value *T = PostSimp.simplify(C);
1868 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1869 if (SelI != SI) {
1870 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1871 SI->replaceAllUsesWith(NewSel);
1872 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1873 }
1874 break;
1875 }
1876
1877 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1878 return false;
1879 cleanupLoopBody(LoopB);
1880 }
1881
1882 // Scan the loop again, find the generating select instruction.
1883 bool FoundScan = false;
1884 for (Instruction &In : *LoopB) {
1885 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1886 if (!SelI)
1887 continue;
1888 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1889 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1890 if (FoundScan)
1891 break;
1892 }
1893 assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail
("FoundScan", "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1893, __extension__ __PRETTY_FUNCTION__))
;
1894
1895 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1896 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1897 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1898 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1899 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1900 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1901 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1902 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1903 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1904 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1905 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1906 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1907 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1908
1909 BasicBlock::iterator At(EntryB->getTerminator());
1910 Value *PM = generate(At, PV);
1911 if (PM == nullptr)
1912 return false;
1913
1914 if (PM->getType() != PV.Res->getType())
1915 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1916
1917 PV.Res->replaceAllUsesWith(PM);
1918 PV.Res->eraseFromParent();
1919 return true;
1920}
1921
1922int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1923 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1924 return SC->getAPInt().getSExtValue();
1925 return 0;
1926}
1927
1928bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1929 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1930 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1931 return false;
1932
1933 Value *StoredVal = SI->getValueOperand();
1934 Value *StorePtr = SI->getPointerOperand();
1935
1936 // Reject stores that are so large that they overflow an unsigned.
1937 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1938 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1939 return false;
1940
1941 // See if the pointer expression is an AddRec like {base,+,1} on the current
1942 // loop, which indicates a strided store. If we have something else, it's a
1943 // random store we can't handle.
1944 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1945 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1946 return false;
1947
1948 // Check to see if the stride matches the size of the store. If so, then we
1949 // know that every byte is touched in the loop.
1950 int Stride = getSCEVStride(StoreEv);
1951 if (Stride == 0)
1952 return false;
1953 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1954 if (StoreSize != unsigned(std::abs(Stride)))
1955 return false;
1956
1957 // The store must be feeding a non-volatile load.
1958 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1959 if (!LI || !LI->isSimple())
1960 return false;
1961
1962 // See if the pointer expression is an AddRec like {base,+,1} on the current
1963 // loop, which indicates a strided load. If we have something else, it's a
1964 // random load we can't handle.
1965 Value *LoadPtr = LI->getPointerOperand();
1966 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1967 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1968 return false;
1969
1970 // The store and load must share the same stride.
1971 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1972 return false;
1973
1974 // Success. This store can be converted into a memcpy.
1975 return true;
1976}
1977
1978/// mayLoopAccessLocation - Return true if the specified loop might access the
1979/// specified pointer location, which is a loop-strided access. The 'Access'
1980/// argument specifies what the verboten forms of access are (read or write).
1981static bool
1982mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1983 const SCEV *BECount, unsigned StoreSize,
1984 AliasAnalysis &AA,
1985 SmallPtrSetImpl<Instruction *> &Ignored) {
1986 // Get the location that may be stored across the loop. Since the access
1987 // is strided positively through memory, we say that the modified location
1988 // starts at the pointer and has infinite size.
1989 LocationSize AccessSize = LocationSize::afterPointer();
1990
1991 // If the loop iterates a fixed number of times, we can refine the access
1992 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1993 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1994 AccessSize = LocationSize::precise((BECst->getValue()->getZExtValue() + 1) *
1995 StoreSize);
1996
1997 // TODO: For this to be really effective, we have to dive into the pointer
1998 // operand in the store. Store to &A[i] of 100 will always return may alias
1999 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
2000 // which will then no-alias a store to &A[100].
2001 MemoryLocation StoreLoc(Ptr, AccessSize);
2002
2003 for (auto *B : L->blocks())
2004 for (auto &I : *B)
2005 if (Ignored.count(&I) == 0 &&
2006 isModOrRefSet(AA.getModRefInfo(&I, StoreLoc) & Access))
2007 return true;
2008
2009 return false;
2010}
2011
2012void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
2013 SmallVectorImpl<StoreInst*> &Stores) {
2014 Stores.clear();
2015 for (Instruction &I : *BB)
2016 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2017 if (isLegalStore(CurLoop, SI))
2018 Stores.push_back(SI);
2019}
2020
2021bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2022 StoreInst *SI, const SCEV *BECount) {
2023 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025
, __extension__ __PRETTY_FUNCTION__))
2024 "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025
, __extension__ __PRETTY_FUNCTION__))
2025 "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2025
, __extension__ __PRETTY_FUNCTION__))
;
2026
2027 Value *StorePtr = SI->getPointerOperand();
2028 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2029 unsigned Stride = getSCEVStride(StoreEv);
2030 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2031 if (Stride != StoreSize)
2032 return false;
2033
2034 // See if the pointer expression is an AddRec like {base,+,1} on the current
2035 // loop, which indicates a strided load. If we have something else, it's a
2036 // random load we can't handle.
2037 auto *LI = cast<LoadInst>(SI->getValueOperand());
2038 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2039
2040 // The trip count of the loop and the base pointer of the addrec SCEV is
2041 // guaranteed to be loop invariant, which means that it should dominate the
2042 // header. This allows us to insert code for it in the preheader.
2043 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2044 Instruction *ExpPt = Preheader->getTerminator();
2045 IRBuilder<> Builder(ExpPt);
2046 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2047
2048 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2049
2050 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2051 // this into a memcpy/memmove in the loop preheader now if we want. However,
2052 // this would be unsafe to do if there is anything else in the loop that may
2053 // read or write the memory region we're storing to. For memcpy, this
2054 // includes the load that feeds the stores. Check for an alias by generating
2055 // the base address and checking everything.
2056 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2057 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2058 Value *LoadBasePtr = nullptr;
2059
2060 bool Overlap = false;
2061 bool DestVolatile = SI->isVolatile();
2062 Type *BECountTy = BECount->getType();
2063
2064 if (DestVolatile) {
2065 // The trip count must fit in i32, since it is the type of the "num_words"
2066 // argument to hexagon_memcpy_forward_vp4cp4n2.
2067 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2068CleanupAndExit:
2069 // If we generated new code for the base pointer, clean up.
2070 Expander.clear();
2071 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2072 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2073 StoreBasePtr = nullptr;
2074 }
2075 if (LoadBasePtr) {
2076 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2077 LoadBasePtr = nullptr;
2078 }
2079 return false;
2080 }
2081 }
2082
2083 SmallPtrSet<Instruction*, 2> Ignore1;
2084 Ignore1.insert(SI);
2085 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2086 StoreSize, *AA, Ignore1)) {
2087 // Check if the load is the offending instruction.
2088 Ignore1.insert(LI);
2089 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2090 BECount, StoreSize, *AA, Ignore1)) {
2091 // Still bad. Nothing we can do.
2092 goto CleanupAndExit;
2093 }
2094 // It worked with the load ignored.
2095 Overlap = true;
2096 }
2097
2098 if (!Overlap) {
2099 if (DisableMemcpyIdiom || !HasMemcpy)
2100 goto CleanupAndExit;
2101 } else {
2102 // Don't generate memmove if this function will be inlined. This is
2103 // because the caller will undergo this transformation after inlining.
2104 Function *Func = CurLoop->getHeader()->getParent();
2105 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2106 goto CleanupAndExit;
2107
2108 // In case of a memmove, the call to memmove will be executed instead
2109 // of the loop, so we need to make sure that there is nothing else in
2110 // the loop than the load, store and instructions that these two depend
2111 // on.
2112 SmallVector<Instruction*,2> Insts;
2113 Insts.push_back(SI);
2114 Insts.push_back(LI);
2115 if (!coverLoop(CurLoop, Insts))
2116 goto CleanupAndExit;
2117
2118 if (DisableMemmoveIdiom || !HasMemmove)
2119 goto CleanupAndExit;
2120 bool IsNested = CurLoop->getParentLoop() != nullptr;
2121 if (IsNested && OnlyNonNestedMemmove)
2122 goto CleanupAndExit;
2123 }
2124
2125 // For a memcpy, we have to make sure that the input array is not being
2126 // mutated by the loop.
2127 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2128 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2129
2130 SmallPtrSet<Instruction*, 2> Ignore2;
2131 Ignore2.insert(SI);
2132 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2133 StoreSize, *AA, Ignore2))
2134 goto CleanupAndExit;
2135
2136 // Check the stride.
2137 bool StridePos = getSCEVStride(LoadEv) >= 0;
2138
2139 // Currently, the volatile memcpy only emulates traversing memory forward.
2140 if (!StridePos && DestVolatile)
2141 goto CleanupAndExit;
2142
2143 bool RuntimeCheck = (Overlap || DestVolatile);
2144
2145 BasicBlock *ExitB;
2146 if (RuntimeCheck) {
2147 // The runtime check needs a single exit block.
2148 SmallVector<BasicBlock*, 8> ExitBlocks;
2149 CurLoop->getUniqueExitBlocks(ExitBlocks);
2150 if (ExitBlocks.size() != 1)
2151 goto CleanupAndExit;
2152 ExitB = ExitBlocks[0];
2153 }
2154
2155 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2156 // pointer size if it isn't already.
2157 LLVMContext &Ctx = SI->getContext();
2158 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2159 DebugLoc DLoc = SI->getDebugLoc();
2160
2161 const SCEV *NumBytesS =
2162 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2163 if (StoreSize != 1)
2164 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2165 SCEV::FlagNUW);
2166 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2167 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2168 if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT}))
2169 NumBytes = Simp;
2170
2171 CallInst *NewCall;
2172
2173 if (RuntimeCheck) {
2174 unsigned Threshold = RuntimeMemSizeThreshold;
2175 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2176 uint64_t C = CI->getZExtValue();
2177 if (Threshold != 0 && C < Threshold)
2178 goto CleanupAndExit;
2179 if (C < CompileTimeMemSizeThreshold)
2180 goto CleanupAndExit;
2181 }
2182
2183 BasicBlock *Header = CurLoop->getHeader();
2184 Function *Func = Header->getParent();
2185 Loop *ParentL = LF->getLoopFor(Preheader);
2186 StringRef HeaderName = Header->getName();
2187
2188 // Create a new (empty) preheader, and update the PHI nodes in the
2189 // header to use the new preheader.
2190 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2191 Func, Header);
2192 if (ParentL)
2193 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2194 IRBuilder<>(NewPreheader).CreateBr(Header);
2195 for (auto &In : *Header) {
2196 PHINode *PN = dyn_cast<PHINode>(&In);
2197 if (!PN)
2198 break;
2199 int bx = PN->getBasicBlockIndex(Preheader);
2200 if (bx >= 0)
2201 PN->setIncomingBlock(bx, NewPreheader);
2202 }
2203 DT->addNewBlock(NewPreheader, Preheader);
2204 DT->changeImmediateDominator(Header, NewPreheader);
2205
2206 // Check for safe conditions to execute memmove.
2207 // If stride is positive, copying things from higher to lower addresses
2208 // is equivalent to memmove. For negative stride, it's the other way
2209 // around. Copying forward in memory with positive stride may not be
2210 // same as memmove since we may be copying values that we just stored
2211 // in some previous iteration.
2212 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2213 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2214 Value *LowA = StridePos ? SA : LA;
2215 Value *HighA = StridePos ? LA : SA;
2216 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2217 Value *Cond = CmpA;
2218
2219 // Check for distance between pointers. Since the case LowA < HighA
2220 // is checked for above, assume LowA >= HighA.
2221 Value *Dist = Builder.CreateSub(LowA, HighA);
2222 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2223 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2224 Cond = CmpEither;
2225
2226 if (Threshold != 0) {
2227 Type *Ty = NumBytes->getType();
2228 Value *Thr = ConstantInt::get(Ty, Threshold);
2229 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2230 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2231 Cond = CmpBoth;
2232 }
2233 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2234 Func, NewPreheader);
2235 if (ParentL)
2236 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2237 Instruction *OldT = Preheader->getTerminator();
2238 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2239 OldT->eraseFromParent();
2240 Preheader->setName(Preheader->getName()+".old");
2241 DT->addNewBlock(MemmoveB, Preheader);
2242 // Find the new immediate dominator of the exit block.
2243 BasicBlock *ExitD = Preheader;
2244 for (BasicBlock *PB : predecessors(ExitB)) {
2245 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2246 if (!ExitD)
2247 break;
2248 }
2249 // If the prior immediate dominator of ExitB was dominated by the
2250 // old preheader, then the old preheader becomes the new immediate
2251 // dominator. Otherwise don't change anything (because the newly
2252 // added blocks are dominated by the old preheader).
2253 if (ExitD && DT->dominates(Preheader, ExitD)) {
2254 DomTreeNode *BN = DT->getNode(ExitB);
2255 DomTreeNode *DN = DT->getNode(ExitD);
2256 BN->setIDom(DN);
2257 }
2258
2259 // Add a call to memmove to the conditional block.
2260 IRBuilder<> CondBuilder(MemmoveB);
2261 CondBuilder.CreateBr(ExitB);
2262 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2263
2264 if (DestVolatile) {
2265 Type *Int32Ty = Type::getInt32Ty(Ctx);
2266 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2267 Type *VoidTy = Type::getVoidTy(Ctx);
2268 Module *M = Func->getParent();
2269 FunctionCallee Fn = M->getOrInsertFunction(
2270 HexagonVolatileMemcpyName, VoidTy, Int32PtrTy, Int32PtrTy, Int32Ty);
2271
2272 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2273 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2274 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2275 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2276 MemmoveB->getTerminator());
2277 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2278 if (Value *Simp = simplifyInstruction(In, {*DL, TLI, DT}))
2279 NumWords = Simp;
2280
2281 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2282 ? StoreBasePtr
2283 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2284 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2285 ? LoadBasePtr
2286 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2287 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2288 } else {
2289 NewCall = CondBuilder.CreateMemMove(
2290 StoreBasePtr, SI->getAlign(), LoadBasePtr, LI->getAlign(), NumBytes);
2291 }
2292 } else {
2293 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlign(), LoadBasePtr,
2294 LI->getAlign(), NumBytes);
2295 // Okay, the memcpy has been formed. Zap the original store and
2296 // anything that feeds into it.
2297 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2298 }
2299
2300 NewCall->setDebugLoc(DLoc);
2301
2302 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2303 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2304 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2305 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2306 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2307
2308 return true;
2309}
2310
2311// Check if the instructions in Insts, together with their dependencies
2312// cover the loop in the sense that the loop could be safely eliminated once
2313// the instructions in Insts are removed.
2314bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2315 SmallVectorImpl<Instruction*> &Insts) const {
2316 SmallSet<BasicBlock*,8> LoopBlocks;
2317 for (auto *B : L->blocks())
2318 LoopBlocks.insert(B);
2319
2320 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2321
2322 // Collect all instructions from the loop that the instructions in Insts
2323 // depend on (plus their dependencies, etc.). These instructions will
2324 // constitute the expression trees that feed those in Insts, but the trees
2325 // will be limited only to instructions contained in the loop.
2326 for (unsigned i = 0; i < Worklist.size(); ++i) {
2327 Instruction *In = Worklist[i];
2328 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2329 Instruction *OpI = dyn_cast<Instruction>(I);
2330 if (!OpI)
2331 continue;
2332 BasicBlock *PB = OpI->getParent();
2333 if (!LoopBlocks.count(PB))
2334 continue;
2335 Worklist.insert(OpI);
2336 }
2337 }
2338
2339 // Scan all instructions in the loop, if any of them have a user outside
2340 // of the loop, or outside of the expressions collected above, then either
2341 // the loop has a side-effect visible outside of it, or there are
2342 // instructions in it that are not involved in the original set Insts.
2343 for (auto *B : L->blocks()) {
2344 for (auto &In : *B) {
2345 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2346 continue;
2347 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2348 return false;
2349 for (auto *K : In.users()) {
2350 Instruction *UseI = dyn_cast<Instruction>(K);
2351 if (!UseI)
2352 continue;
2353 BasicBlock *UseB = UseI->getParent();
2354 if (LF->getLoopFor(UseB) != L)
2355 return false;
2356 }
2357 }
2358 }
2359
2360 return true;
2361}
2362
2363/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2364/// with the specified backedge count. This block is known to be in the current
2365/// loop and not in any subloops.
2366bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2367 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2368 // We can only promote stores in this block if they are unconditionally
2369 // executed in the loop. For a block to be unconditionally executed, it has
2370 // to dominate all the exit blocks of the loop. Verify this now.
2371 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2372 return DT->dominates(BB, EB);
2373 };
2374 if (!all_of(ExitBlocks, DominatedByBB))
2375 return false;
2376
2377 bool MadeChange = false;
2378 // Look for store instructions, which may be optimized to memset/memcpy.
2379 SmallVector<StoreInst*,8> Stores;
2380 collectStores(CurLoop, BB, Stores);
2381
2382 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2383 for (auto &SI : Stores)
2384 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2385
2386 return MadeChange;
2387}
2388
2389bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2390 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2391 if (PMR.recognize())
2392 return true;
2393
2394 if (!HasMemcpy && !HasMemmove)
2395 return false;
2396
2397 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2398 assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400
, __extension__ __PRETTY_FUNCTION__))
2399 "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400
, __extension__ __PRETTY_FUNCTION__))
2400 "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp", 2400
, __extension__ __PRETTY_FUNCTION__))
;
2401
2402 SmallVector<BasicBlock *, 8> ExitBlocks;
2403 L->getUniqueExitBlocks(ExitBlocks);
2404
2405 bool Changed = false;
2406
2407 // Scan all the blocks in the loop that are not in subloops.
2408 for (auto *BB : L->getBlocks()) {
2409 // Ignore blocks in subloops.
2410 if (LF->getLoopFor(BB) != L)
2411 continue;
2412 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2413 }
2414
2415 return Changed;
2416}
2417
2418bool HexagonLoopIdiomRecognize::run(Loop *L) {
2419 const Module &M = *L->getHeader()->getParent()->getParent();
2420 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2421 return false;
2422
2423 // If the loop could not be converted to canonical form, it must have an
2424 // indirectbr in it, just give up.
2425 if (!L->getLoopPreheader())
2426 return false;
2427
2428 // Disable loop idiom recognition if the function's name is a common idiom.
2429 StringRef Name = L->getHeader()->getParent()->getName();
2430 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2431 return false;
2432
2433 DL = &L->getHeader()->getModule()->getDataLayout();
2434
2435 HasMemcpy = TLI->has(LibFunc_memcpy);
2436 HasMemmove = TLI->has(LibFunc_memmove);
2437
2438 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2439 return runOnCountableLoop(L);
2440 return false;
2441}
2442
2443bool HexagonLoopIdiomRecognizeLegacyPass::runOnLoop(Loop *L,
2444 LPPassManager &LPM) {
2445 if (skipLoop(L))
2446 return false;
2447
2448 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2449 auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2450 auto *LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2451 auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
2452 *L->getHeader()->getParent());
2453 auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2454 return HexagonLoopIdiomRecognize(AA, DT, LF, TLI, SE).run(L);
2455}
2456
2457Pass *llvm::createHexagonLoopIdiomPass() {
2458 return new HexagonLoopIdiomRecognizeLegacyPass();
2459}
2460
2461PreservedAnalyses
2462HexagonLoopIdiomRecognitionPass::run(Loop &L, LoopAnalysisManager &AM,
2463 LoopStandardAnalysisResults &AR,
2464 LPMUpdater &U) {
2465 return HexagonLoopIdiomRecognize(&AR.AA, &AR.DT, &AR.LI, &AR.TLI, &AR.SE)
2466 .run(&L)
2467 ? getLoopPassPreservedAnalyses()
2468 : PreservedAnalyses::all();
2469}

/build/source/llvm/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements a class to represent arbitrary precision
11/// integral constant values and operations on them.
12///
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_ADT_APINT_H
16#define LLVM_ADT_APINT_H
17
18#include "llvm/Support/Compiler.h"
19#include "llvm/Support/MathExtras.h"
20#include <cassert>
21#include <climits>
22#include <cstring>
23#include <optional>
24#include <utility>
25
26namespace llvm {
27class FoldingSetNodeID;
28class StringRef;
29class hash_code;
30class raw_ostream;
31
32template <typename T> class SmallVectorImpl;
33template <typename T> class ArrayRef;
34template <typename T, typename Enable> struct DenseMapInfo;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69/// * APInt supports zero-bit-width values, but operations that require bits
70/// are not defined on it (e.g. you cannot ask for the sign of a zero-bit
71/// integer). This means that operations like zero extension and logical
72/// shifts are defined, but sign extension and ashr is not. Zero bit values
73/// compare and hash equal to themselves, and countLeadingZeros returns 0.
74///
75class [[nodiscard]] APInt {
76public:
77 typedef uint64_t WordType;
78
79 /// This enum is used to hold the constants we needed for APInt.
80 enum : unsigned {
81 /// Byte size of a word.
82 APINT_WORD_SIZE = sizeof(WordType),
83 /// Bits in a word.
84 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
85 };
86
87 enum class Rounding {
88 DOWN,
89 TOWARD_ZERO,
90 UP,
91 };
92
93 static constexpr WordType WORDTYPE_MAX = ~WordType(0);
94
95 /// \name Constructors
96 /// @{
97
98 /// Create a new APInt of numBits width, initialized as val.
99 ///
100 /// If isSigned is true then val is treated as if it were a signed value
101 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
102 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
103 /// the range of val are zero filled).
104 ///
105 /// \param numBits the bit width of the constructed APInt
106 /// \param val the initial value of the APInt
107 /// \param isSigned how to treat signedness of val
108 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
109 : BitWidth(numBits) {
110 if (isSingleWord()) {
111 U.VAL = val;
112 clearUnusedBits();
113 } else {
114 initSlowCase(val, isSigned);
115 }
116 }
117
118 /// Construct an APInt of numBits width, initialized as bigVal[].
119 ///
120 /// Note that bigVal.size() can be smaller or larger than the corresponding
121 /// bit width but any extraneous bits will be dropped.
122 ///
123 /// \param numBits the bit width of the constructed APInt
124 /// \param bigVal a sequence of words to form the initial value of the APInt
125 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
126
127 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
128 /// deprecated because this constructor is prone to ambiguity with the
129 /// APInt(unsigned, uint64_t, bool) constructor.
130 ///
131 /// If this overload is ever deleted, care should be taken to prevent calls
132 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
133 /// constructor.
134 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
135
136 /// Construct an APInt from a string representation.
137 ///
138 /// This constructor interprets the string \p str in the given radix. The
139 /// interpretation stops when the first character that is not suitable for the
140 /// radix is encountered, or the end of the string. Acceptable radix values
141 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
142 /// string to require more bits than numBits.
143 ///
144 /// \param numBits the bit width of the constructed APInt
145 /// \param str the string to be interpreted
146 /// \param radix the radix to use for the conversion
147 APInt(unsigned numBits, StringRef str, uint8_t radix);
148
149 /// Default constructor that creates an APInt with a 1-bit zero value.
150 explicit APInt() { U.VAL = 0; }
151
152 /// Copy Constructor.
153 APInt(const APInt &that) : BitWidth(that.BitWidth) {
154 if (isSingleWord())
155 U.VAL = that.U.VAL;
156 else
157 initSlowCase(that);
158 }
159
160 /// Move Constructor.
161 APInt(APInt &&that) : BitWidth(that.BitWidth) {
162 memcpy(&U, &that.U, sizeof(U));
163 that.BitWidth = 0;
164 }
165
166 /// Destructor.
167 ~APInt() {
168 if (needsCleanup())
169 delete[] U.pVal;
170 }
171
172 /// @}
173 /// \name Value Generators
174 /// @{
175
176 /// Get the '0' value for the specified bit-width.
177 static APInt getZero(unsigned numBits) { return APInt(numBits, 0); }
178
179 /// NOTE: This is soft-deprecated. Please use `getZero()` instead.
180 static APInt getNullValue(unsigned numBits) { return getZero(numBits); }
181
182 /// Return an APInt zero bits wide.
183 static APInt getZeroWidth() { return getZero(0); }
184
185 /// Gets maximum unsigned value of APInt for specific bit width.
186 static APInt getMaxValue(unsigned numBits) { return getAllOnes(numBits); }
187
188 /// Gets maximum signed value of APInt for a specific bit width.
189 static APInt getSignedMaxValue(unsigned numBits) {
190 APInt API = getAllOnes(numBits);
191 API.clearBit(numBits - 1);
192 return API;
193 }
194
195 /// Gets minimum unsigned value of APInt for a specific bit width.
196 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
197
198 /// Gets minimum signed value of APInt for a specific bit width.
199 static APInt getSignedMinValue(unsigned numBits) {
200 APInt API(numBits, 0);
201 API.setBit(numBits - 1);
202 return API;
203 }
204
205 /// Get the SignMask for a specific bit width.
206 ///
207 /// This is just a wrapper function of getSignedMinValue(), and it helps code
208 /// readability when we want to get a SignMask.
209 static APInt getSignMask(unsigned BitWidth) {
210 return getSignedMinValue(BitWidth);
211 }
212
213 /// Return an APInt of a specified width with all bits set.
214 static APInt getAllOnes(unsigned numBits) {
215 return APInt(numBits, WORDTYPE_MAX, true);
216 }
217
218 /// NOTE: This is soft-deprecated. Please use `getAllOnes()` instead.
219 static APInt getAllOnesValue(unsigned numBits) { return getAllOnes(numBits); }
220
221 /// Return an APInt with exactly one bit set in the result.
222 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
223 APInt Res(numBits, 0);
224 Res.setBit(BitNo);
225 return Res;
226 }
227
228 /// Get a value with a block of bits set.
229 ///
230 /// Constructs an APInt value that has a contiguous range of bits set. The
231 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
232 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
233 /// 0x0000FFFF. Please call getBitsSetWithWrap if \p loBit may be greater than
234 /// \p hiBit.
235 ///
236 /// \param numBits the intended bit width of the result
237 /// \param loBit the index of the lowest bit set.
238 /// \param hiBit the index of the highest bit set.
239 ///
240 /// \returns An APInt value with the requested bits set.
241 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
242 APInt Res(numBits, 0);
243 Res.setBits(loBit, hiBit);
244 return Res;
245 }
246
247 /// Wrap version of getBitsSet.
248 /// If \p hiBit is bigger than \p loBit, this is same with getBitsSet.
249 /// If \p hiBit is not bigger than \p loBit, the set bits "wrap". For example,
250 /// with parameters (32, 28, 4), you would get 0xF000000F.
251 /// If \p hiBit is equal to \p loBit, you would get a result with all bits
252 /// set.
253 static APInt getBitsSetWithWrap(unsigned numBits, unsigned loBit,
254 unsigned hiBit) {
255 APInt Res(numBits, 0);
256 Res.setBitsWithWrap(loBit, hiBit);
257 return Res;
258 }
259
260 /// Constructs an APInt value that has a contiguous range of bits set. The
261 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
262 /// bits will be zero. For example, with parameters(32, 12) you would get
263 /// 0xFFFFF000.
264 ///
265 /// \param numBits the intended bit width of the result
266 /// \param loBit the index of the lowest bit to set.
267 ///
268 /// \returns An APInt value with the requested bits set.
269 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
270 APInt Res(numBits, 0);
271 Res.setBitsFrom(loBit);
272 return Res;
273 }
274
275 /// Constructs an APInt value that has the top hiBitsSet bits set.
276 ///
277 /// \param numBits the bitwidth of the result
278 /// \param hiBitsSet the number of high-order bits set in the result.
279 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
280 APInt Res(numBits, 0);
281 Res.setHighBits(hiBitsSet);
282 return Res;
283 }
284
285 /// Constructs an APInt value that has the bottom loBitsSet bits set.
286 ///
287 /// \param numBits the bitwidth of the result
288 /// \param loBitsSet the number of low-order bits set in the result.
289 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
290 APInt Res(numBits, 0);
291 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
14
Returning from 'APInt::setLowBits'
292 return Res;
293 }
294
295 /// Return a value containing V broadcasted over NewLen bits.
296 static APInt getSplat(unsigned NewLen, const APInt &V);
297
298 /// @}
299 /// \name Value Tests
300 /// @{
301
302 /// Determine if this APInt just has one word to store value.
303 ///
304 /// \returns true if the number of bits <= 64, false otherwise.
305 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
306
307 /// Determine sign of this APInt.
308 ///
309 /// This tests the high bit of this APInt to determine if it is set.
310 ///
311 /// \returns true if this APInt is negative, false otherwise
312 bool isNegative() const { return (*this)[BitWidth - 1]; }
313
314 /// Determine if this APInt Value is non-negative (>= 0)
315 ///
316 /// This tests the high bit of the APInt to determine if it is unset.
317 bool isNonNegative() const { return !isNegative(); }
318
319 /// Determine if sign bit of this APInt is set.
320 ///
321 /// This tests the high bit of this APInt to determine if it is set.
322 ///
323 /// \returns true if this APInt has its sign bit set, false otherwise.
324 bool isSignBitSet() const { return (*this)[BitWidth - 1]; }
325
326 /// Determine if sign bit of this APInt is clear.
327 ///
328 /// This tests the high bit of this APInt to determine if it is clear.
329 ///
330 /// \returns true if this APInt has its sign bit clear, false otherwise.
331 bool isSignBitClear() const { return !isSignBitSet(); }
332
333 /// Determine if this APInt Value is positive.
334 ///
335 /// This tests if the value of this APInt is positive (> 0). Note
336 /// that 0 is not a positive value.
337 ///
338 /// \returns true if this APInt is positive.
339 bool isStrictlyPositive() const { return isNonNegative() && !isZero(); }
340
341 /// Determine if this APInt Value is non-positive (<= 0).
342 ///
343 /// \returns true if this APInt is non-positive.
344 bool isNonPositive() const { return !isStrictlyPositive(); }
345
346 /// Determine if this APInt Value only has the specified bit set.
347 ///
348 /// \returns true if this APInt only has the specified bit set.
349 bool isOneBitSet(unsigned BitNo) const {
350 return (*this)[BitNo] && popcount() == 1;
351 }
352
353 /// Determine if all bits are set. This is true for zero-width values.
354 bool isAllOnes() const {
355 if (BitWidth == 0)
356 return true;
357 if (isSingleWord())
358 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
359 return countTrailingOnesSlowCase() == BitWidth;
360 }
361
362 /// NOTE: This is soft-deprecated. Please use `isAllOnes()` instead.
363 bool isAllOnesValue() const { return isAllOnes(); }
364
365 /// Determine if this value is zero, i.e. all bits are clear.
366 bool isZero() const {
367 if (isSingleWord())
368 return U.VAL == 0;
369 return countLeadingZerosSlowCase() == BitWidth;
370 }
371
372 /// NOTE: This is soft-deprecated. Please use `isZero()` instead.
373 bool isNullValue() const { return isZero(); }
374
375 /// Determine if this is a value of 1.
376 ///
377 /// This checks to see if the value of this APInt is one.
378 bool isOne() const {
379 if (isSingleWord())
380 return U.VAL == 1;
381 return countLeadingZerosSlowCase() == BitWidth - 1;
382 }
383
384 /// NOTE: This is soft-deprecated. Please use `isOne()` instead.
385 bool isOneValue() const { return isOne(); }
386
387 /// Determine if this is the largest unsigned value.
388 ///
389 /// This checks to see if the value of this APInt is the maximum unsigned
390 /// value for the APInt's bit width.
391 bool isMaxValue() const { return isAllOnes(); }
392
393 /// Determine if this is the largest signed value.
394 ///
395 /// This checks to see if the value of this APInt is the maximum signed
396 /// value for the APInt's bit width.
397 bool isMaxSignedValue() const {
398 if (isSingleWord()) {
399 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 399, __extension__ __PRETTY_FUNCTION__
))
;
400 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
401 }
402 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
403 }
404
405 /// Determine if this is the smallest unsigned value.
406 ///
407 /// This checks to see if the value of this APInt is the minimum unsigned
408 /// value for the APInt's bit width.
409 bool isMinValue() const { return isZero(); }
410
411 /// Determine if this is the smallest signed value.
412 ///
413 /// This checks to see if the value of this APInt is the minimum signed
414 /// value for the APInt's bit width.
415 bool isMinSignedValue() const {
416 if (isSingleWord()) {
417 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 417, __extension__ __PRETTY_FUNCTION__
))
;
418 return U.VAL == (WordType(1) << (BitWidth - 1));
419 }
420 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
421 }
422
423 /// Check if this APInt has an N-bits unsigned integer value.
424 bool isIntN(unsigned N) const { return getActiveBits() <= N; }
425
426 /// Check if this APInt has an N-bits signed integer value.
427 bool isSignedIntN(unsigned N) const { return getSignificantBits() <= N; }
428
429 /// Check if this APInt's value is a power of two greater than zero.
430 ///
431 /// \returns true if the argument APInt value is a power of two > 0.
432 bool isPowerOf2() const {
433 if (isSingleWord()) {
434 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 434, __extension__ __PRETTY_FUNCTION__
))
;
435 return isPowerOf2_64(U.VAL);
436 }
437 return countPopulationSlowCase() == 1;
438 }
439
440 /// Check if this APInt's negated value is a power of two greater than zero.
441 bool isNegatedPowerOf2() const {
442 assert(BitWidth && "zero width values not allowed")(static_cast <bool> (BitWidth && "zero width values not allowed"
) ? void (0) : __assert_fail ("BitWidth && \"zero width values not allowed\""
, "llvm/include/llvm/ADT/APInt.h", 442, __extension__ __PRETTY_FUNCTION__
))
;
443 if (isNonNegative())
444 return false;
445 // NegatedPowerOf2 - shifted mask in the top bits.
446 unsigned LO = countl_one();
447 unsigned TZ = countr_zero();
448 return (LO + TZ) == BitWidth;
449 }
450
451 /// Check if the APInt's value is returned by getSignMask.
452 ///
453 /// \returns true if this is the value returned by getSignMask.
454 bool isSignMask() const { return isMinSignedValue(); }
455
456 /// Convert APInt to a boolean value.
457 ///
458 /// This converts the APInt to a boolean value as a test against zero.
459 bool getBoolValue() const { return !isZero(); }
460
461 /// If this value is smaller than the specified limit, return it, otherwise
462 /// return the limit value. This causes the value to saturate to the limit.
463 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
464 return ugt(Limit) ? Limit : getZExtValue();
465 }
466
467 /// Check if the APInt consists of a repeated bit pattern.
468 ///
469 /// e.g. 0x01010101 satisfies isSplat(8).
470 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
471 /// width without remainder.
472 bool isSplat(unsigned SplatSizeInBits) const;
473
474 /// \returns true if this APInt value is a sequence of \param numBits ones
475 /// starting at the least significant bit with the remainder zero.
476 bool isMask(unsigned numBits) const {
477 assert(numBits != 0 && "numBits must be non-zero")(static_cast <bool> (numBits != 0 && "numBits must be non-zero"
) ? void (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\""
, "llvm/include/llvm/ADT/APInt.h", 477, __extension__ __PRETTY_FUNCTION__
))
;
478 assert(numBits <= BitWidth && "numBits out of range")(static_cast <bool> (numBits <= BitWidth && "numBits out of range"
) ? void (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\""
, "llvm/include/llvm/ADT/APInt.h", 478, __extension__ __PRETTY_FUNCTION__
))
;
479 if (isSingleWord())
480 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
481 unsigned Ones = countTrailingOnesSlowCase();
482 return (numBits == Ones) &&
483 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
484 }
485
486 /// \returns true if this APInt is a non-empty sequence of ones starting at
487 /// the least significant bit with the remainder zero.
488 /// Ex. isMask(0x0000FFFFU) == true.
489 bool isMask() const {
490 if (isSingleWord())
491 return isMask_64(U.VAL);
492 unsigned Ones = countTrailingOnesSlowCase();
493 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
494 }
495
496 /// Return true if this APInt value contains a non-empty sequence of ones with
497 /// the remainder zero.
498 bool isShiftedMask() const {
499 if (isSingleWord())
500 return isShiftedMask_64(U.VAL);
501 unsigned Ones = countPopulationSlowCase();
502 unsigned LeadZ = countLeadingZerosSlowCase();
503 return (Ones + LeadZ + countr_zero()) == BitWidth;
504 }
505
506 /// Return true if this APInt value contains a non-empty sequence of ones with
507 /// the remainder zero. If true, \p MaskIdx will specify the index of the
508 /// lowest set bit and \p MaskLen is updated to specify the length of the
509 /// mask, else neither are updated.
510 bool isShiftedMask(unsigned &MaskIdx, unsigned &MaskLen) const {
511 if (isSingleWord())
512 return isShiftedMask_64(U.VAL, MaskIdx, MaskLen);
513 unsigned Ones = countPopulationSlowCase();
514 unsigned LeadZ = countLeadingZerosSlowCase();
515 unsigned TrailZ = countTrailingZerosSlowCase();
516 if ((Ones + LeadZ + TrailZ) != BitWidth)
517 return false;
518 MaskLen = Ones;
519 MaskIdx = TrailZ;
520 return true;
521 }
522
523 /// Compute an APInt containing numBits highbits from this APInt.
524 ///
525 /// Get an APInt with the same BitWidth as this APInt, just zero mask the low
526 /// bits and right shift to the least significant bit.
527 ///
528 /// \returns the high "numBits" bits of this APInt.
529 APInt getHiBits(unsigned numBits) const;
530
531 /// Compute an APInt containing numBits lowbits from this APInt.
532 ///
533 /// Get an APInt with the same BitWidth as this APInt, just zero mask the high
534 /// bits.
535 ///
536 /// \returns the low "numBits" bits of this APInt.
537 APInt getLoBits(unsigned numBits) const;
538
539 /// Determine if two APInts have the same value, after zero-extending
540 /// one of them (if needed!) to ensure that the bit-widths match.
541 static bool isSameValue(const APInt &I1, const APInt &I2) {
542 if (I1.getBitWidth() == I2.getBitWidth())
543 return I1 == I2;
544
545 if (I1.getBitWidth() > I2.getBitWidth())
546 return I1 == I2.zext(I1.getBitWidth());
547
548 return I1.zext(I2.getBitWidth()) == I2;
549 }
550
551 /// Overload to compute a hash_code for an APInt value.
552 friend hash_code hash_value(const APInt &Arg);
553
554 /// This function returns a pointer to the internal storage of the APInt.
555 /// This is useful for writing out the APInt in binary form without any
556 /// conversions.
557 const uint64_t *getRawData() const {
558 if (isSingleWord())
559 return &U.VAL;
560 return &U.pVal[0];
561 }
562
563 /// @}
564 /// \name Unary Operators
565 /// @{
566
567 /// Postfix increment operator. Increment *this by 1.
568 ///
569 /// \returns a new APInt value representing the original value of *this.
570 APInt operator++(int) {
571 APInt API(*this);
572 ++(*this);
573 return API;
574 }
575
576 /// Prefix increment operator.
577 ///
578 /// \returns *this incremented by one
579 APInt &operator++();
580
581 /// Postfix decrement operator. Decrement *this by 1.
582 ///
583 /// \returns a new APInt value representing the original value of *this.
584 APInt operator--(int) {
585 APInt API(*this);
586 --(*this);
587 return API;
588 }
589
590 /// Prefix decrement operator.
591 ///
592 /// \returns *this decremented by one.
593 APInt &operator--();
594
595 /// Logical negation operation on this APInt returns true if zero, like normal
596 /// integers.
597 bool operator!() const { return isZero(); }
598
599 /// @}
600 /// \name Assignment Operators
601 /// @{
602
603 /// Copy assignment operator.
604 ///
605 /// \returns *this after assignment of RHS.
606 APInt &operator=(const APInt &RHS) {
607 // The common case (both source or dest being inline) doesn't require
608 // allocation or deallocation.
609 if (isSingleWord() && RHS.isSingleWord()) {
610 U.VAL = RHS.U.VAL;
611 BitWidth = RHS.BitWidth;
612 return *this;
613 }
614
615 assignSlowCase(RHS);
616 return *this;
617 }
618
619 /// Move assignment operator.
620 APInt &operator=(APInt &&that) {
621#ifdef EXPENSIVE_CHECKS
622 // Some std::shuffle implementations still do self-assignment.
623 if (this == &that)
624 return *this;
625#endif
626 assert(this != &that && "Self-move not supported")(static_cast <bool> (this != &that && "Self-move not supported"
) ? void (0) : __assert_fail ("this != &that && \"Self-move not supported\""
, "llvm/include/llvm/ADT/APInt.h", 626, __extension__ __PRETTY_FUNCTION__
))
;
627 if (!isSingleWord())
628 delete[] U.pVal;
629
630 // Use memcpy so that type based alias analysis sees both VAL and pVal
631 // as modified.
632 memcpy(&U, &that.U, sizeof(U));
633
634 BitWidth = that.BitWidth;
635 that.BitWidth = 0;
636 return *this;
637 }
638
639 /// Assignment operator.
640 ///
641 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
642 /// the bit width, the excess bits are truncated. If the bit width is larger
643 /// than 64, the value is zero filled in the unspecified high order bits.
644 ///
645 /// \returns *this after assignment of RHS value.
646 APInt &operator=(uint64_t RHS) {
647 if (isSingleWord()) {
648 U.VAL = RHS;
649 return clearUnusedBits();
650 }
651 U.pVal[0] = RHS;
652 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
653 return *this;
654 }
655
656 /// Bitwise AND assignment operator.
657 ///
658 /// Performs a bitwise AND operation on this APInt and RHS. The result is
659 /// assigned to *this.
660 ///
661 /// \returns *this after ANDing with RHS.
662 APInt &operator&=(const APInt &RHS) {
663 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 663, __extension__ __PRETTY_FUNCTION__
))
;
664 if (isSingleWord())
665 U.VAL &= RHS.U.VAL;
666 else
667 andAssignSlowCase(RHS);
668 return *this;
669 }
670
671 /// Bitwise AND assignment operator.
672 ///
673 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
674 /// logically zero-extended or truncated to match the bit-width of
675 /// the LHS.
676 APInt &operator&=(uint64_t RHS) {
677 if (isSingleWord()) {
678 U.VAL &= RHS;
679 return *this;
680 }
681 U.pVal[0] &= RHS;
682 memset(U.pVal + 1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
683 return *this;
684 }
685
686 /// Bitwise OR assignment operator.
687 ///
688 /// Performs a bitwise OR operation on this APInt and RHS. The result is
689 /// assigned *this;
690 ///
691 /// \returns *this after ORing with RHS.
692 APInt &operator|=(const APInt &RHS) {
693 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 693, __extension__ __PRETTY_FUNCTION__
))
;
694 if (isSingleWord())
695 U.VAL |= RHS.U.VAL;
696 else
697 orAssignSlowCase(RHS);
698 return *this;
699 }
700
701 /// Bitwise OR assignment operator.
702 ///
703 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
704 /// logically zero-extended or truncated to match the bit-width of
705 /// the LHS.
706 APInt &operator|=(uint64_t RHS) {
707 if (isSingleWord()) {
708 U.VAL |= RHS;
709 return clearUnusedBits();
710 }
711 U.pVal[0] |= RHS;
712 return *this;
713 }
714
715 /// Bitwise XOR assignment operator.
716 ///
717 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
718 /// assigned to *this.
719 ///
720 /// \returns *this after XORing with RHS.
721 APInt &operator^=(const APInt &RHS) {
722 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 722, __extension__ __PRETTY_FUNCTION__
))
;
723 if (isSingleWord())
724 U.VAL ^= RHS.U.VAL;
725 else
726 xorAssignSlowCase(RHS);
727 return *this;
728 }
729
730 /// Bitwise XOR assignment operator.
731 ///
732 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
733 /// logically zero-extended or truncated to match the bit-width of
734 /// the LHS.
735 APInt &operator^=(uint64_t RHS) {
736 if (isSingleWord()) {
737 U.VAL ^= RHS;
738 return clearUnusedBits();
739 }
740 U.pVal[0] ^= RHS;
741 return *this;
742 }
743
744 /// Multiplication assignment operator.
745 ///
746 /// Multiplies this APInt by RHS and assigns the result to *this.
747 ///
748 /// \returns *this
749 APInt &operator*=(const APInt &RHS);
750 APInt &operator*=(uint64_t RHS);
751
752 /// Addition assignment operator.
753 ///
754 /// Adds RHS to *this and assigns the result to *this.
755 ///
756 /// \returns *this
757 APInt &operator+=(const APInt &RHS);
758 APInt &operator+=(uint64_t RHS);
759
760 /// Subtraction assignment operator.
761 ///
762 /// Subtracts RHS from *this and assigns the result to *this.
763 ///
764 /// \returns *this
765 APInt &operator-=(const APInt &RHS);
766 APInt &operator-=(uint64_t RHS);
767
768 /// Left-shift assignment function.
769 ///
770 /// Shifts *this left by shiftAmt and assigns the result to *this.
771 ///
772 /// \returns *this after shifting left by ShiftAmt
773 APInt &operator<<=(unsigned ShiftAmt) {
774 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 774, __extension__ __PRETTY_FUNCTION__
))
;
775 if (isSingleWord()) {
776 if (ShiftAmt == BitWidth)
777 U.VAL = 0;
778 else
779 U.VAL <<= ShiftAmt;
780 return clearUnusedBits();
781 }
782 shlSlowCase(ShiftAmt);
783 return *this;
784 }
785
786 /// Left-shift assignment function.
787 ///
788 /// Shifts *this left by shiftAmt and assigns the result to *this.
789 ///
790 /// \returns *this after shifting left by ShiftAmt
791 APInt &operator<<=(const APInt &ShiftAmt);
792
793 /// @}
794 /// \name Binary Operators
795 /// @{
796
797 /// Multiplication operator.
798 ///
799 /// Multiplies this APInt by RHS and returns the result.
800 APInt operator*(const APInt &RHS) const;
801
802 /// Left logical shift operator.
803 ///
804 /// Shifts this APInt left by \p Bits and returns the result.
805 APInt operator<<(unsigned Bits) const { return shl(Bits); }
806
807 /// Left logical shift operator.
808 ///
809 /// Shifts this APInt left by \p Bits and returns the result.
810 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
811
812 /// Arithmetic right-shift function.
813 ///
814 /// Arithmetic right-shift this APInt by shiftAmt.
815 APInt ashr(unsigned ShiftAmt) const {
816 APInt R(*this);
817 R.ashrInPlace(ShiftAmt);
818 return R;
819 }
820
821 /// Arithmetic right-shift this APInt by ShiftAmt in place.
822 void ashrInPlace(unsigned ShiftAmt) {
823 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 823, __extension__ __PRETTY_FUNCTION__
))
;
824 if (isSingleWord()) {
825 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
826 if (ShiftAmt == BitWidth)
827 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
828 else
829 U.VAL = SExtVAL >> ShiftAmt;
830 clearUnusedBits();
831 return;
832 }
833 ashrSlowCase(ShiftAmt);
834 }
835
836 /// Logical right-shift function.
837 ///
838 /// Logical right-shift this APInt by shiftAmt.
839 APInt lshr(unsigned shiftAmt) const {
840 APInt R(*this);
841 R.lshrInPlace(shiftAmt);
842 return R;
843 }
844
845 /// Logical right-shift this APInt by ShiftAmt in place.
846 void lshrInPlace(unsigned ShiftAmt) {
847 assert(ShiftAmt <= BitWidth && "Invalid shift amount")(static_cast <bool> (ShiftAmt <= BitWidth &&
"Invalid shift amount") ? void (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "llvm/include/llvm/ADT/APInt.h", 847, __extension__ __PRETTY_FUNCTION__
))
;
848 if (isSingleWord()) {
849 if (ShiftAmt == BitWidth)
850 U.VAL = 0;
851 else
852 U.VAL >>= ShiftAmt;
853 return;
854 }
855 lshrSlowCase(ShiftAmt);
856 }
857
858 /// Left-shift function.
859 ///
860 /// Left-shift this APInt by shiftAmt.
861 APInt shl(unsigned shiftAmt) const {
862 APInt R(*this);
863 R <<= shiftAmt;
864 return R;
865 }
866
867 /// relative logical shift right
868 APInt relativeLShr(int RelativeShift) const {
869 return RelativeShift > 0 ? lshr(RelativeShift) : shl(-RelativeShift);
870 }
871
872 /// relative logical shift left
873 APInt relativeLShl(int RelativeShift) const {
874 return relativeLShr(-RelativeShift);
875 }
876
877 /// relative arithmetic shift right
878 APInt relativeAShr(int RelativeShift) const {
879 return RelativeShift > 0 ? ashr(RelativeShift) : shl(-RelativeShift);
880 }
881
882 /// relative arithmetic shift left
883 APInt relativeAShl(int RelativeShift) const {
884 return relativeAShr(-RelativeShift);
885 }
886
887 /// Rotate left by rotateAmt.
888 APInt rotl(unsigned rotateAmt) const;
889
890 /// Rotate right by rotateAmt.
891 APInt rotr(unsigned rotateAmt) const;
892
893 /// Arithmetic right-shift function.
894 ///
895 /// Arithmetic right-shift this APInt by shiftAmt.
896 APInt ashr(const APInt &ShiftAmt) const {
897 APInt R(*this);
898 R.ashrInPlace(ShiftAmt);
899 return R;
900 }
901
902 /// Arithmetic right-shift this APInt by shiftAmt in place.
903 void ashrInPlace(const APInt &shiftAmt);
904
905 /// Logical right-shift function.
906 ///
907 /// Logical right-shift this APInt by shiftAmt.
908 APInt lshr(const APInt &ShiftAmt) const {
909 APInt R(*this);
910 R.lshrInPlace(ShiftAmt);
911 return R;
912 }
913
914 /// Logical right-shift this APInt by ShiftAmt in place.
915 void lshrInPlace(const APInt &ShiftAmt);
916
917 /// Left-shift function.
918 ///
919 /// Left-shift this APInt by shiftAmt.
920 APInt shl(const APInt &ShiftAmt) const {
921 APInt R(*this);
922 R <<= ShiftAmt;
923 return R;
924 }
925
926 /// Rotate left by rotateAmt.
927 APInt rotl(const APInt &rotateAmt) const;
928
929 /// Rotate right by rotateAmt.
930 APInt rotr(const APInt &rotateAmt) const;
931
932 /// Concatenate the bits from "NewLSB" onto the bottom of *this. This is
933 /// equivalent to:
934 /// (this->zext(NewWidth) << NewLSB.getBitWidth()) | NewLSB.zext(NewWidth)
935 APInt concat(const APInt &NewLSB) const {
936 /// If the result will be small, then both the merged values are small.
937 unsigned NewWidth = getBitWidth() + NewLSB.getBitWidth();
938 if (NewWidth <= APINT_BITS_PER_WORD)
939 return APInt(NewWidth, (U.VAL << NewLSB.getBitWidth()) | NewLSB.U.VAL);
940 return concatSlowCase(NewLSB);
941 }
942
943 /// Unsigned division operation.
944 ///
945 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
946 /// RHS are treated as unsigned quantities for purposes of this division.
947 ///
948 /// \returns a new APInt value containing the division result, rounded towards
949 /// zero.
950 APInt udiv(const APInt &RHS) const;
951 APInt udiv(uint64_t RHS) const;
952
953 /// Signed division function for APInt.
954 ///
955 /// Signed divide this APInt by APInt RHS.
956 ///
957 /// The result is rounded towards zero.
958 APInt sdiv(const APInt &RHS) const;
959 APInt sdiv(int64_t RHS) const;
960
961 /// Unsigned remainder operation.
962 ///
963 /// Perform an unsigned remainder operation on this APInt with RHS being the
964 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
965 /// of this operation.
966 ///
967 /// \returns a new APInt value containing the remainder result
968 APInt urem(const APInt &RHS) const;
969 uint64_t urem(uint64_t RHS) const;
970
971 /// Function for signed remainder operation.
972 ///
973 /// Signed remainder operation on APInt.
974 ///
975 /// Note that this is a true remainder operation and not a modulo operation
976 /// because the sign follows the sign of the dividend which is *this.
977 APInt srem(const APInt &RHS) const;
978 int64_t srem(int64_t RHS) const;
979
980 /// Dual division/remainder interface.
981 ///
982 /// Sometimes it is convenient to divide two APInt values and obtain both the
983 /// quotient and remainder. This function does both operations in the same
984 /// computation making it a little more efficient. The pair of input arguments
985 /// may overlap with the pair of output arguments. It is safe to call
986 /// udivrem(X, Y, X, Y), for example.
987 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
988 APInt &Remainder);
989 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
990 uint64_t &Remainder);
991
992 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
993 APInt &Remainder);
994 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
995 int64_t &Remainder);
996
997 // Operations that return overflow indicators.
998 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
999 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1000 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1001 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1002 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1003 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1004 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1005 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1006 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
1007
1008 // Operations that saturate
1009 APInt sadd_sat(const APInt &RHS) const;
1010 APInt uadd_sat(const APInt &RHS) const;
1011 APInt ssub_sat(const APInt &RHS) const;
1012 APInt usub_sat(const APInt &RHS) const;
1013 APInt smul_sat(const APInt &RHS) const;
1014 APInt umul_sat(const APInt &RHS) const;
1015 APInt sshl_sat(const APInt &RHS) const;
1016 APInt ushl_sat(const APInt &RHS) const;
1017
1018 /// Array-indexing support.
1019 ///
1020 /// \returns the bit value at bitPosition
1021 bool operator[](unsigned bitPosition) const {
1022 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")(static_cast <bool> (bitPosition < getBitWidth() &&
"Bit position out of bounds!") ? void (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\""
, "llvm/include/llvm/ADT/APInt.h", 1022, __extension__ __PRETTY_FUNCTION__
))
;
1023 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1024 }
1025
1026 /// @}
1027 /// \name Comparison Operators
1028 /// @{
1029
1030 /// Equality operator.
1031 ///
1032 /// Compares this APInt with RHS for the validity of the equality
1033 /// relationship.
1034 bool operator==(const APInt &RHS) const {
1035 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Comparison requires equal bit widths") ? void (0) : __assert_fail
("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\""
, "llvm/include/llvm/ADT/APInt.h", 1035, __extension__ __PRETTY_FUNCTION__
))
;
1036 if (isSingleWord())
1037 return U.VAL == RHS.U.VAL;
1038 return equalSlowCase(RHS);
1039 }
1040
1041 /// Equality operator.
1042 ///
1043 /// Compares this APInt with a uint64_t for the validity of the equality
1044 /// relationship.
1045 ///
1046 /// \returns true if *this == Val
1047 bool operator==(uint64_t Val) const {
1048 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1049 }
1050
1051 /// Equality comparison.
1052 ///
1053 /// Compares this APInt with RHS for the validity of the equality
1054 /// relationship.
1055 ///
1056 /// \returns true if *this == Val
1057 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1058
1059 /// Inequality operator.
1060 ///
1061 /// Compares this APInt with RHS for the validity of the inequality
1062 /// relationship.
1063 ///
1064 /// \returns true if *this != Val
1065 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1066
1067 /// Inequality operator.
1068 ///
1069 /// Compares this APInt with a uint64_t for the validity of the inequality
1070 /// relationship.
1071 ///
1072 /// \returns true if *this != Val
1073 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1074
1075 /// Inequality comparison
1076 ///
1077 /// Compares this APInt with RHS for the validity of the inequality
1078 /// relationship.
1079 ///
1080 /// \returns true if *this != Val
1081 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1082
1083 /// Unsigned less than comparison
1084 ///
1085 /// Regards both *this and RHS as unsigned quantities and compares them for
1086 /// the validity of the less-than relationship.
1087 ///
1088 /// \returns true if *this < RHS when both are considered unsigned.
1089 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1090
1091 /// Unsigned less than comparison
1092 ///
1093 /// Regards both *this as an unsigned quantity and compares it with RHS for
1094 /// the validity of the less-than relationship.
1095 ///
1096 /// \returns true if *this < RHS when considered unsigned.
1097 bool ult(uint64_t RHS) const {
1098 // Only need to check active bits if not a single word.
1099 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1100 }
1101
1102 /// Signed less than comparison
1103 ///
1104 /// Regards both *this and RHS as signed quantities and compares them for
1105 /// validity of the less-than relationship.
1106 ///
1107 /// \returns true if *this < RHS when both are considered signed.
1108 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1109
1110 /// Signed less than comparison
1111 ///
1112 /// Regards both *this as a signed quantity and compares it with RHS for
1113 /// the validity of the less-than relationship.
1114 ///
1115 /// \returns true if *this < RHS when considered signed.
1116 bool slt(int64_t RHS) const {
1117 return (!isSingleWord() && getSignificantBits() > 64)
1118 ? isNegative()
1119 : getSExtValue() < RHS;
1120 }
1121
1122 /// Unsigned less or equal comparison
1123 ///
1124 /// Regards both *this and RHS as unsigned quantities and compares them for
1125 /// validity of the less-or-equal relationship.
1126 ///
1127 /// \returns true if *this <= RHS when both are considered unsigned.
1128 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1129
1130 /// Unsigned less or equal comparison
1131 ///
1132 /// Regards both *this as an unsigned quantity and compares it with RHS for
1133 /// the validity of the less-or-equal relationship.
1134 ///
1135 /// \returns true if *this <= RHS when considered unsigned.
1136 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1137
1138 /// Signed less or equal comparison
1139 ///
1140 /// Regards both *this and RHS as signed quantities and compares them for
1141 /// validity of the less-or-equal relationship.
1142 ///
1143 /// \returns true if *this <= RHS when both are considered signed.
1144 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1145
1146 /// Signed less or equal comparison
1147 ///
1148 /// Regards both *this as a signed quantity and compares it with RHS for the
1149 /// validity of the less-or-equal relationship.
1150 ///
1151 /// \returns true if *this <= RHS when considered signed.
1152 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1153
1154 /// Unsigned greater than comparison
1155 ///
1156 /// Regards both *this and RHS as unsigned quantities and compares them for
1157 /// the validity of the greater-than relationship.
1158 ///
1159 /// \returns true if *this > RHS when both are considered unsigned.
1160 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1161
1162 /// Unsigned greater than comparison
1163 ///
1164 /// Regards both *this as an unsigned quantity and compares it with RHS for
1165 /// the validity of the greater-than relationship.
1166 ///
1167 /// \returns true if *this > RHS when considered unsigned.
1168 bool ugt(uint64_t RHS) const {
1169 // Only need to check active bits if not a single word.
1170 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1171 }
1172
1173 /// Signed greater than comparison
1174 ///
1175 /// Regards both *this and RHS as signed quantities and compares them for the
1176 /// validity of the greater-than relationship.
1177 ///
1178 /// \returns true if *this > RHS when both are considered signed.
1179 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1180
1181 /// Signed greater than comparison
1182 ///
1183 /// Regards both *this as a signed quantity and compares it with RHS for
1184 /// the validity of the greater-than relationship.
1185 ///
1186 /// \returns true if *this > RHS when considered signed.
1187 bool sgt(int64_t RHS) const {
1188 return (!isSingleWord() && getSignificantBits() > 64)
1189 ? !isNegative()
1190 : getSExtValue() > RHS;
1191 }
1192
1193 /// Unsigned greater or equal comparison
1194 ///
1195 /// Regards both *this and RHS as unsigned quantities and compares them for
1196 /// validity of the greater-or-equal relationship.
1197 ///
1198 /// \returns true if *this >= RHS when both are considered unsigned.
1199 bool uge(const APInt &RHS) const { return !ult(RHS); }
1200
1201 /// Unsigned greater or equal comparison
1202 ///
1203 /// Regards both *this as an unsigned quantity and compares it with RHS for
1204 /// the validity of the greater-or-equal relationship.
1205 ///
1206 /// \returns true if *this >= RHS when considered unsigned.
1207 bool uge(uint64_t RHS) const { return !ult(RHS); }
1208
1209 /// Signed greater or equal comparison
1210 ///
1211 /// Regards both *this and RHS as signed quantities and compares them for
1212 /// validity of the greater-or-equal relationship.
1213 ///
1214 /// \returns true if *this >= RHS when both are considered signed.
1215 bool sge(const APInt &RHS) const { return !slt(RHS); }
1216
1217 /// Signed greater or equal comparison
1218 ///
1219 /// Regards both *this as a signed quantity and compares it with RHS for
1220 /// the validity of the greater-or-equal relationship.
1221 ///
1222 /// \returns true if *this >= RHS when considered signed.
1223 bool sge(int64_t RHS) const { return !slt(RHS); }
1224
1225 /// This operation tests if there are any pairs of corresponding bits
1226 /// between this APInt and RHS that are both set.
1227 bool intersects(const APInt &RHS) const {
1228 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1228, __extension__ __PRETTY_FUNCTION__
))
;
1229 if (isSingleWord())
1230 return (U.VAL & RHS.U.VAL) != 0;
1231 return intersectsSlowCase(RHS);
1232 }
1233
1234 /// This operation checks that all bits set in this APInt are also set in RHS.
1235 bool isSubsetOf(const APInt &RHS) const {
1236 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")(static_cast <bool> (BitWidth == RHS.BitWidth &&
"Bit widths must be the same") ? void (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "llvm/include/llvm/ADT/APInt.h", 1236, __extension__ __PRETTY_FUNCTION__
))
;
1237 if (isSingleWord())
1238 return (U.VAL & ~RHS.U.VAL) == 0;
1239 return isSubsetOfSlowCase(RHS);
1240 }
1241
1242 /// @}
1243 /// \name Resizing Operators
1244 /// @{
1245
1246 /// Truncate to new width.
1247 ///
1248 /// Truncate the APInt to a specified width. It is an error to specify a width
1249 /// that is greater than the current width.
1250 APInt trunc(unsigned width) const;
1251
1252 /// Truncate to new width with unsigned saturation.
1253 ///
1254 /// If the APInt, treated as unsigned integer, can be losslessly truncated to
1255 /// the new bitwidth, then return truncated APInt. Else, return max value.
1256 APInt truncUSat(unsigned width) const;
1257
1258 /// Truncate to new width with signed saturation.
1259 ///
1260 /// If this APInt, treated as signed integer, can be losslessly truncated to
1261 /// the new bitwidth, then return truncated APInt. Else, return either
1262 /// signed min value if the APInt was negative, or signed max value.
1263 APInt truncSSat(unsigned width) const;
1264
1265 /// Sign extend to a new width.
1266 ///
1267 /// This operation sign extends the APInt to a new width. If the high order
1268 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1269 /// It is an error to specify a width that is less than the
1270 /// current width.
1271 APInt sext(unsigned width) const;
1272
1273 /// Zero extend to a new width.
1274 ///
1275 /// This operation zero extends the APInt to a new width. The high order bits
1276 /// are filled with 0 bits. It is an error to specify a width that is less
1277 /// than the current width.
1278 APInt zext(unsigned width) const;
1279
1280 /// Sign extend or truncate to width
1281 ///
1282 /// Make this APInt have the bit width given by \p width. The value is sign
1283 /// extended, truncated, or left alone to make it that width.
1284 APInt sextOrTrunc(unsigned width) const;
1285
1286 /// Zero extend or truncate to width
1287 ///
1288 /// Make this APInt have the bit width given by \p width. The value is zero
1289 /// extended, truncated, or left alone to make it that width.
1290 APInt zextOrTrunc(unsigned width) const;
1291
1292 /// @}
1293 /// \name Bit Manipulation Operators
1294 /// @{
1295
1296 /// Set every bit to 1.
1297 void setAllBits() {
1298 if (isSingleWord())
1299 U.VAL = WORDTYPE_MAX;
1300 else
1301 // Set all the bits in all the words.
1302 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1303 // Clear the unused ones
1304 clearUnusedBits();
1305 }
1306
1307 /// Set the given bit to 1 whose position is given as "bitPosition".
1308 void setBit(unsigned BitPosition) {
1309 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1309, __extension__ __PRETTY_FUNCTION__
))
;
1310 WordType Mask = maskBit(BitPosition);
1311 if (isSingleWord())
1312 U.VAL |= Mask;
1313 else
1314 U.pVal[whichWord(BitPosition)] |= Mask;
1315 }
1316
1317 /// Set the sign bit to 1.
1318 void setSignBit() { setBit(BitWidth - 1); }
1319
1320 /// Set a given bit to a given value.
1321 void setBitVal(unsigned BitPosition, bool BitValue) {
1322 if (BitValue)
1323 setBit(BitPosition);
1324 else
1325 clearBit(BitPosition);
1326 }
1327
1328 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1329 /// This function handles "wrap" case when \p loBit >= \p hiBit, and calls
1330 /// setBits when \p loBit < \p hiBit.
1331 /// For \p loBit == \p hiBit wrap case, set every bit to 1.
1332 void setBitsWithWrap(unsigned loBit, unsigned hiBit) {
1333 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1333, __extension__ __PRETTY_FUNCTION__
))
;
1334 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1334, __extension__ __PRETTY_FUNCTION__
))
;
1335 if (loBit < hiBit) {
1336 setBits(loBit, hiBit);
1337 return;
1338 }
1339 setLowBits(hiBit);
1340 setHighBits(BitWidth - loBit);
1341 }
1342
1343 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1344 /// This function handles case when \p loBit <= \p hiBit.
1345 void setBits(unsigned loBit, unsigned hiBit) {
1346 assert(hiBit <= BitWidth && "hiBit out of range")(static_cast <bool> (hiBit <= BitWidth && "hiBit out of range"
) ? void (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1346, __extension__ __PRETTY_FUNCTION__
))
;
6
Assuming 'hiBit' is <= field 'BitWidth'
7
'?' condition is true
1347 assert(loBit <= BitWidth && "loBit out of range")(static_cast <bool> (loBit <= BitWidth && "loBit out of range"
) ? void (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1347, __extension__ __PRETTY_FUNCTION__
))
;
8
'?' condition is true
1348 assert(loBit <= hiBit && "loBit greater than hiBit")(static_cast <bool> (loBit <= hiBit && "loBit greater than hiBit"
) ? void (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "llvm/include/llvm/ADT/APInt.h", 1348, __extension__ __PRETTY_FUNCTION__
))
;
9
'?' condition is true
1349 if (loBit == hiBit)
10
Assuming 'loBit' is not equal to 'hiBit'
1350 return;
1351 if (loBit
10.1
'loBit' is < APINT_BITS_PER_WORD
10.1
'loBit' is < APINT_BITS_PER_WORD
< APINT_BITS_PER_WORD && hiBit
10.2
'hiBit' is <= APINT_BITS_PER_WORD
10.2
'hiBit' is <= APINT_BITS_PER_WORD
<= APINT_BITS_PER_WORD) {
11
Taking true branch
1352 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1353 mask <<= loBit;
1354 if (isSingleWord())
12
Taking true branch
1355 U.VAL |= mask;
1356 else
1357 U.pVal[0] |= mask;
1358 } else {
1359 setBitsSlowCase(loBit, hiBit);
1360 }
1361 }
1362
1363 /// Set the top bits starting from loBit.
1364 void setBitsFrom(unsigned loBit) { return setBits(loBit, BitWidth); }
1365
1366 /// Set the bottom loBits bits.
1367 void setLowBits(unsigned loBits) { return setBits(0, loBits); }
5
Calling 'APInt::setBits'
13
Returning from 'APInt::setBits'
1368
1369 /// Set the top hiBits bits.
1370 void setHighBits(unsigned hiBits) {
1371 return setBits(BitWidth - hiBits, BitWidth);
1372 }
1373
1374 /// Set every bit to 0.
1375 void clearAllBits() {
1376 if (isSingleWord())
1377 U.VAL = 0;
1378 else
1379 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1380 }
1381
1382 /// Set a given bit to 0.
1383 ///
1384 /// Set the given bit to 0 whose position is given as "bitPosition".
1385 void clearBit(unsigned BitPosition) {
1386 assert(BitPosition < BitWidth && "BitPosition out of range")(static_cast <bool> (BitPosition < BitWidth &&
"BitPosition out of range") ? void (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "llvm/include/llvm/ADT/APInt.h", 1386, __extension__ __PRETTY_FUNCTION__
))
;
1387 WordType Mask = ~maskBit(BitPosition);
1388 if (isSingleWord())
1389 U.VAL &= Mask;
1390 else
1391 U.pVal[whichWord(BitPosition)] &= Mask;
1392 }
1393
1394 /// Set bottom loBits bits to 0.
1395 void clearLowBits(unsigned loBits) {
1396 assert(loBits <= BitWidth && "More bits than bitwidth")(static_cast <bool> (loBits <= BitWidth && "More bits than bitwidth"
) ? void (0) : __assert_fail ("loBits <= BitWidth && \"More bits than bitwidth\""
, "llvm/include/llvm/ADT/APInt.h", 1396, __extension__ __PRETTY_FUNCTION__
))
;
1397 APInt Keep = getHighBitsSet(BitWidth, BitWidth - loBits);
1398 *this &= Keep;
1399 }
1400
1401 /// Set the sign bit to 0.
1402 void clearSignBit() { clearBit(BitWidth - 1); }
1403
1404 /// Toggle every bit to its opposite value.
1405 void flipAllBits() {
1406 if (isSingleWord()) {
1407 U.VAL ^= WORDTYPE_MAX;
1408 clearUnusedBits();
1409 } else {
1410 flipAllBitsSlowCase();
1411 }
1412 }
1413
1414 /// Toggles a given bit to its opposite value.
1415 ///
1416 /// Toggle a given bit to its opposite value whose position is given
1417 /// as "bitPosition".
1418 void flipBit(unsigned bitPosition);
1419
1420 /// Negate this APInt in place.
1421 void negate() {
1422 flipAllBits();
1423 ++(*this);
1424 }
1425
1426 /// Insert the bits from a smaller APInt starting at bitPosition.
1427 void insertBits(const APInt &SubBits, unsigned bitPosition);
1428 void insertBits(uint64_t SubBits, unsigned bitPosition, unsigned numBits);
1429
1430 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1431 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1432 uint64_t extractBitsAsZExtValue(unsigned numBits, unsigned bitPosition) const;
1433
1434 /// @}
1435 /// \name Value Characterization Functions
1436 /// @{
1437
1438 /// Return the number of bits in the APInt.
1439 unsigned getBitWidth() const { return BitWidth; }
1440
1441 /// Get the number of words.
1442 ///
1443 /// Here one word's bitwidth equals to that of uint64_t.
1444 ///
1445 /// \returns the number of words to hold the integer value of this APInt.
1446 unsigned getNumWords() const { return getNumWords(BitWidth); }
1447
1448 /// Get the number of words.
1449 ///
1450 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1451 ///
1452 /// \returns the number of words to hold the integer value with a given bit
1453 /// width.
1454 static unsigned getNumWords(unsigned BitWidth) {
1455 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1456 }
1457
1458 /// Compute the number of active bits in the value
1459 ///
1460 /// This function returns the number of active bits which is defined as the
1461 /// bit width minus the number of leading zeros. This is used in several
1462 /// computations to see how "wide" the value is.
1463 unsigned getActiveBits() const { return BitWidth - countl_zero(); }
1464
1465 /// Compute the number of active words in the value of this APInt.
1466 ///
1467 /// This is used in conjunction with getActiveData to extract the raw value of
1468 /// the APInt.
1469 unsigned getActiveWords() const {
1470 unsigned numActiveBits = getActiveBits();
1471 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1472 }
1473
1474 /// Get the minimum bit size for this signed APInt
1475 ///
1476 /// Computes the minimum bit width for this APInt while considering it to be a
1477 /// signed (and probably negative) value. If the value is not negative, this
1478 /// function returns the same value as getActiveBits()+1. Otherwise, it
1479 /// returns the smallest bit width that will retain the negative value. For
1480 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1481 /// for -1, this function will always return 1.
1482 unsigned getSignificantBits() const {
1483 return BitWidth - getNumSignBits() + 1;
1484 }
1485
1486 /// NOTE: This is soft-deprecated. Please use `getSignificantBits()` instead.
1487 unsigned getMinSignedBits() const { return getSignificantBits(); }
1488
1489 /// Get zero extended value
1490 ///
1491 /// This method attempts to return the value of this APInt as a zero extended
1492 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1493 /// uint64_t. Otherwise an assertion will result.
1494 uint64_t getZExtValue() const {
1495 if (isSingleWord())
1496 return U.VAL;
1497 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")(static_cast <bool> (getActiveBits() <= 64 &&
"Too many bits for uint64_t") ? void (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1497, __extension__ __PRETTY_FUNCTION__
))
;
1498 return U.pVal[0];
1499 }
1500
1501 /// Get zero extended value if possible
1502 ///
1503 /// This method attempts to return the value of this APInt as a zero extended
1504 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1505 /// uint64_t. Otherwise no value is returned.
1506 std::optional<uint64_t> tryZExtValue() const {
1507 return (getActiveBits() <= 64) ? std::optional<uint64_t>(getZExtValue())
1508 : std::nullopt;
1509 };
1510
1511 /// Get sign extended value
1512 ///
1513 /// This method attempts to return the value of this APInt as a sign extended
1514 /// int64_t. The bit width must be <= 64 or the value must fit within an
1515 /// int64_t. Otherwise an assertion will result.
1516 int64_t getSExtValue() const {
1517 if (isSingleWord())
1518 return SignExtend64(U.VAL, BitWidth);
1519 assert(getSignificantBits() <= 64 && "Too many bits for int64_t")(static_cast <bool> (getSignificantBits() <= 64 &&
"Too many bits for int64_t") ? void (0) : __assert_fail ("getSignificantBits() <= 64 && \"Too many bits for int64_t\""
, "llvm/include/llvm/ADT/APInt.h", 1519, __extension__ __PRETTY_FUNCTION__
))
;
1520 return int64_t(U.pVal[0]);
1521 }
1522
1523 /// Get sign extended value if possible
1524 ///
1525 /// This method attempts to return the value of this APInt as a sign extended
1526 /// int64_t. The bitwidth must be <= 64 or the value must fit within an
1527 /// int64_t. Otherwise no value is returned.
1528 std::optional<int64_t> trySExtValue() const {
1529 return (getSignificantBits() <= 64) ? std::optional<int64_t>(getSExtValue())
1530 : std::nullopt;
1531 };
1532
1533 /// Get bits required for string value.
1534 ///
1535 /// This method determines how many bits are required to hold the APInt
1536 /// equivalent of the string given by \p str.
1537 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1538
1539 /// Get the bits that are sufficient to represent the string value. This may
1540 /// over estimate the amount of bits required, but it does not require
1541 /// parsing the value in the string.
1542 static unsigned getSufficientBitsNeeded(StringRef Str, uint8_t Radix);
1543
1544 /// The APInt version of std::countl_zero.
1545 ///
1546 /// It counts the number of zeros from the most significant bit to the first
1547 /// one bit.
1548 ///
1549 /// \returns BitWidth if the value is zero, otherwise returns the number of
1550 /// zeros from the most significant bit to the first one bits.
1551 unsigned countl_zero() const {
1552 if (isSingleWord()) {
1553 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1554 return llvm::countl_zero(U.VAL) - unusedBits;
1555 }
1556 return countLeadingZerosSlowCase();
1557 }
1558
1559 unsigned countLeadingZeros() const { return countl_zero(); }
1560
1561 /// Count the number of leading one bits.
1562 ///
1563 /// This function is an APInt version of std::countl_one. It counts the number
1564 /// of ones from the most significant bit to the first zero bit.
1565 ///
1566 /// \returns 0 if the high order bit is not set, otherwise returns the number
1567 /// of 1 bits from the most significant to the least
1568 unsigned countl_one() const {
1569 if (isSingleWord()) {
1570 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1571 return 0;
1572 return llvm::countl_one(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1573 }
1574 return countLeadingOnesSlowCase();
1575 }
1576
1577 unsigned countLeadingOnes() const { return countl_one(); }
1578
1579 /// Computes the number of leading bits of this APInt that are equal to its
1580 /// sign bit.
1581 unsigned getNumSignBits() const {
1582 return isNegative() ? countl_one() : countl_zero();
1583 }
1584
1585 /// Count the number of trailing zero bits.
1586 ///
1587 /// This function is an APInt version of the countr_zero. It counts the number
1588 /// of zeros from the least significant bit to the first set bit.
1589 ///
1590 /// \returns BitWidth if the value is zero, otherwise returns the number of
1591 /// zeros from the least significant bit to the first one bit.
1592 unsigned countr_zero() const {
1593 if (isSingleWord()) {
1594 unsigned TrailingZeros = llvm::countr_zero(U.VAL);
1595 return (TrailingZeros > BitWidth ? BitWidth : TrailingZeros);
1596 }
1597 return countTrailingZerosSlowCase();
1598 }
1599
1600 unsigned countTrailingZeros() const { return countr_zero(); }
1601
1602 /// Count the number of trailing one bits.
1603 ///
1604 /// This function is an APInt version of std::countr_one. It counts the number
1605 /// of ones from the least significant bit to the first zero bit.
1606 ///
1607 /// \returns BitWidth if the value is all ones, otherwise returns the number
1608 /// of ones from the least significant bit to the first zero bit.
1609 unsigned countr_one() const {
1610 if (isSingleWord())
1611 return llvm::countr_one(U.VAL);
1612 return countTrailingOnesSlowCase();
1613 }
1614
1615 unsigned countTrailingOnes() const { return countr_one(); }
1616
1617 /// Count the number of bits set.
1618 ///
1619 /// This function is an APInt version of std::popcount. It counts the number
1620 /// of 1 bits in the APInt value.
1621 ///
1622 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1623 unsigned popcount() const {
1624 if (isSingleWord())
1625 return llvm::popcount(U.VAL);
1626 return countPopulationSlowCase();
1627 }
1628
1629 unsigned countPopulation() const { return popcount(); }
1630
1631 /// @}
1632 /// \name Conversion Functions
1633 /// @{
1634 void print(raw_ostream &OS, bool isSigned) const;
1635
1636 /// Converts an APInt to a string and append it to Str. Str is commonly a
1637 /// SmallString.
1638 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1639 bool formatAsCLiteral = false) const;
1640
1641 /// Considers the APInt to be unsigned and converts it into a string in the
1642 /// radix given. The radix can be 2, 8, 10 16, or 36.
1643 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1644 toString(Str, Radix, false, false);
1645 }
1646
1647 /// Considers the APInt to be signed and converts it into a string in the
1648 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1649 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1650 toString(Str, Radix, true, false);
1651 }
1652
1653 /// \returns a byte-swapped representation of this APInt Value.
1654 APInt byteSwap() const;
1655
1656 /// \returns the value with the bit representation reversed of this APInt
1657 /// Value.
1658 APInt reverseBits() const;
1659
1660 /// Converts this APInt to a double value.
1661 double roundToDouble(bool isSigned) const;
1662
1663 /// Converts this unsigned APInt to a double value.
1664 double roundToDouble() const { return roundToDouble(false); }
1665
1666 /// Converts this signed APInt to a double value.
1667 double signedRoundToDouble() const { return roundToDouble(true); }
1668
1669 /// Converts APInt bits to a double
1670 ///
1671 /// The conversion does not do a translation from integer to double, it just
1672 /// re-interprets the bits as a double. Note that it is valid to do this on
1673 /// any bit width. Exactly 64 bits will be translated.
1674 double bitsToDouble() const { return llvm::bit_cast<double>(getWord(0)); }
1675
1676 /// Converts APInt bits to a float
1677 ///
1678 /// The conversion does not do a translation from integer to float, it just
1679 /// re-interprets the bits as a float. Note that it is valid to do this on
1680 /// any bit width. Exactly 32 bits will be translated.
1681 float bitsToFloat() const {
1682 return llvm::bit_cast<float>(static_cast<uint32_t>(getWord(0)));
1683 }
1684
1685 /// Converts a double to APInt bits.
1686 ///
1687 /// The conversion does not do a translation from double to integer, it just
1688 /// re-interprets the bits of the double.
1689 static APInt doubleToBits(double V) {
1690 return APInt(sizeof(double) * CHAR_BIT8, llvm::bit_cast<uint64_t>(V));
1691 }
1692
1693 /// Converts a float to APInt bits.
1694 ///
1695 /// The conversion does not do a translation from float to integer, it just
1696 /// re-interprets the bits of the float.
1697 static APInt floatToBits(float V) {
1698 return APInt(sizeof(float) * CHAR_BIT8, llvm::bit_cast<uint32_t>(V));
1699 }
1700
1701 /// @}
1702 /// \name Mathematics Operations
1703 /// @{
1704
1705 /// \returns the floor log base 2 of this APInt.
1706 unsigned logBase2() const { return getActiveBits() - 1; }
1707
1708 /// \returns the ceil log base 2 of this APInt.
1709 unsigned ceilLogBase2() const {
1710 APInt temp(*this);
1711 --temp;
1712 return temp.getActiveBits();
1713 }
1714
1715 /// \returns the nearest log base 2 of this APInt. Ties round up.
1716 ///
1717 /// NOTE: When we have a BitWidth of 1, we define:
1718 ///
1719 /// log2(0) = UINT32_MAX
1720 /// log2(1) = 0
1721 ///
1722 /// to get around any mathematical concerns resulting from
1723 /// referencing 2 in a space where 2 does no exist.
1724 unsigned nearestLogBase2() const;
1725
1726 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1727 /// otherwise
1728 int32_t exactLogBase2() const {
1729 if (!isPowerOf2())
1730 return -1;
1731 return logBase2();
1732 }
1733
1734 /// Compute the square root.
1735 APInt sqrt() const;
1736
1737 /// Get the absolute value. If *this is < 0 then return -(*this), otherwise
1738 /// *this. Note that the "most negative" signed number (e.g. -128 for 8 bit
1739 /// wide APInt) is unchanged due to how negation works.
1740 APInt abs() const {
1741 if (isNegative())
1742 return -(*this);
1743 return *this;
1744 }
1745
1746 /// \returns the multiplicative inverse for a given modulo.
1747 APInt multiplicativeInverse(const APInt &modulo) const;
1748
1749 /// @}
1750 /// \name Building-block Operations for APInt and APFloat
1751 /// @{
1752
1753 // These building block operations operate on a representation of arbitrary
1754 // precision, two's-complement, bignum integer values. They should be
1755 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1756 // generally a pointer to the base of an array of integer parts, representing
1757 // an unsigned bignum, and a count of how many parts there are.
1758
1759 /// Sets the least significant part of a bignum to the input value, and zeroes
1760 /// out higher parts.
1761 static void tcSet(WordType *, WordType, unsigned);
1762
1763 /// Assign one bignum to another.
1764 static void tcAssign(WordType *, const WordType *, unsigned);
1765
1766 /// Returns true if a bignum is zero, false otherwise.
1767 static bool tcIsZero(const WordType *, unsigned);
1768
1769 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1770 static int tcExtractBit(const WordType *, unsigned bit);
1771
1772 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1773 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1774 /// significant bit of DST. All high bits above srcBITS in DST are
1775 /// zero-filled.
1776 static void tcExtract(WordType *, unsigned dstCount, const WordType *,
1777 unsigned srcBits, unsigned srcLSB);
1778
1779 /// Set the given bit of a bignum. Zero-based.
1780 static void tcSetBit(WordType *, unsigned bit);
1781
1782 /// Clear the given bit of a bignum. Zero-based.
1783 static void tcClearBit(WordType *, unsigned bit);
1784
1785 /// Returns the bit number of the least or most significant set bit of a
1786 /// number. If the input number has no bits set -1U is returned.
1787 static unsigned tcLSB(const WordType *, unsigned n);
1788 static unsigned tcMSB(const WordType *parts, unsigned n);
1789
1790 /// Negate a bignum in-place.
1791 static void tcNegate(WordType *, unsigned);
1792
1793 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1794 static WordType tcAdd(WordType *, const WordType *, WordType carry, unsigned);
1795 /// DST += RHS. Returns the carry flag.
1796 static WordType tcAddPart(WordType *, WordType, unsigned);
1797
1798 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1799 static WordType tcSubtract(WordType *, const WordType *, WordType carry,
1800 unsigned);
1801 /// DST -= RHS. Returns the carry flag.
1802 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1803
1804 /// DST += SRC * MULTIPLIER + PART if add is true
1805 /// DST = SRC * MULTIPLIER + PART if add is false
1806 ///
1807 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1808 /// start at the same point, i.e. DST == SRC.
1809 ///
1810 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1811 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1812 /// result, and if all of the omitted higher parts were zero return zero,
1813 /// otherwise overflow occurred and return one.
1814 static int tcMultiplyPart(WordType *dst, const WordType *src,
1815 WordType multiplier, WordType carry,
1816 unsigned srcParts, unsigned dstParts, bool add);
1817
1818 /// DST = LHS * RHS, where DST has the same width as the operands and is
1819 /// filled with the least significant parts of the result. Returns one if
1820 /// overflow occurred, otherwise zero. DST must be disjoint from both
1821 /// operands.
1822 static int tcMultiply(WordType *, const WordType *, const WordType *,
1823 unsigned);
1824
1825 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1826 /// operands. No overflow occurs. DST must be disjoint from both operands.
1827 static void tcFullMultiply(WordType *, const WordType *, const WordType *,
1828 unsigned, unsigned);
1829
1830 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1831 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1832 /// REMAINDER to the remainder, return zero. i.e.
1833 ///
1834 /// OLD_LHS = RHS * LHS + REMAINDER
1835 ///
1836 /// SCRATCH is a bignum of the same size as the operands and result for use by
1837 /// the routine; its contents need not be initialized and are destroyed. LHS,
1838 /// REMAINDER and SCRATCH must be distinct.
1839 static int tcDivide(WordType *lhs, const WordType *rhs, WordType *remainder,
1840 WordType *scratch, unsigned parts);
1841
1842 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1843 /// restrictions on Count.
1844 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1845
1846 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1847 /// restrictions on Count.
1848 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1849
1850 /// Comparison (unsigned) of two bignums.
1851 static int tcCompare(const WordType *, const WordType *, unsigned);
1852
1853 /// Increment a bignum in-place. Return the carry flag.
1854 static WordType tcIncrement(WordType *dst, unsigned parts) {
1855 return tcAddPart(dst, 1, parts);
1856 }
1857
1858 /// Decrement a bignum in-place. Return the borrow flag.
1859 static WordType tcDecrement(WordType *dst, unsigned parts) {
1860 return tcSubtractPart(dst, 1, parts);
1861 }
1862
1863 /// Used to insert APInt objects, or objects that contain APInt objects, into
1864 /// FoldingSets.
1865 void Profile(FoldingSetNodeID &id) const;
1866
1867 /// debug method
1868 void dump() const;
1869
1870 /// Returns whether this instance allocated memory.
1871 bool needsCleanup() const { return !isSingleWord(); }
1872
1873private:
1874 /// This union is used to store the integer value. When the
1875 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
1876 union {
1877 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
1878 uint64_t *pVal; ///< Used to store the >64 bits integer value.
1879 } U;
1880
1881 unsigned BitWidth = 1; ///< The number of bits in this APInt.
1882
1883 friend struct DenseMapInfo<APInt, void>;
1884 friend class APSInt;
1885
1886 /// This constructor is used only internally for speed of construction of
1887 /// temporaries. It is unsafe since it takes ownership of the pointer, so it
1888 /// is not public.
1889 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) { U.pVal = val; }
1890
1891 /// Determine which word a bit is in.
1892 ///
1893 /// \returns the word position for the specified bit position.
1894 static unsigned whichWord(unsigned bitPosition) {
1895 return bitPosition / APINT_BITS_PER_WORD;
1896 }
1897
1898 /// Determine which bit in a word the specified bit position is in.
1899 static unsigned whichBit(unsigned bitPosition) {
1900 return bitPosition % APINT_BITS_PER_WORD;
1901 }
1902
1903 /// Get a single bit mask.
1904 ///
1905 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
1906 /// This method generates and returns a uint64_t (word) mask for a single
1907 /// bit at a specific bit position. This is used to mask the bit in the
1908 /// corresponding word.
1909 static uint64_t maskBit(unsigned bitPosition) {
1910 return 1ULL << whichBit(bitPosition);
1911 }
1912
1913 /// Clear unused high order bits
1914 ///
1915 /// This method is used internally to clear the top "N" bits in the high order
1916 /// word that are not used by the APInt. This is needed after the most
1917 /// significant word is assigned a value to ensure that those bits are
1918 /// zero'd out.
1919 APInt &clearUnusedBits() {
1920 // Compute how many bits are used in the final word.
1921 unsigned WordBits = ((BitWidth - 1) % APINT_BITS_PER_WORD) + 1;
1922
1923 // Mask out the high bits.
1924 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
1925 if (LLVM_UNLIKELY(BitWidth == 0)__builtin_expect((bool)(BitWidth == 0), false))
1926 mask = 0;
1927
1928 if (isSingleWord())
1929 U.VAL &= mask;
1930 else
1931 U.pVal[getNumWords() - 1] &= mask;
1932 return *this;
1933 }
1934
1935 /// Get the word corresponding to a bit position
1936 /// \returns the corresponding word for the specified bit position.
1937 uint64_t getWord(unsigned bitPosition) const {
1938 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
1939 }
1940
1941 /// Utility method to change the bit width of this APInt to new bit width,
1942 /// allocating and/or deallocating as necessary. There is no guarantee on the
1943 /// value of any bits upon return. Caller should populate the bits after.
1944 void reallocate(unsigned NewBitWidth);
1945
1946 /// Convert a char array into an APInt
1947 ///
1948 /// \param radix 2, 8, 10, 16, or 36
1949 /// Converts a string into a number. The string must be non-empty
1950 /// and well-formed as a number of the given base. The bit-width
1951 /// must be sufficient to hold the result.
1952 ///
1953 /// This is used by the constructors that take string arguments.
1954 ///
1955 /// StringRef::getAsInteger is superficially similar but (1) does
1956 /// not assume that the string is well-formed and (2) grows the
1957 /// result to hold the input.
1958 void fromString(unsigned numBits, StringRef str, uint8_t radix);
1959
1960 /// An internal division function for dividing APInts.
1961 ///
1962 /// This is used by the toString method to divide by the radix. It simply
1963 /// provides a more convenient form of divide for internal use since KnuthDiv
1964 /// has specific constraints on its inputs. If those constraints are not met
1965 /// then it provides a simpler form of divide.
1966 static void divide(const WordType *LHS, unsigned lhsWords,
1967 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
1968 WordType *Remainder);
1969
1970 /// out-of-line slow case for inline constructor
1971 void initSlowCase(uint64_t val, bool isSigned);
1972
1973 /// shared code between two array constructors
1974 void initFromArray(ArrayRef<uint64_t> array);
1975
1976 /// out-of-line slow case for inline copy constructor
1977 void initSlowCase(const APInt &that);
1978
1979 /// out-of-line slow case for shl
1980 void shlSlowCase(unsigned ShiftAmt);
1981
1982 /// out-of-line slow case for lshr.
1983 void lshrSlowCase(unsigned ShiftAmt);
1984
1985 /// out-of-line slow case for ashr.
1986 void ashrSlowCase(unsigned ShiftAmt);
1987
1988 /// out-of-line slow case for operator=
1989 void assignSlowCase(const APInt &RHS);
1990
1991 /// out-of-line slow case for operator==
1992 bool equalSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
1993
1994 /// out-of-line slow case for countLeadingZeros
1995 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
1996
1997 /// out-of-line slow case for countLeadingOnes.
1998 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
1999
2000 /// out-of-line slow case for countTrailingZeros.
2001 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
2002
2003 /// out-of-line slow case for countTrailingOnes
2004 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
2005
2006 /// out-of-line slow case for countPopulation
2007 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
2008
2009 /// out-of-line slow case for intersects.
2010 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
2011
2012 /// out-of-line slow case for isSubsetOf.
2013 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
2014
2015 /// out-of-line slow case for setBits.
2016 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
2017
2018 /// out-of-line slow case for flipAllBits.
2019 void flipAllBitsSlowCase();
2020
2021 /// out-of-line slow case for concat.
2022 APInt concatSlowCase(const APInt &NewLSB) const;
2023
2024 /// out-of-line slow case for operator&=.
2025 void andAssignSlowCase(const APInt &RHS);
2026
2027 /// out-of-line slow case for operator|=.
2028 void orAssignSlowCase(const APInt &RHS);
2029
2030 /// out-of-line slow case for operator^=.
2031 void xorAssignSlowCase(const APInt &RHS);
2032
2033 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
2034 /// to, or greater than RHS.
2035 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
2036
2037 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
2038 /// to, or greater than RHS.
2039 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
2040
2041 /// @}
2042};
2043
2044inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
2045
2046inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
2047
2048/// Unary bitwise complement operator.
2049///
2050/// \returns an APInt that is the bitwise complement of \p v.
2051inline APInt operator~(APInt v) {
2052 v.flipAllBits();
2053 return v;
2054}
2055
2056inline APInt operator&(APInt a, const APInt &b) {
2057 a &= b;
2058 return a;
2059}
2060
2061inline APInt operator&(const APInt &a, APInt &&b) {
2062 b &= a;
2063 return std::move(b);
2064}
2065
2066inline APInt operator&(APInt a, uint64_t RHS) {
2067 a &= RHS;
2068 return a;
2069}
2070
2071inline APInt operator&(uint64_t LHS, APInt b) {
2072 b &= LHS;
2073 return b;
2074}
2075
2076inline APInt operator|(APInt a, const APInt &b) {
2077 a |= b;
2078 return a;
2079}
2080
2081inline APInt operator|(const APInt &a, APInt &&b) {
2082 b |= a;
2083 return std::move(b);
2084}
2085
2086inline APInt operator|(APInt a, uint64_t RHS) {
2087 a |= RHS;
2088 return a;
2089}
2090
2091inline APInt operator|(uint64_t LHS, APInt b) {
2092 b |= LHS;
2093 return b;
2094}
2095
2096inline APInt operator^(APInt a, const APInt &b) {
2097 a ^= b;
2098 return a;
2099}
2100
2101inline APInt operator^(const APInt &a, APInt &&b) {
2102 b ^= a;
2103 return std::move(b);
2104}
2105
2106inline APInt operator^(APInt a, uint64_t RHS) {
2107 a ^= RHS;
2108 return a;
2109}
2110
2111inline APInt operator^(uint64_t LHS, APInt b) {
2112 b ^= LHS;
2113 return b;
2114}
2115
2116inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2117 I.print(OS, true);
2118 return OS;
2119}
2120
2121inline APInt operator-(APInt v) {
2122 v.negate();
2123 return v;
2124}
2125
2126inline APInt operator+(APInt a, const APInt &b) {
2127 a += b;
2128 return a;
2129}
2130
2131inline APInt operator+(const APInt &a, APInt &&b) {
2132 b += a;
2133 return std::move(b);
2134}
2135
2136inline APInt operator+(APInt a, uint64_t RHS) {
2137 a += RHS;
2138 return a;
2139}
2140
2141inline APInt operator+(uint64_t LHS, APInt b) {
2142 b += LHS;
2143 return b;
2144}
2145
2146inline APInt operator-(APInt a, const APInt &b) {
2147 a -= b;
2148 return a;
2149}
2150
2151inline APInt operator-(const APInt &a, APInt &&b) {
2152 b.negate();
2153 b += a;
2154 return std::move(b);
2155}
2156
2157inline APInt operator-(APInt a, uint64_t RHS) {
2158 a -= RHS;
2159 return a;
2160}
2161
2162inline APInt operator-(uint64_t LHS, APInt b) {
2163 b.negate();
2164 b += LHS;
2165 return b;
2166}
2167
2168inline APInt operator*(APInt a, uint64_t RHS) {
2169 a *= RHS;
2170 return a;
2171}
2172
2173inline APInt operator*(uint64_t LHS, APInt b) {
2174 b *= LHS;
2175 return b;
2176}
2177
2178namespace APIntOps {
2179
2180/// Determine the smaller of two APInts considered to be signed.
2181inline const APInt &smin(const APInt &A, const APInt &B) {
2182 return A.slt(B) ? A : B;
2183}
2184
2185/// Determine the larger of two APInts considered to be signed.
2186inline const APInt &smax(const APInt &A, const APInt &B) {
2187 return A.sgt(B) ? A : B;
2188}
2189
2190/// Determine the smaller of two APInts considered to be unsigned.
2191inline const APInt &umin(const APInt &A, const APInt &B) {
2192 return A.ult(B) ? A : B;
2193}
2194
2195/// Determine the larger of two APInts considered to be unsigned.
2196inline const APInt &umax(const APInt &A, const APInt &B) {
2197 return A.ugt(B) ? A : B;
2198}
2199
2200/// Compute GCD of two unsigned APInt values.
2201///
2202/// This function returns the greatest common divisor of the two APInt values
2203/// using Stein's algorithm.
2204///
2205/// \returns the greatest common divisor of A and B.
2206APInt GreatestCommonDivisor(APInt A, APInt B);
2207
2208/// Converts the given APInt to a double value.
2209///
2210/// Treats the APInt as an unsigned value for conversion purposes.
2211inline double RoundAPIntToDouble(const APInt &APIVal) {
2212 return APIVal.roundToDouble();
2213}
2214
2215/// Converts the given APInt to a double value.
2216///
2217/// Treats the APInt as a signed value for conversion purposes.
2218inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2219 return APIVal.signedRoundToDouble();
2220}
2221
2222/// Converts the given APInt to a float value.
2223inline float RoundAPIntToFloat(const APInt &APIVal) {
2224 return float(RoundAPIntToDouble(APIVal));
2225}
2226
2227/// Converts the given APInt to a float value.
2228///
2229/// Treats the APInt as a signed value for conversion purposes.
2230inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2231 return float(APIVal.signedRoundToDouble());
2232}
2233
2234/// Converts the given double value into a APInt.
2235///
2236/// This function convert a double value to an APInt value.
2237APInt RoundDoubleToAPInt(double Double, unsigned width);
2238
2239/// Converts a float value into a APInt.
2240///
2241/// Converts a float value into an APInt value.
2242inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2243 return RoundDoubleToAPInt(double(Float), width);
2244}
2245
2246/// Return A unsign-divided by B, rounded by the given rounding mode.
2247APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2248
2249/// Return A sign-divided by B, rounded by the given rounding mode.
2250APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2251
2252/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2253/// (e.g. 32 for i32).
2254/// This function finds the smallest number n, such that
2255/// (a) n >= 0 and q(n) = 0, or
2256/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2257/// integers, belong to two different intervals [Rk, Rk+R),
2258/// where R = 2^BW, and k is an integer.
2259/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2260/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2261/// subtraction (treated as addition of negated numbers) would always
2262/// count as an overflow, but here we want to allow values to decrease
2263/// and increase as long as they are within the same interval.
2264/// Specifically, adding of two negative numbers should not cause an
2265/// overflow (as long as the magnitude does not exceed the bit width).
2266/// On the other hand, given a positive number, adding a negative
2267/// number to it can give a negative result, which would cause the
2268/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2269/// treated as a special case of an overflow.
2270///
2271/// This function returns std::nullopt if after finding k that minimizes the
2272/// positive solution to q(n) = kR, both solutions are contained between
2273/// two consecutive integers.
2274///
2275/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2276/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2277/// virtue of *signed* overflow. This function will *not* find such an n,
2278/// however it may find a value of n satisfying the inequalities due to
2279/// an *unsigned* overflow (if the values are treated as unsigned).
2280/// To find a solution for a signed overflow, treat it as a problem of
2281/// finding an unsigned overflow with a range with of BW-1.
2282///
2283/// The returned value may have a different bit width from the input
2284/// coefficients.
2285std::optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2286 unsigned RangeWidth);
2287
2288/// Compare two values, and if they are different, return the position of the
2289/// most significant bit that is different in the values.
2290std::optional<unsigned> GetMostSignificantDifferentBit(const APInt &A,
2291 const APInt &B);
2292
2293/// Splat/Merge neighboring bits to widen/narrow the bitmask represented
2294/// by \param A to \param NewBitWidth bits.
2295///
2296/// MatchAnyBits: (Default)
2297/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
2298/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0111
2299///
2300/// MatchAllBits:
2301/// e.g. ScaleBitMask(0b0101, 8) -> 0b00110011
2302/// e.g. ScaleBitMask(0b00011011, 4) -> 0b0001
2303/// A.getBitwidth() or NewBitWidth must be a whole multiples of the other.
2304APInt ScaleBitMask(const APInt &A, unsigned NewBitWidth,
2305 bool MatchAllBits = false);
2306} // namespace APIntOps
2307
2308// See friend declaration above. This additional declaration is required in
2309// order to compile LLVM with IBM xlC compiler.
2310hash_code hash_value(const APInt &Arg);
2311
2312/// StoreIntToMemory - Fills the StoreBytes bytes of memory starting from Dst
2313/// with the integer held in IntVal.
2314void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
2315
2316/// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
2317/// from Src into IntVal, which is assumed to be wide enough and to hold zero.
2318void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
2319
2320/// Provide DenseMapInfo for APInt.
2321template <> struct DenseMapInfo<APInt, void> {
2322 static inline APInt getEmptyKey() {
2323 APInt V(nullptr, 0);
2324 V.U.VAL = ~0ULL;
2325 return V;
2326 }
2327
2328 static inline APInt getTombstoneKey() {
2329 APInt V(nullptr, 0);
2330 V.U.VAL = ~1ULL;
2331 return V;
2332 }
2333
2334 static unsigned getHashValue(const APInt &Key);
2335
2336 static bool isEqual(const APInt &LHS, const APInt &RHS) {
2337 return LHS.getBitWidth() == RHS.getBitWidth() && LHS == RHS;
2338 }
2339};
2340
2341} // namespace llvm
2342
2343#endif