Bug Summary

File:lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1544, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-config-compatibility-mode=true -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn348900/build-llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-8~svn348900/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn348900/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-command-line-argument -Wno-unknown-warning-option -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn348900/build-llvm/lib/Target/Hexagon -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-12-12-042652-12204-1 -x c++ /build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
11
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Triple.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/InstructionSimplify.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/MemoryLocation.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpander.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Transforms/Utils/Local.h"
30#include "llvm/Analysis/ValueTracking.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/Module.h"
47#include "llvm/IR/PatternMatch.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include <algorithm>
62#include <array>
63#include <cassert>
64#include <cstdint>
65#include <cstdlib>
66#include <deque>
67#include <functional>
68#include <iterator>
69#include <map>
70#include <set>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
77 cl::Hidden, cl::init(false),
78 cl::desc("Disable generation of memcpy in loop idiom recognition"));
79
80static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memmove in loop idiom recognition"));
83
84static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
85 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
86 "check guarding the memmove."));
87
88static cl::opt<unsigned> CompileTimeMemSizeThreshold(
89 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
90 cl::desc("Threshold (in bytes) to perform the transformation, if the "
91 "runtime loop count (mem transfer size) is known at compile-time."));
92
93static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
94 cl::Hidden, cl::init(true),
95 cl::desc("Only enable generating memmove in non-nested loops"));
96
97cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
98 cl::Hidden, cl::init(false),
99 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
100
101static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
102 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
103
104static const char *HexagonVolatileMemcpyName
105 = "hexagon_memcpy_forward_vp4cp4n2";
106
107
108namespace llvm {
109
110 void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
111 Pass *createHexagonLoopIdiomPass();
112
113} // end namespace llvm
114
115namespace {
116
117 class HexagonLoopIdiomRecognize : public LoopPass {
118 public:
119 static char ID;
120
121 explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
122 initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
123 }
124
125 StringRef getPassName() const override {
126 return "Recognize Hexagon-specific loop idioms";
127 }
128
129 void getAnalysisUsage(AnalysisUsage &AU) const override {
130 AU.addRequired<LoopInfoWrapperPass>();
131 AU.addRequiredID(LoopSimplifyID);
132 AU.addRequiredID(LCSSAID);
133 AU.addRequired<AAResultsWrapperPass>();
134 AU.addPreserved<AAResultsWrapperPass>();
135 AU.addRequired<ScalarEvolutionWrapperPass>();
136 AU.addRequired<DominatorTreeWrapperPass>();
137 AU.addRequired<TargetLibraryInfoWrapperPass>();
138 AU.addPreserved<TargetLibraryInfoWrapperPass>();
139 }
140
141 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
142
143 private:
144 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
145 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
146 void collectStores(Loop *CurLoop, BasicBlock *BB,
147 SmallVectorImpl<StoreInst*> &Stores);
148 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
149 bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
150 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
151 SmallVectorImpl<BasicBlock*> &ExitBlocks);
152 bool runOnCountableLoop(Loop *L);
153
154 AliasAnalysis *AA;
155 const DataLayout *DL;
156 DominatorTree *DT;
157 LoopInfo *LF;
158 const TargetLibraryInfo *TLI;
159 ScalarEvolution *SE;
160 bool HasMemcpy, HasMemmove;
161 };
162
163 struct Simplifier {
164 struct Rule {
165 using FuncType = std::function<Value* (Instruction*, LLVMContext&)>;
166 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
167 StringRef Name; // For debugging.
168 FuncType Fn;
169 };
170
171 void addRule(StringRef N, const Rule::FuncType &F) {
172 Rules.push_back(Rule(N, F));
173 }
174
175 private:
176 struct WorkListType {
177 WorkListType() = default;
178
179 void push_back(Value* V) {
180 // Do not push back duplicates.
181 if (!S.count(V)) { Q.push_back(V); S.insert(V); }
182 }
183
184 Value *pop_front_val() {
185 Value *V = Q.front(); Q.pop_front(); S.erase(V);
186 return V;
187 }
188
189 bool empty() const { return Q.empty(); }
190
191 private:
192 std::deque<Value*> Q;
193 std::set<Value*> S;
194 };
195
196 using ValueSetType = std::set<Value *>;
197
198 std::vector<Rule> Rules;
199
200 public:
201 struct Context {
202 using ValueMapType = DenseMap<Value *, Value *>;
203
204 Value *Root;
205 ValueSetType Used; // The set of all cloned values used by Root.
206 ValueSetType Clones; // The set of all cloned values.
207 LLVMContext &Ctx;
208
209 Context(Instruction *Exp)
210 : Ctx(Exp->getParent()->getParent()->getContext()) {
211 initialize(Exp);
212 }
213
214 ~Context() { cleanup(); }
215
216 void print(raw_ostream &OS, const Value *V) const;
217 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
218
219 private:
220 friend struct Simplifier;
221
222 void initialize(Instruction *Exp);
223 void cleanup();
224
225 template <typename FuncT> void traverse(Value *V, FuncT F);
226 void record(Value *V);
227 void use(Value *V);
228 void unuse(Value *V);
229
230 bool equal(const Instruction *I, const Instruction *J) const;
231 Value *find(Value *Tree, Value *Sub) const;
232 Value *subst(Value *Tree, Value *OldV, Value *NewV);
233 void replace(Value *OldV, Value *NewV);
234 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
235 };
236
237 Value *simplify(Context &C);
238 };
239
240 struct PE {
241 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
242
243 const Simplifier::Context &C;
244 const Value *V;
245 };
246
247 LLVM_ATTRIBUTE_USED__attribute__((__used__))
248 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
249 P.C.print(OS, P.V ? P.V : P.C.Root);
250 return OS;
251 }
252
253} // end anonymous namespace
254
255char HexagonLoopIdiomRecognize::ID = 0;
256
257INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
258 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
259INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
260INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
261INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
262INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
263INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
264INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
265INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
266INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
267 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
268
269template <typename FuncT>
270void Simplifier::Context::traverse(Value *V, FuncT F) {
271 WorkListType Q;
272 Q.push_back(V);
273
274 while (!Q.empty()) {
275 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
276 if (!U || U->getParent())
277 continue;
278 if (!F(U))
279 continue;
280 for (Value *Op : U->operands())
281 Q.push_back(Op);
282 }
283}
284
285void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
286 const auto *U = dyn_cast<const Instruction>(V);
287 if (!U) {
288 OS << V << '(' << *V << ')';
289 return;
290 }
291
292 if (U->getParent()) {
293 OS << U << '(';
294 U->printAsOperand(OS, true);
295 OS << ')';
296 return;
297 }
298
299 unsigned N = U->getNumOperands();
300 if (N != 0)
301 OS << U << '(';
302 OS << U->getOpcodeName();
303 for (const Value *Op : U->operands()) {
304 OS << ' ';
305 print(OS, Op);
306 }
307 if (N != 0)
308 OS << ')';
309}
310
311void Simplifier::Context::initialize(Instruction *Exp) {
312 // Perform a deep clone of the expression, set Root to the root
313 // of the clone, and build a map from the cloned values to the
314 // original ones.
315 ValueMapType M;
316 BasicBlock *Block = Exp->getParent();
317 WorkListType Q;
318 Q.push_back(Exp);
319
320 while (!Q.empty()) {
321 Value *V = Q.pop_front_val();
322 if (M.find(V) != M.end())
323 continue;
324 if (Instruction *U = dyn_cast<Instruction>(V)) {
325 if (isa<PHINode>(U) || U->getParent() != Block)
326 continue;
327 for (Value *Op : U->operands())
328 Q.push_back(Op);
329 M.insert({U, U->clone()});
330 }
331 }
332
333 for (std::pair<Value*,Value*> P : M) {
334 Instruction *U = cast<Instruction>(P.second);
335 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
336 auto F = M.find(U->getOperand(i));
337 if (F != M.end())
338 U->setOperand(i, F->second);
339 }
340 }
341
342 auto R = M.find(Exp);
343 assert(R != M.end())((R != M.end()) ? static_cast<void> (0) : __assert_fail
("R != M.end()", "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 343, __PRETTY_FUNCTION__))
;
344 Root = R->second;
345
346 record(Root);
347 use(Root);
348}
349
350void Simplifier::Context::record(Value *V) {
351 auto Record = [this](Instruction *U) -> bool {
352 Clones.insert(U);
353 return true;
354 };
355 traverse(V, Record);
356}
357
358void Simplifier::Context::use(Value *V) {
359 auto Use = [this](Instruction *U) -> bool {
360 Used.insert(U);
361 return true;
362 };
363 traverse(V, Use);
364}
365
366void Simplifier::Context::unuse(Value *V) {
367 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
368 return;
369
370 auto Unuse = [this](Instruction *U) -> bool {
371 if (!U->use_empty())
372 return false;
373 Used.erase(U);
374 return true;
375 };
376 traverse(V, Unuse);
377}
378
379Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
380 if (Tree == OldV)
381 return NewV;
382 if (OldV == NewV)
383 return Tree;
384
385 WorkListType Q;
386 Q.push_back(Tree);
387 while (!Q.empty()) {
388 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
389 // If U is not an instruction, or it's not a clone, skip it.
390 if (!U || U->getParent())
391 continue;
392 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
393 Value *Op = U->getOperand(i);
394 if (Op == OldV) {
395 U->setOperand(i, NewV);
396 unuse(OldV);
397 } else {
398 Q.push_back(Op);
399 }
400 }
401 }
402 return Tree;
403}
404
405void Simplifier::Context::replace(Value *OldV, Value *NewV) {
406 if (Root == OldV) {
407 Root = NewV;
408 use(Root);
409 return;
410 }
411
412 // NewV may be a complex tree that has just been created by one of the
413 // transformation rules. We need to make sure that it is commoned with
414 // the existing Root to the maximum extent possible.
415 // Identify all subtrees of NewV (including NewV itself) that have
416 // equivalent counterparts in Root, and replace those subtrees with
417 // these counterparts.
418 WorkListType Q;
419 Q.push_back(NewV);
420 while (!Q.empty()) {
421 Value *V = Q.pop_front_val();
422 Instruction *U = dyn_cast<Instruction>(V);
423 if (!U || U->getParent())
424 continue;
425 if (Value *DupV = find(Root, V)) {
426 if (DupV != V)
427 NewV = subst(NewV, V, DupV);
428 } else {
429 for (Value *Op : U->operands())
430 Q.push_back(Op);
431 }
432 }
433
434 // Now, simply replace OldV with NewV in Root.
435 Root = subst(Root, OldV, NewV);
436 use(Root);
437}
438
439void Simplifier::Context::cleanup() {
440 for (Value *V : Clones) {
441 Instruction *U = cast<Instruction>(V);
442 if (!U->getParent())
443 U->dropAllReferences();
444 }
445
446 for (Value *V : Clones) {
447 Instruction *U = cast<Instruction>(V);
448 if (!U->getParent())
449 U->deleteValue();
450 }
451}
452
453bool Simplifier::Context::equal(const Instruction *I,
454 const Instruction *J) const {
455 if (I == J)
456 return true;
457 if (!I->isSameOperationAs(J))
458 return false;
459 if (isa<PHINode>(I))
460 return I->isIdenticalTo(J);
461
462 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
463 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
464 if (OpI == OpJ)
465 continue;
466 auto *InI = dyn_cast<const Instruction>(OpI);
467 auto *InJ = dyn_cast<const Instruction>(OpJ);
468 if (InI && InJ) {
469 if (!equal(InI, InJ))
470 return false;
471 } else if (InI != InJ || !InI)
472 return false;
473 }
474 return true;
475}
476
477Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
478 Instruction *SubI = dyn_cast<Instruction>(Sub);
479 WorkListType Q;
480 Q.push_back(Tree);
481
482 while (!Q.empty()) {
483 Value *V = Q.pop_front_val();
484 if (V == Sub)
485 return V;
486 Instruction *U = dyn_cast<Instruction>(V);
487 if (!U || U->getParent())
488 continue;
489 if (SubI && equal(SubI, U))
490 return U;
491 assert(!isa<PHINode>(U))((!isa<PHINode>(U)) ? static_cast<void> (0) : __assert_fail
("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 491, __PRETTY_FUNCTION__))
;
492 for (Value *Op : U->operands())
493 Q.push_back(Op);
494 }
495 return nullptr;
496}
497
498void Simplifier::Context::link(Instruction *I, BasicBlock *B,
499 BasicBlock::iterator At) {
500 if (I->getParent())
501 return;
502
503 for (Value *Op : I->operands()) {
504 if (Instruction *OpI = dyn_cast<Instruction>(Op))
505 link(OpI, B, At);
506 }
507
508 B->getInstList().insert(At, I);
509}
510
511Value *Simplifier::Context::materialize(BasicBlock *B,
512 BasicBlock::iterator At) {
513 if (Instruction *RootI = dyn_cast<Instruction>(Root))
514 link(RootI, B, At);
515 return Root;
516}
517
518Value *Simplifier::simplify(Context &C) {
519 WorkListType Q;
520 Q.push_back(C.Root);
521 unsigned Count = 0;
522 const unsigned Limit = SimplifyLimit;
523
524 while (!Q.empty()) {
525 if (Count++ >= Limit)
526 break;
527 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
528 if (!U || U->getParent() || !C.Used.count(U))
529 continue;
530 bool Changed = false;
531 for (Rule &R : Rules) {
532 Value *W = R.Fn(U, C.Ctx);
533 if (!W)
534 continue;
535 Changed = true;
536 C.record(W);
537 C.replace(U, W);
538 Q.push_back(C.Root);
539 break;
540 }
541 if (!Changed) {
542 for (Value *Op : U->operands())
543 Q.push_back(Op);
544 }
545 }
546 return Count < Limit ? C.Root : nullptr;
547}
548
549//===----------------------------------------------------------------------===//
550//
551// Implementation of PolynomialMultiplyRecognize
552//
553//===----------------------------------------------------------------------===//
554
555namespace {
556
557 class PolynomialMultiplyRecognize {
558 public:
559 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
560 const DominatorTree &dt, const TargetLibraryInfo &tli,
561 ScalarEvolution &se)
562 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
563
564 bool recognize();
565
566 private:
567 using ValueSeq = SetVector<Value *>;
568
569 IntegerType *getPmpyType() const {
570 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
571 return IntegerType::get(Ctx, 32);
572 }
573
574 bool isPromotableTo(Value *V, IntegerType *Ty);
575 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
576 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
577
578 Value *getCountIV(BasicBlock *BB);
579 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
580 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
581 ValueSeq &Late);
582 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
583 bool commutesWithShift(Instruction *I);
584 bool highBitsAreZero(Value *V, unsigned IterCount);
585 bool keepsHighBitsZero(Value *V, unsigned IterCount);
586 bool isOperandShifted(Instruction *I, Value *Op);
587 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
588 unsigned IterCount);
589 void cleanupLoopBody(BasicBlock *LoopB);
590
591 struct ParsedValues {
592 ParsedValues() = default;
593
594 Value *M = nullptr;
595 Value *P = nullptr;
596 Value *Q = nullptr;
597 Value *R = nullptr;
598 Value *X = nullptr;
599 Instruction *Res = nullptr;
600 unsigned IterCount = 0;
601 bool Left = false;
602 bool Inv = false;
603 };
604
605 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
606 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
607 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
608 Value *CIV, ParsedValues &PV, bool PreScan);
609 unsigned getInverseMxN(unsigned QP);
610 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
611
612 void setupPreSimplifier(Simplifier &S);
613 void setupPostSimplifier(Simplifier &S);
614
615 Loop *CurLoop;
616 const DataLayout &DL;
617 const DominatorTree &DT;
618 const TargetLibraryInfo &TLI;
619 ScalarEvolution &SE;
620 };
621
622} // end anonymous namespace
623
624Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
625 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
626 if (std::distance(PI, PE) != 2)
627 return nullptr;
628 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
629
630 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
631 auto *PN = cast<PHINode>(I);
632 Value *InitV = PN->getIncomingValueForBlock(PB);
633 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
634 continue;
635 Value *IterV = PN->getIncomingValueForBlock(BB);
636 if (!isa<BinaryOperator>(IterV))
637 continue;
638 auto *BO = dyn_cast<BinaryOperator>(IterV);
639 if (BO->getOpcode() != Instruction::Add)
640 continue;
641 Value *IncV = nullptr;
642 if (BO->getOperand(0) == PN)
643 IncV = BO->getOperand(1);
644 else if (BO->getOperand(1) == PN)
645 IncV = BO->getOperand(0);
646 if (IncV == nullptr)
647 continue;
648
649 if (auto *T = dyn_cast<ConstantInt>(IncV))
650 if (T->getZExtValue() == 1)
651 return PN;
652 }
653 return nullptr;
654}
655
656static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
657 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
658 Use &TheUse = UI.getUse();
659 ++UI;
660 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
661 if (BB == II->getParent())
662 II->replaceUsesOfWith(I, J);
663 }
664}
665
666bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
667 Value *CIV, ParsedValues &PV) {
668 // Match the following:
669 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
670 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
671 // The condition may also check for equality with the masked value, i.e
672 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
673 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
674
675 Value *CondV = SelI->getCondition();
676 Value *TrueV = SelI->getTrueValue();
677 Value *FalseV = SelI->getFalseValue();
678
679 using namespace PatternMatch;
680
681 CmpInst::Predicate P;
682 Value *A = nullptr, *B = nullptr, *C = nullptr;
683
684 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
685 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
686 return false;
687 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
688 return false;
689 // Matched: select (A & B) == C ? ... : ...
690 // select (A & B) != C ? ... : ...
691
692 Value *X = nullptr, *Sh1 = nullptr;
693 // Check (A & B) for (X & (1 << i)):
694 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
695 Sh1 = A;
696 X = B;
697 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
698 Sh1 = B;
699 X = A;
700 } else {
701 // TODO: Could also check for an induction variable containing single
702 // bit shifted left by 1 in each iteration.
703 return false;
704 }
705
706 bool TrueIfZero;
707
708 // Check C against the possible values for comparison: 0 and (1 << i):
709 if (match(C, m_Zero()))
710 TrueIfZero = (P == CmpInst::ICMP_EQ);
711 else if (C == Sh1)
712 TrueIfZero = (P == CmpInst::ICMP_NE);
713 else
714 return false;
715
716 // So far, matched:
717 // select (X & (1 << i)) ? ... : ...
718 // including variations of the check against zero/non-zero value.
719
720 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
721 if (TrueIfZero) {
722 ShouldSameV = TrueV;
723 ShouldXoredV = FalseV;
724 } else {
725 ShouldSameV = FalseV;
726 ShouldXoredV = TrueV;
727 }
728
729 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
730 Value *T = nullptr;
731 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
732 // Matched: select +++ ? ... : Y ^ Z
733 // select +++ ? Y ^ Z : ...
734 // where +++ denotes previously checked matches.
735 if (ShouldSameV == Y)
736 T = Z;
737 else if (ShouldSameV == Z)
738 T = Y;
739 else
740 return false;
741 R = ShouldSameV;
742 // Matched: select +++ ? R : R ^ T
743 // select +++ ? R ^ T : R
744 // depending on TrueIfZero.
745
746 } else if (match(ShouldSameV, m_Zero())) {
747 // Matched: select +++ ? 0 : ...
748 // select +++ ? ... : 0
749 if (!SelI->hasOneUse())
750 return false;
751 T = ShouldXoredV;
752 // Matched: select +++ ? 0 : T
753 // select +++ ? T : 0
754
755 Value *U = *SelI->user_begin();
756 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
757 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
758 return false;
759 // Matched: xor (select +++ ? 0 : T), R
760 // xor (select +++ ? T : 0), R
761 } else
762 return false;
763
764 // The xor input value T is isolated into its own match so that it could
765 // be checked against an induction variable containing a shifted bit
766 // (todo).
767 // For now, check against (Q << i).
768 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
769 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
770 return false;
771 // Matched: select +++ ? R : R ^ (Q << i)
772 // select +++ ? R ^ (Q << i) : R
773
774 PV.X = X;
775 PV.Q = Q;
776 PV.R = R;
777 PV.Left = true;
778 return true;
779}
780
781bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
782 ParsedValues &PV) {
783 // Match the following:
784 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
785 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
786 // The condition may also check for equality with the masked value, i.e
787 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
788 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
789
790 Value *CondV = SelI->getCondition();
791 Value *TrueV = SelI->getTrueValue();
792 Value *FalseV = SelI->getFalseValue();
793
794 using namespace PatternMatch;
795
796 Value *C = nullptr;
797 CmpInst::Predicate P;
798 bool TrueIfZero;
799
800 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
801 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
802 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
803 return false;
804 // Matched: select C == 0 ? ... : ...
805 // select C != 0 ? ... : ...
806 TrueIfZero = (P == CmpInst::ICMP_EQ);
807 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
808 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
809 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
810 return false;
811 // Matched: select C == 1 ? ... : ...
812 // select C != 1 ? ... : ...
813 TrueIfZero = (P == CmpInst::ICMP_NE);
814 } else
815 return false;
816
817 Value *X = nullptr;
818 if (!match(C, m_And(m_Value(X), m_One())) &&
819 !match(C, m_And(m_One(), m_Value(X))))
820 return false;
821 // Matched: select (X & 1) == +++ ? ... : ...
822 // select (X & 1) != +++ ? ... : ...
823
824 Value *R = nullptr, *Q = nullptr;
825 if (TrueIfZero) {
826 // The select's condition is true if the tested bit is 0.
827 // TrueV must be the shift, FalseV must be the xor.
828 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
829 return false;
830 // Matched: select +++ ? (R >> 1) : ...
831 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
832 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
833 return false;
834 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
835 // with commuting ^.
836 } else {
837 // The select's condition is true if the tested bit is 1.
838 // TrueV must be the xor, FalseV must be the shift.
839 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
840 return false;
841 // Matched: select +++ ? ... : (R >> 1)
842 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
843 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
844 return false;
845 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
846 // with commuting ^.
847 }
848
849 PV.X = X;
850 PV.Q = Q;
851 PV.R = R;
852 PV.Left = false;
853 return true;
854}
855
856bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
857 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
858 bool PreScan) {
859 using namespace PatternMatch;
860
861 // The basic pattern for R = P.Q is:
862 // for i = 0..31
863 // R = phi (0, R')
864 // if (P & (1 << i)) ; test-bit(P, i)
865 // R' = R ^ (Q << i)
866 //
867 // Similarly, the basic pattern for R = (P/Q).Q - P
868 // for i = 0..31
869 // R = phi(P, R')
870 // if (R & (1 << i))
871 // R' = R ^ (Q << i)
872
873 // There exist idioms, where instead of Q being shifted left, P is shifted
874 // right. This produces a result that is shifted right by 32 bits (the
875 // non-shifted result is 64-bit).
876 //
877 // For R = P.Q, this would be:
878 // for i = 0..31
879 // R = phi (0, R')
880 // if ((P >> i) & 1)
881 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
882 // else ; be shifted by 1, not i.
883 // R' = R >> 1
884 //
885 // And for the inverse:
886 // for i = 0..31
887 // R = phi (P, R')
888 // if (R & 1)
889 // R' = (R >> 1) ^ Q
890 // else
891 // R' = R >> 1
892
893 // The left-shifting idioms share the same pattern:
894 // select (X & (1 << i)) ? R ^ (Q << i) : R
895 // Similarly for right-shifting idioms:
896 // select (X & 1) ? (R >> 1) ^ Q
897
898 if (matchLeftShift(SelI, CIV, PV)) {
899 // If this is a pre-scan, getting this far is sufficient.
900 if (PreScan)
901 return true;
902
903 // Need to make sure that the SelI goes back into R.
904 auto *RPhi = dyn_cast<PHINode>(PV.R);
905 if (!RPhi)
906 return false;
907 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
908 return false;
909 PV.Res = SelI;
910
911 // If X is loop invariant, it must be the input polynomial, and the
912 // idiom is the basic polynomial multiply.
913 if (CurLoop->isLoopInvariant(PV.X)) {
914 PV.P = PV.X;
915 PV.Inv = false;
916 } else {
917 // X is not loop invariant. If X == R, this is the inverse pmpy.
918 // Otherwise, check for an xor with an invariant value. If the
919 // variable argument to the xor is R, then this is still a valid
920 // inverse pmpy.
921 PV.Inv = true;
922 if (PV.X != PV.R) {
923 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
924 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
925 return false;
926 auto *I1 = dyn_cast<Instruction>(X1);
927 auto *I2 = dyn_cast<Instruction>(X2);
928 if (!I1 || I1->getParent() != LoopB) {
929 Var = X2;
930 Inv = X1;
931 } else if (!I2 || I2->getParent() != LoopB) {
932 Var = X1;
933 Inv = X2;
934 } else
935 return false;
936 if (Var != PV.R)
937 return false;
938 PV.M = Inv;
939 }
940 // The input polynomial P still needs to be determined. It will be
941 // the entry value of R.
942 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
943 PV.P = EntryP;
944 }
945
946 return true;
947 }
948
949 if (matchRightShift(SelI, PV)) {
950 // If this is an inverse pattern, the Q polynomial must be known at
951 // compile time.
952 if (PV.Inv && !isa<ConstantInt>(PV.Q))
953 return false;
954 if (PreScan)
955 return true;
956 // There is no exact matching of right-shift pmpy.
957 return false;
958 }
959
960 return false;
961}
962
963bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
964 IntegerType *DestTy) {
965 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
966 if (!T || T->getBitWidth() > DestTy->getBitWidth())
967 return false;
968 if (T->getBitWidth() == DestTy->getBitWidth())
969 return true;
970 // Non-instructions are promotable. The reason why an instruction may not
971 // be promotable is that it may produce a different result if its operands
972 // and the result are promoted, for example, it may produce more non-zero
973 // bits. While it would still be possible to represent the proper result
974 // in a wider type, it may require adding additional instructions (which
975 // we don't want to do).
976 Instruction *In = dyn_cast<Instruction>(Val);
977 if (!In)
978 return true;
979 // The bitwidth of the source type is smaller than the destination.
980 // Check if the individual operation can be promoted.
981 switch (In->getOpcode()) {
982 case Instruction::PHI:
983 case Instruction::ZExt:
984 case Instruction::And:
985 case Instruction::Or:
986 case Instruction::Xor:
987 case Instruction::LShr: // Shift right is ok.
988 case Instruction::Select:
989 case Instruction::Trunc:
990 return true;
991 case Instruction::ICmp:
992 if (CmpInst *CI = cast<CmpInst>(In))
993 return CI->isEquality() || CI->isUnsigned();
994 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 994)
;
995 case Instruction::Add:
996 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
997 }
998 return false;
999}
1000
1001void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1002 IntegerType *DestTy, BasicBlock *LoopB) {
1003 Type *OrigTy = In->getType();
1004
1005 // Leave boolean values alone.
1006 if (!In->getType()->isIntegerTy(1))
1007 In->mutateType(DestTy);
1008 unsigned DestBW = DestTy->getBitWidth();
1009
1010 // Handle PHIs.
1011 if (PHINode *P = dyn_cast<PHINode>(In)) {
1012 unsigned N = P->getNumIncomingValues();
1013 for (unsigned i = 0; i != N; ++i) {
1014 BasicBlock *InB = P->getIncomingBlock(i);
1015 if (InB == LoopB)
1016 continue;
1017 Value *InV = P->getIncomingValue(i);
1018 IntegerType *Ty = cast<IntegerType>(InV->getType());
1019 // Do not promote values in PHI nodes of type i1.
1020 if (Ty != P->getType()) {
1021 // If the value type does not match the PHI type, the PHI type
1022 // must have been promoted.
1023 assert(Ty->getBitWidth() < DestBW)((Ty->getBitWidth() < DestBW) ? static_cast<void>
(0) : __assert_fail ("Ty->getBitWidth() < DestBW", "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1023, __PRETTY_FUNCTION__))
;
1024 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1025 P->setIncomingValue(i, InV);
1026 }
1027 }
1028 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1029 Value *Op = Z->getOperand(0);
1030 if (Op->getType() == Z->getType())
1031 Z->replaceAllUsesWith(Op);
1032 Z->eraseFromParent();
1033 return;
1034 }
1035 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1036 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1037 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1038 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1039 T->replaceAllUsesWith(And);
1040 T->eraseFromParent();
1041 return;
1042 }
1043
1044 // Promote immediates.
1045 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1046 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1047 if (CI->getType()->getBitWidth() < DestBW)
1048 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1049 }
1050}
1051
1052bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1053 BasicBlock *ExitB) {
1054 assert(LoopB)((LoopB) ? static_cast<void> (0) : __assert_fail ("LoopB"
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1054, __PRETTY_FUNCTION__))
;
1055 // Skip loops where the exit block has more than one predecessor. The values
1056 // coming from the loop block will be promoted to another type, and so the
1057 // values coming into the exit block from other predecessors would also have
1058 // to be promoted.
1059 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1060 return false;
1061 IntegerType *DestTy = getPmpyType();
1062 // Check if the exit values have types that are no wider than the type
1063 // that we want to promote to.
1064 unsigned DestBW = DestTy->getBitWidth();
1065 for (PHINode &P : ExitB->phis()) {
1066 if (P.getNumIncomingValues() != 1)
1067 return false;
1068 assert(P.getIncomingBlock(0) == LoopB)((P.getIncomingBlock(0) == LoopB) ? static_cast<void> (
0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1068, __PRETTY_FUNCTION__))
;
1069 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1070 if (!T || T->getBitWidth() > DestBW)
1071 return false;
1072 }
1073
1074 // Check all instructions in the loop.
1075 for (Instruction &In : *LoopB)
1076 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1077 return false;
1078
1079 // Perform the promotion.
1080 std::vector<Instruction*> LoopIns;
1081 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1082 [](Instruction &In) { return &In; });
1083 for (Instruction *In : LoopIns)
1084 promoteTo(In, DestTy, LoopB);
1085
1086 // Fix up the PHI nodes in the exit block.
1087 Instruction *EndI = ExitB->getFirstNonPHI();
1088 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1089 for (auto I = ExitB->begin(); I != End; ++I) {
1090 PHINode *P = dyn_cast<PHINode>(I);
1091 if (!P)
1092 break;
1093 Type *Ty0 = P->getIncomingValue(0)->getType();
1094 Type *PTy = P->getType();
1095 if (PTy != Ty0) {
1096 assert(Ty0 == DestTy)((Ty0 == DestTy) ? static_cast<void> (0) : __assert_fail
("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1096, __PRETTY_FUNCTION__))
;
1097 // In order to create the trunc, P must have the promoted type.
1098 P->mutateType(Ty0);
1099 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1100 // In order for the RAUW to work, the types of P and T must match.
1101 P->mutateType(PTy);
1102 P->replaceAllUsesWith(T);
1103 // Final update of the P's type.
1104 P->mutateType(Ty0);
1105 cast<Instruction>(T)->setOperand(0, P);
1106 }
1107 }
1108
1109 return true;
1110}
1111
1112bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1113 ValueSeq &Cycle) {
1114 // Out = ..., In, ...
1115 if (Out == In)
1116 return true;
1117
1118 auto *BB = cast<Instruction>(Out)->getParent();
1119 bool HadPhi = false;
1120
1121 for (auto U : Out->users()) {
1122 auto *I = dyn_cast<Instruction>(&*U);
1123 if (I == nullptr || I->getParent() != BB)
1124 continue;
1125 // Make sure that there are no multi-iteration cycles, e.g.
1126 // p1 = phi(p2)
1127 // p2 = phi(p1)
1128 // The cycle p1->p2->p1 would span two loop iterations.
1129 // Check that there is only one phi in the cycle.
1130 bool IsPhi = isa<PHINode>(I);
1131 if (IsPhi && HadPhi)
1132 return false;
1133 HadPhi |= IsPhi;
1134 if (Cycle.count(I))
1135 return false;
1136 Cycle.insert(I);
1137 if (findCycle(I, In, Cycle))
1138 break;
1139 Cycle.remove(I);
1140 }
1141 return !Cycle.empty();
1142}
1143
1144void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1145 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1146 // All the values in the cycle that are between the phi node and the
1147 // divider instruction will be classified as "early", all other values
1148 // will be "late".
1149
1150 bool IsE = true;
1151 unsigned I, N = Cycle.size();
1152 for (I = 0; I < N; ++I) {
1153 Value *V = Cycle[I];
1154 if (DivI == V)
1155 IsE = false;
1156 else if (!isa<PHINode>(V))
1157 continue;
1158 // Stop if found either.
1159 break;
1160 }
1161 // "I" is the index of either DivI or the phi node, whichever was first.
1162 // "E" is "false" or "true" respectively.
1163 ValueSeq &First = !IsE ? Early : Late;
1164 for (unsigned J = 0; J < I; ++J)
1165 First.insert(Cycle[J]);
1166
1167 ValueSeq &Second = IsE ? Early : Late;
1168 Second.insert(Cycle[I]);
1169 for (++I; I < N; ++I) {
1170 Value *V = Cycle[I];
1171 if (DivI == V || isa<PHINode>(V))
1172 break;
1173 Second.insert(V);
1174 }
1175
1176 for (; I < N; ++I)
1177 First.insert(Cycle[I]);
1178}
1179
1180bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1181 ValueSeq &Early, ValueSeq &Late) {
1182 // Select is an exception, since the condition value does not have to be
1183 // classified in the same way as the true/false values. The true/false
1184 // values do have to be both early or both late.
1185 if (UseI->getOpcode() == Instruction::Select) {
1186 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1187 if (Early.count(TV) || Early.count(FV)) {
1188 if (Late.count(TV) || Late.count(FV))
1189 return false;
1190 Early.insert(UseI);
1191 } else if (Late.count(TV) || Late.count(FV)) {
1192 if (Early.count(TV) || Early.count(FV))
1193 return false;
1194 Late.insert(UseI);
1195 }
1196 return true;
1197 }
1198
1199 // Not sure what would be the example of this, but the code below relies
1200 // on having at least one operand.
1201 if (UseI->getNumOperands() == 0)
1202 return true;
1203
1204 bool AE = true, AL = true;
1205 for (auto &I : UseI->operands()) {
1206 if (Early.count(&*I))
1207 AL = false;
1208 else if (Late.count(&*I))
1209 AE = false;
1210 }
1211 // If the operands appear "all early" and "all late" at the same time,
1212 // then it means that none of them are actually classified as either.
1213 // This is harmless.
1214 if (AE && AL)
1215 return true;
1216 // Conversely, if they are neither "all early" nor "all late", then
1217 // we have a mixture of early and late operands that is not a known
1218 // exception.
1219 if (!AE && !AL)
1220 return false;
1221
1222 // Check that we have covered the two special cases.
1223 assert(AE != AL)((AE != AL) ? static_cast<void> (0) : __assert_fail ("AE != AL"
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1223, __PRETTY_FUNCTION__))
;
1224
1225 if (AE)
1226 Early.insert(UseI);
1227 else
1228 Late.insert(UseI);
1229 return true;
1230}
1231
1232bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1233 switch (I->getOpcode()) {
1234 case Instruction::And:
1235 case Instruction::Or:
1236 case Instruction::Xor:
1237 case Instruction::LShr:
1238 case Instruction::Shl:
1239 case Instruction::Select:
1240 case Instruction::ICmp:
1241 case Instruction::PHI:
1242 break;
1243 default:
1244 return false;
1245 }
1246 return true;
1247}
1248
1249bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1250 unsigned IterCount) {
1251 auto *T = dyn_cast<IntegerType>(V->getType());
1252 if (!T)
1253 return false;
1254
1255 KnownBits Known(T->getBitWidth());
1256 computeKnownBits(V, Known, DL);
1257 return Known.countMinLeadingZeros() >= IterCount;
1258}
1259
1260bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1261 unsigned IterCount) {
1262 // Assume that all inputs to the value have the high bits zero.
1263 // Check if the value itself preserves the zeros in the high bits.
1264 if (auto *C = dyn_cast<ConstantInt>(V))
1265 return C->getValue().countLeadingZeros() >= IterCount;
1266
1267 if (auto *I = dyn_cast<Instruction>(V)) {
1268 switch (I->getOpcode()) {
1269 case Instruction::And:
1270 case Instruction::Or:
1271 case Instruction::Xor:
1272 case Instruction::LShr:
1273 case Instruction::Select:
1274 case Instruction::ICmp:
1275 case Instruction::PHI:
1276 case Instruction::ZExt:
1277 return true;
1278 }
1279 }
1280
1281 return false;
1282}
1283
1284bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1285 unsigned Opc = I->getOpcode();
1286 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1287 return Op != I->getOperand(1);
1288 return true;
1289}
1290
1291bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1292 BasicBlock *ExitB, unsigned IterCount) {
1293 Value *CIV = getCountIV(LoopB);
1294 if (CIV == nullptr)
1295 return false;
1296 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1297 if (CIVTy == nullptr)
1298 return false;
1299
1300 ValueSeq RShifts;
1301 ValueSeq Early, Late, Cycled;
1302
1303 // Find all value cycles that contain logical right shifts by 1.
1304 for (Instruction &I : *LoopB) {
1305 using namespace PatternMatch;
1306
1307 Value *V = nullptr;
1308 if (!match(&I, m_LShr(m_Value(V), m_One())))
1309 continue;
1310 ValueSeq C;
1311 if (!findCycle(&I, V, C))
1312 continue;
1313
1314 // Found a cycle.
1315 C.insert(&I);
1316 classifyCycle(&I, C, Early, Late);
1317 Cycled.insert(C.begin(), C.end());
1318 RShifts.insert(&I);
1319 }
1320
1321 // Find the set of all values affected by the shift cycles, i.e. all
1322 // cycled values, and (recursively) all their users.
1323 ValueSeq Users(Cycled.begin(), Cycled.end());
1324 for (unsigned i = 0; i < Users.size(); ++i) {
1325 Value *V = Users[i];
1326 if (!isa<IntegerType>(V->getType()))
1327 return false;
1328 auto *R = cast<Instruction>(V);
1329 // If the instruction does not commute with shifts, the loop cannot
1330 // be unshifted.
1331 if (!commutesWithShift(R))
1332 return false;
1333 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1334 auto *T = cast<Instruction>(*I);
1335 // Skip users from outside of the loop. They will be handled later.
1336 // Also, skip the right-shifts and phi nodes, since they mix early
1337 // and late values.
1338 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1339 continue;
1340
1341 Users.insert(T);
1342 if (!classifyInst(T, Early, Late))
1343 return false;
1344 }
1345 }
1346
1347 if (Users.empty())
1348 return false;
1349
1350 // Verify that high bits remain zero.
1351 ValueSeq Internal(Users.begin(), Users.end());
1352 ValueSeq Inputs;
1353 for (unsigned i = 0; i < Internal.size(); ++i) {
1354 auto *R = dyn_cast<Instruction>(Internal[i]);
1355 if (!R)
1356 continue;
1357 for (Value *Op : R->operands()) {
1358 auto *T = dyn_cast<Instruction>(Op);
1359 if (T && T->getParent() != LoopB)
1360 Inputs.insert(Op);
1361 else
1362 Internal.insert(Op);
1363 }
1364 }
1365 for (Value *V : Inputs)
1366 if (!highBitsAreZero(V, IterCount))
1367 return false;
1368 for (Value *V : Internal)
1369 if (!keepsHighBitsZero(V, IterCount))
1370 return false;
1371
1372 // Finally, the work can be done. Unshift each user.
1373 IRBuilder<> IRB(LoopB);
1374 std::map<Value*,Value*> ShiftMap;
1375
1376 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1377
1378 CastMapType CastMap;
1379
1380 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1381 IntegerType *Ty) -> Value* {
1382 auto H = CM.find(std::make_pair(V, Ty));
1383 if (H != CM.end())
1384 return H->second;
1385 Value *CV = IRB.CreateIntCast(V, Ty, false);
1386 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1387 return CV;
1388 };
1389
1390 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1391 using namespace PatternMatch;
1392
1393 if (isa<PHINode>(I) || !Users.count(&*I))
1394 continue;
1395
1396 // Match lshr x, 1.
1397 Value *V = nullptr;
1398 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1399 replaceAllUsesOfWithIn(&*I, V, LoopB);
1400 continue;
1401 }
1402 // For each non-cycled operand, replace it with the corresponding
1403 // value shifted left.
1404 for (auto &J : I->operands()) {
1405 Value *Op = J.get();
1406 if (!isOperandShifted(&*I, Op))
1407 continue;
1408 if (Users.count(Op))
1409 continue;
1410 // Skip shifting zeros.
1411 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1412 continue;
1413 // Check if we have already generated a shift for this value.
1414 auto F = ShiftMap.find(Op);
1415 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1416 if (W == nullptr) {
1417 IRB.SetInsertPoint(&*I);
1418 // First, the shift amount will be CIV or CIV+1, depending on
1419 // whether the value is early or late. Instead of creating CIV+1,
1420 // do a single shift of the value.
1421 Value *ShAmt = CIV, *ShVal = Op;
1422 auto *VTy = cast<IntegerType>(ShVal->getType());
1423 auto *ATy = cast<IntegerType>(ShAmt->getType());
1424 if (Late.count(&*I))
1425 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1426 // Second, the types of the shifted value and the shift amount
1427 // must match.
1428 if (VTy != ATy) {
1429 if (VTy->getBitWidth() < ATy->getBitWidth())
1430 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1431 else
1432 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1433 }
1434 // Ready to generate the shift and memoize it.
1435 W = IRB.CreateShl(ShVal, ShAmt);
1436 ShiftMap.insert(std::make_pair(Op, W));
1437 }
1438 I->replaceUsesOfWith(Op, W);
1439 }
1440 }
1441
1442 // Update the users outside of the loop to account for having left
1443 // shifts. They would normally be shifted right in the loop, so shift
1444 // them right after the loop exit.
1445 // Take advantage of the loop-closed SSA form, which has all the post-
1446 // loop values in phi nodes.
1447 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1448 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1449 if (!isa<PHINode>(P))
1450 break;
1451 auto *PN = cast<PHINode>(P);
1452 Value *U = PN->getIncomingValueForBlock(LoopB);
1453 if (!Users.count(U))
1454 continue;
1455 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1456 PN->replaceAllUsesWith(S);
1457 // The above RAUW will create
1458 // S = lshr S, IterCount
1459 // so we need to fix it back into
1460 // S = lshr PN, IterCount
1461 cast<User>(S)->replaceUsesOfWith(S, PN);
1462 }
1463
1464 return true;
1465}
1466
1467void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1468 for (auto &I : *LoopB)
1469 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1470 I.replaceAllUsesWith(SV);
1471
1472 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1473 N = std::next(I);
1474 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1475 }
1476}
1477
1478unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1479 // Arrays of coefficients of Q and the inverse, C.
1480 // Q[i] = coefficient at x^i.
1481 std::array<char,32> Q, C;
1482
1483 for (unsigned i = 0; i < 32; ++i) {
1484 Q[i] = QP & 1;
1485 QP >>= 1;
1486 }
1487 assert(Q[0] == 1)((Q[0] == 1) ? static_cast<void> (0) : __assert_fail ("Q[0] == 1"
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1487, __PRETTY_FUNCTION__))
;
1488
1489 // Find C, such that
1490 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1491 //
1492 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1493 // operations * and + are & and ^ respectively.
1494 //
1495 // Find C[i] recursively, by comparing i-th coefficient in the product
1496 // with 0 (or 1 for i=0).
1497 //
1498 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1499 C[0] = 1;
1500 for (unsigned i = 1; i < 32; ++i) {
1501 // Solve for C[i] in:
1502 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1503 // This is equivalent to
1504 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1505 // which is
1506 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1507 unsigned T = 0;
1508 for (unsigned j = 0; j < i; ++j)
1509 T = T ^ (C[j] & Q[i-j]);
1510 C[i] = T;
1511 }
1512
1513 unsigned QV = 0;
1514 for (unsigned i = 0; i < 32; ++i)
1515 if (C[i])
1516 QV |= (1 << i);
1517
1518 return QV;
1519}
1520
1521Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1522 ParsedValues &PV) {
1523 IRBuilder<> B(&*At);
1524 Module *M = At->getParent()->getParent()->getParent();
1525 Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1526
1527 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1528 unsigned IC = PV.IterCount;
1529
1530 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1531 P0 = P = B.CreateXor(P, PV.M);
1532
1533 // Create a bit mask to clear the high bits beyond IterCount.
1534 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
16
Returning from 'APInt::getLowBitsSet'
1535
1536 if (PV.IterCount != 32)
17
Assuming the condition is false
18
Taking false branch
1537 P = B.CreateAnd(P, BMI);
1538
1539 if (PV.Inv) {
19
Assuming the condition is true
20
Taking true branch
1540 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1541 assert(QI && QI->getBitWidth() <= 32)((QI && QI->getBitWidth() <= 32) ? static_cast<
void> (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1541, __PRETTY_FUNCTION__))
;
21
Assuming the condition is true
22
'?' condition is true
1542
1543 // Again, clearing bits beyond IterCount.
1544 unsigned M = (1 << PV.IterCount) - 1;
23
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1545 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1546 unsigned QV = getInverseMxN(Tmp) & M;
1547 auto *QVI = ConstantInt::get(QI->getType(), QV);
1548 P = B.CreateCall(PMF, {P, QVI});
1549 P = B.CreateTrunc(P, QI->getType());
1550 if (IC != 32)
1551 P = B.CreateAnd(P, BMI);
1552 }
1553
1554 Value *R = B.CreateCall(PMF, {P, Q});
1555
1556 if (PV.M != nullptr)
1557 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1558
1559 return R;
1560}
1561
1562static bool hasZeroSignBit(const Value *V) {
1563 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1564 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1565 const Instruction *I = dyn_cast<const Instruction>(V);
1566 if (!I)
1567 return false;
1568 switch (I->getOpcode()) {
1569 case Instruction::LShr:
1570 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1571 return SI->getZExtValue() > 0;
1572 return false;
1573 case Instruction::Or:
1574 case Instruction::Xor:
1575 return hasZeroSignBit(I->getOperand(0)) &&
1576 hasZeroSignBit(I->getOperand(1));
1577 case Instruction::And:
1578 return hasZeroSignBit(I->getOperand(0)) ||
1579 hasZeroSignBit(I->getOperand(1));
1580 }
1581 return false;
1582}
1583
1584void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1585 S.addRule("sink-zext",
1586 // Sink zext past bitwise operations.
1587 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1588 if (I->getOpcode() != Instruction::ZExt)
1589 return nullptr;
1590 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1591 if (!T)
1592 return nullptr;
1593 switch (T->getOpcode()) {
1594 case Instruction::And:
1595 case Instruction::Or:
1596 case Instruction::Xor:
1597 break;
1598 default:
1599 return nullptr;
1600 }
1601 IRBuilder<> B(Ctx);
1602 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1603 B.CreateZExt(T->getOperand(0), I->getType()),
1604 B.CreateZExt(T->getOperand(1), I->getType()));
1605 });
1606 S.addRule("xor/and -> and/xor",
1607 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1608 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1609 if (I->getOpcode() != Instruction::Xor)
1610 return nullptr;
1611 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1612 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1613 if (!And0 || !And1)
1614 return nullptr;
1615 if (And0->getOpcode() != Instruction::And ||
1616 And1->getOpcode() != Instruction::And)
1617 return nullptr;
1618 if (And0->getOperand(1) != And1->getOperand(1))
1619 return nullptr;
1620 IRBuilder<> B(Ctx);
1621 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1622 And0->getOperand(1));
1623 });
1624 S.addRule("sink binop into select",
1625 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1626 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1627 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1628 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1629 if (!BO)
1630 return nullptr;
1631 Instruction::BinaryOps Op = BO->getOpcode();
1632 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1633 IRBuilder<> B(Ctx);
1634 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1635 Value *Z = BO->getOperand(1);
1636 return B.CreateSelect(Sel->getCondition(),
1637 B.CreateBinOp(Op, X, Z),
1638 B.CreateBinOp(Op, Y, Z));
1639 }
1640 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1641 IRBuilder<> B(Ctx);
1642 Value *X = BO->getOperand(0);
1643 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1644 return B.CreateSelect(Sel->getCondition(),
1645 B.CreateBinOp(Op, X, Y),
1646 B.CreateBinOp(Op, X, Z));
1647 }
1648 return nullptr;
1649 });
1650 S.addRule("fold select-select",
1651 // (select c (select c x y) z) -> (select c x z)
1652 // (select c x (select c y z)) -> (select c x z)
1653 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1654 SelectInst *Sel = dyn_cast<SelectInst>(I);
1655 if (!Sel)
1656 return nullptr;
1657 IRBuilder<> B(Ctx);
1658 Value *C = Sel->getCondition();
1659 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1660 if (Sel0->getCondition() == C)
1661 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1662 }
1663 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1664 if (Sel1->getCondition() == C)
1665 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1666 }
1667 return nullptr;
1668 });
1669 S.addRule("or-signbit -> xor-signbit",
1670 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1671 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1672 if (I->getOpcode() != Instruction::Or)
1673 return nullptr;
1674 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1675 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1676 return nullptr;
1677 if (!hasZeroSignBit(I->getOperand(0)))
1678 return nullptr;
1679 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1680 });
1681 S.addRule("sink lshr into binop",
1682 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1683 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1684 if (I->getOpcode() != Instruction::LShr)
1685 return nullptr;
1686 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1687 if (!BitOp)
1688 return nullptr;
1689 switch (BitOp->getOpcode()) {
1690 case Instruction::And:
1691 case Instruction::Or:
1692 case Instruction::Xor:
1693 break;
1694 default:
1695 return nullptr;
1696 }
1697 IRBuilder<> B(Ctx);
1698 Value *S = I->getOperand(1);
1699 return B.CreateBinOp(BitOp->getOpcode(),
1700 B.CreateLShr(BitOp->getOperand(0), S),
1701 B.CreateLShr(BitOp->getOperand(1), S));
1702 });
1703 S.addRule("expose bitop-const",
1704 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1705 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1706 auto IsBitOp = [](unsigned Op) -> bool {
1707 switch (Op) {
1708 case Instruction::And:
1709 case Instruction::Or:
1710 case Instruction::Xor:
1711 return true;
1712 }
1713 return false;
1714 };
1715 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1716 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1717 return nullptr;
1718 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1719 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1720 return nullptr;
1721 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1722 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1723 if (!CA || !CB)
1724 return nullptr;
1725 IRBuilder<> B(Ctx);
1726 Value *X = BitOp2->getOperand(0);
1727 return B.CreateBinOp(BitOp2->getOpcode(), X,
1728 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1729 });
1730}
1731
1732void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1733 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1734 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1735 if (I->getOpcode() != Instruction::And)
1736 return nullptr;
1737 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1738 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1739 if (!Xor || !C0)
1740 return nullptr;
1741 if (Xor->getOpcode() != Instruction::Xor)
1742 return nullptr;
1743 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1744 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1745 // Pick the first non-null and.
1746 if (!And0 || And0->getOpcode() != Instruction::And)
1747 std::swap(And0, And1);
1748 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1749 if (!C1)
1750 return nullptr;
1751 uint32_t V0 = C0->getZExtValue();
1752 uint32_t V1 = C1->getZExtValue();
1753 if (V0 != (V0 & V1))
1754 return nullptr;
1755 IRBuilder<> B(Ctx);
1756 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1757 });
1758}
1759
1760bool PolynomialMultiplyRecognize::recognize() {
1761 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1762 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1763 // Restrictions:
1764 // - The loop must consist of a single block.
1765 // - The iteration count must be known at compile-time.
1766 // - The loop must have an induction variable starting from 0, and
1767 // incremented in each iteration of the loop.
1768 BasicBlock *LoopB = CurLoop->getHeader();
1769 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1770
1771 if (LoopB != CurLoop->getLoopLatch())
1772 return false;
1773 BasicBlock *ExitB = CurLoop->getExitBlock();
1774 if (ExitB == nullptr)
1775 return false;
1776 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1777 if (EntryB == nullptr)
1778 return false;
1779
1780 unsigned IterCount = 0;
1781 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1782 if (isa<SCEVCouldNotCompute>(CT))
1783 return false;
1784 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1785 IterCount = CV->getValue()->getZExtValue() + 1;
1786
1787 Value *CIV = getCountIV(LoopB);
1788 ParsedValues PV;
1789 Simplifier PreSimp;
1790 PV.IterCount = IterCount;
1791 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1792 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1793
1794 setupPreSimplifier(PreSimp);
1795
1796 // Perform a preliminary scan of select instructions to see if any of them
1797 // looks like a generator of the polynomial multiply steps. Assume that a
1798 // loop can only contain a single transformable operation, so stop the
1799 // traversal after the first reasonable candidate was found.
1800 // XXX: Currently this approach can modify the loop before being 100% sure
1801 // that the transformation can be carried out.
1802 bool FoundPreScan = false;
1803 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1804 for (const Value *U : V->users()) {
1805 if (const auto *P = dyn_cast<const PHINode>(U))
1806 if (P->getParent() == LoopB)
1807 return true;
1808 }
1809 return false;
1810 };
1811 for (Instruction &In : *LoopB) {
1812 SelectInst *SI = dyn_cast<SelectInst>(&In);
1813 if (!SI || !FeedsPHI(SI))
1814 continue;
1815
1816 Simplifier::Context C(SI);
1817 Value *T = PreSimp.simplify(C);
1818 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1819 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1820 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1821 FoundPreScan = true;
1822 if (SelI != SI) {
1823 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1824 SI->replaceAllUsesWith(NewSel);
1825 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1826 }
1827 break;
1828 }
1829 }
1830
1831 if (!FoundPreScan) {
1832 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1833 return false;
1834 }
1835
1836 if (!PV.Left) {
1837 // The right shift version actually only returns the higher bits of
1838 // the result (each iteration discards the LSB). If we want to convert it
1839 // to a left-shifting loop, the working data type must be at least as
1840 // wide as the target's pmpy instruction.
1841 if (!promoteTypes(LoopB, ExitB))
1842 return false;
1843 // Run post-promotion simplifications.
1844 Simplifier PostSimp;
1845 setupPostSimplifier(PostSimp);
1846 for (Instruction &In : *LoopB) {
1847 SelectInst *SI = dyn_cast<SelectInst>(&In);
1848 if (!SI || !FeedsPHI(SI))
1849 continue;
1850 Simplifier::Context C(SI);
1851 Value *T = PostSimp.simplify(C);
1852 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1853 if (SelI != SI) {
1854 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1855 SI->replaceAllUsesWith(NewSel);
1856 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1857 }
1858 break;
1859 }
1860
1861 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1862 return false;
1863 cleanupLoopBody(LoopB);
1864 }
1865
1866 // Scan the loop again, find the generating select instruction.
1867 bool FoundScan = false;
1868 for (Instruction &In : *LoopB) {
1869 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1870 if (!SelI)
1871 continue;
1872 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1873 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1874 if (FoundScan)
1875 break;
1876 }
1877 assert(FoundScan)((FoundScan) ? static_cast<void> (0) : __assert_fail ("FoundScan"
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1877, __PRETTY_FUNCTION__))
;
1878
1879 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1880 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1881 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1882 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1883 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1884 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1885 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1886 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1887 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1888 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1889 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1890 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1891 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1892
1893 BasicBlock::iterator At(EntryB->getTerminator());
1894 Value *PM = generate(At, PV);
1895 if (PM == nullptr)
1896 return false;
1897
1898 if (PM->getType() != PV.Res->getType())
1899 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1900
1901 PV.Res->replaceAllUsesWith(PM);
1902 PV.Res->eraseFromParent();
1903 return true;
1904}
1905
1906int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1907 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1908 return SC->getAPInt().getSExtValue();
1909 return 0;
1910}
1911
1912bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1913 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1914 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1915 return false;
1916
1917 Value *StoredVal = SI->getValueOperand();
1918 Value *StorePtr = SI->getPointerOperand();
1919
1920 // Reject stores that are so large that they overflow an unsigned.
1921 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1922 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1923 return false;
1924
1925 // See if the pointer expression is an AddRec like {base,+,1} on the current
1926 // loop, which indicates a strided store. If we have something else, it's a
1927 // random store we can't handle.
1928 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1929 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1930 return false;
1931
1932 // Check to see if the stride matches the size of the store. If so, then we
1933 // know that every byte is touched in the loop.
1934 int Stride = getSCEVStride(StoreEv);
1935 if (Stride == 0)
1936 return false;
1937 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1938 if (StoreSize != unsigned(std::abs(Stride)))
1939 return false;
1940
1941 // The store must be feeding a non-volatile load.
1942 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1943 if (!LI || !LI->isSimple())
1944 return false;
1945
1946 // See if the pointer expression is an AddRec like {base,+,1} on the current
1947 // loop, which indicates a strided load. If we have something else, it's a
1948 // random load we can't handle.
1949 Value *LoadPtr = LI->getPointerOperand();
1950 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1951 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1952 return false;
1953
1954 // The store and load must share the same stride.
1955 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1956 return false;
1957
1958 // Success. This store can be converted into a memcpy.
1959 return true;
1960}
1961
1962/// mayLoopAccessLocation - Return true if the specified loop might access the
1963/// specified pointer location, which is a loop-strided access. The 'Access'
1964/// argument specifies what the verboten forms of access are (read or write).
1965static bool
1966mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1967 const SCEV *BECount, unsigned StoreSize,
1968 AliasAnalysis &AA,
1969 SmallPtrSetImpl<Instruction *> &Ignored) {
1970 // Get the location that may be stored across the loop. Since the access
1971 // is strided positively through memory, we say that the modified location
1972 // starts at the pointer and has infinite size.
1973 LocationSize AccessSize = LocationSize::unknown();
1974
1975 // If the loop iterates a fixed number of times, we can refine the access
1976 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1977 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1978 AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
1979
1980 // TODO: For this to be really effective, we have to dive into the pointer
1981 // operand in the store. Store to &A[i] of 100 will always return may alias
1982 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1983 // which will then no-alias a store to &A[100].
1984 MemoryLocation StoreLoc(Ptr, AccessSize);
1985
1986 for (auto *B : L->blocks())
1987 for (auto &I : *B)
1988 if (Ignored.count(&I) == 0 &&
1989 isModOrRefSet(
1990 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
1991 return true;
1992
1993 return false;
1994}
1995
1996void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
1997 SmallVectorImpl<StoreInst*> &Stores) {
1998 Stores.clear();
1999 for (Instruction &I : *BB)
2000 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2001 if (isLegalStore(CurLoop, SI))
2002 Stores.push_back(SI);
2003}
2004
2005bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2006 StoreInst *SI, const SCEV *BECount) {
2007 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __PRETTY_FUNCTION__))
2008 "Expected only non-volatile stores, or Hexagon-specific memcpy"(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __PRETTY_FUNCTION__))
2009 "to volatile destination.")(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __PRETTY_FUNCTION__))
;
2010
2011 Value *StorePtr = SI->getPointerOperand();
2012 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2013 unsigned Stride = getSCEVStride(StoreEv);
2014 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2015 if (Stride != StoreSize)
2016 return false;
2017
2018 // See if the pointer expression is an AddRec like {base,+,1} on the current
2019 // loop, which indicates a strided load. If we have something else, it's a
2020 // random load we can't handle.
2021 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
2022 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2023
2024 // The trip count of the loop and the base pointer of the addrec SCEV is
2025 // guaranteed to be loop invariant, which means that it should dominate the
2026 // header. This allows us to insert code for it in the preheader.
2027 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2028 Instruction *ExpPt = Preheader->getTerminator();
2029 IRBuilder<> Builder(ExpPt);
2030 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2031
2032 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2033
2034 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2035 // this into a memcpy/memmove in the loop preheader now if we want. However,
2036 // this would be unsafe to do if there is anything else in the loop that may
2037 // read or write the memory region we're storing to. For memcpy, this
2038 // includes the load that feeds the stores. Check for an alias by generating
2039 // the base address and checking everything.
2040 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2041 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2042 Value *LoadBasePtr = nullptr;
2043
2044 bool Overlap = false;
2045 bool DestVolatile = SI->isVolatile();
2046 Type *BECountTy = BECount->getType();
2047
2048 if (DestVolatile) {
2049 // The trip count must fit in i32, since it is the type of the "num_words"
2050 // argument to hexagon_memcpy_forward_vp4cp4n2.
2051 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2052CleanupAndExit:
2053 // If we generated new code for the base pointer, clean up.
2054 Expander.clear();
2055 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2056 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2057 StoreBasePtr = nullptr;
2058 }
2059 if (LoadBasePtr) {
2060 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2061 LoadBasePtr = nullptr;
2062 }
2063 return false;
2064 }
2065 }
2066
2067 SmallPtrSet<Instruction*, 2> Ignore1;
2068 Ignore1.insert(SI);
2069 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2070 StoreSize, *AA, Ignore1)) {
2071 // Check if the load is the offending instruction.
2072 Ignore1.insert(LI);
2073 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2074 BECount, StoreSize, *AA, Ignore1)) {
2075 // Still bad. Nothing we can do.
2076 goto CleanupAndExit;
2077 }
2078 // It worked with the load ignored.
2079 Overlap = true;
2080 }
2081
2082 if (!Overlap) {
2083 if (DisableMemcpyIdiom || !HasMemcpy)
2084 goto CleanupAndExit;
2085 } else {
2086 // Don't generate memmove if this function will be inlined. This is
2087 // because the caller will undergo this transformation after inlining.
2088 Function *Func = CurLoop->getHeader()->getParent();
2089 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2090 goto CleanupAndExit;
2091
2092 // In case of a memmove, the call to memmove will be executed instead
2093 // of the loop, so we need to make sure that there is nothing else in
2094 // the loop than the load, store and instructions that these two depend
2095 // on.
2096 SmallVector<Instruction*,2> Insts;
2097 Insts.push_back(SI);
2098 Insts.push_back(LI);
2099 if (!coverLoop(CurLoop, Insts))
2100 goto CleanupAndExit;
2101
2102 if (DisableMemmoveIdiom || !HasMemmove)
2103 goto CleanupAndExit;
2104 bool IsNested = CurLoop->getParentLoop() != nullptr;
2105 if (IsNested && OnlyNonNestedMemmove)
2106 goto CleanupAndExit;
2107 }
2108
2109 // For a memcpy, we have to make sure that the input array is not being
2110 // mutated by the loop.
2111 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2112 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2113
2114 SmallPtrSet<Instruction*, 2> Ignore2;
2115 Ignore2.insert(SI);
2116 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2117 StoreSize, *AA, Ignore2))
2118 goto CleanupAndExit;
2119
2120 // Check the stride.
2121 bool StridePos = getSCEVStride(LoadEv) >= 0;
2122
2123 // Currently, the volatile memcpy only emulates traversing memory forward.
2124 if (!StridePos && DestVolatile)
2125 goto CleanupAndExit;
2126
2127 bool RuntimeCheck = (Overlap || DestVolatile);
2128
2129 BasicBlock *ExitB;
2130 if (RuntimeCheck) {
2131 // The runtime check needs a single exit block.
2132 SmallVector<BasicBlock*, 8> ExitBlocks;
2133 CurLoop->getUniqueExitBlocks(ExitBlocks);
2134 if (ExitBlocks.size() != 1)
2135 goto CleanupAndExit;
2136 ExitB = ExitBlocks[0];
2137 }
2138
2139 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2140 // pointer size if it isn't already.
2141 LLVMContext &Ctx = SI->getContext();
2142 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2143 DebugLoc DLoc = SI->getDebugLoc();
2144
2145 const SCEV *NumBytesS =
2146 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2147 if (StoreSize != 1)
2148 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2149 SCEV::FlagNUW);
2150 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2151 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2152 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2153 NumBytes = Simp;
2154
2155 CallInst *NewCall;
2156
2157 if (RuntimeCheck) {
2158 unsigned Threshold = RuntimeMemSizeThreshold;
2159 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2160 uint64_t C = CI->getZExtValue();
2161 if (Threshold != 0 && C < Threshold)
2162 goto CleanupAndExit;
2163 if (C < CompileTimeMemSizeThreshold)
2164 goto CleanupAndExit;
2165 }
2166
2167 BasicBlock *Header = CurLoop->getHeader();
2168 Function *Func = Header->getParent();
2169 Loop *ParentL = LF->getLoopFor(Preheader);
2170 StringRef HeaderName = Header->getName();
2171
2172 // Create a new (empty) preheader, and update the PHI nodes in the
2173 // header to use the new preheader.
2174 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2175 Func, Header);
2176 if (ParentL)
2177 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2178 IRBuilder<>(NewPreheader).CreateBr(Header);
2179 for (auto &In : *Header) {
2180 PHINode *PN = dyn_cast<PHINode>(&In);
2181 if (!PN)
2182 break;
2183 int bx = PN->getBasicBlockIndex(Preheader);
2184 if (bx >= 0)
2185 PN->setIncomingBlock(bx, NewPreheader);
2186 }
2187 DT->addNewBlock(NewPreheader, Preheader);
2188 DT->changeImmediateDominator(Header, NewPreheader);
2189
2190 // Check for safe conditions to execute memmove.
2191 // If stride is positive, copying things from higher to lower addresses
2192 // is equivalent to memmove. For negative stride, it's the other way
2193 // around. Copying forward in memory with positive stride may not be
2194 // same as memmove since we may be copying values that we just stored
2195 // in some previous iteration.
2196 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2197 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2198 Value *LowA = StridePos ? SA : LA;
2199 Value *HighA = StridePos ? LA : SA;
2200 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2201 Value *Cond = CmpA;
2202
2203 // Check for distance between pointers. Since the case LowA < HighA
2204 // is checked for above, assume LowA >= HighA.
2205 Value *Dist = Builder.CreateSub(LowA, HighA);
2206 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2207 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2208 Cond = CmpEither;
2209
2210 if (Threshold != 0) {
2211 Type *Ty = NumBytes->getType();
2212 Value *Thr = ConstantInt::get(Ty, Threshold);
2213 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2214 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2215 Cond = CmpBoth;
2216 }
2217 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2218 Func, NewPreheader);
2219 if (ParentL)
2220 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2221 Instruction *OldT = Preheader->getTerminator();
2222 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2223 OldT->eraseFromParent();
2224 Preheader->setName(Preheader->getName()+".old");
2225 DT->addNewBlock(MemmoveB, Preheader);
2226 // Find the new immediate dominator of the exit block.
2227 BasicBlock *ExitD = Preheader;
2228 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2229 BasicBlock *PB = *PI;
2230 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2231 if (!ExitD)
2232 break;
2233 }
2234 // If the prior immediate dominator of ExitB was dominated by the
2235 // old preheader, then the old preheader becomes the new immediate
2236 // dominator. Otherwise don't change anything (because the newly
2237 // added blocks are dominated by the old preheader).
2238 if (ExitD && DT->dominates(Preheader, ExitD)) {
2239 DomTreeNode *BN = DT->getNode(ExitB);
2240 DomTreeNode *DN = DT->getNode(ExitD);
2241 BN->setIDom(DN);
2242 }
2243
2244 // Add a call to memmove to the conditional block.
2245 IRBuilder<> CondBuilder(MemmoveB);
2246 CondBuilder.CreateBr(ExitB);
2247 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2248
2249 if (DestVolatile) {
2250 Type *Int32Ty = Type::getInt32Ty(Ctx);
2251 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2252 Type *VoidTy = Type::getVoidTy(Ctx);
2253 Module *M = Func->getParent();
2254 Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
2255 Int32PtrTy, Int32PtrTy, Int32Ty);
2256 Function *Fn = cast<Function>(CF);
2257 Fn->setLinkage(Function::ExternalLinkage);
2258
2259 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2260 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2261 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2262 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2263 MemmoveB->getTerminator());
2264 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2265 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2266 NumWords = Simp;
2267
2268 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2269 ? StoreBasePtr
2270 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2271 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2272 ? LoadBasePtr
2273 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2274 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2275 } else {
2276 NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(),
2277 LoadBasePtr, LI->getAlignment(),
2278 NumBytes);
2279 }
2280 } else {
2281 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
2282 LoadBasePtr, LI->getAlignment(),
2283 NumBytes);
2284 // Okay, the memcpy has been formed. Zap the original store and
2285 // anything that feeds into it.
2286 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2287 }
2288
2289 NewCall->setDebugLoc(DLoc);
2290
2291 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2292 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2293 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2294 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2295 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2296
2297 return true;
2298}
2299
2300// Check if the instructions in Insts, together with their dependencies
2301// cover the loop in the sense that the loop could be safely eliminated once
2302// the instructions in Insts are removed.
2303bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2304 SmallVectorImpl<Instruction*> &Insts) const {
2305 SmallSet<BasicBlock*,8> LoopBlocks;
2306 for (auto *B : L->blocks())
2307 LoopBlocks.insert(B);
2308
2309 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2310
2311 // Collect all instructions from the loop that the instructions in Insts
2312 // depend on (plus their dependencies, etc.). These instructions will
2313 // constitute the expression trees that feed those in Insts, but the trees
2314 // will be limited only to instructions contained in the loop.
2315 for (unsigned i = 0; i < Worklist.size(); ++i) {
2316 Instruction *In = Worklist[i];
2317 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2318 Instruction *OpI = dyn_cast<Instruction>(I);
2319 if (!OpI)
2320 continue;
2321 BasicBlock *PB = OpI->getParent();
2322 if (!LoopBlocks.count(PB))
2323 continue;
2324 Worklist.insert(OpI);
2325 }
2326 }
2327
2328 // Scan all instructions in the loop, if any of them have a user outside
2329 // of the loop, or outside of the expressions collected above, then either
2330 // the loop has a side-effect visible outside of it, or there are
2331 // instructions in it that are not involved in the original set Insts.
2332 for (auto *B : L->blocks()) {
2333 for (auto &In : *B) {
2334 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2335 continue;
2336 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2337 return false;
2338 for (const auto &K : In.users()) {
2339 Instruction *UseI = dyn_cast<Instruction>(K);
2340 if (!UseI)
2341 continue;
2342 BasicBlock *UseB = UseI->getParent();
2343 if (LF->getLoopFor(UseB) != L)
2344 return false;
2345 }
2346 }
2347 }
2348
2349 return true;
2350}
2351
2352/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2353/// with the specified backedge count. This block is known to be in the current
2354/// loop and not in any subloops.
2355bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2356 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2357 // We can only promote stores in this block if they are unconditionally
2358 // executed in the loop. For a block to be unconditionally executed, it has
2359 // to dominate all the exit blocks of the loop. Verify this now.
2360 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2361 return DT->dominates(BB, EB);
2362 };
2363 if (!all_of(ExitBlocks, DominatedByBB))
2364 return false;
2365
2366 bool MadeChange = false;
2367 // Look for store instructions, which may be optimized to memset/memcpy.
2368 SmallVector<StoreInst*,8> Stores;
2369 collectStores(CurLoop, BB, Stores);
2370
2371 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2372 for (auto &SI : Stores)
2373 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2374
2375 return MadeChange;
2376}
2377
2378bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2379 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2380 if (PMR.recognize())
2381 return true;
2382
2383 if (!HasMemcpy && !HasMemmove)
2384 return false;
2385
2386 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2387 assert(!isa<SCEVCouldNotCompute>(BECount) &&((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
2388 "runOnCountableLoop() called on a loop without a predictable"((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
2389 "backedge-taken count")((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-8~svn348900/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
;
2390
2391 SmallVector<BasicBlock *, 8> ExitBlocks;
2392 L->getUniqueExitBlocks(ExitBlocks);
2393
2394 bool Changed = false;
2395
2396 // Scan all the blocks in the loop that are not in subloops.
2397 for (auto *BB : L->getBlocks()) {
2398 // Ignore blocks in subloops.
2399 if (LF->getLoopFor(BB) != L)
2400 continue;
2401 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2402 }
2403
2404 return Changed;
2405}
2406
2407bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
2408 const Module &M = *L->getHeader()->getParent()->getParent();
2409 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2410 return false;
2411
2412 if (skipLoop(L))
2413 return false;
2414
2415 // If the loop could not be converted to canonical form, it must have an
2416 // indirectbr in it, just give up.
2417 if (!L->getLoopPreheader())
2418 return false;
2419
2420 // Disable loop idiom recognition if the function's name is a common idiom.
2421 StringRef Name = L->getHeader()->getParent()->getName();
2422 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2423 return false;
2424
2425 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2426 DL = &L->getHeader()->getModule()->getDataLayout();
2427 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2428 LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2429 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
2430 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2431
2432 HasMemcpy = TLI->has(LibFunc_memcpy);
2433 HasMemmove = TLI->has(LibFunc_memmove);
2434
2435 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2436 return runOnCountableLoop(L);
2437 return false;
2438}
2439
2440Pass *llvm::createHexagonLoopIdiomPass() {
2441 return new HexagonLoopIdiomRecognize();
2442}

/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// This file implements a class to represent arbitrary precision
12/// integral constant values and operations on them.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_ADT_APINT_H
17#define LLVM_ADT_APINT_H
18
19#include "llvm/Support/Compiler.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22#include <climits>
23#include <cstring>
24#include <string>
25
26namespace llvm {
27class FoldingSetNodeID;
28class StringRef;
29class hash_code;
30class raw_ostream;
31
32template <typename T> class SmallVectorImpl;
33template <typename T> class ArrayRef;
34template <typename T> class Optional;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69///
70class LLVM_NODISCARD[[clang::warn_unused_result]] APInt {
71public:
72 typedef uint64_t WordType;
73
74 /// This enum is used to hold the constants we needed for APInt.
75 enum : unsigned {
76 /// Byte size of a word.
77 APINT_WORD_SIZE = sizeof(WordType),
78 /// Bits in a word.
79 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
80 };
81
82 enum class Rounding {
83 DOWN,
84 TOWARD_ZERO,
85 UP,
86 };
87
88 static const WordType WORDTYPE_MAX = ~WordType(0);
89
90private:
91 /// This union is used to store the integer value. When the
92 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
93 union {
94 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
95 uint64_t *pVal; ///< Used to store the >64 bits integer value.
96 } U;
97
98 unsigned BitWidth; ///< The number of bits in this APInt.
99
100 friend struct DenseMapAPIntKeyInfo;
101
102 friend class APSInt;
103
104 /// Fast internal constructor
105 ///
106 /// This constructor is used only internally for speed of construction of
107 /// temporaries. It is unsafe for general use so it is not public.
108 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
109 U.pVal = val;
110 }
111
112 /// Determine if this APInt just has one word to store value.
113 ///
114 /// \returns true if the number of bits <= 64, false otherwise.
115 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
116
117 /// Determine which word a bit is in.
118 ///
119 /// \returns the word position for the specified bit position.
120 static unsigned whichWord(unsigned bitPosition) {
121 return bitPosition / APINT_BITS_PER_WORD;
122 }
123
124 /// Determine which bit in a word a bit is in.
125 ///
126 /// \returns the bit position in a word for the specified bit position
127 /// in the APInt.
128 static unsigned whichBit(unsigned bitPosition) {
129 return bitPosition % APINT_BITS_PER_WORD;
130 }
131
132 /// Get a single bit mask.
133 ///
134 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
135 /// This method generates and returns a uint64_t (word) mask for a single
136 /// bit at a specific bit position. This is used to mask the bit in the
137 /// corresponding word.
138 static uint64_t maskBit(unsigned bitPosition) {
139 return 1ULL << whichBit(bitPosition);
140 }
141
142 /// Clear unused high order bits
143 ///
144 /// This method is used internally to clear the top "N" bits in the high order
145 /// word that are not used by the APInt. This is needed after the most
146 /// significant word is assigned a value to ensure that those bits are
147 /// zero'd out.
148 APInt &clearUnusedBits() {
149 // Compute how many bits are used in the final word
150 unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
151
152 // Mask out the high bits.
153 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
154 if (isSingleWord())
155 U.VAL &= mask;
156 else
157 U.pVal[getNumWords() - 1] &= mask;
158 return *this;
159 }
160
161 /// Get the word corresponding to a bit position
162 /// \returns the corresponding word for the specified bit position.
163 uint64_t getWord(unsigned bitPosition) const {
164 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
165 }
166
167 /// Utility method to change the bit width of this APInt to new bit width,
168 /// allocating and/or deallocating as necessary. There is no guarantee on the
169 /// value of any bits upon return. Caller should populate the bits after.
170 void reallocate(unsigned NewBitWidth);
171
172 /// Convert a char array into an APInt
173 ///
174 /// \param radix 2, 8, 10, 16, or 36
175 /// Converts a string into a number. The string must be non-empty
176 /// and well-formed as a number of the given base. The bit-width
177 /// must be sufficient to hold the result.
178 ///
179 /// This is used by the constructors that take string arguments.
180 ///
181 /// StringRef::getAsInteger is superficially similar but (1) does
182 /// not assume that the string is well-formed and (2) grows the
183 /// result to hold the input.
184 void fromString(unsigned numBits, StringRef str, uint8_t radix);
185
186 /// An internal division function for dividing APInts.
187 ///
188 /// This is used by the toString method to divide by the radix. It simply
189 /// provides a more convenient form of divide for internal use since KnuthDiv
190 /// has specific constraints on its inputs. If those constraints are not met
191 /// then it provides a simpler form of divide.
192 static void divide(const WordType *LHS, unsigned lhsWords,
193 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
194 WordType *Remainder);
195
196 /// out-of-line slow case for inline constructor
197 void initSlowCase(uint64_t val, bool isSigned);
198
199 /// shared code between two array constructors
200 void initFromArray(ArrayRef<uint64_t> array);
201
202 /// out-of-line slow case for inline copy constructor
203 void initSlowCase(const APInt &that);
204
205 /// out-of-line slow case for shl
206 void shlSlowCase(unsigned ShiftAmt);
207
208 /// out-of-line slow case for lshr.
209 void lshrSlowCase(unsigned ShiftAmt);
210
211 /// out-of-line slow case for ashr.
212 void ashrSlowCase(unsigned ShiftAmt);
213
214 /// out-of-line slow case for operator=
215 void AssignSlowCase(const APInt &RHS);
216
217 /// out-of-line slow case for operator==
218 bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
219
220 /// out-of-line slow case for countLeadingZeros
221 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
222
223 /// out-of-line slow case for countLeadingOnes.
224 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
225
226 /// out-of-line slow case for countTrailingZeros.
227 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
228
229 /// out-of-line slow case for countTrailingOnes
230 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
231
232 /// out-of-line slow case for countPopulation
233 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
234
235 /// out-of-line slow case for intersects.
236 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
237
238 /// out-of-line slow case for isSubsetOf.
239 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
240
241 /// out-of-line slow case for setBits.
242 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
243
244 /// out-of-line slow case for flipAllBits.
245 void flipAllBitsSlowCase();
246
247 /// out-of-line slow case for operator&=.
248 void AndAssignSlowCase(const APInt& RHS);
249
250 /// out-of-line slow case for operator|=.
251 void OrAssignSlowCase(const APInt& RHS);
252
253 /// out-of-line slow case for operator^=.
254 void XorAssignSlowCase(const APInt& RHS);
255
256 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
257 /// to, or greater than RHS.
258 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
259
260 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
261 /// to, or greater than RHS.
262 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
263
264public:
265 /// \name Constructors
266 /// @{
267
268 /// Create a new APInt of numBits width, initialized as val.
269 ///
270 /// If isSigned is true then val is treated as if it were a signed value
271 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
272 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
273 /// the range of val are zero filled).
274 ///
275 /// \param numBits the bit width of the constructed APInt
276 /// \param val the initial value of the APInt
277 /// \param isSigned how to treat signedness of val
278 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
279 : BitWidth(numBits) {
280 assert(BitWidth && "bitwidth too small")((BitWidth && "bitwidth too small") ? static_cast<
void> (0) : __assert_fail ("BitWidth && \"bitwidth too small\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 280, __PRETTY_FUNCTION__))
;
281 if (isSingleWord()) {
282 U.VAL = val;
283 clearUnusedBits();
284 } else {
285 initSlowCase(val, isSigned);
286 }
287 }
288
289 /// Construct an APInt of numBits width, initialized as bigVal[].
290 ///
291 /// Note that bigVal.size() can be smaller or larger than the corresponding
292 /// bit width but any extraneous bits will be dropped.
293 ///
294 /// \param numBits the bit width of the constructed APInt
295 /// \param bigVal a sequence of words to form the initial value of the APInt
296 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
297
298 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
299 /// deprecated because this constructor is prone to ambiguity with the
300 /// APInt(unsigned, uint64_t, bool) constructor.
301 ///
302 /// If this overload is ever deleted, care should be taken to prevent calls
303 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
304 /// constructor.
305 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
306
307 /// Construct an APInt from a string representation.
308 ///
309 /// This constructor interprets the string \p str in the given radix. The
310 /// interpretation stops when the first character that is not suitable for the
311 /// radix is encountered, or the end of the string. Acceptable radix values
312 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
313 /// string to require more bits than numBits.
314 ///
315 /// \param numBits the bit width of the constructed APInt
316 /// \param str the string to be interpreted
317 /// \param radix the radix to use for the conversion
318 APInt(unsigned numBits, StringRef str, uint8_t radix);
319
320 /// Simply makes *this a copy of that.
321 /// Copy Constructor.
322 APInt(const APInt &that) : BitWidth(that.BitWidth) {
323 if (isSingleWord())
324 U.VAL = that.U.VAL;
325 else
326 initSlowCase(that);
327 }
328
329 /// Move Constructor.
330 APInt(APInt &&that) : BitWidth(that.BitWidth) {
331 memcpy(&U, &that.U, sizeof(U));
332 that.BitWidth = 0;
333 }
334
335 /// Destructor.
336 ~APInt() {
337 if (needsCleanup())
338 delete[] U.pVal;
339 }
340
341 /// Default constructor that creates an uninteresting APInt
342 /// representing a 1-bit zero value.
343 ///
344 /// This is useful for object deserialization (pair this with the static
345 /// method Read).
346 explicit APInt() : BitWidth(1) { U.VAL = 0; }
347
348 /// Returns whether this instance allocated memory.
349 bool needsCleanup() const { return !isSingleWord(); }
350
351 /// Used to insert APInt objects, or objects that contain APInt objects, into
352 /// FoldingSets.
353 void Profile(FoldingSetNodeID &id) const;
354
355 /// @}
356 /// \name Value Tests
357 /// @{
358
359 /// Determine sign of this APInt.
360 ///
361 /// This tests the high bit of this APInt to determine if it is set.
362 ///
363 /// \returns true if this APInt is negative, false otherwise
364 bool isNegative() const { return (*this)[BitWidth - 1]; }
365
366 /// Determine if this APInt Value is non-negative (>= 0)
367 ///
368 /// This tests the high bit of the APInt to determine if it is unset.
369 bool isNonNegative() const { return !isNegative(); }
370
371 /// Determine if sign bit of this APInt is set.
372 ///
373 /// This tests the high bit of this APInt to determine if it is set.
374 ///
375 /// \returns true if this APInt has its sign bit set, false otherwise.
376 bool isSignBitSet() const { return (*this)[BitWidth-1]; }
377
378 /// Determine if sign bit of this APInt is clear.
379 ///
380 /// This tests the high bit of this APInt to determine if it is clear.
381 ///
382 /// \returns true if this APInt has its sign bit clear, false otherwise.
383 bool isSignBitClear() const { return !isSignBitSet(); }
384
385 /// Determine if this APInt Value is positive.
386 ///
387 /// This tests if the value of this APInt is positive (> 0). Note
388 /// that 0 is not a positive value.
389 ///
390 /// \returns true if this APInt is positive.
391 bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
392
393 /// Determine if all bits are set
394 ///
395 /// This checks to see if the value has all bits of the APInt are set or not.
396 bool isAllOnesValue() const {
397 if (isSingleWord())
398 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
399 return countTrailingOnesSlowCase() == BitWidth;
400 }
401
402 /// Determine if all bits are clear
403 ///
404 /// This checks to see if the value has all bits of the APInt are clear or
405 /// not.
406 bool isNullValue() const { return !*this; }
407
408 /// Determine if this is a value of 1.
409 ///
410 /// This checks to see if the value of this APInt is one.
411 bool isOneValue() const {
412 if (isSingleWord())
413 return U.VAL == 1;
414 return countLeadingZerosSlowCase() == BitWidth - 1;
415 }
416
417 /// Determine if this is the largest unsigned value.
418 ///
419 /// This checks to see if the value of this APInt is the maximum unsigned
420 /// value for the APInt's bit width.
421 bool isMaxValue() const { return isAllOnesValue(); }
422
423 /// Determine if this is the largest signed value.
424 ///
425 /// This checks to see if the value of this APInt is the maximum signed
426 /// value for the APInt's bit width.
427 bool isMaxSignedValue() const {
428 if (isSingleWord())
429 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
430 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
431 }
432
433 /// Determine if this is the smallest unsigned value.
434 ///
435 /// This checks to see if the value of this APInt is the minimum unsigned
436 /// value for the APInt's bit width.
437 bool isMinValue() const { return isNullValue(); }
438
439 /// Determine if this is the smallest signed value.
440 ///
441 /// This checks to see if the value of this APInt is the minimum signed
442 /// value for the APInt's bit width.
443 bool isMinSignedValue() const {
444 if (isSingleWord())
445 return U.VAL == (WordType(1) << (BitWidth - 1));
446 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
447 }
448
449 /// Check if this APInt has an N-bits unsigned integer value.
450 bool isIntN(unsigned N) const {
451 assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail
("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 451, __PRETTY_FUNCTION__))
;
452 return getActiveBits() <= N;
453 }
454
455 /// Check if this APInt has an N-bits signed integer value.
456 bool isSignedIntN(unsigned N) const {
457 assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail
("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 457, __PRETTY_FUNCTION__))
;
458 return getMinSignedBits() <= N;
459 }
460
461 /// Check if this APInt's value is a power of two greater than zero.
462 ///
463 /// \returns true if the argument APInt value is a power of two > 0.
464 bool isPowerOf2() const {
465 if (isSingleWord())
466 return isPowerOf2_64(U.VAL);
467 return countPopulationSlowCase() == 1;
468 }
469
470 /// Check if the APInt's value is returned by getSignMask.
471 ///
472 /// \returns true if this is the value returned by getSignMask.
473 bool isSignMask() const { return isMinSignedValue(); }
474
475 /// Convert APInt to a boolean value.
476 ///
477 /// This converts the APInt to a boolean value as a test against zero.
478 bool getBoolValue() const { return !!*this; }
479
480 /// If this value is smaller than the specified limit, return it, otherwise
481 /// return the limit value. This causes the value to saturate to the limit.
482 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
483 return ugt(Limit) ? Limit : getZExtValue();
484 }
485
486 /// Check if the APInt consists of a repeated bit pattern.
487 ///
488 /// e.g. 0x01010101 satisfies isSplat(8).
489 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
490 /// width without remainder.
491 bool isSplat(unsigned SplatSizeInBits) const;
492
493 /// \returns true if this APInt value is a sequence of \param numBits ones
494 /// starting at the least significant bit with the remainder zero.
495 bool isMask(unsigned numBits) const {
496 assert(numBits != 0 && "numBits must be non-zero")((numBits != 0 && "numBits must be non-zero") ? static_cast
<void> (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 496, __PRETTY_FUNCTION__))
;
497 assert(numBits <= BitWidth && "numBits out of range")((numBits <= BitWidth && "numBits out of range") ?
static_cast<void> (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 497, __PRETTY_FUNCTION__))
;
498 if (isSingleWord())
499 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
500 unsigned Ones = countTrailingOnesSlowCase();
501 return (numBits == Ones) &&
502 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
503 }
504
505 /// \returns true if this APInt is a non-empty sequence of ones starting at
506 /// the least significant bit with the remainder zero.
507 /// Ex. isMask(0x0000FFFFU) == true.
508 bool isMask() const {
509 if (isSingleWord())
510 return isMask_64(U.VAL);
511 unsigned Ones = countTrailingOnesSlowCase();
512 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
513 }
514
515 /// Return true if this APInt value contains a sequence of ones with
516 /// the remainder zero.
517 bool isShiftedMask() const {
518 if (isSingleWord())
519 return isShiftedMask_64(U.VAL);
520 unsigned Ones = countPopulationSlowCase();
521 unsigned LeadZ = countLeadingZerosSlowCase();
522 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
523 }
524
525 /// @}
526 /// \name Value Generators
527 /// @{
528
529 /// Gets maximum unsigned value of APInt for specific bit width.
530 static APInt getMaxValue(unsigned numBits) {
531 return getAllOnesValue(numBits);
532 }
533
534 /// Gets maximum signed value of APInt for a specific bit width.
535 static APInt getSignedMaxValue(unsigned numBits) {
536 APInt API = getAllOnesValue(numBits);
537 API.clearBit(numBits - 1);
538 return API;
539 }
540
541 /// Gets minimum unsigned value of APInt for a specific bit width.
542 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
543
544 /// Gets minimum signed value of APInt for a specific bit width.
545 static APInt getSignedMinValue(unsigned numBits) {
546 APInt API(numBits, 0);
547 API.setBit(numBits - 1);
548 return API;
549 }
550
551 /// Get the SignMask for a specific bit width.
552 ///
553 /// This is just a wrapper function of getSignedMinValue(), and it helps code
554 /// readability when we want to get a SignMask.
555 static APInt getSignMask(unsigned BitWidth) {
556 return getSignedMinValue(BitWidth);
557 }
558
559 /// Get the all-ones value.
560 ///
561 /// \returns the all-ones value for an APInt of the specified bit-width.
562 static APInt getAllOnesValue(unsigned numBits) {
563 return APInt(numBits, WORDTYPE_MAX, true);
564 }
565
566 /// Get the '0' value.
567 ///
568 /// \returns the '0' value for an APInt of the specified bit-width.
569 static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
570
571 /// Compute an APInt containing numBits highbits from this APInt.
572 ///
573 /// Get an APInt with the same BitWidth as this APInt, just zero mask
574 /// the low bits and right shift to the least significant bit.
575 ///
576 /// \returns the high "numBits" bits of this APInt.
577 APInt getHiBits(unsigned numBits) const;
578
579 /// Compute an APInt containing numBits lowbits from this APInt.
580 ///
581 /// Get an APInt with the same BitWidth as this APInt, just zero mask
582 /// the high bits.
583 ///
584 /// \returns the low "numBits" bits of this APInt.
585 APInt getLoBits(unsigned numBits) const;
586
587 /// Return an APInt with exactly one bit set in the result.
588 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
589 APInt Res(numBits, 0);
590 Res.setBit(BitNo);
591 return Res;
592 }
593
594 /// Get a value with a block of bits set.
595 ///
596 /// Constructs an APInt value that has a contiguous range of bits set. The
597 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
598 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
599 /// 0x0000FFFF. If hiBit is less than loBit then the set bits "wrap". For
600 /// example, with parameters (32, 28, 4), you would get 0xF000000F.
601 ///
602 /// \param numBits the intended bit width of the result
603 /// \param loBit the index of the lowest bit set.
604 /// \param hiBit the index of the highest bit set.
605 ///
606 /// \returns An APInt value with the requested bits set.
607 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
608 APInt Res(numBits, 0);
609 Res.setBits(loBit, hiBit);
610 return Res;
611 }
612
613 /// Get a value with upper bits starting at loBit set.
614 ///
615 /// Constructs an APInt value that has a contiguous range of bits set. The
616 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
617 /// bits will be zero. For example, with parameters(32, 12) you would get
618 /// 0xFFFFF000.
619 ///
620 /// \param numBits the intended bit width of the result
621 /// \param loBit the index of the lowest bit to set.
622 ///
623 /// \returns An APInt value with the requested bits set.
624 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
625 APInt Res(numBits, 0);
626 Res.setBitsFrom(loBit);
627 return Res;
628 }
629
630 /// Get a value with high bits set
631 ///
632 /// Constructs an APInt value that has the top hiBitsSet bits set.
633 ///
634 /// \param numBits the bitwidth of the result
635 /// \param hiBitsSet the number of high-order bits set in the result.
636 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
637 APInt Res(numBits, 0);
638 Res.setHighBits(hiBitsSet);
639 return Res;
640 }
641
642 /// Get a value with low bits set
643 ///
644 /// Constructs an APInt value that has the bottom loBitsSet bits set.
645 ///
646 /// \param numBits the bitwidth of the result
647 /// \param loBitsSet the number of low-order bits set in the result.
648 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
649 APInt Res(numBits, 0);
650 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
15
Returning from 'APInt::setLowBits'
651 return Res;
652 }
653
654 /// Return a value containing V broadcasted over NewLen bits.
655 static APInt getSplat(unsigned NewLen, const APInt &V);
656
657 /// Determine if two APInts have the same value, after zero-extending
658 /// one of them (if needed!) to ensure that the bit-widths match.
659 static bool isSameValue(const APInt &I1, const APInt &I2) {
660 if (I1.getBitWidth() == I2.getBitWidth())
661 return I1 == I2;
662
663 if (I1.getBitWidth() > I2.getBitWidth())
664 return I1 == I2.zext(I1.getBitWidth());
665
666 return I1.zext(I2.getBitWidth()) == I2;
667 }
668
669 /// Overload to compute a hash_code for an APInt value.
670 friend hash_code hash_value(const APInt &Arg);
671
672 /// This function returns a pointer to the internal storage of the APInt.
673 /// This is useful for writing out the APInt in binary form without any
674 /// conversions.
675 const uint64_t *getRawData() const {
676 if (isSingleWord())
677 return &U.VAL;
678 return &U.pVal[0];
679 }
680
681 /// @}
682 /// \name Unary Operators
683 /// @{
684
685 /// Postfix increment operator.
686 ///
687 /// Increments *this by 1.
688 ///
689 /// \returns a new APInt value representing the original value of *this.
690 const APInt operator++(int) {
691 APInt API(*this);
692 ++(*this);
693 return API;
694 }
695
696 /// Prefix increment operator.
697 ///
698 /// \returns *this incremented by one
699 APInt &operator++();
700
701 /// Postfix decrement operator.
702 ///
703 /// Decrements *this by 1.
704 ///
705 /// \returns a new APInt value representing the original value of *this.
706 const APInt operator--(int) {
707 APInt API(*this);
708 --(*this);
709 return API;
710 }
711
712 /// Prefix decrement operator.
713 ///
714 /// \returns *this decremented by one.
715 APInt &operator--();
716
717 /// Logical negation operator.
718 ///
719 /// Performs logical negation operation on this APInt.
720 ///
721 /// \returns true if *this is zero, false otherwise.
722 bool operator!() const {
723 if (isSingleWord())
724 return U.VAL == 0;
725 return countLeadingZerosSlowCase() == BitWidth;
726 }
727
728 /// @}
729 /// \name Assignment Operators
730 /// @{
731
732 /// Copy assignment operator.
733 ///
734 /// \returns *this after assignment of RHS.
735 APInt &operator=(const APInt &RHS) {
736 // If the bitwidths are the same, we can avoid mucking with memory
737 if (isSingleWord() && RHS.isSingleWord()) {
738 U.VAL = RHS.U.VAL;
739 BitWidth = RHS.BitWidth;
740 return clearUnusedBits();
741 }
742
743 AssignSlowCase(RHS);
744 return *this;
745 }
746
747 /// Move assignment operator.
748 APInt &operator=(APInt &&that) {
749#ifdef _MSC_VER
750 // The MSVC std::shuffle implementation still does self-assignment.
751 if (this == &that)
752 return *this;
753#endif
754 assert(this != &that && "Self-move not supported")((this != &that && "Self-move not supported") ? static_cast
<void> (0) : __assert_fail ("this != &that && \"Self-move not supported\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 754, __PRETTY_FUNCTION__))
;
755 if (!isSingleWord())
756 delete[] U.pVal;
757
758 // Use memcpy so that type based alias analysis sees both VAL and pVal
759 // as modified.
760 memcpy(&U, &that.U, sizeof(U));
761
762 BitWidth = that.BitWidth;
763 that.BitWidth = 0;
764
765 return *this;
766 }
767
768 /// Assignment operator.
769 ///
770 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
771 /// the bit width, the excess bits are truncated. If the bit width is larger
772 /// than 64, the value is zero filled in the unspecified high order bits.
773 ///
774 /// \returns *this after assignment of RHS value.
775 APInt &operator=(uint64_t RHS) {
776 if (isSingleWord()) {
777 U.VAL = RHS;
778 clearUnusedBits();
779 } else {
780 U.pVal[0] = RHS;
781 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
782 }
783 return *this;
784 }
785
786 /// Bitwise AND assignment operator.
787 ///
788 /// Performs a bitwise AND operation on this APInt and RHS. The result is
789 /// assigned to *this.
790 ///
791 /// \returns *this after ANDing with RHS.
792 APInt &operator&=(const APInt &RHS) {
793 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 793, __PRETTY_FUNCTION__))
;
794 if (isSingleWord())
795 U.VAL &= RHS.U.VAL;
796 else
797 AndAssignSlowCase(RHS);
798 return *this;
799 }
800
801 /// Bitwise AND assignment operator.
802 ///
803 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
804 /// logically zero-extended or truncated to match the bit-width of
805 /// the LHS.
806 APInt &operator&=(uint64_t RHS) {
807 if (isSingleWord()) {
808 U.VAL &= RHS;
809 return *this;
810 }
811 U.pVal[0] &= RHS;
812 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
813 return *this;
814 }
815
816 /// Bitwise OR assignment operator.
817 ///
818 /// Performs a bitwise OR operation on this APInt and RHS. The result is
819 /// assigned *this;
820 ///
821 /// \returns *this after ORing with RHS.
822 APInt &operator|=(const APInt &RHS) {
823 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 823, __PRETTY_FUNCTION__))
;
824 if (isSingleWord())
825 U.VAL |= RHS.U.VAL;
826 else
827 OrAssignSlowCase(RHS);
828 return *this;
829 }
830
831 /// Bitwise OR assignment operator.
832 ///
833 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
834 /// logically zero-extended or truncated to match the bit-width of
835 /// the LHS.
836 APInt &operator|=(uint64_t RHS) {
837 if (isSingleWord()) {
838 U.VAL |= RHS;
839 clearUnusedBits();
840 } else {
841 U.pVal[0] |= RHS;
842 }
843 return *this;
844 }
845
846 /// Bitwise XOR assignment operator.
847 ///
848 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
849 /// assigned to *this.
850 ///
851 /// \returns *this after XORing with RHS.
852 APInt &operator^=(const APInt &RHS) {
853 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 853, __PRETTY_FUNCTION__))
;
854 if (isSingleWord())
855 U.VAL ^= RHS.U.VAL;
856 else
857 XorAssignSlowCase(RHS);
858 return *this;
859 }
860
861 /// Bitwise XOR assignment operator.
862 ///
863 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
864 /// logically zero-extended or truncated to match the bit-width of
865 /// the LHS.
866 APInt &operator^=(uint64_t RHS) {
867 if (isSingleWord()) {
868 U.VAL ^= RHS;
869 clearUnusedBits();
870 } else {
871 U.pVal[0] ^= RHS;
872 }
873 return *this;
874 }
875
876 /// Multiplication assignment operator.
877 ///
878 /// Multiplies this APInt by RHS and assigns the result to *this.
879 ///
880 /// \returns *this
881 APInt &operator*=(const APInt &RHS);
882 APInt &operator*=(uint64_t RHS);
883
884 /// Addition assignment operator.
885 ///
886 /// Adds RHS to *this and assigns the result to *this.
887 ///
888 /// \returns *this
889 APInt &operator+=(const APInt &RHS);
890 APInt &operator+=(uint64_t RHS);
891
892 /// Subtraction assignment operator.
893 ///
894 /// Subtracts RHS from *this and assigns the result to *this.
895 ///
896 /// \returns *this
897 APInt &operator-=(const APInt &RHS);
898 APInt &operator-=(uint64_t RHS);
899
900 /// Left-shift assignment function.
901 ///
902 /// Shifts *this left by shiftAmt and assigns the result to *this.
903 ///
904 /// \returns *this after shifting left by ShiftAmt
905 APInt &operator<<=(unsigned ShiftAmt) {
906 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 906, __PRETTY_FUNCTION__))
;
907 if (isSingleWord()) {
908 if (ShiftAmt == BitWidth)
909 U.VAL = 0;
910 else
911 U.VAL <<= ShiftAmt;
912 return clearUnusedBits();
913 }
914 shlSlowCase(ShiftAmt);
915 return *this;
916 }
917
918 /// Left-shift assignment function.
919 ///
920 /// Shifts *this left by shiftAmt and assigns the result to *this.
921 ///
922 /// \returns *this after shifting left by ShiftAmt
923 APInt &operator<<=(const APInt &ShiftAmt);
924
925 /// @}
926 /// \name Binary Operators
927 /// @{
928
929 /// Multiplication operator.
930 ///
931 /// Multiplies this APInt by RHS and returns the result.
932 APInt operator*(const APInt &RHS) const;
933
934 /// Left logical shift operator.
935 ///
936 /// Shifts this APInt left by \p Bits and returns the result.
937 APInt operator<<(unsigned Bits) const { return shl(Bits); }
938
939 /// Left logical shift operator.
940 ///
941 /// Shifts this APInt left by \p Bits and returns the result.
942 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
943
944 /// Arithmetic right-shift function.
945 ///
946 /// Arithmetic right-shift this APInt by shiftAmt.
947 APInt ashr(unsigned ShiftAmt) const {
948 APInt R(*this);
949 R.ashrInPlace(ShiftAmt);
950 return R;
951 }
952
953 /// Arithmetic right-shift this APInt by ShiftAmt in place.
954 void ashrInPlace(unsigned ShiftAmt) {
955 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 955, __PRETTY_FUNCTION__))
;
956 if (isSingleWord()) {
957 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
958 if (ShiftAmt == BitWidth)
959 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
960 else
961 U.VAL = SExtVAL >> ShiftAmt;
962 clearUnusedBits();
963 return;
964 }
965 ashrSlowCase(ShiftAmt);
966 }
967
968 /// Logical right-shift function.
969 ///
970 /// Logical right-shift this APInt by shiftAmt.
971 APInt lshr(unsigned shiftAmt) const {
972 APInt R(*this);
973 R.lshrInPlace(shiftAmt);
974 return R;
975 }
976
977 /// Logical right-shift this APInt by ShiftAmt in place.
978 void lshrInPlace(unsigned ShiftAmt) {
979 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 979, __PRETTY_FUNCTION__))
;
980 if (isSingleWord()) {
981 if (ShiftAmt == BitWidth)
982 U.VAL = 0;
983 else
984 U.VAL >>= ShiftAmt;
985 return;
986 }
987 lshrSlowCase(ShiftAmt);
988 }
989
990 /// Left-shift function.
991 ///
992 /// Left-shift this APInt by shiftAmt.
993 APInt shl(unsigned shiftAmt) const {
994 APInt R(*this);
995 R <<= shiftAmt;
996 return R;
997 }
998
999 /// Rotate left by rotateAmt.
1000 APInt rotl(unsigned rotateAmt) const;
1001
1002 /// Rotate right by rotateAmt.
1003 APInt rotr(unsigned rotateAmt) const;
1004
1005 /// Arithmetic right-shift function.
1006 ///
1007 /// Arithmetic right-shift this APInt by shiftAmt.
1008 APInt ashr(const APInt &ShiftAmt) const {
1009 APInt R(*this);
1010 R.ashrInPlace(ShiftAmt);
1011 return R;
1012 }
1013
1014 /// Arithmetic right-shift this APInt by shiftAmt in place.
1015 void ashrInPlace(const APInt &shiftAmt);
1016
1017 /// Logical right-shift function.
1018 ///
1019 /// Logical right-shift this APInt by shiftAmt.
1020 APInt lshr(const APInt &ShiftAmt) const {
1021 APInt R(*this);
1022 R.lshrInPlace(ShiftAmt);
1023 return R;
1024 }
1025
1026 /// Logical right-shift this APInt by ShiftAmt in place.
1027 void lshrInPlace(const APInt &ShiftAmt);
1028
1029 /// Left-shift function.
1030 ///
1031 /// Left-shift this APInt by shiftAmt.
1032 APInt shl(const APInt &ShiftAmt) const {
1033 APInt R(*this);
1034 R <<= ShiftAmt;
1035 return R;
1036 }
1037
1038 /// Rotate left by rotateAmt.
1039 APInt rotl(const APInt &rotateAmt) const;
1040
1041 /// Rotate right by rotateAmt.
1042 APInt rotr(const APInt &rotateAmt) const;
1043
1044 /// Unsigned division operation.
1045 ///
1046 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
1047 /// RHS are treated as unsigned quantities for purposes of this division.
1048 ///
1049 /// \returns a new APInt value containing the division result, rounded towards
1050 /// zero.
1051 APInt udiv(const APInt &RHS) const;
1052 APInt udiv(uint64_t RHS) const;
1053
1054 /// Signed division function for APInt.
1055 ///
1056 /// Signed divide this APInt by APInt RHS.
1057 ///
1058 /// The result is rounded towards zero.
1059 APInt sdiv(const APInt &RHS) const;
1060 APInt sdiv(int64_t RHS) const;
1061
1062 /// Unsigned remainder operation.
1063 ///
1064 /// Perform an unsigned remainder operation on this APInt with RHS being the
1065 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
1066 /// of this operation. Note that this is a true remainder operation and not a
1067 /// modulo operation because the sign follows the sign of the dividend which
1068 /// is *this.
1069 ///
1070 /// \returns a new APInt value containing the remainder result
1071 APInt urem(const APInt &RHS) const;
1072 uint64_t urem(uint64_t RHS) const;
1073
1074 /// Function for signed remainder operation.
1075 ///
1076 /// Signed remainder operation on APInt.
1077 APInt srem(const APInt &RHS) const;
1078 int64_t srem(int64_t RHS) const;
1079
1080 /// Dual division/remainder interface.
1081 ///
1082 /// Sometimes it is convenient to divide two APInt values and obtain both the
1083 /// quotient and remainder. This function does both operations in the same
1084 /// computation making it a little more efficient. The pair of input arguments
1085 /// may overlap with the pair of output arguments. It is safe to call
1086 /// udivrem(X, Y, X, Y), for example.
1087 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1088 APInt &Remainder);
1089 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1090 uint64_t &Remainder);
1091
1092 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1093 APInt &Remainder);
1094 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
1095 int64_t &Remainder);
1096
1097 // Operations that return overflow indicators.
1098 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
1099 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1100 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1101 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1102 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1103 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1104 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1105 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1106 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
1107
1108 // Operations that saturate
1109 APInt sadd_sat(const APInt &RHS) const;
1110 APInt uadd_sat(const APInt &RHS) const;
1111 APInt ssub_sat(const APInt &RHS) const;
1112 APInt usub_sat(const APInt &RHS) const;
1113
1114 /// Array-indexing support.
1115 ///
1116 /// \returns the bit value at bitPosition
1117 bool operator[](unsigned bitPosition) const {
1118 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")((bitPosition < getBitWidth() && "Bit position out of bounds!"
) ? static_cast<void> (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1118, __PRETTY_FUNCTION__))
;
1119 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1120 }
1121
1122 /// @}
1123 /// \name Comparison Operators
1124 /// @{
1125
1126 /// Equality operator.
1127 ///
1128 /// Compares this APInt with RHS for the validity of the equality
1129 /// relationship.
1130 bool operator==(const APInt &RHS) const {
1131 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")((BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1131, __PRETTY_FUNCTION__))
;
1132 if (isSingleWord())
1133 return U.VAL == RHS.U.VAL;
1134 return EqualSlowCase(RHS);
1135 }
1136
1137 /// Equality operator.
1138 ///
1139 /// Compares this APInt with a uint64_t for the validity of the equality
1140 /// relationship.
1141 ///
1142 /// \returns true if *this == Val
1143 bool operator==(uint64_t Val) const {
1144 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1145 }
1146
1147 /// Equality comparison.
1148 ///
1149 /// Compares this APInt with RHS for the validity of the equality
1150 /// relationship.
1151 ///
1152 /// \returns true if *this == Val
1153 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1154
1155 /// Inequality operator.
1156 ///
1157 /// Compares this APInt with RHS for the validity of the inequality
1158 /// relationship.
1159 ///
1160 /// \returns true if *this != Val
1161 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1162
1163 /// Inequality operator.
1164 ///
1165 /// Compares this APInt with a uint64_t for the validity of the inequality
1166 /// relationship.
1167 ///
1168 /// \returns true if *this != Val
1169 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1170
1171 /// Inequality comparison
1172 ///
1173 /// Compares this APInt with RHS for the validity of the inequality
1174 /// relationship.
1175 ///
1176 /// \returns true if *this != Val
1177 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1178
1179 /// Unsigned less than comparison
1180 ///
1181 /// Regards both *this and RHS as unsigned quantities and compares them for
1182 /// the validity of the less-than relationship.
1183 ///
1184 /// \returns true if *this < RHS when both are considered unsigned.
1185 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1186
1187 /// Unsigned less than comparison
1188 ///
1189 /// Regards both *this as an unsigned quantity and compares it with RHS for
1190 /// the validity of the less-than relationship.
1191 ///
1192 /// \returns true if *this < RHS when considered unsigned.
1193 bool ult(uint64_t RHS) const {
1194 // Only need to check active bits if not a single word.
1195 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1196 }
1197
1198 /// Signed less than comparison
1199 ///
1200 /// Regards both *this and RHS as signed quantities and compares them for
1201 /// validity of the less-than relationship.
1202 ///
1203 /// \returns true if *this < RHS when both are considered signed.
1204 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1205
1206 /// Signed less than comparison
1207 ///
1208 /// Regards both *this as a signed quantity and compares it with RHS for
1209 /// the validity of the less-than relationship.
1210 ///
1211 /// \returns true if *this < RHS when considered signed.
1212 bool slt(int64_t RHS) const {
1213 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1214 : getSExtValue() < RHS;
1215 }
1216
1217 /// Unsigned less or equal comparison
1218 ///
1219 /// Regards both *this and RHS as unsigned quantities and compares them for
1220 /// validity of the less-or-equal relationship.
1221 ///
1222 /// \returns true if *this <= RHS when both are considered unsigned.
1223 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1224
1225 /// Unsigned less or equal comparison
1226 ///
1227 /// Regards both *this as an unsigned quantity and compares it with RHS for
1228 /// the validity of the less-or-equal relationship.
1229 ///
1230 /// \returns true if *this <= RHS when considered unsigned.
1231 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1232
1233 /// Signed less or equal comparison
1234 ///
1235 /// Regards both *this and RHS as signed quantities and compares them for
1236 /// validity of the less-or-equal relationship.
1237 ///
1238 /// \returns true if *this <= RHS when both are considered signed.
1239 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1240
1241 /// Signed less or equal comparison
1242 ///
1243 /// Regards both *this as a signed quantity and compares it with RHS for the
1244 /// validity of the less-or-equal relationship.
1245 ///
1246 /// \returns true if *this <= RHS when considered signed.
1247 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1248
1249 /// Unsigned greather than comparison
1250 ///
1251 /// Regards both *this and RHS as unsigned quantities and compares them for
1252 /// the validity of the greater-than relationship.
1253 ///
1254 /// \returns true if *this > RHS when both are considered unsigned.
1255 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1256
1257 /// Unsigned greater than comparison
1258 ///
1259 /// Regards both *this as an unsigned quantity and compares it with RHS for
1260 /// the validity of the greater-than relationship.
1261 ///
1262 /// \returns true if *this > RHS when considered unsigned.
1263 bool ugt(uint64_t RHS) const {
1264 // Only need to check active bits if not a single word.
1265 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1266 }
1267
1268 /// Signed greather than comparison
1269 ///
1270 /// Regards both *this and RHS as signed quantities and compares them for the
1271 /// validity of the greater-than relationship.
1272 ///
1273 /// \returns true if *this > RHS when both are considered signed.
1274 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1275
1276 /// Signed greater than comparison
1277 ///
1278 /// Regards both *this as a signed quantity and compares it with RHS for
1279 /// the validity of the greater-than relationship.
1280 ///
1281 /// \returns true if *this > RHS when considered signed.
1282 bool sgt(int64_t RHS) const {
1283 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1284 : getSExtValue() > RHS;
1285 }
1286
1287 /// Unsigned greater or equal comparison
1288 ///
1289 /// Regards both *this and RHS as unsigned quantities and compares them for
1290 /// validity of the greater-or-equal relationship.
1291 ///
1292 /// \returns true if *this >= RHS when both are considered unsigned.
1293 bool uge(const APInt &RHS) const { return !ult(RHS); }
1294
1295 /// Unsigned greater or equal comparison
1296 ///
1297 /// Regards both *this as an unsigned quantity and compares it with RHS for
1298 /// the validity of the greater-or-equal relationship.
1299 ///
1300 /// \returns true if *this >= RHS when considered unsigned.
1301 bool uge(uint64_t RHS) const { return !ult(RHS); }
1302
1303 /// Signed greater or equal comparison
1304 ///
1305 /// Regards both *this and RHS as signed quantities and compares them for
1306 /// validity of the greater-or-equal relationship.
1307 ///
1308 /// \returns true if *this >= RHS when both are considered signed.
1309 bool sge(const APInt &RHS) const { return !slt(RHS); }
1310
1311 /// Signed greater or equal comparison
1312 ///
1313 /// Regards both *this as a signed quantity and compares it with RHS for
1314 /// the validity of the greater-or-equal relationship.
1315 ///
1316 /// \returns true if *this >= RHS when considered signed.
1317 bool sge(int64_t RHS) const { return !slt(RHS); }
1318
1319 /// This operation tests if there are any pairs of corresponding bits
1320 /// between this APInt and RHS that are both set.
1321 bool intersects(const APInt &RHS) const {
1322 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1322, __PRETTY_FUNCTION__))
;
1323 if (isSingleWord())
1324 return (U.VAL & RHS.U.VAL) != 0;
1325 return intersectsSlowCase(RHS);
1326 }
1327
1328 /// This operation checks that all bits set in this APInt are also set in RHS.
1329 bool isSubsetOf(const APInt &RHS) const {
1330 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1330, __PRETTY_FUNCTION__))
;
1331 if (isSingleWord())
1332 return (U.VAL & ~RHS.U.VAL) == 0;
1333 return isSubsetOfSlowCase(RHS);
1334 }
1335
1336 /// @}
1337 /// \name Resizing Operators
1338 /// @{
1339
1340 /// Truncate to new width.
1341 ///
1342 /// Truncate the APInt to a specified width. It is an error to specify a width
1343 /// that is greater than or equal to the current width.
1344 APInt trunc(unsigned width) const;
1345
1346 /// Sign extend to a new width.
1347 ///
1348 /// This operation sign extends the APInt to a new width. If the high order
1349 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1350 /// It is an error to specify a width that is less than or equal to the
1351 /// current width.
1352 APInt sext(unsigned width) const;
1353
1354 /// Zero extend to a new width.
1355 ///
1356 /// This operation zero extends the APInt to a new width. The high order bits
1357 /// are filled with 0 bits. It is an error to specify a width that is less
1358 /// than or equal to the current width.
1359 APInt zext(unsigned width) const;
1360
1361 /// Sign extend or truncate to width
1362 ///
1363 /// Make this APInt have the bit width given by \p width. The value is sign
1364 /// extended, truncated, or left alone to make it that width.
1365 APInt sextOrTrunc(unsigned width) const;
1366
1367 /// Zero extend or truncate to width
1368 ///
1369 /// Make this APInt have the bit width given by \p width. The value is zero
1370 /// extended, truncated, or left alone to make it that width.
1371 APInt zextOrTrunc(unsigned width) const;
1372
1373 /// Sign extend or truncate to width
1374 ///
1375 /// Make this APInt have the bit width given by \p width. The value is sign
1376 /// extended, or left alone to make it that width.
1377 APInt sextOrSelf(unsigned width) const;
1378
1379 /// Zero extend or truncate to width
1380 ///
1381 /// Make this APInt have the bit width given by \p width. The value is zero
1382 /// extended, or left alone to make it that width.
1383 APInt zextOrSelf(unsigned width) const;
1384
1385 /// @}
1386 /// \name Bit Manipulation Operators
1387 /// @{
1388
1389 /// Set every bit to 1.
1390 void setAllBits() {
1391 if (isSingleWord())
1392 U.VAL = WORDTYPE_MAX;
1393 else
1394 // Set all the bits in all the words.
1395 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1396 // Clear the unused ones
1397 clearUnusedBits();
1398 }
1399
1400 /// Set a given bit to 1.
1401 ///
1402 /// Set the given bit to 1 whose position is given as "bitPosition".
1403 void setBit(unsigned BitPosition) {
1404 assert(BitPosition < BitWidth && "BitPosition out of range")((BitPosition < BitWidth && "BitPosition out of range"
) ? static_cast<void> (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1404, __PRETTY_FUNCTION__))
;
1405 WordType Mask = maskBit(BitPosition);
1406 if (isSingleWord())
1407 U.VAL |= Mask;
1408 else
1409 U.pVal[whichWord(BitPosition)] |= Mask;
1410 }
1411
1412 /// Set the sign bit to 1.
1413 void setSignBit() {
1414 setBit(BitWidth - 1);
1415 }
1416
1417 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1418 void setBits(unsigned loBit, unsigned hiBit) {
1419 assert(hiBit <= BitWidth && "hiBit out of range")((hiBit <= BitWidth && "hiBit out of range") ? static_cast
<void> (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1419, __PRETTY_FUNCTION__))
;
6
Assuming the condition is true
7
'?' condition is true
1420 assert(loBit <= BitWidth && "loBit out of range")((loBit <= BitWidth && "loBit out of range") ? static_cast
<void> (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1420, __PRETTY_FUNCTION__))
;
8
'?' condition is true
1421 assert(loBit <= hiBit && "loBit greater than hiBit")((loBit <= hiBit && "loBit greater than hiBit") ? static_cast
<void> (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1421, __PRETTY_FUNCTION__))
;
9
'?' condition is true
1422 if (loBit == hiBit)
10
Assuming 'loBit' is not equal to 'hiBit'
11
Taking false branch
1423 return;
1424 if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
12
Taking true branch
1425 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1426 mask <<= loBit;
1427 if (isSingleWord())
13
Taking true branch
1428 U.VAL |= mask;
1429 else
1430 U.pVal[0] |= mask;
1431 } else {
1432 setBitsSlowCase(loBit, hiBit);
1433 }
1434 }
1435
1436 /// Set the top bits starting from loBit.
1437 void setBitsFrom(unsigned loBit) {
1438 return setBits(loBit, BitWidth);
1439 }
1440
1441 /// Set the bottom loBits bits.
1442 void setLowBits(unsigned loBits) {
1443 return setBits(0, loBits);
5
Calling 'APInt::setBits'
14
Returning from 'APInt::setBits'
1444 }
1445
1446 /// Set the top hiBits bits.
1447 void setHighBits(unsigned hiBits) {
1448 return setBits(BitWidth - hiBits, BitWidth);
1449 }
1450
1451 /// Set every bit to 0.
1452 void clearAllBits() {
1453 if (isSingleWord())
1454 U.VAL = 0;
1455 else
1456 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1457 }
1458
1459 /// Set a given bit to 0.
1460 ///
1461 /// Set the given bit to 0 whose position is given as "bitPosition".
1462 void clearBit(unsigned BitPosition) {
1463 assert(BitPosition < BitWidth && "BitPosition out of range")((BitPosition < BitWidth && "BitPosition out of range"
) ? static_cast<void> (0) : __assert_fail ("BitPosition < BitWidth && \"BitPosition out of range\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1463, __PRETTY_FUNCTION__))
;
1464 WordType Mask = ~maskBit(BitPosition);
1465 if (isSingleWord())
1466 U.VAL &= Mask;
1467 else
1468 U.pVal[whichWord(BitPosition)] &= Mask;
1469 }
1470
1471 /// Set the sign bit to 0.
1472 void clearSignBit() {
1473 clearBit(BitWidth - 1);
1474 }
1475
1476 /// Toggle every bit to its opposite value.
1477 void flipAllBits() {
1478 if (isSingleWord()) {
1479 U.VAL ^= WORDTYPE_MAX;
1480 clearUnusedBits();
1481 } else {
1482 flipAllBitsSlowCase();
1483 }
1484 }
1485
1486 /// Toggles a given bit to its opposite value.
1487 ///
1488 /// Toggle a given bit to its opposite value whose position is given
1489 /// as "bitPosition".
1490 void flipBit(unsigned bitPosition);
1491
1492 /// Negate this APInt in place.
1493 void negate() {
1494 flipAllBits();
1495 ++(*this);
1496 }
1497
1498 /// Insert the bits from a smaller APInt starting at bitPosition.
1499 void insertBits(const APInt &SubBits, unsigned bitPosition);
1500
1501 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1502 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1503
1504 /// @}
1505 /// \name Value Characterization Functions
1506 /// @{
1507
1508 /// Return the number of bits in the APInt.
1509 unsigned getBitWidth() const { return BitWidth; }
1510
1511 /// Get the number of words.
1512 ///
1513 /// Here one word's bitwidth equals to that of uint64_t.
1514 ///
1515 /// \returns the number of words to hold the integer value of this APInt.
1516 unsigned getNumWords() const { return getNumWords(BitWidth); }
1517
1518 /// Get the number of words.
1519 ///
1520 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1521 ///
1522 /// \returns the number of words to hold the integer value with a given bit
1523 /// width.
1524 static unsigned getNumWords(unsigned BitWidth) {
1525 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1526 }
1527
1528 /// Compute the number of active bits in the value
1529 ///
1530 /// This function returns the number of active bits which is defined as the
1531 /// bit width minus the number of leading zeros. This is used in several
1532 /// computations to see how "wide" the value is.
1533 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1534
1535 /// Compute the number of active words in the value of this APInt.
1536 ///
1537 /// This is used in conjunction with getActiveData to extract the raw value of
1538 /// the APInt.
1539 unsigned getActiveWords() const {
1540 unsigned numActiveBits = getActiveBits();
1541 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1542 }
1543
1544 /// Get the minimum bit size for this signed APInt
1545 ///
1546 /// Computes the minimum bit width for this APInt while considering it to be a
1547 /// signed (and probably negative) value. If the value is not negative, this
1548 /// function returns the same value as getActiveBits()+1. Otherwise, it
1549 /// returns the smallest bit width that will retain the negative value. For
1550 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1551 /// for -1, this function will always return 1.
1552 unsigned getMinSignedBits() const {
1553 if (isNegative())
1554 return BitWidth - countLeadingOnes() + 1;
1555 return getActiveBits() + 1;
1556 }
1557
1558 /// Get zero extended value
1559 ///
1560 /// This method attempts to return the value of this APInt as a zero extended
1561 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1562 /// uint64_t. Otherwise an assertion will result.
1563 uint64_t getZExtValue() const {
1564 if (isSingleWord())
1565 return U.VAL;
1566 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")((getActiveBits() <= 64 && "Too many bits for uint64_t"
) ? static_cast<void> (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1566, __PRETTY_FUNCTION__))
;
1567 return U.pVal[0];
1568 }
1569
1570 /// Get sign extended value
1571 ///
1572 /// This method attempts to return the value of this APInt as a sign extended
1573 /// int64_t. The bit width must be <= 64 or the value must fit within an
1574 /// int64_t. Otherwise an assertion will result.
1575 int64_t getSExtValue() const {
1576 if (isSingleWord())
1577 return SignExtend64(U.VAL, BitWidth);
1578 assert(getMinSignedBits() <= 64 && "Too many bits for int64_t")((getMinSignedBits() <= 64 && "Too many bits for int64_t"
) ? static_cast<void> (0) : __assert_fail ("getMinSignedBits() <= 64 && \"Too many bits for int64_t\""
, "/build/llvm-toolchain-snapshot-8~svn348900/include/llvm/ADT/APInt.h"
, 1578, __PRETTY_FUNCTION__))
;
1579 return int64_t(U.pVal[0]);
1580 }
1581
1582 /// Get bits required for string value.
1583 ///
1584 /// This method determines how many bits are required to hold the APInt
1585 /// equivalent of the string given by \p str.
1586 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1587
1588 /// The APInt version of the countLeadingZeros functions in
1589 /// MathExtras.h.
1590 ///
1591 /// It counts the number of zeros from the most significant bit to the first
1592 /// one bit.
1593 ///
1594 /// \returns BitWidth if the value is zero, otherwise returns the number of
1595 /// zeros from the most significant bit to the first one bits.
1596 unsigned countLeadingZeros() const {
1597 if (isSingleWord()) {
1598 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1599 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1600 }
1601 return countLeadingZerosSlowCase();
1602 }
1603
1604 /// Count the number of leading one bits.
1605 ///
1606 /// This function is an APInt version of the countLeadingOnes
1607 /// functions in MathExtras.h. It counts the number of ones from the most
1608 /// significant bit to the first zero bit.
1609 ///
1610 /// \returns 0 if the high order bit is not set, otherwise returns the number
1611 /// of 1 bits from the most significant to the least
1612 unsigned countLeadingOnes() const {
1613 if (isSingleWord())
1614 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1615 return countLeadingOnesSlowCase();
1616 }
1617
1618 /// Computes the number of leading bits of this APInt that are equal to its
1619 /// sign bit.
1620 unsigned getNumSignBits() const {
1621 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1622 }
1623
1624 /// Count the number of trailing zero bits.
1625 ///
1626 /// This function is an APInt version of the countTrailingZeros
1627 /// functions in MathExtras.h. It counts the number of zeros from the least
1628 /// significant bit to the first set bit.
1629 ///
1630 /// \returns BitWidth if the value is zero, otherwise returns the number of
1631 /// zeros from the least significant bit to the first one bit.
1632 unsigned countTrailingZeros() const {
1633 if (isSingleWord())
1634 return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
1635 return countTrailingZerosSlowCase();
1636 }
1637
1638 /// Count the number of trailing one bits.
1639 ///
1640 /// This function is an APInt version of the countTrailingOnes
1641 /// functions in MathExtras.h. It counts the number of ones from the least
1642 /// significant bit to the first zero bit.
1643 ///
1644 /// \returns BitWidth if the value is all ones, otherwise returns the number
1645 /// of ones from the least significant bit to the first zero bit.
1646 unsigned countTrailingOnes() const {
1647 if (isSingleWord())
1648 return llvm::countTrailingOnes(U.VAL);
1649 return countTrailingOnesSlowCase();
1650 }
1651
1652 /// Count the number of bits set.
1653 ///
1654 /// This function is an APInt version of the countPopulation functions
1655 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1656 ///
1657 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1658 unsigned countPopulation() const {
1659 if (isSingleWord())
1660 return llvm::countPopulation(U.VAL);
1661 return countPopulationSlowCase();
1662 }
1663
1664 /// @}
1665 /// \name Conversion Functions
1666 /// @{
1667 void print(raw_ostream &OS, bool isSigned) const;
1668
1669 /// Converts an APInt to a string and append it to Str. Str is commonly a
1670 /// SmallString.
1671 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1672 bool formatAsCLiteral = false) const;
1673
1674 /// Considers the APInt to be unsigned and converts it into a string in the
1675 /// radix given. The radix can be 2, 8, 10 16, or 36.
1676 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1677 toString(Str, Radix, false, false);
1678 }
1679
1680 /// Considers the APInt to be signed and converts it into a string in the
1681 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1682 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1683 toString(Str, Radix, true, false);
1684 }
1685
1686 /// Return the APInt as a std::string.
1687 ///
1688 /// Note that this is an inefficient method. It is better to pass in a
1689 /// SmallVector/SmallString to the methods above to avoid thrashing the heap
1690 /// for the string.
1691 std::string toString(unsigned Radix, bool Signed) const;
1692
1693 /// \returns a byte-swapped representation of this APInt Value.
1694 APInt byteSwap() const;
1695
1696 /// \returns the value with the bit representation reversed of this APInt
1697 /// Value.
1698 APInt reverseBits() const;
1699
1700 /// Converts this APInt to a double value.
1701 double roundToDouble(bool isSigned) const;
1702
1703 /// Converts this unsigned APInt to a double value.
1704 double roundToDouble() const { return roundToDouble(false); }
1705
1706 /// Converts this signed APInt to a double value.
1707 double signedRoundToDouble() const { return roundToDouble(true); }
1708
1709 /// Converts APInt bits to a double
1710 ///
1711 /// The conversion does not do a translation from integer to double, it just
1712 /// re-interprets the bits as a double. Note that it is valid to do this on
1713 /// any bit width. Exactly 64 bits will be translated.
1714 double bitsToDouble() const {
1715 return BitsToDouble(getWord(0));
1716 }
1717
1718 /// Converts APInt bits to a double
1719 ///
1720 /// The conversion does not do a translation from integer to float, it just
1721 /// re-interprets the bits as a float. Note that it is valid to do this on
1722 /// any bit width. Exactly 32 bits will be translated.
1723 float bitsToFloat() const {
1724 return BitsToFloat(getWord(0));
1725 }
1726
1727 /// Converts a double to APInt bits.
1728 ///
1729 /// The conversion does not do a translation from double to integer, it just
1730 /// re-interprets the bits of the double.
1731 static APInt doubleToBits(double V) {
1732 return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V));
1733 }
1734
1735 /// Converts a float to APInt bits.
1736 ///
1737 /// The conversion does not do a translation from float to integer, it just
1738 /// re-interprets the bits of the float.
1739 static APInt floatToBits(float V) {
1740 return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V));
1741 }
1742
1743 /// @}
1744 /// \name Mathematics Operations
1745 /// @{
1746
1747 /// \returns the floor log base 2 of this APInt.
1748 unsigned logBase2() const { return getActiveBits() - 1; }
1749
1750 /// \returns the ceil log base 2 of this APInt.
1751 unsigned ceilLogBase2() const {
1752 APInt temp(*this);
1753 --temp;
1754 return temp.getActiveBits();
1755 }
1756
1757 /// \returns the nearest log base 2 of this APInt. Ties round up.
1758 ///
1759 /// NOTE: When we have a BitWidth of 1, we define:
1760 ///
1761 /// log2(0) = UINT32_MAX
1762 /// log2(1) = 0
1763 ///
1764 /// to get around any mathematical concerns resulting from
1765 /// referencing 2 in a space where 2 does no exist.
1766 unsigned nearestLogBase2() const {
1767 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1768 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1769 // UINT32_MAX.
1770 if (BitWidth == 1)
1771 return U.VAL - 1;
1772
1773 // Handle the zero case.
1774 if (isNullValue())
1775 return UINT32_MAX(4294967295U);
1776
1777 // The non-zero case is handled by computing:
1778 //
1779 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1780 //
1781 // where x[i] is referring to the value of the ith bit of x.
1782 unsigned lg = logBase2();
1783 return lg + unsigned((*this)[lg - 1]);
1784 }
1785
1786 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1787 /// otherwise
1788 int32_t exactLogBase2() const {
1789 if (!isPowerOf2())
1790 return -1;
1791 return logBase2();
1792 }
1793
1794 /// Compute the square root
1795 APInt sqrt() const;
1796
1797 /// Get the absolute value;
1798 ///
1799 /// If *this is < 0 then return -(*this), otherwise *this;
1800 APInt abs() const {
1801 if (isNegative())
1802 return -(*this);
1803 return *this;
1804 }
1805
1806 /// \returns the multiplicative inverse for a given modulo.
1807 APInt multiplicativeInverse(const APInt &modulo) const;
1808
1809 /// @}
1810 /// \name Support for division by constant
1811 /// @{
1812
1813 /// Calculate the magic number for signed division by a constant.
1814 struct ms;
1815 ms magic() const;
1816
1817 /// Calculate the magic number for unsigned division by a constant.
1818 struct mu;
1819 mu magicu(unsigned LeadingZeros = 0) const;
1820
1821 /// @}
1822 /// \name Building-block Operations for APInt and APFloat
1823 /// @{
1824
1825 // These building block operations operate on a representation of arbitrary
1826 // precision, two's-complement, bignum integer values. They should be
1827 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1828 // generally a pointer to the base of an array of integer parts, representing
1829 // an unsigned bignum, and a count of how many parts there are.
1830
1831 /// Sets the least significant part of a bignum to the input value, and zeroes
1832 /// out higher parts.
1833 static void tcSet(WordType *, WordType, unsigned);
1834
1835 /// Assign one bignum to another.
1836 static void tcAssign(WordType *, const WordType *, unsigned);
1837
1838 /// Returns true if a bignum is zero, false otherwise.
1839 static bool tcIsZero(const WordType *, unsigned);
1840
1841 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1842 static int tcExtractBit(const WordType *, unsigned bit);
1843
1844 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1845 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1846 /// significant bit of DST. All high bits above srcBITS in DST are
1847 /// zero-filled.
1848 static void tcExtract(WordType *, unsigned dstCount,
1849 const WordType *, unsigned srcBits,
1850 unsigned srcLSB);
1851
1852 /// Set the given bit of a bignum. Zero-based.
1853 static void tcSetBit(WordType *, unsigned bit);
1854
1855 /// Clear the given bit of a bignum. Zero-based.
1856 static void tcClearBit(WordType *, unsigned bit);
1857
1858 /// Returns the bit number of the least or most significant set bit of a
1859 /// number. If the input number has no bits set -1U is returned.
1860 static unsigned tcLSB(const WordType *, unsigned n);
1861 static unsigned tcMSB(const WordType *parts, unsigned n);
1862
1863 /// Negate a bignum in-place.
1864 static void tcNegate(WordType *, unsigned);
1865
1866 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1867 static WordType tcAdd(WordType *, const WordType *,
1868 WordType carry, unsigned);
1869 /// DST += RHS. Returns the carry flag.
1870 static WordType tcAddPart(WordType *, WordType, unsigned);
1871
1872 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1873 static WordType tcSubtract(WordType *, const WordType *,
1874 WordType carry, unsigned);
1875 /// DST -= RHS. Returns the carry flag.
1876 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1877
1878 /// DST += SRC * MULTIPLIER + PART if add is true
1879 /// DST = SRC * MULTIPLIER + PART if add is false
1880 ///
1881 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1882 /// start at the same point, i.e. DST == SRC.
1883 ///
1884 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1885 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1886 /// result, and if all of the omitted higher parts were zero return zero,
1887 /// otherwise overflow occurred and return one.
1888 static int tcMultiplyPart(WordType *dst, const WordType *src,
1889 WordType multiplier, WordType carry,
1890 unsigned srcParts, unsigned dstParts,
1891 bool add);
1892
1893 /// DST = LHS * RHS, where DST has the same width as the operands and is
1894 /// filled with the least significant parts of the result. Returns one if
1895 /// overflow occurred, otherwise zero. DST must be disjoint from both
1896 /// operands.
1897 static int tcMultiply(WordType *, const WordType *, const WordType *,
1898 unsigned);
1899
1900 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1901 /// operands. No overflow occurs. DST must be disjoint from both operands.
1902 static void tcFullMultiply(WordType *, const WordType *,
1903 const WordType *, unsigned, unsigned);
1904
1905 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1906 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1907 /// REMAINDER to the remainder, return zero. i.e.
1908 ///
1909 /// OLD_LHS = RHS * LHS + REMAINDER
1910 ///
1911 /// SCRATCH is a bignum of the same size as the operands and result for use by
1912 /// the routine; its contents need not be initialized and are destroyed. LHS,
1913 /// REMAINDER and SCRATCH must be distinct.
1914 static int tcDivide(WordType *lhs, const WordType *rhs,
1915 WordType *remainder, WordType *scratch,
1916 unsigned parts);
1917
1918 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1919 /// restrictions on Count.
1920 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1921
1922 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1923 /// restrictions on Count.
1924 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1925
1926 /// The obvious AND, OR and XOR and complement operations.
1927 static void tcAnd(WordType *, const WordType *, unsigned);
1928 static void tcOr(WordType *, const WordType *, unsigned);
1929 static void tcXor(WordType *, const WordType *, unsigned);
1930 static void tcComplement(WordType *, unsigned);
1931
1932 /// Comparison (unsigned) of two bignums.
1933 static int tcCompare(const WordType *, const WordType *, unsigned);
1934
1935 /// Increment a bignum in-place. Return the carry flag.
1936 static WordType tcIncrement(WordType *dst, unsigned parts) {
1937 return tcAddPart(dst, 1, parts);
1938 }
1939
1940 /// Decrement a bignum in-place. Return the borrow flag.
1941 static WordType tcDecrement(WordType *dst, unsigned parts) {
1942 return tcSubtractPart(dst, 1, parts);
1943 }
1944
1945 /// Set the least significant BITS and clear the rest.
1946 static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
1947
1948 /// debug method
1949 void dump() const;
1950
1951 /// @}
1952};
1953
1954/// Magic data for optimising signed division by a constant.
1955struct APInt::ms {
1956 APInt m; ///< magic number
1957 unsigned s; ///< shift amount
1958};
1959
1960/// Magic data for optimising unsigned division by a constant.
1961struct APInt::mu {
1962 APInt m; ///< magic number
1963 bool a; ///< add indicator
1964 unsigned s; ///< shift amount
1965};
1966
1967inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
1968
1969inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1970
1971/// Unary bitwise complement operator.
1972///
1973/// \returns an APInt that is the bitwise complement of \p v.
1974inline APInt operator~(APInt v) {
1975 v.flipAllBits();
1976 return v;
1977}
1978
1979inline APInt operator&(APInt a, const APInt &b) {
1980 a &= b;
1981 return a;
1982}
1983
1984inline APInt operator&(const APInt &a, APInt &&b) {
1985 b &= a;
1986 return std::move(b);
1987}
1988
1989inline APInt operator&(APInt a, uint64_t RHS) {
1990 a &= RHS;
1991 return a;
1992}
1993
1994inline APInt operator&(uint64_t LHS, APInt b) {
1995 b &= LHS;
1996 return b;
1997}
1998
1999inline APInt operator|(APInt a, const APInt &b) {
2000 a |= b;
2001 return a;
2002}
2003
2004inline APInt operator|(const APInt &a, APInt &&b) {
2005 b |= a;
2006 return std::move(b);
2007}
2008
2009inline APInt operator|(APInt a, uint64_t RHS) {
2010 a |= RHS;
2011 return a;
2012}
2013
2014inline APInt operator|(uint64_t LHS, APInt b) {
2015 b |= LHS;
2016 return b;
2017}
2018
2019inline APInt operator^(APInt a, const APInt &b) {
2020 a ^= b;
2021 return a;
2022}
2023
2024inline APInt operator^(const APInt &a, APInt &&b) {
2025 b ^= a;
2026 return std::move(b);
2027}
2028
2029inline APInt operator^(APInt a, uint64_t RHS) {
2030 a ^= RHS;
2031 return a;
2032}
2033
2034inline APInt operator^(uint64_t LHS, APInt b) {
2035 b ^= LHS;
2036 return b;
2037}
2038
2039inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2040 I.print(OS, true);
2041 return OS;
2042}
2043
2044inline APInt operator-(APInt v) {
2045 v.negate();
2046 return v;
2047}
2048
2049inline APInt operator+(APInt a, const APInt &b) {
2050 a += b;
2051 return a;
2052}
2053
2054inline APInt operator+(const APInt &a, APInt &&b) {
2055 b += a;
2056 return std::move(b);
2057}
2058
2059inline APInt operator+(APInt a, uint64_t RHS) {
2060 a += RHS;
2061 return a;
2062}
2063
2064inline APInt operator+(uint64_t LHS, APInt b) {
2065 b += LHS;
2066 return b;
2067}
2068
2069inline APInt operator-(APInt a, const APInt &b) {
2070 a -= b;
2071 return a;
2072}
2073
2074inline APInt operator-(const APInt &a, APInt &&b) {
2075 b.negate();
2076 b += a;
2077 return std::move(b);
2078}
2079
2080inline APInt operator-(APInt a, uint64_t RHS) {
2081 a -= RHS;
2082 return a;
2083}
2084
2085inline APInt operator-(uint64_t LHS, APInt b) {
2086 b.negate();
2087 b += LHS;
2088 return b;
2089}
2090
2091inline APInt operator*(APInt a, uint64_t RHS) {
2092 a *= RHS;
2093 return a;
2094}
2095
2096inline APInt operator*(uint64_t LHS, APInt b) {
2097 b *= LHS;
2098 return b;
2099}
2100
2101
2102namespace APIntOps {
2103
2104/// Determine the smaller of two APInts considered to be signed.
2105inline const APInt &smin(const APInt &A, const APInt &B) {
2106 return A.slt(B) ? A : B;
2107}
2108
2109/// Determine the larger of two APInts considered to be signed.
2110inline const APInt &smax(const APInt &A, const APInt &B) {
2111 return A.sgt(B) ? A : B;
2112}
2113
2114/// Determine the smaller of two APInts considered to be signed.
2115inline const APInt &umin(const APInt &A, const APInt &B) {
2116 return A.ult(B) ? A : B;
2117}
2118
2119/// Determine the larger of two APInts considered to be unsigned.
2120inline const APInt &umax(const APInt &A, const APInt &B) {
2121 return A.ugt(B) ? A : B;
2122}
2123
2124/// Compute GCD of two unsigned APInt values.
2125///
2126/// This function returns the greatest common divisor of the two APInt values
2127/// using Stein's algorithm.
2128///
2129/// \returns the greatest common divisor of A and B.
2130APInt GreatestCommonDivisor(APInt A, APInt B);
2131
2132/// Converts the given APInt to a double value.
2133///
2134/// Treats the APInt as an unsigned value for conversion purposes.
2135inline double RoundAPIntToDouble(const APInt &APIVal) {
2136 return APIVal.roundToDouble();
2137}
2138
2139/// Converts the given APInt to a double value.
2140///
2141/// Treats the APInt as a signed value for conversion purposes.
2142inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2143 return APIVal.signedRoundToDouble();
2144}
2145
2146/// Converts the given APInt to a float vlalue.
2147inline float RoundAPIntToFloat(const APInt &APIVal) {
2148 return float(RoundAPIntToDouble(APIVal));
2149}
2150
2151/// Converts the given APInt to a float value.
2152///
2153/// Treast the APInt as a signed value for conversion purposes.
2154inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2155 return float(APIVal.signedRoundToDouble());
2156}
2157
2158/// Converts the given double value into a APInt.
2159///
2160/// This function convert a double value to an APInt value.
2161APInt RoundDoubleToAPInt(double Double, unsigned width);
2162
2163/// Converts a float value into a APInt.
2164///
2165/// Converts a float value into an APInt value.
2166inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2167 return RoundDoubleToAPInt(double(Float), width);
2168}
2169
2170/// Return A unsign-divided by B, rounded by the given rounding mode.
2171APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2172
2173/// Return A sign-divided by B, rounded by the given rounding mode.
2174APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2175
2176/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2177/// (e.g. 32 for i32).
2178/// This function finds the smallest number n, such that
2179/// (a) n >= 0 and q(n) = 0, or
2180/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2181/// integers, belong to two different intervals [Rk, Rk+R),
2182/// where R = 2^BW, and k is an integer.
2183/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2184/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2185/// subtraction (treated as addition of negated numbers) would always
2186/// count as an overflow, but here we want to allow values to decrease
2187/// and increase as long as they are within the same interval.
2188/// Specifically, adding of two negative numbers should not cause an
2189/// overflow (as long as the magnitude does not exceed the bith width).
2190/// On the other hand, given a positive number, adding a negative
2191/// number to it can give a negative result, which would cause the
2192/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2193/// treated as a special case of an overflow.
2194///
2195/// This function returns None if after finding k that minimizes the
2196/// positive solution to q(n) = kR, both solutions are contained between
2197/// two consecutive integers.
2198///
2199/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2200/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2201/// virtue of *signed* overflow. This function will *not* find such an n,
2202/// however it may find a value of n satisfying the inequalities due to
2203/// an *unsigned* overflow (if the values are treated as unsigned).
2204/// To find a solution for a signed overflow, treat it as a problem of
2205/// finding an unsigned overflow with a range with of BW-1.
2206///
2207/// The returned value may have a different bit width from the input
2208/// coefficients.
2209Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2210 unsigned RangeWidth);
2211} // End of APIntOps namespace
2212
2213// See friend declaration above. This additional declaration is required in
2214// order to compile LLVM with IBM xlC compiler.
2215hash_code hash_value(const APInt &Arg);
2216} // End of llvm namespace
2217
2218#endif