Bug Summary

File:lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1544, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/Hexagon -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
11
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Triple.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/InstructionSimplify.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/MemoryLocation.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpander.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Transforms/Utils/Local.h"
30#include "llvm/Analysis/ValueTracking.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/Module.h"
47#include "llvm/IR/PatternMatch.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include <algorithm>
62#include <array>
63#include <cassert>
64#include <cstdint>
65#include <cstdlib>
66#include <deque>
67#include <functional>
68#include <iterator>
69#include <map>
70#include <set>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
77 cl::Hidden, cl::init(false),
78 cl::desc("Disable generation of memcpy in loop idiom recognition"));
79
80static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memmove in loop idiom recognition"));
83
84static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
85 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
86 "check guarding the memmove."));
87
88static cl::opt<unsigned> CompileTimeMemSizeThreshold(
89 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
90 cl::desc("Threshold (in bytes) to perform the transformation, if the "
91 "runtime loop count (mem transfer size) is known at compile-time."));
92
93static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
94 cl::Hidden, cl::init(true),
95 cl::desc("Only enable generating memmove in non-nested loops"));
96
97cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
98 cl::Hidden, cl::init(false),
99 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
100
101static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
102 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
103
104static const char *HexagonVolatileMemcpyName
105 = "hexagon_memcpy_forward_vp4cp4n2";
106
107
108namespace llvm {
109
110 void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
111 Pass *createHexagonLoopIdiomPass();
112
113} // end namespace llvm
114
115namespace {
116
117 class HexagonLoopIdiomRecognize : public LoopPass {
118 public:
119 static char ID;
120
121 explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
122 initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
123 }
124
125 StringRef getPassName() const override {
126 return "Recognize Hexagon-specific loop idioms";
127 }
128
129 void getAnalysisUsage(AnalysisUsage &AU) const override {
130 AU.addRequired<LoopInfoWrapperPass>();
131 AU.addRequiredID(LoopSimplifyID);
132 AU.addRequiredID(LCSSAID);
133 AU.addRequired<AAResultsWrapperPass>();
134 AU.addPreserved<AAResultsWrapperPass>();
135 AU.addRequired<ScalarEvolutionWrapperPass>();
136 AU.addRequired<DominatorTreeWrapperPass>();
137 AU.addRequired<TargetLibraryInfoWrapperPass>();
138 AU.addPreserved<TargetLibraryInfoWrapperPass>();
139 }
140
141 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
142
143 private:
144 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
145 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
146 void collectStores(Loop *CurLoop, BasicBlock *BB,
147 SmallVectorImpl<StoreInst*> &Stores);
148 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
149 bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
150 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
151 SmallVectorImpl<BasicBlock*> &ExitBlocks);
152 bool runOnCountableLoop(Loop *L);
153
154 AliasAnalysis *AA;
155 const DataLayout *DL;
156 DominatorTree *DT;
157 LoopInfo *LF;
158 const TargetLibraryInfo *TLI;
159 ScalarEvolution *SE;
160 bool HasMemcpy, HasMemmove;
161 };
162
163 struct Simplifier {
164 struct Rule {
165 using FuncType = std::function<Value* (Instruction*, LLVMContext&)>;
166 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
167 StringRef Name; // For debugging.
168 FuncType Fn;
169 };
170
171 void addRule(StringRef N, const Rule::FuncType &F) {
172 Rules.push_back(Rule(N, F));
173 }
174
175 private:
176 struct WorkListType {
177 WorkListType() = default;
178
179 void push_back(Value* V) {
180 // Do not push back duplicates.
181 if (!S.count(V)) { Q.push_back(V); S.insert(V); }
182 }
183
184 Value *pop_front_val() {
185 Value *V = Q.front(); Q.pop_front(); S.erase(V);
186 return V;
187 }
188
189 bool empty() const { return Q.empty(); }
190
191 private:
192 std::deque<Value*> Q;
193 std::set<Value*> S;
194 };
195
196 using ValueSetType = std::set<Value *>;
197
198 std::vector<Rule> Rules;
199
200 public:
201 struct Context {
202 using ValueMapType = DenseMap<Value *, Value *>;
203
204 Value *Root;
205 ValueSetType Used; // The set of all cloned values used by Root.
206 ValueSetType Clones; // The set of all cloned values.
207 LLVMContext &Ctx;
208
209 Context(Instruction *Exp)
210 : Ctx(Exp->getParent()->getParent()->getContext()) {
211 initialize(Exp);
212 }
213
214 ~Context() { cleanup(); }
215
216 void print(raw_ostream &OS, const Value *V) const;
217 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
218
219 private:
220 friend struct Simplifier;
221
222 void initialize(Instruction *Exp);
223 void cleanup();
224
225 template <typename FuncT> void traverse(Value *V, FuncT F);
226 void record(Value *V);
227 void use(Value *V);
228 void unuse(Value *V);
229
230 bool equal(const Instruction *I, const Instruction *J) const;
231 Value *find(Value *Tree, Value *Sub) const;
232 Value *subst(Value *Tree, Value *OldV, Value *NewV);
233 void replace(Value *OldV, Value *NewV);
234 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
235 };
236
237 Value *simplify(Context &C);
238 };
239
240 struct PE {
241 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
242
243 const Simplifier::Context &C;
244 const Value *V;
245 };
246
247 LLVM_ATTRIBUTE_USED__attribute__((__used__))
248 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
249 P.C.print(OS, P.V ? P.V : P.C.Root);
250 return OS;
251 }
252
253} // end anonymous namespace
254
255char HexagonLoopIdiomRecognize::ID = 0;
256
257INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
258 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
259INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
260INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
261INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
262INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
263INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
264INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
265INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
266INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
267 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
268
269template <typename FuncT>
270void Simplifier::Context::traverse(Value *V, FuncT F) {
271 WorkListType Q;
272 Q.push_back(V);
273
274 while (!Q.empty()) {
275 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
276 if (!U || U->getParent())
277 continue;
278 if (!F(U))
279 continue;
280 for (Value *Op : U->operands())
281 Q.push_back(Op);
282 }
283}
284
285void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
286 const auto *U = dyn_cast<const Instruction>(V);
287 if (!U) {
288 OS << V << '(' << *V << ')';
289 return;
290 }
291
292 if (U->getParent()) {
293 OS << U << '(';
294 U->printAsOperand(OS, true);
295 OS << ')';
296 return;
297 }
298
299 unsigned N = U->getNumOperands();
300 if (N != 0)
301 OS << U << '(';
302 OS << U->getOpcodeName();
303 for (const Value *Op : U->operands()) {
304 OS << ' ';
305 print(OS, Op);
306 }
307 if (N != 0)
308 OS << ')';
309}
310
311void Simplifier::Context::initialize(Instruction *Exp) {
312 // Perform a deep clone of the expression, set Root to the root
313 // of the clone, and build a map from the cloned values to the
314 // original ones.
315 ValueMapType M;
316 BasicBlock *Block = Exp->getParent();
317 WorkListType Q;
318 Q.push_back(Exp);
319
320 while (!Q.empty()) {
321 Value *V = Q.pop_front_val();
322 if (M.find(V) != M.end())
323 continue;
324 if (Instruction *U = dyn_cast<Instruction>(V)) {
325 if (isa<PHINode>(U) || U->getParent() != Block)
326 continue;
327 for (Value *Op : U->operands())
328 Q.push_back(Op);
329 M.insert({U, U->clone()});
330 }
331 }
332
333 for (std::pair<Value*,Value*> P : M) {
334 Instruction *U = cast<Instruction>(P.second);
335 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
336 auto F = M.find(U->getOperand(i));
337 if (F != M.end())
338 U->setOperand(i, F->second);
339 }
340 }
341
342 auto R = M.find(Exp);
343 assert(R != M.end())((R != M.end()) ? static_cast<void> (0) : __assert_fail
("R != M.end()", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 343, __PRETTY_FUNCTION__))
;
344 Root = R->second;
345
346 record(Root);
347 use(Root);
348}
349
350void Simplifier::Context::record(Value *V) {
351 auto Record = [this](Instruction *U) -> bool {
352 Clones.insert(U);
353 return true;
354 };
355 traverse(V, Record);
356}
357
358void Simplifier::Context::use(Value *V) {
359 auto Use = [this](Instruction *U) -> bool {
360 Used.insert(U);
361 return true;
362 };
363 traverse(V, Use);
364}
365
366void Simplifier::Context::unuse(Value *V) {
367 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
368 return;
369
370 auto Unuse = [this](Instruction *U) -> bool {
371 if (!U->use_empty())
372 return false;
373 Used.erase(U);
374 return true;
375 };
376 traverse(V, Unuse);
377}
378
379Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
380 if (Tree == OldV)
381 return NewV;
382 if (OldV == NewV)
383 return Tree;
384
385 WorkListType Q;
386 Q.push_back(Tree);
387 while (!Q.empty()) {
388 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
389 // If U is not an instruction, or it's not a clone, skip it.
390 if (!U || U->getParent())
391 continue;
392 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
393 Value *Op = U->getOperand(i);
394 if (Op == OldV) {
395 U->setOperand(i, NewV);
396 unuse(OldV);
397 } else {
398 Q.push_back(Op);
399 }
400 }
401 }
402 return Tree;
403}
404
405void Simplifier::Context::replace(Value *OldV, Value *NewV) {
406 if (Root == OldV) {
407 Root = NewV;
408 use(Root);
409 return;
410 }
411
412 // NewV may be a complex tree that has just been created by one of the
413 // transformation rules. We need to make sure that it is commoned with
414 // the existing Root to the maximum extent possible.
415 // Identify all subtrees of NewV (including NewV itself) that have
416 // equivalent counterparts in Root, and replace those subtrees with
417 // these counterparts.
418 WorkListType Q;
419 Q.push_back(NewV);
420 while (!Q.empty()) {
421 Value *V = Q.pop_front_val();
422 Instruction *U = dyn_cast<Instruction>(V);
423 if (!U || U->getParent())
424 continue;
425 if (Value *DupV = find(Root, V)) {
426 if (DupV != V)
427 NewV = subst(NewV, V, DupV);
428 } else {
429 for (Value *Op : U->operands())
430 Q.push_back(Op);
431 }
432 }
433
434 // Now, simply replace OldV with NewV in Root.
435 Root = subst(Root, OldV, NewV);
436 use(Root);
437}
438
439void Simplifier::Context::cleanup() {
440 for (Value *V : Clones) {
441 Instruction *U = cast<Instruction>(V);
442 if (!U->getParent())
443 U->dropAllReferences();
444 }
445
446 for (Value *V : Clones) {
447 Instruction *U = cast<Instruction>(V);
448 if (!U->getParent())
449 U->deleteValue();
450 }
451}
452
453bool Simplifier::Context::equal(const Instruction *I,
454 const Instruction *J) const {
455 if (I == J)
456 return true;
457 if (!I->isSameOperationAs(J))
458 return false;
459 if (isa<PHINode>(I))
460 return I->isIdenticalTo(J);
461
462 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
463 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
464 if (OpI == OpJ)
465 continue;
466 auto *InI = dyn_cast<const Instruction>(OpI);
467 auto *InJ = dyn_cast<const Instruction>(OpJ);
468 if (InI && InJ) {
469 if (!equal(InI, InJ))
470 return false;
471 } else if (InI != InJ || !InI)
472 return false;
473 }
474 return true;
475}
476
477Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
478 Instruction *SubI = dyn_cast<Instruction>(Sub);
479 WorkListType Q;
480 Q.push_back(Tree);
481
482 while (!Q.empty()) {
483 Value *V = Q.pop_front_val();
484 if (V == Sub)
485 return V;
486 Instruction *U = dyn_cast<Instruction>(V);
487 if (!U || U->getParent())
488 continue;
489 if (SubI && equal(SubI, U))
490 return U;
491 assert(!isa<PHINode>(U))((!isa<PHINode>(U)) ? static_cast<void> (0) : __assert_fail
("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 491, __PRETTY_FUNCTION__))
;
492 for (Value *Op : U->operands())
493 Q.push_back(Op);
494 }
495 return nullptr;
496}
497
498void Simplifier::Context::link(Instruction *I, BasicBlock *B,
499 BasicBlock::iterator At) {
500 if (I->getParent())
501 return;
502
503 for (Value *Op : I->operands()) {
504 if (Instruction *OpI = dyn_cast<Instruction>(Op))
505 link(OpI, B, At);
506 }
507
508 B->getInstList().insert(At, I);
509}
510
511Value *Simplifier::Context::materialize(BasicBlock *B,
512 BasicBlock::iterator At) {
513 if (Instruction *RootI = dyn_cast<Instruction>(Root))
514 link(RootI, B, At);
515 return Root;
516}
517
518Value *Simplifier::simplify(Context &C) {
519 WorkListType Q;
520 Q.push_back(C.Root);
521 unsigned Count = 0;
522 const unsigned Limit = SimplifyLimit;
523
524 while (!Q.empty()) {
525 if (Count++ >= Limit)
526 break;
527 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
528 if (!U || U->getParent() || !C.Used.count(U))
529 continue;
530 bool Changed = false;
531 for (Rule &R : Rules) {
532 Value *W = R.Fn(U, C.Ctx);
533 if (!W)
534 continue;
535 Changed = true;
536 C.record(W);
537 C.replace(U, W);
538 Q.push_back(C.Root);
539 break;
540 }
541 if (!Changed) {
542 for (Value *Op : U->operands())
543 Q.push_back(Op);
544 }
545 }
546 return Count < Limit ? C.Root : nullptr;
547}
548
549//===----------------------------------------------------------------------===//
550//
551// Implementation of PolynomialMultiplyRecognize
552//
553//===----------------------------------------------------------------------===//
554
555namespace {
556
557 class PolynomialMultiplyRecognize {
558 public:
559 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
560 const DominatorTree &dt, const TargetLibraryInfo &tli,
561 ScalarEvolution &se)
562 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
563
564 bool recognize();
565
566 private:
567 using ValueSeq = SetVector<Value *>;
568
569 IntegerType *getPmpyType() const {
570 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
571 return IntegerType::get(Ctx, 32);
572 }
573
574 bool isPromotableTo(Value *V, IntegerType *Ty);
575 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
576 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
577
578 Value *getCountIV(BasicBlock *BB);
579 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
580 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
581 ValueSeq &Late);
582 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
583 bool commutesWithShift(Instruction *I);
584 bool highBitsAreZero(Value *V, unsigned IterCount);
585 bool keepsHighBitsZero(Value *V, unsigned IterCount);
586 bool isOperandShifted(Instruction *I, Value *Op);
587 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
588 unsigned IterCount);
589 void cleanupLoopBody(BasicBlock *LoopB);
590
591 struct ParsedValues {
592 ParsedValues() = default;
593
594 Value *M = nullptr;
595 Value *P = nullptr;
596 Value *Q = nullptr;
597 Value *R = nullptr;
598 Value *X = nullptr;
599 Instruction *Res = nullptr;
600 unsigned IterCount = 0;
601 bool Left = false;
602 bool Inv = false;
603 };
604
605 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
606 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
607 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
608 Value *CIV, ParsedValues &PV, bool PreScan);
609 unsigned getInverseMxN(unsigned QP);
610 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
611
612 void setupPreSimplifier(Simplifier &S);
613 void setupPostSimplifier(Simplifier &S);
614
615 Loop *CurLoop;
616 const DataLayout &DL;
617 const DominatorTree &DT;
618 const TargetLibraryInfo &TLI;
619 ScalarEvolution &SE;
620 };
621
622} // end anonymous namespace
623
624Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
625 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
626 if (std::distance(PI, PE) != 2)
627 return nullptr;
628 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
629
630 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
631 auto *PN = cast<PHINode>(I);
632 Value *InitV = PN->getIncomingValueForBlock(PB);
633 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
634 continue;
635 Value *IterV = PN->getIncomingValueForBlock(BB);
636 if (!isa<BinaryOperator>(IterV))
637 continue;
638 auto *BO = dyn_cast<BinaryOperator>(IterV);
639 if (BO->getOpcode() != Instruction::Add)
640 continue;
641 Value *IncV = nullptr;
642 if (BO->getOperand(0) == PN)
643 IncV = BO->getOperand(1);
644 else if (BO->getOperand(1) == PN)
645 IncV = BO->getOperand(0);
646 if (IncV == nullptr)
647 continue;
648
649 if (auto *T = dyn_cast<ConstantInt>(IncV))
650 if (T->getZExtValue() == 1)
651 return PN;
652 }
653 return nullptr;
654}
655
656static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
657 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
658 Use &TheUse = UI.getUse();
659 ++UI;
660 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
661 if (BB == II->getParent())
662 II->replaceUsesOfWith(I, J);
663 }
664}
665
666bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
667 Value *CIV, ParsedValues &PV) {
668 // Match the following:
669 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
670 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
671 // The condition may also check for equality with the masked value, i.e
672 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
673 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
674
675 Value *CondV = SelI->getCondition();
676 Value *TrueV = SelI->getTrueValue();
677 Value *FalseV = SelI->getFalseValue();
678
679 using namespace PatternMatch;
680
681 CmpInst::Predicate P;
682 Value *A = nullptr, *B = nullptr, *C = nullptr;
683
684 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
685 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
686 return false;
687 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
688 return false;
689 // Matched: select (A & B) == C ? ... : ...
690 // select (A & B) != C ? ... : ...
691
692 Value *X = nullptr, *Sh1 = nullptr;
693 // Check (A & B) for (X & (1 << i)):
694 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
695 Sh1 = A;
696 X = B;
697 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
698 Sh1 = B;
699 X = A;
700 } else {
701 // TODO: Could also check for an induction variable containing single
702 // bit shifted left by 1 in each iteration.
703 return false;
704 }
705
706 bool TrueIfZero;
707
708 // Check C against the possible values for comparison: 0 and (1 << i):
709 if (match(C, m_Zero()))
710 TrueIfZero = (P == CmpInst::ICMP_EQ);
711 else if (C == Sh1)
712 TrueIfZero = (P == CmpInst::ICMP_NE);
713 else
714 return false;
715
716 // So far, matched:
717 // select (X & (1 << i)) ? ... : ...
718 // including variations of the check against zero/non-zero value.
719
720 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
721 if (TrueIfZero) {
722 ShouldSameV = TrueV;
723 ShouldXoredV = FalseV;
724 } else {
725 ShouldSameV = FalseV;
726 ShouldXoredV = TrueV;
727 }
728
729 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
730 Value *T = nullptr;
731 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
732 // Matched: select +++ ? ... : Y ^ Z
733 // select +++ ? Y ^ Z : ...
734 // where +++ denotes previously checked matches.
735 if (ShouldSameV == Y)
736 T = Z;
737 else if (ShouldSameV == Z)
738 T = Y;
739 else
740 return false;
741 R = ShouldSameV;
742 // Matched: select +++ ? R : R ^ T
743 // select +++ ? R ^ T : R
744 // depending on TrueIfZero.
745
746 } else if (match(ShouldSameV, m_Zero())) {
747 // Matched: select +++ ? 0 : ...
748 // select +++ ? ... : 0
749 if (!SelI->hasOneUse())
750 return false;
751 T = ShouldXoredV;
752 // Matched: select +++ ? 0 : T
753 // select +++ ? T : 0
754
755 Value *U = *SelI->user_begin();
756 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
757 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
758 return false;
759 // Matched: xor (select +++ ? 0 : T), R
760 // xor (select +++ ? T : 0), R
761 } else
762 return false;
763
764 // The xor input value T is isolated into its own match so that it could
765 // be checked against an induction variable containing a shifted bit
766 // (todo).
767 // For now, check against (Q << i).
768 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
769 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
770 return false;
771 // Matched: select +++ ? R : R ^ (Q << i)
772 // select +++ ? R ^ (Q << i) : R
773
774 PV.X = X;
775 PV.Q = Q;
776 PV.R = R;
777 PV.Left = true;
778 return true;
779}
780
781bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
782 ParsedValues &PV) {
783 // Match the following:
784 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
785 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
786 // The condition may also check for equality with the masked value, i.e
787 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
788 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
789
790 Value *CondV = SelI->getCondition();
791 Value *TrueV = SelI->getTrueValue();
792 Value *FalseV = SelI->getFalseValue();
793
794 using namespace PatternMatch;
795
796 Value *C = nullptr;
797 CmpInst::Predicate P;
798 bool TrueIfZero;
799
800 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
801 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
802 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
803 return false;
804 // Matched: select C == 0 ? ... : ...
805 // select C != 0 ? ... : ...
806 TrueIfZero = (P == CmpInst::ICMP_EQ);
807 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
808 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
809 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
810 return false;
811 // Matched: select C == 1 ? ... : ...
812 // select C != 1 ? ... : ...
813 TrueIfZero = (P == CmpInst::ICMP_NE);
814 } else
815 return false;
816
817 Value *X = nullptr;
818 if (!match(C, m_And(m_Value(X), m_One())) &&
819 !match(C, m_And(m_One(), m_Value(X))))
820 return false;
821 // Matched: select (X & 1) == +++ ? ... : ...
822 // select (X & 1) != +++ ? ... : ...
823
824 Value *R = nullptr, *Q = nullptr;
825 if (TrueIfZero) {
826 // The select's condition is true if the tested bit is 0.
827 // TrueV must be the shift, FalseV must be the xor.
828 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
829 return false;
830 // Matched: select +++ ? (R >> 1) : ...
831 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
832 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
833 return false;
834 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
835 // with commuting ^.
836 } else {
837 // The select's condition is true if the tested bit is 1.
838 // TrueV must be the xor, FalseV must be the shift.
839 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
840 return false;
841 // Matched: select +++ ? ... : (R >> 1)
842 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
843 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
844 return false;
845 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
846 // with commuting ^.
847 }
848
849 PV.X = X;
850 PV.Q = Q;
851 PV.R = R;
852 PV.Left = false;
853 return true;
854}
855
856bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
857 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
858 bool PreScan) {
859 using namespace PatternMatch;
860
861 // The basic pattern for R = P.Q is:
862 // for i = 0..31
863 // R = phi (0, R')
864 // if (P & (1 << i)) ; test-bit(P, i)
865 // R' = R ^ (Q << i)
866 //
867 // Similarly, the basic pattern for R = (P/Q).Q - P
868 // for i = 0..31
869 // R = phi(P, R')
870 // if (R & (1 << i))
871 // R' = R ^ (Q << i)
872
873 // There exist idioms, where instead of Q being shifted left, P is shifted
874 // right. This produces a result that is shifted right by 32 bits (the
875 // non-shifted result is 64-bit).
876 //
877 // For R = P.Q, this would be:
878 // for i = 0..31
879 // R = phi (0, R')
880 // if ((P >> i) & 1)
881 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
882 // else ; be shifted by 1, not i.
883 // R' = R >> 1
884 //
885 // And for the inverse:
886 // for i = 0..31
887 // R = phi (P, R')
888 // if (R & 1)
889 // R' = (R >> 1) ^ Q
890 // else
891 // R' = R >> 1
892
893 // The left-shifting idioms share the same pattern:
894 // select (X & (1 << i)) ? R ^ (Q << i) : R
895 // Similarly for right-shifting idioms:
896 // select (X & 1) ? (R >> 1) ^ Q
897
898 if (matchLeftShift(SelI, CIV, PV)) {
899 // If this is a pre-scan, getting this far is sufficient.
900 if (PreScan)
901 return true;
902
903 // Need to make sure that the SelI goes back into R.
904 auto *RPhi = dyn_cast<PHINode>(PV.R);
905 if (!RPhi)
906 return false;
907 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
908 return false;
909 PV.Res = SelI;
910
911 // If X is loop invariant, it must be the input polynomial, and the
912 // idiom is the basic polynomial multiply.
913 if (CurLoop->isLoopInvariant(PV.X)) {
914 PV.P = PV.X;
915 PV.Inv = false;
916 } else {
917 // X is not loop invariant. If X == R, this is the inverse pmpy.
918 // Otherwise, check for an xor with an invariant value. If the
919 // variable argument to the xor is R, then this is still a valid
920 // inverse pmpy.
921 PV.Inv = true;
922 if (PV.X != PV.R) {
923 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
924 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
925 return false;
926 auto *I1 = dyn_cast<Instruction>(X1);
927 auto *I2 = dyn_cast<Instruction>(X2);
928 if (!I1 || I1->getParent() != LoopB) {
929 Var = X2;
930 Inv = X1;
931 } else if (!I2 || I2->getParent() != LoopB) {
932 Var = X1;
933 Inv = X2;
934 } else
935 return false;
936 if (Var != PV.R)
937 return false;
938 PV.M = Inv;
939 }
940 // The input polynomial P still needs to be determined. It will be
941 // the entry value of R.
942 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
943 PV.P = EntryP;
944 }
945
946 return true;
947 }
948
949 if (matchRightShift(SelI, PV)) {
950 // If this is an inverse pattern, the Q polynomial must be known at
951 // compile time.
952 if (PV.Inv && !isa<ConstantInt>(PV.Q))
953 return false;
954 if (PreScan)
955 return true;
956 // There is no exact matching of right-shift pmpy.
957 return false;
958 }
959
960 return false;
961}
962
963bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
964 IntegerType *DestTy) {
965 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
966 if (!T || T->getBitWidth() > DestTy->getBitWidth())
967 return false;
968 if (T->getBitWidth() == DestTy->getBitWidth())
969 return true;
970 // Non-instructions are promotable. The reason why an instruction may not
971 // be promotable is that it may produce a different result if its operands
972 // and the result are promoted, for example, it may produce more non-zero
973 // bits. While it would still be possible to represent the proper result
974 // in a wider type, it may require adding additional instructions (which
975 // we don't want to do).
976 Instruction *In = dyn_cast<Instruction>(Val);
977 if (!In)
978 return true;
979 // The bitwidth of the source type is smaller than the destination.
980 // Check if the individual operation can be promoted.
981 switch (In->getOpcode()) {
982 case Instruction::PHI:
983 case Instruction::ZExt:
984 case Instruction::And:
985 case Instruction::Or:
986 case Instruction::Xor:
987 case Instruction::LShr: // Shift right is ok.
988 case Instruction::Select:
989 case Instruction::Trunc:
990 return true;
991 case Instruction::ICmp:
992 if (CmpInst *CI = cast<CmpInst>(In))
993 return CI->isEquality() || CI->isUnsigned();
994 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 994)
;
995 case Instruction::Add:
996 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
997 }
998 return false;
999}
1000
1001void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1002 IntegerType *DestTy, BasicBlock *LoopB) {
1003 Type *OrigTy = In->getType();
1004
1005 // Leave boolean values alone.
1006 if (!In->getType()->isIntegerTy(1))
1007 In->mutateType(DestTy);
1008 unsigned DestBW = DestTy->getBitWidth();
1009
1010 // Handle PHIs.
1011 if (PHINode *P = dyn_cast<PHINode>(In)) {
1012 unsigned N = P->getNumIncomingValues();
1013 for (unsigned i = 0; i != N; ++i) {
1014 BasicBlock *InB = P->getIncomingBlock(i);
1015 if (InB == LoopB)
1016 continue;
1017 Value *InV = P->getIncomingValue(i);
1018 IntegerType *Ty = cast<IntegerType>(InV->getType());
1019 // Do not promote values in PHI nodes of type i1.
1020 if (Ty != P->getType()) {
1021 // If the value type does not match the PHI type, the PHI type
1022 // must have been promoted.
1023 assert(Ty->getBitWidth() < DestBW)((Ty->getBitWidth() < DestBW) ? static_cast<void>
(0) : __assert_fail ("Ty->getBitWidth() < DestBW", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1023, __PRETTY_FUNCTION__))
;
1024 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1025 P->setIncomingValue(i, InV);
1026 }
1027 }
1028 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1029 Value *Op = Z->getOperand(0);
1030 if (Op->getType() == Z->getType())
1031 Z->replaceAllUsesWith(Op);
1032 Z->eraseFromParent();
1033 return;
1034 }
1035 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1036 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1037 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1038 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1039 T->replaceAllUsesWith(And);
1040 T->eraseFromParent();
1041 return;
1042 }
1043
1044 // Promote immediates.
1045 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1046 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1047 if (CI->getType()->getBitWidth() < DestBW)
1048 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1049 }
1050}
1051
1052bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1053 BasicBlock *ExitB) {
1054 assert(LoopB)((LoopB) ? static_cast<void> (0) : __assert_fail ("LoopB"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1054, __PRETTY_FUNCTION__))
;
1055 // Skip loops where the exit block has more than one predecessor. The values
1056 // coming from the loop block will be promoted to another type, and so the
1057 // values coming into the exit block from other predecessors would also have
1058 // to be promoted.
1059 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1060 return false;
1061 IntegerType *DestTy = getPmpyType();
1062 // Check if the exit values have types that are no wider than the type
1063 // that we want to promote to.
1064 unsigned DestBW = DestTy->getBitWidth();
1065 for (PHINode &P : ExitB->phis()) {
1066 if (P.getNumIncomingValues() != 1)
1067 return false;
1068 assert(P.getIncomingBlock(0) == LoopB)((P.getIncomingBlock(0) == LoopB) ? static_cast<void> (
0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1068, __PRETTY_FUNCTION__))
;
1069 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1070 if (!T || T->getBitWidth() > DestBW)
1071 return false;
1072 }
1073
1074 // Check all instructions in the loop.
1075 for (Instruction &In : *LoopB)
1076 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1077 return false;
1078
1079 // Perform the promotion.
1080 std::vector<Instruction*> LoopIns;
1081 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1082 [](Instruction &In) { return &In; });
1083 for (Instruction *In : LoopIns)
1084 promoteTo(In, DestTy, LoopB);
1085
1086 // Fix up the PHI nodes in the exit block.
1087 Instruction *EndI = ExitB->getFirstNonPHI();
1088 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1089 for (auto I = ExitB->begin(); I != End; ++I) {
1090 PHINode *P = dyn_cast<PHINode>(I);
1091 if (!P)
1092 break;
1093 Type *Ty0 = P->getIncomingValue(0)->getType();
1094 Type *PTy = P->getType();
1095 if (PTy != Ty0) {
1096 assert(Ty0 == DestTy)((Ty0 == DestTy) ? static_cast<void> (0) : __assert_fail
("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1096, __PRETTY_FUNCTION__))
;
1097 // In order to create the trunc, P must have the promoted type.
1098 P->mutateType(Ty0);
1099 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1100 // In order for the RAUW to work, the types of P and T must match.
1101 P->mutateType(PTy);
1102 P->replaceAllUsesWith(T);
1103 // Final update of the P's type.
1104 P->mutateType(Ty0);
1105 cast<Instruction>(T)->setOperand(0, P);
1106 }
1107 }
1108
1109 return true;
1110}
1111
1112bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1113 ValueSeq &Cycle) {
1114 // Out = ..., In, ...
1115 if (Out == In)
1116 return true;
1117
1118 auto *BB = cast<Instruction>(Out)->getParent();
1119 bool HadPhi = false;
1120
1121 for (auto U : Out->users()) {
1122 auto *I = dyn_cast<Instruction>(&*U);
1123 if (I == nullptr || I->getParent() != BB)
1124 continue;
1125 // Make sure that there are no multi-iteration cycles, e.g.
1126 // p1 = phi(p2)
1127 // p2 = phi(p1)
1128 // The cycle p1->p2->p1 would span two loop iterations.
1129 // Check that there is only one phi in the cycle.
1130 bool IsPhi = isa<PHINode>(I);
1131 if (IsPhi && HadPhi)
1132 return false;
1133 HadPhi |= IsPhi;
1134 if (Cycle.count(I))
1135 return false;
1136 Cycle.insert(I);
1137 if (findCycle(I, In, Cycle))
1138 break;
1139 Cycle.remove(I);
1140 }
1141 return !Cycle.empty();
1142}
1143
1144void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1145 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1146 // All the values in the cycle that are between the phi node and the
1147 // divider instruction will be classified as "early", all other values
1148 // will be "late".
1149
1150 bool IsE = true;
1151 unsigned I, N = Cycle.size();
1152 for (I = 0; I < N; ++I) {
1153 Value *V = Cycle[I];
1154 if (DivI == V)
1155 IsE = false;
1156 else if (!isa<PHINode>(V))
1157 continue;
1158 // Stop if found either.
1159 break;
1160 }
1161 // "I" is the index of either DivI or the phi node, whichever was first.
1162 // "E" is "false" or "true" respectively.
1163 ValueSeq &First = !IsE ? Early : Late;
1164 for (unsigned J = 0; J < I; ++J)
1165 First.insert(Cycle[J]);
1166
1167 ValueSeq &Second = IsE ? Early : Late;
1168 Second.insert(Cycle[I]);
1169 for (++I; I < N; ++I) {
1170 Value *V = Cycle[I];
1171 if (DivI == V || isa<PHINode>(V))
1172 break;
1173 Second.insert(V);
1174 }
1175
1176 for (; I < N; ++I)
1177 First.insert(Cycle[I]);
1178}
1179
1180bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1181 ValueSeq &Early, ValueSeq &Late) {
1182 // Select is an exception, since the condition value does not have to be
1183 // classified in the same way as the true/false values. The true/false
1184 // values do have to be both early or both late.
1185 if (UseI->getOpcode() == Instruction::Select) {
1186 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1187 if (Early.count(TV) || Early.count(FV)) {
1188 if (Late.count(TV) || Late.count(FV))
1189 return false;
1190 Early.insert(UseI);
1191 } else if (Late.count(TV) || Late.count(FV)) {
1192 if (Early.count(TV) || Early.count(FV))
1193 return false;
1194 Late.insert(UseI);
1195 }
1196 return true;
1197 }
1198
1199 // Not sure what would be the example of this, but the code below relies
1200 // on having at least one operand.
1201 if (UseI->getNumOperands() == 0)
1202 return true;
1203
1204 bool AE = true, AL = true;
1205 for (auto &I : UseI->operands()) {
1206 if (Early.count(&*I))
1207 AL = false;
1208 else if (Late.count(&*I))
1209 AE = false;
1210 }
1211 // If the operands appear "all early" and "all late" at the same time,
1212 // then it means that none of them are actually classified as either.
1213 // This is harmless.
1214 if (AE && AL)
1215 return true;
1216 // Conversely, if they are neither "all early" nor "all late", then
1217 // we have a mixture of early and late operands that is not a known
1218 // exception.
1219 if (!AE && !AL)
1220 return false;
1221
1222 // Check that we have covered the two special cases.
1223 assert(AE != AL)((AE != AL) ? static_cast<void> (0) : __assert_fail ("AE != AL"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1223, __PRETTY_FUNCTION__))
;
1224
1225 if (AE)
1226 Early.insert(UseI);
1227 else
1228 Late.insert(UseI);
1229 return true;
1230}
1231
1232bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1233 switch (I->getOpcode()) {
1234 case Instruction::And:
1235 case Instruction::Or:
1236 case Instruction::Xor:
1237 case Instruction::LShr:
1238 case Instruction::Shl:
1239 case Instruction::Select:
1240 case Instruction::ICmp:
1241 case Instruction::PHI:
1242 break;
1243 default:
1244 return false;
1245 }
1246 return true;
1247}
1248
1249bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1250 unsigned IterCount) {
1251 auto *T = dyn_cast<IntegerType>(V->getType());
1252 if (!T)
1253 return false;
1254
1255 KnownBits Known(T->getBitWidth());
1256 computeKnownBits(V, Known, DL);
1257 return Known.countMinLeadingZeros() >= IterCount;
1258}
1259
1260bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1261 unsigned IterCount) {
1262 // Assume that all inputs to the value have the high bits zero.
1263 // Check if the value itself preserves the zeros in the high bits.
1264 if (auto *C = dyn_cast<ConstantInt>(V))
1265 return C->getValue().countLeadingZeros() >= IterCount;
1266
1267 if (auto *I = dyn_cast<Instruction>(V)) {
1268 switch (I->getOpcode()) {
1269 case Instruction::And:
1270 case Instruction::Or:
1271 case Instruction::Xor:
1272 case Instruction::LShr:
1273 case Instruction::Select:
1274 case Instruction::ICmp:
1275 case Instruction::PHI:
1276 case Instruction::ZExt:
1277 return true;
1278 }
1279 }
1280
1281 return false;
1282}
1283
1284bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1285 unsigned Opc = I->getOpcode();
1286 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1287 return Op != I->getOperand(1);
1288 return true;
1289}
1290
1291bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1292 BasicBlock *ExitB, unsigned IterCount) {
1293 Value *CIV = getCountIV(LoopB);
1294 if (CIV == nullptr)
1295 return false;
1296 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1297 if (CIVTy == nullptr)
1298 return false;
1299
1300 ValueSeq RShifts;
1301 ValueSeq Early, Late, Cycled;
1302
1303 // Find all value cycles that contain logical right shifts by 1.
1304 for (Instruction &I : *LoopB) {
1305 using namespace PatternMatch;
1306
1307 Value *V = nullptr;
1308 if (!match(&I, m_LShr(m_Value(V), m_One())))
1309 continue;
1310 ValueSeq C;
1311 if (!findCycle(&I, V, C))
1312 continue;
1313
1314 // Found a cycle.
1315 C.insert(&I);
1316 classifyCycle(&I, C, Early, Late);
1317 Cycled.insert(C.begin(), C.end());
1318 RShifts.insert(&I);
1319 }
1320
1321 // Find the set of all values affected by the shift cycles, i.e. all
1322 // cycled values, and (recursively) all their users.
1323 ValueSeq Users(Cycled.begin(), Cycled.end());
1324 for (unsigned i = 0; i < Users.size(); ++i) {
1325 Value *V = Users[i];
1326 if (!isa<IntegerType>(V->getType()))
1327 return false;
1328 auto *R = cast<Instruction>(V);
1329 // If the instruction does not commute with shifts, the loop cannot
1330 // be unshifted.
1331 if (!commutesWithShift(R))
1332 return false;
1333 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1334 auto *T = cast<Instruction>(*I);
1335 // Skip users from outside of the loop. They will be handled later.
1336 // Also, skip the right-shifts and phi nodes, since they mix early
1337 // and late values.
1338 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1339 continue;
1340
1341 Users.insert(T);
1342 if (!classifyInst(T, Early, Late))
1343 return false;
1344 }
1345 }
1346
1347 if (Users.empty())
1348 return false;
1349
1350 // Verify that high bits remain zero.
1351 ValueSeq Internal(Users.begin(), Users.end());
1352 ValueSeq Inputs;
1353 for (unsigned i = 0; i < Internal.size(); ++i) {
1354 auto *R = dyn_cast<Instruction>(Internal[i]);
1355 if (!R)
1356 continue;
1357 for (Value *Op : R->operands()) {
1358 auto *T = dyn_cast<Instruction>(Op);
1359 if (T && T->getParent() != LoopB)
1360 Inputs.insert(Op);
1361 else
1362 Internal.insert(Op);
1363 }
1364 }
1365 for (Value *V : Inputs)
1366 if (!highBitsAreZero(V, IterCount))
1367 return false;
1368 for (Value *V : Internal)
1369 if (!keepsHighBitsZero(V, IterCount))
1370 return false;
1371
1372 // Finally, the work can be done. Unshift each user.
1373 IRBuilder<> IRB(LoopB);
1374 std::map<Value*,Value*> ShiftMap;
1375
1376 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1377
1378 CastMapType CastMap;
1379
1380 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1381 IntegerType *Ty) -> Value* {
1382 auto H = CM.find(std::make_pair(V, Ty));
1383 if (H != CM.end())
1384 return H->second;
1385 Value *CV = IRB.CreateIntCast(V, Ty, false);
1386 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1387 return CV;
1388 };
1389
1390 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1391 using namespace PatternMatch;
1392
1393 if (isa<PHINode>(I) || !Users.count(&*I))
1394 continue;
1395
1396 // Match lshr x, 1.
1397 Value *V = nullptr;
1398 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1399 replaceAllUsesOfWithIn(&*I, V, LoopB);
1400 continue;
1401 }
1402 // For each non-cycled operand, replace it with the corresponding
1403 // value shifted left.
1404 for (auto &J : I->operands()) {
1405 Value *Op = J.get();
1406 if (!isOperandShifted(&*I, Op))
1407 continue;
1408 if (Users.count(Op))
1409 continue;
1410 // Skip shifting zeros.
1411 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1412 continue;
1413 // Check if we have already generated a shift for this value.
1414 auto F = ShiftMap.find(Op);
1415 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1416 if (W == nullptr) {
1417 IRB.SetInsertPoint(&*I);
1418 // First, the shift amount will be CIV or CIV+1, depending on
1419 // whether the value is early or late. Instead of creating CIV+1,
1420 // do a single shift of the value.
1421 Value *ShAmt = CIV, *ShVal = Op;
1422 auto *VTy = cast<IntegerType>(ShVal->getType());
1423 auto *ATy = cast<IntegerType>(ShAmt->getType());
1424 if (Late.count(&*I))
1425 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1426 // Second, the types of the shifted value and the shift amount
1427 // must match.
1428 if (VTy != ATy) {
1429 if (VTy->getBitWidth() < ATy->getBitWidth())
1430 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1431 else
1432 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1433 }
1434 // Ready to generate the shift and memoize it.
1435 W = IRB.CreateShl(ShVal, ShAmt);
1436 ShiftMap.insert(std::make_pair(Op, W));
1437 }
1438 I->replaceUsesOfWith(Op, W);
1439 }
1440 }
1441
1442 // Update the users outside of the loop to account for having left
1443 // shifts. They would normally be shifted right in the loop, so shift
1444 // them right after the loop exit.
1445 // Take advantage of the loop-closed SSA form, which has all the post-
1446 // loop values in phi nodes.
1447 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1448 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1449 if (!isa<PHINode>(P))
1450 break;
1451 auto *PN = cast<PHINode>(P);
1452 Value *U = PN->getIncomingValueForBlock(LoopB);
1453 if (!Users.count(U))
1454 continue;
1455 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1456 PN->replaceAllUsesWith(S);
1457 // The above RAUW will create
1458 // S = lshr S, IterCount
1459 // so we need to fix it back into
1460 // S = lshr PN, IterCount
1461 cast<User>(S)->replaceUsesOfWith(S, PN);
1462 }
1463
1464 return true;
1465}
1466
1467void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1468 for (auto &I : *LoopB)
1469 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1470 I.replaceAllUsesWith(SV);
1471
1472 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1473 N = std::next(I);
1474 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1475 }
1476}
1477
1478unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1479 // Arrays of coefficients of Q and the inverse, C.
1480 // Q[i] = coefficient at x^i.
1481 std::array<char,32> Q, C;
1482
1483 for (unsigned i = 0; i < 32; ++i) {
1484 Q[i] = QP & 1;
1485 QP >>= 1;
1486 }
1487 assert(Q[0] == 1)((Q[0] == 1) ? static_cast<void> (0) : __assert_fail ("Q[0] == 1"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1487, __PRETTY_FUNCTION__))
;
1488
1489 // Find C, such that
1490 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1491 //
1492 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1493 // operations * and + are & and ^ respectively.
1494 //
1495 // Find C[i] recursively, by comparing i-th coefficient in the product
1496 // with 0 (or 1 for i=0).
1497 //
1498 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1499 C[0] = 1;
1500 for (unsigned i = 1; i < 32; ++i) {
1501 // Solve for C[i] in:
1502 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1503 // This is equivalent to
1504 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1505 // which is
1506 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1507 unsigned T = 0;
1508 for (unsigned j = 0; j < i; ++j)
1509 T = T ^ (C[j] & Q[i-j]);
1510 C[i] = T;
1511 }
1512
1513 unsigned QV = 0;
1514 for (unsigned i = 0; i < 32; ++i)
1515 if (C[i])
1516 QV |= (1 << i);
1517
1518 return QV;
1519}
1520
1521Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1522 ParsedValues &PV) {
1523 IRBuilder<> B(&*At);
1524 Module *M = At->getParent()->getParent()->getParent();
1525 Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1526
1527 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1528 unsigned IC = PV.IterCount;
1529
1530 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1531 P0 = P = B.CreateXor(P, PV.M);
1532
1533 // Create a bit mask to clear the high bits beyond IterCount.
1534 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
3
Calling 'APInt::getLowBitsSet'
12
Returning from 'APInt::getLowBitsSet'
1535
1536 if (PV.IterCount != 32)
13
Assuming the condition is false
14
Taking false branch
1537 P = B.CreateAnd(P, BMI);
1538
1539 if (PV.Inv) {
15
Assuming the condition is true
16
Taking true branch
1540 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1541 assert(QI && QI->getBitWidth() <= 32)((QI && QI->getBitWidth() <= 32) ? static_cast<
void> (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1541, __PRETTY_FUNCTION__))
;
1542
1543 // Again, clearing bits beyond IterCount.
1544 unsigned M = (1 << PV.IterCount) - 1;
17
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1545 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1546 unsigned QV = getInverseMxN(Tmp) & M;
1547 auto *QVI = ConstantInt::get(QI->getType(), QV);
1548 P = B.CreateCall(PMF, {P, QVI});
1549 P = B.CreateTrunc(P, QI->getType());
1550 if (IC != 32)
1551 P = B.CreateAnd(P, BMI);
1552 }
1553
1554 Value *R = B.CreateCall(PMF, {P, Q});
1555
1556 if (PV.M != nullptr)
1557 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1558
1559 return R;
1560}
1561
1562static bool hasZeroSignBit(const Value *V) {
1563 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1564 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1565 const Instruction *I = dyn_cast<const Instruction>(V);
1566 if (!I)
1567 return false;
1568 switch (I->getOpcode()) {
1569 case Instruction::LShr:
1570 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1571 return SI->getZExtValue() > 0;
1572 return false;
1573 case Instruction::Or:
1574 case Instruction::Xor:
1575 return hasZeroSignBit(I->getOperand(0)) &&
1576 hasZeroSignBit(I->getOperand(1));
1577 case Instruction::And:
1578 return hasZeroSignBit(I->getOperand(0)) ||
1579 hasZeroSignBit(I->getOperand(1));
1580 }
1581 return false;
1582}
1583
1584void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1585 S.addRule("sink-zext",
1586 // Sink zext past bitwise operations.
1587 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1588 if (I->getOpcode() != Instruction::ZExt)
1589 return nullptr;
1590 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1591 if (!T)
1592 return nullptr;
1593 switch (T->getOpcode()) {
1594 case Instruction::And:
1595 case Instruction::Or:
1596 case Instruction::Xor:
1597 break;
1598 default:
1599 return nullptr;
1600 }
1601 IRBuilder<> B(Ctx);
1602 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1603 B.CreateZExt(T->getOperand(0), I->getType()),
1604 B.CreateZExt(T->getOperand(1), I->getType()));
1605 });
1606 S.addRule("xor/and -> and/xor",
1607 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1608 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1609 if (I->getOpcode() != Instruction::Xor)
1610 return nullptr;
1611 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1612 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1613 if (!And0 || !And1)
1614 return nullptr;
1615 if (And0->getOpcode() != Instruction::And ||
1616 And1->getOpcode() != Instruction::And)
1617 return nullptr;
1618 if (And0->getOperand(1) != And1->getOperand(1))
1619 return nullptr;
1620 IRBuilder<> B(Ctx);
1621 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1622 And0->getOperand(1));
1623 });
1624 S.addRule("sink binop into select",
1625 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1626 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1627 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1628 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1629 if (!BO)
1630 return nullptr;
1631 Instruction::BinaryOps Op = BO->getOpcode();
1632 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1633 IRBuilder<> B(Ctx);
1634 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1635 Value *Z = BO->getOperand(1);
1636 return B.CreateSelect(Sel->getCondition(),
1637 B.CreateBinOp(Op, X, Z),
1638 B.CreateBinOp(Op, Y, Z));
1639 }
1640 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1641 IRBuilder<> B(Ctx);
1642 Value *X = BO->getOperand(0);
1643 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1644 return B.CreateSelect(Sel->getCondition(),
1645 B.CreateBinOp(Op, X, Y),
1646 B.CreateBinOp(Op, X, Z));
1647 }
1648 return nullptr;
1649 });
1650 S.addRule("fold select-select",
1651 // (select c (select c x y) z) -> (select c x z)
1652 // (select c x (select c y z)) -> (select c x z)
1653 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1654 SelectInst *Sel = dyn_cast<SelectInst>(I);
1655 if (!Sel)
1656 return nullptr;
1657 IRBuilder<> B(Ctx);
1658 Value *C = Sel->getCondition();
1659 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1660 if (Sel0->getCondition() == C)
1661 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1662 }
1663 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1664 if (Sel1->getCondition() == C)
1665 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1666 }
1667 return nullptr;
1668 });
1669 S.addRule("or-signbit -> xor-signbit",
1670 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1671 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1672 if (I->getOpcode() != Instruction::Or)
1673 return nullptr;
1674 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1675 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1676 return nullptr;
1677 if (!hasZeroSignBit(I->getOperand(0)))
1678 return nullptr;
1679 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1680 });
1681 S.addRule("sink lshr into binop",
1682 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1683 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1684 if (I->getOpcode() != Instruction::LShr)
1685 return nullptr;
1686 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1687 if (!BitOp)
1688 return nullptr;
1689 switch (BitOp->getOpcode()) {
1690 case Instruction::And:
1691 case Instruction::Or:
1692 case Instruction::Xor:
1693 break;
1694 default:
1695 return nullptr;
1696 }
1697 IRBuilder<> B(Ctx);
1698 Value *S = I->getOperand(1);
1699 return B.CreateBinOp(BitOp->getOpcode(),
1700 B.CreateLShr(BitOp->getOperand(0), S),
1701 B.CreateLShr(BitOp->getOperand(1), S));
1702 });
1703 S.addRule("expose bitop-const",
1704 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1705 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1706 auto IsBitOp = [](unsigned Op) -> bool {
1707 switch (Op) {
1708 case Instruction::And:
1709 case Instruction::Or:
1710 case Instruction::Xor:
1711 return true;
1712 }
1713 return false;
1714 };
1715 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1716 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1717 return nullptr;
1718 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1719 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1720 return nullptr;
1721 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1722 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1723 if (!CA || !CB)
1724 return nullptr;
1725 IRBuilder<> B(Ctx);
1726 Value *X = BitOp2->getOperand(0);
1727 return B.CreateBinOp(BitOp2->getOpcode(), X,
1728 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1729 });
1730}
1731
1732void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1733 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1734 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1735 if (I->getOpcode() != Instruction::And)
1736 return nullptr;
1737 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1738 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1739 if (!Xor || !C0)
1740 return nullptr;
1741 if (Xor->getOpcode() != Instruction::Xor)
1742 return nullptr;
1743 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1744 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1745 // Pick the first non-null and.
1746 if (!And0 || And0->getOpcode() != Instruction::And)
1747 std::swap(And0, And1);
1748 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1749 if (!C1)
1750 return nullptr;
1751 uint32_t V0 = C0->getZExtValue();
1752 uint32_t V1 = C1->getZExtValue();
1753 if (V0 != (V0 & V1))
1754 return nullptr;
1755 IRBuilder<> B(Ctx);
1756 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1757 });
1758}
1759
1760bool PolynomialMultiplyRecognize::recognize() {
1761 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1762 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1763 // Restrictions:
1764 // - The loop must consist of a single block.
1765 // - The iteration count must be known at compile-time.
1766 // - The loop must have an induction variable starting from 0, and
1767 // incremented in each iteration of the loop.
1768 BasicBlock *LoopB = CurLoop->getHeader();
1769 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1770
1771 if (LoopB != CurLoop->getLoopLatch())
1772 return false;
1773 BasicBlock *ExitB = CurLoop->getExitBlock();
1774 if (ExitB == nullptr)
1775 return false;
1776 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1777 if (EntryB == nullptr)
1778 return false;
1779
1780 unsigned IterCount = 0;
1781 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1782 if (isa<SCEVCouldNotCompute>(CT))
1783 return false;
1784 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1785 IterCount = CV->getValue()->getZExtValue() + 1;
1786
1787 Value *CIV = getCountIV(LoopB);
1788 ParsedValues PV;
1789 Simplifier PreSimp;
1790 PV.IterCount = IterCount;
1791 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1792 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1793
1794 setupPreSimplifier(PreSimp);
1795
1796 // Perform a preliminary scan of select instructions to see if any of them
1797 // looks like a generator of the polynomial multiply steps. Assume that a
1798 // loop can only contain a single transformable operation, so stop the
1799 // traversal after the first reasonable candidate was found.
1800 // XXX: Currently this approach can modify the loop before being 100% sure
1801 // that the transformation can be carried out.
1802 bool FoundPreScan = false;
1803 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1804 for (const Value *U : V->users()) {
1805 if (const auto *P = dyn_cast<const PHINode>(U))
1806 if (P->getParent() == LoopB)
1807 return true;
1808 }
1809 return false;
1810 };
1811 for (Instruction &In : *LoopB) {
1812 SelectInst *SI = dyn_cast<SelectInst>(&In);
1813 if (!SI || !FeedsPHI(SI))
1814 continue;
1815
1816 Simplifier::Context C(SI);
1817 Value *T = PreSimp.simplify(C);
1818 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1819 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1820 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1821 FoundPreScan = true;
1822 if (SelI != SI) {
1823 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1824 SI->replaceAllUsesWith(NewSel);
1825 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1826 }
1827 break;
1828 }
1829 }
1830
1831 if (!FoundPreScan) {
1832 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1833 return false;
1834 }
1835
1836 if (!PV.Left) {
1837 // The right shift version actually only returns the higher bits of
1838 // the result (each iteration discards the LSB). If we want to convert it
1839 // to a left-shifting loop, the working data type must be at least as
1840 // wide as the target's pmpy instruction.
1841 if (!promoteTypes(LoopB, ExitB))
1842 return false;
1843 // Run post-promotion simplifications.
1844 Simplifier PostSimp;
1845 setupPostSimplifier(PostSimp);
1846 for (Instruction &In : *LoopB) {
1847 SelectInst *SI = dyn_cast<SelectInst>(&In);
1848 if (!SI || !FeedsPHI(SI))
1849 continue;
1850 Simplifier::Context C(SI);
1851 Value *T = PostSimp.simplify(C);
1852 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1853 if (SelI != SI) {
1854 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1855 SI->replaceAllUsesWith(NewSel);
1856 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1857 }
1858 break;
1859 }
1860
1861 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1862 return false;
1863 cleanupLoopBody(LoopB);
1864 }
1865
1866 // Scan the loop again, find the generating select instruction.
1867 bool FoundScan = false;
1868 for (Instruction &In : *LoopB) {
1869 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1870 if (!SelI)
1871 continue;
1872 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1873 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1874 if (FoundScan)
1875 break;
1876 }
1877 assert(FoundScan)((FoundScan) ? static_cast<void> (0) : __assert_fail ("FoundScan"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1877, __PRETTY_FUNCTION__))
;
1878
1879 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1880 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1881 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1882 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1883 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1884 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1885 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1886 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1887 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1888 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1889 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1890 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1891 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1892
1893 BasicBlock::iterator At(EntryB->getTerminator());
1894 Value *PM = generate(At, PV);
1895 if (PM == nullptr)
1896 return false;
1897
1898 if (PM->getType() != PV.Res->getType())
1899 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1900
1901 PV.Res->replaceAllUsesWith(PM);
1902 PV.Res->eraseFromParent();
1903 return true;
1904}
1905
1906int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1907 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1908 return SC->getAPInt().getSExtValue();
1909 return 0;
1910}
1911
1912bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1913 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1914 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1915 return false;
1916
1917 Value *StoredVal = SI->getValueOperand();
1918 Value *StorePtr = SI->getPointerOperand();
1919
1920 // Reject stores that are so large that they overflow an unsigned.
1921 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1922 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1923 return false;
1924
1925 // See if the pointer expression is an AddRec like {base,+,1} on the current
1926 // loop, which indicates a strided store. If we have something else, it's a
1927 // random store we can't handle.
1928 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1929 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1930 return false;
1931
1932 // Check to see if the stride matches the size of the store. If so, then we
1933 // know that every byte is touched in the loop.
1934 int Stride = getSCEVStride(StoreEv);
1935 if (Stride == 0)
1936 return false;
1937 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1938 if (StoreSize != unsigned(std::abs(Stride)))
1939 return false;
1940
1941 // The store must be feeding a non-volatile load.
1942 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1943 if (!LI || !LI->isSimple())
1944 return false;
1945
1946 // See if the pointer expression is an AddRec like {base,+,1} on the current
1947 // loop, which indicates a strided load. If we have something else, it's a
1948 // random load we can't handle.
1949 Value *LoadPtr = LI->getPointerOperand();
1950 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1951 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1952 return false;
1953
1954 // The store and load must share the same stride.
1955 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1956 return false;
1957
1958 // Success. This store can be converted into a memcpy.
1959 return true;
1960}
1961
1962/// mayLoopAccessLocation - Return true if the specified loop might access the
1963/// specified pointer location, which is a loop-strided access. The 'Access'
1964/// argument specifies what the verboten forms of access are (read or write).
1965static bool
1966mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1967 const SCEV *BECount, unsigned StoreSize,
1968 AliasAnalysis &AA,
1969 SmallPtrSetImpl<Instruction *> &Ignored) {
1970 // Get the location that may be stored across the loop. Since the access
1971 // is strided positively through memory, we say that the modified location
1972 // starts at the pointer and has infinite size.
1973 LocationSize AccessSize = LocationSize::unknown();
1974
1975 // If the loop iterates a fixed number of times, we can refine the access
1976 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1977 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1978 AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
1979
1980 // TODO: For this to be really effective, we have to dive into the pointer
1981 // operand in the store. Store to &A[i] of 100 will always return may alias
1982 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1983 // which will then no-alias a store to &A[100].
1984 MemoryLocation StoreLoc(Ptr, AccessSize);
1985
1986 for (auto *B : L->blocks())
1987 for (auto &I : *B)
1988 if (Ignored.count(&I) == 0 &&
1989 isModOrRefSet(
1990 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
1991 return true;
1992
1993 return false;
1994}
1995
1996void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
1997 SmallVectorImpl<StoreInst*> &Stores) {
1998 Stores.clear();
1999 for (Instruction &I : *BB)
2000 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2001 if (isLegalStore(CurLoop, SI))
2002 Stores.push_back(SI);
2003}
2004
2005bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2006 StoreInst *SI, const SCEV *BECount) {
2007 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __PRETTY_FUNCTION__))
2008 "Expected only non-volatile stores, or Hexagon-specific memcpy"(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __PRETTY_FUNCTION__))
2009 "to volatile destination.")(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __PRETTY_FUNCTION__))
;
2010
2011 Value *StorePtr = SI->getPointerOperand();
2012 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2013 unsigned Stride = getSCEVStride(StoreEv);
2014 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2015 if (Stride != StoreSize)
2016 return false;
2017
2018 // See if the pointer expression is an AddRec like {base,+,1} on the current
2019 // loop, which indicates a strided load. If we have something else, it's a
2020 // random load we can't handle.
2021 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
2022 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2023
2024 // The trip count of the loop and the base pointer of the addrec SCEV is
2025 // guaranteed to be loop invariant, which means that it should dominate the
2026 // header. This allows us to insert code for it in the preheader.
2027 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2028 Instruction *ExpPt = Preheader->getTerminator();
2029 IRBuilder<> Builder(ExpPt);
2030 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2031
2032 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2033
2034 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2035 // this into a memcpy/memmove in the loop preheader now if we want. However,
2036 // this would be unsafe to do if there is anything else in the loop that may
2037 // read or write the memory region we're storing to. For memcpy, this
2038 // includes the load that feeds the stores. Check for an alias by generating
2039 // the base address and checking everything.
2040 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2041 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2042 Value *LoadBasePtr = nullptr;
2043
2044 bool Overlap = false;
2045 bool DestVolatile = SI->isVolatile();
2046 Type *BECountTy = BECount->getType();
2047
2048 if (DestVolatile) {
2049 // The trip count must fit in i32, since it is the type of the "num_words"
2050 // argument to hexagon_memcpy_forward_vp4cp4n2.
2051 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2052CleanupAndExit:
2053 // If we generated new code for the base pointer, clean up.
2054 Expander.clear();
2055 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2056 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2057 StoreBasePtr = nullptr;
2058 }
2059 if (LoadBasePtr) {
2060 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2061 LoadBasePtr = nullptr;
2062 }
2063 return false;
2064 }
2065 }
2066
2067 SmallPtrSet<Instruction*, 2> Ignore1;
2068 Ignore1.insert(SI);
2069 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2070 StoreSize, *AA, Ignore1)) {
2071 // Check if the load is the offending instruction.
2072 Ignore1.insert(LI);
2073 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2074 BECount, StoreSize, *AA, Ignore1)) {
2075 // Still bad. Nothing we can do.
2076 goto CleanupAndExit;
2077 }
2078 // It worked with the load ignored.
2079 Overlap = true;
2080 }
2081
2082 if (!Overlap) {
2083 if (DisableMemcpyIdiom || !HasMemcpy)
2084 goto CleanupAndExit;
2085 } else {
2086 // Don't generate memmove if this function will be inlined. This is
2087 // because the caller will undergo this transformation after inlining.
2088 Function *Func = CurLoop->getHeader()->getParent();
2089 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2090 goto CleanupAndExit;
2091
2092 // In case of a memmove, the call to memmove will be executed instead
2093 // of the loop, so we need to make sure that there is nothing else in
2094 // the loop than the load, store and instructions that these two depend
2095 // on.
2096 SmallVector<Instruction*,2> Insts;
2097 Insts.push_back(SI);
2098 Insts.push_back(LI);
2099 if (!coverLoop(CurLoop, Insts))
2100 goto CleanupAndExit;
2101
2102 if (DisableMemmoveIdiom || !HasMemmove)
2103 goto CleanupAndExit;
2104 bool IsNested = CurLoop->getParentLoop() != nullptr;
2105 if (IsNested && OnlyNonNestedMemmove)
2106 goto CleanupAndExit;
2107 }
2108
2109 // For a memcpy, we have to make sure that the input array is not being
2110 // mutated by the loop.
2111 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2112 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2113
2114 SmallPtrSet<Instruction*, 2> Ignore2;
2115 Ignore2.insert(SI);
2116 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2117 StoreSize, *AA, Ignore2))
2118 goto CleanupAndExit;
2119
2120 // Check the stride.
2121 bool StridePos = getSCEVStride(LoadEv) >= 0;
2122
2123 // Currently, the volatile memcpy only emulates traversing memory forward.
2124 if (!StridePos && DestVolatile)
2125 goto CleanupAndExit;
2126
2127 bool RuntimeCheck = (Overlap || DestVolatile);
2128
2129 BasicBlock *ExitB;
2130 if (RuntimeCheck) {
2131 // The runtime check needs a single exit block.
2132 SmallVector<BasicBlock*, 8> ExitBlocks;
2133 CurLoop->getUniqueExitBlocks(ExitBlocks);
2134 if (ExitBlocks.size() != 1)
2135 goto CleanupAndExit;
2136 ExitB = ExitBlocks[0];
2137 }
2138
2139 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2140 // pointer size if it isn't already.
2141 LLVMContext &Ctx = SI->getContext();
2142 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2143 DebugLoc DLoc = SI->getDebugLoc();
2144
2145 const SCEV *NumBytesS =
2146 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2147 if (StoreSize != 1)
2148 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2149 SCEV::FlagNUW);
2150 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2151 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2152 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2153 NumBytes = Simp;
2154
2155 CallInst *NewCall;
2156
2157 if (RuntimeCheck) {
2158 unsigned Threshold = RuntimeMemSizeThreshold;
2159 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2160 uint64_t C = CI->getZExtValue();
2161 if (Threshold != 0 && C < Threshold)
2162 goto CleanupAndExit;
2163 if (C < CompileTimeMemSizeThreshold)
2164 goto CleanupAndExit;
2165 }
2166
2167 BasicBlock *Header = CurLoop->getHeader();
2168 Function *Func = Header->getParent();
2169 Loop *ParentL = LF->getLoopFor(Preheader);
2170 StringRef HeaderName = Header->getName();
2171
2172 // Create a new (empty) preheader, and update the PHI nodes in the
2173 // header to use the new preheader.
2174 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2175 Func, Header);
2176 if (ParentL)
2177 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2178 IRBuilder<>(NewPreheader).CreateBr(Header);
2179 for (auto &In : *Header) {
2180 PHINode *PN = dyn_cast<PHINode>(&In);
2181 if (!PN)
2182 break;
2183 int bx = PN->getBasicBlockIndex(Preheader);
2184 if (bx >= 0)
2185 PN->setIncomingBlock(bx, NewPreheader);
2186 }
2187 DT->addNewBlock(NewPreheader, Preheader);
2188 DT->changeImmediateDominator(Header, NewPreheader);
2189
2190 // Check for safe conditions to execute memmove.
2191 // If stride is positive, copying things from higher to lower addresses
2192 // is equivalent to memmove. For negative stride, it's the other way
2193 // around. Copying forward in memory with positive stride may not be
2194 // same as memmove since we may be copying values that we just stored
2195 // in some previous iteration.
2196 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2197 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2198 Value *LowA = StridePos ? SA : LA;
2199 Value *HighA = StridePos ? LA : SA;
2200 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2201 Value *Cond = CmpA;
2202
2203 // Check for distance between pointers. Since the case LowA < HighA
2204 // is checked for above, assume LowA >= HighA.
2205 Value *Dist = Builder.CreateSub(LowA, HighA);
2206 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2207 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2208 Cond = CmpEither;
2209
2210 if (Threshold != 0) {
2211 Type *Ty = NumBytes->getType();
2212 Value *Thr = ConstantInt::get(Ty, Threshold);
2213 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2214 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2215 Cond = CmpBoth;
2216 }
2217 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2218 Func, NewPreheader);
2219 if (ParentL)
2220 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2221 Instruction *OldT = Preheader->getTerminator();
2222 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2223 OldT->eraseFromParent();
2224 Preheader->setName(Preheader->getName()+".old");
2225 DT->addNewBlock(MemmoveB, Preheader);
2226 // Find the new immediate dominator of the exit block.
2227 BasicBlock *ExitD = Preheader;
2228 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2229 BasicBlock *PB = *PI;
2230 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2231 if (!ExitD)
2232 break;
2233 }
2234 // If the prior immediate dominator of ExitB was dominated by the
2235 // old preheader, then the old preheader becomes the new immediate
2236 // dominator. Otherwise don't change anything (because the newly
2237 // added blocks are dominated by the old preheader).
2238 if (ExitD && DT->dominates(Preheader, ExitD)) {
2239 DomTreeNode *BN = DT->getNode(ExitB);
2240 DomTreeNode *DN = DT->getNode(ExitD);
2241 BN->setIDom(DN);
2242 }
2243
2244 // Add a call to memmove to the conditional block.
2245 IRBuilder<> CondBuilder(MemmoveB);
2246 CondBuilder.CreateBr(ExitB);
2247 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2248
2249 if (DestVolatile) {
2250 Type *Int32Ty = Type::getInt32Ty(Ctx);
2251 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2252 Type *VoidTy = Type::getVoidTy(Ctx);
2253 Module *M = Func->getParent();
2254 Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
2255 Int32PtrTy, Int32PtrTy, Int32Ty);
2256 Function *Fn = cast<Function>(CF);
2257 Fn->setLinkage(Function::ExternalLinkage);
2258
2259 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2260 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2261 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2262 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2263 MemmoveB->getTerminator());
2264 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2265 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2266 NumWords = Simp;
2267
2268 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2269 ? StoreBasePtr
2270 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2271 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2272 ? LoadBasePtr
2273 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2274 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2275 } else {
2276 NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(),
2277 LoadBasePtr, LI->getAlignment(),
2278 NumBytes);
2279 }
2280 } else {
2281 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
2282 LoadBasePtr, LI->getAlignment(),
2283 NumBytes);
2284 // Okay, the memcpy has been formed. Zap the original store and
2285 // anything that feeds into it.
2286 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2287 }
2288
2289 NewCall->setDebugLoc(DLoc);
2290
2291 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2292 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2293 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2294 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2295 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2296
2297 return true;
2298}
2299
2300// Check if the instructions in Insts, together with their dependencies
2301// cover the loop in the sense that the loop could be safely eliminated once
2302// the instructions in Insts are removed.
2303bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2304 SmallVectorImpl<Instruction*> &Insts) const {
2305 SmallSet<BasicBlock*,8> LoopBlocks;
2306 for (auto *B : L->blocks())
2307 LoopBlocks.insert(B);
2308
2309 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2310
2311 // Collect all instructions from the loop that the instructions in Insts
2312 // depend on (plus their dependencies, etc.). These instructions will
2313 // constitute the expression trees that feed those in Insts, but the trees
2314 // will be limited only to instructions contained in the loop.
2315 for (unsigned i = 0; i < Worklist.size(); ++i) {
2316 Instruction *In = Worklist[i];
2317 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2318 Instruction *OpI = dyn_cast<Instruction>(I);
2319 if (!OpI)
2320 continue;
2321 BasicBlock *PB = OpI->getParent();
2322 if (!LoopBlocks.count(PB))
2323 continue;
2324 Worklist.insert(OpI);
2325 }
2326 }
2327
2328 // Scan all instructions in the loop, if any of them have a user outside
2329 // of the loop, or outside of the expressions collected above, then either
2330 // the loop has a side-effect visible outside of it, or there are
2331 // instructions in it that are not involved in the original set Insts.
2332 for (auto *B : L->blocks()) {
2333 for (auto &In : *B) {
2334 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2335 continue;
2336 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2337 return false;
2338 for (const auto &K : In.users()) {
2339 Instruction *UseI = dyn_cast<Instruction>(K);
2340 if (!UseI)
2341 continue;
2342 BasicBlock *UseB = UseI->getParent();
2343 if (LF->getLoopFor(UseB) != L)
2344 return false;
2345 }
2346 }
2347 }
2348
2349 return true;
2350}
2351
2352/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2353/// with the specified backedge count. This block is known to be in the current
2354/// loop and not in any subloops.
2355bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2356 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2357 // We can only promote stores in this block if they are unconditionally
2358 // executed in the loop. For a block to be unconditionally executed, it has
2359 // to dominate all the exit blocks of the loop. Verify this now.
2360 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2361 return DT->dominates(BB, EB);
2362 };
2363 if (!all_of(ExitBlocks, DominatedByBB))
2364 return false;
2365
2366 bool MadeChange = false;
2367 // Look for store instructions, which may be optimized to memset/memcpy.
2368 SmallVector<StoreInst*,8> Stores;
2369 collectStores(CurLoop, BB, Stores);
2370
2371 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2372 for (auto &SI : Stores)
2373 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2374
2375 return MadeChange;
2376}
2377
2378bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2379 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2380 if (PMR.recognize())
2381 return true;
2382
2383 if (!HasMemcpy && !HasMemmove)
2384 return false;
2385
2386 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2387 assert(!isa<SCEVCouldNotCompute>(BECount) &&((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
2388 "runOnCountableLoop() called on a loop without a predictable"((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
2389 "backedge-taken count")((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __PRETTY_FUNCTION__))
;
2390
2391 SmallVector<BasicBlock *, 8> ExitBlocks;
2392 L->getUniqueExitBlocks(ExitBlocks);
2393
2394 bool Changed = false;
2395
2396 // Scan all the blocks in the loop that are not in subloops.
2397 for (auto *BB : L->getBlocks()) {
2398 // Ignore blocks in subloops.
2399 if (LF->getLoopFor(BB) != L)
2400 continue;
2401 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2402 }
2403
2404 return Changed;
2405}
2406
2407bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
2408 const Module &M = *L->getHeader()->getParent()->getParent();
2409 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2410 return false;
2411
2412 if (skipLoop(L))
2413 return false;
2414
2415 // If the loop could not be converted to canonical form, it must have an
2416 // indirectbr in it, just give up.
2417 if (!L->getLoopPreheader())
2418 return false;
2419
2420 // Disable loop idiom recognition if the function's name is a common idiom.
2421 StringRef Name = L->getHeader()->getParent()->getName();
2422 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2423 return false;
2424
2425 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2426 DL = &L->getHeader()->getModule()->getDataLayout();
2427 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2428 LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2429 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
2430 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2431
2432 HasMemcpy = TLI->has(LibFunc_memcpy);
2433 HasMemmove = TLI->has(LibFunc_memmove);
2434
2435 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2436 return runOnCountableLoop(L);
2437 return false;
2438}
2439
2440Pass *llvm::createHexagonLoopIdiomPass() {
2441 return new HexagonLoopIdiomRecognize();
2442}

/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h

1//===-- llvm/ADT/APInt.h - For Arbitrary Precision Integer -----*- C++ -*--===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9///
10/// \file
11/// This file implements a class to represent arbitrary precision
12/// integral constant values and operations on them.
13///
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_ADT_APINT_H
17#define LLVM_ADT_APINT_H
18
19#include "llvm/Support/Compiler.h"
20#include "llvm/Support/MathExtras.h"
21#include <cassert>
22#include <climits>
23#include <cstring>
24#include <string>
25
26namespace llvm {
27class FoldingSetNodeID;
28class StringRef;
29class hash_code;
30class raw_ostream;
31
32template <typename T> class SmallVectorImpl;
33template <typename T> class ArrayRef;
34template <typename T> class Optional;
35
36class APInt;
37
38inline APInt operator-(APInt);
39
40//===----------------------------------------------------------------------===//
41// APInt Class
42//===----------------------------------------------------------------------===//
43
44/// Class for arbitrary precision integers.
45///
46/// APInt is a functional replacement for common case unsigned integer type like
47/// "unsigned", "unsigned long" or "uint64_t", but also allows non-byte-width
48/// integer sizes and large integer value types such as 3-bits, 15-bits, or more
49/// than 64-bits of precision. APInt provides a variety of arithmetic operators
50/// and methods to manipulate integer values of any bit-width. It supports both
51/// the typical integer arithmetic and comparison operations as well as bitwise
52/// manipulation.
53///
54/// The class has several invariants worth noting:
55/// * All bit, byte, and word positions are zero-based.
56/// * Once the bit width is set, it doesn't change except by the Truncate,
57/// SignExtend, or ZeroExtend operations.
58/// * All binary operators must be on APInt instances of the same bit width.
59/// Attempting to use these operators on instances with different bit
60/// widths will yield an assertion.
61/// * The value is stored canonically as an unsigned value. For operations
62/// where it makes a difference, there are both signed and unsigned variants
63/// of the operation. For example, sdiv and udiv. However, because the bit
64/// widths must be the same, operations such as Mul and Add produce the same
65/// results regardless of whether the values are interpreted as signed or
66/// not.
67/// * In general, the class tries to follow the style of computation that LLVM
68/// uses in its IR. This simplifies its use for LLVM.
69///
70class LLVM_NODISCARD[[clang::warn_unused_result]] APInt {
71public:
72 typedef uint64_t WordType;
73
74 /// This enum is used to hold the constants we needed for APInt.
75 enum : unsigned {
76 /// Byte size of a word.
77 APINT_WORD_SIZE = sizeof(WordType),
78 /// Bits in a word.
79 APINT_BITS_PER_WORD = APINT_WORD_SIZE * CHAR_BIT8
80 };
81
82 enum class Rounding {
83 DOWN,
84 TOWARD_ZERO,
85 UP,
86 };
87
88 static const WordType WORDTYPE_MAX = ~WordType(0);
89
90private:
91 /// This union is used to store the integer value. When the
92 /// integer bit-width <= 64, it uses VAL, otherwise it uses pVal.
93 union {
94 uint64_t VAL; ///< Used to store the <= 64 bits integer value.
95 uint64_t *pVal; ///< Used to store the >64 bits integer value.
96 } U;
97
98 unsigned BitWidth; ///< The number of bits in this APInt.
99
100 friend struct DenseMapAPIntKeyInfo;
101
102 friend class APSInt;
103
104 /// Fast internal constructor
105 ///
106 /// This constructor is used only internally for speed of construction of
107 /// temporaries. It is unsafe for general use so it is not public.
108 APInt(uint64_t *val, unsigned bits) : BitWidth(bits) {
109 U.pVal = val;
110 }
111
112 /// Determine if this APInt just has one word to store value.
113 ///
114 /// \returns true if the number of bits <= 64, false otherwise.
115 bool isSingleWord() const { return BitWidth <= APINT_BITS_PER_WORD; }
116
117 /// Determine which word a bit is in.
118 ///
119 /// \returns the word position for the specified bit position.
120 static unsigned whichWord(unsigned bitPosition) {
121 return bitPosition / APINT_BITS_PER_WORD;
122 }
123
124 /// Determine which bit in a word a bit is in.
125 ///
126 /// \returns the bit position in a word for the specified bit position
127 /// in the APInt.
128 static unsigned whichBit(unsigned bitPosition) {
129 return bitPosition % APINT_BITS_PER_WORD;
130 }
131
132 /// Get a single bit mask.
133 ///
134 /// \returns a uint64_t with only bit at "whichBit(bitPosition)" set
135 /// This method generates and returns a uint64_t (word) mask for a single
136 /// bit at a specific bit position. This is used to mask the bit in the
137 /// corresponding word.
138 static uint64_t maskBit(unsigned bitPosition) {
139 return 1ULL << whichBit(bitPosition);
140 }
141
142 /// Clear unused high order bits
143 ///
144 /// This method is used internally to clear the top "N" bits in the high order
145 /// word that are not used by the APInt. This is needed after the most
146 /// significant word is assigned a value to ensure that those bits are
147 /// zero'd out.
148 APInt &clearUnusedBits() {
149 // Compute how many bits are used in the final word
150 unsigned WordBits = ((BitWidth-1) % APINT_BITS_PER_WORD) + 1;
151
152 // Mask out the high bits.
153 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - WordBits);
154 if (isSingleWord())
155 U.VAL &= mask;
156 else
157 U.pVal[getNumWords() - 1] &= mask;
158 return *this;
159 }
160
161 /// Get the word corresponding to a bit position
162 /// \returns the corresponding word for the specified bit position.
163 uint64_t getWord(unsigned bitPosition) const {
164 return isSingleWord() ? U.VAL : U.pVal[whichWord(bitPosition)];
165 }
166
167 /// Utility method to change the bit width of this APInt to new bit width,
168 /// allocating and/or deallocating as necessary. There is no guarantee on the
169 /// value of any bits upon return. Caller should populate the bits after.
170 void reallocate(unsigned NewBitWidth);
171
172 /// Convert a char array into an APInt
173 ///
174 /// \param radix 2, 8, 10, 16, or 36
175 /// Converts a string into a number. The string must be non-empty
176 /// and well-formed as a number of the given base. The bit-width
177 /// must be sufficient to hold the result.
178 ///
179 /// This is used by the constructors that take string arguments.
180 ///
181 /// StringRef::getAsInteger is superficially similar but (1) does
182 /// not assume that the string is well-formed and (2) grows the
183 /// result to hold the input.
184 void fromString(unsigned numBits, StringRef str, uint8_t radix);
185
186 /// An internal division function for dividing APInts.
187 ///
188 /// This is used by the toString method to divide by the radix. It simply
189 /// provides a more convenient form of divide for internal use since KnuthDiv
190 /// has specific constraints on its inputs. If those constraints are not met
191 /// then it provides a simpler form of divide.
192 static void divide(const WordType *LHS, unsigned lhsWords,
193 const WordType *RHS, unsigned rhsWords, WordType *Quotient,
194 WordType *Remainder);
195
196 /// out-of-line slow case for inline constructor
197 void initSlowCase(uint64_t val, bool isSigned);
198
199 /// shared code between two array constructors
200 void initFromArray(ArrayRef<uint64_t> array);
201
202 /// out-of-line slow case for inline copy constructor
203 void initSlowCase(const APInt &that);
204
205 /// out-of-line slow case for shl
206 void shlSlowCase(unsigned ShiftAmt);
207
208 /// out-of-line slow case for lshr.
209 void lshrSlowCase(unsigned ShiftAmt);
210
211 /// out-of-line slow case for ashr.
212 void ashrSlowCase(unsigned ShiftAmt);
213
214 /// out-of-line slow case for operator=
215 void AssignSlowCase(const APInt &RHS);
216
217 /// out-of-line slow case for operator==
218 bool EqualSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
219
220 /// out-of-line slow case for countLeadingZeros
221 unsigned countLeadingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
222
223 /// out-of-line slow case for countLeadingOnes.
224 unsigned countLeadingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
225
226 /// out-of-line slow case for countTrailingZeros.
227 unsigned countTrailingZerosSlowCase() const LLVM_READONLY__attribute__((__pure__));
228
229 /// out-of-line slow case for countTrailingOnes
230 unsigned countTrailingOnesSlowCase() const LLVM_READONLY__attribute__((__pure__));
231
232 /// out-of-line slow case for countPopulation
233 unsigned countPopulationSlowCase() const LLVM_READONLY__attribute__((__pure__));
234
235 /// out-of-line slow case for intersects.
236 bool intersectsSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
237
238 /// out-of-line slow case for isSubsetOf.
239 bool isSubsetOfSlowCase(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
240
241 /// out-of-line slow case for setBits.
242 void setBitsSlowCase(unsigned loBit, unsigned hiBit);
243
244 /// out-of-line slow case for flipAllBits.
245 void flipAllBitsSlowCase();
246
247 /// out-of-line slow case for operator&=.
248 void AndAssignSlowCase(const APInt& RHS);
249
250 /// out-of-line slow case for operator|=.
251 void OrAssignSlowCase(const APInt& RHS);
252
253 /// out-of-line slow case for operator^=.
254 void XorAssignSlowCase(const APInt& RHS);
255
256 /// Unsigned comparison. Returns -1, 0, or 1 if this APInt is less than, equal
257 /// to, or greater than RHS.
258 int compare(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
259
260 /// Signed comparison. Returns -1, 0, or 1 if this APInt is less than, equal
261 /// to, or greater than RHS.
262 int compareSigned(const APInt &RHS) const LLVM_READONLY__attribute__((__pure__));
263
264public:
265 /// \name Constructors
266 /// @{
267
268 /// Create a new APInt of numBits width, initialized as val.
269 ///
270 /// If isSigned is true then val is treated as if it were a signed value
271 /// (i.e. as an int64_t) and the appropriate sign extension to the bit width
272 /// will be done. Otherwise, no sign extension occurs (high order bits beyond
273 /// the range of val are zero filled).
274 ///
275 /// \param numBits the bit width of the constructed APInt
276 /// \param val the initial value of the APInt
277 /// \param isSigned how to treat signedness of val
278 APInt(unsigned numBits, uint64_t val, bool isSigned = false)
279 : BitWidth(numBits) {
280 assert(BitWidth && "bitwidth too small")((BitWidth && "bitwidth too small") ? static_cast<
void> (0) : __assert_fail ("BitWidth && \"bitwidth too small\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 280, __PRETTY_FUNCTION__))
;
281 if (isSingleWord()) {
282 U.VAL = val;
283 clearUnusedBits();
284 } else {
285 initSlowCase(val, isSigned);
286 }
287 }
288
289 /// Construct an APInt of numBits width, initialized as bigVal[].
290 ///
291 /// Note that bigVal.size() can be smaller or larger than the corresponding
292 /// bit width but any extraneous bits will be dropped.
293 ///
294 /// \param numBits the bit width of the constructed APInt
295 /// \param bigVal a sequence of words to form the initial value of the APInt
296 APInt(unsigned numBits, ArrayRef<uint64_t> bigVal);
297
298 /// Equivalent to APInt(numBits, ArrayRef<uint64_t>(bigVal, numWords)), but
299 /// deprecated because this constructor is prone to ambiguity with the
300 /// APInt(unsigned, uint64_t, bool) constructor.
301 ///
302 /// If this overload is ever deleted, care should be taken to prevent calls
303 /// from being incorrectly captured by the APInt(unsigned, uint64_t, bool)
304 /// constructor.
305 APInt(unsigned numBits, unsigned numWords, const uint64_t bigVal[]);
306
307 /// Construct an APInt from a string representation.
308 ///
309 /// This constructor interprets the string \p str in the given radix. The
310 /// interpretation stops when the first character that is not suitable for the
311 /// radix is encountered, or the end of the string. Acceptable radix values
312 /// are 2, 8, 10, 16, and 36. It is an error for the value implied by the
313 /// string to require more bits than numBits.
314 ///
315 /// \param numBits the bit width of the constructed APInt
316 /// \param str the string to be interpreted
317 /// \param radix the radix to use for the conversion
318 APInt(unsigned numBits, StringRef str, uint8_t radix);
319
320 /// Simply makes *this a copy of that.
321 /// Copy Constructor.
322 APInt(const APInt &that) : BitWidth(that.BitWidth) {
323 if (isSingleWord())
324 U.VAL = that.U.VAL;
325 else
326 initSlowCase(that);
327 }
328
329 /// Move Constructor.
330 APInt(APInt &&that) : BitWidth(that.BitWidth) {
331 memcpy(&U, &that.U, sizeof(U));
332 that.BitWidth = 0;
333 }
334
335 /// Destructor.
336 ~APInt() {
337 if (needsCleanup())
338 delete[] U.pVal;
339 }
340
341 /// Default constructor that creates an uninteresting APInt
342 /// representing a 1-bit zero value.
343 ///
344 /// This is useful for object deserialization (pair this with the static
345 /// method Read).
346 explicit APInt() : BitWidth(1) { U.VAL = 0; }
347
348 /// Returns whether this instance allocated memory.
349 bool needsCleanup() const { return !isSingleWord(); }
350
351 /// Used to insert APInt objects, or objects that contain APInt objects, into
352 /// FoldingSets.
353 void Profile(FoldingSetNodeID &id) const;
354
355 /// @}
356 /// \name Value Tests
357 /// @{
358
359 /// Determine sign of this APInt.
360 ///
361 /// This tests the high bit of this APInt to determine if it is set.
362 ///
363 /// \returns true if this APInt is negative, false otherwise
364 bool isNegative() const { return (*this)[BitWidth - 1]; }
365
366 /// Determine if this APInt Value is non-negative (>= 0)
367 ///
368 /// This tests the high bit of the APInt to determine if it is unset.
369 bool isNonNegative() const { return !isNegative(); }
370
371 /// Determine if sign bit of this APInt is set.
372 ///
373 /// This tests the high bit of this APInt to determine if it is set.
374 ///
375 /// \returns true if this APInt has its sign bit set, false otherwise.
376 bool isSignBitSet() const { return (*this)[BitWidth-1]; }
377
378 /// Determine if sign bit of this APInt is clear.
379 ///
380 /// This tests the high bit of this APInt to determine if it is clear.
381 ///
382 /// \returns true if this APInt has its sign bit clear, false otherwise.
383 bool isSignBitClear() const { return !isSignBitSet(); }
384
385 /// Determine if this APInt Value is positive.
386 ///
387 /// This tests if the value of this APInt is positive (> 0). Note
388 /// that 0 is not a positive value.
389 ///
390 /// \returns true if this APInt is positive.
391 bool isStrictlyPositive() const { return isNonNegative() && !isNullValue(); }
392
393 /// Determine if all bits are set
394 ///
395 /// This checks to see if the value has all bits of the APInt are set or not.
396 bool isAllOnesValue() const {
397 if (isSingleWord())
398 return U.VAL == WORDTYPE_MAX >> (APINT_BITS_PER_WORD - BitWidth);
399 return countTrailingOnesSlowCase() == BitWidth;
400 }
401
402 /// Determine if all bits are clear
403 ///
404 /// This checks to see if the value has all bits of the APInt are clear or
405 /// not.
406 bool isNullValue() const { return !*this; }
407
408 /// Determine if this is a value of 1.
409 ///
410 /// This checks to see if the value of this APInt is one.
411 bool isOneValue() const {
412 if (isSingleWord())
413 return U.VAL == 1;
414 return countLeadingZerosSlowCase() == BitWidth - 1;
415 }
416
417 /// Determine if this is the largest unsigned value.
418 ///
419 /// This checks to see if the value of this APInt is the maximum unsigned
420 /// value for the APInt's bit width.
421 bool isMaxValue() const { return isAllOnesValue(); }
422
423 /// Determine if this is the largest signed value.
424 ///
425 /// This checks to see if the value of this APInt is the maximum signed
426 /// value for the APInt's bit width.
427 bool isMaxSignedValue() const {
428 if (isSingleWord())
429 return U.VAL == ((WordType(1) << (BitWidth - 1)) - 1);
430 return !isNegative() && countTrailingOnesSlowCase() == BitWidth - 1;
431 }
432
433 /// Determine if this is the smallest unsigned value.
434 ///
435 /// This checks to see if the value of this APInt is the minimum unsigned
436 /// value for the APInt's bit width.
437 bool isMinValue() const { return isNullValue(); }
438
439 /// Determine if this is the smallest signed value.
440 ///
441 /// This checks to see if the value of this APInt is the minimum signed
442 /// value for the APInt's bit width.
443 bool isMinSignedValue() const {
444 if (isSingleWord())
445 return U.VAL == (WordType(1) << (BitWidth - 1));
446 return isNegative() && countTrailingZerosSlowCase() == BitWidth - 1;
447 }
448
449 /// Check if this APInt has an N-bits unsigned integer value.
450 bool isIntN(unsigned N) const {
451 assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail
("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 451, __PRETTY_FUNCTION__))
;
452 return getActiveBits() <= N;
453 }
454
455 /// Check if this APInt has an N-bits signed integer value.
456 bool isSignedIntN(unsigned N) const {
457 assert(N && "N == 0 ???")((N && "N == 0 ???") ? static_cast<void> (0) : __assert_fail
("N && \"N == 0 ???\"", "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 457, __PRETTY_FUNCTION__))
;
458 return getMinSignedBits() <= N;
459 }
460
461 /// Check if this APInt's value is a power of two greater than zero.
462 ///
463 /// \returns true if the argument APInt value is a power of two > 0.
464 bool isPowerOf2() const {
465 if (isSingleWord())
466 return isPowerOf2_64(U.VAL);
467 return countPopulationSlowCase() == 1;
468 }
469
470 /// Check if the APInt's value is returned by getSignMask.
471 ///
472 /// \returns true if this is the value returned by getSignMask.
473 bool isSignMask() const { return isMinSignedValue(); }
474
475 /// Convert APInt to a boolean value.
476 ///
477 /// This converts the APInt to a boolean value as a test against zero.
478 bool getBoolValue() const { return !!*this; }
479
480 /// If this value is smaller than the specified limit, return it, otherwise
481 /// return the limit value. This causes the value to saturate to the limit.
482 uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX(18446744073709551615UL)) const {
483 return ugt(Limit) ? Limit : getZExtValue();
484 }
485
486 /// Check if the APInt consists of a repeated bit pattern.
487 ///
488 /// e.g. 0x01010101 satisfies isSplat(8).
489 /// \param SplatSizeInBits The size of the pattern in bits. Must divide bit
490 /// width without remainder.
491 bool isSplat(unsigned SplatSizeInBits) const;
492
493 /// \returns true if this APInt value is a sequence of \param numBits ones
494 /// starting at the least significant bit with the remainder zero.
495 bool isMask(unsigned numBits) const {
496 assert(numBits != 0 && "numBits must be non-zero")((numBits != 0 && "numBits must be non-zero") ? static_cast
<void> (0) : __assert_fail ("numBits != 0 && \"numBits must be non-zero\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 496, __PRETTY_FUNCTION__))
;
497 assert(numBits <= BitWidth && "numBits out of range")((numBits <= BitWidth && "numBits out of range") ?
static_cast<void> (0) : __assert_fail ("numBits <= BitWidth && \"numBits out of range\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 497, __PRETTY_FUNCTION__))
;
498 if (isSingleWord())
499 return U.VAL == (WORDTYPE_MAX >> (APINT_BITS_PER_WORD - numBits));
500 unsigned Ones = countTrailingOnesSlowCase();
501 return (numBits == Ones) &&
502 ((Ones + countLeadingZerosSlowCase()) == BitWidth);
503 }
504
505 /// \returns true if this APInt is a non-empty sequence of ones starting at
506 /// the least significant bit with the remainder zero.
507 /// Ex. isMask(0x0000FFFFU) == true.
508 bool isMask() const {
509 if (isSingleWord())
510 return isMask_64(U.VAL);
511 unsigned Ones = countTrailingOnesSlowCase();
512 return (Ones > 0) && ((Ones + countLeadingZerosSlowCase()) == BitWidth);
513 }
514
515 /// Return true if this APInt value contains a sequence of ones with
516 /// the remainder zero.
517 bool isShiftedMask() const {
518 if (isSingleWord())
519 return isShiftedMask_64(U.VAL);
520 unsigned Ones = countPopulationSlowCase();
521 unsigned LeadZ = countLeadingZerosSlowCase();
522 return (Ones + LeadZ + countTrailingZeros()) == BitWidth;
523 }
524
525 /// @}
526 /// \name Value Generators
527 /// @{
528
529 /// Gets maximum unsigned value of APInt for specific bit width.
530 static APInt getMaxValue(unsigned numBits) {
531 return getAllOnesValue(numBits);
532 }
533
534 /// Gets maximum signed value of APInt for a specific bit width.
535 static APInt getSignedMaxValue(unsigned numBits) {
536 APInt API = getAllOnesValue(numBits);
537 API.clearBit(numBits - 1);
538 return API;
539 }
540
541 /// Gets minimum unsigned value of APInt for a specific bit width.
542 static APInt getMinValue(unsigned numBits) { return APInt(numBits, 0); }
543
544 /// Gets minimum signed value of APInt for a specific bit width.
545 static APInt getSignedMinValue(unsigned numBits) {
546 APInt API(numBits, 0);
547 API.setBit(numBits - 1);
548 return API;
549 }
550
551 /// Get the SignMask for a specific bit width.
552 ///
553 /// This is just a wrapper function of getSignedMinValue(), and it helps code
554 /// readability when we want to get a SignMask.
555 static APInt getSignMask(unsigned BitWidth) {
556 return getSignedMinValue(BitWidth);
557 }
558
559 /// Get the all-ones value.
560 ///
561 /// \returns the all-ones value for an APInt of the specified bit-width.
562 static APInt getAllOnesValue(unsigned numBits) {
563 return APInt(numBits, WORDTYPE_MAX, true);
564 }
565
566 /// Get the '0' value.
567 ///
568 /// \returns the '0' value for an APInt of the specified bit-width.
569 static APInt getNullValue(unsigned numBits) { return APInt(numBits, 0); }
570
571 /// Compute an APInt containing numBits highbits from this APInt.
572 ///
573 /// Get an APInt with the same BitWidth as this APInt, just zero mask
574 /// the low bits and right shift to the least significant bit.
575 ///
576 /// \returns the high "numBits" bits of this APInt.
577 APInt getHiBits(unsigned numBits) const;
578
579 /// Compute an APInt containing numBits lowbits from this APInt.
580 ///
581 /// Get an APInt with the same BitWidth as this APInt, just zero mask
582 /// the high bits.
583 ///
584 /// \returns the low "numBits" bits of this APInt.
585 APInt getLoBits(unsigned numBits) const;
586
587 /// Return an APInt with exactly one bit set in the result.
588 static APInt getOneBitSet(unsigned numBits, unsigned BitNo) {
589 APInt Res(numBits, 0);
590 Res.setBit(BitNo);
591 return Res;
592 }
593
594 /// Get a value with a block of bits set.
595 ///
596 /// Constructs an APInt value that has a contiguous range of bits set. The
597 /// bits from loBit (inclusive) to hiBit (exclusive) will be set. All other
598 /// bits will be zero. For example, with parameters(32, 0, 16) you would get
599 /// 0x0000FFFF. If hiBit is less than loBit then the set bits "wrap". For
600 /// example, with parameters (32, 28, 4), you would get 0xF000000F.
601 ///
602 /// \param numBits the intended bit width of the result
603 /// \param loBit the index of the lowest bit set.
604 /// \param hiBit the index of the highest bit set.
605 ///
606 /// \returns An APInt value with the requested bits set.
607 static APInt getBitsSet(unsigned numBits, unsigned loBit, unsigned hiBit) {
608 APInt Res(numBits, 0);
609 Res.setBits(loBit, hiBit);
610 return Res;
611 }
612
613 /// Get a value with upper bits starting at loBit set.
614 ///
615 /// Constructs an APInt value that has a contiguous range of bits set. The
616 /// bits from loBit (inclusive) to numBits (exclusive) will be set. All other
617 /// bits will be zero. For example, with parameters(32, 12) you would get
618 /// 0xFFFFF000.
619 ///
620 /// \param numBits the intended bit width of the result
621 /// \param loBit the index of the lowest bit to set.
622 ///
623 /// \returns An APInt value with the requested bits set.
624 static APInt getBitsSetFrom(unsigned numBits, unsigned loBit) {
625 APInt Res(numBits, 0);
626 Res.setBitsFrom(loBit);
627 return Res;
628 }
629
630 /// Get a value with high bits set
631 ///
632 /// Constructs an APInt value that has the top hiBitsSet bits set.
633 ///
634 /// \param numBits the bitwidth of the result
635 /// \param hiBitsSet the number of high-order bits set in the result.
636 static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet) {
637 APInt Res(numBits, 0);
638 Res.setHighBits(hiBitsSet);
639 return Res;
640 }
641
642 /// Get a value with low bits set
643 ///
644 /// Constructs an APInt value that has the bottom loBitsSet bits set.
645 ///
646 /// \param numBits the bitwidth of the result
647 /// \param loBitsSet the number of low-order bits set in the result.
648 static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet) {
649 APInt Res(numBits, 0);
650 Res.setLowBits(loBitsSet);
4
Calling 'APInt::setLowBits'
11
Returning from 'APInt::setLowBits'
651 return Res;
652 }
653
654 /// Return a value containing V broadcasted over NewLen bits.
655 static APInt getSplat(unsigned NewLen, const APInt &V);
656
657 /// Determine if two APInts have the same value, after zero-extending
658 /// one of them (if needed!) to ensure that the bit-widths match.
659 static bool isSameValue(const APInt &I1, const APInt &I2) {
660 if (I1.getBitWidth() == I2.getBitWidth())
661 return I1 == I2;
662
663 if (I1.getBitWidth() > I2.getBitWidth())
664 return I1 == I2.zext(I1.getBitWidth());
665
666 return I1.zext(I2.getBitWidth()) == I2;
667 }
668
669 /// Overload to compute a hash_code for an APInt value.
670 friend hash_code hash_value(const APInt &Arg);
671
672 /// This function returns a pointer to the internal storage of the APInt.
673 /// This is useful for writing out the APInt in binary form without any
674 /// conversions.
675 const uint64_t *getRawData() const {
676 if (isSingleWord())
677 return &U.VAL;
678 return &U.pVal[0];
679 }
680
681 /// @}
682 /// \name Unary Operators
683 /// @{
684
685 /// Postfix increment operator.
686 ///
687 /// Increments *this by 1.
688 ///
689 /// \returns a new APInt value representing the original value of *this.
690 const APInt operator++(int) {
691 APInt API(*this);
692 ++(*this);
693 return API;
694 }
695
696 /// Prefix increment operator.
697 ///
698 /// \returns *this incremented by one
699 APInt &operator++();
700
701 /// Postfix decrement operator.
702 ///
703 /// Decrements *this by 1.
704 ///
705 /// \returns a new APInt value representing the original value of *this.
706 const APInt operator--(int) {
707 APInt API(*this);
708 --(*this);
709 return API;
710 }
711
712 /// Prefix decrement operator.
713 ///
714 /// \returns *this decremented by one.
715 APInt &operator--();
716
717 /// Logical negation operator.
718 ///
719 /// Performs logical negation operation on this APInt.
720 ///
721 /// \returns true if *this is zero, false otherwise.
722 bool operator!() const {
723 if (isSingleWord())
724 return U.VAL == 0;
725 return countLeadingZerosSlowCase() == BitWidth;
726 }
727
728 /// @}
729 /// \name Assignment Operators
730 /// @{
731
732 /// Copy assignment operator.
733 ///
734 /// \returns *this after assignment of RHS.
735 APInt &operator=(const APInt &RHS) {
736 // If the bitwidths are the same, we can avoid mucking with memory
737 if (isSingleWord() && RHS.isSingleWord()) {
738 U.VAL = RHS.U.VAL;
739 BitWidth = RHS.BitWidth;
740 return clearUnusedBits();
741 }
742
743 AssignSlowCase(RHS);
744 return *this;
745 }
746
747 /// Move assignment operator.
748 APInt &operator=(APInt &&that) {
749#ifdef _MSC_VER
750 // The MSVC std::shuffle implementation still does self-assignment.
751 if (this == &that)
752 return *this;
753#endif
754 assert(this != &that && "Self-move not supported")((this != &that && "Self-move not supported") ? static_cast
<void> (0) : __assert_fail ("this != &that && \"Self-move not supported\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 754, __PRETTY_FUNCTION__))
;
755 if (!isSingleWord())
756 delete[] U.pVal;
757
758 // Use memcpy so that type based alias analysis sees both VAL and pVal
759 // as modified.
760 memcpy(&U, &that.U, sizeof(U));
761
762 BitWidth = that.BitWidth;
763 that.BitWidth = 0;
764
765 return *this;
766 }
767
768 /// Assignment operator.
769 ///
770 /// The RHS value is assigned to *this. If the significant bits in RHS exceed
771 /// the bit width, the excess bits are truncated. If the bit width is larger
772 /// than 64, the value is zero filled in the unspecified high order bits.
773 ///
774 /// \returns *this after assignment of RHS value.
775 APInt &operator=(uint64_t RHS) {
776 if (isSingleWord()) {
777 U.VAL = RHS;
778 clearUnusedBits();
779 } else {
780 U.pVal[0] = RHS;
781 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
782 }
783 return *this;
784 }
785
786 /// Bitwise AND assignment operator.
787 ///
788 /// Performs a bitwise AND operation on this APInt and RHS. The result is
789 /// assigned to *this.
790 ///
791 /// \returns *this after ANDing with RHS.
792 APInt &operator&=(const APInt &RHS) {
793 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 793, __PRETTY_FUNCTION__))
;
794 if (isSingleWord())
795 U.VAL &= RHS.U.VAL;
796 else
797 AndAssignSlowCase(RHS);
798 return *this;
799 }
800
801 /// Bitwise AND assignment operator.
802 ///
803 /// Performs a bitwise AND operation on this APInt and RHS. RHS is
804 /// logically zero-extended or truncated to match the bit-width of
805 /// the LHS.
806 APInt &operator&=(uint64_t RHS) {
807 if (isSingleWord()) {
808 U.VAL &= RHS;
809 return *this;
810 }
811 U.pVal[0] &= RHS;
812 memset(U.pVal+1, 0, (getNumWords() - 1) * APINT_WORD_SIZE);
813 return *this;
814 }
815
816 /// Bitwise OR assignment operator.
817 ///
818 /// Performs a bitwise OR operation on this APInt and RHS. The result is
819 /// assigned *this;
820 ///
821 /// \returns *this after ORing with RHS.
822 APInt &operator|=(const APInt &RHS) {
823 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 823, __PRETTY_FUNCTION__))
;
824 if (isSingleWord())
825 U.VAL |= RHS.U.VAL;
826 else
827 OrAssignSlowCase(RHS);
828 return *this;
829 }
830
831 /// Bitwise OR assignment operator.
832 ///
833 /// Performs a bitwise OR operation on this APInt and RHS. RHS is
834 /// logically zero-extended or truncated to match the bit-width of
835 /// the LHS.
836 APInt &operator|=(uint64_t RHS) {
837 if (isSingleWord()) {
838 U.VAL |= RHS;
839 clearUnusedBits();
840 } else {
841 U.pVal[0] |= RHS;
842 }
843 return *this;
844 }
845
846 /// Bitwise XOR assignment operator.
847 ///
848 /// Performs a bitwise XOR operation on this APInt and RHS. The result is
849 /// assigned to *this.
850 ///
851 /// \returns *this after XORing with RHS.
852 APInt &operator^=(const APInt &RHS) {
853 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 853, __PRETTY_FUNCTION__))
;
854 if (isSingleWord())
855 U.VAL ^= RHS.U.VAL;
856 else
857 XorAssignSlowCase(RHS);
858 return *this;
859 }
860
861 /// Bitwise XOR assignment operator.
862 ///
863 /// Performs a bitwise XOR operation on this APInt and RHS. RHS is
864 /// logically zero-extended or truncated to match the bit-width of
865 /// the LHS.
866 APInt &operator^=(uint64_t RHS) {
867 if (isSingleWord()) {
868 U.VAL ^= RHS;
869 clearUnusedBits();
870 } else {
871 U.pVal[0] ^= RHS;
872 }
873 return *this;
874 }
875
876 /// Multiplication assignment operator.
877 ///
878 /// Multiplies this APInt by RHS and assigns the result to *this.
879 ///
880 /// \returns *this
881 APInt &operator*=(const APInt &RHS);
882 APInt &operator*=(uint64_t RHS);
883
884 /// Addition assignment operator.
885 ///
886 /// Adds RHS to *this and assigns the result to *this.
887 ///
888 /// \returns *this
889 APInt &operator+=(const APInt &RHS);
890 APInt &operator+=(uint64_t RHS);
891
892 /// Subtraction assignment operator.
893 ///
894 /// Subtracts RHS from *this and assigns the result to *this.
895 ///
896 /// \returns *this
897 APInt &operator-=(const APInt &RHS);
898 APInt &operator-=(uint64_t RHS);
899
900 /// Left-shift assignment function.
901 ///
902 /// Shifts *this left by shiftAmt and assigns the result to *this.
903 ///
904 /// \returns *this after shifting left by ShiftAmt
905 APInt &operator<<=(unsigned ShiftAmt) {
906 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 906, __PRETTY_FUNCTION__))
;
907 if (isSingleWord()) {
908 if (ShiftAmt == BitWidth)
909 U.VAL = 0;
910 else
911 U.VAL <<= ShiftAmt;
912 return clearUnusedBits();
913 }
914 shlSlowCase(ShiftAmt);
915 return *this;
916 }
917
918 /// Left-shift assignment function.
919 ///
920 /// Shifts *this left by shiftAmt and assigns the result to *this.
921 ///
922 /// \returns *this after shifting left by ShiftAmt
923 APInt &operator<<=(const APInt &ShiftAmt);
924
925 /// @}
926 /// \name Binary Operators
927 /// @{
928
929 /// Multiplication operator.
930 ///
931 /// Multiplies this APInt by RHS and returns the result.
932 APInt operator*(const APInt &RHS) const;
933
934 /// Left logical shift operator.
935 ///
936 /// Shifts this APInt left by \p Bits and returns the result.
937 APInt operator<<(unsigned Bits) const { return shl(Bits); }
938
939 /// Left logical shift operator.
940 ///
941 /// Shifts this APInt left by \p Bits and returns the result.
942 APInt operator<<(const APInt &Bits) const { return shl(Bits); }
943
944 /// Arithmetic right-shift function.
945 ///
946 /// Arithmetic right-shift this APInt by shiftAmt.
947 APInt ashr(unsigned ShiftAmt) const {
948 APInt R(*this);
949 R.ashrInPlace(ShiftAmt);
950 return R;
951 }
952
953 /// Arithmetic right-shift this APInt by ShiftAmt in place.
954 void ashrInPlace(unsigned ShiftAmt) {
955 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 955, __PRETTY_FUNCTION__))
;
956 if (isSingleWord()) {
957 int64_t SExtVAL = SignExtend64(U.VAL, BitWidth);
958 if (ShiftAmt == BitWidth)
959 U.VAL = SExtVAL >> (APINT_BITS_PER_WORD - 1); // Fill with sign bit.
960 else
961 U.VAL = SExtVAL >> ShiftAmt;
962 clearUnusedBits();
963 return;
964 }
965 ashrSlowCase(ShiftAmt);
966 }
967
968 /// Logical right-shift function.
969 ///
970 /// Logical right-shift this APInt by shiftAmt.
971 APInt lshr(unsigned shiftAmt) const {
972 APInt R(*this);
973 R.lshrInPlace(shiftAmt);
974 return R;
975 }
976
977 /// Logical right-shift this APInt by ShiftAmt in place.
978 void lshrInPlace(unsigned ShiftAmt) {
979 assert(ShiftAmt <= BitWidth && "Invalid shift amount")((ShiftAmt <= BitWidth && "Invalid shift amount") ?
static_cast<void> (0) : __assert_fail ("ShiftAmt <= BitWidth && \"Invalid shift amount\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 979, __PRETTY_FUNCTION__))
;
980 if (isSingleWord()) {
981 if (ShiftAmt == BitWidth)
982 U.VAL = 0;
983 else
984 U.VAL >>= ShiftAmt;
985 return;
986 }
987 lshrSlowCase(ShiftAmt);
988 }
989
990 /// Left-shift function.
991 ///
992 /// Left-shift this APInt by shiftAmt.
993 APInt shl(unsigned shiftAmt) const {
994 APInt R(*this);
995 R <<= shiftAmt;
996 return R;
997 }
998
999 /// Rotate left by rotateAmt.
1000 APInt rotl(unsigned rotateAmt) const;
1001
1002 /// Rotate right by rotateAmt.
1003 APInt rotr(unsigned rotateAmt) const;
1004
1005 /// Arithmetic right-shift function.
1006 ///
1007 /// Arithmetic right-shift this APInt by shiftAmt.
1008 APInt ashr(const APInt &ShiftAmt) const {
1009 APInt R(*this);
1010 R.ashrInPlace(ShiftAmt);
1011 return R;
1012 }
1013
1014 /// Arithmetic right-shift this APInt by shiftAmt in place.
1015 void ashrInPlace(const APInt &shiftAmt);
1016
1017 /// Logical right-shift function.
1018 ///
1019 /// Logical right-shift this APInt by shiftAmt.
1020 APInt lshr(const APInt &ShiftAmt) const {
1021 APInt R(*this);
1022 R.lshrInPlace(ShiftAmt);
1023 return R;
1024 }
1025
1026 /// Logical right-shift this APInt by ShiftAmt in place.
1027 void lshrInPlace(const APInt &ShiftAmt);
1028
1029 /// Left-shift function.
1030 ///
1031 /// Left-shift this APInt by shiftAmt.
1032 APInt shl(const APInt &ShiftAmt) const {
1033 APInt R(*this);
1034 R <<= ShiftAmt;
1035 return R;
1036 }
1037
1038 /// Rotate left by rotateAmt.
1039 APInt rotl(const APInt &rotateAmt) const;
1040
1041 /// Rotate right by rotateAmt.
1042 APInt rotr(const APInt &rotateAmt) const;
1043
1044 /// Unsigned division operation.
1045 ///
1046 /// Perform an unsigned divide operation on this APInt by RHS. Both this and
1047 /// RHS are treated as unsigned quantities for purposes of this division.
1048 ///
1049 /// \returns a new APInt value containing the division result, rounded towards
1050 /// zero.
1051 APInt udiv(const APInt &RHS) const;
1052 APInt udiv(uint64_t RHS) const;
1053
1054 /// Signed division function for APInt.
1055 ///
1056 /// Signed divide this APInt by APInt RHS.
1057 ///
1058 /// The result is rounded towards zero.
1059 APInt sdiv(const APInt &RHS) const;
1060 APInt sdiv(int64_t RHS) const;
1061
1062 /// Unsigned remainder operation.
1063 ///
1064 /// Perform an unsigned remainder operation on this APInt with RHS being the
1065 /// divisor. Both this and RHS are treated as unsigned quantities for purposes
1066 /// of this operation. Note that this is a true remainder operation and not a
1067 /// modulo operation because the sign follows the sign of the dividend which
1068 /// is *this.
1069 ///
1070 /// \returns a new APInt value containing the remainder result
1071 APInt urem(const APInt &RHS) const;
1072 uint64_t urem(uint64_t RHS) const;
1073
1074 /// Function for signed remainder operation.
1075 ///
1076 /// Signed remainder operation on APInt.
1077 APInt srem(const APInt &RHS) const;
1078 int64_t srem(int64_t RHS) const;
1079
1080 /// Dual division/remainder interface.
1081 ///
1082 /// Sometimes it is convenient to divide two APInt values and obtain both the
1083 /// quotient and remainder. This function does both operations in the same
1084 /// computation making it a little more efficient. The pair of input arguments
1085 /// may overlap with the pair of output arguments. It is safe to call
1086 /// udivrem(X, Y, X, Y), for example.
1087 static void udivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1088 APInt &Remainder);
1089 static void udivrem(const APInt &LHS, uint64_t RHS, APInt &Quotient,
1090 uint64_t &Remainder);
1091
1092 static void sdivrem(const APInt &LHS, const APInt &RHS, APInt &Quotient,
1093 APInt &Remainder);
1094 static void sdivrem(const APInt &LHS, int64_t RHS, APInt &Quotient,
1095 int64_t &Remainder);
1096
1097 // Operations that return overflow indicators.
1098 APInt sadd_ov(const APInt &RHS, bool &Overflow) const;
1099 APInt uadd_ov(const APInt &RHS, bool &Overflow) const;
1100 APInt ssub_ov(const APInt &RHS, bool &Overflow) const;
1101 APInt usub_ov(const APInt &RHS, bool &Overflow) const;
1102 APInt sdiv_ov(const APInt &RHS, bool &Overflow) const;
1103 APInt smul_ov(const APInt &RHS, bool &Overflow) const;
1104 APInt umul_ov(const APInt &RHS, bool &Overflow) const;
1105 APInt sshl_ov(const APInt &Amt, bool &Overflow) const;
1106 APInt ushl_ov(const APInt &Amt, bool &Overflow) const;
1107
1108 /// Array-indexing support.
1109 ///
1110 /// \returns the bit value at bitPosition
1111 bool operator[](unsigned bitPosition) const {
1112 assert(bitPosition < getBitWidth() && "Bit position out of bounds!")((bitPosition < getBitWidth() && "Bit position out of bounds!"
) ? static_cast<void> (0) : __assert_fail ("bitPosition < getBitWidth() && \"Bit position out of bounds!\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1112, __PRETTY_FUNCTION__))
;
1113 return (maskBit(bitPosition) & getWord(bitPosition)) != 0;
1114 }
1115
1116 /// @}
1117 /// \name Comparison Operators
1118 /// @{
1119
1120 /// Equality operator.
1121 ///
1122 /// Compares this APInt with RHS for the validity of the equality
1123 /// relationship.
1124 bool operator==(const APInt &RHS) const {
1125 assert(BitWidth == RHS.BitWidth && "Comparison requires equal bit widths")((BitWidth == RHS.BitWidth && "Comparison requires equal bit widths"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Comparison requires equal bit widths\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1125, __PRETTY_FUNCTION__))
;
1126 if (isSingleWord())
1127 return U.VAL == RHS.U.VAL;
1128 return EqualSlowCase(RHS);
1129 }
1130
1131 /// Equality operator.
1132 ///
1133 /// Compares this APInt with a uint64_t for the validity of the equality
1134 /// relationship.
1135 ///
1136 /// \returns true if *this == Val
1137 bool operator==(uint64_t Val) const {
1138 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() == Val;
1139 }
1140
1141 /// Equality comparison.
1142 ///
1143 /// Compares this APInt with RHS for the validity of the equality
1144 /// relationship.
1145 ///
1146 /// \returns true if *this == Val
1147 bool eq(const APInt &RHS) const { return (*this) == RHS; }
1148
1149 /// Inequality operator.
1150 ///
1151 /// Compares this APInt with RHS for the validity of the inequality
1152 /// relationship.
1153 ///
1154 /// \returns true if *this != Val
1155 bool operator!=(const APInt &RHS) const { return !((*this) == RHS); }
1156
1157 /// Inequality operator.
1158 ///
1159 /// Compares this APInt with a uint64_t for the validity of the inequality
1160 /// relationship.
1161 ///
1162 /// \returns true if *this != Val
1163 bool operator!=(uint64_t Val) const { return !((*this) == Val); }
1164
1165 /// Inequality comparison
1166 ///
1167 /// Compares this APInt with RHS for the validity of the inequality
1168 /// relationship.
1169 ///
1170 /// \returns true if *this != Val
1171 bool ne(const APInt &RHS) const { return !((*this) == RHS); }
1172
1173 /// Unsigned less than comparison
1174 ///
1175 /// Regards both *this and RHS as unsigned quantities and compares them for
1176 /// the validity of the less-than relationship.
1177 ///
1178 /// \returns true if *this < RHS when both are considered unsigned.
1179 bool ult(const APInt &RHS) const { return compare(RHS) < 0; }
1180
1181 /// Unsigned less than comparison
1182 ///
1183 /// Regards both *this as an unsigned quantity and compares it with RHS for
1184 /// the validity of the less-than relationship.
1185 ///
1186 /// \returns true if *this < RHS when considered unsigned.
1187 bool ult(uint64_t RHS) const {
1188 // Only need to check active bits if not a single word.
1189 return (isSingleWord() || getActiveBits() <= 64) && getZExtValue() < RHS;
1190 }
1191
1192 /// Signed less than comparison
1193 ///
1194 /// Regards both *this and RHS as signed quantities and compares them for
1195 /// validity of the less-than relationship.
1196 ///
1197 /// \returns true if *this < RHS when both are considered signed.
1198 bool slt(const APInt &RHS) const { return compareSigned(RHS) < 0; }
1199
1200 /// Signed less than comparison
1201 ///
1202 /// Regards both *this as a signed quantity and compares it with RHS for
1203 /// the validity of the less-than relationship.
1204 ///
1205 /// \returns true if *this < RHS when considered signed.
1206 bool slt(int64_t RHS) const {
1207 return (!isSingleWord() && getMinSignedBits() > 64) ? isNegative()
1208 : getSExtValue() < RHS;
1209 }
1210
1211 /// Unsigned less or equal comparison
1212 ///
1213 /// Regards both *this and RHS as unsigned quantities and compares them for
1214 /// validity of the less-or-equal relationship.
1215 ///
1216 /// \returns true if *this <= RHS when both are considered unsigned.
1217 bool ule(const APInt &RHS) const { return compare(RHS) <= 0; }
1218
1219 /// Unsigned less or equal comparison
1220 ///
1221 /// Regards both *this as an unsigned quantity and compares it with RHS for
1222 /// the validity of the less-or-equal relationship.
1223 ///
1224 /// \returns true if *this <= RHS when considered unsigned.
1225 bool ule(uint64_t RHS) const { return !ugt(RHS); }
1226
1227 /// Signed less or equal comparison
1228 ///
1229 /// Regards both *this and RHS as signed quantities and compares them for
1230 /// validity of the less-or-equal relationship.
1231 ///
1232 /// \returns true if *this <= RHS when both are considered signed.
1233 bool sle(const APInt &RHS) const { return compareSigned(RHS) <= 0; }
1234
1235 /// Signed less or equal comparison
1236 ///
1237 /// Regards both *this as a signed quantity and compares it with RHS for the
1238 /// validity of the less-or-equal relationship.
1239 ///
1240 /// \returns true if *this <= RHS when considered signed.
1241 bool sle(uint64_t RHS) const { return !sgt(RHS); }
1242
1243 /// Unsigned greather than comparison
1244 ///
1245 /// Regards both *this and RHS as unsigned quantities and compares them for
1246 /// the validity of the greater-than relationship.
1247 ///
1248 /// \returns true if *this > RHS when both are considered unsigned.
1249 bool ugt(const APInt &RHS) const { return !ule(RHS); }
1250
1251 /// Unsigned greater than comparison
1252 ///
1253 /// Regards both *this as an unsigned quantity and compares it with RHS for
1254 /// the validity of the greater-than relationship.
1255 ///
1256 /// \returns true if *this > RHS when considered unsigned.
1257 bool ugt(uint64_t RHS) const {
1258 // Only need to check active bits if not a single word.
1259 return (!isSingleWord() && getActiveBits() > 64) || getZExtValue() > RHS;
1260 }
1261
1262 /// Signed greather than comparison
1263 ///
1264 /// Regards both *this and RHS as signed quantities and compares them for the
1265 /// validity of the greater-than relationship.
1266 ///
1267 /// \returns true if *this > RHS when both are considered signed.
1268 bool sgt(const APInt &RHS) const { return !sle(RHS); }
1269
1270 /// Signed greater than comparison
1271 ///
1272 /// Regards both *this as a signed quantity and compares it with RHS for
1273 /// the validity of the greater-than relationship.
1274 ///
1275 /// \returns true if *this > RHS when considered signed.
1276 bool sgt(int64_t RHS) const {
1277 return (!isSingleWord() && getMinSignedBits() > 64) ? !isNegative()
1278 : getSExtValue() > RHS;
1279 }
1280
1281 /// Unsigned greater or equal comparison
1282 ///
1283 /// Regards both *this and RHS as unsigned quantities and compares them for
1284 /// validity of the greater-or-equal relationship.
1285 ///
1286 /// \returns true if *this >= RHS when both are considered unsigned.
1287 bool uge(const APInt &RHS) const { return !ult(RHS); }
1288
1289 /// Unsigned greater or equal comparison
1290 ///
1291 /// Regards both *this as an unsigned quantity and compares it with RHS for
1292 /// the validity of the greater-or-equal relationship.
1293 ///
1294 /// \returns true if *this >= RHS when considered unsigned.
1295 bool uge(uint64_t RHS) const { return !ult(RHS); }
1296
1297 /// Signed greater or equal comparison
1298 ///
1299 /// Regards both *this and RHS as signed quantities and compares them for
1300 /// validity of the greater-or-equal relationship.
1301 ///
1302 /// \returns true if *this >= RHS when both are considered signed.
1303 bool sge(const APInt &RHS) const { return !slt(RHS); }
1304
1305 /// Signed greater or equal comparison
1306 ///
1307 /// Regards both *this as a signed quantity and compares it with RHS for
1308 /// the validity of the greater-or-equal relationship.
1309 ///
1310 /// \returns true if *this >= RHS when considered signed.
1311 bool sge(int64_t RHS) const { return !slt(RHS); }
1312
1313 /// This operation tests if there are any pairs of corresponding bits
1314 /// between this APInt and RHS that are both set.
1315 bool intersects(const APInt &RHS) const {
1316 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1316, __PRETTY_FUNCTION__))
;
1317 if (isSingleWord())
1318 return (U.VAL & RHS.U.VAL) != 0;
1319 return intersectsSlowCase(RHS);
1320 }
1321
1322 /// This operation checks that all bits set in this APInt are also set in RHS.
1323 bool isSubsetOf(const APInt &RHS) const {
1324 assert(BitWidth == RHS.BitWidth && "Bit widths must be the same")((BitWidth == RHS.BitWidth && "Bit widths must be the same"
) ? static_cast<void> (0) : __assert_fail ("BitWidth == RHS.BitWidth && \"Bit widths must be the same\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1324, __PRETTY_FUNCTION__))
;
1325 if (isSingleWord())
1326 return (U.VAL & ~RHS.U.VAL) == 0;
1327 return isSubsetOfSlowCase(RHS);
1328 }
1329
1330 /// @}
1331 /// \name Resizing Operators
1332 /// @{
1333
1334 /// Truncate to new width.
1335 ///
1336 /// Truncate the APInt to a specified width. It is an error to specify a width
1337 /// that is greater than or equal to the current width.
1338 APInt trunc(unsigned width) const;
1339
1340 /// Sign extend to a new width.
1341 ///
1342 /// This operation sign extends the APInt to a new width. If the high order
1343 /// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
1344 /// It is an error to specify a width that is less than or equal to the
1345 /// current width.
1346 APInt sext(unsigned width) const;
1347
1348 /// Zero extend to a new width.
1349 ///
1350 /// This operation zero extends the APInt to a new width. The high order bits
1351 /// are filled with 0 bits. It is an error to specify a width that is less
1352 /// than or equal to the current width.
1353 APInt zext(unsigned width) const;
1354
1355 /// Sign extend or truncate to width
1356 ///
1357 /// Make this APInt have the bit width given by \p width. The value is sign
1358 /// extended, truncated, or left alone to make it that width.
1359 APInt sextOrTrunc(unsigned width) const;
1360
1361 /// Zero extend or truncate to width
1362 ///
1363 /// Make this APInt have the bit width given by \p width. The value is zero
1364 /// extended, truncated, or left alone to make it that width.
1365 APInt zextOrTrunc(unsigned width) const;
1366
1367 /// Sign extend or truncate to width
1368 ///
1369 /// Make this APInt have the bit width given by \p width. The value is sign
1370 /// extended, or left alone to make it that width.
1371 APInt sextOrSelf(unsigned width) const;
1372
1373 /// Zero extend or truncate to width
1374 ///
1375 /// Make this APInt have the bit width given by \p width. The value is zero
1376 /// extended, or left alone to make it that width.
1377 APInt zextOrSelf(unsigned width) const;
1378
1379 /// @}
1380 /// \name Bit Manipulation Operators
1381 /// @{
1382
1383 /// Set every bit to 1.
1384 void setAllBits() {
1385 if (isSingleWord())
1386 U.VAL = WORDTYPE_MAX;
1387 else
1388 // Set all the bits in all the words.
1389 memset(U.pVal, -1, getNumWords() * APINT_WORD_SIZE);
1390 // Clear the unused ones
1391 clearUnusedBits();
1392 }
1393
1394 /// Set a given bit to 1.
1395 ///
1396 /// Set the given bit to 1 whose position is given as "bitPosition".
1397 void setBit(unsigned BitPosition) {
1398 assert(BitPosition <= BitWidth && "BitPosition out of range")((BitPosition <= BitWidth && "BitPosition out of range"
) ? static_cast<void> (0) : __assert_fail ("BitPosition <= BitWidth && \"BitPosition out of range\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1398, __PRETTY_FUNCTION__))
;
1399 WordType Mask = maskBit(BitPosition);
1400 if (isSingleWord())
1401 U.VAL |= Mask;
1402 else
1403 U.pVal[whichWord(BitPosition)] |= Mask;
1404 }
1405
1406 /// Set the sign bit to 1.
1407 void setSignBit() {
1408 setBit(BitWidth - 1);
1409 }
1410
1411 /// Set the bits from loBit (inclusive) to hiBit (exclusive) to 1.
1412 void setBits(unsigned loBit, unsigned hiBit) {
1413 assert(hiBit <= BitWidth && "hiBit out of range")((hiBit <= BitWidth && "hiBit out of range") ? static_cast
<void> (0) : __assert_fail ("hiBit <= BitWidth && \"hiBit out of range\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1413, __PRETTY_FUNCTION__))
;
1414 assert(loBit <= BitWidth && "loBit out of range")((loBit <= BitWidth && "loBit out of range") ? static_cast
<void> (0) : __assert_fail ("loBit <= BitWidth && \"loBit out of range\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1414, __PRETTY_FUNCTION__))
;
1415 assert(loBit <= hiBit && "loBit greater than hiBit")((loBit <= hiBit && "loBit greater than hiBit") ? static_cast
<void> (0) : __assert_fail ("loBit <= hiBit && \"loBit greater than hiBit\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1415, __PRETTY_FUNCTION__))
;
1416 if (loBit == hiBit)
6
Assuming 'loBit' is not equal to 'hiBit'
7
Taking false branch
1417 return;
1418 if (loBit < APINT_BITS_PER_WORD && hiBit <= APINT_BITS_PER_WORD) {
8
Taking true branch
1419 uint64_t mask = WORDTYPE_MAX >> (APINT_BITS_PER_WORD - (hiBit - loBit));
1420 mask <<= loBit;
1421 if (isSingleWord())
9
Taking true branch
1422 U.VAL |= mask;
1423 else
1424 U.pVal[0] |= mask;
1425 } else {
1426 setBitsSlowCase(loBit, hiBit);
1427 }
1428 }
1429
1430 /// Set the top bits starting from loBit.
1431 void setBitsFrom(unsigned loBit) {
1432 return setBits(loBit, BitWidth);
1433 }
1434
1435 /// Set the bottom loBits bits.
1436 void setLowBits(unsigned loBits) {
1437 return setBits(0, loBits);
5
Calling 'APInt::setBits'
10
Returning from 'APInt::setBits'
1438 }
1439
1440 /// Set the top hiBits bits.
1441 void setHighBits(unsigned hiBits) {
1442 return setBits(BitWidth - hiBits, BitWidth);
1443 }
1444
1445 /// Set every bit to 0.
1446 void clearAllBits() {
1447 if (isSingleWord())
1448 U.VAL = 0;
1449 else
1450 memset(U.pVal, 0, getNumWords() * APINT_WORD_SIZE);
1451 }
1452
1453 /// Set a given bit to 0.
1454 ///
1455 /// Set the given bit to 0 whose position is given as "bitPosition".
1456 void clearBit(unsigned BitPosition) {
1457 assert(BitPosition <= BitWidth && "BitPosition out of range")((BitPosition <= BitWidth && "BitPosition out of range"
) ? static_cast<void> (0) : __assert_fail ("BitPosition <= BitWidth && \"BitPosition out of range\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1457, __PRETTY_FUNCTION__))
;
1458 WordType Mask = ~maskBit(BitPosition);
1459 if (isSingleWord())
1460 U.VAL &= Mask;
1461 else
1462 U.pVal[whichWord(BitPosition)] &= Mask;
1463 }
1464
1465 /// Set the sign bit to 0.
1466 void clearSignBit() {
1467 clearBit(BitWidth - 1);
1468 }
1469
1470 /// Toggle every bit to its opposite value.
1471 void flipAllBits() {
1472 if (isSingleWord()) {
1473 U.VAL ^= WORDTYPE_MAX;
1474 clearUnusedBits();
1475 } else {
1476 flipAllBitsSlowCase();
1477 }
1478 }
1479
1480 /// Toggles a given bit to its opposite value.
1481 ///
1482 /// Toggle a given bit to its opposite value whose position is given
1483 /// as "bitPosition".
1484 void flipBit(unsigned bitPosition);
1485
1486 /// Negate this APInt in place.
1487 void negate() {
1488 flipAllBits();
1489 ++(*this);
1490 }
1491
1492 /// Insert the bits from a smaller APInt starting at bitPosition.
1493 void insertBits(const APInt &SubBits, unsigned bitPosition);
1494
1495 /// Return an APInt with the extracted bits [bitPosition,bitPosition+numBits).
1496 APInt extractBits(unsigned numBits, unsigned bitPosition) const;
1497
1498 /// @}
1499 /// \name Value Characterization Functions
1500 /// @{
1501
1502 /// Return the number of bits in the APInt.
1503 unsigned getBitWidth() const { return BitWidth; }
1504
1505 /// Get the number of words.
1506 ///
1507 /// Here one word's bitwidth equals to that of uint64_t.
1508 ///
1509 /// \returns the number of words to hold the integer value of this APInt.
1510 unsigned getNumWords() const { return getNumWords(BitWidth); }
1511
1512 /// Get the number of words.
1513 ///
1514 /// *NOTE* Here one word's bitwidth equals to that of uint64_t.
1515 ///
1516 /// \returns the number of words to hold the integer value with a given bit
1517 /// width.
1518 static unsigned getNumWords(unsigned BitWidth) {
1519 return ((uint64_t)BitWidth + APINT_BITS_PER_WORD - 1) / APINT_BITS_PER_WORD;
1520 }
1521
1522 /// Compute the number of active bits in the value
1523 ///
1524 /// This function returns the number of active bits which is defined as the
1525 /// bit width minus the number of leading zeros. This is used in several
1526 /// computations to see how "wide" the value is.
1527 unsigned getActiveBits() const { return BitWidth - countLeadingZeros(); }
1528
1529 /// Compute the number of active words in the value of this APInt.
1530 ///
1531 /// This is used in conjunction with getActiveData to extract the raw value of
1532 /// the APInt.
1533 unsigned getActiveWords() const {
1534 unsigned numActiveBits = getActiveBits();
1535 return numActiveBits ? whichWord(numActiveBits - 1) + 1 : 1;
1536 }
1537
1538 /// Get the minimum bit size for this signed APInt
1539 ///
1540 /// Computes the minimum bit width for this APInt while considering it to be a
1541 /// signed (and probably negative) value. If the value is not negative, this
1542 /// function returns the same value as getActiveBits()+1. Otherwise, it
1543 /// returns the smallest bit width that will retain the negative value. For
1544 /// example, -1 can be written as 0b1 or 0xFFFFFFFFFF. 0b1 is shorter and so
1545 /// for -1, this function will always return 1.
1546 unsigned getMinSignedBits() const {
1547 if (isNegative())
1548 return BitWidth - countLeadingOnes() + 1;
1549 return getActiveBits() + 1;
1550 }
1551
1552 /// Get zero extended value
1553 ///
1554 /// This method attempts to return the value of this APInt as a zero extended
1555 /// uint64_t. The bitwidth must be <= 64 or the value must fit within a
1556 /// uint64_t. Otherwise an assertion will result.
1557 uint64_t getZExtValue() const {
1558 if (isSingleWord())
1559 return U.VAL;
1560 assert(getActiveBits() <= 64 && "Too many bits for uint64_t")((getActiveBits() <= 64 && "Too many bits for uint64_t"
) ? static_cast<void> (0) : __assert_fail ("getActiveBits() <= 64 && \"Too many bits for uint64_t\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1560, __PRETTY_FUNCTION__))
;
1561 return U.pVal[0];
1562 }
1563
1564 /// Get sign extended value
1565 ///
1566 /// This method attempts to return the value of this APInt as a sign extended
1567 /// int64_t. The bit width must be <= 64 or the value must fit within an
1568 /// int64_t. Otherwise an assertion will result.
1569 int64_t getSExtValue() const {
1570 if (isSingleWord())
1571 return SignExtend64(U.VAL, BitWidth);
1572 assert(getMinSignedBits() <= 64 && "Too many bits for int64_t")((getMinSignedBits() <= 64 && "Too many bits for int64_t"
) ? static_cast<void> (0) : __assert_fail ("getMinSignedBits() <= 64 && \"Too many bits for int64_t\""
, "/build/llvm-toolchain-snapshot-8~svn345461/include/llvm/ADT/APInt.h"
, 1572, __PRETTY_FUNCTION__))
;
1573 return int64_t(U.pVal[0]);
1574 }
1575
1576 /// Get bits required for string value.
1577 ///
1578 /// This method determines how many bits are required to hold the APInt
1579 /// equivalent of the string given by \p str.
1580 static unsigned getBitsNeeded(StringRef str, uint8_t radix);
1581
1582 /// The APInt version of the countLeadingZeros functions in
1583 /// MathExtras.h.
1584 ///
1585 /// It counts the number of zeros from the most significant bit to the first
1586 /// one bit.
1587 ///
1588 /// \returns BitWidth if the value is zero, otherwise returns the number of
1589 /// zeros from the most significant bit to the first one bits.
1590 unsigned countLeadingZeros() const {
1591 if (isSingleWord()) {
1592 unsigned unusedBits = APINT_BITS_PER_WORD - BitWidth;
1593 return llvm::countLeadingZeros(U.VAL) - unusedBits;
1594 }
1595 return countLeadingZerosSlowCase();
1596 }
1597
1598 /// Count the number of leading one bits.
1599 ///
1600 /// This function is an APInt version of the countLeadingOnes
1601 /// functions in MathExtras.h. It counts the number of ones from the most
1602 /// significant bit to the first zero bit.
1603 ///
1604 /// \returns 0 if the high order bit is not set, otherwise returns the number
1605 /// of 1 bits from the most significant to the least
1606 unsigned countLeadingOnes() const {
1607 if (isSingleWord())
1608 return llvm::countLeadingOnes(U.VAL << (APINT_BITS_PER_WORD - BitWidth));
1609 return countLeadingOnesSlowCase();
1610 }
1611
1612 /// Computes the number of leading bits of this APInt that are equal to its
1613 /// sign bit.
1614 unsigned getNumSignBits() const {
1615 return isNegative() ? countLeadingOnes() : countLeadingZeros();
1616 }
1617
1618 /// Count the number of trailing zero bits.
1619 ///
1620 /// This function is an APInt version of the countTrailingZeros
1621 /// functions in MathExtras.h. It counts the number of zeros from the least
1622 /// significant bit to the first set bit.
1623 ///
1624 /// \returns BitWidth if the value is zero, otherwise returns the number of
1625 /// zeros from the least significant bit to the first one bit.
1626 unsigned countTrailingZeros() const {
1627 if (isSingleWord())
1628 return std::min(unsigned(llvm::countTrailingZeros(U.VAL)), BitWidth);
1629 return countTrailingZerosSlowCase();
1630 }
1631
1632 /// Count the number of trailing one bits.
1633 ///
1634 /// This function is an APInt version of the countTrailingOnes
1635 /// functions in MathExtras.h. It counts the number of ones from the least
1636 /// significant bit to the first zero bit.
1637 ///
1638 /// \returns BitWidth if the value is all ones, otherwise returns the number
1639 /// of ones from the least significant bit to the first zero bit.
1640 unsigned countTrailingOnes() const {
1641 if (isSingleWord())
1642 return llvm::countTrailingOnes(U.VAL);
1643 return countTrailingOnesSlowCase();
1644 }
1645
1646 /// Count the number of bits set.
1647 ///
1648 /// This function is an APInt version of the countPopulation functions
1649 /// in MathExtras.h. It counts the number of 1 bits in the APInt value.
1650 ///
1651 /// \returns 0 if the value is zero, otherwise returns the number of set bits.
1652 unsigned countPopulation() const {
1653 if (isSingleWord())
1654 return llvm::countPopulation(U.VAL);
1655 return countPopulationSlowCase();
1656 }
1657
1658 /// @}
1659 /// \name Conversion Functions
1660 /// @{
1661 void print(raw_ostream &OS, bool isSigned) const;
1662
1663 /// Converts an APInt to a string and append it to Str. Str is commonly a
1664 /// SmallString.
1665 void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
1666 bool formatAsCLiteral = false) const;
1667
1668 /// Considers the APInt to be unsigned and converts it into a string in the
1669 /// radix given. The radix can be 2, 8, 10 16, or 36.
1670 void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1671 toString(Str, Radix, false, false);
1672 }
1673
1674 /// Considers the APInt to be signed and converts it into a string in the
1675 /// radix given. The radix can be 2, 8, 10, 16, or 36.
1676 void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
1677 toString(Str, Radix, true, false);
1678 }
1679
1680 /// Return the APInt as a std::string.
1681 ///
1682 /// Note that this is an inefficient method. It is better to pass in a
1683 /// SmallVector/SmallString to the methods above to avoid thrashing the heap
1684 /// for the string.
1685 std::string toString(unsigned Radix, bool Signed) const;
1686
1687 /// \returns a byte-swapped representation of this APInt Value.
1688 APInt byteSwap() const;
1689
1690 /// \returns the value with the bit representation reversed of this APInt
1691 /// Value.
1692 APInt reverseBits() const;
1693
1694 /// Converts this APInt to a double value.
1695 double roundToDouble(bool isSigned) const;
1696
1697 /// Converts this unsigned APInt to a double value.
1698 double roundToDouble() const { return roundToDouble(false); }
1699
1700 /// Converts this signed APInt to a double value.
1701 double signedRoundToDouble() const { return roundToDouble(true); }
1702
1703 /// Converts APInt bits to a double
1704 ///
1705 /// The conversion does not do a translation from integer to double, it just
1706 /// re-interprets the bits as a double. Note that it is valid to do this on
1707 /// any bit width. Exactly 64 bits will be translated.
1708 double bitsToDouble() const {
1709 return BitsToDouble(getWord(0));
1710 }
1711
1712 /// Converts APInt bits to a double
1713 ///
1714 /// The conversion does not do a translation from integer to float, it just
1715 /// re-interprets the bits as a float. Note that it is valid to do this on
1716 /// any bit width. Exactly 32 bits will be translated.
1717 float bitsToFloat() const {
1718 return BitsToFloat(getWord(0));
1719 }
1720
1721 /// Converts a double to APInt bits.
1722 ///
1723 /// The conversion does not do a translation from double to integer, it just
1724 /// re-interprets the bits of the double.
1725 static APInt doubleToBits(double V) {
1726 return APInt(sizeof(double) * CHAR_BIT8, DoubleToBits(V));
1727 }
1728
1729 /// Converts a float to APInt bits.
1730 ///
1731 /// The conversion does not do a translation from float to integer, it just
1732 /// re-interprets the bits of the float.
1733 static APInt floatToBits(float V) {
1734 return APInt(sizeof(float) * CHAR_BIT8, FloatToBits(V));
1735 }
1736
1737 /// @}
1738 /// \name Mathematics Operations
1739 /// @{
1740
1741 /// \returns the floor log base 2 of this APInt.
1742 unsigned logBase2() const { return getActiveBits() - 1; }
1743
1744 /// \returns the ceil log base 2 of this APInt.
1745 unsigned ceilLogBase2() const {
1746 APInt temp(*this);
1747 --temp;
1748 return temp.getActiveBits();
1749 }
1750
1751 /// \returns the nearest log base 2 of this APInt. Ties round up.
1752 ///
1753 /// NOTE: When we have a BitWidth of 1, we define:
1754 ///
1755 /// log2(0) = UINT32_MAX
1756 /// log2(1) = 0
1757 ///
1758 /// to get around any mathematical concerns resulting from
1759 /// referencing 2 in a space where 2 does no exist.
1760 unsigned nearestLogBase2() const {
1761 // Special case when we have a bitwidth of 1. If VAL is 1, then we
1762 // get 0. If VAL is 0, we get WORDTYPE_MAX which gets truncated to
1763 // UINT32_MAX.
1764 if (BitWidth == 1)
1765 return U.VAL - 1;
1766
1767 // Handle the zero case.
1768 if (isNullValue())
1769 return UINT32_MAX(4294967295U);
1770
1771 // The non-zero case is handled by computing:
1772 //
1773 // nearestLogBase2(x) = logBase2(x) + x[logBase2(x)-1].
1774 //
1775 // where x[i] is referring to the value of the ith bit of x.
1776 unsigned lg = logBase2();
1777 return lg + unsigned((*this)[lg - 1]);
1778 }
1779
1780 /// \returns the log base 2 of this APInt if its an exact power of two, -1
1781 /// otherwise
1782 int32_t exactLogBase2() const {
1783 if (!isPowerOf2())
1784 return -1;
1785 return logBase2();
1786 }
1787
1788 /// Compute the square root
1789 APInt sqrt() const;
1790
1791 /// Get the absolute value;
1792 ///
1793 /// If *this is < 0 then return -(*this), otherwise *this;
1794 APInt abs() const {
1795 if (isNegative())
1796 return -(*this);
1797 return *this;
1798 }
1799
1800 /// \returns the multiplicative inverse for a given modulo.
1801 APInt multiplicativeInverse(const APInt &modulo) const;
1802
1803 /// @}
1804 /// \name Support for division by constant
1805 /// @{
1806
1807 /// Calculate the magic number for signed division by a constant.
1808 struct ms;
1809 ms magic() const;
1810
1811 /// Calculate the magic number for unsigned division by a constant.
1812 struct mu;
1813 mu magicu(unsigned LeadingZeros = 0) const;
1814
1815 /// @}
1816 /// \name Building-block Operations for APInt and APFloat
1817 /// @{
1818
1819 // These building block operations operate on a representation of arbitrary
1820 // precision, two's-complement, bignum integer values. They should be
1821 // sufficient to implement APInt and APFloat bignum requirements. Inputs are
1822 // generally a pointer to the base of an array of integer parts, representing
1823 // an unsigned bignum, and a count of how many parts there are.
1824
1825 /// Sets the least significant part of a bignum to the input value, and zeroes
1826 /// out higher parts.
1827 static void tcSet(WordType *, WordType, unsigned);
1828
1829 /// Assign one bignum to another.
1830 static void tcAssign(WordType *, const WordType *, unsigned);
1831
1832 /// Returns true if a bignum is zero, false otherwise.
1833 static bool tcIsZero(const WordType *, unsigned);
1834
1835 /// Extract the given bit of a bignum; returns 0 or 1. Zero-based.
1836 static int tcExtractBit(const WordType *, unsigned bit);
1837
1838 /// Copy the bit vector of width srcBITS from SRC, starting at bit srcLSB, to
1839 /// DST, of dstCOUNT parts, such that the bit srcLSB becomes the least
1840 /// significant bit of DST. All high bits above srcBITS in DST are
1841 /// zero-filled.
1842 static void tcExtract(WordType *, unsigned dstCount,
1843 const WordType *, unsigned srcBits,
1844 unsigned srcLSB);
1845
1846 /// Set the given bit of a bignum. Zero-based.
1847 static void tcSetBit(WordType *, unsigned bit);
1848
1849 /// Clear the given bit of a bignum. Zero-based.
1850 static void tcClearBit(WordType *, unsigned bit);
1851
1852 /// Returns the bit number of the least or most significant set bit of a
1853 /// number. If the input number has no bits set -1U is returned.
1854 static unsigned tcLSB(const WordType *, unsigned n);
1855 static unsigned tcMSB(const WordType *parts, unsigned n);
1856
1857 /// Negate a bignum in-place.
1858 static void tcNegate(WordType *, unsigned);
1859
1860 /// DST += RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1861 static WordType tcAdd(WordType *, const WordType *,
1862 WordType carry, unsigned);
1863 /// DST += RHS. Returns the carry flag.
1864 static WordType tcAddPart(WordType *, WordType, unsigned);
1865
1866 /// DST -= RHS + CARRY where CARRY is zero or one. Returns the carry flag.
1867 static WordType tcSubtract(WordType *, const WordType *,
1868 WordType carry, unsigned);
1869 /// DST -= RHS. Returns the carry flag.
1870 static WordType tcSubtractPart(WordType *, WordType, unsigned);
1871
1872 /// DST += SRC * MULTIPLIER + PART if add is true
1873 /// DST = SRC * MULTIPLIER + PART if add is false
1874 ///
1875 /// Requires 0 <= DSTPARTS <= SRCPARTS + 1. If DST overlaps SRC they must
1876 /// start at the same point, i.e. DST == SRC.
1877 ///
1878 /// If DSTPARTS == SRC_PARTS + 1 no overflow occurs and zero is returned.
1879 /// Otherwise DST is filled with the least significant DSTPARTS parts of the
1880 /// result, and if all of the omitted higher parts were zero return zero,
1881 /// otherwise overflow occurred and return one.
1882 static int tcMultiplyPart(WordType *dst, const WordType *src,
1883 WordType multiplier, WordType carry,
1884 unsigned srcParts, unsigned dstParts,
1885 bool add);
1886
1887 /// DST = LHS * RHS, where DST has the same width as the operands and is
1888 /// filled with the least significant parts of the result. Returns one if
1889 /// overflow occurred, otherwise zero. DST must be disjoint from both
1890 /// operands.
1891 static int tcMultiply(WordType *, const WordType *, const WordType *,
1892 unsigned);
1893
1894 /// DST = LHS * RHS, where DST has width the sum of the widths of the
1895 /// operands. No overflow occurs. DST must be disjoint from both operands.
1896 static void tcFullMultiply(WordType *, const WordType *,
1897 const WordType *, unsigned, unsigned);
1898
1899 /// If RHS is zero LHS and REMAINDER are left unchanged, return one.
1900 /// Otherwise set LHS to LHS / RHS with the fractional part discarded, set
1901 /// REMAINDER to the remainder, return zero. i.e.
1902 ///
1903 /// OLD_LHS = RHS * LHS + REMAINDER
1904 ///
1905 /// SCRATCH is a bignum of the same size as the operands and result for use by
1906 /// the routine; its contents need not be initialized and are destroyed. LHS,
1907 /// REMAINDER and SCRATCH must be distinct.
1908 static int tcDivide(WordType *lhs, const WordType *rhs,
1909 WordType *remainder, WordType *scratch,
1910 unsigned parts);
1911
1912 /// Shift a bignum left Count bits. Shifted in bits are zero. There are no
1913 /// restrictions on Count.
1914 static void tcShiftLeft(WordType *, unsigned Words, unsigned Count);
1915
1916 /// Shift a bignum right Count bits. Shifted in bits are zero. There are no
1917 /// restrictions on Count.
1918 static void tcShiftRight(WordType *, unsigned Words, unsigned Count);
1919
1920 /// The obvious AND, OR and XOR and complement operations.
1921 static void tcAnd(WordType *, const WordType *, unsigned);
1922 static void tcOr(WordType *, const WordType *, unsigned);
1923 static void tcXor(WordType *, const WordType *, unsigned);
1924 static void tcComplement(WordType *, unsigned);
1925
1926 /// Comparison (unsigned) of two bignums.
1927 static int tcCompare(const WordType *, const WordType *, unsigned);
1928
1929 /// Increment a bignum in-place. Return the carry flag.
1930 static WordType tcIncrement(WordType *dst, unsigned parts) {
1931 return tcAddPart(dst, 1, parts);
1932 }
1933
1934 /// Decrement a bignum in-place. Return the borrow flag.
1935 static WordType tcDecrement(WordType *dst, unsigned parts) {
1936 return tcSubtractPart(dst, 1, parts);
1937 }
1938
1939 /// Set the least significant BITS and clear the rest.
1940 static void tcSetLeastSignificantBits(WordType *, unsigned, unsigned bits);
1941
1942 /// debug method
1943 void dump() const;
1944
1945 /// @}
1946};
1947
1948/// Magic data for optimising signed division by a constant.
1949struct APInt::ms {
1950 APInt m; ///< magic number
1951 unsigned s; ///< shift amount
1952};
1953
1954/// Magic data for optimising unsigned division by a constant.
1955struct APInt::mu {
1956 APInt m; ///< magic number
1957 bool a; ///< add indicator
1958 unsigned s; ///< shift amount
1959};
1960
1961inline bool operator==(uint64_t V1, const APInt &V2) { return V2 == V1; }
1962
1963inline bool operator!=(uint64_t V1, const APInt &V2) { return V2 != V1; }
1964
1965/// Unary bitwise complement operator.
1966///
1967/// \returns an APInt that is the bitwise complement of \p v.
1968inline APInt operator~(APInt v) {
1969 v.flipAllBits();
1970 return v;
1971}
1972
1973inline APInt operator&(APInt a, const APInt &b) {
1974 a &= b;
1975 return a;
1976}
1977
1978inline APInt operator&(const APInt &a, APInt &&b) {
1979 b &= a;
1980 return std::move(b);
1981}
1982
1983inline APInt operator&(APInt a, uint64_t RHS) {
1984 a &= RHS;
1985 return a;
1986}
1987
1988inline APInt operator&(uint64_t LHS, APInt b) {
1989 b &= LHS;
1990 return b;
1991}
1992
1993inline APInt operator|(APInt a, const APInt &b) {
1994 a |= b;
1995 return a;
1996}
1997
1998inline APInt operator|(const APInt &a, APInt &&b) {
1999 b |= a;
2000 return std::move(b);
2001}
2002
2003inline APInt operator|(APInt a, uint64_t RHS) {
2004 a |= RHS;
2005 return a;
2006}
2007
2008inline APInt operator|(uint64_t LHS, APInt b) {
2009 b |= LHS;
2010 return b;
2011}
2012
2013inline APInt operator^(APInt a, const APInt &b) {
2014 a ^= b;
2015 return a;
2016}
2017
2018inline APInt operator^(const APInt &a, APInt &&b) {
2019 b ^= a;
2020 return std::move(b);
2021}
2022
2023inline APInt operator^(APInt a, uint64_t RHS) {
2024 a ^= RHS;
2025 return a;
2026}
2027
2028inline APInt operator^(uint64_t LHS, APInt b) {
2029 b ^= LHS;
2030 return b;
2031}
2032
2033inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
2034 I.print(OS, true);
2035 return OS;
2036}
2037
2038inline APInt operator-(APInt v) {
2039 v.negate();
2040 return v;
2041}
2042
2043inline APInt operator+(APInt a, const APInt &b) {
2044 a += b;
2045 return a;
2046}
2047
2048inline APInt operator+(const APInt &a, APInt &&b) {
2049 b += a;
2050 return std::move(b);
2051}
2052
2053inline APInt operator+(APInt a, uint64_t RHS) {
2054 a += RHS;
2055 return a;
2056}
2057
2058inline APInt operator+(uint64_t LHS, APInt b) {
2059 b += LHS;
2060 return b;
2061}
2062
2063inline APInt operator-(APInt a, const APInt &b) {
2064 a -= b;
2065 return a;
2066}
2067
2068inline APInt operator-(const APInt &a, APInt &&b) {
2069 b.negate();
2070 b += a;
2071 return std::move(b);
2072}
2073
2074inline APInt operator-(APInt a, uint64_t RHS) {
2075 a -= RHS;
2076 return a;
2077}
2078
2079inline APInt operator-(uint64_t LHS, APInt b) {
2080 b.negate();
2081 b += LHS;
2082 return b;
2083}
2084
2085inline APInt operator*(APInt a, uint64_t RHS) {
2086 a *= RHS;
2087 return a;
2088}
2089
2090inline APInt operator*(uint64_t LHS, APInt b) {
2091 b *= LHS;
2092 return b;
2093}
2094
2095
2096namespace APIntOps {
2097
2098/// Determine the smaller of two APInts considered to be signed.
2099inline const APInt &smin(const APInt &A, const APInt &B) {
2100 return A.slt(B) ? A : B;
2101}
2102
2103/// Determine the larger of two APInts considered to be signed.
2104inline const APInt &smax(const APInt &A, const APInt &B) {
2105 return A.sgt(B) ? A : B;
2106}
2107
2108/// Determine the smaller of two APInts considered to be signed.
2109inline const APInt &umin(const APInt &A, const APInt &B) {
2110 return A.ult(B) ? A : B;
2111}
2112
2113/// Determine the larger of two APInts considered to be unsigned.
2114inline const APInt &umax(const APInt &A, const APInt &B) {
2115 return A.ugt(B) ? A : B;
2116}
2117
2118/// Compute GCD of two unsigned APInt values.
2119///
2120/// This function returns the greatest common divisor of the two APInt values
2121/// using Stein's algorithm.
2122///
2123/// \returns the greatest common divisor of A and B.
2124APInt GreatestCommonDivisor(APInt A, APInt B);
2125
2126/// Converts the given APInt to a double value.
2127///
2128/// Treats the APInt as an unsigned value for conversion purposes.
2129inline double RoundAPIntToDouble(const APInt &APIVal) {
2130 return APIVal.roundToDouble();
2131}
2132
2133/// Converts the given APInt to a double value.
2134///
2135/// Treats the APInt as a signed value for conversion purposes.
2136inline double RoundSignedAPIntToDouble(const APInt &APIVal) {
2137 return APIVal.signedRoundToDouble();
2138}
2139
2140/// Converts the given APInt to a float vlalue.
2141inline float RoundAPIntToFloat(const APInt &APIVal) {
2142 return float(RoundAPIntToDouble(APIVal));
2143}
2144
2145/// Converts the given APInt to a float value.
2146///
2147/// Treast the APInt as a signed value for conversion purposes.
2148inline float RoundSignedAPIntToFloat(const APInt &APIVal) {
2149 return float(APIVal.signedRoundToDouble());
2150}
2151
2152/// Converts the given double value into a APInt.
2153///
2154/// This function convert a double value to an APInt value.
2155APInt RoundDoubleToAPInt(double Double, unsigned width);
2156
2157/// Converts a float value into a APInt.
2158///
2159/// Converts a float value into an APInt value.
2160inline APInt RoundFloatToAPInt(float Float, unsigned width) {
2161 return RoundDoubleToAPInt(double(Float), width);
2162}
2163
2164/// Return A unsign-divided by B, rounded by the given rounding mode.
2165APInt RoundingUDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2166
2167/// Return A sign-divided by B, rounded by the given rounding mode.
2168APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM);
2169
2170/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range
2171/// (e.g. 32 for i32).
2172/// This function finds the smallest number n, such that
2173/// (a) n >= 0 and q(n) = 0, or
2174/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all
2175/// integers, belong to two different intervals [Rk, Rk+R),
2176/// where R = 2^BW, and k is an integer.
2177/// The idea here is to find when q(n) "overflows" 2^BW, while at the
2178/// same time "allowing" subtraction. In unsigned modulo arithmetic a
2179/// subtraction (treated as addition of negated numbers) would always
2180/// count as an overflow, but here we want to allow values to decrease
2181/// and increase as long as they are within the same interval.
2182/// Specifically, adding of two negative numbers should not cause an
2183/// overflow (as long as the magnitude does not exceed the bith width).
2184/// On the other hand, given a positive number, adding a negative
2185/// number to it can give a negative result, which would cause the
2186/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is
2187/// treated as a special case of an overflow.
2188///
2189/// This function returns None if after finding k that minimizes the
2190/// positive solution to q(n) = kR, both solutions are contained between
2191/// two consecutive integers.
2192///
2193/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation
2194/// in arithmetic modulo 2^BW, and treating the values as signed) by the
2195/// virtue of *signed* overflow. This function will *not* find such an n,
2196/// however it may find a value of n satisfying the inequalities due to
2197/// an *unsigned* overflow (if the values are treated as unsigned).
2198/// To find a solution for a signed overflow, treat it as a problem of
2199/// finding an unsigned overflow with a range with of BW-1.
2200///
2201/// The returned value may have a different bit width from the input
2202/// coefficients.
2203Optional<APInt> SolveQuadraticEquationWrap(APInt A, APInt B, APInt C,
2204 unsigned RangeWidth);
2205} // End of APIntOps namespace
2206
2207// See friend declaration above. This additional declaration is required in
2208// order to compile LLVM with IBM xlC compiler.
2209hash_code hash_value(const APInt &Arg);
2210} // End of llvm namespace
2211
2212#endif