Bug Summary

File:lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1544, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name HexagonLoopIdiomRecognition.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn338205/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/lib/gcc/x86_64-linux-gnu/8/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/lib/Target/Hexagon -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-07-29-043837-17923-1 -x c++ /build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp -faddrsig
1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
11
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Triple.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/InstructionSimplify.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/MemoryLocation.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpander.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Transforms/Utils/Local.h"
30#include "llvm/Analysis/ValueTracking.h"
31#include "llvm/IR/Attributes.h"
32#include "llvm/IR/BasicBlock.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DebugLoc.h"
37#include "llvm/IR/DerivedTypes.h"
38#include "llvm/IR/Dominators.h"
39#include "llvm/IR/Function.h"
40#include "llvm/IR/IRBuilder.h"
41#include "llvm/IR/InstrTypes.h"
42#include "llvm/IR/Instruction.h"
43#include "llvm/IR/Instructions.h"
44#include "llvm/IR/IntrinsicInst.h"
45#include "llvm/IR/Intrinsics.h"
46#include "llvm/IR/Module.h"
47#include "llvm/IR/PatternMatch.h"
48#include "llvm/IR/Type.h"
49#include "llvm/IR/User.h"
50#include "llvm/IR/Value.h"
51#include "llvm/Pass.h"
52#include "llvm/Support/Casting.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Compiler.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/raw_ostream.h"
59#include "llvm/Transforms/Scalar.h"
60#include "llvm/Transforms/Utils.h"
61#include <algorithm>
62#include <array>
63#include <cassert>
64#include <cstdint>
65#include <cstdlib>
66#include <deque>
67#include <functional>
68#include <iterator>
69#include <map>
70#include <set>
71#include <utility>
72#include <vector>
73
74using namespace llvm;
75
76static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
77 cl::Hidden, cl::init(false),
78 cl::desc("Disable generation of memcpy in loop idiom recognition"));
79
80static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
81 cl::Hidden, cl::init(false),
82 cl::desc("Disable generation of memmove in loop idiom recognition"));
83
84static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
85 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
86 "check guarding the memmove."));
87
88static cl::opt<unsigned> CompileTimeMemSizeThreshold(
89 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
90 cl::desc("Threshold (in bytes) to perform the transformation, if the "
91 "runtime loop count (mem transfer size) is known at compile-time."));
92
93static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
94 cl::Hidden, cl::init(true),
95 cl::desc("Only enable generating memmove in non-nested loops"));
96
97cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
98 cl::Hidden, cl::init(false),
99 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
100
101static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
102 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
103
104static const char *HexagonVolatileMemcpyName
105 = "hexagon_memcpy_forward_vp4cp4n2";
106
107
108namespace llvm {
109
110 void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
111 Pass *createHexagonLoopIdiomPass();
112
113} // end namespace llvm
114
115namespace {
116
117 class HexagonLoopIdiomRecognize : public LoopPass {
118 public:
119 static char ID;
120
121 explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
122 initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
123 }
124
125 StringRef getPassName() const override {
126 return "Recognize Hexagon-specific loop idioms";
127 }
128
129 void getAnalysisUsage(AnalysisUsage &AU) const override {
130 AU.addRequired<LoopInfoWrapperPass>();
131 AU.addRequiredID(LoopSimplifyID);
132 AU.addRequiredID(LCSSAID);
133 AU.addRequired<AAResultsWrapperPass>();
134 AU.addPreserved<AAResultsWrapperPass>();
135 AU.addRequired<ScalarEvolutionWrapperPass>();
136 AU.addRequired<DominatorTreeWrapperPass>();
137 AU.addRequired<TargetLibraryInfoWrapperPass>();
138 AU.addPreserved<TargetLibraryInfoWrapperPass>();
139 }
140
141 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
142
143 private:
144 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
145 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
146 void collectStores(Loop *CurLoop, BasicBlock *BB,
147 SmallVectorImpl<StoreInst*> &Stores);
148 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
149 bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
150 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
151 SmallVectorImpl<BasicBlock*> &ExitBlocks);
152 bool runOnCountableLoop(Loop *L);
153
154 AliasAnalysis *AA;
155 const DataLayout *DL;
156 DominatorTree *DT;
157 LoopInfo *LF;
158 const TargetLibraryInfo *TLI;
159 ScalarEvolution *SE;
160 bool HasMemcpy, HasMemmove;
161 };
162
163 struct Simplifier {
164 struct Rule {
165 using FuncType = std::function<Value* (Instruction*, LLVMContext&)>;
166 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
167 StringRef Name; // For debugging.
168 FuncType Fn;
169 };
170
171 void addRule(StringRef N, const Rule::FuncType &F) {
172 Rules.push_back(Rule(N, F));
173 }
174
175 private:
176 struct WorkListType {
177 WorkListType() = default;
178
179 void push_back(Value* V) {
180 // Do not push back duplicates.
181 if (!S.count(V)) { Q.push_back(V); S.insert(V); }
182 }
183
184 Value *pop_front_val() {
185 Value *V = Q.front(); Q.pop_front(); S.erase(V);
186 return V;
187 }
188
189 bool empty() const { return Q.empty(); }
190
191 private:
192 std::deque<Value*> Q;
193 std::set<Value*> S;
194 };
195
196 using ValueSetType = std::set<Value *>;
197
198 std::vector<Rule> Rules;
199
200 public:
201 struct Context {
202 using ValueMapType = DenseMap<Value *, Value *>;
203
204 Value *Root;
205 ValueSetType Used; // The set of all cloned values used by Root.
206 ValueSetType Clones; // The set of all cloned values.
207 LLVMContext &Ctx;
208
209 Context(Instruction *Exp)
210 : Ctx(Exp->getParent()->getParent()->getContext()) {
211 initialize(Exp);
212 }
213
214 ~Context() { cleanup(); }
215
216 void print(raw_ostream &OS, const Value *V) const;
217 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
218
219 private:
220 friend struct Simplifier;
221
222 void initialize(Instruction *Exp);
223 void cleanup();
224
225 template <typename FuncT> void traverse(Value *V, FuncT F);
226 void record(Value *V);
227 void use(Value *V);
228 void unuse(Value *V);
229
230 bool equal(const Instruction *I, const Instruction *J) const;
231 Value *find(Value *Tree, Value *Sub) const;
232 Value *subst(Value *Tree, Value *OldV, Value *NewV);
233 void replace(Value *OldV, Value *NewV);
234 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
235 };
236
237 Value *simplify(Context &C);
238 };
239
240 struct PE {
241 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
242
243 const Simplifier::Context &C;
244 const Value *V;
245 };
246
247 LLVM_ATTRIBUTE_USED__attribute__((__used__))
248 raw_ostream &operator<<(raw_ostream &OS, const PE &P) {
249 P.C.print(OS, P.V ? P.V : P.C.Root);
250 return OS;
251 }
252
253} // end anonymous namespace
254
255char HexagonLoopIdiomRecognize::ID = 0;
256
257INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
258 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
259INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
260INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
261INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
262INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
263INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
264INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
265INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
266INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
267 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
268
269template <typename FuncT>
270void Simplifier::Context::traverse(Value *V, FuncT F) {
271 WorkListType Q;
272 Q.push_back(V);
273
274 while (!Q.empty()) {
275 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
276 if (!U || U->getParent())
277 continue;
278 if (!F(U))
279 continue;
280 for (Value *Op : U->operands())
281 Q.push_back(Op);
282 }
283}
284
285void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
286 const auto *U = dyn_cast<const Instruction>(V);
287 if (!U) {
288 OS << V << '(' << *V << ')';
289 return;
290 }
291
292 if (U->getParent()) {
293 OS << U << '(';
294 U->printAsOperand(OS, true);
295 OS << ')';
296 return;
297 }
298
299 unsigned N = U->getNumOperands();
300 if (N != 0)
301 OS << U << '(';
302 OS << U->getOpcodeName();
303 for (const Value *Op : U->operands()) {
304 OS << ' ';
305 print(OS, Op);
306 }
307 if (N != 0)
308 OS << ')';
309}
310
311void Simplifier::Context::initialize(Instruction *Exp) {
312 // Perform a deep clone of the expression, set Root to the root
313 // of the clone, and build a map from the cloned values to the
314 // original ones.
315 ValueMapType M;
316 BasicBlock *Block = Exp->getParent();
317 WorkListType Q;
318 Q.push_back(Exp);
319
320 while (!Q.empty()) {
321 Value *V = Q.pop_front_val();
322 if (M.find(V) != M.end())
323 continue;
324 if (Instruction *U = dyn_cast<Instruction>(V)) {
325 if (isa<PHINode>(U) || U->getParent() != Block)
326 continue;
327 for (Value *Op : U->operands())
328 Q.push_back(Op);
329 M.insert({U, U->clone()});
330 }
331 }
332
333 for (std::pair<Value*,Value*> P : M) {
334 Instruction *U = cast<Instruction>(P.second);
335 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
336 auto F = M.find(U->getOperand(i));
337 if (F != M.end())
338 U->setOperand(i, F->second);
339 }
340 }
341
342 auto R = M.find(Exp);
343 assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail
("R != M.end()", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 343, __extension__ __PRETTY_FUNCTION__))
;
344 Root = R->second;
345
346 record(Root);
347 use(Root);
348}
349
350void Simplifier::Context::record(Value *V) {
351 auto Record = [this](Instruction *U) -> bool {
352 Clones.insert(U);
353 return true;
354 };
355 traverse(V, Record);
356}
357
358void Simplifier::Context::use(Value *V) {
359 auto Use = [this](Instruction *U) -> bool {
360 Used.insert(U);
361 return true;
362 };
363 traverse(V, Use);
364}
365
366void Simplifier::Context::unuse(Value *V) {
367 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
368 return;
369
370 auto Unuse = [this](Instruction *U) -> bool {
371 if (!U->use_empty())
372 return false;
373 Used.erase(U);
374 return true;
375 };
376 traverse(V, Unuse);
377}
378
379Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
380 if (Tree == OldV)
381 return NewV;
382 if (OldV == NewV)
383 return Tree;
384
385 WorkListType Q;
386 Q.push_back(Tree);
387 while (!Q.empty()) {
388 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
389 // If U is not an instruction, or it's not a clone, skip it.
390 if (!U || U->getParent())
391 continue;
392 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
393 Value *Op = U->getOperand(i);
394 if (Op == OldV) {
395 U->setOperand(i, NewV);
396 unuse(OldV);
397 } else {
398 Q.push_back(Op);
399 }
400 }
401 }
402 return Tree;
403}
404
405void Simplifier::Context::replace(Value *OldV, Value *NewV) {
406 if (Root == OldV) {
407 Root = NewV;
408 use(Root);
409 return;
410 }
411
412 // NewV may be a complex tree that has just been created by one of the
413 // transformation rules. We need to make sure that it is commoned with
414 // the existing Root to the maximum extent possible.
415 // Identify all subtrees of NewV (including NewV itself) that have
416 // equivalent counterparts in Root, and replace those subtrees with
417 // these counterparts.
418 WorkListType Q;
419 Q.push_back(NewV);
420 while (!Q.empty()) {
421 Value *V = Q.pop_front_val();
422 Instruction *U = dyn_cast<Instruction>(V);
423 if (!U || U->getParent())
424 continue;
425 if (Value *DupV = find(Root, V)) {
426 if (DupV != V)
427 NewV = subst(NewV, V, DupV);
428 } else {
429 for (Value *Op : U->operands())
430 Q.push_back(Op);
431 }
432 }
433
434 // Now, simply replace OldV with NewV in Root.
435 Root = subst(Root, OldV, NewV);
436 use(Root);
437}
438
439void Simplifier::Context::cleanup() {
440 for (Value *V : Clones) {
441 Instruction *U = cast<Instruction>(V);
442 if (!U->getParent())
443 U->dropAllReferences();
444 }
445
446 for (Value *V : Clones) {
447 Instruction *U = cast<Instruction>(V);
448 if (!U->getParent())
449 U->deleteValue();
450 }
451}
452
453bool Simplifier::Context::equal(const Instruction *I,
454 const Instruction *J) const {
455 if (I == J)
456 return true;
457 if (!I->isSameOperationAs(J))
458 return false;
459 if (isa<PHINode>(I))
460 return I->isIdenticalTo(J);
461
462 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
463 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
464 if (OpI == OpJ)
465 continue;
466 auto *InI = dyn_cast<const Instruction>(OpI);
467 auto *InJ = dyn_cast<const Instruction>(OpJ);
468 if (InI && InJ) {
469 if (!equal(InI, InJ))
470 return false;
471 } else if (InI != InJ || !InI)
472 return false;
473 }
474 return true;
475}
476
477Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
478 Instruction *SubI = dyn_cast<Instruction>(Sub);
479 WorkListType Q;
480 Q.push_back(Tree);
481
482 while (!Q.empty()) {
483 Value *V = Q.pop_front_val();
484 if (V == Sub)
485 return V;
486 Instruction *U = dyn_cast<Instruction>(V);
487 if (!U || U->getParent())
488 continue;
489 if (SubI && equal(SubI, U))
490 return U;
491 assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0)
: __assert_fail ("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 491, __extension__ __PRETTY_FUNCTION__))
;
492 for (Value *Op : U->operands())
493 Q.push_back(Op);
494 }
495 return nullptr;
496}
497
498void Simplifier::Context::link(Instruction *I, BasicBlock *B,
499 BasicBlock::iterator At) {
500 if (I->getParent())
501 return;
502
503 for (Value *Op : I->operands()) {
504 if (Instruction *OpI = dyn_cast<Instruction>(Op))
505 link(OpI, B, At);
506 }
507
508 B->getInstList().insert(At, I);
509}
510
511Value *Simplifier::Context::materialize(BasicBlock *B,
512 BasicBlock::iterator At) {
513 if (Instruction *RootI = dyn_cast<Instruction>(Root))
514 link(RootI, B, At);
515 return Root;
516}
517
518Value *Simplifier::simplify(Context &C) {
519 WorkListType Q;
520 Q.push_back(C.Root);
521 unsigned Count = 0;
522 const unsigned Limit = SimplifyLimit;
523
524 while (!Q.empty()) {
525 if (Count++ >= Limit)
526 break;
527 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
528 if (!U || U->getParent() || !C.Used.count(U))
529 continue;
530 bool Changed = false;
531 for (Rule &R : Rules) {
532 Value *W = R.Fn(U, C.Ctx);
533 if (!W)
534 continue;
535 Changed = true;
536 C.record(W);
537 C.replace(U, W);
538 Q.push_back(C.Root);
539 break;
540 }
541 if (!Changed) {
542 for (Value *Op : U->operands())
543 Q.push_back(Op);
544 }
545 }
546 return Count < Limit ? C.Root : nullptr;
547}
548
549//===----------------------------------------------------------------------===//
550//
551// Implementation of PolynomialMultiplyRecognize
552//
553//===----------------------------------------------------------------------===//
554
555namespace {
556
557 class PolynomialMultiplyRecognize {
558 public:
559 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
560 const DominatorTree &dt, const TargetLibraryInfo &tli,
561 ScalarEvolution &se)
562 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
563
564 bool recognize();
565
566 private:
567 using ValueSeq = SetVector<Value *>;
568
569 IntegerType *getPmpyType() const {
570 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
571 return IntegerType::get(Ctx, 32);
572 }
573
574 bool isPromotableTo(Value *V, IntegerType *Ty);
575 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
576 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
577
578 Value *getCountIV(BasicBlock *BB);
579 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
580 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
581 ValueSeq &Late);
582 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
583 bool commutesWithShift(Instruction *I);
584 bool highBitsAreZero(Value *V, unsigned IterCount);
585 bool keepsHighBitsZero(Value *V, unsigned IterCount);
586 bool isOperandShifted(Instruction *I, Value *Op);
587 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
588 unsigned IterCount);
589 void cleanupLoopBody(BasicBlock *LoopB);
590
591 struct ParsedValues {
592 ParsedValues() = default;
593
594 Value *M = nullptr;
595 Value *P = nullptr;
596 Value *Q = nullptr;
597 Value *R = nullptr;
598 Value *X = nullptr;
599 Instruction *Res = nullptr;
600 unsigned IterCount = 0;
601 bool Left = false;
602 bool Inv = false;
603 };
604
605 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
606 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
607 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
608 Value *CIV, ParsedValues &PV, bool PreScan);
609 unsigned getInverseMxN(unsigned QP);
610 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
611
612 void setupPreSimplifier(Simplifier &S);
613 void setupPostSimplifier(Simplifier &S);
614
615 Loop *CurLoop;
616 const DataLayout &DL;
617 const DominatorTree &DT;
618 const TargetLibraryInfo &TLI;
619 ScalarEvolution &SE;
620 };
621
622} // end anonymous namespace
623
624Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
625 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
626 if (std::distance(PI, PE) != 2)
627 return nullptr;
628 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
629
630 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
631 auto *PN = cast<PHINode>(I);
632 Value *InitV = PN->getIncomingValueForBlock(PB);
633 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
634 continue;
635 Value *IterV = PN->getIncomingValueForBlock(BB);
636 if (!isa<BinaryOperator>(IterV))
637 continue;
638 auto *BO = dyn_cast<BinaryOperator>(IterV);
639 if (BO->getOpcode() != Instruction::Add)
640 continue;
641 Value *IncV = nullptr;
642 if (BO->getOperand(0) == PN)
643 IncV = BO->getOperand(1);
644 else if (BO->getOperand(1) == PN)
645 IncV = BO->getOperand(0);
646 if (IncV == nullptr)
647 continue;
648
649 if (auto *T = dyn_cast<ConstantInt>(IncV))
650 if (T->getZExtValue() == 1)
651 return PN;
652 }
653 return nullptr;
654}
655
656static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
657 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
658 Use &TheUse = UI.getUse();
659 ++UI;
660 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
661 if (BB == II->getParent())
662 II->replaceUsesOfWith(I, J);
663 }
664}
665
666bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
667 Value *CIV, ParsedValues &PV) {
668 // Match the following:
669 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
670 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
671 // The condition may also check for equality with the masked value, i.e
672 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
673 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
674
675 Value *CondV = SelI->getCondition();
676 Value *TrueV = SelI->getTrueValue();
677 Value *FalseV = SelI->getFalseValue();
678
679 using namespace PatternMatch;
680
681 CmpInst::Predicate P;
682 Value *A = nullptr, *B = nullptr, *C = nullptr;
683
684 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
685 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
686 return false;
687 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
688 return false;
689 // Matched: select (A & B) == C ? ... : ...
690 // select (A & B) != C ? ... : ...
691
692 Value *X = nullptr, *Sh1 = nullptr;
693 // Check (A & B) for (X & (1 << i)):
694 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
695 Sh1 = A;
696 X = B;
697 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
698 Sh1 = B;
699 X = A;
700 } else {
701 // TODO: Could also check for an induction variable containing single
702 // bit shifted left by 1 in each iteration.
703 return false;
704 }
705
706 bool TrueIfZero;
707
708 // Check C against the possible values for comparison: 0 and (1 << i):
709 if (match(C, m_Zero()))
710 TrueIfZero = (P == CmpInst::ICMP_EQ);
711 else if (C == Sh1)
712 TrueIfZero = (P == CmpInst::ICMP_NE);
713 else
714 return false;
715
716 // So far, matched:
717 // select (X & (1 << i)) ? ... : ...
718 // including variations of the check against zero/non-zero value.
719
720 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
721 if (TrueIfZero) {
722 ShouldSameV = TrueV;
723 ShouldXoredV = FalseV;
724 } else {
725 ShouldSameV = FalseV;
726 ShouldXoredV = TrueV;
727 }
728
729 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
730 Value *T = nullptr;
731 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
732 // Matched: select +++ ? ... : Y ^ Z
733 // select +++ ? Y ^ Z : ...
734 // where +++ denotes previously checked matches.
735 if (ShouldSameV == Y)
736 T = Z;
737 else if (ShouldSameV == Z)
738 T = Y;
739 else
740 return false;
741 R = ShouldSameV;
742 // Matched: select +++ ? R : R ^ T
743 // select +++ ? R ^ T : R
744 // depending on TrueIfZero.
745
746 } else if (match(ShouldSameV, m_Zero())) {
747 // Matched: select +++ ? 0 : ...
748 // select +++ ? ... : 0
749 if (!SelI->hasOneUse())
750 return false;
751 T = ShouldXoredV;
752 // Matched: select +++ ? 0 : T
753 // select +++ ? T : 0
754
755 Value *U = *SelI->user_begin();
756 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
757 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
758 return false;
759 // Matched: xor (select +++ ? 0 : T), R
760 // xor (select +++ ? T : 0), R
761 } else
762 return false;
763
764 // The xor input value T is isolated into its own match so that it could
765 // be checked against an induction variable containing a shifted bit
766 // (todo).
767 // For now, check against (Q << i).
768 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
769 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
770 return false;
771 // Matched: select +++ ? R : R ^ (Q << i)
772 // select +++ ? R ^ (Q << i) : R
773
774 PV.X = X;
775 PV.Q = Q;
776 PV.R = R;
777 PV.Left = true;
778 return true;
779}
780
781bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
782 ParsedValues &PV) {
783 // Match the following:
784 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
785 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
786 // The condition may also check for equality with the masked value, i.e
787 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
788 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
789
790 Value *CondV = SelI->getCondition();
791 Value *TrueV = SelI->getTrueValue();
792 Value *FalseV = SelI->getFalseValue();
793
794 using namespace PatternMatch;
795
796 Value *C = nullptr;
797 CmpInst::Predicate P;
798 bool TrueIfZero;
799
800 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
801 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
802 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
803 return false;
804 // Matched: select C == 0 ? ... : ...
805 // select C != 0 ? ... : ...
806 TrueIfZero = (P == CmpInst::ICMP_EQ);
807 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
808 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
809 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
810 return false;
811 // Matched: select C == 1 ? ... : ...
812 // select C != 1 ? ... : ...
813 TrueIfZero = (P == CmpInst::ICMP_NE);
814 } else
815 return false;
816
817 Value *X = nullptr;
818 if (!match(C, m_And(m_Value(X), m_One())) &&
819 !match(C, m_And(m_One(), m_Value(X))))
820 return false;
821 // Matched: select (X & 1) == +++ ? ... : ...
822 // select (X & 1) != +++ ? ... : ...
823
824 Value *R = nullptr, *Q = nullptr;
825 if (TrueIfZero) {
826 // The select's condition is true if the tested bit is 0.
827 // TrueV must be the shift, FalseV must be the xor.
828 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
829 return false;
830 // Matched: select +++ ? (R >> 1) : ...
831 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
832 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
833 return false;
834 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
835 // with commuting ^.
836 } else {
837 // The select's condition is true if the tested bit is 1.
838 // TrueV must be the xor, FalseV must be the shift.
839 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
840 return false;
841 // Matched: select +++ ? ... : (R >> 1)
842 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
843 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
844 return false;
845 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
846 // with commuting ^.
847 }
848
849 PV.X = X;
850 PV.Q = Q;
851 PV.R = R;
852 PV.Left = false;
853 return true;
854}
855
856bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
857 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
858 bool PreScan) {
859 using namespace PatternMatch;
860
861 // The basic pattern for R = P.Q is:
862 // for i = 0..31
863 // R = phi (0, R')
864 // if (P & (1 << i)) ; test-bit(P, i)
865 // R' = R ^ (Q << i)
866 //
867 // Similarly, the basic pattern for R = (P/Q).Q - P
868 // for i = 0..31
869 // R = phi(P, R')
870 // if (R & (1 << i))
871 // R' = R ^ (Q << i)
872
873 // There exist idioms, where instead of Q being shifted left, P is shifted
874 // right. This produces a result that is shifted right by 32 bits (the
875 // non-shifted result is 64-bit).
876 //
877 // For R = P.Q, this would be:
878 // for i = 0..31
879 // R = phi (0, R')
880 // if ((P >> i) & 1)
881 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
882 // else ; be shifted by 1, not i.
883 // R' = R >> 1
884 //
885 // And for the inverse:
886 // for i = 0..31
887 // R = phi (P, R')
888 // if (R & 1)
889 // R' = (R >> 1) ^ Q
890 // else
891 // R' = R >> 1
892
893 // The left-shifting idioms share the same pattern:
894 // select (X & (1 << i)) ? R ^ (Q << i) : R
895 // Similarly for right-shifting idioms:
896 // select (X & 1) ? (R >> 1) ^ Q
897
898 if (matchLeftShift(SelI, CIV, PV)) {
899 // If this is a pre-scan, getting this far is sufficient.
900 if (PreScan)
901 return true;
902
903 // Need to make sure that the SelI goes back into R.
904 auto *RPhi = dyn_cast<PHINode>(PV.R);
905 if (!RPhi)
906 return false;
907 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
908 return false;
909 PV.Res = SelI;
910
911 // If X is loop invariant, it must be the input polynomial, and the
912 // idiom is the basic polynomial multiply.
913 if (CurLoop->isLoopInvariant(PV.X)) {
914 PV.P = PV.X;
915 PV.Inv = false;
916 } else {
917 // X is not loop invariant. If X == R, this is the inverse pmpy.
918 // Otherwise, check for an xor with an invariant value. If the
919 // variable argument to the xor is R, then this is still a valid
920 // inverse pmpy.
921 PV.Inv = true;
922 if (PV.X != PV.R) {
923 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
924 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
925 return false;
926 auto *I1 = dyn_cast<Instruction>(X1);
927 auto *I2 = dyn_cast<Instruction>(X2);
928 if (!I1 || I1->getParent() != LoopB) {
929 Var = X2;
930 Inv = X1;
931 } else if (!I2 || I2->getParent() != LoopB) {
932 Var = X1;
933 Inv = X2;
934 } else
935 return false;
936 if (Var != PV.R)
937 return false;
938 PV.M = Inv;
939 }
940 // The input polynomial P still needs to be determined. It will be
941 // the entry value of R.
942 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
943 PV.P = EntryP;
944 }
945
946 return true;
947 }
948
949 if (matchRightShift(SelI, PV)) {
950 // If this is an inverse pattern, the Q polynomial must be known at
951 // compile time.
952 if (PV.Inv && !isa<ConstantInt>(PV.Q))
953 return false;
954 if (PreScan)
955 return true;
956 // There is no exact matching of right-shift pmpy.
957 return false;
958 }
959
960 return false;
961}
962
963bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
964 IntegerType *DestTy) {
965 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
966 if (!T || T->getBitWidth() > DestTy->getBitWidth())
967 return false;
968 if (T->getBitWidth() == DestTy->getBitWidth())
969 return true;
970 // Non-instructions are promotable. The reason why an instruction may not
971 // be promotable is that it may produce a different result if its operands
972 // and the result are promoted, for example, it may produce more non-zero
973 // bits. While it would still be possible to represent the proper result
974 // in a wider type, it may require adding additional instructions (which
975 // we don't want to do).
976 Instruction *In = dyn_cast<Instruction>(Val);
977 if (!In)
978 return true;
979 // The bitwidth of the source type is smaller than the destination.
980 // Check if the individual operation can be promoted.
981 switch (In->getOpcode()) {
982 case Instruction::PHI:
983 case Instruction::ZExt:
984 case Instruction::And:
985 case Instruction::Or:
986 case Instruction::Xor:
987 case Instruction::LShr: // Shift right is ok.
988 case Instruction::Select:
989 case Instruction::Trunc:
990 return true;
991 case Instruction::ICmp:
992 if (CmpInst *CI = cast<CmpInst>(In))
993 return CI->isEquality() || CI->isUnsigned();
994 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 994)
;
995 case Instruction::Add:
996 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
997 }
998 return false;
999}
1000
1001void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1002 IntegerType *DestTy, BasicBlock *LoopB) {
1003 Type *OrigTy = In->getType();
1004
1005 // Leave boolean values alone.
1006 if (!In->getType()->isIntegerTy(1))
1007 In->mutateType(DestTy);
1008 unsigned DestBW = DestTy->getBitWidth();
1009
1010 // Handle PHIs.
1011 if (PHINode *P = dyn_cast<PHINode>(In)) {
1012 unsigned N = P->getNumIncomingValues();
1013 for (unsigned i = 0; i != N; ++i) {
1014 BasicBlock *InB = P->getIncomingBlock(i);
1015 if (InB == LoopB)
1016 continue;
1017 Value *InV = P->getIncomingValue(i);
1018 IntegerType *Ty = cast<IntegerType>(InV->getType());
1019 // Do not promote values in PHI nodes of type i1.
1020 if (Ty != P->getType()) {
1021 // If the value type does not match the PHI type, the PHI type
1022 // must have been promoted.
1023 assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ?
void (0) : __assert_fail ("Ty->getBitWidth() < DestBW"
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1023, __extension__ __PRETTY_FUNCTION__))
;
1024 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1025 P->setIncomingValue(i, InV);
1026 }
1027 }
1028 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1029 Value *Op = Z->getOperand(0);
1030 if (Op->getType() == Z->getType())
1031 Z->replaceAllUsesWith(Op);
1032 Z->eraseFromParent();
1033 return;
1034 }
1035 if (TruncInst *T = dyn_cast<TruncInst>(In)) {
1036 IntegerType *TruncTy = cast<IntegerType>(OrigTy);
1037 Value *Mask = ConstantInt::get(DestTy, (1u << TruncTy->getBitWidth()) - 1);
1038 Value *And = IRBuilder<>(In).CreateAnd(T->getOperand(0), Mask);
1039 T->replaceAllUsesWith(And);
1040 T->eraseFromParent();
1041 return;
1042 }
1043
1044 // Promote immediates.
1045 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1046 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1047 if (CI->getType()->getBitWidth() < DestBW)
1048 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1049 }
1050}
1051
1052bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1053 BasicBlock *ExitB) {
1054 assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail (
"LoopB", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1054, __extension__ __PRETTY_FUNCTION__))
;
1055 // Skip loops where the exit block has more than one predecessor. The values
1056 // coming from the loop block will be promoted to another type, and so the
1057 // values coming into the exit block from other predecessors would also have
1058 // to be promoted.
1059 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1060 return false;
1061 IntegerType *DestTy = getPmpyType();
1062 // Check if the exit values have types that are no wider than the type
1063 // that we want to promote to.
1064 unsigned DestBW = DestTy->getBitWidth();
1065 for (PHINode &P : ExitB->phis()) {
1066 if (P.getNumIncomingValues() != 1)
1067 return false;
1068 assert(P.getIncomingBlock(0) == LoopB)(static_cast <bool> (P.getIncomingBlock(0) == LoopB) ? void
(0) : __assert_fail ("P.getIncomingBlock(0) == LoopB", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1068, __extension__ __PRETTY_FUNCTION__))
;
1069 IntegerType *T = dyn_cast<IntegerType>(P.getType());
1070 if (!T || T->getBitWidth() > DestBW)
1071 return false;
1072 }
1073
1074 // Check all instructions in the loop.
1075 for (Instruction &In : *LoopB)
1076 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1077 return false;
1078
1079 // Perform the promotion.
1080 std::vector<Instruction*> LoopIns;
1081 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1082 [](Instruction &In) { return &In; });
1083 for (Instruction *In : LoopIns)
1084 promoteTo(In, DestTy, LoopB);
1085
1086 // Fix up the PHI nodes in the exit block.
1087 Instruction *EndI = ExitB->getFirstNonPHI();
1088 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1089 for (auto I = ExitB->begin(); I != End; ++I) {
1090 PHINode *P = dyn_cast<PHINode>(I);
1091 if (!P)
1092 break;
1093 Type *Ty0 = P->getIncomingValue(0)->getType();
1094 Type *PTy = P->getType();
1095 if (PTy != Ty0) {
1096 assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail
("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1096, __extension__ __PRETTY_FUNCTION__))
;
1097 // In order to create the trunc, P must have the promoted type.
1098 P->mutateType(Ty0);
1099 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1100 // In order for the RAUW to work, the types of P and T must match.
1101 P->mutateType(PTy);
1102 P->replaceAllUsesWith(T);
1103 // Final update of the P's type.
1104 P->mutateType(Ty0);
1105 cast<Instruction>(T)->setOperand(0, P);
1106 }
1107 }
1108
1109 return true;
1110}
1111
1112bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1113 ValueSeq &Cycle) {
1114 // Out = ..., In, ...
1115 if (Out == In)
1116 return true;
1117
1118 auto *BB = cast<Instruction>(Out)->getParent();
1119 bool HadPhi = false;
1120
1121 for (auto U : Out->users()) {
1122 auto *I = dyn_cast<Instruction>(&*U);
1123 if (I == nullptr || I->getParent() != BB)
1124 continue;
1125 // Make sure that there are no multi-iteration cycles, e.g.
1126 // p1 = phi(p2)
1127 // p2 = phi(p1)
1128 // The cycle p1->p2->p1 would span two loop iterations.
1129 // Check that there is only one phi in the cycle.
1130 bool IsPhi = isa<PHINode>(I);
1131 if (IsPhi && HadPhi)
1132 return false;
1133 HadPhi |= IsPhi;
1134 if (Cycle.count(I))
1135 return false;
1136 Cycle.insert(I);
1137 if (findCycle(I, In, Cycle))
1138 break;
1139 Cycle.remove(I);
1140 }
1141 return !Cycle.empty();
1142}
1143
1144void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1145 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1146 // All the values in the cycle that are between the phi node and the
1147 // divider instruction will be classified as "early", all other values
1148 // will be "late".
1149
1150 bool IsE = true;
1151 unsigned I, N = Cycle.size();
1152 for (I = 0; I < N; ++I) {
1153 Value *V = Cycle[I];
1154 if (DivI == V)
1155 IsE = false;
1156 else if (!isa<PHINode>(V))
1157 continue;
1158 // Stop if found either.
1159 break;
1160 }
1161 // "I" is the index of either DivI or the phi node, whichever was first.
1162 // "E" is "false" or "true" respectively.
1163 ValueSeq &First = !IsE ? Early : Late;
1164 for (unsigned J = 0; J < I; ++J)
1165 First.insert(Cycle[J]);
1166
1167 ValueSeq &Second = IsE ? Early : Late;
1168 Second.insert(Cycle[I]);
1169 for (++I; I < N; ++I) {
1170 Value *V = Cycle[I];
1171 if (DivI == V || isa<PHINode>(V))
1172 break;
1173 Second.insert(V);
1174 }
1175
1176 for (; I < N; ++I)
1177 First.insert(Cycle[I]);
1178}
1179
1180bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1181 ValueSeq &Early, ValueSeq &Late) {
1182 // Select is an exception, since the condition value does not have to be
1183 // classified in the same way as the true/false values. The true/false
1184 // values do have to be both early or both late.
1185 if (UseI->getOpcode() == Instruction::Select) {
1186 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1187 if (Early.count(TV) || Early.count(FV)) {
1188 if (Late.count(TV) || Late.count(FV))
1189 return false;
1190 Early.insert(UseI);
1191 } else if (Late.count(TV) || Late.count(FV)) {
1192 if (Early.count(TV) || Early.count(FV))
1193 return false;
1194 Late.insert(UseI);
1195 }
1196 return true;
1197 }
1198
1199 // Not sure what would be the example of this, but the code below relies
1200 // on having at least one operand.
1201 if (UseI->getNumOperands() == 0)
1202 return true;
1203
1204 bool AE = true, AL = true;
1205 for (auto &I : UseI->operands()) {
1206 if (Early.count(&*I))
1207 AL = false;
1208 else if (Late.count(&*I))
1209 AE = false;
1210 }
1211 // If the operands appear "all early" and "all late" at the same time,
1212 // then it means that none of them are actually classified as either.
1213 // This is harmless.
1214 if (AE && AL)
1215 return true;
1216 // Conversely, if they are neither "all early" nor "all late", then
1217 // we have a mixture of early and late operands that is not a known
1218 // exception.
1219 if (!AE && !AL)
1220 return false;
1221
1222 // Check that we have covered the two special cases.
1223 assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail
("AE != AL", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1223, __extension__ __PRETTY_FUNCTION__))
;
1224
1225 if (AE)
1226 Early.insert(UseI);
1227 else
1228 Late.insert(UseI);
1229 return true;
1230}
1231
1232bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1233 switch (I->getOpcode()) {
1234 case Instruction::And:
1235 case Instruction::Or:
1236 case Instruction::Xor:
1237 case Instruction::LShr:
1238 case Instruction::Shl:
1239 case Instruction::Select:
1240 case Instruction::ICmp:
1241 case Instruction::PHI:
1242 break;
1243 default:
1244 return false;
1245 }
1246 return true;
1247}
1248
1249bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1250 unsigned IterCount) {
1251 auto *T = dyn_cast<IntegerType>(V->getType());
1252 if (!T)
1253 return false;
1254
1255 KnownBits Known(T->getBitWidth());
1256 computeKnownBits(V, Known, DL);
1257 return Known.countMinLeadingZeros() >= IterCount;
1258}
1259
1260bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1261 unsigned IterCount) {
1262 // Assume that all inputs to the value have the high bits zero.
1263 // Check if the value itself preserves the zeros in the high bits.
1264 if (auto *C = dyn_cast<ConstantInt>(V))
1265 return C->getValue().countLeadingZeros() >= IterCount;
1266
1267 if (auto *I = dyn_cast<Instruction>(V)) {
1268 switch (I->getOpcode()) {
1269 case Instruction::And:
1270 case Instruction::Or:
1271 case Instruction::Xor:
1272 case Instruction::LShr:
1273 case Instruction::Select:
1274 case Instruction::ICmp:
1275 case Instruction::PHI:
1276 case Instruction::ZExt:
1277 return true;
1278 }
1279 }
1280
1281 return false;
1282}
1283
1284bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1285 unsigned Opc = I->getOpcode();
1286 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1287 return Op != I->getOperand(1);
1288 return true;
1289}
1290
1291bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1292 BasicBlock *ExitB, unsigned IterCount) {
1293 Value *CIV = getCountIV(LoopB);
1294 if (CIV == nullptr)
1295 return false;
1296 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1297 if (CIVTy == nullptr)
1298 return false;
1299
1300 ValueSeq RShifts;
1301 ValueSeq Early, Late, Cycled;
1302
1303 // Find all value cycles that contain logical right shifts by 1.
1304 for (Instruction &I : *LoopB) {
1305 using namespace PatternMatch;
1306
1307 Value *V = nullptr;
1308 if (!match(&I, m_LShr(m_Value(V), m_One())))
1309 continue;
1310 ValueSeq C;
1311 if (!findCycle(&I, V, C))
1312 continue;
1313
1314 // Found a cycle.
1315 C.insert(&I);
1316 classifyCycle(&I, C, Early, Late);
1317 Cycled.insert(C.begin(), C.end());
1318 RShifts.insert(&I);
1319 }
1320
1321 // Find the set of all values affected by the shift cycles, i.e. all
1322 // cycled values, and (recursively) all their users.
1323 ValueSeq Users(Cycled.begin(), Cycled.end());
1324 for (unsigned i = 0; i < Users.size(); ++i) {
1325 Value *V = Users[i];
1326 if (!isa<IntegerType>(V->getType()))
1327 return false;
1328 auto *R = cast<Instruction>(V);
1329 // If the instruction does not commute with shifts, the loop cannot
1330 // be unshifted.
1331 if (!commutesWithShift(R))
1332 return false;
1333 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1334 auto *T = cast<Instruction>(*I);
1335 // Skip users from outside of the loop. They will be handled later.
1336 // Also, skip the right-shifts and phi nodes, since they mix early
1337 // and late values.
1338 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1339 continue;
1340
1341 Users.insert(T);
1342 if (!classifyInst(T, Early, Late))
1343 return false;
1344 }
1345 }
1346
1347 if (Users.empty())
1348 return false;
1349
1350 // Verify that high bits remain zero.
1351 ValueSeq Internal(Users.begin(), Users.end());
1352 ValueSeq Inputs;
1353 for (unsigned i = 0; i < Internal.size(); ++i) {
1354 auto *R = dyn_cast<Instruction>(Internal[i]);
1355 if (!R)
1356 continue;
1357 for (Value *Op : R->operands()) {
1358 auto *T = dyn_cast<Instruction>(Op);
1359 if (T && T->getParent() != LoopB)
1360 Inputs.insert(Op);
1361 else
1362 Internal.insert(Op);
1363 }
1364 }
1365 for (Value *V : Inputs)
1366 if (!highBitsAreZero(V, IterCount))
1367 return false;
1368 for (Value *V : Internal)
1369 if (!keepsHighBitsZero(V, IterCount))
1370 return false;
1371
1372 // Finally, the work can be done. Unshift each user.
1373 IRBuilder<> IRB(LoopB);
1374 std::map<Value*,Value*> ShiftMap;
1375
1376 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1377
1378 CastMapType CastMap;
1379
1380 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1381 IntegerType *Ty) -> Value* {
1382 auto H = CM.find(std::make_pair(V, Ty));
1383 if (H != CM.end())
1384 return H->second;
1385 Value *CV = IRB.CreateIntCast(V, Ty, false);
1386 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1387 return CV;
1388 };
1389
1390 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1391 using namespace PatternMatch;
1392
1393 if (isa<PHINode>(I) || !Users.count(&*I))
1394 continue;
1395
1396 // Match lshr x, 1.
1397 Value *V = nullptr;
1398 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1399 replaceAllUsesOfWithIn(&*I, V, LoopB);
1400 continue;
1401 }
1402 // For each non-cycled operand, replace it with the corresponding
1403 // value shifted left.
1404 for (auto &J : I->operands()) {
1405 Value *Op = J.get();
1406 if (!isOperandShifted(&*I, Op))
1407 continue;
1408 if (Users.count(Op))
1409 continue;
1410 // Skip shifting zeros.
1411 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1412 continue;
1413 // Check if we have already generated a shift for this value.
1414 auto F = ShiftMap.find(Op);
1415 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1416 if (W == nullptr) {
1417 IRB.SetInsertPoint(&*I);
1418 // First, the shift amount will be CIV or CIV+1, depending on
1419 // whether the value is early or late. Instead of creating CIV+1,
1420 // do a single shift of the value.
1421 Value *ShAmt = CIV, *ShVal = Op;
1422 auto *VTy = cast<IntegerType>(ShVal->getType());
1423 auto *ATy = cast<IntegerType>(ShAmt->getType());
1424 if (Late.count(&*I))
1425 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1426 // Second, the types of the shifted value and the shift amount
1427 // must match.
1428 if (VTy != ATy) {
1429 if (VTy->getBitWidth() < ATy->getBitWidth())
1430 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1431 else
1432 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1433 }
1434 // Ready to generate the shift and memoize it.
1435 W = IRB.CreateShl(ShVal, ShAmt);
1436 ShiftMap.insert(std::make_pair(Op, W));
1437 }
1438 I->replaceUsesOfWith(Op, W);
1439 }
1440 }
1441
1442 // Update the users outside of the loop to account for having left
1443 // shifts. They would normally be shifted right in the loop, so shift
1444 // them right after the loop exit.
1445 // Take advantage of the loop-closed SSA form, which has all the post-
1446 // loop values in phi nodes.
1447 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1448 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1449 if (!isa<PHINode>(P))
1450 break;
1451 auto *PN = cast<PHINode>(P);
1452 Value *U = PN->getIncomingValueForBlock(LoopB);
1453 if (!Users.count(U))
1454 continue;
1455 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1456 PN->replaceAllUsesWith(S);
1457 // The above RAUW will create
1458 // S = lshr S, IterCount
1459 // so we need to fix it back into
1460 // S = lshr PN, IterCount
1461 cast<User>(S)->replaceUsesOfWith(S, PN);
1462 }
1463
1464 return true;
1465}
1466
1467void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1468 for (auto &I : *LoopB)
1469 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1470 I.replaceAllUsesWith(SV);
1471
1472 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1473 N = std::next(I);
1474 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1475 }
1476}
1477
1478unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1479 // Arrays of coefficients of Q and the inverse, C.
1480 // Q[i] = coefficient at x^i.
1481 std::array<char,32> Q, C;
1482
1483 for (unsigned i = 0; i < 32; ++i) {
1484 Q[i] = QP & 1;
1485 QP >>= 1;
1486 }
1487 assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail
("Q[0] == 1", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1487, __extension__ __PRETTY_FUNCTION__))
;
1488
1489 // Find C, such that
1490 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1491 //
1492 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1493 // operations * and + are & and ^ respectively.
1494 //
1495 // Find C[i] recursively, by comparing i-th coefficient in the product
1496 // with 0 (or 1 for i=0).
1497 //
1498 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1499 C[0] = 1;
1500 for (unsigned i = 1; i < 32; ++i) {
1501 // Solve for C[i] in:
1502 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1503 // This is equivalent to
1504 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1505 // which is
1506 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1507 unsigned T = 0;
1508 for (unsigned j = 0; j < i; ++j)
1509 T = T ^ (C[j] & Q[i-j]);
1510 C[i] = T;
1511 }
1512
1513 unsigned QV = 0;
1514 for (unsigned i = 0; i < 32; ++i)
1515 if (C[i])
1516 QV |= (1 << i);
1517
1518 return QV;
1519}
1520
1521Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1522 ParsedValues &PV) {
1523 IRBuilder<> B(&*At);
1524 Module *M = At->getParent()->getParent()->getParent();
1525 Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1526
1527 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1528 unsigned IC = PV.IterCount;
1529
1530 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1531 P0 = P = B.CreateXor(P, PV.M);
1532
1533 // Create a bit mask to clear the high bits beyond IterCount.
1534 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
1535
1536 if (PV.IterCount != 32)
3
Assuming the condition is false
4
Taking false branch
1537 P = B.CreateAnd(P, BMI);
1538
1539 if (PV.Inv) {
5
Assuming the condition is true
6
Taking true branch
1540 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1541 assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth()
<= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1541, __extension__ __PRETTY_FUNCTION__))
;
1542
1543 // Again, clearing bits beyond IterCount.
1544 unsigned M = (1 << PV.IterCount) - 1;
7
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1545 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1546 unsigned QV = getInverseMxN(Tmp) & M;
1547 auto *QVI = ConstantInt::get(QI->getType(), QV);
1548 P = B.CreateCall(PMF, {P, QVI});
1549 P = B.CreateTrunc(P, QI->getType());
1550 if (IC != 32)
1551 P = B.CreateAnd(P, BMI);
1552 }
1553
1554 Value *R = B.CreateCall(PMF, {P, Q});
1555
1556 if (PV.M != nullptr)
1557 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1558
1559 return R;
1560}
1561
1562static bool hasZeroSignBit(const Value *V) {
1563 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1564 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1565 const Instruction *I = dyn_cast<const Instruction>(V);
1566 if (!I)
1567 return false;
1568 switch (I->getOpcode()) {
1569 case Instruction::LShr:
1570 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1571 return SI->getZExtValue() > 0;
1572 return false;
1573 case Instruction::Or:
1574 case Instruction::Xor:
1575 return hasZeroSignBit(I->getOperand(0)) &&
1576 hasZeroSignBit(I->getOperand(1));
1577 case Instruction::And:
1578 return hasZeroSignBit(I->getOperand(0)) ||
1579 hasZeroSignBit(I->getOperand(1));
1580 }
1581 return false;
1582}
1583
1584void PolynomialMultiplyRecognize::setupPreSimplifier(Simplifier &S) {
1585 S.addRule("sink-zext",
1586 // Sink zext past bitwise operations.
1587 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1588 if (I->getOpcode() != Instruction::ZExt)
1589 return nullptr;
1590 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1591 if (!T)
1592 return nullptr;
1593 switch (T->getOpcode()) {
1594 case Instruction::And:
1595 case Instruction::Or:
1596 case Instruction::Xor:
1597 break;
1598 default:
1599 return nullptr;
1600 }
1601 IRBuilder<> B(Ctx);
1602 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1603 B.CreateZExt(T->getOperand(0), I->getType()),
1604 B.CreateZExt(T->getOperand(1), I->getType()));
1605 });
1606 S.addRule("xor/and -> and/xor",
1607 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1608 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1609 if (I->getOpcode() != Instruction::Xor)
1610 return nullptr;
1611 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1612 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1613 if (!And0 || !And1)
1614 return nullptr;
1615 if (And0->getOpcode() != Instruction::And ||
1616 And1->getOpcode() != Instruction::And)
1617 return nullptr;
1618 if (And0->getOperand(1) != And1->getOperand(1))
1619 return nullptr;
1620 IRBuilder<> B(Ctx);
1621 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1622 And0->getOperand(1));
1623 });
1624 S.addRule("sink binop into select",
1625 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1626 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1627 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1628 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1629 if (!BO)
1630 return nullptr;
1631 Instruction::BinaryOps Op = BO->getOpcode();
1632 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1633 IRBuilder<> B(Ctx);
1634 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1635 Value *Z = BO->getOperand(1);
1636 return B.CreateSelect(Sel->getCondition(),
1637 B.CreateBinOp(Op, X, Z),
1638 B.CreateBinOp(Op, Y, Z));
1639 }
1640 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1641 IRBuilder<> B(Ctx);
1642 Value *X = BO->getOperand(0);
1643 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1644 return B.CreateSelect(Sel->getCondition(),
1645 B.CreateBinOp(Op, X, Y),
1646 B.CreateBinOp(Op, X, Z));
1647 }
1648 return nullptr;
1649 });
1650 S.addRule("fold select-select",
1651 // (select c (select c x y) z) -> (select c x z)
1652 // (select c x (select c y z)) -> (select c x z)
1653 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1654 SelectInst *Sel = dyn_cast<SelectInst>(I);
1655 if (!Sel)
1656 return nullptr;
1657 IRBuilder<> B(Ctx);
1658 Value *C = Sel->getCondition();
1659 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1660 if (Sel0->getCondition() == C)
1661 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1662 }
1663 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1664 if (Sel1->getCondition() == C)
1665 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1666 }
1667 return nullptr;
1668 });
1669 S.addRule("or-signbit -> xor-signbit",
1670 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1671 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1672 if (I->getOpcode() != Instruction::Or)
1673 return nullptr;
1674 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1675 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1676 return nullptr;
1677 if (!hasZeroSignBit(I->getOperand(0)))
1678 return nullptr;
1679 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1680 });
1681 S.addRule("sink lshr into binop",
1682 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1683 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1684 if (I->getOpcode() != Instruction::LShr)
1685 return nullptr;
1686 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1687 if (!BitOp)
1688 return nullptr;
1689 switch (BitOp->getOpcode()) {
1690 case Instruction::And:
1691 case Instruction::Or:
1692 case Instruction::Xor:
1693 break;
1694 default:
1695 return nullptr;
1696 }
1697 IRBuilder<> B(Ctx);
1698 Value *S = I->getOperand(1);
1699 return B.CreateBinOp(BitOp->getOpcode(),
1700 B.CreateLShr(BitOp->getOperand(0), S),
1701 B.CreateLShr(BitOp->getOperand(1), S));
1702 });
1703 S.addRule("expose bitop-const",
1704 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1705 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1706 auto IsBitOp = [](unsigned Op) -> bool {
1707 switch (Op) {
1708 case Instruction::And:
1709 case Instruction::Or:
1710 case Instruction::Xor:
1711 return true;
1712 }
1713 return false;
1714 };
1715 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1716 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1717 return nullptr;
1718 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1719 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1720 return nullptr;
1721 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1722 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1723 if (!CA || !CB)
1724 return nullptr;
1725 IRBuilder<> B(Ctx);
1726 Value *X = BitOp2->getOperand(0);
1727 return B.CreateBinOp(BitOp2->getOpcode(), X,
1728 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1729 });
1730}
1731
1732void PolynomialMultiplyRecognize::setupPostSimplifier(Simplifier &S) {
1733 S.addRule("(and (xor (and x a) y) b) -> (and (xor x y) b), if b == b&a",
1734 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1735 if (I->getOpcode() != Instruction::And)
1736 return nullptr;
1737 Instruction *Xor = dyn_cast<Instruction>(I->getOperand(0));
1738 ConstantInt *C0 = dyn_cast<ConstantInt>(I->getOperand(1));
1739 if (!Xor || !C0)
1740 return nullptr;
1741 if (Xor->getOpcode() != Instruction::Xor)
1742 return nullptr;
1743 Instruction *And0 = dyn_cast<Instruction>(Xor->getOperand(0));
1744 Instruction *And1 = dyn_cast<Instruction>(Xor->getOperand(1));
1745 // Pick the first non-null and.
1746 if (!And0 || And0->getOpcode() != Instruction::And)
1747 std::swap(And0, And1);
1748 ConstantInt *C1 = dyn_cast<ConstantInt>(And0->getOperand(1));
1749 if (!C1)
1750 return nullptr;
1751 uint32_t V0 = C0->getZExtValue();
1752 uint32_t V1 = C1->getZExtValue();
1753 if (V0 != (V0 & V1))
1754 return nullptr;
1755 IRBuilder<> B(Ctx);
1756 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1), C0);
1757 });
1758}
1759
1760bool PolynomialMultiplyRecognize::recognize() {
1761 LLVM_DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1762 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1763 // Restrictions:
1764 // - The loop must consist of a single block.
1765 // - The iteration count must be known at compile-time.
1766 // - The loop must have an induction variable starting from 0, and
1767 // incremented in each iteration of the loop.
1768 BasicBlock *LoopB = CurLoop->getHeader();
1769 LLVM_DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1770
1771 if (LoopB != CurLoop->getLoopLatch())
1772 return false;
1773 BasicBlock *ExitB = CurLoop->getExitBlock();
1774 if (ExitB == nullptr)
1775 return false;
1776 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1777 if (EntryB == nullptr)
1778 return false;
1779
1780 unsigned IterCount = 0;
1781 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1782 if (isa<SCEVCouldNotCompute>(CT))
1783 return false;
1784 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1785 IterCount = CV->getValue()->getZExtValue() + 1;
1786
1787 Value *CIV = getCountIV(LoopB);
1788 ParsedValues PV;
1789 Simplifier PreSimp;
1790 PV.IterCount = IterCount;
1791 LLVM_DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCountdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
1792 << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1793
1794 setupPreSimplifier(PreSimp);
1795
1796 // Perform a preliminary scan of select instructions to see if any of them
1797 // looks like a generator of the polynomial multiply steps. Assume that a
1798 // loop can only contain a single transformable operation, so stop the
1799 // traversal after the first reasonable candidate was found.
1800 // XXX: Currently this approach can modify the loop before being 100% sure
1801 // that the transformation can be carried out.
1802 bool FoundPreScan = false;
1803 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1804 for (const Value *U : V->users()) {
1805 if (const auto *P = dyn_cast<const PHINode>(U))
1806 if (P->getParent() == LoopB)
1807 return true;
1808 }
1809 return false;
1810 };
1811 for (Instruction &In : *LoopB) {
1812 SelectInst *SI = dyn_cast<SelectInst>(&In);
1813 if (!SI || !FeedsPHI(SI))
1814 continue;
1815
1816 Simplifier::Context C(SI);
1817 Value *T = PreSimp.simplify(C);
1818 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1819 LLVM_DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1820 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1821 FoundPreScan = true;
1822 if (SelI != SI) {
1823 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1824 SI->replaceAllUsesWith(NewSel);
1825 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1826 }
1827 break;
1828 }
1829 }
1830
1831 if (!FoundPreScan) {
1832 LLVM_DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1833 return false;
1834 }
1835
1836 if (!PV.Left) {
1837 // The right shift version actually only returns the higher bits of
1838 // the result (each iteration discards the LSB). If we want to convert it
1839 // to a left-shifting loop, the working data type must be at least as
1840 // wide as the target's pmpy instruction.
1841 if (!promoteTypes(LoopB, ExitB))
1842 return false;
1843 // Run post-promotion simplifications.
1844 Simplifier PostSimp;
1845 setupPostSimplifier(PostSimp);
1846 for (Instruction &In : *LoopB) {
1847 SelectInst *SI = dyn_cast<SelectInst>(&In);
1848 if (!SI || !FeedsPHI(SI))
1849 continue;
1850 Simplifier::Context C(SI);
1851 Value *T = PostSimp.simplify(C);
1852 SelectInst *SelI = dyn_cast_or_null<SelectInst>(T);
1853 if (SelI != SI) {
1854 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1855 SI->replaceAllUsesWith(NewSel);
1856 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1857 }
1858 break;
1859 }
1860
1861 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1862 return false;
1863 cleanupLoopBody(LoopB);
1864 }
1865
1866 // Scan the loop again, find the generating select instruction.
1867 bool FoundScan = false;
1868 for (Instruction &In : *LoopB) {
1869 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1870 if (!SelI)
1871 continue;
1872 LLVM_DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1873 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1874 if (FoundScan)
1875 break;
1876 }
1877 assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail
("FoundScan", "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1877, __extension__ __PRETTY_FUNCTION__))
;
1878
1879 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1880 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1881 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1882 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1883 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1884 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1885 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1886 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1887 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1888 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1889 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1890 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1891 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1892
1893 BasicBlock::iterator At(EntryB->getTerminator());
1894 Value *PM = generate(At, PV);
1895 if (PM == nullptr)
1896 return false;
1897
1898 if (PM->getType() != PV.Res->getType())
1899 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1900
1901 PV.Res->replaceAllUsesWith(PM);
1902 PV.Res->eraseFromParent();
1903 return true;
1904}
1905
1906int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1907 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1908 return SC->getAPInt().getSExtValue();
1909 return 0;
1910}
1911
1912bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1913 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1914 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1915 return false;
1916
1917 Value *StoredVal = SI->getValueOperand();
1918 Value *StorePtr = SI->getPointerOperand();
1919
1920 // Reject stores that are so large that they overflow an unsigned.
1921 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1922 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1923 return false;
1924
1925 // See if the pointer expression is an AddRec like {base,+,1} on the current
1926 // loop, which indicates a strided store. If we have something else, it's a
1927 // random store we can't handle.
1928 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1929 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1930 return false;
1931
1932 // Check to see if the stride matches the size of the store. If so, then we
1933 // know that every byte is touched in the loop.
1934 int Stride = getSCEVStride(StoreEv);
1935 if (Stride == 0)
1936 return false;
1937 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1938 if (StoreSize != unsigned(std::abs(Stride)))
1939 return false;
1940
1941 // The store must be feeding a non-volatile load.
1942 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1943 if (!LI || !LI->isSimple())
1944 return false;
1945
1946 // See if the pointer expression is an AddRec like {base,+,1} on the current
1947 // loop, which indicates a strided load. If we have something else, it's a
1948 // random load we can't handle.
1949 Value *LoadPtr = LI->getPointerOperand();
1950 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1951 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1952 return false;
1953
1954 // The store and load must share the same stride.
1955 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1956 return false;
1957
1958 // Success. This store can be converted into a memcpy.
1959 return true;
1960}
1961
1962/// mayLoopAccessLocation - Return true if the specified loop might access the
1963/// specified pointer location, which is a loop-strided access. The 'Access'
1964/// argument specifies what the verboten forms of access are (read or write).
1965static bool
1966mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1967 const SCEV *BECount, unsigned StoreSize,
1968 AliasAnalysis &AA,
1969 SmallPtrSetImpl<Instruction *> &Ignored) {
1970 // Get the location that may be stored across the loop. Since the access
1971 // is strided positively through memory, we say that the modified location
1972 // starts at the pointer and has infinite size.
1973 LocationSize AccessSize = MemoryLocation::UnknownSize;
1974
1975 // If the loop iterates a fixed number of times, we can refine the access
1976 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1977 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1978 AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
1979
1980 // TODO: For this to be really effective, we have to dive into the pointer
1981 // operand in the store. Store to &A[i] of 100 will always return may alias
1982 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1983 // which will then no-alias a store to &A[100].
1984 MemoryLocation StoreLoc(Ptr, AccessSize);
1985
1986 for (auto *B : L->blocks())
1987 for (auto &I : *B)
1988 if (Ignored.count(&I) == 0 &&
1989 isModOrRefSet(
1990 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
1991 return true;
1992
1993 return false;
1994}
1995
1996void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
1997 SmallVectorImpl<StoreInst*> &Stores) {
1998 Stores.clear();
1999 for (Instruction &I : *BB)
2000 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
2001 if (isLegalStore(CurLoop, SI))
2002 Stores.push_back(SI);
2003}
2004
2005bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
2006 StoreInst *SI, const SCEV *BECount) {
2007 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __extension__ __PRETTY_FUNCTION__))
2008 "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __extension__ __PRETTY_FUNCTION__))
2009 "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2009, __extension__ __PRETTY_FUNCTION__))
;
2010
2011 Value *StorePtr = SI->getPointerOperand();
2012 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
2013 unsigned Stride = getSCEVStride(StoreEv);
2014 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
2015 if (Stride != StoreSize)
2016 return false;
2017
2018 // See if the pointer expression is an AddRec like {base,+,1} on the current
2019 // loop, which indicates a strided load. If we have something else, it's a
2020 // random load we can't handle.
2021 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
2022 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
2023
2024 // The trip count of the loop and the base pointer of the addrec SCEV is
2025 // guaranteed to be loop invariant, which means that it should dominate the
2026 // header. This allows us to insert code for it in the preheader.
2027 BasicBlock *Preheader = CurLoop->getLoopPreheader();
2028 Instruction *ExpPt = Preheader->getTerminator();
2029 IRBuilder<> Builder(ExpPt);
2030 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
2031
2032 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
2033
2034 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
2035 // this into a memcpy/memmove in the loop preheader now if we want. However,
2036 // this would be unsafe to do if there is anything else in the loop that may
2037 // read or write the memory region we're storing to. For memcpy, this
2038 // includes the load that feeds the stores. Check for an alias by generating
2039 // the base address and checking everything.
2040 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
2041 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
2042 Value *LoadBasePtr = nullptr;
2043
2044 bool Overlap = false;
2045 bool DestVolatile = SI->isVolatile();
2046 Type *BECountTy = BECount->getType();
2047
2048 if (DestVolatile) {
2049 // The trip count must fit in i32, since it is the type of the "num_words"
2050 // argument to hexagon_memcpy_forward_vp4cp4n2.
2051 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
2052CleanupAndExit:
2053 // If we generated new code for the base pointer, clean up.
2054 Expander.clear();
2055 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
2056 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2057 StoreBasePtr = nullptr;
2058 }
2059 if (LoadBasePtr) {
2060 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2061 LoadBasePtr = nullptr;
2062 }
2063 return false;
2064 }
2065 }
2066
2067 SmallPtrSet<Instruction*, 2> Ignore1;
2068 Ignore1.insert(SI);
2069 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2070 StoreSize, *AA, Ignore1)) {
2071 // Check if the load is the offending instruction.
2072 Ignore1.insert(LI);
2073 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2074 BECount, StoreSize, *AA, Ignore1)) {
2075 // Still bad. Nothing we can do.
2076 goto CleanupAndExit;
2077 }
2078 // It worked with the load ignored.
2079 Overlap = true;
2080 }
2081
2082 if (!Overlap) {
2083 if (DisableMemcpyIdiom || !HasMemcpy)
2084 goto CleanupAndExit;
2085 } else {
2086 // Don't generate memmove if this function will be inlined. This is
2087 // because the caller will undergo this transformation after inlining.
2088 Function *Func = CurLoop->getHeader()->getParent();
2089 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2090 goto CleanupAndExit;
2091
2092 // In case of a memmove, the call to memmove will be executed instead
2093 // of the loop, so we need to make sure that there is nothing else in
2094 // the loop than the load, store and instructions that these two depend
2095 // on.
2096 SmallVector<Instruction*,2> Insts;
2097 Insts.push_back(SI);
2098 Insts.push_back(LI);
2099 if (!coverLoop(CurLoop, Insts))
2100 goto CleanupAndExit;
2101
2102 if (DisableMemmoveIdiom || !HasMemmove)
2103 goto CleanupAndExit;
2104 bool IsNested = CurLoop->getParentLoop() != nullptr;
2105 if (IsNested && OnlyNonNestedMemmove)
2106 goto CleanupAndExit;
2107 }
2108
2109 // For a memcpy, we have to make sure that the input array is not being
2110 // mutated by the loop.
2111 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2112 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2113
2114 SmallPtrSet<Instruction*, 2> Ignore2;
2115 Ignore2.insert(SI);
2116 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2117 StoreSize, *AA, Ignore2))
2118 goto CleanupAndExit;
2119
2120 // Check the stride.
2121 bool StridePos = getSCEVStride(LoadEv) >= 0;
2122
2123 // Currently, the volatile memcpy only emulates traversing memory forward.
2124 if (!StridePos && DestVolatile)
2125 goto CleanupAndExit;
2126
2127 bool RuntimeCheck = (Overlap || DestVolatile);
2128
2129 BasicBlock *ExitB;
2130 if (RuntimeCheck) {
2131 // The runtime check needs a single exit block.
2132 SmallVector<BasicBlock*, 8> ExitBlocks;
2133 CurLoop->getUniqueExitBlocks(ExitBlocks);
2134 if (ExitBlocks.size() != 1)
2135 goto CleanupAndExit;
2136 ExitB = ExitBlocks[0];
2137 }
2138
2139 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2140 // pointer size if it isn't already.
2141 LLVMContext &Ctx = SI->getContext();
2142 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2143 DebugLoc DLoc = SI->getDebugLoc();
2144
2145 const SCEV *NumBytesS =
2146 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2147 if (StoreSize != 1)
2148 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2149 SCEV::FlagNUW);
2150 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2151 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2152 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2153 NumBytes = Simp;
2154
2155 CallInst *NewCall;
2156
2157 if (RuntimeCheck) {
2158 unsigned Threshold = RuntimeMemSizeThreshold;
2159 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2160 uint64_t C = CI->getZExtValue();
2161 if (Threshold != 0 && C < Threshold)
2162 goto CleanupAndExit;
2163 if (C < CompileTimeMemSizeThreshold)
2164 goto CleanupAndExit;
2165 }
2166
2167 BasicBlock *Header = CurLoop->getHeader();
2168 Function *Func = Header->getParent();
2169 Loop *ParentL = LF->getLoopFor(Preheader);
2170 StringRef HeaderName = Header->getName();
2171
2172 // Create a new (empty) preheader, and update the PHI nodes in the
2173 // header to use the new preheader.
2174 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2175 Func, Header);
2176 if (ParentL)
2177 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2178 IRBuilder<>(NewPreheader).CreateBr(Header);
2179 for (auto &In : *Header) {
2180 PHINode *PN = dyn_cast<PHINode>(&In);
2181 if (!PN)
2182 break;
2183 int bx = PN->getBasicBlockIndex(Preheader);
2184 if (bx >= 0)
2185 PN->setIncomingBlock(bx, NewPreheader);
2186 }
2187 DT->addNewBlock(NewPreheader, Preheader);
2188 DT->changeImmediateDominator(Header, NewPreheader);
2189
2190 // Check for safe conditions to execute memmove.
2191 // If stride is positive, copying things from higher to lower addresses
2192 // is equivalent to memmove. For negative stride, it's the other way
2193 // around. Copying forward in memory with positive stride may not be
2194 // same as memmove since we may be copying values that we just stored
2195 // in some previous iteration.
2196 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2197 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2198 Value *LowA = StridePos ? SA : LA;
2199 Value *HighA = StridePos ? LA : SA;
2200 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2201 Value *Cond = CmpA;
2202
2203 // Check for distance between pointers. Since the case LowA < HighA
2204 // is checked for above, assume LowA >= HighA.
2205 Value *Dist = Builder.CreateSub(LowA, HighA);
2206 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2207 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2208 Cond = CmpEither;
2209
2210 if (Threshold != 0) {
2211 Type *Ty = NumBytes->getType();
2212 Value *Thr = ConstantInt::get(Ty, Threshold);
2213 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2214 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2215 Cond = CmpBoth;
2216 }
2217 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2218 Func, NewPreheader);
2219 if (ParentL)
2220 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2221 Instruction *OldT = Preheader->getTerminator();
2222 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2223 OldT->eraseFromParent();
2224 Preheader->setName(Preheader->getName()+".old");
2225 DT->addNewBlock(MemmoveB, Preheader);
2226 // Find the new immediate dominator of the exit block.
2227 BasicBlock *ExitD = Preheader;
2228 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2229 BasicBlock *PB = *PI;
2230 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2231 if (!ExitD)
2232 break;
2233 }
2234 // If the prior immediate dominator of ExitB was dominated by the
2235 // old preheader, then the old preheader becomes the new immediate
2236 // dominator. Otherwise don't change anything (because the newly
2237 // added blocks are dominated by the old preheader).
2238 if (ExitD && DT->dominates(Preheader, ExitD)) {
2239 DomTreeNode *BN = DT->getNode(ExitB);
2240 DomTreeNode *DN = DT->getNode(ExitD);
2241 BN->setIDom(DN);
2242 }
2243
2244 // Add a call to memmove to the conditional block.
2245 IRBuilder<> CondBuilder(MemmoveB);
2246 CondBuilder.CreateBr(ExitB);
2247 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2248
2249 if (DestVolatile) {
2250 Type *Int32Ty = Type::getInt32Ty(Ctx);
2251 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2252 Type *VoidTy = Type::getVoidTy(Ctx);
2253 Module *M = Func->getParent();
2254 Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
2255 Int32PtrTy, Int32PtrTy, Int32Ty);
2256 Function *Fn = cast<Function>(CF);
2257 Fn->setLinkage(Function::ExternalLinkage);
2258
2259 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2260 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2261 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2262 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2263 MemmoveB->getTerminator());
2264 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2265 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2266 NumWords = Simp;
2267
2268 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2269 ? StoreBasePtr
2270 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2271 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2272 ? LoadBasePtr
2273 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2274 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2275 } else {
2276 NewCall = CondBuilder.CreateMemMove(StoreBasePtr, SI->getAlignment(),
2277 LoadBasePtr, LI->getAlignment(),
2278 NumBytes);
2279 }
2280 } else {
2281 NewCall = Builder.CreateMemCpy(StoreBasePtr, SI->getAlignment(),
2282 LoadBasePtr, LI->getAlignment(),
2283 NumBytes);
2284 // Okay, the memcpy has been formed. Zap the original store and
2285 // anything that feeds into it.
2286 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2287 }
2288
2289 NewCall->setDebugLoc(DLoc);
2290
2291 LLVM_DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2292 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2293 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2294 << " from store ptr=" << *StoreEv << " at: " << *SIdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2295 << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2296
2297 return true;
2298}
2299
2300// Check if the instructions in Insts, together with their dependencies
2301// cover the loop in the sense that the loop could be safely eliminated once
2302// the instructions in Insts are removed.
2303bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2304 SmallVectorImpl<Instruction*> &Insts) const {
2305 SmallSet<BasicBlock*,8> LoopBlocks;
2306 for (auto *B : L->blocks())
2307 LoopBlocks.insert(B);
2308
2309 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2310
2311 // Collect all instructions from the loop that the instructions in Insts
2312 // depend on (plus their dependencies, etc.). These instructions will
2313 // constitute the expression trees that feed those in Insts, but the trees
2314 // will be limited only to instructions contained in the loop.
2315 for (unsigned i = 0; i < Worklist.size(); ++i) {
2316 Instruction *In = Worklist[i];
2317 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2318 Instruction *OpI = dyn_cast<Instruction>(I);
2319 if (!OpI)
2320 continue;
2321 BasicBlock *PB = OpI->getParent();
2322 if (!LoopBlocks.count(PB))
2323 continue;
2324 Worklist.insert(OpI);
2325 }
2326 }
2327
2328 // Scan all instructions in the loop, if any of them have a user outside
2329 // of the loop, or outside of the expressions collected above, then either
2330 // the loop has a side-effect visible outside of it, or there are
2331 // instructions in it that are not involved in the original set Insts.
2332 for (auto *B : L->blocks()) {
2333 for (auto &In : *B) {
2334 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2335 continue;
2336 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2337 return false;
2338 for (const auto &K : In.users()) {
2339 Instruction *UseI = dyn_cast<Instruction>(K);
2340 if (!UseI)
2341 continue;
2342 BasicBlock *UseB = UseI->getParent();
2343 if (LF->getLoopFor(UseB) != L)
2344 return false;
2345 }
2346 }
2347 }
2348
2349 return true;
2350}
2351
2352/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2353/// with the specified backedge count. This block is known to be in the current
2354/// loop and not in any subloops.
2355bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2356 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2357 // We can only promote stores in this block if they are unconditionally
2358 // executed in the loop. For a block to be unconditionally executed, it has
2359 // to dominate all the exit blocks of the loop. Verify this now.
2360 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2361 return DT->dominates(BB, EB);
2362 };
2363 if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
2364 return false;
2365
2366 bool MadeChange = false;
2367 // Look for store instructions, which may be optimized to memset/memcpy.
2368 SmallVector<StoreInst*,8> Stores;
2369 collectStores(CurLoop, BB, Stores);
2370
2371 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2372 for (auto &SI : Stores)
2373 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2374
2375 return MadeChange;
2376}
2377
2378bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2379 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2380 if (PMR.recognize())
2381 return true;
2382
2383 if (!HasMemcpy && !HasMemmove)
2384 return false;
2385
2386 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2387 assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __extension__ __PRETTY_FUNCTION__))
2388 "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __extension__ __PRETTY_FUNCTION__))
2389 "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-7~svn338205/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2389, __extension__ __PRETTY_FUNCTION__))
;
2390
2391 SmallVector<BasicBlock *, 8> ExitBlocks;
2392 L->getUniqueExitBlocks(ExitBlocks);
2393
2394 bool Changed = false;
2395
2396 // Scan all the blocks in the loop that are not in subloops.
2397 for (auto *BB : L->getBlocks()) {
2398 // Ignore blocks in subloops.
2399 if (LF->getLoopFor(BB) != L)
2400 continue;
2401 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2402 }
2403
2404 return Changed;
2405}
2406
2407bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
2408 const Module &M = *L->getHeader()->getParent()->getParent();
2409 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2410 return false;
2411
2412 if (skipLoop(L))
2413 return false;
2414
2415 // If the loop could not be converted to canonical form, it must have an
2416 // indirectbr in it, just give up.
2417 if (!L->getLoopPreheader())
2418 return false;
2419
2420 // Disable loop idiom recognition if the function's name is a common idiom.
2421 StringRef Name = L->getHeader()->getParent()->getName();
2422 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2423 return false;
2424
2425 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2426 DL = &L->getHeader()->getModule()->getDataLayout();
2427 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2428 LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2429 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
2430 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2431
2432 HasMemcpy = TLI->has(LibFunc_memcpy);
2433 HasMemmove = TLI->has(LibFunc_memmove);
2434
2435 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2436 return runOnCountableLoop(L);
2437 return false;
2438}
2439
2440Pass *llvm::createHexagonLoopIdiomPass() {
2441 return new HexagonLoopIdiomRecognize();
2442}