Bug Summary

File:lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1535, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
11
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Triple.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/InstructionSimplify.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/MemoryLocation.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpander.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/Intrinsics.h"
45#include "llvm/IR/Module.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/User.h"
49#include "llvm/IR/Value.h"
50#include "llvm/Pass.h"
51#include "llvm/Support/Casting.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Compiler.h"
54#include "llvm/Support/Debug.h"
55#include "llvm/Support/ErrorHandling.h"
56#include "llvm/Support/KnownBits.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/Transforms/Scalar.h"
59#include "llvm/Transforms/Utils/Local.h"
60#include <algorithm>
61#include <array>
62#include <cassert>
63#include <cstdint>
64#include <cstdlib>
65#include <deque>
66#include <functional>
67#include <iterator>
68#include <map>
69#include <set>
70#include <utility>
71#include <vector>
72
73using namespace llvm;
74
75static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
76 cl::Hidden, cl::init(false),
77 cl::desc("Disable generation of memcpy in loop idiom recognition"));
78
79static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
80 cl::Hidden, cl::init(false),
81 cl::desc("Disable generation of memmove in loop idiom recognition"));
82
83static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
84 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
85 "check guarding the memmove."));
86
87static cl::opt<unsigned> CompileTimeMemSizeThreshold(
88 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
89 cl::desc("Threshold (in bytes) to perform the transformation, if the "
90 "runtime loop count (mem transfer size) is known at compile-time."));
91
92static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
93 cl::Hidden, cl::init(true),
94 cl::desc("Only enable generating memmove in non-nested loops"));
95
96cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
97 cl::Hidden, cl::init(false),
98 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
99
100static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
101 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
102
103static const char *HexagonVolatileMemcpyName
104 = "hexagon_memcpy_forward_vp4cp4n2";
105
106
107namespace llvm {
108
109 void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
110 Pass *createHexagonLoopIdiomPass();
111
112} // end namespace llvm
113
114namespace {
115
116 class HexagonLoopIdiomRecognize : public LoopPass {
117 public:
118 static char ID;
119
120 explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
121 initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
122 }
123
124 StringRef getPassName() const override {
125 return "Recognize Hexagon-specific loop idioms";
126 }
127
128 void getAnalysisUsage(AnalysisUsage &AU) const override {
129 AU.addRequired<LoopInfoWrapperPass>();
130 AU.addRequiredID(LoopSimplifyID);
131 AU.addRequiredID(LCSSAID);
132 AU.addRequired<AAResultsWrapperPass>();
133 AU.addPreserved<AAResultsWrapperPass>();
134 AU.addRequired<ScalarEvolutionWrapperPass>();
135 AU.addRequired<DominatorTreeWrapperPass>();
136 AU.addRequired<TargetLibraryInfoWrapperPass>();
137 AU.addPreserved<TargetLibraryInfoWrapperPass>();
138 }
139
140 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
141
142 private:
143 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
144 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
145 void collectStores(Loop *CurLoop, BasicBlock *BB,
146 SmallVectorImpl<StoreInst*> &Stores);
147 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
148 bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
149 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
150 SmallVectorImpl<BasicBlock*> &ExitBlocks);
151 bool runOnCountableLoop(Loop *L);
152
153 AliasAnalysis *AA;
154 const DataLayout *DL;
155 DominatorTree *DT;
156 LoopInfo *LF;
157 const TargetLibraryInfo *TLI;
158 ScalarEvolution *SE;
159 bool HasMemcpy, HasMemmove;
160 };
161
162 struct Simplifier {
163 struct Rule {
164 using FuncType = std::function<Value* (Instruction*, LLVMContext&)>;
165 Rule(StringRef N, FuncType F) : Name(N), Fn(F) {}
166 StringRef Name; // For debugging.
167 FuncType Fn;
168 };
169
170 void addRule(StringRef N, const Rule::FuncType &F) {
171 Rules.push_back(Rule(N, F));
172 }
173
174 private:
175 struct WorkListType {
176 WorkListType() = default;
177
178 void push_back(Value* V) {
179 // Do not push back duplicates.
180 if (!S.count(V)) { Q.push_back(V); S.insert(V); }
181 }
182
183 Value *pop_front_val() {
184 Value *V = Q.front(); Q.pop_front(); S.erase(V);
185 return V;
186 }
187
188 bool empty() const { return Q.empty(); }
189
190 private:
191 std::deque<Value*> Q;
192 std::set<Value*> S;
193 };
194
195 using ValueSetType = std::set<Value *>;
196
197 std::vector<Rule> Rules;
198
199 public:
200 struct Context {
201 using ValueMapType = DenseMap<Value *, Value *>;
202
203 Value *Root;
204 ValueSetType Used; // The set of all cloned values used by Root.
205 ValueSetType Clones; // The set of all cloned values.
206 LLVMContext &Ctx;
207
208 Context(Instruction *Exp)
209 : Ctx(Exp->getParent()->getParent()->getContext()) {
210 initialize(Exp);
211 }
212
213 ~Context() { cleanup(); }
214
215 void print(raw_ostream &OS, const Value *V) const;
216 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
217
218 private:
219 friend struct Simplifier;
220
221 void initialize(Instruction *Exp);
222 void cleanup();
223
224 template <typename FuncT> void traverse(Value *V, FuncT F);
225 void record(Value *V);
226 void use(Value *V);
227 void unuse(Value *V);
228
229 bool equal(const Instruction *I, const Instruction *J) const;
230 Value *find(Value *Tree, Value *Sub) const;
231 Value *subst(Value *Tree, Value *OldV, Value *NewV);
232 void replace(Value *OldV, Value *NewV);
233 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
234 };
235
236 Value *simplify(Context &C);
237 };
238
239 struct PE {
240 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
241
242 const Simplifier::Context &C;
243 const Value *V;
244 };
245
246 raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED__attribute__((__used__));
247 raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
248 P.C.print(OS, P.V ? P.V : P.C.Root);
249 return OS;
250 }
251
252} // end anonymous namespace
253
254char HexagonLoopIdiomRecognize::ID = 0;
255
256INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
257 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
258INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
259INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
260INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
261INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
262INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
263INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
264INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
265INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
266 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
267
268template <typename FuncT>
269void Simplifier::Context::traverse(Value *V, FuncT F) {
270 WorkListType Q;
271 Q.push_back(V);
272
273 while (!Q.empty()) {
274 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
275 if (!U || U->getParent())
276 continue;
277 if (!F(U))
278 continue;
279 for (Value *Op : U->operands())
280 Q.push_back(Op);
281 }
282}
283
284void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
285 const auto *U = dyn_cast<const Instruction>(V);
286 if (!U) {
287 OS << V << '(' << *V << ')';
288 return;
289 }
290
291 if (U->getParent()) {
292 OS << U << '(';
293 U->printAsOperand(OS, true);
294 OS << ')';
295 return;
296 }
297
298 unsigned N = U->getNumOperands();
299 if (N != 0)
300 OS << U << '(';
301 OS << U->getOpcodeName();
302 for (const Value *Op : U->operands()) {
303 OS << ' ';
304 print(OS, Op);
305 }
306 if (N != 0)
307 OS << ')';
308}
309
310void Simplifier::Context::initialize(Instruction *Exp) {
311 // Perform a deep clone of the expression, set Root to the root
312 // of the clone, and build a map from the cloned values to the
313 // original ones.
314 ValueMapType M;
315 BasicBlock *Block = Exp->getParent();
316 WorkListType Q;
317 Q.push_back(Exp);
318
319 while (!Q.empty()) {
320 Value *V = Q.pop_front_val();
321 if (M.find(V) != M.end())
322 continue;
323 if (Instruction *U = dyn_cast<Instruction>(V)) {
324 if (isa<PHINode>(U) || U->getParent() != Block)
325 continue;
326 for (Value *Op : U->operands())
327 Q.push_back(Op);
328 M.insert({U, U->clone()});
329 }
330 }
331
332 for (std::pair<Value*,Value*> P : M) {
333 Instruction *U = cast<Instruction>(P.second);
334 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
335 auto F = M.find(U->getOperand(i));
336 if (F != M.end())
337 U->setOperand(i, F->second);
338 }
339 }
340
341 auto R = M.find(Exp);
342 assert(R != M.end())(static_cast <bool> (R != M.end()) ? void (0) : __assert_fail
("R != M.end()", "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 342, __extension__ __PRETTY_FUNCTION__))
;
343 Root = R->second;
344
345 record(Root);
346 use(Root);
347}
348
349void Simplifier::Context::record(Value *V) {
350 auto Record = [this](Instruction *U) -> bool {
351 Clones.insert(U);
352 return true;
353 };
354 traverse(V, Record);
355}
356
357void Simplifier::Context::use(Value *V) {
358 auto Use = [this](Instruction *U) -> bool {
359 Used.insert(U);
360 return true;
361 };
362 traverse(V, Use);
363}
364
365void Simplifier::Context::unuse(Value *V) {
366 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
367 return;
368
369 auto Unuse = [this](Instruction *U) -> bool {
370 if (!U->use_empty())
371 return false;
372 Used.erase(U);
373 return true;
374 };
375 traverse(V, Unuse);
376}
377
378Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
379 if (Tree == OldV)
380 return NewV;
381 if (OldV == NewV)
382 return Tree;
383
384 WorkListType Q;
385 Q.push_back(Tree);
386 while (!Q.empty()) {
387 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
388 // If U is not an instruction, or it's not a clone, skip it.
389 if (!U || U->getParent())
390 continue;
391 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
392 Value *Op = U->getOperand(i);
393 if (Op == OldV) {
394 U->setOperand(i, NewV);
395 unuse(OldV);
396 } else {
397 Q.push_back(Op);
398 }
399 }
400 }
401 return Tree;
402}
403
404void Simplifier::Context::replace(Value *OldV, Value *NewV) {
405 if (Root == OldV) {
406 Root = NewV;
407 use(Root);
408 return;
409 }
410
411 // NewV may be a complex tree that has just been created by one of the
412 // transformation rules. We need to make sure that it is commoned with
413 // the existing Root to the maximum extent possible.
414 // Identify all subtrees of NewV (including NewV itself) that have
415 // equivalent counterparts in Root, and replace those subtrees with
416 // these counterparts.
417 WorkListType Q;
418 Q.push_back(NewV);
419 while (!Q.empty()) {
420 Value *V = Q.pop_front_val();
421 Instruction *U = dyn_cast<Instruction>(V);
422 if (!U || U->getParent())
423 continue;
424 if (Value *DupV = find(Root, V)) {
425 if (DupV != V)
426 NewV = subst(NewV, V, DupV);
427 } else {
428 for (Value *Op : U->operands())
429 Q.push_back(Op);
430 }
431 }
432
433 // Now, simply replace OldV with NewV in Root.
434 Root = subst(Root, OldV, NewV);
435 use(Root);
436}
437
438void Simplifier::Context::cleanup() {
439 for (Value *V : Clones) {
440 Instruction *U = cast<Instruction>(V);
441 if (!U->getParent())
442 U->dropAllReferences();
443 }
444
445 for (Value *V : Clones) {
446 Instruction *U = cast<Instruction>(V);
447 if (!U->getParent())
448 U->deleteValue();
449 }
450}
451
452bool Simplifier::Context::equal(const Instruction *I,
453 const Instruction *J) const {
454 if (I == J)
455 return true;
456 if (!I->isSameOperationAs(J))
457 return false;
458 if (isa<PHINode>(I))
459 return I->isIdenticalTo(J);
460
461 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
462 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
463 if (OpI == OpJ)
464 continue;
465 auto *InI = dyn_cast<const Instruction>(OpI);
466 auto *InJ = dyn_cast<const Instruction>(OpJ);
467 if (InI && InJ) {
468 if (!equal(InI, InJ))
469 return false;
470 } else if (InI != InJ || !InI)
471 return false;
472 }
473 return true;
474}
475
476Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
477 Instruction *SubI = dyn_cast<Instruction>(Sub);
478 WorkListType Q;
479 Q.push_back(Tree);
480
481 while (!Q.empty()) {
482 Value *V = Q.pop_front_val();
483 if (V == Sub)
484 return V;
485 Instruction *U = dyn_cast<Instruction>(V);
486 if (!U || U->getParent())
487 continue;
488 if (SubI && equal(SubI, U))
489 return U;
490 assert(!isa<PHINode>(U))(static_cast <bool> (!isa<PHINode>(U)) ? void (0)
: __assert_fail ("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 490, __extension__ __PRETTY_FUNCTION__))
;
491 for (Value *Op : U->operands())
492 Q.push_back(Op);
493 }
494 return nullptr;
495}
496
497void Simplifier::Context::link(Instruction *I, BasicBlock *B,
498 BasicBlock::iterator At) {
499 if (I->getParent())
500 return;
501
502 for (Value *Op : I->operands()) {
503 if (Instruction *OpI = dyn_cast<Instruction>(Op))
504 link(OpI, B, At);
505 }
506
507 B->getInstList().insert(At, I);
508}
509
510Value *Simplifier::Context::materialize(BasicBlock *B,
511 BasicBlock::iterator At) {
512 if (Instruction *RootI = dyn_cast<Instruction>(Root))
513 link(RootI, B, At);
514 return Root;
515}
516
517Value *Simplifier::simplify(Context &C) {
518 WorkListType Q;
519 Q.push_back(C.Root);
520 unsigned Count = 0;
521 const unsigned Limit = SimplifyLimit;
522
523 while (!Q.empty()) {
524 if (Count++ >= Limit)
525 break;
526 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
527 if (!U || U->getParent() || !C.Used.count(U))
528 continue;
529 bool Changed = false;
530 for (Rule &R : Rules) {
531 Value *W = R.Fn(U, C.Ctx);
532 if (!W)
533 continue;
534 Changed = true;
535 C.record(W);
536 C.replace(U, W);
537 Q.push_back(C.Root);
538 break;
539 }
540 if (!Changed) {
541 for (Value *Op : U->operands())
542 Q.push_back(Op);
543 }
544 }
545 return Count < Limit ? C.Root : nullptr;
546}
547
548//===----------------------------------------------------------------------===//
549//
550// Implementation of PolynomialMultiplyRecognize
551//
552//===----------------------------------------------------------------------===//
553
554namespace {
555
556 class PolynomialMultiplyRecognize {
557 public:
558 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
559 const DominatorTree &dt, const TargetLibraryInfo &tli,
560 ScalarEvolution &se)
561 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
562
563 bool recognize();
564
565 private:
566 using ValueSeq = SetVector<Value *>;
567
568 IntegerType *getPmpyType() const {
569 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
570 return IntegerType::get(Ctx, 32);
571 }
572
573 bool isPromotableTo(Value *V, IntegerType *Ty);
574 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
575 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
576
577 Value *getCountIV(BasicBlock *BB);
578 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
579 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
580 ValueSeq &Late);
581 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
582 bool commutesWithShift(Instruction *I);
583 bool highBitsAreZero(Value *V, unsigned IterCount);
584 bool keepsHighBitsZero(Value *V, unsigned IterCount);
585 bool isOperandShifted(Instruction *I, Value *Op);
586 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
587 unsigned IterCount);
588 void cleanupLoopBody(BasicBlock *LoopB);
589
590 struct ParsedValues {
591 ParsedValues() = default;
592
593 Value *M = nullptr;
594 Value *P = nullptr;
595 Value *Q = nullptr;
596 Value *R = nullptr;
597 Value *X = nullptr;
598 Instruction *Res = nullptr;
599 unsigned IterCount = 0;
600 bool Left = false;
601 bool Inv = false;
602 };
603
604 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
605 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
606 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
607 Value *CIV, ParsedValues &PV, bool PreScan);
608 unsigned getInverseMxN(unsigned QP);
609 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
610
611 void setupSimplifier();
612
613 Simplifier Simp;
614 Loop *CurLoop;
615 const DataLayout &DL;
616 const DominatorTree &DT;
617 const TargetLibraryInfo &TLI;
618 ScalarEvolution &SE;
619 };
620
621} // end anonymous namespace
622
623Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
624 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
625 if (std::distance(PI, PE) != 2)
626 return nullptr;
627 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
628
629 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
630 auto *PN = cast<PHINode>(I);
631 Value *InitV = PN->getIncomingValueForBlock(PB);
632 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
633 continue;
634 Value *IterV = PN->getIncomingValueForBlock(BB);
635 if (!isa<BinaryOperator>(IterV))
636 continue;
637 auto *BO = dyn_cast<BinaryOperator>(IterV);
638 if (BO->getOpcode() != Instruction::Add)
639 continue;
640 Value *IncV = nullptr;
641 if (BO->getOperand(0) == PN)
642 IncV = BO->getOperand(1);
643 else if (BO->getOperand(1) == PN)
644 IncV = BO->getOperand(0);
645 if (IncV == nullptr)
646 continue;
647
648 if (auto *T = dyn_cast<ConstantInt>(IncV))
649 if (T->getZExtValue() == 1)
650 return PN;
651 }
652 return nullptr;
653}
654
655static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
656 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
657 Use &TheUse = UI.getUse();
658 ++UI;
659 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
660 if (BB == II->getParent())
661 II->replaceUsesOfWith(I, J);
662 }
663}
664
665bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
666 Value *CIV, ParsedValues &PV) {
667 // Match the following:
668 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
669 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
670 // The condition may also check for equality with the masked value, i.e
671 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
672 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
673
674 Value *CondV = SelI->getCondition();
675 Value *TrueV = SelI->getTrueValue();
676 Value *FalseV = SelI->getFalseValue();
677
678 using namespace PatternMatch;
679
680 CmpInst::Predicate P;
681 Value *A = nullptr, *B = nullptr, *C = nullptr;
682
683 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
684 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
685 return false;
686 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
687 return false;
688 // Matched: select (A & B) == C ? ... : ...
689 // select (A & B) != C ? ... : ...
690
691 Value *X = nullptr, *Sh1 = nullptr;
692 // Check (A & B) for (X & (1 << i)):
693 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
694 Sh1 = A;
695 X = B;
696 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
697 Sh1 = B;
698 X = A;
699 } else {
700 // TODO: Could also check for an induction variable containing single
701 // bit shifted left by 1 in each iteration.
702 return false;
703 }
704
705 bool TrueIfZero;
706
707 // Check C against the possible values for comparison: 0 and (1 << i):
708 if (match(C, m_Zero()))
709 TrueIfZero = (P == CmpInst::ICMP_EQ);
710 else if (C == Sh1)
711 TrueIfZero = (P == CmpInst::ICMP_NE);
712 else
713 return false;
714
715 // So far, matched:
716 // select (X & (1 << i)) ? ... : ...
717 // including variations of the check against zero/non-zero value.
718
719 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
720 if (TrueIfZero) {
721 ShouldSameV = TrueV;
722 ShouldXoredV = FalseV;
723 } else {
724 ShouldSameV = FalseV;
725 ShouldXoredV = TrueV;
726 }
727
728 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
729 Value *T = nullptr;
730 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
731 // Matched: select +++ ? ... : Y ^ Z
732 // select +++ ? Y ^ Z : ...
733 // where +++ denotes previously checked matches.
734 if (ShouldSameV == Y)
735 T = Z;
736 else if (ShouldSameV == Z)
737 T = Y;
738 else
739 return false;
740 R = ShouldSameV;
741 // Matched: select +++ ? R : R ^ T
742 // select +++ ? R ^ T : R
743 // depending on TrueIfZero.
744
745 } else if (match(ShouldSameV, m_Zero())) {
746 // Matched: select +++ ? 0 : ...
747 // select +++ ? ... : 0
748 if (!SelI->hasOneUse())
749 return false;
750 T = ShouldXoredV;
751 // Matched: select +++ ? 0 : T
752 // select +++ ? T : 0
753
754 Value *U = *SelI->user_begin();
755 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
756 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
757 return false;
758 // Matched: xor (select +++ ? 0 : T), R
759 // xor (select +++ ? T : 0), R
760 } else
761 return false;
762
763 // The xor input value T is isolated into its own match so that it could
764 // be checked against an induction variable containing a shifted bit
765 // (todo).
766 // For now, check against (Q << i).
767 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
768 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
769 return false;
770 // Matched: select +++ ? R : R ^ (Q << i)
771 // select +++ ? R ^ (Q << i) : R
772
773 PV.X = X;
774 PV.Q = Q;
775 PV.R = R;
776 PV.Left = true;
777 return true;
778}
779
780bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
781 ParsedValues &PV) {
782 // Match the following:
783 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
784 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
785 // The condition may also check for equality with the masked value, i.e
786 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
787 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
788
789 Value *CondV = SelI->getCondition();
790 Value *TrueV = SelI->getTrueValue();
791 Value *FalseV = SelI->getFalseValue();
792
793 using namespace PatternMatch;
794
795 Value *C = nullptr;
796 CmpInst::Predicate P;
797 bool TrueIfZero;
798
799 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
800 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
801 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
802 return false;
803 // Matched: select C == 0 ? ... : ...
804 // select C != 0 ? ... : ...
805 TrueIfZero = (P == CmpInst::ICMP_EQ);
806 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
807 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
808 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
809 return false;
810 // Matched: select C == 1 ? ... : ...
811 // select C != 1 ? ... : ...
812 TrueIfZero = (P == CmpInst::ICMP_NE);
813 } else
814 return false;
815
816 Value *X = nullptr;
817 if (!match(C, m_And(m_Value(X), m_One())) &&
818 !match(C, m_And(m_One(), m_Value(X))))
819 return false;
820 // Matched: select (X & 1) == +++ ? ... : ...
821 // select (X & 1) != +++ ? ... : ...
822
823 Value *R = nullptr, *Q = nullptr;
824 if (TrueIfZero) {
825 // The select's condition is true if the tested bit is 0.
826 // TrueV must be the shift, FalseV must be the xor.
827 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
828 return false;
829 // Matched: select +++ ? (R >> 1) : ...
830 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
831 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
832 return false;
833 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
834 // with commuting ^.
835 } else {
836 // The select's condition is true if the tested bit is 1.
837 // TrueV must be the xor, FalseV must be the shift.
838 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
839 return false;
840 // Matched: select +++ ? ... : (R >> 1)
841 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
842 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
843 return false;
844 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
845 // with commuting ^.
846 }
847
848 PV.X = X;
849 PV.Q = Q;
850 PV.R = R;
851 PV.Left = false;
852 return true;
853}
854
855bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
856 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
857 bool PreScan) {
858 using namespace PatternMatch;
859
860 // The basic pattern for R = P.Q is:
861 // for i = 0..31
862 // R = phi (0, R')
863 // if (P & (1 << i)) ; test-bit(P, i)
864 // R' = R ^ (Q << i)
865 //
866 // Similarly, the basic pattern for R = (P/Q).Q - P
867 // for i = 0..31
868 // R = phi(P, R')
869 // if (R & (1 << i))
870 // R' = R ^ (Q << i)
871
872 // There exist idioms, where instead of Q being shifted left, P is shifted
873 // right. This produces a result that is shifted right by 32 bits (the
874 // non-shifted result is 64-bit).
875 //
876 // For R = P.Q, this would be:
877 // for i = 0..31
878 // R = phi (0, R')
879 // if ((P >> i) & 1)
880 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
881 // else ; be shifted by 1, not i.
882 // R' = R >> 1
883 //
884 // And for the inverse:
885 // for i = 0..31
886 // R = phi (P, R')
887 // if (R & 1)
888 // R' = (R >> 1) ^ Q
889 // else
890 // R' = R >> 1
891
892 // The left-shifting idioms share the same pattern:
893 // select (X & (1 << i)) ? R ^ (Q << i) : R
894 // Similarly for right-shifting idioms:
895 // select (X & 1) ? (R >> 1) ^ Q
896
897 if (matchLeftShift(SelI, CIV, PV)) {
898 // If this is a pre-scan, getting this far is sufficient.
899 if (PreScan)
900 return true;
901
902 // Need to make sure that the SelI goes back into R.
903 auto *RPhi = dyn_cast<PHINode>(PV.R);
904 if (!RPhi)
905 return false;
906 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
907 return false;
908 PV.Res = SelI;
909
910 // If X is loop invariant, it must be the input polynomial, and the
911 // idiom is the basic polynomial multiply.
912 if (CurLoop->isLoopInvariant(PV.X)) {
913 PV.P = PV.X;
914 PV.Inv = false;
915 } else {
916 // X is not loop invariant. If X == R, this is the inverse pmpy.
917 // Otherwise, check for an xor with an invariant value. If the
918 // variable argument to the xor is R, then this is still a valid
919 // inverse pmpy.
920 PV.Inv = true;
921 if (PV.X != PV.R) {
922 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
923 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
924 return false;
925 auto *I1 = dyn_cast<Instruction>(X1);
926 auto *I2 = dyn_cast<Instruction>(X2);
927 if (!I1 || I1->getParent() != LoopB) {
928 Var = X2;
929 Inv = X1;
930 } else if (!I2 || I2->getParent() != LoopB) {
931 Var = X1;
932 Inv = X2;
933 } else
934 return false;
935 if (Var != PV.R)
936 return false;
937 PV.M = Inv;
938 }
939 // The input polynomial P still needs to be determined. It will be
940 // the entry value of R.
941 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
942 PV.P = EntryP;
943 }
944
945 return true;
946 }
947
948 if (matchRightShift(SelI, PV)) {
949 // If this is an inverse pattern, the Q polynomial must be known at
950 // compile time.
951 if (PV.Inv && !isa<ConstantInt>(PV.Q))
952 return false;
953 if (PreScan)
954 return true;
955 // There is no exact matching of right-shift pmpy.
956 return false;
957 }
958
959 return false;
960}
961
962bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
963 IntegerType *DestTy) {
964 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
965 if (!T || T->getBitWidth() > DestTy->getBitWidth())
966 return false;
967 if (T->getBitWidth() == DestTy->getBitWidth())
968 return true;
969 // Non-instructions are promotable. The reason why an instruction may not
970 // be promotable is that it may produce a different result if its operands
971 // and the result are promoted, for example, it may produce more non-zero
972 // bits. While it would still be possible to represent the proper result
973 // in a wider type, it may require adding additional instructions (which
974 // we don't want to do).
975 Instruction *In = dyn_cast<Instruction>(Val);
976 if (!In)
977 return true;
978 // The bitwidth of the source type is smaller than the destination.
979 // Check if the individual operation can be promoted.
980 switch (In->getOpcode()) {
981 case Instruction::PHI:
982 case Instruction::ZExt:
983 case Instruction::And:
984 case Instruction::Or:
985 case Instruction::Xor:
986 case Instruction::LShr: // Shift right is ok.
987 case Instruction::Select:
988 return true;
989 case Instruction::ICmp:
990 if (CmpInst *CI = cast<CmpInst>(In))
991 return CI->isEquality() || CI->isUnsigned();
992 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 992)
;
993 case Instruction::Add:
994 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
995 }
996 return false;
997}
998
999void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
1000 IntegerType *DestTy, BasicBlock *LoopB) {
1001 // Leave boolean values alone.
1002 if (!In->getType()->isIntegerTy(1))
1003 In->mutateType(DestTy);
1004 unsigned DestBW = DestTy->getBitWidth();
1005
1006 // Handle PHIs.
1007 if (PHINode *P = dyn_cast<PHINode>(In)) {
1008 unsigned N = P->getNumIncomingValues();
1009 for (unsigned i = 0; i != N; ++i) {
1010 BasicBlock *InB = P->getIncomingBlock(i);
1011 if (InB == LoopB)
1012 continue;
1013 Value *InV = P->getIncomingValue(i);
1014 IntegerType *Ty = cast<IntegerType>(InV->getType());
1015 // Do not promote values in PHI nodes of type i1.
1016 if (Ty != P->getType()) {
1017 // If the value type does not match the PHI type, the PHI type
1018 // must have been promoted.
1019 assert(Ty->getBitWidth() < DestBW)(static_cast <bool> (Ty->getBitWidth() < DestBW) ?
void (0) : __assert_fail ("Ty->getBitWidth() < DestBW"
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1019, __extension__ __PRETTY_FUNCTION__))
;
1020 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1021 P->setIncomingValue(i, InV);
1022 }
1023 }
1024 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1025 Value *Op = Z->getOperand(0);
1026 if (Op->getType() == Z->getType())
1027 Z->replaceAllUsesWith(Op);
1028 Z->eraseFromParent();
1029 return;
1030 }
1031
1032 // Promote immediates.
1033 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1034 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1035 if (CI->getType()->getBitWidth() < DestBW)
1036 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1037 }
1038}
1039
1040bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1041 BasicBlock *ExitB) {
1042 assert(LoopB)(static_cast <bool> (LoopB) ? void (0) : __assert_fail (
"LoopB", "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1042, __extension__ __PRETTY_FUNCTION__))
;
1043 // Skip loops where the exit block has more than one predecessor. The values
1044 // coming from the loop block will be promoted to another type, and so the
1045 // values coming into the exit block from other predecessors would also have
1046 // to be promoted.
1047 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1048 return false;
1049 IntegerType *DestTy = getPmpyType();
1050 // Check if the exit values have types that are no wider than the type
1051 // that we want to promote to.
1052 unsigned DestBW = DestTy->getBitWidth();
1053 for (Instruction &In : *ExitB) {
1054 PHINode *P = dyn_cast<PHINode>(&In);
1055 if (!P)
1056 break;
1057 if (P->getNumIncomingValues() != 1)
1058 return false;
1059 assert(P->getIncomingBlock(0) == LoopB)(static_cast <bool> (P->getIncomingBlock(0) == LoopB
) ? void (0) : __assert_fail ("P->getIncomingBlock(0) == LoopB"
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1059, __extension__ __PRETTY_FUNCTION__))
;
1060 IntegerType *T = dyn_cast<IntegerType>(P->getType());
1061 if (!T || T->getBitWidth() > DestBW)
1062 return false;
1063 }
1064
1065 // Check all instructions in the loop.
1066 for (Instruction &In : *LoopB)
1067 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1068 return false;
1069
1070 // Perform the promotion.
1071 std::vector<Instruction*> LoopIns;
1072 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1073 [](Instruction &In) { return &In; });
1074 for (Instruction *In : LoopIns)
1075 promoteTo(In, DestTy, LoopB);
1076
1077 // Fix up the PHI nodes in the exit block.
1078 Instruction *EndI = ExitB->getFirstNonPHI();
1079 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1080 for (auto I = ExitB->begin(); I != End; ++I) {
1081 PHINode *P = dyn_cast<PHINode>(I);
1082 if (!P)
1083 break;
1084 Type *Ty0 = P->getIncomingValue(0)->getType();
1085 Type *PTy = P->getType();
1086 if (PTy != Ty0) {
1087 assert(Ty0 == DestTy)(static_cast <bool> (Ty0 == DestTy) ? void (0) : __assert_fail
("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1087, __extension__ __PRETTY_FUNCTION__))
;
1088 // In order to create the trunc, P must have the promoted type.
1089 P->mutateType(Ty0);
1090 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1091 // In order for the RAUW to work, the types of P and T must match.
1092 P->mutateType(PTy);
1093 P->replaceAllUsesWith(T);
1094 // Final update of the P's type.
1095 P->mutateType(Ty0);
1096 cast<Instruction>(T)->setOperand(0, P);
1097 }
1098 }
1099
1100 return true;
1101}
1102
1103bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1104 ValueSeq &Cycle) {
1105 // Out = ..., In, ...
1106 if (Out == In)
1107 return true;
1108
1109 auto *BB = cast<Instruction>(Out)->getParent();
1110 bool HadPhi = false;
1111
1112 for (auto U : Out->users()) {
1113 auto *I = dyn_cast<Instruction>(&*U);
1114 if (I == nullptr || I->getParent() != BB)
1115 continue;
1116 // Make sure that there are no multi-iteration cycles, e.g.
1117 // p1 = phi(p2)
1118 // p2 = phi(p1)
1119 // The cycle p1->p2->p1 would span two loop iterations.
1120 // Check that there is only one phi in the cycle.
1121 bool IsPhi = isa<PHINode>(I);
1122 if (IsPhi && HadPhi)
1123 return false;
1124 HadPhi |= IsPhi;
1125 if (Cycle.count(I))
1126 return false;
1127 Cycle.insert(I);
1128 if (findCycle(I, In, Cycle))
1129 break;
1130 Cycle.remove(I);
1131 }
1132 return !Cycle.empty();
1133}
1134
1135void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1136 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1137 // All the values in the cycle that are between the phi node and the
1138 // divider instruction will be classified as "early", all other values
1139 // will be "late".
1140
1141 bool IsE = true;
1142 unsigned I, N = Cycle.size();
1143 for (I = 0; I < N; ++I) {
1144 Value *V = Cycle[I];
1145 if (DivI == V)
1146 IsE = false;
1147 else if (!isa<PHINode>(V))
1148 continue;
1149 // Stop if found either.
1150 break;
1151 }
1152 // "I" is the index of either DivI or the phi node, whichever was first.
1153 // "E" is "false" or "true" respectively.
1154 ValueSeq &First = !IsE ? Early : Late;
1155 for (unsigned J = 0; J < I; ++J)
1156 First.insert(Cycle[J]);
1157
1158 ValueSeq &Second = IsE ? Early : Late;
1159 Second.insert(Cycle[I]);
1160 for (++I; I < N; ++I) {
1161 Value *V = Cycle[I];
1162 if (DivI == V || isa<PHINode>(V))
1163 break;
1164 Second.insert(V);
1165 }
1166
1167 for (; I < N; ++I)
1168 First.insert(Cycle[I]);
1169}
1170
1171bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1172 ValueSeq &Early, ValueSeq &Late) {
1173 // Select is an exception, since the condition value does not have to be
1174 // classified in the same way as the true/false values. The true/false
1175 // values do have to be both early or both late.
1176 if (UseI->getOpcode() == Instruction::Select) {
1177 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1178 if (Early.count(TV) || Early.count(FV)) {
1179 if (Late.count(TV) || Late.count(FV))
1180 return false;
1181 Early.insert(UseI);
1182 } else if (Late.count(TV) || Late.count(FV)) {
1183 if (Early.count(TV) || Early.count(FV))
1184 return false;
1185 Late.insert(UseI);
1186 }
1187 return true;
1188 }
1189
1190 // Not sure what would be the example of this, but the code below relies
1191 // on having at least one operand.
1192 if (UseI->getNumOperands() == 0)
1193 return true;
1194
1195 bool AE = true, AL = true;
1196 for (auto &I : UseI->operands()) {
1197 if (Early.count(&*I))
1198 AL = false;
1199 else if (Late.count(&*I))
1200 AE = false;
1201 }
1202 // If the operands appear "all early" and "all late" at the same time,
1203 // then it means that none of them are actually classified as either.
1204 // This is harmless.
1205 if (AE && AL)
1206 return true;
1207 // Conversely, if they are neither "all early" nor "all late", then
1208 // we have a mixture of early and late operands that is not a known
1209 // exception.
1210 if (!AE && !AL)
1211 return false;
1212
1213 // Check that we have covered the two special cases.
1214 assert(AE != AL)(static_cast <bool> (AE != AL) ? void (0) : __assert_fail
("AE != AL", "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1214, __extension__ __PRETTY_FUNCTION__))
;
1215
1216 if (AE)
1217 Early.insert(UseI);
1218 else
1219 Late.insert(UseI);
1220 return true;
1221}
1222
1223bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1224 switch (I->getOpcode()) {
1225 case Instruction::And:
1226 case Instruction::Or:
1227 case Instruction::Xor:
1228 case Instruction::LShr:
1229 case Instruction::Shl:
1230 case Instruction::Select:
1231 case Instruction::ICmp:
1232 case Instruction::PHI:
1233 break;
1234 default:
1235 return false;
1236 }
1237 return true;
1238}
1239
1240bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1241 unsigned IterCount) {
1242 auto *T = dyn_cast<IntegerType>(V->getType());
1243 if (!T)
1244 return false;
1245
1246 KnownBits Known(T->getBitWidth());
1247 computeKnownBits(V, Known, DL);
1248 return Known.countMinLeadingZeros() >= IterCount;
1249}
1250
1251bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1252 unsigned IterCount) {
1253 // Assume that all inputs to the value have the high bits zero.
1254 // Check if the value itself preserves the zeros in the high bits.
1255 if (auto *C = dyn_cast<ConstantInt>(V))
1256 return C->getValue().countLeadingZeros() >= IterCount;
1257
1258 if (auto *I = dyn_cast<Instruction>(V)) {
1259 switch (I->getOpcode()) {
1260 case Instruction::And:
1261 case Instruction::Or:
1262 case Instruction::Xor:
1263 case Instruction::LShr:
1264 case Instruction::Select:
1265 case Instruction::ICmp:
1266 case Instruction::PHI:
1267 case Instruction::ZExt:
1268 return true;
1269 }
1270 }
1271
1272 return false;
1273}
1274
1275bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1276 unsigned Opc = I->getOpcode();
1277 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1278 return Op != I->getOperand(1);
1279 return true;
1280}
1281
1282bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1283 BasicBlock *ExitB, unsigned IterCount) {
1284 Value *CIV = getCountIV(LoopB);
1285 if (CIV == nullptr)
1286 return false;
1287 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1288 if (CIVTy == nullptr)
1289 return false;
1290
1291 ValueSeq RShifts;
1292 ValueSeq Early, Late, Cycled;
1293
1294 // Find all value cycles that contain logical right shifts by 1.
1295 for (Instruction &I : *LoopB) {
1296 using namespace PatternMatch;
1297
1298 Value *V = nullptr;
1299 if (!match(&I, m_LShr(m_Value(V), m_One())))
1300 continue;
1301 ValueSeq C;
1302 if (!findCycle(&I, V, C))
1303 continue;
1304
1305 // Found a cycle.
1306 C.insert(&I);
1307 classifyCycle(&I, C, Early, Late);
1308 Cycled.insert(C.begin(), C.end());
1309 RShifts.insert(&I);
1310 }
1311
1312 // Find the set of all values affected by the shift cycles, i.e. all
1313 // cycled values, and (recursively) all their users.
1314 ValueSeq Users(Cycled.begin(), Cycled.end());
1315 for (unsigned i = 0; i < Users.size(); ++i) {
1316 Value *V = Users[i];
1317 if (!isa<IntegerType>(V->getType()))
1318 return false;
1319 auto *R = cast<Instruction>(V);
1320 // If the instruction does not commute with shifts, the loop cannot
1321 // be unshifted.
1322 if (!commutesWithShift(R))
1323 return false;
1324 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1325 auto *T = cast<Instruction>(*I);
1326 // Skip users from outside of the loop. They will be handled later.
1327 // Also, skip the right-shifts and phi nodes, since they mix early
1328 // and late values.
1329 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1330 continue;
1331
1332 Users.insert(T);
1333 if (!classifyInst(T, Early, Late))
1334 return false;
1335 }
1336 }
1337
1338 if (Users.empty())
1339 return false;
1340
1341 // Verify that high bits remain zero.
1342 ValueSeq Internal(Users.begin(), Users.end());
1343 ValueSeq Inputs;
1344 for (unsigned i = 0; i < Internal.size(); ++i) {
1345 auto *R = dyn_cast<Instruction>(Internal[i]);
1346 if (!R)
1347 continue;
1348 for (Value *Op : R->operands()) {
1349 auto *T = dyn_cast<Instruction>(Op);
1350 if (T && T->getParent() != LoopB)
1351 Inputs.insert(Op);
1352 else
1353 Internal.insert(Op);
1354 }
1355 }
1356 for (Value *V : Inputs)
1357 if (!highBitsAreZero(V, IterCount))
1358 return false;
1359 for (Value *V : Internal)
1360 if (!keepsHighBitsZero(V, IterCount))
1361 return false;
1362
1363 // Finally, the work can be done. Unshift each user.
1364 IRBuilder<> IRB(LoopB);
1365 std::map<Value*,Value*> ShiftMap;
1366
1367 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1368
1369 CastMapType CastMap;
1370
1371 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1372 IntegerType *Ty) -> Value* {
1373 auto H = CM.find(std::make_pair(V, Ty));
1374 if (H != CM.end())
1375 return H->second;
1376 Value *CV = IRB.CreateIntCast(V, Ty, false);
1377 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1378 return CV;
1379 };
1380
1381 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1382 using namespace PatternMatch;
1383
1384 if (isa<PHINode>(I) || !Users.count(&*I))
1385 continue;
1386
1387 // Match lshr x, 1.
1388 Value *V = nullptr;
1389 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1390 replaceAllUsesOfWithIn(&*I, V, LoopB);
1391 continue;
1392 }
1393 // For each non-cycled operand, replace it with the corresponding
1394 // value shifted left.
1395 for (auto &J : I->operands()) {
1396 Value *Op = J.get();
1397 if (!isOperandShifted(&*I, Op))
1398 continue;
1399 if (Users.count(Op))
1400 continue;
1401 // Skip shifting zeros.
1402 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1403 continue;
1404 // Check if we have already generated a shift for this value.
1405 auto F = ShiftMap.find(Op);
1406 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1407 if (W == nullptr) {
1408 IRB.SetInsertPoint(&*I);
1409 // First, the shift amount will be CIV or CIV+1, depending on
1410 // whether the value is early or late. Instead of creating CIV+1,
1411 // do a single shift of the value.
1412 Value *ShAmt = CIV, *ShVal = Op;
1413 auto *VTy = cast<IntegerType>(ShVal->getType());
1414 auto *ATy = cast<IntegerType>(ShAmt->getType());
1415 if (Late.count(&*I))
1416 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1417 // Second, the types of the shifted value and the shift amount
1418 // must match.
1419 if (VTy != ATy) {
1420 if (VTy->getBitWidth() < ATy->getBitWidth())
1421 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1422 else
1423 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1424 }
1425 // Ready to generate the shift and memoize it.
1426 W = IRB.CreateShl(ShVal, ShAmt);
1427 ShiftMap.insert(std::make_pair(Op, W));
1428 }
1429 I->replaceUsesOfWith(Op, W);
1430 }
1431 }
1432
1433 // Update the users outside of the loop to account for having left
1434 // shifts. They would normally be shifted right in the loop, so shift
1435 // them right after the loop exit.
1436 // Take advantage of the loop-closed SSA form, which has all the post-
1437 // loop values in phi nodes.
1438 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1439 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1440 if (!isa<PHINode>(P))
1441 break;
1442 auto *PN = cast<PHINode>(P);
1443 Value *U = PN->getIncomingValueForBlock(LoopB);
1444 if (!Users.count(U))
1445 continue;
1446 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1447 PN->replaceAllUsesWith(S);
1448 // The above RAUW will create
1449 // S = lshr S, IterCount
1450 // so we need to fix it back into
1451 // S = lshr PN, IterCount
1452 cast<User>(S)->replaceUsesOfWith(S, PN);
1453 }
1454
1455 return true;
1456}
1457
1458void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1459 for (auto &I : *LoopB)
1460 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1461 I.replaceAllUsesWith(SV);
1462
1463 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1464 N = std::next(I);
1465 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1466 }
1467}
1468
1469unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1470 // Arrays of coefficients of Q and the inverse, C.
1471 // Q[i] = coefficient at x^i.
1472 std::array<char,32> Q, C;
1473
1474 for (unsigned i = 0; i < 32; ++i) {
1475 Q[i] = QP & 1;
1476 QP >>= 1;
1477 }
1478 assert(Q[0] == 1)(static_cast <bool> (Q[0] == 1) ? void (0) : __assert_fail
("Q[0] == 1", "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1478, __extension__ __PRETTY_FUNCTION__))
;
1479
1480 // Find C, such that
1481 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1482 //
1483 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1484 // operations * and + are & and ^ respectively.
1485 //
1486 // Find C[i] recursively, by comparing i-th coefficient in the product
1487 // with 0 (or 1 for i=0).
1488 //
1489 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1490 C[0] = 1;
1491 for (unsigned i = 1; i < 32; ++i) {
1492 // Solve for C[i] in:
1493 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1494 // This is equivalent to
1495 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1496 // which is
1497 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1498 unsigned T = 0;
1499 for (unsigned j = 0; j < i; ++j)
1500 T = T ^ (C[j] & Q[i-j]);
1501 C[i] = T;
1502 }
1503
1504 unsigned QV = 0;
1505 for (unsigned i = 0; i < 32; ++i)
1506 if (C[i])
1507 QV |= (1 << i);
1508
1509 return QV;
1510}
1511
1512Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1513 ParsedValues &PV) {
1514 IRBuilder<> B(&*At);
1515 Module *M = At->getParent()->getParent()->getParent();
1516 Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1517
1518 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1519 unsigned IC = PV.IterCount;
1520
1521 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1522 P0 = P = B.CreateXor(P, PV.M);
1523
1524 // Create a bit mask to clear the high bits beyond IterCount.
1525 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
1526
1527 if (PV.IterCount != 32)
3
Assuming the condition is false
4
Taking false branch
1528 P = B.CreateAnd(P, BMI);
1529
1530 if (PV.Inv) {
5
Assuming the condition is true
6
Taking true branch
1531 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1532 assert(QI && QI->getBitWidth() <= 32)(static_cast <bool> (QI && QI->getBitWidth()
<= 32) ? void (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1532, __extension__ __PRETTY_FUNCTION__))
;
1533
1534 // Again, clearing bits beyond IterCount.
1535 unsigned M = (1 << PV.IterCount) - 1;
7
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1536 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1537 unsigned QV = getInverseMxN(Tmp) & M;
1538 auto *QVI = ConstantInt::get(QI->getType(), QV);
1539 P = B.CreateCall(PMF, {P, QVI});
1540 P = B.CreateTrunc(P, QI->getType());
1541 if (IC != 32)
1542 P = B.CreateAnd(P, BMI);
1543 }
1544
1545 Value *R = B.CreateCall(PMF, {P, Q});
1546
1547 if (PV.M != nullptr)
1548 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1549
1550 return R;
1551}
1552
1553static bool hasZeroSignBit(const Value *V) {
1554 if (const auto *CI = dyn_cast<const ConstantInt>(V))
1555 return (CI->getType()->getSignBit() & CI->getSExtValue()) == 0;
1556 const Instruction *I = dyn_cast<const Instruction>(V);
1557 if (!I)
1558 return false;
1559 switch (I->getOpcode()) {
1560 case Instruction::LShr:
1561 if (const auto SI = dyn_cast<const ConstantInt>(I->getOperand(1)))
1562 return SI->getZExtValue() > 0;
1563 return false;
1564 case Instruction::Or:
1565 case Instruction::Xor:
1566 return hasZeroSignBit(I->getOperand(0)) &&
1567 hasZeroSignBit(I->getOperand(1));
1568 case Instruction::And:
1569 return hasZeroSignBit(I->getOperand(0)) ||
1570 hasZeroSignBit(I->getOperand(1));
1571 }
1572 return false;
1573}
1574
1575void PolynomialMultiplyRecognize::setupSimplifier() {
1576 Simp.addRule("sink-zext",
1577 // Sink zext past bitwise operations.
1578 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1579 if (I->getOpcode() != Instruction::ZExt)
1580 return nullptr;
1581 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1582 if (!T)
1583 return nullptr;
1584 switch (T->getOpcode()) {
1585 case Instruction::And:
1586 case Instruction::Or:
1587 case Instruction::Xor:
1588 break;
1589 default:
1590 return nullptr;
1591 }
1592 IRBuilder<> B(Ctx);
1593 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1594 B.CreateZExt(T->getOperand(0), I->getType()),
1595 B.CreateZExt(T->getOperand(1), I->getType()));
1596 });
1597 Simp.addRule("xor/and -> and/xor",
1598 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1599 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1600 if (I->getOpcode() != Instruction::Xor)
1601 return nullptr;
1602 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1603 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1604 if (!And0 || !And1)
1605 return nullptr;
1606 if (And0->getOpcode() != Instruction::And ||
1607 And1->getOpcode() != Instruction::And)
1608 return nullptr;
1609 if (And0->getOperand(1) != And1->getOperand(1))
1610 return nullptr;
1611 IRBuilder<> B(Ctx);
1612 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1613 And0->getOperand(1));
1614 });
1615 Simp.addRule("sink binop into select",
1616 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1617 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1618 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1619 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1620 if (!BO)
1621 return nullptr;
1622 Instruction::BinaryOps Op = BO->getOpcode();
1623 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1624 IRBuilder<> B(Ctx);
1625 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1626 Value *Z = BO->getOperand(1);
1627 return B.CreateSelect(Sel->getCondition(),
1628 B.CreateBinOp(Op, X, Z),
1629 B.CreateBinOp(Op, Y, Z));
1630 }
1631 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1632 IRBuilder<> B(Ctx);
1633 Value *X = BO->getOperand(0);
1634 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1635 return B.CreateSelect(Sel->getCondition(),
1636 B.CreateBinOp(Op, X, Y),
1637 B.CreateBinOp(Op, X, Z));
1638 }
1639 return nullptr;
1640 });
1641 Simp.addRule("fold select-select",
1642 // (select c (select c x y) z) -> (select c x z)
1643 // (select c x (select c y z)) -> (select c x z)
1644 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1645 SelectInst *Sel = dyn_cast<SelectInst>(I);
1646 if (!Sel)
1647 return nullptr;
1648 IRBuilder<> B(Ctx);
1649 Value *C = Sel->getCondition();
1650 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1651 if (Sel0->getCondition() == C)
1652 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1653 }
1654 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1655 if (Sel1->getCondition() == C)
1656 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1657 }
1658 return nullptr;
1659 });
1660 Simp.addRule("or-signbit -> xor-signbit",
1661 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1662 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1663 if (I->getOpcode() != Instruction::Or)
1664 return nullptr;
1665 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1666 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1667 return nullptr;
1668 if (!hasZeroSignBit(I->getOperand(0)))
1669 return nullptr;
1670 return IRBuilder<>(Ctx).CreateXor(I->getOperand(0), Msb);
1671 });
1672 Simp.addRule("sink lshr into binop",
1673 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1674 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1675 if (I->getOpcode() != Instruction::LShr)
1676 return nullptr;
1677 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1678 if (!BitOp)
1679 return nullptr;
1680 switch (BitOp->getOpcode()) {
1681 case Instruction::And:
1682 case Instruction::Or:
1683 case Instruction::Xor:
1684 break;
1685 default:
1686 return nullptr;
1687 }
1688 IRBuilder<> B(Ctx);
1689 Value *S = I->getOperand(1);
1690 return B.CreateBinOp(BitOp->getOpcode(),
1691 B.CreateLShr(BitOp->getOperand(0), S),
1692 B.CreateLShr(BitOp->getOperand(1), S));
1693 });
1694 Simp.addRule("expose bitop-const",
1695 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1696 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1697 auto IsBitOp = [](unsigned Op) -> bool {
1698 switch (Op) {
1699 case Instruction::And:
1700 case Instruction::Or:
1701 case Instruction::Xor:
1702 return true;
1703 }
1704 return false;
1705 };
1706 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1707 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1708 return nullptr;
1709 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1710 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1711 return nullptr;
1712 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1713 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1714 if (!CA || !CB)
1715 return nullptr;
1716 IRBuilder<> B(Ctx);
1717 Value *X = BitOp2->getOperand(0);
1718 return B.CreateBinOp(BitOp2->getOpcode(), X,
1719 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1720 });
1721}
1722
1723bool PolynomialMultiplyRecognize::recognize() {
1724 DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1725 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1726 // Restrictions:
1727 // - The loop must consist of a single block.
1728 // - The iteration count must be known at compile-time.
1729 // - The loop must have an induction variable starting from 0, and
1730 // incremented in each iteration of the loop.
1731 BasicBlock *LoopB = CurLoop->getHeader();
1732 DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1733
1734 if (LoopB != CurLoop->getLoopLatch())
1735 return false;
1736 BasicBlock *ExitB = CurLoop->getExitBlock();
1737 if (ExitB == nullptr)
1738 return false;
1739 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1740 if (EntryB == nullptr)
1741 return false;
1742
1743 unsigned IterCount = 0;
1744 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1745 if (isa<SCEVCouldNotCompute>(CT))
1746 return false;
1747 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1748 IterCount = CV->getValue()->getZExtValue() + 1;
1749
1750 Value *CIV = getCountIV(LoopB);
1751 ParsedValues PV;
1752 PV.IterCount = IterCount;
1753 DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1754
1755 setupSimplifier();
1756
1757 // Perform a preliminary scan of select instructions to see if any of them
1758 // looks like a generator of the polynomial multiply steps. Assume that a
1759 // loop can only contain a single transformable operation, so stop the
1760 // traversal after the first reasonable candidate was found.
1761 // XXX: Currently this approach can modify the loop before being 100% sure
1762 // that the transformation can be carried out.
1763 bool FoundPreScan = false;
1764 auto FeedsPHI = [LoopB](const Value *V) -> bool {
1765 for (const Value *U : V->users()) {
1766 if (const auto *P = dyn_cast<const PHINode>(U))
1767 if (P->getParent() == LoopB)
1768 return true;
1769 }
1770 return false;
1771 };
1772 for (Instruction &In : *LoopB) {
1773 SelectInst *SI = dyn_cast<SelectInst>(&In);
1774 if (!SI || !FeedsPHI(SI))
1775 continue;
1776
1777 Simplifier::Context C(SI);
1778 Value *T = Simp.simplify(C);
1779 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1780 DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1781 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1782 FoundPreScan = true;
1783 if (SelI != SI) {
1784 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1785 SI->replaceAllUsesWith(NewSel);
1786 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1787 }
1788 break;
1789 }
1790 }
1791
1792 if (!FoundPreScan) {
1793 DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1794 return false;
1795 }
1796
1797 if (!PV.Left) {
1798 // The right shift version actually only returns the higher bits of
1799 // the result (each iteration discards the LSB). If we want to convert it
1800 // to a left-shifting loop, the working data type must be at least as
1801 // wide as the target's pmpy instruction.
1802 if (!promoteTypes(LoopB, ExitB))
1803 return false;
1804 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1805 return false;
1806 cleanupLoopBody(LoopB);
1807 }
1808
1809 // Scan the loop again, find the generating select instruction.
1810 bool FoundScan = false;
1811 for (Instruction &In : *LoopB) {
1812 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1813 if (!SelI)
1814 continue;
1815 DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1816 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1817 if (FoundScan)
1818 break;
1819 }
1820 assert(FoundScan)(static_cast <bool> (FoundScan) ? void (0) : __assert_fail
("FoundScan", "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1820, __extension__ __PRETTY_FUNCTION__))
;
1821
1822 DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1823 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1824 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1825 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1826 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1827 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1828 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1829 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1830 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1831 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1832 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1833 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1834 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1835
1836 BasicBlock::iterator At(EntryB->getTerminator());
1837 Value *PM = generate(At, PV);
1838 if (PM == nullptr)
1839 return false;
1840
1841 if (PM->getType() != PV.Res->getType())
1842 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1843
1844 PV.Res->replaceAllUsesWith(PM);
1845 PV.Res->eraseFromParent();
1846 return true;
1847}
1848
1849int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1850 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1851 return SC->getAPInt().getSExtValue();
1852 return 0;
1853}
1854
1855bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1856 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1857 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1858 return false;
1859
1860 Value *StoredVal = SI->getValueOperand();
1861 Value *StorePtr = SI->getPointerOperand();
1862
1863 // Reject stores that are so large that they overflow an unsigned.
1864 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1865 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1866 return false;
1867
1868 // See if the pointer expression is an AddRec like {base,+,1} on the current
1869 // loop, which indicates a strided store. If we have something else, it's a
1870 // random store we can't handle.
1871 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1872 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1873 return false;
1874
1875 // Check to see if the stride matches the size of the store. If so, then we
1876 // know that every byte is touched in the loop.
1877 int Stride = getSCEVStride(StoreEv);
1878 if (Stride == 0)
1879 return false;
1880 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1881 if (StoreSize != unsigned(std::abs(Stride)))
1882 return false;
1883
1884 // The store must be feeding a non-volatile load.
1885 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1886 if (!LI || !LI->isSimple())
1887 return false;
1888
1889 // See if the pointer expression is an AddRec like {base,+,1} on the current
1890 // loop, which indicates a strided load. If we have something else, it's a
1891 // random load we can't handle.
1892 Value *LoadPtr = LI->getPointerOperand();
1893 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1894 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1895 return false;
1896
1897 // The store and load must share the same stride.
1898 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1899 return false;
1900
1901 // Success. This store can be converted into a memcpy.
1902 return true;
1903}
1904
1905/// mayLoopAccessLocation - Return true if the specified loop might access the
1906/// specified pointer location, which is a loop-strided access. The 'Access'
1907/// argument specifies what the verboten forms of access are (read or write).
1908static bool
1909mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1910 const SCEV *BECount, unsigned StoreSize,
1911 AliasAnalysis &AA,
1912 SmallPtrSetImpl<Instruction *> &Ignored) {
1913 // Get the location that may be stored across the loop. Since the access
1914 // is strided positively through memory, we say that the modified location
1915 // starts at the pointer and has infinite size.
1916 uint64_t AccessSize = MemoryLocation::UnknownSize;
1917
1918 // If the loop iterates a fixed number of times, we can refine the access
1919 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1920 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1921 AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
1922
1923 // TODO: For this to be really effective, we have to dive into the pointer
1924 // operand in the store. Store to &A[i] of 100 will always return may alias
1925 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1926 // which will then no-alias a store to &A[100].
1927 MemoryLocation StoreLoc(Ptr, AccessSize);
1928
1929 for (auto *B : L->blocks())
1930 for (auto &I : *B)
1931 if (Ignored.count(&I) == 0 &&
1932 isModOrRefSet(
1933 intersectModRef(AA.getModRefInfo(&I, StoreLoc), Access)))
1934 return true;
1935
1936 return false;
1937}
1938
1939void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
1940 SmallVectorImpl<StoreInst*> &Stores) {
1941 Stores.clear();
1942 for (Instruction &I : *BB)
1943 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
1944 if (isLegalStore(CurLoop, SI))
1945 Stores.push_back(SI);
1946}
1947
1948bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
1949 StoreInst *SI, const SCEV *BECount) {
1950 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1952, __extension__ __PRETTY_FUNCTION__))
1951 "Expected only non-volatile stores, or Hexagon-specific memcpy"(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1952, __extension__ __PRETTY_FUNCTION__))
1952 "to volatile destination.")(static_cast <bool> ((SI->isSimple() || (SI->isVolatile
() && HexagonVolatileMemcpy)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? void (0) : __assert_fail ("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1952, __extension__ __PRETTY_FUNCTION__))
;
1953
1954 Value *StorePtr = SI->getPointerOperand();
1955 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1956 unsigned Stride = getSCEVStride(StoreEv);
1957 unsigned StoreSize = DL->getTypeStoreSize(SI->getValueOperand()->getType());
1958 if (Stride != StoreSize)
1959 return false;
1960
1961 // See if the pointer expression is an AddRec like {base,+,1} on the current
1962 // loop, which indicates a strided load. If we have something else, it's a
1963 // random load we can't handle.
1964 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1965 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1966
1967 // The trip count of the loop and the base pointer of the addrec SCEV is
1968 // guaranteed to be loop invariant, which means that it should dominate the
1969 // header. This allows us to insert code for it in the preheader.
1970 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1971 Instruction *ExpPt = Preheader->getTerminator();
1972 IRBuilder<> Builder(ExpPt);
1973 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
1974
1975 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
1976
1977 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1978 // this into a memcpy/memmove in the loop preheader now if we want. However,
1979 // this would be unsafe to do if there is anything else in the loop that may
1980 // read or write the memory region we're storing to. For memcpy, this
1981 // includes the load that feeds the stores. Check for an alias by generating
1982 // the base address and checking everything.
1983 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
1984 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
1985 Value *LoadBasePtr = nullptr;
1986
1987 bool Overlap = false;
1988 bool DestVolatile = SI->isVolatile();
1989 Type *BECountTy = BECount->getType();
1990
1991 if (DestVolatile) {
1992 // The trip count must fit in i32, since it is the type of the "num_words"
1993 // argument to hexagon_memcpy_forward_vp4cp4n2.
1994 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
1995CleanupAndExit:
1996 // If we generated new code for the base pointer, clean up.
1997 Expander.clear();
1998 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
1999 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
2000 StoreBasePtr = nullptr;
2001 }
2002 if (LoadBasePtr) {
2003 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
2004 LoadBasePtr = nullptr;
2005 }
2006 return false;
2007 }
2008 }
2009
2010 SmallPtrSet<Instruction*, 2> Ignore1;
2011 Ignore1.insert(SI);
2012 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop, BECount,
2013 StoreSize, *AA, Ignore1)) {
2014 // Check if the load is the offending instruction.
2015 Ignore1.insert(LI);
2016 if (mayLoopAccessLocation(StoreBasePtr, ModRefInfo::ModRef, CurLoop,
2017 BECount, StoreSize, *AA, Ignore1)) {
2018 // Still bad. Nothing we can do.
2019 goto CleanupAndExit;
2020 }
2021 // It worked with the load ignored.
2022 Overlap = true;
2023 }
2024
2025 if (!Overlap) {
2026 if (DisableMemcpyIdiom || !HasMemcpy)
2027 goto CleanupAndExit;
2028 } else {
2029 // Don't generate memmove if this function will be inlined. This is
2030 // because the caller will undergo this transformation after inlining.
2031 Function *Func = CurLoop->getHeader()->getParent();
2032 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2033 goto CleanupAndExit;
2034
2035 // In case of a memmove, the call to memmove will be executed instead
2036 // of the loop, so we need to make sure that there is nothing else in
2037 // the loop than the load, store and instructions that these two depend
2038 // on.
2039 SmallVector<Instruction*,2> Insts;
2040 Insts.push_back(SI);
2041 Insts.push_back(LI);
2042 if (!coverLoop(CurLoop, Insts))
2043 goto CleanupAndExit;
2044
2045 if (DisableMemmoveIdiom || !HasMemmove)
2046 goto CleanupAndExit;
2047 bool IsNested = CurLoop->getParentLoop() != nullptr;
2048 if (IsNested && OnlyNonNestedMemmove)
2049 goto CleanupAndExit;
2050 }
2051
2052 // For a memcpy, we have to make sure that the input array is not being
2053 // mutated by the loop.
2054 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2055 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2056
2057 SmallPtrSet<Instruction*, 2> Ignore2;
2058 Ignore2.insert(SI);
2059 if (mayLoopAccessLocation(LoadBasePtr, ModRefInfo::Mod, CurLoop, BECount,
2060 StoreSize, *AA, Ignore2))
2061 goto CleanupAndExit;
2062
2063 // Check the stride.
2064 bool StridePos = getSCEVStride(LoadEv) >= 0;
2065
2066 // Currently, the volatile memcpy only emulates traversing memory forward.
2067 if (!StridePos && DestVolatile)
2068 goto CleanupAndExit;
2069
2070 bool RuntimeCheck = (Overlap || DestVolatile);
2071
2072 BasicBlock *ExitB;
2073 if (RuntimeCheck) {
2074 // The runtime check needs a single exit block.
2075 SmallVector<BasicBlock*, 8> ExitBlocks;
2076 CurLoop->getUniqueExitBlocks(ExitBlocks);
2077 if (ExitBlocks.size() != 1)
2078 goto CleanupAndExit;
2079 ExitB = ExitBlocks[0];
2080 }
2081
2082 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2083 // pointer size if it isn't already.
2084 LLVMContext &Ctx = SI->getContext();
2085 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2086 unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());
2087 DebugLoc DLoc = SI->getDebugLoc();
2088
2089 const SCEV *NumBytesS =
2090 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2091 if (StoreSize != 1)
2092 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2093 SCEV::FlagNUW);
2094 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2095 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2096 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2097 NumBytes = Simp;
2098
2099 CallInst *NewCall;
2100
2101 if (RuntimeCheck) {
2102 unsigned Threshold = RuntimeMemSizeThreshold;
2103 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2104 uint64_t C = CI->getZExtValue();
2105 if (Threshold != 0 && C < Threshold)
2106 goto CleanupAndExit;
2107 if (C < CompileTimeMemSizeThreshold)
2108 goto CleanupAndExit;
2109 }
2110
2111 BasicBlock *Header = CurLoop->getHeader();
2112 Function *Func = Header->getParent();
2113 Loop *ParentL = LF->getLoopFor(Preheader);
2114 StringRef HeaderName = Header->getName();
2115
2116 // Create a new (empty) preheader, and update the PHI nodes in the
2117 // header to use the new preheader.
2118 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2119 Func, Header);
2120 if (ParentL)
2121 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2122 IRBuilder<>(NewPreheader).CreateBr(Header);
2123 for (auto &In : *Header) {
2124 PHINode *PN = dyn_cast<PHINode>(&In);
2125 if (!PN)
2126 break;
2127 int bx = PN->getBasicBlockIndex(Preheader);
2128 if (bx >= 0)
2129 PN->setIncomingBlock(bx, NewPreheader);
2130 }
2131 DT->addNewBlock(NewPreheader, Preheader);
2132 DT->changeImmediateDominator(Header, NewPreheader);
2133
2134 // Check for safe conditions to execute memmove.
2135 // If stride is positive, copying things from higher to lower addresses
2136 // is equivalent to memmove. For negative stride, it's the other way
2137 // around. Copying forward in memory with positive stride may not be
2138 // same as memmove since we may be copying values that we just stored
2139 // in some previous iteration.
2140 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2141 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2142 Value *LowA = StridePos ? SA : LA;
2143 Value *HighA = StridePos ? LA : SA;
2144 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2145 Value *Cond = CmpA;
2146
2147 // Check for distance between pointers. Since the case LowA < HighA
2148 // is checked for above, assume LowA >= HighA.
2149 Value *Dist = Builder.CreateSub(LowA, HighA);
2150 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2151 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2152 Cond = CmpEither;
2153
2154 if (Threshold != 0) {
2155 Type *Ty = NumBytes->getType();
2156 Value *Thr = ConstantInt::get(Ty, Threshold);
2157 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2158 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2159 Cond = CmpBoth;
2160 }
2161 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2162 Func, NewPreheader);
2163 if (ParentL)
2164 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2165 Instruction *OldT = Preheader->getTerminator();
2166 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2167 OldT->eraseFromParent();
2168 Preheader->setName(Preheader->getName()+".old");
2169 DT->addNewBlock(MemmoveB, Preheader);
2170 // Find the new immediate dominator of the exit block.
2171 BasicBlock *ExitD = Preheader;
2172 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2173 BasicBlock *PB = *PI;
2174 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2175 if (!ExitD)
2176 break;
2177 }
2178 // If the prior immediate dominator of ExitB was dominated by the
2179 // old preheader, then the old preheader becomes the new immediate
2180 // dominator. Otherwise don't change anything (because the newly
2181 // added blocks are dominated by the old preheader).
2182 if (ExitD && DT->dominates(Preheader, ExitD)) {
2183 DomTreeNode *BN = DT->getNode(ExitB);
2184 DomTreeNode *DN = DT->getNode(ExitD);
2185 BN->setIDom(DN);
2186 }
2187
2188 // Add a call to memmove to the conditional block.
2189 IRBuilder<> CondBuilder(MemmoveB);
2190 CondBuilder.CreateBr(ExitB);
2191 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2192
2193 if (DestVolatile) {
2194 Type *Int32Ty = Type::getInt32Ty(Ctx);
2195 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2196 Type *VoidTy = Type::getVoidTy(Ctx);
2197 Module *M = Func->getParent();
2198 Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
2199 Int32PtrTy, Int32PtrTy, Int32Ty);
2200 Function *Fn = cast<Function>(CF);
2201 Fn->setLinkage(Function::ExternalLinkage);
2202
2203 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2204 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2205 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2206 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2207 MemmoveB->getTerminator());
2208 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2209 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2210 NumWords = Simp;
2211
2212 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2213 ? StoreBasePtr
2214 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2215 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2216 ? LoadBasePtr
2217 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2218 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2219 } else {
2220 NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr,
2221 NumBytes, Alignment);
2222 }
2223 } else {
2224 NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr,
2225 NumBytes, Alignment);
2226 // Okay, the memcpy has been formed. Zap the original store and
2227 // anything that feeds into it.
2228 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2229 }
2230
2231 NewCall->setDebugLoc(DLoc);
2232
2233 DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2234 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2235 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2236 << " from store ptr=" << *StoreEv << " at: " << *SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2237
2238 return true;
2239}
2240
2241// \brief Check if the instructions in Insts, together with their dependencies
2242// cover the loop in the sense that the loop could be safely eliminated once
2243// the instructions in Insts are removed.
2244bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2245 SmallVectorImpl<Instruction*> &Insts) const {
2246 SmallSet<BasicBlock*,8> LoopBlocks;
2247 for (auto *B : L->blocks())
2248 LoopBlocks.insert(B);
2249
2250 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2251
2252 // Collect all instructions from the loop that the instructions in Insts
2253 // depend on (plus their dependencies, etc.). These instructions will
2254 // constitute the expression trees that feed those in Insts, but the trees
2255 // will be limited only to instructions contained in the loop.
2256 for (unsigned i = 0; i < Worklist.size(); ++i) {
2257 Instruction *In = Worklist[i];
2258 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2259 Instruction *OpI = dyn_cast<Instruction>(I);
2260 if (!OpI)
2261 continue;
2262 BasicBlock *PB = OpI->getParent();
2263 if (!LoopBlocks.count(PB))
2264 continue;
2265 Worklist.insert(OpI);
2266 }
2267 }
2268
2269 // Scan all instructions in the loop, if any of them have a user outside
2270 // of the loop, or outside of the expressions collected above, then either
2271 // the loop has a side-effect visible outside of it, or there are
2272 // instructions in it that are not involved in the original set Insts.
2273 for (auto *B : L->blocks()) {
2274 for (auto &In : *B) {
2275 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2276 continue;
2277 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2278 return false;
2279 for (const auto &K : In.users()) {
2280 Instruction *UseI = dyn_cast<Instruction>(K);
2281 if (!UseI)
2282 continue;
2283 BasicBlock *UseB = UseI->getParent();
2284 if (LF->getLoopFor(UseB) != L)
2285 return false;
2286 }
2287 }
2288 }
2289
2290 return true;
2291}
2292
2293/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2294/// with the specified backedge count. This block is known to be in the current
2295/// loop and not in any subloops.
2296bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2297 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2298 // We can only promote stores in this block if they are unconditionally
2299 // executed in the loop. For a block to be unconditionally executed, it has
2300 // to dominate all the exit blocks of the loop. Verify this now.
2301 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2302 return DT->dominates(BB, EB);
2303 };
2304 if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
2305 return false;
2306
2307 bool MadeChange = false;
2308 // Look for store instructions, which may be optimized to memset/memcpy.
2309 SmallVector<StoreInst*,8> Stores;
2310 collectStores(CurLoop, BB, Stores);
2311
2312 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2313 for (auto &SI : Stores)
2314 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2315
2316 return MadeChange;
2317}
2318
2319bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2320 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2321 if (PMR.recognize())
2322 return true;
2323
2324 if (!HasMemcpy && !HasMemmove)
2325 return false;
2326
2327 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2328 assert(!isa<SCEVCouldNotCompute>(BECount) &&(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2330, __extension__ __PRETTY_FUNCTION__))
2329 "runOnCountableLoop() called on a loop without a predictable"(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2330, __extension__ __PRETTY_FUNCTION__))
2330 "backedge-taken count")(static_cast <bool> (!isa<SCEVCouldNotCompute>(BECount
) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? void (0) : __assert_fail ("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2330, __extension__ __PRETTY_FUNCTION__))
;
2331
2332 SmallVector<BasicBlock *, 8> ExitBlocks;
2333 L->getUniqueExitBlocks(ExitBlocks);
2334
2335 bool Changed = false;
2336
2337 // Scan all the blocks in the loop that are not in subloops.
2338 for (auto *BB : L->getBlocks()) {
2339 // Ignore blocks in subloops.
2340 if (LF->getLoopFor(BB) != L)
2341 continue;
2342 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2343 }
2344
2345 return Changed;
2346}
2347
2348bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
2349 const Module &M = *L->getHeader()->getParent()->getParent();
2350 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2351 return false;
2352
2353 if (skipLoop(L))
2354 return false;
2355
2356 // If the loop could not be converted to canonical form, it must have an
2357 // indirectbr in it, just give up.
2358 if (!L->getLoopPreheader())
2359 return false;
2360
2361 // Disable loop idiom recognition if the function's name is a common idiom.
2362 StringRef Name = L->getHeader()->getParent()->getName();
2363 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2364 return false;
2365
2366 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2367 DL = &L->getHeader()->getModule()->getDataLayout();
2368 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2369 LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2370 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
2371 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2372
2373 HasMemcpy = TLI->has(LibFunc_memcpy);
2374 HasMemmove = TLI->has(LibFunc_memmove);
2375
2376 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2377 return runOnCountableLoop(L);
2378 return false;
2379}
2380
2381Pass *llvm::createHexagonLoopIdiomPass() {
2382 return new HexagonLoopIdiomRecognize();
2383}