Bug Summary

File:lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1529, column 21
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'

Annotated Source Code

1//===- HexagonLoopIdiomRecognition.cpp ------------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
11
12#include "llvm/ADT/APInt.h"
13#include "llvm/ADT/DenseMap.h"
14#include "llvm/ADT/SetVector.h"
15#include "llvm/ADT/SmallPtrSet.h"
16#include "llvm/ADT/SmallSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/Triple.h"
20#include "llvm/Analysis/AliasAnalysis.h"
21#include "llvm/Analysis/InstructionSimplify.h"
22#include "llvm/Analysis/LoopInfo.h"
23#include "llvm/Analysis/LoopPass.h"
24#include "llvm/Analysis/MemoryLocation.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionExpander.h"
27#include "llvm/Analysis/ScalarEvolutionExpressions.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/ValueTracking.h"
30#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DebugLoc.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/Dominators.h"
38#include "llvm/IR/Function.h"
39#include "llvm/IR/IRBuilder.h"
40#include "llvm/IR/InstrTypes.h"
41#include "llvm/IR/Instruction.h"
42#include "llvm/IR/Instructions.h"
43#include "llvm/IR/IntrinsicInst.h"
44#include "llvm/IR/Intrinsics.h"
45#include "llvm/IR/Module.h"
46#include "llvm/IR/PatternMatch.h"
47#include "llvm/IR/Type.h"
48#include "llvm/IR/User.h"
49#include "llvm/IR/Value.h"
50#include "llvm/Pass.h"
51#include "llvm/Support/Casting.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Compiler.h"
54#include "llvm/Support/Debug.h"
55#include "llvm/Support/ErrorHandling.h"
56#include "llvm/Support/KnownBits.h"
57#include "llvm/Support/raw_ostream.h"
58#include "llvm/Transforms/Scalar.h"
59#include "llvm/Transforms/Utils/Local.h"
60#include <algorithm>
61#include <array>
62#include <cassert>
63#include <cstdint>
64#include <cstdlib>
65#include <deque>
66#include <functional>
67#include <iterator>
68#include <map>
69#include <set>
70#include <utility>
71#include <vector>
72
73using namespace llvm;
74
75static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
76 cl::Hidden, cl::init(false),
77 cl::desc("Disable generation of memcpy in loop idiom recognition"));
78
79static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
80 cl::Hidden, cl::init(false),
81 cl::desc("Disable generation of memmove in loop idiom recognition"));
82
83static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
84 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
85 "check guarding the memmove."));
86
87static cl::opt<unsigned> CompileTimeMemSizeThreshold(
88 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
89 cl::desc("Threshold (in bytes) to perform the transformation, if the "
90 "runtime loop count (mem transfer size) is known at compile-time."));
91
92static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
93 cl::Hidden, cl::init(true),
94 cl::desc("Only enable generating memmove in non-nested loops"));
95
96cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
97 cl::Hidden, cl::init(false),
98 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
99
100static cl::opt<unsigned> SimplifyLimit("hlir-simplify-limit", cl::init(10000),
101 cl::Hidden, cl::desc("Maximum number of simplification steps in HLIR"));
102
103static const char *HexagonVolatileMemcpyName
104 = "hexagon_memcpy_forward_vp4cp4n2";
105
106
107namespace llvm {
108
109 void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
110 Pass *createHexagonLoopIdiomPass();
111
112} // end namespace llvm
113
114namespace {
115
116 class HexagonLoopIdiomRecognize : public LoopPass {
117 public:
118 static char ID;
119
120 explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
121 initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
122 }
123
124 StringRef getPassName() const override {
125 return "Recognize Hexagon-specific loop idioms";
126 }
127
128 void getAnalysisUsage(AnalysisUsage &AU) const override {
129 AU.addRequired<LoopInfoWrapperPass>();
130 AU.addRequiredID(LoopSimplifyID);
131 AU.addRequiredID(LCSSAID);
132 AU.addRequired<AAResultsWrapperPass>();
133 AU.addPreserved<AAResultsWrapperPass>();
134 AU.addRequired<ScalarEvolutionWrapperPass>();
135 AU.addRequired<DominatorTreeWrapperPass>();
136 AU.addRequired<TargetLibraryInfoWrapperPass>();
137 AU.addPreserved<TargetLibraryInfoWrapperPass>();
138 }
139
140 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
141
142 private:
143 unsigned getStoreSizeInBytes(StoreInst *SI);
144 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
145 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
146 void collectStores(Loop *CurLoop, BasicBlock *BB,
147 SmallVectorImpl<StoreInst*> &Stores);
148 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
149 bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
150 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
151 SmallVectorImpl<BasicBlock*> &ExitBlocks);
152 bool runOnCountableLoop(Loop *L);
153
154 AliasAnalysis *AA;
155 const DataLayout *DL;
156 DominatorTree *DT;
157 LoopInfo *LF;
158 const TargetLibraryInfo *TLI;
159 ScalarEvolution *SE;
160 bool HasMemcpy, HasMemmove;
161 };
162
163 struct Simplifier {
164 using Rule = std::function<Value * (Instruction *, LLVMContext &)>;
165
166 void addRule(const Rule &R) { Rules.push_back(R); }
167
168 private:
169 struct WorkListType {
170 WorkListType() = default;
171
172 void push_back(Value* V) {
173 // Do not push back duplicates.
174 if (!S.count(V)) { Q.push_back(V); S.insert(V); }
175 }
176
177 Value *pop_front_val() {
178 Value *V = Q.front(); Q.pop_front(); S.erase(V);
179 return V;
180 }
181
182 bool empty() const { return Q.empty(); }
183
184 private:
185 std::deque<Value*> Q;
186 std::set<Value*> S;
187 };
188
189 using ValueSetType = std::set<Value *>;
190
191 std::vector<Rule> Rules;
192
193 public:
194 struct Context {
195 using ValueMapType = DenseMap<Value *, Value *>;
196
197 Value *Root;
198 ValueSetType Used; // The set of all cloned values used by Root.
199 ValueSetType Clones; // The set of all cloned values.
200 LLVMContext &Ctx;
201
202 Context(Instruction *Exp)
203 : Ctx(Exp->getParent()->getParent()->getContext()) {
204 initialize(Exp);
205 }
206
207 ~Context() { cleanup(); }
208
209 void print(raw_ostream &OS, const Value *V) const;
210 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
211
212 private:
213 friend struct Simplifier;
214
215 void initialize(Instruction *Exp);
216 void cleanup();
217
218 template <typename FuncT> void traverse(Value *V, FuncT F);
219 void record(Value *V);
220 void use(Value *V);
221 void unuse(Value *V);
222
223 bool equal(const Instruction *I, const Instruction *J) const;
224 Value *find(Value *Tree, Value *Sub) const;
225 Value *subst(Value *Tree, Value *OldV, Value *NewV);
226 void replace(Value *OldV, Value *NewV);
227 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
228 };
229
230 Value *simplify(Context &C);
231 };
232
233 struct PE {
234 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
235
236 const Simplifier::Context &C;
237 const Value *V;
238 };
239
240 raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED__attribute__((__used__));
241 raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
242 P.C.print(OS, P.V ? P.V : P.C.Root);
243 return OS;
244 }
245
246} // end anonymous namespace
247
248char HexagonLoopIdiomRecognize::ID = 0;
249
250INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
251 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
252INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
253INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
254INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
255INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
256INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
257INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
258INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
259INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
260 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
261
262template <typename FuncT>
263void Simplifier::Context::traverse(Value *V, FuncT F) {
264 WorkListType Q;
265 Q.push_back(V);
266
267 while (!Q.empty()) {
268 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
269 if (!U || U->getParent())
270 continue;
271 if (!F(U))
272 continue;
273 for (Value *Op : U->operands())
274 Q.push_back(Op);
275 }
276}
277
278void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
279 const auto *U = dyn_cast<const Instruction>(V);
280 if (!U) {
281 OS << V << '(' << *V << ')';
282 return;
283 }
284
285 if (U->getParent()) {
286 OS << U << '(';
287 U->printAsOperand(OS, true);
288 OS << ')';
289 return;
290 }
291
292 unsigned N = U->getNumOperands();
293 if (N != 0)
294 OS << U << '(';
295 OS << U->getOpcodeName();
296 for (const Value *Op : U->operands()) {
297 OS << ' ';
298 print(OS, Op);
299 }
300 if (N != 0)
301 OS << ')';
302}
303
304void Simplifier::Context::initialize(Instruction *Exp) {
305 // Perform a deep clone of the expression, set Root to the root
306 // of the clone, and build a map from the cloned values to the
307 // original ones.
308 ValueMapType M;
309 BasicBlock *Block = Exp->getParent();
310 WorkListType Q;
311 Q.push_back(Exp);
312
313 while (!Q.empty()) {
314 Value *V = Q.pop_front_val();
315 if (M.find(V) != M.end())
316 continue;
317 if (Instruction *U = dyn_cast<Instruction>(V)) {
318 if (isa<PHINode>(U) || U->getParent() != Block)
319 continue;
320 for (Value *Op : U->operands())
321 Q.push_back(Op);
322 M.insert({U, U->clone()});
323 }
324 }
325
326 for (std::pair<Value*,Value*> P : M) {
327 Instruction *U = cast<Instruction>(P.second);
328 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
329 auto F = M.find(U->getOperand(i));
330 if (F != M.end())
331 U->setOperand(i, F->second);
332 }
333 }
334
335 auto R = M.find(Exp);
336 assert(R != M.end())((R != M.end()) ? static_cast<void> (0) : __assert_fail
("R != M.end()", "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 336, __PRETTY_FUNCTION__))
;
337 Root = R->second;
338
339 record(Root);
340 use(Root);
341}
342
343void Simplifier::Context::record(Value *V) {
344 auto Record = [this](Instruction *U) -> bool {
345 Clones.insert(U);
346 return true;
347 };
348 traverse(V, Record);
349}
350
351void Simplifier::Context::use(Value *V) {
352 auto Use = [this](Instruction *U) -> bool {
353 Used.insert(U);
354 return true;
355 };
356 traverse(V, Use);
357}
358
359void Simplifier::Context::unuse(Value *V) {
360 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
361 return;
362
363 auto Unuse = [this](Instruction *U) -> bool {
364 if (!U->use_empty())
365 return false;
366 Used.erase(U);
367 return true;
368 };
369 traverse(V, Unuse);
370}
371
372Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
373 if (Tree == OldV)
374 return NewV;
375 if (OldV == NewV)
376 return Tree;
377
378 WorkListType Q;
379 Q.push_back(Tree);
380 while (!Q.empty()) {
381 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
382 // If U is not an instruction, or it's not a clone, skip it.
383 if (!U || U->getParent())
384 continue;
385 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
386 Value *Op = U->getOperand(i);
387 if (Op == OldV) {
388 U->setOperand(i, NewV);
389 unuse(OldV);
390 } else {
391 Q.push_back(Op);
392 }
393 }
394 }
395 return Tree;
396}
397
398void Simplifier::Context::replace(Value *OldV, Value *NewV) {
399 if (Root == OldV) {
400 Root = NewV;
401 use(Root);
402 return;
403 }
404
405 // NewV may be a complex tree that has just been created by one of the
406 // transformation rules. We need to make sure that it is commoned with
407 // the existing Root to the maximum extent possible.
408 // Identify all subtrees of NewV (including NewV itself) that have
409 // equivalent counterparts in Root, and replace those subtrees with
410 // these counterparts.
411 WorkListType Q;
412 Q.push_back(NewV);
413 while (!Q.empty()) {
414 Value *V = Q.pop_front_val();
415 Instruction *U = dyn_cast<Instruction>(V);
416 if (!U || U->getParent())
417 continue;
418 if (Value *DupV = find(Root, V)) {
419 if (DupV != V)
420 NewV = subst(NewV, V, DupV);
421 } else {
422 for (Value *Op : U->operands())
423 Q.push_back(Op);
424 }
425 }
426
427 // Now, simply replace OldV with NewV in Root.
428 Root = subst(Root, OldV, NewV);
429 use(Root);
430}
431
432void Simplifier::Context::cleanup() {
433 for (Value *V : Clones) {
434 Instruction *U = cast<Instruction>(V);
435 if (!U->getParent())
436 U->dropAllReferences();
437 }
438
439 for (Value *V : Clones) {
440 Instruction *U = cast<Instruction>(V);
441 if (!U->getParent())
442 U->deleteValue();
443 }
444}
445
446bool Simplifier::Context::equal(const Instruction *I,
447 const Instruction *J) const {
448 if (I == J)
449 return true;
450 if (!I->isSameOperationAs(J))
451 return false;
452 if (isa<PHINode>(I))
453 return I->isIdenticalTo(J);
454
455 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
456 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
457 if (OpI == OpJ)
458 continue;
459 auto *InI = dyn_cast<const Instruction>(OpI);
460 auto *InJ = dyn_cast<const Instruction>(OpJ);
461 if (InI && InJ) {
462 if (!equal(InI, InJ))
463 return false;
464 } else if (InI != InJ || !InI)
465 return false;
466 }
467 return true;
468}
469
470Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
471 Instruction *SubI = dyn_cast<Instruction>(Sub);
472 WorkListType Q;
473 Q.push_back(Tree);
474
475 while (!Q.empty()) {
476 Value *V = Q.pop_front_val();
477 if (V == Sub)
478 return V;
479 Instruction *U = dyn_cast<Instruction>(V);
480 if (!U || U->getParent())
481 continue;
482 if (SubI && equal(SubI, U))
483 return U;
484 assert(!isa<PHINode>(U))((!isa<PHINode>(U)) ? static_cast<void> (0) : __assert_fail
("!isa<PHINode>(U)", "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 484, __PRETTY_FUNCTION__))
;
485 for (Value *Op : U->operands())
486 Q.push_back(Op);
487 }
488 return nullptr;
489}
490
491void Simplifier::Context::link(Instruction *I, BasicBlock *B,
492 BasicBlock::iterator At) {
493 if (I->getParent())
494 return;
495
496 for (Value *Op : I->operands()) {
497 if (Instruction *OpI = dyn_cast<Instruction>(Op))
498 link(OpI, B, At);
499 }
500
501 B->getInstList().insert(At, I);
502}
503
504Value *Simplifier::Context::materialize(BasicBlock *B,
505 BasicBlock::iterator At) {
506 if (Instruction *RootI = dyn_cast<Instruction>(Root))
507 link(RootI, B, At);
508 return Root;
509}
510
511Value *Simplifier::simplify(Context &C) {
512 WorkListType Q;
513 Q.push_back(C.Root);
514 unsigned Count = 0;
515 const unsigned Limit = SimplifyLimit;
516
517 while (!Q.empty()) {
518 if (Count++ >= Limit)
519 break;
520 Instruction *U = dyn_cast<Instruction>(Q.pop_front_val());
521 if (!U || U->getParent() || !C.Used.count(U))
522 continue;
523 bool Changed = false;
524 for (Rule &R : Rules) {
525 Value *W = R(U, C.Ctx);
526 if (!W)
527 continue;
528 Changed = true;
529 C.record(W);
530 C.replace(U, W);
531 Q.push_back(C.Root);
532 break;
533 }
534 if (!Changed) {
535 for (Value *Op : U->operands())
536 Q.push_back(Op);
537 }
538 }
539 return Count < Limit ? C.Root : nullptr;
540}
541
542//===----------------------------------------------------------------------===//
543//
544// Implementation of PolynomialMultiplyRecognize
545//
546//===----------------------------------------------------------------------===//
547
548namespace {
549
550 class PolynomialMultiplyRecognize {
551 public:
552 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
553 const DominatorTree &dt, const TargetLibraryInfo &tli,
554 ScalarEvolution &se)
555 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
556
557 bool recognize();
558
559 private:
560 using ValueSeq = SetVector<Value *>;
561
562 IntegerType *getPmpyType() const {
563 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
564 return IntegerType::get(Ctx, 32);
565 }
566
567 bool isPromotableTo(Value *V, IntegerType *Ty);
568 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
569 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
570
571 Value *getCountIV(BasicBlock *BB);
572 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
573 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
574 ValueSeq &Late);
575 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
576 bool commutesWithShift(Instruction *I);
577 bool highBitsAreZero(Value *V, unsigned IterCount);
578 bool keepsHighBitsZero(Value *V, unsigned IterCount);
579 bool isOperandShifted(Instruction *I, Value *Op);
580 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
581 unsigned IterCount);
582 void cleanupLoopBody(BasicBlock *LoopB);
583
584 struct ParsedValues {
585 ParsedValues() = default;
586
587 Value *M = nullptr;
588 Value *P = nullptr;
589 Value *Q = nullptr;
590 Value *R = nullptr;
591 Value *X = nullptr;
592 Instruction *Res = nullptr;
593 unsigned IterCount = 0;
594 bool Left = false;
595 bool Inv = false;
596 };
597
598 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
599 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
600 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
601 Value *CIV, ParsedValues &PV, bool PreScan);
602 unsigned getInverseMxN(unsigned QP);
603 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
604
605 void setupSimplifier();
606
607 Simplifier Simp;
608 Loop *CurLoop;
609 const DataLayout &DL;
610 const DominatorTree &DT;
611 const TargetLibraryInfo &TLI;
612 ScalarEvolution &SE;
613 };
614
615} // end anonymous namespace
616
617Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
618 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
619 if (std::distance(PI, PE) != 2)
620 return nullptr;
621 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
622
623 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
624 auto *PN = cast<PHINode>(I);
625 Value *InitV = PN->getIncomingValueForBlock(PB);
626 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
627 continue;
628 Value *IterV = PN->getIncomingValueForBlock(BB);
629 if (!isa<BinaryOperator>(IterV))
630 continue;
631 auto *BO = dyn_cast<BinaryOperator>(IterV);
632 if (BO->getOpcode() != Instruction::Add)
633 continue;
634 Value *IncV = nullptr;
635 if (BO->getOperand(0) == PN)
636 IncV = BO->getOperand(1);
637 else if (BO->getOperand(1) == PN)
638 IncV = BO->getOperand(0);
639 if (IncV == nullptr)
640 continue;
641
642 if (auto *T = dyn_cast<ConstantInt>(IncV))
643 if (T->getZExtValue() == 1)
644 return PN;
645 }
646 return nullptr;
647}
648
649static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
650 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
651 Use &TheUse = UI.getUse();
652 ++UI;
653 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
654 if (BB == II->getParent())
655 II->replaceUsesOfWith(I, J);
656 }
657}
658
659bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
660 Value *CIV, ParsedValues &PV) {
661 // Match the following:
662 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
663 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
664 // The condition may also check for equality with the masked value, i.e
665 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
666 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
667
668 Value *CondV = SelI->getCondition();
669 Value *TrueV = SelI->getTrueValue();
670 Value *FalseV = SelI->getFalseValue();
671
672 using namespace PatternMatch;
673
674 CmpInst::Predicate P;
675 Value *A = nullptr, *B = nullptr, *C = nullptr;
676
677 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
678 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
679 return false;
680 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
681 return false;
682 // Matched: select (A & B) == C ? ... : ...
683 // select (A & B) != C ? ... : ...
684
685 Value *X = nullptr, *Sh1 = nullptr;
686 // Check (A & B) for (X & (1 << i)):
687 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
688 Sh1 = A;
689 X = B;
690 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
691 Sh1 = B;
692 X = A;
693 } else {
694 // TODO: Could also check for an induction variable containing single
695 // bit shifted left by 1 in each iteration.
696 return false;
697 }
698
699 bool TrueIfZero;
700
701 // Check C against the possible values for comparison: 0 and (1 << i):
702 if (match(C, m_Zero()))
703 TrueIfZero = (P == CmpInst::ICMP_EQ);
704 else if (C == Sh1)
705 TrueIfZero = (P == CmpInst::ICMP_NE);
706 else
707 return false;
708
709 // So far, matched:
710 // select (X & (1 << i)) ? ... : ...
711 // including variations of the check against zero/non-zero value.
712
713 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
714 if (TrueIfZero) {
715 ShouldSameV = TrueV;
716 ShouldXoredV = FalseV;
717 } else {
718 ShouldSameV = FalseV;
719 ShouldXoredV = TrueV;
720 }
721
722 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
723 Value *T = nullptr;
724 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
725 // Matched: select +++ ? ... : Y ^ Z
726 // select +++ ? Y ^ Z : ...
727 // where +++ denotes previously checked matches.
728 if (ShouldSameV == Y)
729 T = Z;
730 else if (ShouldSameV == Z)
731 T = Y;
732 else
733 return false;
734 R = ShouldSameV;
735 // Matched: select +++ ? R : R ^ T
736 // select +++ ? R ^ T : R
737 // depending on TrueIfZero.
738
739 } else if (match(ShouldSameV, m_Zero())) {
740 // Matched: select +++ ? 0 : ...
741 // select +++ ? ... : 0
742 if (!SelI->hasOneUse())
743 return false;
744 T = ShouldXoredV;
745 // Matched: select +++ ? 0 : T
746 // select +++ ? T : 0
747
748 Value *U = *SelI->user_begin();
749 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
750 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
751 return false;
752 // Matched: xor (select +++ ? 0 : T), R
753 // xor (select +++ ? T : 0), R
754 } else
755 return false;
756
757 // The xor input value T is isolated into its own match so that it could
758 // be checked against an induction variable containing a shifted bit
759 // (todo).
760 // For now, check against (Q << i).
761 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
762 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
763 return false;
764 // Matched: select +++ ? R : R ^ (Q << i)
765 // select +++ ? R ^ (Q << i) : R
766
767 PV.X = X;
768 PV.Q = Q;
769 PV.R = R;
770 PV.Left = true;
771 return true;
772}
773
774bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
775 ParsedValues &PV) {
776 // Match the following:
777 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
778 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
779 // The condition may also check for equality with the masked value, i.e
780 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
781 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
782
783 Value *CondV = SelI->getCondition();
784 Value *TrueV = SelI->getTrueValue();
785 Value *FalseV = SelI->getFalseValue();
786
787 using namespace PatternMatch;
788
789 Value *C = nullptr;
790 CmpInst::Predicate P;
791 bool TrueIfZero;
792
793 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
794 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
795 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
796 return false;
797 // Matched: select C == 0 ? ... : ...
798 // select C != 0 ? ... : ...
799 TrueIfZero = (P == CmpInst::ICMP_EQ);
800 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
801 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
802 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
803 return false;
804 // Matched: select C == 1 ? ... : ...
805 // select C != 1 ? ... : ...
806 TrueIfZero = (P == CmpInst::ICMP_NE);
807 } else
808 return false;
809
810 Value *X = nullptr;
811 if (!match(C, m_And(m_Value(X), m_One())) &&
812 !match(C, m_And(m_One(), m_Value(X))))
813 return false;
814 // Matched: select (X & 1) == +++ ? ... : ...
815 // select (X & 1) != +++ ? ... : ...
816
817 Value *R = nullptr, *Q = nullptr;
818 if (TrueIfZero) {
819 // The select's condition is true if the tested bit is 0.
820 // TrueV must be the shift, FalseV must be the xor.
821 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
822 return false;
823 // Matched: select +++ ? (R >> 1) : ...
824 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
825 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
826 return false;
827 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
828 // with commuting ^.
829 } else {
830 // The select's condition is true if the tested bit is 1.
831 // TrueV must be the xor, FalseV must be the shift.
832 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
833 return false;
834 // Matched: select +++ ? ... : (R >> 1)
835 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
836 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
837 return false;
838 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
839 // with commuting ^.
840 }
841
842 PV.X = X;
843 PV.Q = Q;
844 PV.R = R;
845 PV.Left = false;
846 return true;
847}
848
849bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
850 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
851 bool PreScan) {
852 using namespace PatternMatch;
853
854 // The basic pattern for R = P.Q is:
855 // for i = 0..31
856 // R = phi (0, R')
857 // if (P & (1 << i)) ; test-bit(P, i)
858 // R' = R ^ (Q << i)
859 //
860 // Similarly, the basic pattern for R = (P/Q).Q - P
861 // for i = 0..31
862 // R = phi(P, R')
863 // if (R & (1 << i))
864 // R' = R ^ (Q << i)
865
866 // There exist idioms, where instead of Q being shifted left, P is shifted
867 // right. This produces a result that is shifted right by 32 bits (the
868 // non-shifted result is 64-bit).
869 //
870 // For R = P.Q, this would be:
871 // for i = 0..31
872 // R = phi (0, R')
873 // if ((P >> i) & 1)
874 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
875 // else ; be shifted by 1, not i.
876 // R' = R >> 1
877 //
878 // And for the inverse:
879 // for i = 0..31
880 // R = phi (P, R')
881 // if (R & 1)
882 // R' = (R >> 1) ^ Q
883 // else
884 // R' = R >> 1
885
886 // The left-shifting idioms share the same pattern:
887 // select (X & (1 << i)) ? R ^ (Q << i) : R
888 // Similarly for right-shifting idioms:
889 // select (X & 1) ? (R >> 1) ^ Q
890
891 if (matchLeftShift(SelI, CIV, PV)) {
892 // If this is a pre-scan, getting this far is sufficient.
893 if (PreScan)
894 return true;
895
896 // Need to make sure that the SelI goes back into R.
897 auto *RPhi = dyn_cast<PHINode>(PV.R);
898 if (!RPhi)
899 return false;
900 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
901 return false;
902 PV.Res = SelI;
903
904 // If X is loop invariant, it must be the input polynomial, and the
905 // idiom is the basic polynomial multiply.
906 if (CurLoop->isLoopInvariant(PV.X)) {
907 PV.P = PV.X;
908 PV.Inv = false;
909 } else {
910 // X is not loop invariant. If X == R, this is the inverse pmpy.
911 // Otherwise, check for an xor with an invariant value. If the
912 // variable argument to the xor is R, then this is still a valid
913 // inverse pmpy.
914 PV.Inv = true;
915 if (PV.X != PV.R) {
916 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
917 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
918 return false;
919 auto *I1 = dyn_cast<Instruction>(X1);
920 auto *I2 = dyn_cast<Instruction>(X2);
921 if (!I1 || I1->getParent() != LoopB) {
922 Var = X2;
923 Inv = X1;
924 } else if (!I2 || I2->getParent() != LoopB) {
925 Var = X1;
926 Inv = X2;
927 } else
928 return false;
929 if (Var != PV.R)
930 return false;
931 PV.M = Inv;
932 }
933 // The input polynomial P still needs to be determined. It will be
934 // the entry value of R.
935 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
936 PV.P = EntryP;
937 }
938
939 return true;
940 }
941
942 if (matchRightShift(SelI, PV)) {
943 // If this is an inverse pattern, the Q polynomial must be known at
944 // compile time.
945 if (PV.Inv && !isa<ConstantInt>(PV.Q))
946 return false;
947 if (PreScan)
948 return true;
949 // There is no exact matching of right-shift pmpy.
950 return false;
951 }
952
953 return false;
954}
955
956bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
957 IntegerType *DestTy) {
958 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
959 if (!T || T->getBitWidth() > DestTy->getBitWidth())
960 return false;
961 if (T->getBitWidth() == DestTy->getBitWidth())
962 return true;
963 // Non-instructions are promotable. The reason why an instruction may not
964 // be promotable is that it may produce a different result if its operands
965 // and the result are promoted, for example, it may produce more non-zero
966 // bits. While it would still be possible to represent the proper result
967 // in a wider type, it may require adding additional instructions (which
968 // we don't want to do).
969 Instruction *In = dyn_cast<Instruction>(Val);
970 if (!In)
971 return true;
972 // The bitwidth of the source type is smaller than the destination.
973 // Check if the individual operation can be promoted.
974 switch (In->getOpcode()) {
975 case Instruction::PHI:
976 case Instruction::ZExt:
977 case Instruction::And:
978 case Instruction::Or:
979 case Instruction::Xor:
980 case Instruction::LShr: // Shift right is ok.
981 case Instruction::Select:
982 return true;
983 case Instruction::ICmp:
984 if (CmpInst *CI = cast<CmpInst>(In))
985 return CI->isEquality() || CI->isUnsigned();
986 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 986)
;
987 case Instruction::Add:
988 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
989 }
990 return false;
991}
992
993void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
994 IntegerType *DestTy, BasicBlock *LoopB) {
995 // Leave boolean values alone.
996 if (!In->getType()->isIntegerTy(1))
997 In->mutateType(DestTy);
998 unsigned DestBW = DestTy->getBitWidth();
999
1000 // Handle PHIs.
1001 if (PHINode *P = dyn_cast<PHINode>(In)) {
1002 unsigned N = P->getNumIncomingValues();
1003 for (unsigned i = 0; i != N; ++i) {
1004 BasicBlock *InB = P->getIncomingBlock(i);
1005 if (InB == LoopB)
1006 continue;
1007 Value *InV = P->getIncomingValue(i);
1008 IntegerType *Ty = cast<IntegerType>(InV->getType());
1009 // Do not promote values in PHI nodes of type i1.
1010 if (Ty != P->getType()) {
1011 // If the value type does not match the PHI type, the PHI type
1012 // must have been promoted.
1013 assert(Ty->getBitWidth() < DestBW)((Ty->getBitWidth() < DestBW) ? static_cast<void>
(0) : __assert_fail ("Ty->getBitWidth() < DestBW", "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1013, __PRETTY_FUNCTION__))
;
1014 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
1015 P->setIncomingValue(i, InV);
1016 }
1017 }
1018 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
1019 Value *Op = Z->getOperand(0);
1020 if (Op->getType() == Z->getType())
1021 Z->replaceAllUsesWith(Op);
1022 Z->eraseFromParent();
1023 return;
1024 }
1025
1026 // Promote immediates.
1027 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
1028 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
1029 if (CI->getType()->getBitWidth() < DestBW)
1030 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
1031 }
1032}
1033
1034bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
1035 BasicBlock *ExitB) {
1036 assert(LoopB)((LoopB) ? static_cast<void> (0) : __assert_fail ("LoopB"
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1036, __PRETTY_FUNCTION__))
;
1037 // Skip loops where the exit block has more than one predecessor. The values
1038 // coming from the loop block will be promoted to another type, and so the
1039 // values coming into the exit block from other predecessors would also have
1040 // to be promoted.
1041 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
1042 return false;
1043 IntegerType *DestTy = getPmpyType();
1044 // Check if the exit values have types that are no wider than the type
1045 // that we want to promote to.
1046 unsigned DestBW = DestTy->getBitWidth();
1047 for (Instruction &In : *ExitB) {
1048 PHINode *P = dyn_cast<PHINode>(&In);
1049 if (!P)
1050 break;
1051 if (P->getNumIncomingValues() != 1)
1052 return false;
1053 assert(P->getIncomingBlock(0) == LoopB)((P->getIncomingBlock(0) == LoopB) ? static_cast<void>
(0) : __assert_fail ("P->getIncomingBlock(0) == LoopB", "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1053, __PRETTY_FUNCTION__))
;
1054 IntegerType *T = dyn_cast<IntegerType>(P->getType());
1055 if (!T || T->getBitWidth() > DestBW)
1056 return false;
1057 }
1058
1059 // Check all instructions in the loop.
1060 for (Instruction &In : *LoopB)
1061 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
1062 return false;
1063
1064 // Perform the promotion.
1065 std::vector<Instruction*> LoopIns;
1066 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
1067 [](Instruction &In) { return &In; });
1068 for (Instruction *In : LoopIns)
1069 promoteTo(In, DestTy, LoopB);
1070
1071 // Fix up the PHI nodes in the exit block.
1072 Instruction *EndI = ExitB->getFirstNonPHI();
1073 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
1074 for (auto I = ExitB->begin(); I != End; ++I) {
1075 PHINode *P = dyn_cast<PHINode>(I);
1076 if (!P)
1077 break;
1078 Type *Ty0 = P->getIncomingValue(0)->getType();
1079 Type *PTy = P->getType();
1080 if (PTy != Ty0) {
1081 assert(Ty0 == DestTy)((Ty0 == DestTy) ? static_cast<void> (0) : __assert_fail
("Ty0 == DestTy", "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1081, __PRETTY_FUNCTION__))
;
1082 // In order to create the trunc, P must have the promoted type.
1083 P->mutateType(Ty0);
1084 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1085 // In order for the RAUW to work, the types of P and T must match.
1086 P->mutateType(PTy);
1087 P->replaceAllUsesWith(T);
1088 // Final update of the P's type.
1089 P->mutateType(Ty0);
1090 cast<Instruction>(T)->setOperand(0, P);
1091 }
1092 }
1093
1094 return true;
1095}
1096
1097bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1098 ValueSeq &Cycle) {
1099 // Out = ..., In, ...
1100 if (Out == In)
1101 return true;
1102
1103 auto *BB = cast<Instruction>(Out)->getParent();
1104 bool HadPhi = false;
1105
1106 for (auto U : Out->users()) {
1107 auto *I = dyn_cast<Instruction>(&*U);
1108 if (I == nullptr || I->getParent() != BB)
1109 continue;
1110 // Make sure that there are no multi-iteration cycles, e.g.
1111 // p1 = phi(p2)
1112 // p2 = phi(p1)
1113 // The cycle p1->p2->p1 would span two loop iterations.
1114 // Check that there is only one phi in the cycle.
1115 bool IsPhi = isa<PHINode>(I);
1116 if (IsPhi && HadPhi)
1117 return false;
1118 HadPhi |= IsPhi;
1119 if (Cycle.count(I))
1120 return false;
1121 Cycle.insert(I);
1122 if (findCycle(I, In, Cycle))
1123 break;
1124 Cycle.remove(I);
1125 }
1126 return !Cycle.empty();
1127}
1128
1129void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1130 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1131 // All the values in the cycle that are between the phi node and the
1132 // divider instruction will be classified as "early", all other values
1133 // will be "late".
1134
1135 bool IsE = true;
1136 unsigned I, N = Cycle.size();
1137 for (I = 0; I < N; ++I) {
1138 Value *V = Cycle[I];
1139 if (DivI == V)
1140 IsE = false;
1141 else if (!isa<PHINode>(V))
1142 continue;
1143 // Stop if found either.
1144 break;
1145 }
1146 // "I" is the index of either DivI or the phi node, whichever was first.
1147 // "E" is "false" or "true" respectively.
1148 ValueSeq &First = !IsE ? Early : Late;
1149 for (unsigned J = 0; J < I; ++J)
1150 First.insert(Cycle[J]);
1151
1152 ValueSeq &Second = IsE ? Early : Late;
1153 Second.insert(Cycle[I]);
1154 for (++I; I < N; ++I) {
1155 Value *V = Cycle[I];
1156 if (DivI == V || isa<PHINode>(V))
1157 break;
1158 Second.insert(V);
1159 }
1160
1161 for (; I < N; ++I)
1162 First.insert(Cycle[I]);
1163}
1164
1165bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1166 ValueSeq &Early, ValueSeq &Late) {
1167 // Select is an exception, since the condition value does not have to be
1168 // classified in the same way as the true/false values. The true/false
1169 // values do have to be both early or both late.
1170 if (UseI->getOpcode() == Instruction::Select) {
1171 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1172 if (Early.count(TV) || Early.count(FV)) {
1173 if (Late.count(TV) || Late.count(FV))
1174 return false;
1175 Early.insert(UseI);
1176 } else if (Late.count(TV) || Late.count(FV)) {
1177 if (Early.count(TV) || Early.count(FV))
1178 return false;
1179 Late.insert(UseI);
1180 }
1181 return true;
1182 }
1183
1184 // Not sure what would be the example of this, but the code below relies
1185 // on having at least one operand.
1186 if (UseI->getNumOperands() == 0)
1187 return true;
1188
1189 bool AE = true, AL = true;
1190 for (auto &I : UseI->operands()) {
1191 if (Early.count(&*I))
1192 AL = false;
1193 else if (Late.count(&*I))
1194 AE = false;
1195 }
1196 // If the operands appear "all early" and "all late" at the same time,
1197 // then it means that none of them are actually classified as either.
1198 // This is harmless.
1199 if (AE && AL)
1200 return true;
1201 // Conversely, if they are neither "all early" nor "all late", then
1202 // we have a mixture of early and late operands that is not a known
1203 // exception.
1204 if (!AE && !AL)
1205 return false;
1206
1207 // Check that we have covered the two special cases.
1208 assert(AE != AL)((AE != AL) ? static_cast<void> (0) : __assert_fail ("AE != AL"
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1208, __PRETTY_FUNCTION__))
;
1209
1210 if (AE)
1211 Early.insert(UseI);
1212 else
1213 Late.insert(UseI);
1214 return true;
1215}
1216
1217bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1218 switch (I->getOpcode()) {
1219 case Instruction::And:
1220 case Instruction::Or:
1221 case Instruction::Xor:
1222 case Instruction::LShr:
1223 case Instruction::Shl:
1224 case Instruction::Select:
1225 case Instruction::ICmp:
1226 case Instruction::PHI:
1227 break;
1228 default:
1229 return false;
1230 }
1231 return true;
1232}
1233
1234bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1235 unsigned IterCount) {
1236 auto *T = dyn_cast<IntegerType>(V->getType());
1237 if (!T)
1238 return false;
1239
1240 KnownBits Known(T->getBitWidth());
1241 computeKnownBits(V, Known, DL);
1242 return Known.countMinLeadingZeros() >= IterCount;
1243}
1244
1245bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1246 unsigned IterCount) {
1247 // Assume that all inputs to the value have the high bits zero.
1248 // Check if the value itself preserves the zeros in the high bits.
1249 if (auto *C = dyn_cast<ConstantInt>(V))
1250 return C->getValue().countLeadingZeros() >= IterCount;
1251
1252 if (auto *I = dyn_cast<Instruction>(V)) {
1253 switch (I->getOpcode()) {
1254 case Instruction::And:
1255 case Instruction::Or:
1256 case Instruction::Xor:
1257 case Instruction::LShr:
1258 case Instruction::Select:
1259 case Instruction::ICmp:
1260 case Instruction::PHI:
1261 case Instruction::ZExt:
1262 return true;
1263 }
1264 }
1265
1266 return false;
1267}
1268
1269bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1270 unsigned Opc = I->getOpcode();
1271 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1272 return Op != I->getOperand(1);
1273 return true;
1274}
1275
1276bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1277 BasicBlock *ExitB, unsigned IterCount) {
1278 Value *CIV = getCountIV(LoopB);
1279 if (CIV == nullptr)
1280 return false;
1281 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1282 if (CIVTy == nullptr)
1283 return false;
1284
1285 ValueSeq RShifts;
1286 ValueSeq Early, Late, Cycled;
1287
1288 // Find all value cycles that contain logical right shifts by 1.
1289 for (Instruction &I : *LoopB) {
1290 using namespace PatternMatch;
1291
1292 Value *V = nullptr;
1293 if (!match(&I, m_LShr(m_Value(V), m_One())))
1294 continue;
1295 ValueSeq C;
1296 if (!findCycle(&I, V, C))
1297 continue;
1298
1299 // Found a cycle.
1300 C.insert(&I);
1301 classifyCycle(&I, C, Early, Late);
1302 Cycled.insert(C.begin(), C.end());
1303 RShifts.insert(&I);
1304 }
1305
1306 // Find the set of all values affected by the shift cycles, i.e. all
1307 // cycled values, and (recursively) all their users.
1308 ValueSeq Users(Cycled.begin(), Cycled.end());
1309 for (unsigned i = 0; i < Users.size(); ++i) {
1310 Value *V = Users[i];
1311 if (!isa<IntegerType>(V->getType()))
1312 return false;
1313 auto *R = cast<Instruction>(V);
1314 // If the instruction does not commute with shifts, the loop cannot
1315 // be unshifted.
1316 if (!commutesWithShift(R))
1317 return false;
1318 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1319 auto *T = cast<Instruction>(*I);
1320 // Skip users from outside of the loop. They will be handled later.
1321 // Also, skip the right-shifts and phi nodes, since they mix early
1322 // and late values.
1323 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1324 continue;
1325
1326 Users.insert(T);
1327 if (!classifyInst(T, Early, Late))
1328 return false;
1329 }
1330 }
1331
1332 if (Users.empty())
1333 return false;
1334
1335 // Verify that high bits remain zero.
1336 ValueSeq Internal(Users.begin(), Users.end());
1337 ValueSeq Inputs;
1338 for (unsigned i = 0; i < Internal.size(); ++i) {
1339 auto *R = dyn_cast<Instruction>(Internal[i]);
1340 if (!R)
1341 continue;
1342 for (Value *Op : R->operands()) {
1343 auto *T = dyn_cast<Instruction>(Op);
1344 if (T && T->getParent() != LoopB)
1345 Inputs.insert(Op);
1346 else
1347 Internal.insert(Op);
1348 }
1349 }
1350 for (Value *V : Inputs)
1351 if (!highBitsAreZero(V, IterCount))
1352 return false;
1353 for (Value *V : Internal)
1354 if (!keepsHighBitsZero(V, IterCount))
1355 return false;
1356
1357 // Finally, the work can be done. Unshift each user.
1358 IRBuilder<> IRB(LoopB);
1359 std::map<Value*,Value*> ShiftMap;
1360
1361 using CastMapType = std::map<std::pair<Value *, Type *>, Value *>;
1362
1363 CastMapType CastMap;
1364
1365 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1366 IntegerType *Ty) -> Value* {
1367 auto H = CM.find(std::make_pair(V, Ty));
1368 if (H != CM.end())
1369 return H->second;
1370 Value *CV = IRB.CreateIntCast(V, Ty, false);
1371 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1372 return CV;
1373 };
1374
1375 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1376 using namespace PatternMatch;
1377
1378 if (isa<PHINode>(I) || !Users.count(&*I))
1379 continue;
1380
1381 // Match lshr x, 1.
1382 Value *V = nullptr;
1383 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1384 replaceAllUsesOfWithIn(&*I, V, LoopB);
1385 continue;
1386 }
1387 // For each non-cycled operand, replace it with the corresponding
1388 // value shifted left.
1389 for (auto &J : I->operands()) {
1390 Value *Op = J.get();
1391 if (!isOperandShifted(&*I, Op))
1392 continue;
1393 if (Users.count(Op))
1394 continue;
1395 // Skip shifting zeros.
1396 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1397 continue;
1398 // Check if we have already generated a shift for this value.
1399 auto F = ShiftMap.find(Op);
1400 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1401 if (W == nullptr) {
1402 IRB.SetInsertPoint(&*I);
1403 // First, the shift amount will be CIV or CIV+1, depending on
1404 // whether the value is early or late. Instead of creating CIV+1,
1405 // do a single shift of the value.
1406 Value *ShAmt = CIV, *ShVal = Op;
1407 auto *VTy = cast<IntegerType>(ShVal->getType());
1408 auto *ATy = cast<IntegerType>(ShAmt->getType());
1409 if (Late.count(&*I))
1410 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1411 // Second, the types of the shifted value and the shift amount
1412 // must match.
1413 if (VTy != ATy) {
1414 if (VTy->getBitWidth() < ATy->getBitWidth())
1415 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1416 else
1417 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1418 }
1419 // Ready to generate the shift and memoize it.
1420 W = IRB.CreateShl(ShVal, ShAmt);
1421 ShiftMap.insert(std::make_pair(Op, W));
1422 }
1423 I->replaceUsesOfWith(Op, W);
1424 }
1425 }
1426
1427 // Update the users outside of the loop to account for having left
1428 // shifts. They would normally be shifted right in the loop, so shift
1429 // them right after the loop exit.
1430 // Take advantage of the loop-closed SSA form, which has all the post-
1431 // loop values in phi nodes.
1432 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1433 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1434 if (!isa<PHINode>(P))
1435 break;
1436 auto *PN = cast<PHINode>(P);
1437 Value *U = PN->getIncomingValueForBlock(LoopB);
1438 if (!Users.count(U))
1439 continue;
1440 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1441 PN->replaceAllUsesWith(S);
1442 // The above RAUW will create
1443 // S = lshr S, IterCount
1444 // so we need to fix it back into
1445 // S = lshr PN, IterCount
1446 cast<User>(S)->replaceUsesOfWith(S, PN);
1447 }
1448
1449 return true;
1450}
1451
1452void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1453 for (auto &I : *LoopB)
1454 if (Value *SV = SimplifyInstruction(&I, {DL, &TLI, &DT}))
1455 I.replaceAllUsesWith(SV);
1456
1457 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1458 N = std::next(I);
1459 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1460 }
1461}
1462
1463unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1464 // Arrays of coefficients of Q and the inverse, C.
1465 // Q[i] = coefficient at x^i.
1466 std::array<char,32> Q, C;
1467
1468 for (unsigned i = 0; i < 32; ++i) {
1469 Q[i] = QP & 1;
1470 QP >>= 1;
1471 }
1472 assert(Q[0] == 1)((Q[0] == 1) ? static_cast<void> (0) : __assert_fail ("Q[0] == 1"
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1472, __PRETTY_FUNCTION__))
;
1473
1474 // Find C, such that
1475 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1476 //
1477 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1478 // operations * and + are & and ^ respectively.
1479 //
1480 // Find C[i] recursively, by comparing i-th coefficient in the product
1481 // with 0 (or 1 for i=0).
1482 //
1483 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1484 C[0] = 1;
1485 for (unsigned i = 1; i < 32; ++i) {
1486 // Solve for C[i] in:
1487 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1488 // This is equivalent to
1489 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1490 // which is
1491 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1492 unsigned T = 0;
1493 for (unsigned j = 0; j < i; ++j)
1494 T = T ^ (C[j] & Q[i-j]);
1495 C[i] = T;
1496 }
1497
1498 unsigned QV = 0;
1499 for (unsigned i = 0; i < 32; ++i)
1500 if (C[i])
1501 QV |= (1 << i);
1502
1503 return QV;
1504}
1505
1506Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1507 ParsedValues &PV) {
1508 IRBuilder<> B(&*At);
1509 Module *M = At->getParent()->getParent()->getParent();
1510 Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1511
1512 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1513 unsigned IC = PV.IterCount;
1514
1515 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1516 P0 = P = B.CreateXor(P, PV.M);
1517
1518 // Create a bit mask to clear the high bits beyond IterCount.
1519 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
1520
1521 if (PV.IterCount != 32)
3
Assuming the condition is false
4
Taking false branch
1522 P = B.CreateAnd(P, BMI);
1523
1524 if (PV.Inv) {
5
Assuming the condition is true
6
Taking true branch
1525 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1526 assert(QI && QI->getBitWidth() <= 32)((QI && QI->getBitWidth() <= 32) ? static_cast<
void> (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1526, __PRETTY_FUNCTION__))
;
1527
1528 // Again, clearing bits beyond IterCount.
1529 unsigned M = (1 << PV.IterCount) - 1;
7
The result of the left shift is undefined due to shifting by '32', which is greater or equal to the width of type 'int'
1530 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1531 unsigned QV = getInverseMxN(Tmp) & M;
1532 auto *QVI = ConstantInt::get(QI->getType(), QV);
1533 P = B.CreateCall(PMF, {P, QVI});
1534 P = B.CreateTrunc(P, QI->getType());
1535 if (IC != 32)
1536 P = B.CreateAnd(P, BMI);
1537 }
1538
1539 Value *R = B.CreateCall(PMF, {P, Q});
1540
1541 if (PV.M != nullptr)
1542 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1543
1544 return R;
1545}
1546
1547void PolynomialMultiplyRecognize::setupSimplifier() {
1548 Simp.addRule(
1549 // Sink zext past bitwise operations.
1550 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1551 if (I->getOpcode() != Instruction::ZExt)
1552 return nullptr;
1553 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1554 if (!T)
1555 return nullptr;
1556 switch (T->getOpcode()) {
1557 case Instruction::And:
1558 case Instruction::Or:
1559 case Instruction::Xor:
1560 break;
1561 default:
1562 return nullptr;
1563 }
1564 IRBuilder<> B(Ctx);
1565 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1566 B.CreateZExt(T->getOperand(0), I->getType()),
1567 B.CreateZExt(T->getOperand(1), I->getType()));
1568 });
1569 Simp.addRule(
1570 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1571 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1572 if (I->getOpcode() != Instruction::Xor)
1573 return nullptr;
1574 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1575 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1576 if (!And0 || !And1)
1577 return nullptr;
1578 if (And0->getOpcode() != Instruction::And ||
1579 And1->getOpcode() != Instruction::And)
1580 return nullptr;
1581 if (And0->getOperand(1) != And1->getOperand(1))
1582 return nullptr;
1583 IRBuilder<> B(Ctx);
1584 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1585 And0->getOperand(1));
1586 });
1587 Simp.addRule(
1588 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1589 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1590 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1591 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1592 if (!BO)
1593 return nullptr;
1594 Instruction::BinaryOps Op = BO->getOpcode();
1595 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1596 IRBuilder<> B(Ctx);
1597 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1598 Value *Z = BO->getOperand(1);
1599 return B.CreateSelect(Sel->getCondition(),
1600 B.CreateBinOp(Op, X, Z),
1601 B.CreateBinOp(Op, Y, Z));
1602 }
1603 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1604 IRBuilder<> B(Ctx);
1605 Value *X = BO->getOperand(0);
1606 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1607 return B.CreateSelect(Sel->getCondition(),
1608 B.CreateBinOp(Op, X, Y),
1609 B.CreateBinOp(Op, X, Z));
1610 }
1611 return nullptr;
1612 });
1613 Simp.addRule(
1614 // (select c (select c x y) z) -> (select c x z)
1615 // (select c x (select c y z)) -> (select c x z)
1616 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1617 SelectInst *Sel = dyn_cast<SelectInst>(I);
1618 if (!Sel)
1619 return nullptr;
1620 IRBuilder<> B(Ctx);
1621 Value *C = Sel->getCondition();
1622 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1623 if (Sel0->getCondition() == C)
1624 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1625 }
1626 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1627 if (Sel1->getCondition() == C)
1628 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1629 }
1630 return nullptr;
1631 });
1632 Simp.addRule(
1633 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1634 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1635 if (I->getOpcode() != Instruction::Or)
1636 return nullptr;
1637 Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0));
1638 if (!LShr || LShr->getOpcode() != Instruction::LShr)
1639 return nullptr;
1640 ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1));
1641 if (!One || One->getZExtValue() != 1)
1642 return nullptr;
1643 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1644 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1645 return nullptr;
1646 return IRBuilder<>(Ctx).CreateXor(LShr, Msb);
1647 });
1648 Simp.addRule(
1649 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1650 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1651 if (I->getOpcode() != Instruction::LShr)
1652 return nullptr;
1653 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1654 if (!BitOp)
1655 return nullptr;
1656 switch (BitOp->getOpcode()) {
1657 case Instruction::And:
1658 case Instruction::Or:
1659 case Instruction::Xor:
1660 break;
1661 default:
1662 return nullptr;
1663 }
1664 IRBuilder<> B(Ctx);
1665 Value *S = I->getOperand(1);
1666 return B.CreateBinOp(BitOp->getOpcode(),
1667 B.CreateLShr(BitOp->getOperand(0), S),
1668 B.CreateLShr(BitOp->getOperand(1), S));
1669 });
1670 Simp.addRule(
1671 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1672 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1673 auto IsBitOp = [](unsigned Op) -> bool {
1674 switch (Op) {
1675 case Instruction::And:
1676 case Instruction::Or:
1677 case Instruction::Xor:
1678 return true;
1679 }
1680 return false;
1681 };
1682 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1683 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1684 return nullptr;
1685 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1686 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1687 return nullptr;
1688 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1689 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1690 if (!CA || !CB)
1691 return nullptr;
1692 IRBuilder<> B(Ctx);
1693 Value *X = BitOp2->getOperand(0);
1694 return B.CreateBinOp(BitOp2->getOpcode(), X,
1695 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1696 });
1697}
1698
1699bool PolynomialMultiplyRecognize::recognize() {
1700 DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1701 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1702 // Restrictions:
1703 // - The loop must consist of a single block.
1704 // - The iteration count must be known at compile-time.
1705 // - The loop must have an induction variable starting from 0, and
1706 // incremented in each iteration of the loop.
1707 BasicBlock *LoopB = CurLoop->getHeader();
1708 DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1709
1710 if (LoopB != CurLoop->getLoopLatch())
1711 return false;
1712 BasicBlock *ExitB = CurLoop->getExitBlock();
1713 if (ExitB == nullptr)
1714 return false;
1715 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1716 if (EntryB == nullptr)
1717 return false;
1718
1719 unsigned IterCount = 0;
1720 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1721 if (isa<SCEVCouldNotCompute>(CT))
1722 return false;
1723 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1724 IterCount = CV->getValue()->getZExtValue() + 1;
1725
1726 Value *CIV = getCountIV(LoopB);
1727 ParsedValues PV;
1728 PV.IterCount = IterCount;
1729 DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1730
1731 setupSimplifier();
1732
1733 // Perform a preliminary scan of select instructions to see if any of them
1734 // looks like a generator of the polynomial multiply steps. Assume that a
1735 // loop can only contain a single transformable operation, so stop the
1736 // traversal after the first reasonable candidate was found.
1737 // XXX: Currently this approach can modify the loop before being 100% sure
1738 // that the transformation can be carried out.
1739 bool FoundPreScan = false;
1740 for (Instruction &In : *LoopB) {
1741 SelectInst *SI = dyn_cast<SelectInst>(&In);
1742 if (!SI)
1743 continue;
1744
1745 Simplifier::Context C(SI);
1746 Value *T = Simp.simplify(C);
1747 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1748 DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1749 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1750 FoundPreScan = true;
1751 if (SelI != SI) {
1752 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1753 SI->replaceAllUsesWith(NewSel);
1754 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1755 }
1756 break;
1757 }
1758 }
1759
1760 if (!FoundPreScan) {
1761 DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1762 return false;
1763 }
1764
1765 if (!PV.Left) {
1766 // The right shift version actually only returns the higher bits of
1767 // the result (each iteration discards the LSB). If we want to convert it
1768 // to a left-shifting loop, the working data type must be at least as
1769 // wide as the target's pmpy instruction.
1770 if (!promoteTypes(LoopB, ExitB))
1771 return false;
1772 if (!convertShiftsToLeft(LoopB, ExitB, IterCount))
1773 return false;
1774 cleanupLoopBody(LoopB);
1775 }
1776
1777 // Scan the loop again, find the generating select instruction.
1778 bool FoundScan = false;
1779 for (Instruction &In : *LoopB) {
1780 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1781 if (!SelI)
1782 continue;
1783 DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1784 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1785 if (FoundScan)
1786 break;
1787 }
1788 assert(FoundScan)((FoundScan) ? static_cast<void> (0) : __assert_fail ("FoundScan"
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1788, __PRETTY_FUNCTION__))
;
1789
1790 DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1791 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1792 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1793 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1794 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1795 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1796 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1797 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1798 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1799 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1800 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1801 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1802 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1803
1804 BasicBlock::iterator At(EntryB->getTerminator());
1805 Value *PM = generate(At, PV);
1806 if (PM == nullptr)
1807 return false;
1808
1809 if (PM->getType() != PV.Res->getType())
1810 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1811
1812 PV.Res->replaceAllUsesWith(PM);
1813 PV.Res->eraseFromParent();
1814 return true;
1815}
1816
1817unsigned HexagonLoopIdiomRecognize::getStoreSizeInBytes(StoreInst *SI) {
1818 uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
1819 assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&((((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
"Don't overflow unsigned.") ? static_cast<void> (0) : __assert_fail
("((SizeInBits & 7) || (SizeInBits >> 32) == 0) && \"Don't overflow unsigned.\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1820, __PRETTY_FUNCTION__))
1820 "Don't overflow unsigned.")((((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
"Don't overflow unsigned.") ? static_cast<void> (0) : __assert_fail
("((SizeInBits & 7) || (SizeInBits >> 32) == 0) && \"Don't overflow unsigned.\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1820, __PRETTY_FUNCTION__))
;
1821 return (unsigned)SizeInBits >> 3;
1822}
1823
1824int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1825 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1826 return SC->getAPInt().getSExtValue();
1827 return 0;
1828}
1829
1830bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1831 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1832 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1833 return false;
1834
1835 Value *StoredVal = SI->getValueOperand();
1836 Value *StorePtr = SI->getPointerOperand();
1837
1838 // Reject stores that are so large that they overflow an unsigned.
1839 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1840 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1841 return false;
1842
1843 // See if the pointer expression is an AddRec like {base,+,1} on the current
1844 // loop, which indicates a strided store. If we have something else, it's a
1845 // random store we can't handle.
1846 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1847 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1848 return false;
1849
1850 // Check to see if the stride matches the size of the store. If so, then we
1851 // know that every byte is touched in the loop.
1852 int Stride = getSCEVStride(StoreEv);
1853 if (Stride == 0)
1854 return false;
1855 unsigned StoreSize = getStoreSizeInBytes(SI);
1856 if (StoreSize != unsigned(std::abs(Stride)))
1857 return false;
1858
1859 // The store must be feeding a non-volatile load.
1860 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1861 if (!LI || !LI->isSimple())
1862 return false;
1863
1864 // See if the pointer expression is an AddRec like {base,+,1} on the current
1865 // loop, which indicates a strided load. If we have something else, it's a
1866 // random load we can't handle.
1867 Value *LoadPtr = LI->getPointerOperand();
1868 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1869 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1870 return false;
1871
1872 // The store and load must share the same stride.
1873 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1874 return false;
1875
1876 // Success. This store can be converted into a memcpy.
1877 return true;
1878}
1879
1880/// mayLoopAccessLocation - Return true if the specified loop might access the
1881/// specified pointer location, which is a loop-strided access. The 'Access'
1882/// argument specifies what the verboten forms of access are (read or write).
1883static bool
1884mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1885 const SCEV *BECount, unsigned StoreSize,
1886 AliasAnalysis &AA,
1887 SmallPtrSetImpl<Instruction *> &Ignored) {
1888 // Get the location that may be stored across the loop. Since the access
1889 // is strided positively through memory, we say that the modified location
1890 // starts at the pointer and has infinite size.
1891 uint64_t AccessSize = MemoryLocation::UnknownSize;
1892
1893 // If the loop iterates a fixed number of times, we can refine the access
1894 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1895 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1896 AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
1897
1898 // TODO: For this to be really effective, we have to dive into the pointer
1899 // operand in the store. Store to &A[i] of 100 will always return may alias
1900 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1901 // which will then no-alias a store to &A[100].
1902 MemoryLocation StoreLoc(Ptr, AccessSize);
1903
1904 for (auto *B : L->blocks())
1905 for (auto &I : *B)
1906 if (Ignored.count(&I) == 0 && (AA.getModRefInfo(&I, StoreLoc) & Access))
1907 return true;
1908
1909 return false;
1910}
1911
1912void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
1913 SmallVectorImpl<StoreInst*> &Stores) {
1914 Stores.clear();
1915 for (Instruction &I : *BB)
1916 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
1917 if (isLegalStore(CurLoop, SI))
1918 Stores.push_back(SI);
1919}
1920
1921bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
1922 StoreInst *SI, const SCEV *BECount) {
1923 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1925, __PRETTY_FUNCTION__))
1924 "Expected only non-volatile stores, or Hexagon-specific memcpy"(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1925, __PRETTY_FUNCTION__))
1925 "to volatile destination.")(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1925, __PRETTY_FUNCTION__))
;
1926
1927 Value *StorePtr = SI->getPointerOperand();
1928 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1929 unsigned Stride = getSCEVStride(StoreEv);
1930 unsigned StoreSize = getStoreSizeInBytes(SI);
1931 if (Stride != StoreSize)
1932 return false;
1933
1934 // See if the pointer expression is an AddRec like {base,+,1} on the current
1935 // loop, which indicates a strided load. If we have something else, it's a
1936 // random load we can't handle.
1937 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1938 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1939
1940 // The trip count of the loop and the base pointer of the addrec SCEV is
1941 // guaranteed to be loop invariant, which means that it should dominate the
1942 // header. This allows us to insert code for it in the preheader.
1943 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1944 Instruction *ExpPt = Preheader->getTerminator();
1945 IRBuilder<> Builder(ExpPt);
1946 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
1947
1948 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
1949
1950 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1951 // this into a memcpy/memmove in the loop preheader now if we want. However,
1952 // this would be unsafe to do if there is anything else in the loop that may
1953 // read or write the memory region we're storing to. For memcpy, this
1954 // includes the load that feeds the stores. Check for an alias by generating
1955 // the base address and checking everything.
1956 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
1957 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
1958 Value *LoadBasePtr = nullptr;
1959
1960 bool Overlap = false;
1961 bool DestVolatile = SI->isVolatile();
1962 Type *BECountTy = BECount->getType();
1963
1964 if (DestVolatile) {
1965 // The trip count must fit in i32, since it is the type of the "num_words"
1966 // argument to hexagon_memcpy_forward_vp4cp4n2.
1967 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
1968CleanupAndExit:
1969 // If we generated new code for the base pointer, clean up.
1970 Expander.clear();
1971 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
1972 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1973 StoreBasePtr = nullptr;
1974 }
1975 if (LoadBasePtr) {
1976 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
1977 LoadBasePtr = nullptr;
1978 }
1979 return false;
1980 }
1981 }
1982
1983 SmallPtrSet<Instruction*, 2> Ignore1;
1984 Ignore1.insert(SI);
1985 if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
1986 StoreSize, *AA, Ignore1)) {
1987 // Check if the load is the offending instruction.
1988 Ignore1.insert(LI);
1989 if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
1990 StoreSize, *AA, Ignore1)) {
1991 // Still bad. Nothing we can do.
1992 goto CleanupAndExit;
1993 }
1994 // It worked with the load ignored.
1995 Overlap = true;
1996 }
1997
1998 if (!Overlap) {
1999 if (DisableMemcpyIdiom || !HasMemcpy)
2000 goto CleanupAndExit;
2001 } else {
2002 // Don't generate memmove if this function will be inlined. This is
2003 // because the caller will undergo this transformation after inlining.
2004 Function *Func = CurLoop->getHeader()->getParent();
2005 if (Func->hasFnAttribute(Attribute::AlwaysInline))
2006 goto CleanupAndExit;
2007
2008 // In case of a memmove, the call to memmove will be executed instead
2009 // of the loop, so we need to make sure that there is nothing else in
2010 // the loop than the load, store and instructions that these two depend
2011 // on.
2012 SmallVector<Instruction*,2> Insts;
2013 Insts.push_back(SI);
2014 Insts.push_back(LI);
2015 if (!coverLoop(CurLoop, Insts))
2016 goto CleanupAndExit;
2017
2018 if (DisableMemmoveIdiom || !HasMemmove)
2019 goto CleanupAndExit;
2020 bool IsNested = CurLoop->getParentLoop() != nullptr;
2021 if (IsNested && OnlyNonNestedMemmove)
2022 goto CleanupAndExit;
2023 }
2024
2025 // For a memcpy, we have to make sure that the input array is not being
2026 // mutated by the loop.
2027 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
2028 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
2029
2030 SmallPtrSet<Instruction*, 2> Ignore2;
2031 Ignore2.insert(SI);
2032 if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
2033 *AA, Ignore2))
2034 goto CleanupAndExit;
2035
2036 // Check the stride.
2037 bool StridePos = getSCEVStride(LoadEv) >= 0;
2038
2039 // Currently, the volatile memcpy only emulates traversing memory forward.
2040 if (!StridePos && DestVolatile)
2041 goto CleanupAndExit;
2042
2043 bool RuntimeCheck = (Overlap || DestVolatile);
2044
2045 BasicBlock *ExitB;
2046 if (RuntimeCheck) {
2047 // The runtime check needs a single exit block.
2048 SmallVector<BasicBlock*, 8> ExitBlocks;
2049 CurLoop->getUniqueExitBlocks(ExitBlocks);
2050 if (ExitBlocks.size() != 1)
2051 goto CleanupAndExit;
2052 ExitB = ExitBlocks[0];
2053 }
2054
2055 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
2056 // pointer size if it isn't already.
2057 LLVMContext &Ctx = SI->getContext();
2058 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
2059 unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());
2060 DebugLoc DLoc = SI->getDebugLoc();
2061
2062 const SCEV *NumBytesS =
2063 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2064 if (StoreSize != 1)
2065 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2066 SCEV::FlagNUW);
2067 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2068 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2069 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2070 NumBytes = Simp;
2071
2072 CallInst *NewCall;
2073
2074 if (RuntimeCheck) {
2075 unsigned Threshold = RuntimeMemSizeThreshold;
2076 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2077 uint64_t C = CI->getZExtValue();
2078 if (Threshold != 0 && C < Threshold)
2079 goto CleanupAndExit;
2080 if (C < CompileTimeMemSizeThreshold)
2081 goto CleanupAndExit;
2082 }
2083
2084 BasicBlock *Header = CurLoop->getHeader();
2085 Function *Func = Header->getParent();
2086 Loop *ParentL = LF->getLoopFor(Preheader);
2087 StringRef HeaderName = Header->getName();
2088
2089 // Create a new (empty) preheader, and update the PHI nodes in the
2090 // header to use the new preheader.
2091 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2092 Func, Header);
2093 if (ParentL)
2094 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2095 IRBuilder<>(NewPreheader).CreateBr(Header);
2096 for (auto &In : *Header) {
2097 PHINode *PN = dyn_cast<PHINode>(&In);
2098 if (!PN)
2099 break;
2100 int bx = PN->getBasicBlockIndex(Preheader);
2101 if (bx >= 0)
2102 PN->setIncomingBlock(bx, NewPreheader);
2103 }
2104 DT->addNewBlock(NewPreheader, Preheader);
2105 DT->changeImmediateDominator(Header, NewPreheader);
2106
2107 // Check for safe conditions to execute memmove.
2108 // If stride is positive, copying things from higher to lower addresses
2109 // is equivalent to memmove. For negative stride, it's the other way
2110 // around. Copying forward in memory with positive stride may not be
2111 // same as memmove since we may be copying values that we just stored
2112 // in some previous iteration.
2113 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2114 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2115 Value *LowA = StridePos ? SA : LA;
2116 Value *HighA = StridePos ? LA : SA;
2117 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2118 Value *Cond = CmpA;
2119
2120 // Check for distance between pointers. Since the case LowA < HighA
2121 // is checked for above, assume LowA >= HighA.
2122 Value *Dist = Builder.CreateSub(LowA, HighA);
2123 Value *CmpD = Builder.CreateICmpSLE(NumBytes, Dist);
2124 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2125 Cond = CmpEither;
2126
2127 if (Threshold != 0) {
2128 Type *Ty = NumBytes->getType();
2129 Value *Thr = ConstantInt::get(Ty, Threshold);
2130 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2131 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2132 Cond = CmpBoth;
2133 }
2134 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2135 Func, NewPreheader);
2136 if (ParentL)
2137 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2138 Instruction *OldT = Preheader->getTerminator();
2139 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2140 OldT->eraseFromParent();
2141 Preheader->setName(Preheader->getName()+".old");
2142 DT->addNewBlock(MemmoveB, Preheader);
2143 // Find the new immediate dominator of the exit block.
2144 BasicBlock *ExitD = Preheader;
2145 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2146 BasicBlock *PB = *PI;
2147 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2148 if (!ExitD)
2149 break;
2150 }
2151 // If the prior immediate dominator of ExitB was dominated by the
2152 // old preheader, then the old preheader becomes the new immediate
2153 // dominator. Otherwise don't change anything (because the newly
2154 // added blocks are dominated by the old preheader).
2155 if (ExitD && DT->dominates(Preheader, ExitD)) {
2156 DomTreeNode *BN = DT->getNode(ExitB);
2157 DomTreeNode *DN = DT->getNode(ExitD);
2158 BN->setIDom(DN);
2159 }
2160
2161 // Add a call to memmove to the conditional block.
2162 IRBuilder<> CondBuilder(MemmoveB);
2163 CondBuilder.CreateBr(ExitB);
2164 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2165
2166 if (DestVolatile) {
2167 Type *Int32Ty = Type::getInt32Ty(Ctx);
2168 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2169 Type *VoidTy = Type::getVoidTy(Ctx);
2170 Module *M = Func->getParent();
2171 Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
2172 Int32PtrTy, Int32PtrTy, Int32Ty);
2173 Function *Fn = cast<Function>(CF);
2174 Fn->setLinkage(Function::ExternalLinkage);
2175
2176 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2177 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2178 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2179 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2180 MemmoveB->getTerminator());
2181 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2182 if (Value *Simp = SimplifyInstruction(In, {*DL, TLI, DT}))
2183 NumWords = Simp;
2184
2185 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2186 ? StoreBasePtr
2187 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2188 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2189 ? LoadBasePtr
2190 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2191 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2192 } else {
2193 NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr,
2194 NumBytes, Alignment);
2195 }
2196 } else {
2197 NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr,
2198 NumBytes, Alignment);
2199 // Okay, the memcpy has been formed. Zap the original store and
2200 // anything that feeds into it.
2201 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2202 }
2203
2204 NewCall->setDebugLoc(DLoc);
2205
2206 DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2207 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2208 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2209 << " from store ptr=" << *StoreEv << " at: " << *SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2210
2211 return true;
2212}
2213
2214// \brief Check if the instructions in Insts, together with their dependencies
2215// cover the loop in the sense that the loop could be safely eliminated once
2216// the instructions in Insts are removed.
2217bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2218 SmallVectorImpl<Instruction*> &Insts) const {
2219 SmallSet<BasicBlock*,8> LoopBlocks;
2220 for (auto *B : L->blocks())
2221 LoopBlocks.insert(B);
2222
2223 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2224
2225 // Collect all instructions from the loop that the instructions in Insts
2226 // depend on (plus their dependencies, etc.). These instructions will
2227 // constitute the expression trees that feed those in Insts, but the trees
2228 // will be limited only to instructions contained in the loop.
2229 for (unsigned i = 0; i < Worklist.size(); ++i) {
2230 Instruction *In = Worklist[i];
2231 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2232 Instruction *OpI = dyn_cast<Instruction>(I);
2233 if (!OpI)
2234 continue;
2235 BasicBlock *PB = OpI->getParent();
2236 if (!LoopBlocks.count(PB))
2237 continue;
2238 Worklist.insert(OpI);
2239 }
2240 }
2241
2242 // Scan all instructions in the loop, if any of them have a user outside
2243 // of the loop, or outside of the expressions collected above, then either
2244 // the loop has a side-effect visible outside of it, or there are
2245 // instructions in it that are not involved in the original set Insts.
2246 for (auto *B : L->blocks()) {
2247 for (auto &In : *B) {
2248 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2249 continue;
2250 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2251 return false;
2252 for (const auto &K : In.users()) {
2253 Instruction *UseI = dyn_cast<Instruction>(K);
2254 if (!UseI)
2255 continue;
2256 BasicBlock *UseB = UseI->getParent();
2257 if (LF->getLoopFor(UseB) != L)
2258 return false;
2259 }
2260 }
2261 }
2262
2263 return true;
2264}
2265
2266/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2267/// with the specified backedge count. This block is known to be in the current
2268/// loop and not in any subloops.
2269bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2270 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2271 // We can only promote stores in this block if they are unconditionally
2272 // executed in the loop. For a block to be unconditionally executed, it has
2273 // to dominate all the exit blocks of the loop. Verify this now.
2274 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2275 return DT->dominates(BB, EB);
2276 };
2277 if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
2278 return false;
2279
2280 bool MadeChange = false;
2281 // Look for store instructions, which may be optimized to memset/memcpy.
2282 SmallVector<StoreInst*,8> Stores;
2283 collectStores(CurLoop, BB, Stores);
2284
2285 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2286 for (auto &SI : Stores)
2287 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2288
2289 return MadeChange;
2290}
2291
2292bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2293 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2294 if (PMR.recognize())
2295 return true;
2296
2297 if (!HasMemcpy && !HasMemmove)
2298 return false;
2299
2300 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2301 assert(!isa<SCEVCouldNotCompute>(BECount) &&((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2303, __PRETTY_FUNCTION__))
2302 "runOnCountableLoop() called on a loop without a predictable"((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2303, __PRETTY_FUNCTION__))
2303 "backedge-taken count")((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/build/llvm-toolchain-snapshot-6.0~svn316068/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2303, __PRETTY_FUNCTION__))
;
2304
2305 SmallVector<BasicBlock *, 8> ExitBlocks;
2306 L->getUniqueExitBlocks(ExitBlocks);
2307
2308 bool Changed = false;
2309
2310 // Scan all the blocks in the loop that are not in subloops.
2311 for (auto *BB : L->getBlocks()) {
2312 // Ignore blocks in subloops.
2313 if (LF->getLoopFor(BB) != L)
2314 continue;
2315 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2316 }
2317
2318 return Changed;
2319}
2320
2321bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
2322 const Module &M = *L->getHeader()->getParent()->getParent();
2323 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2324 return false;
2325
2326 if (skipLoop(L))
2327 return false;
2328
2329 // If the loop could not be converted to canonical form, it must have an
2330 // indirectbr in it, just give up.
2331 if (!L->getLoopPreheader())
2332 return false;
2333
2334 // Disable loop idiom recognition if the function's name is a common idiom.
2335 StringRef Name = L->getHeader()->getParent()->getName();
2336 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2337 return false;
2338
2339 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2340 DL = &L->getHeader()->getModule()->getDataLayout();
2341 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2342 LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2343 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
2344 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2345
2346 HasMemcpy = TLI->has(LibFunc_memcpy);
2347 HasMemmove = TLI->has(LibFunc_memmove);
2348
2349 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2350 return runOnCountableLoop(L);
2351 return false;
2352}
2353
2354Pass *llvm::createHexagonLoopIdiomPass() {
2355 return new HexagonLoopIdiomRecognize();
2356}