Bug Summary

File:lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp
Warning:line 1460, column 21
The result of the '<<' expression is undefined

Annotated Source Code

1//===--- HexagonLoopIdiomRecognition.cpp ----------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#define DEBUG_TYPE"hexagon-lir" "hexagon-lir"
11
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallSet.h"
14#include "llvm/Analysis/AliasAnalysis.h"
15#include "llvm/Analysis/InstructionSimplify.h"
16#include "llvm/Analysis/LoopPass.h"
17#include "llvm/Analysis/ScalarEvolution.h"
18#include "llvm/Analysis/ScalarEvolutionExpander.h"
19#include "llvm/Analysis/ScalarEvolutionExpressions.h"
20#include "llvm/Analysis/TargetLibraryInfo.h"
21#include "llvm/Analysis/ValueTracking.h"
22#include "llvm/IR/DataLayout.h"
23#include "llvm/IR/Dominators.h"
24#include "llvm/IR/IRBuilder.h"
25#include "llvm/IR/PatternMatch.h"
26#include "llvm/Transforms/Scalar.h"
27#include "llvm/Transforms/Utils/Local.h"
28#include "llvm/Support/Debug.h"
29#include "llvm/Support/raw_ostream.h"
30
31#include <algorithm>
32#include <array>
33
34using namespace llvm;
35
36static cl::opt<bool> DisableMemcpyIdiom("disable-memcpy-idiom",
37 cl::Hidden, cl::init(false),
38 cl::desc("Disable generation of memcpy in loop idiom recognition"));
39
40static cl::opt<bool> DisableMemmoveIdiom("disable-memmove-idiom",
41 cl::Hidden, cl::init(false),
42 cl::desc("Disable generation of memmove in loop idiom recognition"));
43
44static cl::opt<unsigned> RuntimeMemSizeThreshold("runtime-mem-idiom-threshold",
45 cl::Hidden, cl::init(0), cl::desc("Threshold (in bytes) for the runtime "
46 "check guarding the memmove."));
47
48static cl::opt<unsigned> CompileTimeMemSizeThreshold(
49 "compile-time-mem-idiom-threshold", cl::Hidden, cl::init(64),
50 cl::desc("Threshold (in bytes) to perform the transformation, if the "
51 "runtime loop count (mem transfer size) is known at compile-time."));
52
53static cl::opt<bool> OnlyNonNestedMemmove("only-nonnested-memmove-idiom",
54 cl::Hidden, cl::init(true),
55 cl::desc("Only enable generating memmove in non-nested loops"));
56
57cl::opt<bool> HexagonVolatileMemcpy("disable-hexagon-volatile-memcpy",
58 cl::Hidden, cl::init(false),
59 cl::desc("Enable Hexagon-specific memcpy for volatile destination."));
60
61static const char *HexagonVolatileMemcpyName
62 = "hexagon_memcpy_forward_vp4cp4n2";
63
64
65namespace llvm {
66 void initializeHexagonLoopIdiomRecognizePass(PassRegistry&);
67 Pass *createHexagonLoopIdiomPass();
68}
69
70namespace {
71 class HexagonLoopIdiomRecognize : public LoopPass {
72 public:
73 static char ID;
74 explicit HexagonLoopIdiomRecognize() : LoopPass(ID) {
75 initializeHexagonLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
76 }
77 StringRef getPassName() const override {
78 return "Recognize Hexagon-specific loop idioms";
79 }
80
81 void getAnalysisUsage(AnalysisUsage &AU) const override {
82 AU.addRequired<LoopInfoWrapperPass>();
83 AU.addRequiredID(LoopSimplifyID);
84 AU.addRequiredID(LCSSAID);
85 AU.addRequired<AAResultsWrapperPass>();
86 AU.addPreserved<AAResultsWrapperPass>();
87 AU.addRequired<ScalarEvolutionWrapperPass>();
88 AU.addRequired<DominatorTreeWrapperPass>();
89 AU.addRequired<TargetLibraryInfoWrapperPass>();
90 AU.addPreserved<TargetLibraryInfoWrapperPass>();
91 }
92
93 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
94
95 private:
96 unsigned getStoreSizeInBytes(StoreInst *SI);
97 int getSCEVStride(const SCEVAddRecExpr *StoreEv);
98 bool isLegalStore(Loop *CurLoop, StoreInst *SI);
99 void collectStores(Loop *CurLoop, BasicBlock *BB,
100 SmallVectorImpl<StoreInst*> &Stores);
101 bool processCopyingStore(Loop *CurLoop, StoreInst *SI, const SCEV *BECount);
102 bool coverLoop(Loop *L, SmallVectorImpl<Instruction*> &Insts) const;
103 bool runOnLoopBlock(Loop *CurLoop, BasicBlock *BB, const SCEV *BECount,
104 SmallVectorImpl<BasicBlock*> &ExitBlocks);
105 bool runOnCountableLoop(Loop *L);
106
107 AliasAnalysis *AA;
108 const DataLayout *DL;
109 DominatorTree *DT;
110 LoopInfo *LF;
111 const TargetLibraryInfo *TLI;
112 ScalarEvolution *SE;
113 bool HasMemcpy, HasMemmove;
114 };
115}
116
117char HexagonLoopIdiomRecognize::ID = 0;
118
119INITIALIZE_PASS_BEGIN(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
120 "Recognize Hexagon-specific loop idioms", false, false)static void *initializeHexagonLoopIdiomRecognizePassOnce(PassRegistry
&Registry) {
121INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
122INITIALIZE_PASS_DEPENDENCY(LoopSimplify)initializeLoopSimplifyPass(Registry);
123INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)initializeLCSSAWrapperPassPass(Registry);
124INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)initializeScalarEvolutionWrapperPassPass(Registry);
125INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
126INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
127INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)initializeAAResultsWrapperPassPass(Registry);
128INITIALIZE_PASS_END(HexagonLoopIdiomRecognize, "hexagon-loop-idiom",PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
129 "Recognize Hexagon-specific loop idioms", false, false)PassInfo *PI = new PassInfo( "Recognize Hexagon-specific loop idioms"
, "hexagon-loop-idiom", &HexagonLoopIdiomRecognize::ID, PassInfo
::NormalCtor_t(callDefaultCtor<HexagonLoopIdiomRecognize>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeHexagonLoopIdiomRecognizePassFlag
; void llvm::initializeHexagonLoopIdiomRecognizePass(PassRegistry
&Registry) { llvm::call_once(InitializeHexagonLoopIdiomRecognizePassFlag
, initializeHexagonLoopIdiomRecognizePassOnce, std::ref(Registry
)); }
130
131
132namespace {
133 struct Simplifier {
134 typedef std::function<Value* (Instruction*, LLVMContext&)> Rule;
135
136 void addRule(const Rule &R) { Rules.push_back(R); }
137
138 private:
139 typedef std::deque<Value*> WorkListType;
140 typedef std::set<Value*> ValueSetType;
141 std::vector<Rule> Rules;
142
143 public:
144 struct Context {
145 typedef DenseMap<Value*,Value*> ValueMapType;
146
147 Value *Root;
148 ValueSetType Used;
149 ValueMapType Clones, Orig;
150 LLVMContext &Ctx;
151
152 Context(Instruction *Exp)
153 : Ctx(Exp->getParent()->getParent()->getContext()) {
154 initialize(Exp);
155 reset();
156 }
157 ~Context() { cleanup(); }
158 void print(raw_ostream &OS, const Value *V) const;
159
160 Value *materialize(BasicBlock *B, BasicBlock::iterator At);
161
162 private:
163 void initialize(Instruction *Exp);
164 void reset();
165 void cleanup();
166 void cleanup(Value *V);
167
168 bool equal(const Instruction *I, const Instruction *J) const;
169 Value *find(Value *Tree, Value *Sub) const;
170 Value *subst(Value *Tree, Value *OldV, Value *NewV);
171 void replace(Value *OldV, Value *NewV);
172 void link(Instruction *I, BasicBlock *B, BasicBlock::iterator At);
173
174 friend struct Simplifier;
175 };
176
177 Value *simplify(Context &C);
178 };
179
180 struct PE {
181 PE(const Simplifier::Context &c, Value *v = nullptr) : C(c), V(v) {}
182 const Simplifier::Context &C;
183 const Value *V;
184 };
185
186 raw_ostream &operator<< (raw_ostream &OS, const PE &P) LLVM_ATTRIBUTE_USED__attribute__((__used__));
187 raw_ostream &operator<< (raw_ostream &OS, const PE &P) {
188 P.C.print(OS, P.V ? P.V : P.C.Root);
189 return OS;
190 }
191}
192
193
194void Simplifier::Context::print(raw_ostream &OS, const Value *V) const {
195 const auto *U = dyn_cast<const Instruction>(V);
196 if (!U) {
197 OS << V << '(' << *V << ')';
198 return;
199 }
200
201 if (U->getParent()) {
202 OS << U << '(';
203 U->printAsOperand(OS, true);
204 OS << ')';
205 return;
206 }
207
208 unsigned N = U->getNumOperands();
209 if (N != 0)
210 OS << U << '(';
211 OS << U->getOpcodeName();
212 for (const Value *Op : U->operands()) {
213 OS << ' ';
214 print(OS, Op);
215 }
216 if (N != 0)
217 OS << ')';
218}
219
220
221void Simplifier::Context::initialize(Instruction *Exp) {
222 // Perform a deep clone of the expression, set Root to the root
223 // of the clone, and build a map from the cloned values to the
224 // original ones.
225 BasicBlock *Block = Exp->getParent();
226 WorkListType Q;
227 Q.push_back(Exp);
228
229 while (!Q.empty()) {
230 Value *V = Q.front();
231 Q.pop_front();
232 if (Clones.find(V) != Clones.end())
233 continue;
234 if (Instruction *U = dyn_cast<Instruction>(V)) {
235 if (isa<PHINode>(U) || U->getParent() != Block)
236 continue;
237 for (Value *Op : U->operands())
238 Q.push_back(Op);
239 Clones.insert({U, U->clone()});
240 }
241 }
242
243 for (std::pair<Value*,Value*> P : Clones) {
244 Instruction *U = cast<Instruction>(P.second);
245 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
246 auto F = Clones.find(U->getOperand(i));
247 if (F != Clones.end())
248 U->setOperand(i, F->second);
249 }
250 Orig.insert({P.second, P.first});
251 }
252
253 auto R = Clones.find(Exp);
254 assert(R != Clones.end())((R != Clones.end()) ? static_cast<void> (0) : __assert_fail
("R != Clones.end()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 254, __PRETTY_FUNCTION__))
;
255 Root = R->second;
256}
257
258
259void Simplifier::Context::reset() {
260 ValueSetType NewUsed;
261 WorkListType Q;
262 Q.push_back(Root);
263
264 while (!Q.empty()) {
265 Instruction *U = dyn_cast<Instruction>(Q.front());
266 Q.pop_front();
267 if (!U || U->getParent())
268 continue;
269 NewUsed.insert(U);
270 for (Value *Op : U->operands())
271 Q.push_back(Op);
272 }
273 for (Value *V : Used)
274 if (!NewUsed.count(V))
275 cast<Instruction>(V)->dropAllReferences();
276 Used = NewUsed;
277}
278
279
280Value *Simplifier::Context::subst(Value *Tree, Value *OldV, Value *NewV) {
281 if (Tree == OldV) {
282 cleanup(OldV);
283 return NewV;
284 }
285
286 WorkListType Q;
287 Q.push_back(Tree);
288 while (!Q.empty()) {
289 Instruction *U = dyn_cast<Instruction>(Q.front());
290 Q.pop_front();
291 // If U is not an instruction, or it's not a clone, skip it.
292 if (!U || U->getParent())
293 continue;
294 for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i) {
295 Value *Op = U->getOperand(i);
296 if (Op == OldV) {
297 cleanup(OldV);
298 U->setOperand(i, NewV);
299 } else {
300 Q.push_back(Op);
301 }
302 }
303 }
304 return Tree;
305}
306
307
308void Simplifier::Context::replace(Value *OldV, Value *NewV) {
309 if (Root == OldV) {
310 Root = NewV;
311 reset();
312 return;
313 }
314
315 // NewV may be a complex tree that has just been created by one of the
316 // transformation rules. We need to make sure that it is commoned with
317 // the existing Root to the maximum extent possible.
318 // Identify all subtrees of NewV (including NewV itself) that have
319 // equivalent counterparts in Root, and replace those subtrees with
320 // these counterparts.
321 WorkListType Q;
322 Q.push_back(NewV);
323 while (!Q.empty()) {
324 Value *V = Q.front();
325 Q.pop_front();
326 Instruction *U = dyn_cast<Instruction>(V);
327 if (!U || U->getParent())
328 continue;
329 if (Value *DupV = find(Root, V)) {
330 if (DupV != V)
331 NewV = subst(NewV, V, DupV);
332 } else {
333 for (Value *Op : U->operands())
334 Q.push_back(Op);
335 }
336 }
337
338 // Now, simply replace OldV with NewV in Root.
339 Root = subst(Root, OldV, NewV);
340 reset();
341}
342
343
344void Simplifier::Context::cleanup() {
345 for (Value *V : Used) {
346 Instruction *U = cast<Instruction>(V);
347 if (!U->getParent())
348 U->dropAllReferences();
349 }
350}
351
352
353void Simplifier::Context::cleanup(Value *V) {
354 if (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != nullptr)
355 return;
356 WorkListType Q;
357 Q.push_back(V);
358 while (!Q.empty()) {
359 Instruction *U = dyn_cast<Instruction>(Q.front());
360 Q.pop_front();
361 if (!U || U->getParent() || Used.count(U))
362 continue;
363 for (Value *Op : U->operands())
364 Q.push_back(Op);
365 U->dropAllReferences();
366 }
367}
368
369
370bool Simplifier::Context::equal(const Instruction *I,
371 const Instruction *J) const {
372 if (I == J)
373 return true;
374 if (!I->isSameOperationAs(J))
375 return false;
376 if (isa<PHINode>(I))
377 return I->isIdenticalTo(J);
378
379 for (unsigned i = 0, n = I->getNumOperands(); i != n; ++i) {
380 Value *OpI = I->getOperand(i), *OpJ = J->getOperand(i);
381 if (OpI == OpJ)
382 continue;
383 auto *InI = dyn_cast<const Instruction>(OpI);
384 auto *InJ = dyn_cast<const Instruction>(OpJ);
385 if (InI && InJ) {
386 if (!equal(InI, InJ))
387 return false;
388 } else if (InI != InJ || !InI)
389 return false;
390 }
391 return true;
392}
393
394
395Value *Simplifier::Context::find(Value *Tree, Value *Sub) const {
396 Instruction *SubI = dyn_cast<Instruction>(Sub);
397 WorkListType Q;
398 Q.push_back(Tree);
399
400 while (!Q.empty()) {
401 Value *V = Q.front();
402 Q.pop_front();
403 if (V == Sub)
404 return V;
405 Instruction *U = dyn_cast<Instruction>(V);
406 if (!U || U->getParent())
407 continue;
408 if (SubI && equal(SubI, U))
409 return U;
410 assert(!isa<PHINode>(U))((!isa<PHINode>(U)) ? static_cast<void> (0) : __assert_fail
("!isa<PHINode>(U)", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 410, __PRETTY_FUNCTION__))
;
411 for (Value *Op : U->operands())
412 Q.push_back(Op);
413 }
414 return nullptr;
415}
416
417
418void Simplifier::Context::link(Instruction *I, BasicBlock *B,
419 BasicBlock::iterator At) {
420 if (I->getParent())
421 return;
422
423 for (Value *Op : I->operands()) {
424 if (Instruction *OpI = dyn_cast<Instruction>(Op))
425 link(OpI, B, At);
426 }
427
428 B->getInstList().insert(At, I);
429}
430
431
432Value *Simplifier::Context::materialize(BasicBlock *B,
433 BasicBlock::iterator At) {
434 if (Instruction *RootI = dyn_cast<Instruction>(Root))
435 link(RootI, B, At);
436 return Root;
437}
438
439
440Value *Simplifier::simplify(Context &C) {
441 WorkListType Q;
442 Q.push_back(C.Root);
443
444 while (!Q.empty()) {
445 Instruction *U = dyn_cast<Instruction>(Q.front());
446 Q.pop_front();
447 if (!U || U->getParent() || !C.Used.count(U))
448 continue;
449 bool Changed = false;
450 for (Rule &R : Rules) {
451 Value *W = R(U, C.Ctx);
452 if (!W)
453 continue;
454 Changed = true;
455 C.replace(U, W);
456 Q.push_back(C.Root);
457 break;
458 }
459 if (!Changed) {
460 for (Value *Op : U->operands())
461 Q.push_back(Op);
462 }
463 }
464 return C.Root;
465}
466
467
468//===----------------------------------------------------------------------===//
469//
470// Implementation of PolynomialMultiplyRecognize
471//
472//===----------------------------------------------------------------------===//
473
474namespace {
475 class PolynomialMultiplyRecognize {
476 public:
477 explicit PolynomialMultiplyRecognize(Loop *loop, const DataLayout &dl,
478 const DominatorTree &dt, const TargetLibraryInfo &tli,
479 ScalarEvolution &se)
480 : CurLoop(loop), DL(dl), DT(dt), TLI(tli), SE(se) {}
481
482 bool recognize();
483 private:
484 typedef SetVector<Value*> ValueSeq;
485
486 IntegerType *getPmpyType() const {
487 LLVMContext &Ctx = CurLoop->getHeader()->getParent()->getContext();
488 return IntegerType::get(Ctx, 32);
489 }
490 bool isPromotableTo(Value *V, IntegerType *Ty);
491 void promoteTo(Instruction *In, IntegerType *DestTy, BasicBlock *LoopB);
492 bool promoteTypes(BasicBlock *LoopB, BasicBlock *ExitB);
493
494 Value *getCountIV(BasicBlock *BB);
495 bool findCycle(Value *Out, Value *In, ValueSeq &Cycle);
496 void classifyCycle(Instruction *DivI, ValueSeq &Cycle, ValueSeq &Early,
497 ValueSeq &Late);
498 bool classifyInst(Instruction *UseI, ValueSeq &Early, ValueSeq &Late);
499 bool commutesWithShift(Instruction *I);
500 bool highBitsAreZero(Value *V, unsigned IterCount);
501 bool keepsHighBitsZero(Value *V, unsigned IterCount);
502 bool isOperandShifted(Instruction *I, Value *Op);
503 bool convertShiftsToLeft(BasicBlock *LoopB, BasicBlock *ExitB,
504 unsigned IterCount);
505 void cleanupLoopBody(BasicBlock *LoopB);
506
507 struct ParsedValues {
508 ParsedValues() : M(nullptr), P(nullptr), Q(nullptr), R(nullptr),
509 X(nullptr), Res(nullptr), IterCount(0), Left(false), Inv(false) {}
510 Value *M, *P, *Q, *R, *X;
511 Instruction *Res;
512 unsigned IterCount;
513 bool Left, Inv;
514 };
515
516 bool matchLeftShift(SelectInst *SelI, Value *CIV, ParsedValues &PV);
517 bool matchRightShift(SelectInst *SelI, ParsedValues &PV);
518 bool scanSelect(SelectInst *SI, BasicBlock *LoopB, BasicBlock *PrehB,
519 Value *CIV, ParsedValues &PV, bool PreScan);
520 unsigned getInverseMxN(unsigned QP);
521 Value *generate(BasicBlock::iterator At, ParsedValues &PV);
522
523 void setupSimplifier();
524
525 Simplifier Simp;
526 Loop *CurLoop;
527 const DataLayout &DL;
528 const DominatorTree &DT;
529 const TargetLibraryInfo &TLI;
530 ScalarEvolution &SE;
531 };
532}
533
534
535Value *PolynomialMultiplyRecognize::getCountIV(BasicBlock *BB) {
536 pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
537 if (std::distance(PI, PE) != 2)
538 return nullptr;
539 BasicBlock *PB = (*PI == BB) ? *std::next(PI) : *PI;
540
541 for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
542 auto *PN = cast<PHINode>(I);
543 Value *InitV = PN->getIncomingValueForBlock(PB);
544 if (!isa<ConstantInt>(InitV) || !cast<ConstantInt>(InitV)->isZero())
545 continue;
546 Value *IterV = PN->getIncomingValueForBlock(BB);
547 if (!isa<BinaryOperator>(IterV))
548 continue;
549 auto *BO = dyn_cast<BinaryOperator>(IterV);
550 if (BO->getOpcode() != Instruction::Add)
551 continue;
552 Value *IncV = nullptr;
553 if (BO->getOperand(0) == PN)
554 IncV = BO->getOperand(1);
555 else if (BO->getOperand(1) == PN)
556 IncV = BO->getOperand(0);
557 if (IncV == nullptr)
558 continue;
559
560 if (auto *T = dyn_cast<ConstantInt>(IncV))
561 if (T->getZExtValue() == 1)
562 return PN;
563 }
564 return nullptr;
565}
566
567
568static void replaceAllUsesOfWithIn(Value *I, Value *J, BasicBlock *BB) {
569 for (auto UI = I->user_begin(), UE = I->user_end(); UI != UE;) {
570 Use &TheUse = UI.getUse();
571 ++UI;
572 if (auto *II = dyn_cast<Instruction>(TheUse.getUser()))
573 if (BB == II->getParent())
574 II->replaceUsesOfWith(I, J);
575 }
576}
577
578
579bool PolynomialMultiplyRecognize::matchLeftShift(SelectInst *SelI,
580 Value *CIV, ParsedValues &PV) {
581 // Match the following:
582 // select (X & (1 << i)) != 0 ? R ^ (Q << i) : R
583 // select (X & (1 << i)) == 0 ? R : R ^ (Q << i)
584 // The condition may also check for equality with the masked value, i.e
585 // select (X & (1 << i)) == (1 << i) ? R ^ (Q << i) : R
586 // select (X & (1 << i)) != (1 << i) ? R : R ^ (Q << i);
587
588 Value *CondV = SelI->getCondition();
589 Value *TrueV = SelI->getTrueValue();
590 Value *FalseV = SelI->getFalseValue();
591
592 using namespace PatternMatch;
593
594 CmpInst::Predicate P;
595 Value *A = nullptr, *B = nullptr, *C = nullptr;
596
597 if (!match(CondV, m_ICmp(P, m_And(m_Value(A), m_Value(B)), m_Value(C))) &&
598 !match(CondV, m_ICmp(P, m_Value(C), m_And(m_Value(A), m_Value(B)))))
599 return false;
600 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
601 return false;
602 // Matched: select (A & B) == C ? ... : ...
603 // select (A & B) != C ? ... : ...
604
605 Value *X = nullptr, *Sh1 = nullptr;
606 // Check (A & B) for (X & (1 << i)):
607 if (match(A, m_Shl(m_One(), m_Specific(CIV)))) {
608 Sh1 = A;
609 X = B;
610 } else if (match(B, m_Shl(m_One(), m_Specific(CIV)))) {
611 Sh1 = B;
612 X = A;
613 } else {
614 // TODO: Could also check for an induction variable containing single
615 // bit shifted left by 1 in each iteration.
616 return false;
617 }
618
619 bool TrueIfZero;
620
621 // Check C against the possible values for comparison: 0 and (1 << i):
622 if (match(C, m_Zero()))
623 TrueIfZero = (P == CmpInst::ICMP_EQ);
624 else if (C == Sh1)
625 TrueIfZero = (P == CmpInst::ICMP_NE);
626 else
627 return false;
628
629 // So far, matched:
630 // select (X & (1 << i)) ? ... : ...
631 // including variations of the check against zero/non-zero value.
632
633 Value *ShouldSameV = nullptr, *ShouldXoredV = nullptr;
634 if (TrueIfZero) {
635 ShouldSameV = TrueV;
636 ShouldXoredV = FalseV;
637 } else {
638 ShouldSameV = FalseV;
639 ShouldXoredV = TrueV;
640 }
641
642 Value *Q = nullptr, *R = nullptr, *Y = nullptr, *Z = nullptr;
643 Value *T = nullptr;
644 if (match(ShouldXoredV, m_Xor(m_Value(Y), m_Value(Z)))) {
645 // Matched: select +++ ? ... : Y ^ Z
646 // select +++ ? Y ^ Z : ...
647 // where +++ denotes previously checked matches.
648 if (ShouldSameV == Y)
649 T = Z;
650 else if (ShouldSameV == Z)
651 T = Y;
652 else
653 return false;
654 R = ShouldSameV;
655 // Matched: select +++ ? R : R ^ T
656 // select +++ ? R ^ T : R
657 // depending on TrueIfZero.
658
659 } else if (match(ShouldSameV, m_Zero())) {
660 // Matched: select +++ ? 0 : ...
661 // select +++ ? ... : 0
662 if (!SelI->hasOneUse())
663 return false;
664 T = ShouldXoredV;
665 // Matched: select +++ ? 0 : T
666 // select +++ ? T : 0
667
668 Value *U = *SelI->user_begin();
669 if (!match(U, m_Xor(m_Specific(SelI), m_Value(R))) &&
670 !match(U, m_Xor(m_Value(R), m_Specific(SelI))))
671 return false;
672 // Matched: xor (select +++ ? 0 : T), R
673 // xor (select +++ ? T : 0), R
674 } else
675 return false;
676
677 // The xor input value T is isolated into its own match so that it could
678 // be checked against an induction variable containing a shifted bit
679 // (todo).
680 // For now, check against (Q << i).
681 if (!match(T, m_Shl(m_Value(Q), m_Specific(CIV))) &&
682 !match(T, m_Shl(m_ZExt(m_Value(Q)), m_ZExt(m_Specific(CIV)))))
683 return false;
684 // Matched: select +++ ? R : R ^ (Q << i)
685 // select +++ ? R ^ (Q << i) : R
686
687 PV.X = X;
688 PV.Q = Q;
689 PV.R = R;
690 PV.Left = true;
691 return true;
692}
693
694
695bool PolynomialMultiplyRecognize::matchRightShift(SelectInst *SelI,
696 ParsedValues &PV) {
697 // Match the following:
698 // select (X & 1) != 0 ? (R >> 1) ^ Q : (R >> 1)
699 // select (X & 1) == 0 ? (R >> 1) : (R >> 1) ^ Q
700 // The condition may also check for equality with the masked value, i.e
701 // select (X & 1) == 1 ? (R >> 1) ^ Q : (R >> 1)
702 // select (X & 1) != 1 ? (R >> 1) : (R >> 1) ^ Q
703
704 Value *CondV = SelI->getCondition();
705 Value *TrueV = SelI->getTrueValue();
706 Value *FalseV = SelI->getFalseValue();
707
708 using namespace PatternMatch;
709
710 Value *C = nullptr;
711 CmpInst::Predicate P;
712 bool TrueIfZero;
713
714 if (match(CondV, m_ICmp(P, m_Value(C), m_Zero())) ||
715 match(CondV, m_ICmp(P, m_Zero(), m_Value(C)))) {
716 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
717 return false;
718 // Matched: select C == 0 ? ... : ...
719 // select C != 0 ? ... : ...
720 TrueIfZero = (P == CmpInst::ICMP_EQ);
721 } else if (match(CondV, m_ICmp(P, m_Value(C), m_One())) ||
722 match(CondV, m_ICmp(P, m_One(), m_Value(C)))) {
723 if (P != CmpInst::ICMP_EQ && P != CmpInst::ICMP_NE)
724 return false;
725 // Matched: select C == 1 ? ... : ...
726 // select C != 1 ? ... : ...
727 TrueIfZero = (P == CmpInst::ICMP_NE);
728 } else
729 return false;
730
731 Value *X = nullptr;
732 if (!match(C, m_And(m_Value(X), m_One())) &&
733 !match(C, m_And(m_One(), m_Value(X))))
734 return false;
735 // Matched: select (X & 1) == +++ ? ... : ...
736 // select (X & 1) != +++ ? ... : ...
737
738 Value *R = nullptr, *Q = nullptr;
739 if (TrueIfZero) {
740 // The select's condition is true if the tested bit is 0.
741 // TrueV must be the shift, FalseV must be the xor.
742 if (!match(TrueV, m_LShr(m_Value(R), m_One())))
743 return false;
744 // Matched: select +++ ? (R >> 1) : ...
745 if (!match(FalseV, m_Xor(m_Specific(TrueV), m_Value(Q))) &&
746 !match(FalseV, m_Xor(m_Value(Q), m_Specific(TrueV))))
747 return false;
748 // Matched: select +++ ? (R >> 1) : (R >> 1) ^ Q
749 // with commuting ^.
750 } else {
751 // The select's condition is true if the tested bit is 1.
752 // TrueV must be the xor, FalseV must be the shift.
753 if (!match(FalseV, m_LShr(m_Value(R), m_One())))
754 return false;
755 // Matched: select +++ ? ... : (R >> 1)
756 if (!match(TrueV, m_Xor(m_Specific(FalseV), m_Value(Q))) &&
757 !match(TrueV, m_Xor(m_Value(Q), m_Specific(FalseV))))
758 return false;
759 // Matched: select +++ ? (R >> 1) ^ Q : (R >> 1)
760 // with commuting ^.
761 }
762
763 PV.X = X;
764 PV.Q = Q;
765 PV.R = R;
766 PV.Left = false;
767 return true;
768}
769
770
771bool PolynomialMultiplyRecognize::scanSelect(SelectInst *SelI,
772 BasicBlock *LoopB, BasicBlock *PrehB, Value *CIV, ParsedValues &PV,
773 bool PreScan) {
774 using namespace PatternMatch;
775 // The basic pattern for R = P.Q is:
776 // for i = 0..31
777 // R = phi (0, R')
778 // if (P & (1 << i)) ; test-bit(P, i)
779 // R' = R ^ (Q << i)
780 //
781 // Similarly, the basic pattern for R = (P/Q).Q - P
782 // for i = 0..31
783 // R = phi(P, R')
784 // if (R & (1 << i))
785 // R' = R ^ (Q << i)
786
787 // There exist idioms, where instead of Q being shifted left, P is shifted
788 // right. This produces a result that is shifted right by 32 bits (the
789 // non-shifted result is 64-bit).
790 //
791 // For R = P.Q, this would be:
792 // for i = 0..31
793 // R = phi (0, R')
794 // if ((P >> i) & 1)
795 // R' = (R >> 1) ^ Q ; R is cycled through the loop, so it must
796 // else ; be shifted by 1, not i.
797 // R' = R >> 1
798 //
799 // And for the inverse:
800 // for i = 0..31
801 // R = phi (P, R')
802 // if (R & 1)
803 // R' = (R >> 1) ^ Q
804 // else
805 // R' = R >> 1
806
807 // The left-shifting idioms share the same pattern:
808 // select (X & (1 << i)) ? R ^ (Q << i) : R
809 // Similarly for right-shifting idioms:
810 // select (X & 1) ? (R >> 1) ^ Q
811
812 if (matchLeftShift(SelI, CIV, PV)) {
813 // If this is a pre-scan, getting this far is sufficient.
814 if (PreScan)
815 return true;
816
817 // Need to make sure that the SelI goes back into R.
818 auto *RPhi = dyn_cast<PHINode>(PV.R);
819 if (!RPhi)
820 return false;
821 if (SelI != RPhi->getIncomingValueForBlock(LoopB))
822 return false;
823 PV.Res = SelI;
824
825 // If X is loop invariant, it must be the input polynomial, and the
826 // idiom is the basic polynomial multiply.
827 if (CurLoop->isLoopInvariant(PV.X)) {
828 PV.P = PV.X;
829 PV.Inv = false;
830 } else {
831 // X is not loop invariant. If X == R, this is the inverse pmpy.
832 // Otherwise, check for an xor with an invariant value. If the
833 // variable argument to the xor is R, then this is still a valid
834 // inverse pmpy.
835 PV.Inv = true;
836 if (PV.X != PV.R) {
837 Value *Var = nullptr, *Inv = nullptr, *X1 = nullptr, *X2 = nullptr;
838 if (!match(PV.X, m_Xor(m_Value(X1), m_Value(X2))))
839 return false;
840 auto *I1 = dyn_cast<Instruction>(X1);
841 auto *I2 = dyn_cast<Instruction>(X2);
842 if (!I1 || I1->getParent() != LoopB) {
843 Var = X2;
844 Inv = X1;
845 } else if (!I2 || I2->getParent() != LoopB) {
846 Var = X1;
847 Inv = X2;
848 } else
849 return false;
850 if (Var != PV.R)
851 return false;
852 PV.M = Inv;
853 }
854 // The input polynomial P still needs to be determined. It will be
855 // the entry value of R.
856 Value *EntryP = RPhi->getIncomingValueForBlock(PrehB);
857 PV.P = EntryP;
858 }
859
860 return true;
861 }
862
863 if (matchRightShift(SelI, PV)) {
864 // If this is an inverse pattern, the Q polynomial must be known at
865 // compile time.
866 if (PV.Inv && !isa<ConstantInt>(PV.Q))
867 return false;
868 if (PreScan)
869 return true;
870 // There is no exact matching of right-shift pmpy.
871 return false;
872 }
873
874 return false;
875}
876
877
878bool PolynomialMultiplyRecognize::isPromotableTo(Value *Val,
879 IntegerType *DestTy) {
880 IntegerType *T = dyn_cast<IntegerType>(Val->getType());
881 if (!T || T->getBitWidth() > DestTy->getBitWidth())
882 return false;
883 if (T->getBitWidth() == DestTy->getBitWidth())
884 return true;
885 // Non-instructions are promotable. The reason why an instruction may not
886 // be promotable is that it may produce a different result if its operands
887 // and the result are promoted, for example, it may produce more non-zero
888 // bits. While it would still be possible to represent the proper result
889 // in a wider type, it may require adding additional instructions (which
890 // we don't want to do).
891 Instruction *In = dyn_cast<Instruction>(Val);
892 if (!In)
893 return true;
894 // The bitwidth of the source type is smaller than the destination.
895 // Check if the individual operation can be promoted.
896 switch (In->getOpcode()) {
897 case Instruction::PHI:
898 case Instruction::ZExt:
899 case Instruction::And:
900 case Instruction::Or:
901 case Instruction::Xor:
902 case Instruction::LShr: // Shift right is ok.
903 case Instruction::Select:
904 return true;
905 case Instruction::ICmp:
906 if (CmpInst *CI = cast<CmpInst>(In))
907 return CI->isEquality() || CI->isUnsigned();
908 llvm_unreachable("Cast failed unexpectedly")::llvm::llvm_unreachable_internal("Cast failed unexpectedly",
"/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 908)
;
909 case Instruction::Add:
910 return In->hasNoSignedWrap() && In->hasNoUnsignedWrap();
911 }
912 return false;
913}
914
915
916void PolynomialMultiplyRecognize::promoteTo(Instruction *In,
917 IntegerType *DestTy, BasicBlock *LoopB) {
918 // Leave boolean values alone.
919 if (!In->getType()->isIntegerTy(1))
920 In->mutateType(DestTy);
921 unsigned DestBW = DestTy->getBitWidth();
922
923 // Handle PHIs.
924 if (PHINode *P = dyn_cast<PHINode>(In)) {
925 unsigned N = P->getNumIncomingValues();
926 for (unsigned i = 0; i != N; ++i) {
927 BasicBlock *InB = P->getIncomingBlock(i);
928 if (InB == LoopB)
929 continue;
930 Value *InV = P->getIncomingValue(i);
931 IntegerType *Ty = cast<IntegerType>(InV->getType());
932 // Do not promote values in PHI nodes of type i1.
933 if (Ty != P->getType()) {
934 // If the value type does not match the PHI type, the PHI type
935 // must have been promoted.
936 assert(Ty->getBitWidth() < DestBW)((Ty->getBitWidth() < DestBW) ? static_cast<void>
(0) : __assert_fail ("Ty->getBitWidth() < DestBW", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 936, __PRETTY_FUNCTION__))
;
937 InV = IRBuilder<>(InB->getTerminator()).CreateZExt(InV, DestTy);
938 P->setIncomingValue(i, InV);
939 }
940 }
941 } else if (ZExtInst *Z = dyn_cast<ZExtInst>(In)) {
942 Value *Op = Z->getOperand(0);
943 if (Op->getType() == Z->getType())
944 Z->replaceAllUsesWith(Op);
945 Z->eraseFromParent();
946 return;
947 }
948
949 // Promote immediates.
950 for (unsigned i = 0, n = In->getNumOperands(); i != n; ++i) {
951 if (ConstantInt *CI = dyn_cast<ConstantInt>(In->getOperand(i)))
952 if (CI->getType()->getBitWidth() < DestBW)
953 In->setOperand(i, ConstantInt::get(DestTy, CI->getZExtValue()));
954 }
955}
956
957
958bool PolynomialMultiplyRecognize::promoteTypes(BasicBlock *LoopB,
959 BasicBlock *ExitB) {
960 assert(LoopB)((LoopB) ? static_cast<void> (0) : __assert_fail ("LoopB"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 960, __PRETTY_FUNCTION__))
;
961 // Skip loops where the exit block has more than one predecessor. The values
962 // coming from the loop block will be promoted to another type, and so the
963 // values coming into the exit block from other predecessors would also have
964 // to be promoted.
965 if (!ExitB || (ExitB->getSinglePredecessor() != LoopB))
966 return false;
967 IntegerType *DestTy = getPmpyType();
968 // Check if the exit values have types that are no wider than the type
969 // that we want to promote to.
970 unsigned DestBW = DestTy->getBitWidth();
971 for (Instruction &In : *ExitB) {
972 PHINode *P = dyn_cast<PHINode>(&In);
973 if (!P)
974 break;
975 if (P->getNumIncomingValues() != 1)
976 return false;
977 assert(P->getIncomingBlock(0) == LoopB)((P->getIncomingBlock(0) == LoopB) ? static_cast<void>
(0) : __assert_fail ("P->getIncomingBlock(0) == LoopB", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 977, __PRETTY_FUNCTION__))
;
978 IntegerType *T = dyn_cast<IntegerType>(P->getType());
979 if (!T || T->getBitWidth() > DestBW)
980 return false;
981 }
982
983 // Check all instructions in the loop.
984 for (Instruction &In : *LoopB)
985 if (!In.isTerminator() && !isPromotableTo(&In, DestTy))
986 return false;
987
988 // Perform the promotion.
989 std::vector<Instruction*> LoopIns;
990 std::transform(LoopB->begin(), LoopB->end(), std::back_inserter(LoopIns),
991 [](Instruction &In) { return &In; });
992 for (Instruction *In : LoopIns)
993 promoteTo(In, DestTy, LoopB);
994
995 // Fix up the PHI nodes in the exit block.
996 Instruction *EndI = ExitB->getFirstNonPHI();
997 BasicBlock::iterator End = EndI ? EndI->getIterator() : ExitB->end();
998 for (auto I = ExitB->begin(); I != End; ++I) {
999 PHINode *P = dyn_cast<PHINode>(I);
1000 if (!P)
1001 break;
1002 Type *Ty0 = P->getIncomingValue(0)->getType();
1003 Type *PTy = P->getType();
1004 if (PTy != Ty0) {
1005 assert(Ty0 == DestTy)((Ty0 == DestTy) ? static_cast<void> (0) : __assert_fail
("Ty0 == DestTy", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1005, __PRETTY_FUNCTION__))
;
1006 // In order to create the trunc, P must have the promoted type.
1007 P->mutateType(Ty0);
1008 Value *T = IRBuilder<>(ExitB, End).CreateTrunc(P, PTy);
1009 // In order for the RAUW to work, the types of P and T must match.
1010 P->mutateType(PTy);
1011 P->replaceAllUsesWith(T);
1012 // Final update of the P's type.
1013 P->mutateType(Ty0);
1014 cast<Instruction>(T)->setOperand(0, P);
1015 }
1016 }
1017
1018 return true;
1019}
1020
1021
1022bool PolynomialMultiplyRecognize::findCycle(Value *Out, Value *In,
1023 ValueSeq &Cycle) {
1024 // Out = ..., In, ...
1025 if (Out == In)
1026 return true;
1027
1028 auto *BB = cast<Instruction>(Out)->getParent();
1029 bool HadPhi = false;
1030
1031 for (auto U : Out->users()) {
1032 auto *I = dyn_cast<Instruction>(&*U);
1033 if (I == nullptr || I->getParent() != BB)
1034 continue;
1035 // Make sure that there are no multi-iteration cycles, e.g.
1036 // p1 = phi(p2)
1037 // p2 = phi(p1)
1038 // The cycle p1->p2->p1 would span two loop iterations.
1039 // Check that there is only one phi in the cycle.
1040 bool IsPhi = isa<PHINode>(I);
1041 if (IsPhi && HadPhi)
1042 return false;
1043 HadPhi |= IsPhi;
1044 if (Cycle.count(I))
1045 return false;
1046 Cycle.insert(I);
1047 if (findCycle(I, In, Cycle))
1048 break;
1049 Cycle.remove(I);
1050 }
1051 return !Cycle.empty();
1052}
1053
1054
1055void PolynomialMultiplyRecognize::classifyCycle(Instruction *DivI,
1056 ValueSeq &Cycle, ValueSeq &Early, ValueSeq &Late) {
1057 // All the values in the cycle that are between the phi node and the
1058 // divider instruction will be classified as "early", all other values
1059 // will be "late".
1060
1061 bool IsE = true;
1062 unsigned I, N = Cycle.size();
1063 for (I = 0; I < N; ++I) {
1064 Value *V = Cycle[I];
1065 if (DivI == V)
1066 IsE = false;
1067 else if (!isa<PHINode>(V))
1068 continue;
1069 // Stop if found either.
1070 break;
1071 }
1072 // "I" is the index of either DivI or the phi node, whichever was first.
1073 // "E" is "false" or "true" respectively.
1074 ValueSeq &First = !IsE ? Early : Late;
1075 for (unsigned J = 0; J < I; ++J)
1076 First.insert(Cycle[J]);
1077
1078 ValueSeq &Second = IsE ? Early : Late;
1079 Second.insert(Cycle[I]);
1080 for (++I; I < N; ++I) {
1081 Value *V = Cycle[I];
1082 if (DivI == V || isa<PHINode>(V))
1083 break;
1084 Second.insert(V);
1085 }
1086
1087 for (; I < N; ++I)
1088 First.insert(Cycle[I]);
1089}
1090
1091
1092bool PolynomialMultiplyRecognize::classifyInst(Instruction *UseI,
1093 ValueSeq &Early, ValueSeq &Late) {
1094 // Select is an exception, since the condition value does not have to be
1095 // classified in the same way as the true/false values. The true/false
1096 // values do have to be both early or both late.
1097 if (UseI->getOpcode() == Instruction::Select) {
1098 Value *TV = UseI->getOperand(1), *FV = UseI->getOperand(2);
1099 if (Early.count(TV) || Early.count(FV)) {
1100 if (Late.count(TV) || Late.count(FV))
1101 return false;
1102 Early.insert(UseI);
1103 } else if (Late.count(TV) || Late.count(FV)) {
1104 if (Early.count(TV) || Early.count(FV))
1105 return false;
1106 Late.insert(UseI);
1107 }
1108 return true;
1109 }
1110
1111 // Not sure what would be the example of this, but the code below relies
1112 // on having at least one operand.
1113 if (UseI->getNumOperands() == 0)
1114 return true;
1115
1116 bool AE = true, AL = true;
1117 for (auto &I : UseI->operands()) {
1118 if (Early.count(&*I))
1119 AL = false;
1120 else if (Late.count(&*I))
1121 AE = false;
1122 }
1123 // If the operands appear "all early" and "all late" at the same time,
1124 // then it means that none of them are actually classified as either.
1125 // This is harmless.
1126 if (AE && AL)
1127 return true;
1128 // Conversely, if they are neither "all early" nor "all late", then
1129 // we have a mixture of early and late operands that is not a known
1130 // exception.
1131 if (!AE && !AL)
1132 return false;
1133
1134 // Check that we have covered the two special cases.
1135 assert(AE != AL)((AE != AL) ? static_cast<void> (0) : __assert_fail ("AE != AL"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1135, __PRETTY_FUNCTION__))
;
1136
1137 if (AE)
1138 Early.insert(UseI);
1139 else
1140 Late.insert(UseI);
1141 return true;
1142}
1143
1144
1145bool PolynomialMultiplyRecognize::commutesWithShift(Instruction *I) {
1146 switch (I->getOpcode()) {
1147 case Instruction::And:
1148 case Instruction::Or:
1149 case Instruction::Xor:
1150 case Instruction::LShr:
1151 case Instruction::Shl:
1152 case Instruction::Select:
1153 case Instruction::ICmp:
1154 case Instruction::PHI:
1155 break;
1156 default:
1157 return false;
1158 }
1159 return true;
1160}
1161
1162
1163bool PolynomialMultiplyRecognize::highBitsAreZero(Value *V,
1164 unsigned IterCount) {
1165 auto *T = dyn_cast<IntegerType>(V->getType());
1166 if (!T)
1167 return false;
1168
1169 unsigned BW = T->getBitWidth();
1170 APInt K0(BW, 0), K1(BW, 0);
1171 computeKnownBits(V, K0, K1, DL);
1172 return K0.countLeadingOnes() >= IterCount;
1173}
1174
1175
1176bool PolynomialMultiplyRecognize::keepsHighBitsZero(Value *V,
1177 unsigned IterCount) {
1178 // Assume that all inputs to the value have the high bits zero.
1179 // Check if the value itself preserves the zeros in the high bits.
1180 if (auto *C = dyn_cast<ConstantInt>(V))
1181 return C->getValue().countLeadingZeros() >= IterCount;
1182
1183 if (auto *I = dyn_cast<Instruction>(V)) {
1184 switch (I->getOpcode()) {
1185 case Instruction::And:
1186 case Instruction::Or:
1187 case Instruction::Xor:
1188 case Instruction::LShr:
1189 case Instruction::Select:
1190 case Instruction::ICmp:
1191 case Instruction::PHI:
1192 case Instruction::ZExt:
1193 return true;
1194 }
1195 }
1196
1197 return false;
1198}
1199
1200
1201bool PolynomialMultiplyRecognize::isOperandShifted(Instruction *I, Value *Op) {
1202 unsigned Opc = I->getOpcode();
1203 if (Opc == Instruction::Shl || Opc == Instruction::LShr)
1204 return Op != I->getOperand(1);
1205 return true;
1206}
1207
1208
1209bool PolynomialMultiplyRecognize::convertShiftsToLeft(BasicBlock *LoopB,
1210 BasicBlock *ExitB, unsigned IterCount) {
1211 Value *CIV = getCountIV(LoopB);
1212 if (CIV == nullptr)
1213 return false;
1214 auto *CIVTy = dyn_cast<IntegerType>(CIV->getType());
1215 if (CIVTy == nullptr)
1216 return false;
1217
1218 ValueSeq RShifts;
1219 ValueSeq Early, Late, Cycled;
1220
1221 // Find all value cycles that contain logical right shifts by 1.
1222 for (Instruction &I : *LoopB) {
1223 using namespace PatternMatch;
1224 Value *V = nullptr;
1225 if (!match(&I, m_LShr(m_Value(V), m_One())))
1226 continue;
1227 ValueSeq C;
1228 if (!findCycle(&I, V, C))
1229 continue;
1230
1231 // Found a cycle.
1232 C.insert(&I);
1233 classifyCycle(&I, C, Early, Late);
1234 Cycled.insert(C.begin(), C.end());
1235 RShifts.insert(&I);
1236 }
1237
1238 // Find the set of all values affected by the shift cycles, i.e. all
1239 // cycled values, and (recursively) all their users.
1240 ValueSeq Users(Cycled.begin(), Cycled.end());
1241 for (unsigned i = 0; i < Users.size(); ++i) {
1242 Value *V = Users[i];
1243 if (!isa<IntegerType>(V->getType()))
1244 return false;
1245 auto *R = cast<Instruction>(V);
1246 // If the instruction does not commute with shifts, the loop cannot
1247 // be unshifted.
1248 if (!commutesWithShift(R))
1249 return false;
1250 for (auto I = R->user_begin(), E = R->user_end(); I != E; ++I) {
1251 auto *T = cast<Instruction>(*I);
1252 // Skip users from outside of the loop. They will be handled later.
1253 // Also, skip the right-shifts and phi nodes, since they mix early
1254 // and late values.
1255 if (T->getParent() != LoopB || RShifts.count(T) || isa<PHINode>(T))
1256 continue;
1257
1258 Users.insert(T);
1259 if (!classifyInst(T, Early, Late))
1260 return false;
1261 }
1262 }
1263
1264 if (Users.size() == 0)
1265 return false;
1266
1267 // Verify that high bits remain zero.
1268 ValueSeq Internal(Users.begin(), Users.end());
1269 ValueSeq Inputs;
1270 for (unsigned i = 0; i < Internal.size(); ++i) {
1271 auto *R = dyn_cast<Instruction>(Internal[i]);
1272 if (!R)
1273 continue;
1274 for (Value *Op : R->operands()) {
1275 auto *T = dyn_cast<Instruction>(Op);
1276 if (T && T->getParent() != LoopB)
1277 Inputs.insert(Op);
1278 else
1279 Internal.insert(Op);
1280 }
1281 }
1282 for (Value *V : Inputs)
1283 if (!highBitsAreZero(V, IterCount))
1284 return false;
1285 for (Value *V : Internal)
1286 if (!keepsHighBitsZero(V, IterCount))
1287 return false;
1288
1289 // Finally, the work can be done. Unshift each user.
1290 IRBuilder<> IRB(LoopB);
1291 std::map<Value*,Value*> ShiftMap;
1292 typedef std::map<std::pair<Value*,Type*>,Value*> CastMapType;
1293 CastMapType CastMap;
1294
1295 auto upcast = [] (CastMapType &CM, IRBuilder<> &IRB, Value *V,
1296 IntegerType *Ty) -> Value* {
1297 auto H = CM.find(std::make_pair(V, Ty));
1298 if (H != CM.end())
1299 return H->second;
1300 Value *CV = IRB.CreateIntCast(V, Ty, false);
1301 CM.insert(std::make_pair(std::make_pair(V, Ty), CV));
1302 return CV;
1303 };
1304
1305 for (auto I = LoopB->begin(), E = LoopB->end(); I != E; ++I) {
1306 if (isa<PHINode>(I) || !Users.count(&*I))
1307 continue;
1308 using namespace PatternMatch;
1309 // Match lshr x, 1.
1310 Value *V = nullptr;
1311 if (match(&*I, m_LShr(m_Value(V), m_One()))) {
1312 replaceAllUsesOfWithIn(&*I, V, LoopB);
1313 continue;
1314 }
1315 // For each non-cycled operand, replace it with the corresponding
1316 // value shifted left.
1317 for (auto &J : I->operands()) {
1318 Value *Op = J.get();
1319 if (!isOperandShifted(&*I, Op))
1320 continue;
1321 if (Users.count(Op))
1322 continue;
1323 // Skip shifting zeros.
1324 if (isa<ConstantInt>(Op) && cast<ConstantInt>(Op)->isZero())
1325 continue;
1326 // Check if we have already generated a shift for this value.
1327 auto F = ShiftMap.find(Op);
1328 Value *W = (F != ShiftMap.end()) ? F->second : nullptr;
1329 if (W == nullptr) {
1330 IRB.SetInsertPoint(&*I);
1331 // First, the shift amount will be CIV or CIV+1, depending on
1332 // whether the value is early or late. Instead of creating CIV+1,
1333 // do a single shift of the value.
1334 Value *ShAmt = CIV, *ShVal = Op;
1335 auto *VTy = cast<IntegerType>(ShVal->getType());
1336 auto *ATy = cast<IntegerType>(ShAmt->getType());
1337 if (Late.count(&*I))
1338 ShVal = IRB.CreateShl(Op, ConstantInt::get(VTy, 1));
1339 // Second, the types of the shifted value and the shift amount
1340 // must match.
1341 if (VTy != ATy) {
1342 if (VTy->getBitWidth() < ATy->getBitWidth())
1343 ShVal = upcast(CastMap, IRB, ShVal, ATy);
1344 else
1345 ShAmt = upcast(CastMap, IRB, ShAmt, VTy);
1346 }
1347 // Ready to generate the shift and memoize it.
1348 W = IRB.CreateShl(ShVal, ShAmt);
1349 ShiftMap.insert(std::make_pair(Op, W));
1350 }
1351 I->replaceUsesOfWith(Op, W);
1352 }
1353 }
1354
1355 // Update the users outside of the loop to account for having left
1356 // shifts. They would normally be shifted right in the loop, so shift
1357 // them right after the loop exit.
1358 // Take advantage of the loop-closed SSA form, which has all the post-
1359 // loop values in phi nodes.
1360 IRB.SetInsertPoint(ExitB, ExitB->getFirstInsertionPt());
1361 for (auto P = ExitB->begin(), Q = ExitB->end(); P != Q; ++P) {
1362 if (!isa<PHINode>(P))
1363 break;
1364 auto *PN = cast<PHINode>(P);
1365 Value *U = PN->getIncomingValueForBlock(LoopB);
1366 if (!Users.count(U))
1367 continue;
1368 Value *S = IRB.CreateLShr(PN, ConstantInt::get(PN->getType(), IterCount));
1369 PN->replaceAllUsesWith(S);
1370 // The above RAUW will create
1371 // S = lshr S, IterCount
1372 // so we need to fix it back into
1373 // S = lshr PN, IterCount
1374 cast<User>(S)->replaceUsesOfWith(S, PN);
1375 }
1376
1377 return true;
1378}
1379
1380
1381void PolynomialMultiplyRecognize::cleanupLoopBody(BasicBlock *LoopB) {
1382 for (auto &I : *LoopB)
1383 if (Value *SV = SimplifyInstruction(&I, DL, &TLI, &DT))
1384 I.replaceAllUsesWith(SV);
1385
1386 for (auto I = LoopB->begin(), N = I; I != LoopB->end(); I = N) {
1387 N = std::next(I);
1388 RecursivelyDeleteTriviallyDeadInstructions(&*I, &TLI);
1389 }
1390}
1391
1392
1393unsigned PolynomialMultiplyRecognize::getInverseMxN(unsigned QP) {
1394 // Arrays of coefficients of Q and the inverse, C.
1395 // Q[i] = coefficient at x^i.
1396 std::array<char,32> Q, C;
1397
1398 for (unsigned i = 0; i < 32; ++i) {
1399 Q[i] = QP & 1;
1400 QP >>= 1;
1401 }
1402 assert(Q[0] == 1)((Q[0] == 1) ? static_cast<void> (0) : __assert_fail ("Q[0] == 1"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1402, __PRETTY_FUNCTION__))
;
1403
1404 // Find C, such that
1405 // (Q[n]*x^n + ... + Q[1]*x + Q[0]) * (C[n]*x^n + ... + C[1]*x + C[0]) = 1
1406 //
1407 // For it to have a solution, Q[0] must be 1. Since this is Z2[x], the
1408 // operations * and + are & and ^ respectively.
1409 //
1410 // Find C[i] recursively, by comparing i-th coefficient in the product
1411 // with 0 (or 1 for i=0).
1412 //
1413 // C[0] = 1, since C[0] = Q[0], and Q[0] = 1.
1414 C[0] = 1;
1415 for (unsigned i = 1; i < 32; ++i) {
1416 // Solve for C[i] in:
1417 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i]Q[0] = 0
1418 // This is equivalent to
1419 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] ^ C[i] = 0
1420 // which is
1421 // C[0]Q[i] ^ C[1]Q[i-1] ^ ... ^ C[i-1]Q[1] = C[i]
1422 unsigned T = 0;
1423 for (unsigned j = 0; j < i; ++j)
1424 T = T ^ (C[j] & Q[i-j]);
1425 C[i] = T;
1426 }
1427
1428 unsigned QV = 0;
1429 for (unsigned i = 0; i < 32; ++i)
1430 if (C[i])
1431 QV |= (1 << i);
1432
1433 return QV;
1434}
1435
1436
1437Value *PolynomialMultiplyRecognize::generate(BasicBlock::iterator At,
1438 ParsedValues &PV) {
1439 IRBuilder<> B(&*At);
1440 Module *M = At->getParent()->getParent()->getParent();
1441 Value *PMF = Intrinsic::getDeclaration(M, Intrinsic::hexagon_M4_pmpyw);
1442
1443 Value *P = PV.P, *Q = PV.Q, *P0 = P;
1444 unsigned IC = PV.IterCount;
1445
1446 if (PV.M != nullptr)
1
Assuming the condition is false
2
Taking false branch
1447 P0 = P = B.CreateXor(P, PV.M);
1448
1449 // Create a bit mask to clear the high bits beyond IterCount.
1450 auto *BMI = ConstantInt::get(P->getType(), APInt::getLowBitsSet(32, IC));
1451
1452 if (PV.IterCount != 32)
3
Assuming the condition is false
4
Taking false branch
1453 P = B.CreateAnd(P, BMI);
1454
1455 if (PV.Inv) {
5
Assuming the condition is true
6
Taking true branch
1456 auto *QI = dyn_cast<ConstantInt>(PV.Q);
1457 assert(QI && QI->getBitWidth() <= 32)((QI && QI->getBitWidth() <= 32) ? static_cast<
void> (0) : __assert_fail ("QI && QI->getBitWidth() <= 32"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1457, __PRETTY_FUNCTION__))
;
1458
1459 // Again, clearing bits beyond IterCount.
1460 unsigned M = (1 << PV.IterCount) - 1;
7
The result of the '<<' expression is undefined
1461 unsigned Tmp = (QI->getZExtValue() | 1) & M;
1462 unsigned QV = getInverseMxN(Tmp) & M;
1463 auto *QVI = ConstantInt::get(QI->getType(), QV);
1464 P = B.CreateCall(PMF, {P, QVI});
1465 P = B.CreateTrunc(P, QI->getType());
1466 if (IC != 32)
1467 P = B.CreateAnd(P, BMI);
1468 }
1469
1470 Value *R = B.CreateCall(PMF, {P, Q});
1471
1472 if (PV.M != nullptr)
1473 R = B.CreateXor(R, B.CreateIntCast(P0, R->getType(), false));
1474
1475 return R;
1476}
1477
1478
1479void PolynomialMultiplyRecognize::setupSimplifier() {
1480 Simp.addRule(
1481 // Sink zext past bitwise operations.
1482 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1483 if (I->getOpcode() != Instruction::ZExt)
1484 return nullptr;
1485 Instruction *T = dyn_cast<Instruction>(I->getOperand(0));
1486 if (!T)
1487 return nullptr;
1488 switch (T->getOpcode()) {
1489 case Instruction::And:
1490 case Instruction::Or:
1491 case Instruction::Xor:
1492 break;
1493 default:
1494 return nullptr;
1495 }
1496 IRBuilder<> B(Ctx);
1497 return B.CreateBinOp(cast<BinaryOperator>(T)->getOpcode(),
1498 B.CreateZExt(T->getOperand(0), I->getType()),
1499 B.CreateZExt(T->getOperand(1), I->getType()));
1500 });
1501 Simp.addRule(
1502 // (xor (and x a) (and y a)) -> (and (xor x y) a)
1503 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1504 if (I->getOpcode() != Instruction::Xor)
1505 return nullptr;
1506 Instruction *And0 = dyn_cast<Instruction>(I->getOperand(0));
1507 Instruction *And1 = dyn_cast<Instruction>(I->getOperand(1));
1508 if (!And0 || !And1)
1509 return nullptr;
1510 if (And0->getOpcode() != Instruction::And ||
1511 And1->getOpcode() != Instruction::And)
1512 return nullptr;
1513 if (And0->getOperand(1) != And1->getOperand(1))
1514 return nullptr;
1515 IRBuilder<> B(Ctx);
1516 return B.CreateAnd(B.CreateXor(And0->getOperand(0), And1->getOperand(0)),
1517 And0->getOperand(1));
1518 });
1519 Simp.addRule(
1520 // (Op (select c x y) z) -> (select c (Op x z) (Op y z))
1521 // (Op x (select c y z)) -> (select c (Op x y) (Op x z))
1522 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1523 BinaryOperator *BO = dyn_cast<BinaryOperator>(I);
1524 if (!BO)
1525 return nullptr;
1526 Instruction::BinaryOps Op = BO->getOpcode();
1527 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(0))) {
1528 IRBuilder<> B(Ctx);
1529 Value *X = Sel->getTrueValue(), *Y = Sel->getFalseValue();
1530 Value *Z = BO->getOperand(1);
1531 return B.CreateSelect(Sel->getCondition(),
1532 B.CreateBinOp(Op, X, Z),
1533 B.CreateBinOp(Op, Y, Z));
1534 }
1535 if (SelectInst *Sel = dyn_cast<SelectInst>(BO->getOperand(1))) {
1536 IRBuilder<> B(Ctx);
1537 Value *X = BO->getOperand(0);
1538 Value *Y = Sel->getTrueValue(), *Z = Sel->getFalseValue();
1539 return B.CreateSelect(Sel->getCondition(),
1540 B.CreateBinOp(Op, X, Y),
1541 B.CreateBinOp(Op, X, Z));
1542 }
1543 return nullptr;
1544 });
1545 Simp.addRule(
1546 // (select c (select c x y) z) -> (select c x z)
1547 // (select c x (select c y z)) -> (select c x z)
1548 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1549 SelectInst *Sel = dyn_cast<SelectInst>(I);
1550 if (!Sel)
1551 return nullptr;
1552 IRBuilder<> B(Ctx);
1553 Value *C = Sel->getCondition();
1554 if (SelectInst *Sel0 = dyn_cast<SelectInst>(Sel->getTrueValue())) {
1555 if (Sel0->getCondition() == C)
1556 return B.CreateSelect(C, Sel0->getTrueValue(), Sel->getFalseValue());
1557 }
1558 if (SelectInst *Sel1 = dyn_cast<SelectInst>(Sel->getFalseValue())) {
1559 if (Sel1->getCondition() == C)
1560 return B.CreateSelect(C, Sel->getTrueValue(), Sel1->getFalseValue());
1561 }
1562 return nullptr;
1563 });
1564 Simp.addRule(
1565 // (or (lshr x 1) 0x800.0) -> (xor (lshr x 1) 0x800.0)
1566 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1567 if (I->getOpcode() != Instruction::Or)
1568 return nullptr;
1569 Instruction *LShr = dyn_cast<Instruction>(I->getOperand(0));
1570 if (!LShr || LShr->getOpcode() != Instruction::LShr)
1571 return nullptr;
1572 ConstantInt *One = dyn_cast<ConstantInt>(LShr->getOperand(1));
1573 if (!One || One->getZExtValue() != 1)
1574 return nullptr;
1575 ConstantInt *Msb = dyn_cast<ConstantInt>(I->getOperand(1));
1576 if (!Msb || Msb->getZExtValue() != Msb->getType()->getSignBit())
1577 return nullptr;
1578 return IRBuilder<>(Ctx).CreateXor(LShr, Msb);
1579 });
1580 Simp.addRule(
1581 // (lshr (BitOp x y) c) -> (BitOp (lshr x c) (lshr y c))
1582 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1583 if (I->getOpcode() != Instruction::LShr)
1584 return nullptr;
1585 BinaryOperator *BitOp = dyn_cast<BinaryOperator>(I->getOperand(0));
1586 if (!BitOp)
1587 return nullptr;
1588 switch (BitOp->getOpcode()) {
1589 case Instruction::And:
1590 case Instruction::Or:
1591 case Instruction::Xor:
1592 break;
1593 default:
1594 return nullptr;
1595 }
1596 IRBuilder<> B(Ctx);
1597 Value *S = I->getOperand(1);
1598 return B.CreateBinOp(BitOp->getOpcode(),
1599 B.CreateLShr(BitOp->getOperand(0), S),
1600 B.CreateLShr(BitOp->getOperand(1), S));
1601 });
1602 Simp.addRule(
1603 // (BitOp1 (BitOp2 x a) b) -> (BitOp2 x (BitOp1 a b))
1604 [](Instruction *I, LLVMContext &Ctx) -> Value* {
1605 auto IsBitOp = [](unsigned Op) -> bool {
1606 switch (Op) {
1607 case Instruction::And:
1608 case Instruction::Or:
1609 case Instruction::Xor:
1610 return true;
1611 }
1612 return false;
1613 };
1614 BinaryOperator *BitOp1 = dyn_cast<BinaryOperator>(I);
1615 if (!BitOp1 || !IsBitOp(BitOp1->getOpcode()))
1616 return nullptr;
1617 BinaryOperator *BitOp2 = dyn_cast<BinaryOperator>(BitOp1->getOperand(0));
1618 if (!BitOp2 || !IsBitOp(BitOp2->getOpcode()))
1619 return nullptr;
1620 ConstantInt *CA = dyn_cast<ConstantInt>(BitOp2->getOperand(1));
1621 ConstantInt *CB = dyn_cast<ConstantInt>(BitOp1->getOperand(1));
1622 if (!CA || !CB)
1623 return nullptr;
1624 IRBuilder<> B(Ctx);
1625 Value *X = BitOp2->getOperand(0);
1626 return B.CreateBinOp(BitOp2->getOpcode(), X,
1627 B.CreateBinOp(BitOp1->getOpcode(), CA, CB));
1628 });
1629}
1630
1631
1632bool PolynomialMultiplyRecognize::recognize() {
1633 DEBUG(dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
1634 << *CurLoop << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Starting PolynomialMultiplyRecognize on loop\n"
<< *CurLoop << '\n'; } } while (false)
;
1635 // Restrictions:
1636 // - The loop must consist of a single block.
1637 // - The iteration count must be known at compile-time.
1638 // - The loop must have an induction variable starting from 0, and
1639 // incremented in each iteration of the loop.
1640 BasicBlock *LoopB = CurLoop->getHeader();
1641 DEBUG(dbgs() << "Loop header:\n" << *LoopB)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop header:\n" << *
LoopB; } } while (false)
;
1642
1643 if (LoopB != CurLoop->getLoopLatch())
1644 return false;
1645 BasicBlock *ExitB = CurLoop->getExitBlock();
1646 if (ExitB == nullptr)
1647 return false;
1648 BasicBlock *EntryB = CurLoop->getLoopPreheader();
1649 if (EntryB == nullptr)
1650 return false;
1651
1652 unsigned IterCount = 0;
1653 const SCEV *CT = SE.getBackedgeTakenCount(CurLoop);
1654 if (isa<SCEVCouldNotCompute>(CT))
1655 return false;
1656 if (auto *CV = dyn_cast<SCEVConstant>(CT))
1657 IterCount = CV->getValue()->getZExtValue() + 1;
1658
1659 Value *CIV = getCountIV(LoopB);
1660 ParsedValues PV;
1661 PV.IterCount = IterCount;
1662 DEBUG(dbgs() << "Loop IV: " << *CIV << "\nIterCount: " << IterCount << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Loop IV: " << *CIV <<
"\nIterCount: " << IterCount << '\n'; } } while (
false)
;
1663
1664 setupSimplifier();
1665
1666 // Perform a preliminary scan of select instructions to see if any of them
1667 // looks like a generator of the polynomial multiply steps. Assume that a
1668 // loop can only contain a single transformable operation, so stop the
1669 // traversal after the first reasonable candidate was found.
1670 // XXX: Currently this approach can modify the loop before being 100% sure
1671 // that the transformation can be carried out.
1672 bool FoundPreScan = false;
1673 for (Instruction &In : *LoopB) {
1674 SelectInst *SI = dyn_cast<SelectInst>(&In);
1675 if (!SI)
1676 continue;
1677
1678 Simplifier::Context C(SI);
1679 Value *T = Simp.simplify(C);
1680 SelectInst *SelI = (T && isa<SelectInst>(T)) ? cast<SelectInst>(T) : SI;
1681 DEBUG(dbgs() << "scanSelect(pre-scan): " << PE(C, SelI) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect(pre-scan): " <<
PE(C, SelI) << '\n'; } } while (false)
;
1682 if (scanSelect(SelI, LoopB, EntryB, CIV, PV, true)) {
1683 FoundPreScan = true;
1684 if (SelI != SI) {
1685 Value *NewSel = C.materialize(LoopB, SI->getIterator());
1686 SI->replaceAllUsesWith(NewSel);
1687 RecursivelyDeleteTriviallyDeadInstructions(SI, &TLI);
1688 }
1689 break;
1690 }
1691 }
1692
1693 if (!FoundPreScan) {
1694 DEBUG(dbgs() << "Have not found candidates for pmpy\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "Have not found candidates for pmpy\n"
; } } while (false)
;
1695 return false;
1696 }
1697
1698 if (!PV.Left) {
1699 // The right shift version actually only returns the higher bits of
1700 // the result (each iteration discards the LSB). If we want to convert it
1701 // to a left-shifting loop, the working data type must be at least as
1702 // wide as the target's pmpy instruction.
1703 if (!promoteTypes(LoopB, ExitB))
1704 return false;
1705 convertShiftsToLeft(LoopB, ExitB, IterCount);
1706 cleanupLoopBody(LoopB);
1707 }
1708
1709 // Scan the loop again, find the generating select instruction.
1710 bool FoundScan = false;
1711 for (Instruction &In : *LoopB) {
1712 SelectInst *SelI = dyn_cast<SelectInst>(&In);
1713 if (!SelI)
1714 continue;
1715 DEBUG(dbgs() << "scanSelect: " << *SelI << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << "scanSelect: " << *SelI
<< '\n'; } } while (false)
;
1716 FoundScan = scanSelect(SelI, LoopB, EntryB, CIV, PV, false);
1717 if (FoundScan)
1718 break;
1719 }
1720 assert(FoundScan)((FoundScan) ? static_cast<void> (0) : __assert_fail ("FoundScan"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1720, __PRETTY_FUNCTION__))
;
1721
1722 DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1723 StringRef PP = (PV.M ? "(P+M)" : "P");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1724 if (!PV.Inv)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1725 dbgs() << "Found pmpy idiom: R = " << PP << ".Q\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1726 elsedo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1727 dbgs() << "Found inverse pmpy idiom: R = (" << PP << "/Q).Q) + "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1728 << PP << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1729 dbgs() << " Res:" << *PV.Res << "\n P:" << *PV.P << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1730 if (PV.M)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1731 dbgs() << " M:" << *PV.M << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1732 dbgs() << " Q:" << *PV.Q << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1733 dbgs() << " Iteration count:" << PV.IterCount << "\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
1734 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { { StringRef PP = (PV.M ? "(P+M)" : "P"); if
(!PV.Inv) dbgs() << "Found pmpy idiom: R = " << PP
<< ".Q\n"; else dbgs() << "Found inverse pmpy idiom: R = ("
<< PP << "/Q).Q) + " << PP << "\n"; dbgs
() << " Res:" << *PV.Res << "\n P:" <<
*PV.P << "\n"; if (PV.M) dbgs() << " M:" <<
*PV.M << "\n"; dbgs() << " Q:" << *PV.Q <<
"\n"; dbgs() << " Iteration count:" << PV.IterCount
<< "\n"; }; } } while (false)
;
1735
1736 BasicBlock::iterator At(EntryB->getTerminator());
1737 Value *PM = generate(At, PV);
1738 if (PM == nullptr)
1739 return false;
1740
1741 if (PM->getType() != PV.Res->getType())
1742 PM = IRBuilder<>(&*At).CreateIntCast(PM, PV.Res->getType(), false);
1743
1744 PV.Res->replaceAllUsesWith(PM);
1745 PV.Res->eraseFromParent();
1746 return true;
1747}
1748
1749
1750unsigned HexagonLoopIdiomRecognize::getStoreSizeInBytes(StoreInst *SI) {
1751 uint64_t SizeInBits = DL->getTypeSizeInBits(SI->getValueOperand()->getType());
1752 assert(((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&((((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
"Don't overflow unsigned.") ? static_cast<void> (0) : __assert_fail
("((SizeInBits & 7) || (SizeInBits >> 32) == 0) && \"Don't overflow unsigned.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1753, __PRETTY_FUNCTION__))
1753 "Don't overflow unsigned.")((((SizeInBits & 7) || (SizeInBits >> 32) == 0) &&
"Don't overflow unsigned.") ? static_cast<void> (0) : __assert_fail
("((SizeInBits & 7) || (SizeInBits >> 32) == 0) && \"Don't overflow unsigned.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1753, __PRETTY_FUNCTION__))
;
1754 return (unsigned)SizeInBits >> 3;
1755}
1756
1757
1758int HexagonLoopIdiomRecognize::getSCEVStride(const SCEVAddRecExpr *S) {
1759 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
1760 return SC->getAPInt().getSExtValue();
1761 return 0;
1762}
1763
1764
1765bool HexagonLoopIdiomRecognize::isLegalStore(Loop *CurLoop, StoreInst *SI) {
1766 // Allow volatile stores if HexagonVolatileMemcpy is enabled.
1767 if (!(SI->isVolatile() && HexagonVolatileMemcpy) && !SI->isSimple())
1768 return false;
1769
1770 Value *StoredVal = SI->getValueOperand();
1771 Value *StorePtr = SI->getPointerOperand();
1772
1773 // Reject stores that are so large that they overflow an unsigned.
1774 uint64_t SizeInBits = DL->getTypeSizeInBits(StoredVal->getType());
1775 if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
1776 return false;
1777
1778 // See if the pointer expression is an AddRec like {base,+,1} on the current
1779 // loop, which indicates a strided store. If we have something else, it's a
1780 // random store we can't handle.
1781 auto *StoreEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1782 if (!StoreEv || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
1783 return false;
1784
1785 // Check to see if the stride matches the size of the store. If so, then we
1786 // know that every byte is touched in the loop.
1787 int Stride = getSCEVStride(StoreEv);
1788 if (Stride == 0)
1789 return false;
1790 unsigned StoreSize = getStoreSizeInBytes(SI);
1791 if (StoreSize != unsigned(std::abs(Stride)))
1792 return false;
1793
1794 // The store must be feeding a non-volatile load.
1795 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1796 if (!LI || !LI->isSimple())
1797 return false;
1798
1799 // See if the pointer expression is an AddRec like {base,+,1} on the current
1800 // loop, which indicates a strided load. If we have something else, it's a
1801 // random load we can't handle.
1802 Value *LoadPtr = LI->getPointerOperand();
1803 auto *LoadEv = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LoadPtr));
1804 if (!LoadEv || LoadEv->getLoop() != CurLoop || !LoadEv->isAffine())
1805 return false;
1806
1807 // The store and load must share the same stride.
1808 if (StoreEv->getOperand(1) != LoadEv->getOperand(1))
1809 return false;
1810
1811 // Success. This store can be converted into a memcpy.
1812 return true;
1813}
1814
1815
1816/// mayLoopAccessLocation - Return true if the specified loop might access the
1817/// specified pointer location, which is a loop-strided access. The 'Access'
1818/// argument specifies what the verboten forms of access are (read or write).
1819static bool
1820mayLoopAccessLocation(Value *Ptr, ModRefInfo Access, Loop *L,
1821 const SCEV *BECount, unsigned StoreSize,
1822 AliasAnalysis &AA,
1823 SmallPtrSetImpl<Instruction *> &Ignored) {
1824 // Get the location that may be stored across the loop. Since the access
1825 // is strided positively through memory, we say that the modified location
1826 // starts at the pointer and has infinite size.
1827 uint64_t AccessSize = MemoryLocation::UnknownSize;
1828
1829 // If the loop iterates a fixed number of times, we can refine the access
1830 // size to be exactly the size of the memset, which is (BECount+1)*StoreSize
1831 if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
1832 AccessSize = (BECst->getValue()->getZExtValue() + 1) * StoreSize;
1833
1834 // TODO: For this to be really effective, we have to dive into the pointer
1835 // operand in the store. Store to &A[i] of 100 will always return may alias
1836 // with store of &A[100], we need to StoreLoc to be "A" with size of 100,
1837 // which will then no-alias a store to &A[100].
1838 MemoryLocation StoreLoc(Ptr, AccessSize);
1839
1840 for (auto *B : L->blocks())
1841 for (auto &I : *B)
1842 if (Ignored.count(&I) == 0 && (AA.getModRefInfo(&I, StoreLoc) & Access))
1843 return true;
1844
1845 return false;
1846}
1847
1848
1849void HexagonLoopIdiomRecognize::collectStores(Loop *CurLoop, BasicBlock *BB,
1850 SmallVectorImpl<StoreInst*> &Stores) {
1851 Stores.clear();
1852 for (Instruction &I : *BB)
1853 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
1854 if (isLegalStore(CurLoop, SI))
1855 Stores.push_back(SI);
1856}
1857
1858
1859bool HexagonLoopIdiomRecognize::processCopyingStore(Loop *CurLoop,
1860 StoreInst *SI, const SCEV *BECount) {
1861 assert((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) &&(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1863, __PRETTY_FUNCTION__))
1862 "Expected only non-volatile stores, or Hexagon-specific memcpy"(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1863, __PRETTY_FUNCTION__))
1863 "to volatile destination.")(((SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy
)) && "Expected only non-volatile stores, or Hexagon-specific memcpy"
"to volatile destination.") ? static_cast<void> (0) : __assert_fail
("(SI->isSimple() || (SI->isVolatile() && HexagonVolatileMemcpy)) && \"Expected only non-volatile stores, or Hexagon-specific memcpy\" \"to volatile destination.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 1863, __PRETTY_FUNCTION__))
;
1864
1865 Value *StorePtr = SI->getPointerOperand();
1866 auto *StoreEv = cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
1867 unsigned Stride = getSCEVStride(StoreEv);
1868 unsigned StoreSize = getStoreSizeInBytes(SI);
1869 if (Stride != StoreSize)
1870 return false;
1871
1872 // See if the pointer expression is an AddRec like {base,+,1} on the current
1873 // loop, which indicates a strided load. If we have something else, it's a
1874 // random load we can't handle.
1875 LoadInst *LI = dyn_cast<LoadInst>(SI->getValueOperand());
1876 auto *LoadEv = cast<SCEVAddRecExpr>(SE->getSCEV(LI->getPointerOperand()));
1877
1878 // The trip count of the loop and the base pointer of the addrec SCEV is
1879 // guaranteed to be loop invariant, which means that it should dominate the
1880 // header. This allows us to insert code for it in the preheader.
1881 BasicBlock *Preheader = CurLoop->getLoopPreheader();
1882 Instruction *ExpPt = Preheader->getTerminator();
1883 IRBuilder<> Builder(ExpPt);
1884 SCEVExpander Expander(*SE, *DL, "hexagon-loop-idiom");
1885
1886 Type *IntPtrTy = Builder.getIntPtrTy(*DL, SI->getPointerAddressSpace());
1887
1888 // Okay, we have a strided store "p[i]" of a loaded value. We can turn
1889 // this into a memcpy/memmove in the loop preheader now if we want. However,
1890 // this would be unsafe to do if there is anything else in the loop that may
1891 // read or write the memory region we're storing to. For memcpy, this
1892 // includes the load that feeds the stores. Check for an alias by generating
1893 // the base address and checking everything.
1894 Value *StoreBasePtr = Expander.expandCodeFor(StoreEv->getStart(),
1895 Builder.getInt8PtrTy(SI->getPointerAddressSpace()), ExpPt);
1896 Value *LoadBasePtr = nullptr;
1897
1898 bool Overlap = false;
1899 bool DestVolatile = SI->isVolatile();
1900 Type *BECountTy = BECount->getType();
1901
1902 if (DestVolatile) {
1903 // The trip count must fit in i32, since it is the type of the "num_words"
1904 // argument to hexagon_memcpy_forward_vp4cp4n2.
1905 if (StoreSize != 4 || DL->getTypeSizeInBits(BECountTy) > 32) {
1906CleanupAndExit:
1907 // If we generated new code for the base pointer, clean up.
1908 Expander.clear();
1909 if (StoreBasePtr && (LoadBasePtr != StoreBasePtr)) {
1910 RecursivelyDeleteTriviallyDeadInstructions(StoreBasePtr, TLI);
1911 StoreBasePtr = nullptr;
1912 }
1913 if (LoadBasePtr) {
1914 RecursivelyDeleteTriviallyDeadInstructions(LoadBasePtr, TLI);
1915 LoadBasePtr = nullptr;
1916 }
1917 return false;
1918 }
1919 }
1920
1921 SmallPtrSet<Instruction*, 2> Ignore1;
1922 Ignore1.insert(SI);
1923 if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
1924 StoreSize, *AA, Ignore1)) {
1925 // Check if the load is the offending instruction.
1926 Ignore1.insert(LI);
1927 if (mayLoopAccessLocation(StoreBasePtr, MRI_ModRef, CurLoop, BECount,
1928 StoreSize, *AA, Ignore1)) {
1929 // Still bad. Nothing we can do.
1930 goto CleanupAndExit;
1931 }
1932 // It worked with the load ignored.
1933 Overlap = true;
1934 }
1935
1936 if (!Overlap) {
1937 if (DisableMemcpyIdiom || !HasMemcpy)
1938 goto CleanupAndExit;
1939 } else {
1940 // Don't generate memmove if this function will be inlined. This is
1941 // because the caller will undergo this transformation after inlining.
1942 Function *Func = CurLoop->getHeader()->getParent();
1943 if (Func->hasFnAttribute(Attribute::AlwaysInline))
1944 goto CleanupAndExit;
1945
1946 // In case of a memmove, the call to memmove will be executed instead
1947 // of the loop, so we need to make sure that there is nothing else in
1948 // the loop than the load, store and instructions that these two depend
1949 // on.
1950 SmallVector<Instruction*,2> Insts;
1951 Insts.push_back(SI);
1952 Insts.push_back(LI);
1953 if (!coverLoop(CurLoop, Insts))
1954 goto CleanupAndExit;
1955
1956 if (DisableMemmoveIdiom || !HasMemmove)
1957 goto CleanupAndExit;
1958 bool IsNested = CurLoop->getParentLoop() != 0;
1959 if (IsNested && OnlyNonNestedMemmove)
1960 goto CleanupAndExit;
1961 }
1962
1963 // For a memcpy, we have to make sure that the input array is not being
1964 // mutated by the loop.
1965 LoadBasePtr = Expander.expandCodeFor(LoadEv->getStart(),
1966 Builder.getInt8PtrTy(LI->getPointerAddressSpace()), ExpPt);
1967
1968 SmallPtrSet<Instruction*, 2> Ignore2;
1969 Ignore2.insert(SI);
1970 if (mayLoopAccessLocation(LoadBasePtr, MRI_Mod, CurLoop, BECount, StoreSize,
1971 *AA, Ignore2))
1972 goto CleanupAndExit;
1973
1974 // Check the stride.
1975 bool StridePos = getSCEVStride(LoadEv) >= 0;
1976
1977 // Currently, the volatile memcpy only emulates traversing memory forward.
1978 if (!StridePos && DestVolatile)
1979 goto CleanupAndExit;
1980
1981 bool RuntimeCheck = (Overlap || DestVolatile);
1982
1983 BasicBlock *ExitB;
1984 if (RuntimeCheck) {
1985 // The runtime check needs a single exit block.
1986 SmallVector<BasicBlock*, 8> ExitBlocks;
1987 CurLoop->getUniqueExitBlocks(ExitBlocks);
1988 if (ExitBlocks.size() != 1)
1989 goto CleanupAndExit;
1990 ExitB = ExitBlocks[0];
1991 }
1992
1993 // The # stored bytes is (BECount+1)*Size. Expand the trip count out to
1994 // pointer size if it isn't already.
1995 LLVMContext &Ctx = SI->getContext();
1996 BECount = SE->getTruncateOrZeroExtend(BECount, IntPtrTy);
1997 unsigned Alignment = std::min(SI->getAlignment(), LI->getAlignment());
1998 DebugLoc DLoc = SI->getDebugLoc();
1999
2000 const SCEV *NumBytesS =
2001 SE->getAddExpr(BECount, SE->getOne(IntPtrTy), SCEV::FlagNUW);
2002 if (StoreSize != 1)
2003 NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtrTy, StoreSize),
2004 SCEV::FlagNUW);
2005 Value *NumBytes = Expander.expandCodeFor(NumBytesS, IntPtrTy, ExpPt);
2006 if (Instruction *In = dyn_cast<Instruction>(NumBytes))
2007 if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
2008 NumBytes = Simp;
2009
2010 CallInst *NewCall;
2011
2012 if (RuntimeCheck) {
2013 unsigned Threshold = RuntimeMemSizeThreshold;
2014 if (ConstantInt *CI = dyn_cast<ConstantInt>(NumBytes)) {
2015 uint64_t C = CI->getZExtValue();
2016 if (Threshold != 0 && C < Threshold)
2017 goto CleanupAndExit;
2018 if (C < CompileTimeMemSizeThreshold)
2019 goto CleanupAndExit;
2020 }
2021
2022 BasicBlock *Header = CurLoop->getHeader();
2023 Function *Func = Header->getParent();
2024 Loop *ParentL = LF->getLoopFor(Preheader);
2025 StringRef HeaderName = Header->getName();
2026
2027 // Create a new (empty) preheader, and update the PHI nodes in the
2028 // header to use the new preheader.
2029 BasicBlock *NewPreheader = BasicBlock::Create(Ctx, HeaderName+".rtli.ph",
2030 Func, Header);
2031 if (ParentL)
2032 ParentL->addBasicBlockToLoop(NewPreheader, *LF);
2033 IRBuilder<>(NewPreheader).CreateBr(Header);
2034 for (auto &In : *Header) {
2035 PHINode *PN = dyn_cast<PHINode>(&In);
2036 if (!PN)
2037 break;
2038 int bx = PN->getBasicBlockIndex(Preheader);
2039 if (bx >= 0)
2040 PN->setIncomingBlock(bx, NewPreheader);
2041 }
2042 DT->addNewBlock(NewPreheader, Preheader);
2043 DT->changeImmediateDominator(Header, NewPreheader);
2044
2045 // Check for safe conditions to execute memmove.
2046 // If stride is positive, copying things from higher to lower addresses
2047 // is equivalent to memmove. For negative stride, it's the other way
2048 // around. Copying forward in memory with positive stride may not be
2049 // same as memmove since we may be copying values that we just stored
2050 // in some previous iteration.
2051 Value *LA = Builder.CreatePtrToInt(LoadBasePtr, IntPtrTy);
2052 Value *SA = Builder.CreatePtrToInt(StoreBasePtr, IntPtrTy);
2053 Value *LowA = StridePos ? SA : LA;
2054 Value *HighA = StridePos ? LA : SA;
2055 Value *CmpA = Builder.CreateICmpULT(LowA, HighA);
2056 Value *Cond = CmpA;
2057
2058 // Check for distance between pointers.
2059 Value *Dist = Builder.CreateSub(HighA, LowA);
2060 Value *CmpD = Builder.CreateICmpSLT(NumBytes, Dist);
2061 Value *CmpEither = Builder.CreateOr(Cond, CmpD);
2062 Cond = CmpEither;
2063
2064 if (Threshold != 0) {
2065 Type *Ty = NumBytes->getType();
2066 Value *Thr = ConstantInt::get(Ty, Threshold);
2067 Value *CmpB = Builder.CreateICmpULT(Thr, NumBytes);
2068 Value *CmpBoth = Builder.CreateAnd(Cond, CmpB);
2069 Cond = CmpBoth;
2070 }
2071 BasicBlock *MemmoveB = BasicBlock::Create(Ctx, Header->getName()+".rtli",
2072 Func, NewPreheader);
2073 if (ParentL)
2074 ParentL->addBasicBlockToLoop(MemmoveB, *LF);
2075 Instruction *OldT = Preheader->getTerminator();
2076 Builder.CreateCondBr(Cond, MemmoveB, NewPreheader);
2077 OldT->eraseFromParent();
2078 Preheader->setName(Preheader->getName()+".old");
2079 DT->addNewBlock(MemmoveB, Preheader);
2080 // Find the new immediate dominator of the exit block.
2081 BasicBlock *ExitD = Preheader;
2082 for (auto PI = pred_begin(ExitB), PE = pred_end(ExitB); PI != PE; ++PI) {
2083 BasicBlock *PB = *PI;
2084 ExitD = DT->findNearestCommonDominator(ExitD, PB);
2085 if (!ExitD)
2086 break;
2087 }
2088 // If the prior immediate dominator of ExitB was dominated by the
2089 // old preheader, then the old preheader becomes the new immediate
2090 // dominator. Otherwise don't change anything (because the newly
2091 // added blocks are dominated by the old preheader).
2092 if (ExitD && DT->dominates(Preheader, ExitD)) {
2093 DomTreeNode *BN = DT->getNode(ExitB);
2094 DomTreeNode *DN = DT->getNode(ExitD);
2095 BN->setIDom(DN);
2096 }
2097
2098 // Add a call to memmove to the conditional block.
2099 IRBuilder<> CondBuilder(MemmoveB);
2100 CondBuilder.CreateBr(ExitB);
2101 CondBuilder.SetInsertPoint(MemmoveB->getTerminator());
2102
2103 if (DestVolatile) {
2104 Type *Int32Ty = Type::getInt32Ty(Ctx);
2105 Type *Int32PtrTy = Type::getInt32PtrTy(Ctx);
2106 Type *VoidTy = Type::getVoidTy(Ctx);
2107 Module *M = Func->getParent();
2108 Constant *CF = M->getOrInsertFunction(HexagonVolatileMemcpyName, VoidTy,
2109 Int32PtrTy, Int32PtrTy, Int32Ty,
2110 nullptr);
2111 Function *Fn = cast<Function>(CF);
2112 Fn->setLinkage(Function::ExternalLinkage);
2113
2114 const SCEV *OneS = SE->getConstant(Int32Ty, 1);
2115 const SCEV *BECount32 = SE->getTruncateOrZeroExtend(BECount, Int32Ty);
2116 const SCEV *NumWordsS = SE->getAddExpr(BECount32, OneS, SCEV::FlagNUW);
2117 Value *NumWords = Expander.expandCodeFor(NumWordsS, Int32Ty,
2118 MemmoveB->getTerminator());
2119 if (Instruction *In = dyn_cast<Instruction>(NumWords))
2120 if (Value *Simp = SimplifyInstruction(In, *DL, TLI, DT))
2121 NumWords = Simp;
2122
2123 Value *Op0 = (StoreBasePtr->getType() == Int32PtrTy)
2124 ? StoreBasePtr
2125 : CondBuilder.CreateBitCast(StoreBasePtr, Int32PtrTy);
2126 Value *Op1 = (LoadBasePtr->getType() == Int32PtrTy)
2127 ? LoadBasePtr
2128 : CondBuilder.CreateBitCast(LoadBasePtr, Int32PtrTy);
2129 NewCall = CondBuilder.CreateCall(Fn, {Op0, Op1, NumWords});
2130 } else {
2131 NewCall = CondBuilder.CreateMemMove(StoreBasePtr, LoadBasePtr,
2132 NumBytes, Alignment);
2133 }
2134 } else {
2135 NewCall = Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr,
2136 NumBytes, Alignment);
2137 // Okay, the memcpy has been formed. Zap the original store and
2138 // anything that feeds into it.
2139 RecursivelyDeleteTriviallyDeadInstructions(SI, TLI);
2140 }
2141
2142 NewCall->setDebugLoc(DLoc);
2143
2144 DEBUG(dbgs() << " Formed " << (Overlap ? "memmove: " : "memcpy: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2145 << *NewCall << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2146 << " from load ptr=" << *LoadEv << " at: " << *LI << "\n"do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
2147 << " from store ptr=" << *StoreEv << " at: " << *SI << "\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("hexagon-lir")) { dbgs() << " Formed " << (Overlap
? "memmove: " : "memcpy: ") << *NewCall << "\n" <<
" from load ptr=" << *LoadEv << " at: " <<
*LI << "\n" << " from store ptr=" << *StoreEv
<< " at: " << *SI << "\n"; } } while (false
)
;
2148
2149 return true;
2150}
2151
2152
2153// \brief Check if the instructions in Insts, together with their dependencies
2154// cover the loop in the sense that the loop could be safely eliminated once
2155// the instructions in Insts are removed.
2156bool HexagonLoopIdiomRecognize::coverLoop(Loop *L,
2157 SmallVectorImpl<Instruction*> &Insts) const {
2158 SmallSet<BasicBlock*,8> LoopBlocks;
2159 for (auto *B : L->blocks())
2160 LoopBlocks.insert(B);
2161
2162 SetVector<Instruction*> Worklist(Insts.begin(), Insts.end());
2163
2164 // Collect all instructions from the loop that the instructions in Insts
2165 // depend on (plus their dependencies, etc.). These instructions will
2166 // constitute the expression trees that feed those in Insts, but the trees
2167 // will be limited only to instructions contained in the loop.
2168 for (unsigned i = 0; i < Worklist.size(); ++i) {
2169 Instruction *In = Worklist[i];
2170 for (auto I = In->op_begin(), E = In->op_end(); I != E; ++I) {
2171 Instruction *OpI = dyn_cast<Instruction>(I);
2172 if (!OpI)
2173 continue;
2174 BasicBlock *PB = OpI->getParent();
2175 if (!LoopBlocks.count(PB))
2176 continue;
2177 Worklist.insert(OpI);
2178 }
2179 }
2180
2181 // Scan all instructions in the loop, if any of them have a user outside
2182 // of the loop, or outside of the expressions collected above, then either
2183 // the loop has a side-effect visible outside of it, or there are
2184 // instructions in it that are not involved in the original set Insts.
2185 for (auto *B : L->blocks()) {
2186 for (auto &In : *B) {
2187 if (isa<BranchInst>(In) || isa<DbgInfoIntrinsic>(In))
2188 continue;
2189 if (!Worklist.count(&In) && In.mayHaveSideEffects())
2190 return false;
2191 for (const auto &K : In.users()) {
2192 Instruction *UseI = dyn_cast<Instruction>(K);
2193 if (!UseI)
2194 continue;
2195 BasicBlock *UseB = UseI->getParent();
2196 if (LF->getLoopFor(UseB) != L)
2197 return false;
2198 }
2199 }
2200 }
2201
2202 return true;
2203}
2204
2205/// runOnLoopBlock - Process the specified block, which lives in a counted loop
2206/// with the specified backedge count. This block is known to be in the current
2207/// loop and not in any subloops.
2208bool HexagonLoopIdiomRecognize::runOnLoopBlock(Loop *CurLoop, BasicBlock *BB,
2209 const SCEV *BECount, SmallVectorImpl<BasicBlock*> &ExitBlocks) {
2210 // We can only promote stores in this block if they are unconditionally
2211 // executed in the loop. For a block to be unconditionally executed, it has
2212 // to dominate all the exit blocks of the loop. Verify this now.
2213 auto DominatedByBB = [this,BB] (BasicBlock *EB) -> bool {
2214 return DT->dominates(BB, EB);
2215 };
2216 if (!std::all_of(ExitBlocks.begin(), ExitBlocks.end(), DominatedByBB))
2217 return false;
2218
2219 bool MadeChange = false;
2220 // Look for store instructions, which may be optimized to memset/memcpy.
2221 SmallVector<StoreInst*,8> Stores;
2222 collectStores(CurLoop, BB, Stores);
2223
2224 // Optimize the store into a memcpy, if it feeds an similarly strided load.
2225 for (auto &SI : Stores)
2226 MadeChange |= processCopyingStore(CurLoop, SI, BECount);
2227
2228 return MadeChange;
2229}
2230
2231
2232bool HexagonLoopIdiomRecognize::runOnCountableLoop(Loop *L) {
2233 PolynomialMultiplyRecognize PMR(L, *DL, *DT, *TLI, *SE);
2234 if (PMR.recognize())
2235 return true;
2236
2237 if (!HasMemcpy && !HasMemmove)
2238 return false;
2239
2240 const SCEV *BECount = SE->getBackedgeTakenCount(L);
2241 assert(!isa<SCEVCouldNotCompute>(BECount) &&((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2243, __PRETTY_FUNCTION__))
2242 "runOnCountableLoop() called on a loop without a predictable"((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2243, __PRETTY_FUNCTION__))
2243 "backedge-taken count")((!isa<SCEVCouldNotCompute>(BECount) && "runOnCountableLoop() called on a loop without a predictable"
"backedge-taken count") ? static_cast<void> (0) : __assert_fail
("!isa<SCEVCouldNotCompute>(BECount) && \"runOnCountableLoop() called on a loop without a predictable\" \"backedge-taken count\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/Hexagon/HexagonLoopIdiomRecognition.cpp"
, 2243, __PRETTY_FUNCTION__))
;
2244
2245 SmallVector<BasicBlock *, 8> ExitBlocks;
2246 L->getUniqueExitBlocks(ExitBlocks);
2247
2248 bool Changed = false;
2249
2250 // Scan all the blocks in the loop that are not in subloops.
2251 for (auto *BB : L->getBlocks()) {
2252 // Ignore blocks in subloops.
2253 if (LF->getLoopFor(BB) != L)
2254 continue;
2255 Changed |= runOnLoopBlock(L, BB, BECount, ExitBlocks);
2256 }
2257
2258 return Changed;
2259}
2260
2261
2262bool HexagonLoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
2263 const Module &M = *L->getHeader()->getParent()->getParent();
2264 if (Triple(M.getTargetTriple()).getArch() != Triple::hexagon)
2265 return false;
2266
2267 if (skipLoop(L))
2268 return false;
2269
2270 // If the loop could not be converted to canonical form, it must have an
2271 // indirectbr in it, just give up.
2272 if (!L->getLoopPreheader())
2273 return false;
2274
2275 // Disable loop idiom recognition if the function's name is a common idiom.
2276 StringRef Name = L->getHeader()->getParent()->getName();
2277 if (Name == "memset" || Name == "memcpy" || Name == "memmove")
2278 return false;
2279
2280 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
2281 DL = &L->getHeader()->getModule()->getDataLayout();
2282 DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2283 LF = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2284 TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
2285 SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2286
2287 HasMemcpy = TLI->has(LibFunc_memcpy);
2288 HasMemmove = TLI->has(LibFunc_memmove);
2289
2290 if (SE->hasLoopInvariantBackedgeTakenCount(L))
2291 return runOnCountableLoop(L);
2292 return false;
2293}
2294
2295
2296Pass *llvm::createHexagonLoopIdiomPass() {
2297 return new HexagonLoopIdiomRecognize();
2298}
2299