LLVM  8.0.0svn
LoopUtils.cpp
Go to the documentation of this file.
1 //===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines common loop utility functions.
11 //
12 //===----------------------------------------------------------------------===//
13 
15 #include "llvm/ADT/ScopeExit.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
29 #include "llvm/IR/DomTreeUpdater.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/Instructions.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/PatternMatch.h"
34 #include "llvm/IR/ValueHandle.h"
35 #include "llvm/Pass.h"
36 #include "llvm/Support/Debug.h"
37 #include "llvm/Support/KnownBits.h"
39 
40 using namespace llvm;
41 using namespace llvm::PatternMatch;
42 
43 #define DEBUG_TYPE "loop-utils"
44 
46  bool PreserveLCSSA) {
47  bool Changed = false;
48 
49  // We re-use a vector for the in-loop predecesosrs.
50  SmallVector<BasicBlock *, 4> InLoopPredecessors;
51 
52  auto RewriteExit = [&](BasicBlock *BB) {
53  assert(InLoopPredecessors.empty() &&
54  "Must start with an empty predecessors list!");
55  auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });
56 
57  // See if there are any non-loop predecessors of this exit block and
58  // keep track of the in-loop predecessors.
59  bool IsDedicatedExit = true;
60  for (auto *PredBB : predecessors(BB))
61  if (L->contains(PredBB)) {
62  if (isa<IndirectBrInst>(PredBB->getTerminator()))
63  // We cannot rewrite exiting edges from an indirectbr.
64  return false;
65 
66  InLoopPredecessors.push_back(PredBB);
67  } else {
68  IsDedicatedExit = false;
69  }
70 
71  assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");
72 
73  // Nothing to do if this is already a dedicated exit.
74  if (IsDedicatedExit)
75  return false;
76 
77  auto *NewExitBB = SplitBlockPredecessors(
78  BB, InLoopPredecessors, ".loopexit", DT, LI, nullptr, PreserveLCSSA);
79 
80  if (!NewExitBB)
81  LLVM_DEBUG(
82  dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
83  << *L << "\n");
84  else
85  LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
86  << NewExitBB->getName() << "\n");
87  return true;
88  };
89 
90  // Walk the exit blocks directly rather than building up a data structure for
91  // them, but only visit each one once.
93  for (auto *BB : L->blocks())
94  for (auto *SuccBB : successors(BB)) {
95  // We're looking for exit blocks so skip in-loop successors.
96  if (L->contains(SuccBB))
97  continue;
98 
99  // Visit each exit block exactly once.
100  if (!Visited.insert(SuccBB).second)
101  continue;
102 
103  Changed |= RewriteExit(SuccBB);
104  }
105 
106  return Changed;
107 }
108 
109 /// Returns the instructions that use values defined in the loop.
111  SmallVector<Instruction *, 8> UsedOutside;
112 
113  for (auto *Block : L->getBlocks())
114  // FIXME: I believe that this could use copy_if if the Inst reference could
115  // be adapted into a pointer.
116  for (auto &Inst : *Block) {
117  auto Users = Inst.users();
118  if (any_of(Users, [&](User *U) {
119  auto *Use = cast<Instruction>(U);
120  return !L->contains(Use->getParent());
121  }))
122  UsedOutside.push_back(&Inst);
123  }
124 
125  return UsedOutside;
126 }
127 
129  // By definition, all loop passes need the LoopInfo analysis and the
130  // Dominator tree it depends on. Because they all participate in the loop
131  // pass manager, they must also preserve these.
136 
137  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
138  // here because users shouldn't directly get them from this header.
139  extern char &LoopSimplifyID;
140  extern char &LCSSAID;
141  AU.addRequiredID(LoopSimplifyID);
142  AU.addPreservedID(LoopSimplifyID);
143  AU.addRequiredID(LCSSAID);
144  AU.addPreservedID(LCSSAID);
145  // This is used in the LPPassManager to perform LCSSA verification on passes
146  // which preserve lcssa form
149 
150  // Loop passes are designed to run inside of a loop pass manager which means
151  // that any function analyses they require must be required by the first loop
152  // pass in the manager (so that it is computed before the loop pass manager
153  // runs) and preserved by all loop pasess in the manager. To make this
154  // reasonably robust, the set needed for most loop passes is maintained here.
155  // If your loop pass requires an analysis not listed here, you will need to
156  // carefully audit the loop pass manager nesting structure that results.
164 }
165 
166 /// Manually defined generic "LoopPass" dependency initialization. This is used
167 /// to initialize the exact set of passes from above in \c
168 /// getLoopAnalysisUsage. It can be used within a loop pass's initialization
169 /// with:
170 ///
171 /// INITIALIZE_PASS_DEPENDENCY(LoopPass)
172 ///
173 /// As-if "LoopPass" were a pass.
177  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
178  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
184 }
185 
186 /// Find string metadata for loop
187 ///
188 /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
189 /// operand or null otherwise. If the string metadata is not found return
190 /// Optional's not-a-value.
192  StringRef Name) {
193  MDNode *LoopID = TheLoop->getLoopID();
194  // Return none if LoopID is false.
195  if (!LoopID)
196  return None;
197 
198  // First operand should refer to the loop id itself.
199  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
200  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
201 
202  // Iterate over LoopID operands and look for MDString Metadata
203  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
204  MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
205  if (!MD)
206  continue;
207  MDString *S = dyn_cast<MDString>(MD->getOperand(0));
208  if (!S)
209  continue;
210  // Return true if MDString holds expected MetaData.
211  if (Name.equals(S->getString()))
212  switch (MD->getNumOperands()) {
213  case 1:
214  return nullptr;
215  case 2:
216  return &MD->getOperand(1);
217  default:
218  llvm_unreachable("loop metadata has 0 or 1 operand");
219  }
220  }
221  return None;
222 }
223 
224 /// Does a BFS from a given node to all of its children inside a given loop.
225 /// The returned vector of nodes includes the starting point.
229  auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
230  // Only include subregions in the top level loop.
231  BasicBlock *BB = DTN->getBlock();
232  if (CurLoop->contains(BB))
233  Worklist.push_back(DTN);
234  };
235 
236  AddRegionToWorklist(N);
237 
238  for (size_t I = 0; I < Worklist.size(); I++)
239  for (DomTreeNode *Child : Worklist[I]->getChildren())
240  AddRegionToWorklist(Child);
241 
242  return Worklist;
243 }
244 
245 void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
246  ScalarEvolution *SE = nullptr,
247  LoopInfo *LI = nullptr) {
248  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
249  auto *Preheader = L->getLoopPreheader();
250  assert(Preheader && "Preheader should exist!");
251 
252  // Now that we know the removal is safe, remove the loop by changing the
253  // branch from the preheader to go to the single exit block.
254  //
255  // Because we're deleting a large chunk of code at once, the sequence in which
256  // we remove things is very important to avoid invalidation issues.
257 
258  // Tell ScalarEvolution that the loop is deleted. Do this before
259  // deleting the loop so that ScalarEvolution can look at the loop
260  // to determine what it needs to clean up.
261  if (SE)
262  SE->forgetLoop(L);
263 
264  auto *ExitBlock = L->getUniqueExitBlock();
265  assert(ExitBlock && "Should have a unique exit block!");
266  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");
267 
268  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
269  assert(OldBr && "Preheader must end with a branch");
270  assert(OldBr->isUnconditional() && "Preheader must have a single successor");
271  // Connect the preheader to the exit block. Keep the old edge to the header
272  // around to perform the dominator tree update in two separate steps
273  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
274  // preheader -> header.
275  //
276  //
277  // 0. Preheader 1. Preheader 2. Preheader
278  // | | | |
279  // V | V |
280  // Header <--\ | Header <--\ | Header <--\
281  // | | | | | | | | | | |
282  // | V | | | V | | | V |
283  // | Body --/ | | Body --/ | | Body --/
284  // V V V V V
285  // Exit Exit Exit
286  //
287  // By doing this is two separate steps we can perform the dominator tree
288  // update without using the batch update API.
289  //
290  // Even when the loop is never executed, we cannot remove the edge from the
291  // source block to the exit block. Consider the case where the unexecuted loop
292  // branches back to an outer loop. If we deleted the loop and removed the edge
293  // coming to this inner loop, this will break the outer loop structure (by
294  // deleting the backedge of the outer loop). If the outer loop is indeed a
295  // non-loop, it will be deleted in a future iteration of loop deletion pass.
296  IRBuilder<> Builder(OldBr);
297  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
298  // Remove the old branch. The conditional branch becomes a new terminator.
299  OldBr->eraseFromParent();
300 
301  // Rewrite phis in the exit block to get their inputs from the Preheader
302  // instead of the exiting block.
303  for (PHINode &P : ExitBlock->phis()) {
304  // Set the zero'th element of Phi to be from the preheader and remove all
305  // other incoming values. Given the loop has dedicated exits, all other
306  // incoming values must be from the exiting blocks.
307  int PredIndex = 0;
308  P.setIncomingBlock(PredIndex, Preheader);
309  // Removes all incoming values from all other exiting blocks (including
310  // duplicate values from an exiting block).
311  // Nuke all entries except the zero'th entry which is the preheader entry.
312  // NOTE! We need to remove Incoming Values in the reverse order as done
313  // below, to keep the indices valid for deletion (removeIncomingValues
314  // updates getNumIncomingValues and shifts all values down into the operand
315  // being deleted).
316  for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
317  P.removeIncomingValue(e - i, false);
318 
319  assert((P.getNumIncomingValues() == 1 &&
320  P.getIncomingBlock(PredIndex) == Preheader) &&
321  "Should have exactly one value and that's from the preheader!");
322  }
323 
324  // Disconnect the loop body by branching directly to its exit.
325  Builder.SetInsertPoint(Preheader->getTerminator());
326  Builder.CreateBr(ExitBlock);
327  // Remove the old branch.
328  Preheader->getTerminator()->eraseFromParent();
329 
331  if (DT) {
332  // Update the dominator tree by informing it about the new edge from the
333  // preheader to the exit.
334  DTU.insertEdge(Preheader, ExitBlock);
335  // Inform the dominator tree about the removed edge.
336  DTU.deleteEdge(Preheader, L->getHeader());
337  }
338 
339  // Given LCSSA form is satisfied, we should not have users of instructions
340  // within the dead loop outside of the loop. However, LCSSA doesn't take
341  // unreachable uses into account. We handle them here.
342  // We could do it after drop all references (in this case all users in the
343  // loop will be already eliminated and we have less work to do but according
344  // to API doc of User::dropAllReferences only valid operation after dropping
345  // references, is deletion. So let's substitute all usages of
346  // instruction from the loop with undef value of corresponding type first.
347  for (auto *Block : L->blocks())
348  for (Instruction &I : *Block) {
349  auto *Undef = UndefValue::get(I.getType());
350  for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) {
351  Use &U = *UI;
352  ++UI;
353  if (auto *Usr = dyn_cast<Instruction>(U.getUser()))
354  if (L->contains(Usr->getParent()))
355  continue;
356  // If we have a DT then we can check that uses outside a loop only in
357  // unreachable block.
358  if (DT)
359  assert(!DT->isReachableFromEntry(U) &&
360  "Unexpected user in reachable block");
361  U.set(Undef);
362  }
363  }
364 
365  // Remove the block from the reference counting scheme, so that we can
366  // delete it freely later.
367  for (auto *Block : L->blocks())
368  Block->dropAllReferences();
369 
370  if (LI) {
371  // Erase the instructions and the blocks without having to worry
372  // about ordering because we already dropped the references.
373  // NOTE: This iteration is safe because erasing the block does not remove
374  // its entry from the loop's block list. We do that in the next section.
375  for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
376  LpI != LpE; ++LpI)
377  (*LpI)->eraseFromParent();
378 
379  // Finally, the blocks from loopinfo. This has to happen late because
380  // otherwise our loop iterators won't work.
381 
383  blocks.insert(L->block_begin(), L->block_end());
384  for (BasicBlock *BB : blocks)
385  LI->removeBlock(BB);
386 
387  // The last step is to update LoopInfo now that we've eliminated this loop.
388  LI->erase(L);
389  }
390 }
391 
393  // Only support loops with a unique exiting block, and a latch.
394  if (!L->getExitingBlock())
395  return None;
396 
397  // Get the branch weights for the loop's backedge.
398  BranchInst *LatchBR =
400  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
401  return None;
402 
403  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
404  LatchBR->getSuccessor(1) == L->getHeader()) &&
405  "At least one edge out of the latch must go to the header");
406 
407  // To estimate the number of times the loop body was executed, we want to
408  // know the number of times the backedge was taken, vs. the number of times
409  // we exited the loop.
410  uint64_t TrueVal, FalseVal;
411  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
412  return None;
413 
414  if (!TrueVal || !FalseVal)
415  return 0;
416 
417  // Divide the count of the backedge by the count of the edge exiting the loop,
418  // rounding to nearest.
419  if (LatchBR->getSuccessor(0) == L->getHeader())
420  return (TrueVal + (FalseVal / 2)) / FalseVal;
421  else
422  return (FalseVal + (TrueVal / 2)) / TrueVal;
423 }
424 
426  ScalarEvolution &SE) {
427  Loop *OuterL = InnerLoop->getParentLoop();
428  if (!OuterL)
429  return true;
430 
431  // Get the backedge taken count for the inner loop
432  BasicBlock *InnerLoopLatch = InnerLoop->getLoopLatch();
433  const SCEV *InnerLoopBECountSC = SE.getExitCount(InnerLoop, InnerLoopLatch);
434  if (isa<SCEVCouldNotCompute>(InnerLoopBECountSC) ||
435  !InnerLoopBECountSC->getType()->isIntegerTy())
436  return false;
437 
438  // Get whether count is invariant to the outer loop
440  SE.getLoopDisposition(InnerLoopBECountSC, OuterL);
442  return false;
443 
444  return true;
445 }
446 
447 /// Adds a 'fast' flag to floating point operations.
449  if (isa<FPMathOperator>(V)) {
450  FastMathFlags Flags;
451  Flags.setFast();
452  cast<Instruction>(V)->setFastMathFlags(Flags);
453  }
454  return V;
455 }
456 
459  Value *Left, Value *Right) {
461  switch (RK) {
462  default:
463  llvm_unreachable("Unknown min/max recurrence kind");
465  P = CmpInst::ICMP_ULT;
466  break;
468  P = CmpInst::ICMP_UGT;
469  break;
471  P = CmpInst::ICMP_SLT;
472  break;
474  P = CmpInst::ICMP_SGT;
475  break;
477  P = CmpInst::FCMP_OLT;
478  break;
480  P = CmpInst::FCMP_OGT;
481  break;
482  }
483 
484  // We only match FP sequences that are 'fast', so we can unconditionally
485  // set it on any generated instructions.
486  IRBuilder<>::FastMathFlagGuard FMFG(Builder);
487  FastMathFlags FMF;
488  FMF.setFast();
489  Builder.setFastMathFlags(FMF);
490 
491  Value *Cmp;
494  Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
495  else
496  Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
497 
498  Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
499  return Select;
500 }
501 
502 // Helper to generate an ordered reduction.
503 Value *
505  unsigned Op,
507  ArrayRef<Value *> RedOps) {
508  unsigned VF = Src->getType()->getVectorNumElements();
509 
510  // Extract and apply reduction ops in ascending order:
511  // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1]
512  Value *Result = Acc;
513  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) {
514  Value *Ext =
515  Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx));
516 
517  if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
518  Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext,
519  "bin.rdx");
520  } else {
522  "Invalid min/max");
523  Result = createMinMaxOp(Builder, MinMaxKind, Result, Ext);
524  }
525 
526  if (!RedOps.empty())
527  propagateIRFlags(Result, RedOps);
528  }
529 
530  return Result;
531 }
532 
533 // Helper to generate a log2 shuffle reduction.
534 Value *
537  ArrayRef<Value *> RedOps) {
538  unsigned VF = Src->getType()->getVectorNumElements();
539  // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
540  // and vector ops, reducing the set of values being computed by half each
541  // round.
542  assert(isPowerOf2_32(VF) &&
543  "Reduction emission only supported for pow2 vectors!");
544  Value *TmpVec = Src;
545  SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
546  for (unsigned i = VF; i != 1; i >>= 1) {
547  // Move the upper half of the vector to the lower half.
548  for (unsigned j = 0; j != i / 2; ++j)
549  ShuffleMask[j] = Builder.getInt32(i / 2 + j);
550 
551  // Fill the rest of the mask with undef.
552  std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
553  UndefValue::get(Builder.getInt32Ty()));
554 
555  Value *Shuf = Builder.CreateShuffleVector(
556  TmpVec, UndefValue::get(TmpVec->getType()),
557  ConstantVector::get(ShuffleMask), "rdx.shuf");
558 
559  if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
560  // Floating point operations had to be 'fast' to enable the reduction.
562  TmpVec, Shuf, "bin.rdx"));
563  } else {
565  "Invalid min/max");
566  TmpVec = createMinMaxOp(Builder, MinMaxKind, TmpVec, Shuf);
567  }
568  if (!RedOps.empty())
569  propagateIRFlags(TmpVec, RedOps);
570  }
571  // The result is in the first element of the vector.
572  return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
573 }
574 
575 /// Create a simple vector reduction specified by an opcode and some
576 /// flags (if generating min/max reductions).
578  IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
580  ArrayRef<Value *> RedOps) {
581  assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
582 
583  Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
584  std::function<Value *()> BuildFunc;
585  using RD = RecurrenceDescriptor;
586  RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
587  // TODO: Support creating ordered reductions.
588  FastMathFlags FMFFast;
589  FMFFast.setFast();
590 
591  switch (Opcode) {
592  case Instruction::Add:
593  BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
594  break;
595  case Instruction::Mul:
596  BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
597  break;
598  case Instruction::And:
599  BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
600  break;
601  case Instruction::Or:
602  BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
603  break;
604  case Instruction::Xor:
605  BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
606  break;
607  case Instruction::FAdd:
608  BuildFunc = [&]() {
609  auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
610  cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
611  return Rdx;
612  };
613  break;
614  case Instruction::FMul:
615  BuildFunc = [&]() {
616  auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
617  cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
618  return Rdx;
619  };
620  break;
621  case Instruction::ICmp:
622  if (Flags.IsMaxOp) {
623  MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
624  BuildFunc = [&]() {
625  return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
626  };
627  } else {
628  MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
629  BuildFunc = [&]() {
630  return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
631  };
632  }
633  break;
634  case Instruction::FCmp:
635  if (Flags.IsMaxOp) {
636  MinMaxKind = RD::MRK_FloatMax;
637  BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
638  } else {
639  MinMaxKind = RD::MRK_FloatMin;
640  BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
641  }
642  break;
643  default:
644  llvm_unreachable("Unhandled opcode");
645  break;
646  }
647  if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
648  return BuildFunc();
649  return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
650 }
651 
652 /// Create a vector reduction using a given recurrence descriptor.
654  const TargetTransformInfo *TTI,
655  RecurrenceDescriptor &Desc, Value *Src,
656  bool NoNaN) {
657  // TODO: Support in-order reductions based on the recurrence descriptor.
658  using RD = RecurrenceDescriptor;
659  RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
661  Flags.NoNaN = NoNaN;
662  switch (RecKind) {
663  case RD::RK_FloatAdd:
664  return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
665  case RD::RK_FloatMult:
666  return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
667  case RD::RK_IntegerAdd:
668  return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
669  case RD::RK_IntegerMult:
670  return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
671  case RD::RK_IntegerAnd:
672  return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
673  case RD::RK_IntegerOr:
674  return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
675  case RD::RK_IntegerXor:
676  return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
677  case RD::RK_IntegerMinMax: {
678  RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
679  Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
680  Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
681  return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
682  }
683  case RD::RK_FloatMinMax: {
684  Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
685  return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
686  }
687  default:
688  llvm_unreachable("Unhandled RecKind");
689  }
690 }
691 
693  auto *VecOp = dyn_cast<Instruction>(I);
694  if (!VecOp)
695  return;
696  auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
697  : dyn_cast<Instruction>(OpValue);
698  if (!Intersection)
699  return;
700  const unsigned Opcode = Intersection->getOpcode();
701  VecOp->copyIRFlags(Intersection);
702  for (auto *V : VL) {
703  auto *Instr = dyn_cast<Instruction>(V);
704  if (!Instr)
705  continue;
706  if (OpValue == nullptr || Opcode == Instr->getOpcode())
707  VecOp->andIRFlags(V);
708  }
709 }
Legacy wrapper pass to provide the GlobalsAAResult object.
Type * getVectorElementType() const
Definition: Type.h:371
const NoneType None
Definition: None.h:24
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1858
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
Definition: LoopInfoImpl.h:225
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1258
void setFast(bool B=true)
Definition: Operator.h:231
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
This is the interface for a simple mod/ref and alias analysis over globals.
bool hasDedicatedExits() const
Return true if no exit block for the loop has a predecessor that is outside the loop.
Definition: LoopInfoImpl.h:86
bool isLCSSAForm(DominatorTree &DT) const
Return true if the Loop is in LCSSA form.
Definition: LoopInfo.cpp:177
The main scalar evolution driver.
A global registry used in conjunction with static constructors to make pluggable components (like tar...
Definition: Registry.h:45
BlockT * getLoopPreheader() const
If there is a preheader for this loop, return it.
Definition: LoopInfoImpl.h:174
unsigned less than
Definition: InstrTypes.h:682
0 1 0 0 True if ordered and less than
Definition: InstrTypes.h:663
Value * createSimpleTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, unsigned Opcode, Value *Src, TargetTransformInfo::ReductionFlags Flags=TargetTransformInfo::ReductionFlags(), ArrayRef< Value *> RedOps=None)
Create a target reduction of the given vector.
Definition: LoopUtils.cpp:577
LLVM_NODISCARD detail::scope_exit< typename std::decay< Callable >::type > make_scope_exit(Callable &&F)
Definition: ScopeExit.h:59
BasicBlock * getSuccessor(unsigned i) const
bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE)
Check inner loop (L) backedge count is known to be invariant on all iterations of its outer loop...
Definition: LoopUtils.cpp:425
MinMaxRecurrenceKind getMinMaxRecurrenceKind()
Metadata node.
Definition: Metadata.h:864
const MDOperand & getOperand(unsigned I) const
Definition: Metadata.h:1069
Value * getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind=RecurrenceDescriptor::MRK_Invalid, ArrayRef< Value *> RedOps=None)
Generates a vector reduction using shufflevectors to reduce the value.
Definition: LoopUtils.cpp:535
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:138
iv Induction Variable Users
Definition: IVUsers.cpp:52
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition: IRBuilder.h:347
The SCEV is loop-invariant.
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:51
amdgpu Simplify well known AMD library false Value Value const Twine & Name
This is the interface for a SCEV-based alias analysis.
void initializeLoopPassPass(PassRegistry &)
Manually defined generic "LoopPass" dependency initialization.
Definition: LoopUtils.cpp:174
unsigned getNumSuccessors() const
CallInst * CreateFAddReduce(Value *Acc, Value *Src)
Create a vector fadd reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:322
A Use represents the edge between a Value definition and its users.
Definition: Use.h:56
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:197
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:743
Contains a collection of routines for determining if a given instruction is guaranteed to execute if ...
SmallVector< Instruction *, 8 > findDefsUsedOutsideOfLoop(Loop *L)
Returns the instructions that use values defined in the loop.
Definition: LoopUtils.cpp:110
BlockT * getHeader() const
Definition: LoopInfo.h:100
CallInst * CreateFPMinReduce(Value *Src, bool NoNaN=false)
Create a vector float min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:390
void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE, LoopInfo *LI)
This function deletes dead loops.
Definition: LoopUtils.cpp:245
Value * getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, unsigned Op, RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind=RecurrenceDescriptor::MRK_Invalid, ArrayRef< Value *> RedOps=None)
Generates an ordered vector reduction using extracts to reduce the value.
Definition: LoopUtils.cpp:504
User * getUser() const LLVM_READONLY
Returns the User that contains this Use.
Definition: Use.cpp:41
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:245
void andIRFlags(const Value *V)
Logical &#39;and&#39; of any supported wrapping, exact, and fast-math flags of V and this instruction...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:126
AnalysisUsage & addPreservedID(const void *ID)
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree...
Definition: Dominators.h:145
use_iterator_impl< Use > use_iterator
Definition: Value.h:332
CallInst * CreateXorReduce(Value *Src)
Create a vector int XOR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:362
StringRef getString() const
Definition: Metadata.cpp:464
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:1866
#define P(N)
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock *> Preds, const char *Suffix, DominatorTree *DT=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void set(Value *Val)
Definition: Value.h:671
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:429
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
Flags describing the kind of vector reduction.
LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L)
Return the "disposition" of the given SCEV with respect to the given loop.
Conditional or Unconditional Branch instruction.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
Definition: IRBuilder.h:1913
Value * createMinMaxOp(IRBuilder<> &Builder, RecurrenceDescriptor::MinMaxRecurrenceKind RK, Value *Left, Value *Right)
Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
Definition: LoopUtils.cpp:457
char & LCSSAID
Definition: LCSSA.cpp:440
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Represent the analysis usage information of a pass.
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1049
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:657
Optional< unsigned > getLoopEstimatedTripCount(Loop *L)
Get a loop&#39;s estimated trip count based on branch weight metadata.
Definition: LoopUtils.cpp:392
Value * CreateExtractElement(Value *Vec, Value *Idx, const Twine &Name="")
Definition: IRBuilder.h:1933
static UndefValue * get(Type *T)
Static factory methods - Return an &#39;undef&#39; object of the specified type.
Definition: Constants.cpp:1392
BlockT * getUniqueExitBlock() const
If getUniqueExitBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:145
size_t size() const
Definition: SmallVector.h:53
bool IsMaxOp
If the op a min/max kind, true if it&#39;s a max operation.
RecurrenceKind getRecurrenceKind()
Optional< const MDOperand * > findStringMetadataForLoop(Loop *TheLoop, StringRef Name)
Find string metadata for loop.
Definition: LoopUtils.cpp:191
void deleteEdge(BasicBlock *From, BasicBlock *To)
Notify all available trees on an edge deletion.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
void propagateIRFlags(Value *I, ArrayRef< Value *> VL, Value *OpValue=nullptr)
Get the intersection (logical and) of all of the potential IR flags of each scalar operation (VL) tha...
Definition: LoopUtils.cpp:692
signed greater than
Definition: InstrTypes.h:684
char & LoopSimplifyID
The RecurrenceDescriptor is used to identify recurrences variables in a loop.
Definition: IVDescriptors.h:63
0 0 1 0 True if ordered and greater than
Definition: InstrTypes.h:661
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
Definition: LoopInfo.h:110
void insertEdge(BasicBlock *From, BasicBlock *To)
Notify all available trees on an edge insertion.
CallInst * CreateAddReduce(Value *Src)
Create a vector int add reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
Legacy wrapper pass to provide the SCEVAAResult object.
Type * getType() const
Return the LLVM type of this SCEV expression.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
CallInst * CreateIntMaxReduce(Value *Src, bool IsSigned=false)
Create a vector integer max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:367
AnalysisUsage & addRequiredID(const void *ID)
Definition: Pass.cpp:299
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
Module.h This file contains the declarations for the Module class.
signed less than
Definition: InstrTypes.h:686
ConstantInt * getInt32(uint32_t C)
Get a constant 32-bit value.
Definition: IRBuilder.h:307
Value * CreateShuffleVector(Value *V1, Value *V2, Value *Mask, const Twine &Name="")
Definition: IRBuilder.h:1960
pred_range predecessors(BasicBlock *BB)
Definition: CFG.h:123
CallInst * CreateFMulReduce(Value *Acc, Value *Src)
Create a vector fmul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:332
Value * createTargetReduction(IRBuilder<> &B, const TargetTransformInfo *TTI, RecurrenceDescriptor &Desc, Value *Src, bool NoNaN=false)
Create a generic target reduction using a recurrence descriptor Desc The target is queried to determi...
Definition: LoopUtils.cpp:653
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:133
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
LoopDisposition
An enum describing the relationship between a SCEV and a loop.
LLVM_NODISCARD LLVM_ATTRIBUTE_ALWAYS_INLINE bool equals(StringRef RHS) const
equals - Check for string equality, this is more efficient than compare() when the relative ordering ...
Definition: StringRef.h:169
LoopT * getParentLoop() const
Definition: LoopInfo.h:101
CallInst * CreateAndReduce(Value *Src)
Create a vector int AND reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:352
MDNode * getLoopID() const
Return the llvm.loop loop id metadata node for this loop if it is present.
Definition: LoopInfo.cpp:215
CallInst * CreateOrReduce(Value *Src)
Create a vector int OR reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:357
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:133
CallInst * CreateIntMinReduce(Value *Src, bool IsSigned=false)
Create a vector integer min reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:373
This class represents an analyzed expression in the program.
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:56
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
Definition: LoopInfo.h:149
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
void getLoopAnalysisUsage(AnalysisUsage &AU)
Helper to consistently add the set of standard passes to a loop pass&#39;s AnalysisUsage.
Definition: LoopUtils.cpp:128
CallInst * CreateMulReduce(Value *Src)
Create a vector int mul reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:347
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
block_iterator block_end() const
Definition: LoopInfo.h:155
SmallVector< DomTreeNode *, 16 > collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop)
Does a BFS from a given node to all of its children inside a given loop.
Definition: LoopUtils.cpp:227
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
LLVM Value Representation.
Definition: Value.h:73
bool useReductionIntrinsic(unsigned Opcode, Type *Ty, ReductionFlags Flags) const
succ_range successors(Instruction *I)
Definition: CFG.h:262
void setFastMathFlags(FastMathFlags NewFMF)
Set the fast-math flags to be used with generated fp-math operators.
Definition: IRBuilder.h:220
bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, bool PreserveLCSSA)
Ensure that all exit blocks of the loop are dedicated exits.
Definition: LoopUtils.cpp:45
The legacy pass manager&#39;s analysis pass to compute loop information.
Definition: LoopInfo.h:964
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
unsigned greater than
Definition: InstrTypes.h:680
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:49
This is the interface for LLVM&#39;s primary stateless and local alias analysis.
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:39
A single uniqued string.
Definition: Metadata.h:604
const SCEV * getExitCount(const Loop *L, BasicBlock *ExitingBlock)
Get the expression for the number of loop iterations for which this loop is guaranteed not to exit vi...
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:260
This pass exposes codegen information to IR-level passes.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
CallInst * CreateFPMaxReduce(Value *Src, bool NoNaN=false)
Create a vector float max reduction intrinsic of the source vector.
Definition: IRBuilder.cpp:379
unsigned getNumOperands() const
Return number of MDNode operands.
Definition: Metadata.h:1075
#define LLVM_DEBUG(X)
Definition: Debug.h:123
bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const
Retrieve the raw weight values of a conditional branch or select.
Definition: Metadata.cpp:1315
iterator_range< block_iterator > blocks() const
Definition: LoopInfo.h:156
static Value * addFastMathFlag(Value *V)
Adds a &#39;fast&#39; flag to floating point operations.
Definition: LoopUtils.cpp:448
block_iterator block_begin() const
Definition: LoopInfo.h:154
BlockT * getExitingBlock() const
If getExitingBlocks would return exactly one block, return that block.
Definition: LoopInfoImpl.h:50
bool NoNaN
If op is an fp min/max, whether NaNs may be present.
static Constant * get(ArrayRef< Constant *> V)
Definition: Constants.cpp:1056
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
Legacy wrapper pass to provide the BasicAAResult object.
bool IsSigned
Whether the operation is a signed int reduction.