//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass merges loads/stores to/from sequential memory addresses into vector
// loads/stores. Although there's nothing GPU-specific in here, this pass is
// motivated by the microarchitectural quirks of nVidia and AMD GPUs.
//
// (For simplicity below we talk about loads only, but everything also applies
// to stores.)
//
// This pass is intended to be run late in the pipeline, after other
// vectorization opportunities have been exploited. So the assumption here is
// that immediately following our new vector load we'll need to extract out the
// individual elements of the load, so we can operate on them individually.
//
// On CPUs this transformation is usually not beneficial, because extracting the
// elements of a vector register is expensive on most architectures. It's
// usually better just to load each element individually into its own scalar
// register.
//
// However, nVidia and AMD GPUs don't have proper vector registers. Instead, a
// "vector load" loads directly into a series of scalar registers. In effect,
// extracting the elements of the vector is free. It's therefore always
// beneficial to vectorize a sequence of loads on these architectures.
//
// Vectorizing (perhaps a better name might be "coalescing") loads can have
// large performance impacts on GPU kernels, and opportunities for vectorizing
// are common in GPU code. This pass tries very hard to find such
// opportunities; its runtime is quadratic in the number of loads in a BB.
//
// Some CPU architectures, such as ARM, have instructions that load into
// multiple scalar registers, similar to a GPU vectorized load. In theory ARM
// could use this pass (with some modifications), but currently it implements
// its own pass to do something similar to what we do here.
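//
// For example (illustrative only), given four consecutive i32 loads:
//
//   %a = load i32, i32* %p
//   %b = load i32, i32* %gep1   ; %p + 1
//   %c = load i32, i32* %gep2   ; %p + 2
//   %d = load i32, i32* %gep3   ; %p + 3
//
// the pass would emit roughly:
//
//   %vp  = bitcast i32* %p to <4 x i32>*
//   %vec = load <4 x i32>, <4 x i32>* %vp
//   %a   = extractelement <4 x i32> %vec, i32 0
//   %b   = extractelement <4 x i32> %vec, i32 1
//   %c   = extractelement <4 x i32> %vec, i32 2
//   %d   = extractelement <4 x i32> %vec, i32 3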

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

// FIXME: Assuming stack alignment of 4 is always good enough
static const unsigned StackAdjustedAlignment = 4;

namespace {

using InstrList = SmallVector<Instruction *, 8>;
using InstrListMap = MapVector<Value *, InstrList>;

class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
             ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
        DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  GetElementPtrInst *getSourceGEP(Value *Src) const;

  unsigned getPointerAddressSpace(Value *I);

  unsigned getAlignment(LoadInst *LI) const {
    unsigned Align = LI->getAlignment();
    if (Align != 0)
      return Align;

    return DL.getABITypeAlignment(LI->getType());
  }

  unsigned getAlignment(StoreInst *SI) const {
    unsigned Align = SI->getAlignment();
    if (Align != 0)
      return Align;

    return DL.getABITypeAlignment(SI->getValueOperand()->getType());
  }

  bool isConsecutiveAccess(Value *A, Value *B);

  /// After vectorization, reorder the instructions that I depends on
  /// (the instructions defining its operands), to ensure they dominate I.
  void reorder(Instruction *I);

  /// Returns the first and the last instructions in Chain.
  std::pair<BasicBlock::iterator, BasicBlock::iterator>
  getBoundaryInstrs(ArrayRef<Instruction *> Chain);

  /// Erases the original instructions after vectorizing.
  void eraseInstructions(ArrayRef<Instruction *> Chain);

  /// "Legalize" the vector type that would be produced by combining \p
  /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
  /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
  /// expected to have more than 4 elements.
  std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
  splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);

  /// Finds the largest prefix of Chain that's vectorizable, checking for
  /// intervening instructions which may affect the memory accessed by the
  /// instructions within Chain.
  ///
  /// The elements of \p Chain must be all loads or all stores and must be in
  /// address order.
  ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);

  /// Collects load and store instructions to vectorize.
  std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);

  /// Processes the collected instructions, the \p Map. The values of \p Map
  /// should be all loads or all stores.
  bool vectorizeChains(InstrListMap &Map);

  /// Finds the load/stores to consecutive memory addresses and vectorizes them.
  bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);

  /// Vectorizes the load instructions in Chain.
  bool
  vectorizeLoadChain(ArrayRef<Instruction *> Chain,
                     SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

  /// Vectorizes the store instructions in Chain.
  bool
  vectorizeStoreChain(ArrayRef<Instruction *> Chain,
                      SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

  /// Checks whether this load/store access is misaligned.
  bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
                          unsigned Alignment);
};

class LoadStoreVectorizer : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizer() : FunctionPass(ID) {
    initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

char LoadStoreVectorizer::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
                      "Vectorize load and store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizer();
}
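
// Usage note (illustrative): the legacy pass can be exercised on its own from
// the command line, e.g.:
//
//   opt -load-store-vectorizer -S in.ll -o out.ll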

// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
// vectors of Instructions.
static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
  SmallVector<Value *, 8> VL(IL.begin(), IL.end());
  propagateMetadata(I, VL);
}

bool LoadStoreVectorizer::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  Vectorizer V(F, AA, DT, SE, TTI);
  return V.run();
}

// Vectorizer Implementation
bool Vectorizer::run() {
  bool Changed = false;

  // Scan the blocks in the function in post order.
  for (BasicBlock *BB : post_order(&F)) {
    InstrListMap LoadRefs, StoreRefs;
    std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
    Changed |= vectorizeChains(LoadRefs);
    Changed |= vectorizeChains(StoreRefs);
  }

  return Changed;
}

unsigned Vectorizer::getPointerAddressSpace(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I))
    return S->getPointerAddressSpace();
  return -1;
}

GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const {
  // First strip pointer bitcasts. Make sure pointee size is the same with
  // and without casts.
  // TODO: a stride set by the add instruction below can match the difference
  // in pointee type size here. Currently it will not be vectorized.
  Value *SrcPtr = getLoadStorePointerOperand(Src);
  Value *SrcBase = SrcPtr->stripPointerCasts();
  Type *SrcPtrType = SrcPtr->getType()->getPointerElementType();
  Type *SrcBaseType = SrcBase->getType()->getPointerElementType();
  if (SrcPtrType->isSized() && SrcBaseType->isSized() &&
      DL.getTypeStoreSize(SrcPtrType) == DL.getTypeStoreSize(SrcBaseType))
    SrcPtr = SrcBase;
  return dyn_cast<GetElementPtrInst>(SrcPtr);
}

// FIXME: Merge with llvm::isConsecutiveAccess
bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  unsigned ASA = getPointerAddressSpace(A);
  unsigned ASB = getPointerAddressSpace(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB))
    return false;

  // Make sure that A and B are different pointers of the same size type.
  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
  Type *PtrATy = PtrA->getType()->getPointerElementType();
  Type *PtrBTy = PtrB->getType()->getPointerElementType();
  if (PtrA == PtrB ||
      PtrATy->isVectorTy() != PtrBTy->isVectorTy() ||
      DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
      DL.getTypeStoreSize(PtrATy->getScalarType()) !=
          DL.getTypeStoreSize(PtrBTy->getScalarType()))
    return false;

  APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));

  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  APInt OffsetDelta = OffsetB - OffsetA;

  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
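  // Illustrative example: for two i32 loads from p[1] and p[2], Size is 4
  // bytes and, once both pointers strip down to the same base p, OffsetDelta
  // is also 4 bytes, so the accesses are consecutive.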
  if (PtrA == PtrB)
    return OffsetDelta == Size;

  // Compute the necessary base pointer delta to have the necessary final delta
  // equal to the size.
  APInt BaseDelta = Size - OffsetDelta;

  // Compute the distance with SCEV between the base pointers.
  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
  const SCEV *C = SE.getConstant(BaseDelta);
  const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
  if (X == PtrSCEVB)
    return true;

  // Sometimes even this doesn't work, because SCEV can't always see through
  // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
  // things the hard way.
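  //
  // Illustrative shape of the pattern handled below:
  //   GEPA: gep float, float* %p, (sext i32 %i       to i64)
  //   GEPB: gep float, float* %p, (sext i32 (%i + 1) to i64)
  // The GEPs must match except for the last index; the accesses are
  // consecutive if %i + 1 provably does not overflow.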

  // Look through GEPs after checking they're the same except for the last
  // index.
  GetElementPtrInst *GEPA = getSourceGEP(A);
  GetElementPtrInst *GEPB = getSourceGEP(B);
  if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
    return false;
  unsigned FinalIndex = GEPA->getNumOperands() - 1;
  for (unsigned i = 0; i < FinalIndex; i++)
    if (GEPA->getOperand(i) != GEPB->getOperand(i))
      return false;

  Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex));
  Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex));
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return false;

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return false;

  bool Signed = isa<SExtInst>(OpA);

  OpA = dyn_cast<Instruction>(OpA->getOperand(0));
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpA || !OpB || OpA->getType() != OpB->getType())
    return false;

  // Now we need to prove that adding 1 to OpA won't overflow.
  bool Safe = false;
  // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA,
  // we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) {
    if (Signed)
      Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap();
    else
      Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap();
  }

  unsigned BitWidth = OpA->getType()->getScalarSizeInBits();

  // Second attempt:
  // If any bits are known to be zero other than the sign bit in OpA, we can
  // add 1 to it while guaranteeing no overflow of any sort.
  if (!Safe) {
    KnownBits Known(BitWidth);
    computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
    if (Known.countMaxTrailingOnes() < (BitWidth - 1))
      Safe = true;
  }
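  // E.g., if OpA is an i8 whose low bit is known zero (the value is even),
  // adding 1 merely sets that bit, so neither signed nor unsigned overflow
  // can occur.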

  if (!Safe)
    return false;

  const SCEV *OffsetSCEVA = SE.getSCEV(OpA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *One = SE.getConstant(APInt(BitWidth, 1));
  const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One);
  return X2 == OffsetSCEVB;
}

void Vectorizer::reorder(Instruction *I) {
  OrderedBasicBlock OBB(I->getParent());
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.push_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int i = 0; i < NumOperands; i++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      if (!OBB.dominates(IM, I)) {
        InstructionsToMove.insert(IM);
        Worklist.push_back(IM);
      }
    }
  }

  // All instructions to move should follow I. Start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
       ++BBI) {
    if (!InstructionsToMove.count(&*BBI))
      continue;
    Instruction *IM = &*BBI;
    --BBI;
    IM->removeFromParent();
    IM->insertBefore(I);
  }
}

std::pair<BasicBlock::iterator, BasicBlock::iterator>
Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
  Instruction *C0 = Chain[0];
  BasicBlock::iterator FirstInstr = C0->getIterator();
  BasicBlock::iterator LastInstr = C0->getIterator();

  BasicBlock *BB = C0->getParent();
  unsigned NumFound = 0;
  for (Instruction &I : *BB) {
    if (!is_contained(Chain, &I))
      continue;

    ++NumFound;
    if (NumFound == 1) {
      FirstInstr = I.getIterator();
    }
    if (NumFound == Chain.size()) {
      LastInstr = I.getIterator();
      break;
    }
  }

  // Range is [first, last).
  return std::make_pair(FirstInstr, ++LastInstr);
}

void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
  SmallVector<Instruction *, 16> Instrs;
  for (Instruction *I : Chain) {
    Value *PtrOperand = getLoadStorePointerOperand(I);
    assert(PtrOperand && "Instruction must have a pointer operand.");
    Instrs.push_back(I);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
      Instrs.push_back(GEP);
  }

  // Erase instructions.
  for (Instruction *I : Instrs)
    if (I->use_empty())
      I->eraseFromParent();
}

std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
                               unsigned ElementSizeBits) {
  unsigned ElementSizeBytes = ElementSizeBits / 8;
  unsigned SizeBytes = ElementSizeBytes * Chain.size();
  unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
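  // Illustrative example: five i16 elements give SizeBytes == 10, so
  // NumLeft == (10 - 2) / 2 == 4 and the chain splits into an 8-byte piece
  // (four elements) and a 2-byte remainder (one element).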
  if (NumLeft == Chain.size()) {
    if ((NumLeft & 1) == 0)
      NumLeft /= 2; // Split even in half
    else
      --NumLeft;    // Split off last element
  } else if (NumLeft == 0)
    NumLeft = 1;
  return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}

ArrayRef<Instruction *>
Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
  // These are in BB order, unlike Chain, which is in address order.
  SmallVector<Instruction *, 16> MemoryInstrs;
  SmallVector<Instruction *, 16> ChainInstrs;

  bool IsLoadChain = isa<LoadInst>(Chain[0]);
  LLVM_DEBUG({
    for (Instruction *I : Chain) {
      if (IsLoadChain)
        assert(isa<LoadInst>(I) &&
               "All elements of Chain must be loads, or all must be stores.");
      else
        assert(isa<StoreInst>(I) &&
               "All elements of Chain must be loads, or all must be stores.");
    }
  });

  for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
    if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
      if (!is_contained(Chain, &I))
        MemoryInstrs.push_back(&I);
      else
        ChainInstrs.push_back(&I);
    } else if (isa<IntrinsicInst>(&I) &&
               cast<IntrinsicInst>(&I)->getIntrinsicID() ==
                   Intrinsic::sideeffect) {
      // Ignore llvm.sideeffect calls.
    } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
      LLVM_DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I
                        << '\n');
      break;
    } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
      LLVM_DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
                        << '\n');
      break;
    }
  }

  OrderedBasicBlock OBB(Chain[0]->getParent());

  // Loop until we find an instruction in ChainInstrs that we can't vectorize.
  unsigned ChainInstrIdx = 0;
  Instruction *BarrierMemoryInstr = nullptr;

  for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
    Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];

    // If a barrier memory instruction was found, chain instructions that
    // follow will not be added to the valid prefix.
    if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
      break;

    // Check (in BB order) if any instruction prevents ChainInstr from being
    // vectorized. Find and store the first such "conflicting" instruction.
    for (Instruction *MemInstr : MemoryInstrs) {
      // If a barrier memory instruction was found, do not check past it.
      if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
        break;

      auto *MemLoad = dyn_cast<LoadInst>(MemInstr);
      auto *ChainLoad = dyn_cast<LoadInst>(ChainInstr);
      if (MemLoad && ChainLoad)
        continue;

      // We can ignore the alias if we have a load/store pair and the load
      // is known to be invariant. The load cannot be clobbered by the store.
      auto IsInvariantLoad = [](const LoadInst *LI) -> bool {
        return LI->getMetadata(LLVMContext::MD_invariant_load);
      };

      // We can ignore the alias as long as the load comes before the store,
      // because that means we won't be moving the load past the store to
      // vectorize it (the vectorized load is inserted at the location of the
      // first load in the chain).
      if (isa<StoreInst>(MemInstr) && ChainLoad &&
          (IsInvariantLoad(ChainLoad) || OBB.dominates(ChainLoad, MemInstr)))
        continue;

      // Same case, but in reverse.
      if (MemLoad && isa<StoreInst>(ChainInstr) &&
          (IsInvariantLoad(MemLoad) || OBB.dominates(MemLoad, ChainInstr)))
        continue;

      if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
                        MemoryLocation::get(ChainInstr))) {
        LLVM_DEBUG({
          dbgs() << "LSV: Found alias:\n"
                    "  Aliasing instruction and pointer:\n"
                 << "  " << *MemInstr << '\n'
                 << "  " << *getLoadStorePointerOperand(MemInstr) << '\n'
                 << "  Aliased instruction and pointer:\n"
                 << "  " << *ChainInstr << '\n'
                 << "  " << *getLoadStorePointerOperand(ChainInstr) << '\n';
        });
        // Save this aliasing memory instruction as a barrier, but allow other
        // instructions that precede the barrier to be vectorized with this one.
        BarrierMemoryInstr = MemInstr;
        break;
      }
    }
    // Continue the search only for store chains, since vectorizing stores that
    // precede an aliasing load is valid. Conversely, vectorizing loads is valid
    // up to an aliasing store, but should not pull loads from further down in
    // the basic block.
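    // E.g., in {load a; store b; load c} where b may alias both loads, the
    // vectorizable prefix of the load chain is just {load a}: the aliasing
    // store is a barrier that load c must not be hoisted across.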
    if (IsLoadChain && BarrierMemoryInstr) {
      // The BarrierMemoryInstr is a store that precedes ChainInstr.
      assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
      break;
    }
  }

  // Find the largest prefix of Chain whose elements are all in
  // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
  // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
  // order.)
  SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
      ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
  unsigned ChainIdx = 0;
  for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
    if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
      break;
  }
  return Chain.slice(0, ChainIdx);
}

std::pair<InstrListMap, InstrListMap>
Vectorizer::collectInstructions(BasicBlock *BB) {
  InstrListMap LoadRefs;
  InstrListMap StoreRefs;

  for (Instruction &I : *BB) {
    if (!I.mayReadOrWriteMemory())
      continue;

    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
      if (!LI->isSimple())
        continue;

      // Skip if it's not legal.
      if (!TTI.isLegalToVectorizeLoad(LI))
        continue;

      Type *Ty = LI->getType();
      if (!VectorType::isValidElementType(Ty->getScalarType()))
        continue;

      // Skip weird non-byte sizes. They probably aren't worth the effort of
      // handling correctly.
      unsigned TySize = DL.getTypeSizeInBits(Ty);
      if ((TySize % 8) != 0)
        continue;

      // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
      // functions currently use an integer type for the vectorized
      // load/store, and do not support casting between the integer type and a
      // vector of pointers (e.g. i64 to <2 x i16*>).
      if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
        continue;

      Value *Ptr = LI->getPointerOperand();
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

      unsigned VF = VecRegSize / TySize;
      VectorType *VecTy = dyn_cast<VectorType>(Ty);

      // No point in looking at these if they're too big to vectorize.
      if (TySize > VecRegSize / 2 ||
          (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
        continue;

      // Make sure all the users of a vector are constant-index extracts.
      if (isa<VectorType>(Ty) && !llvm::all_of(LI->users(), [](const User *U) {
            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
            return EEI && isa<ConstantInt>(EEI->getOperand(1));
          }))
        continue;

      // Save the load locations.
      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
      LoadRefs[ObjPtr].push_back(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
      if (!SI->isSimple())
        continue;

      // Skip if it's not legal.
      if (!TTI.isLegalToVectorizeStore(SI))
        continue;

      Type *Ty = SI->getValueOperand()->getType();
      if (!VectorType::isValidElementType(Ty->getScalarType()))
        continue;

      // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
      // functions currently use an integer type for the vectorized
      // load/store, and do not support casting between the integer type and a
      // vector of pointers (e.g. i64 to <2 x i16*>).
      if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
        continue;

      // Skip weird non-byte sizes. They probably aren't worth the effort of
      // handling correctly.
      unsigned TySize = DL.getTypeSizeInBits(Ty);
      if ((TySize % 8) != 0)
        continue;

      Value *Ptr = SI->getPointerOperand();
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

      unsigned VF = VecRegSize / TySize;
      VectorType *VecTy = dyn_cast<VectorType>(Ty);

      // No point in looking at these if they're too big to vectorize.
      if (TySize > VecRegSize / 2 ||
          (VecTy && TTI.getStoreVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
        continue;

      // Make sure all the users of a vector are constant-index extracts.
      if (isa<VectorType>(Ty) && !llvm::all_of(SI->users(), [](const User *U) {
            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
            return EEI && isa<ConstantInt>(EEI->getOperand(1));
          }))
        continue;

      // Save store location.
      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
      StoreRefs[ObjPtr].push_back(SI);
    }
  }

  return {LoadRefs, StoreRefs};
}

bool Vectorizer::vectorizeChains(InstrListMap &Map) {
  bool Changed = false;

  for (const std::pair<Value *, InstrList> &Chain : Map) {
    unsigned Size = Chain.second.size();
    if (Size < 2)
      continue;

    LLVM_DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");

    // Process the instructions in chunks of 64.
    for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
      unsigned Len = std::min<unsigned>(CE - CI, 64);
      ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
      Changed |= vectorizeInstructions(Chunk);
    }
  }

  return Changed;
}

bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
  LLVM_DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size()
                    << " instructions.\n");
  SmallVector<int, 16> Heads, Tails;
  int ConsecutiveChain[64];

  // Do a quadratic search on all of the given loads/stores and find all of the
  // pairs of loads/stores that follow each other.
  for (int i = 0, e = Instrs.size(); i < e; ++i) {
    ConsecutiveChain[i] = -1;
    for (int j = e - 1; j >= 0; --j) {
      if (i == j)
        continue;

      if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
        if (ConsecutiveChain[i] != -1) {
          int CurDistance = std::abs(ConsecutiveChain[i] - i);
          int NewDistance = std::abs(ConsecutiveChain[i] - j);
          if (j < i || NewDistance > CurDistance)
            continue; // Should not insert.
        }

        Tails.push_back(j);
        Heads.push_back(i);
        ConsecutiveChain[i] = j;
      }
    }
  }
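
  // Illustrative example: for accesses p[0], p[1], p[2] at positions 0, 1, 2,
  // the pairs found are 0 -> 1 and 1 -> 2 (Heads = [0, 1], Tails = [1, 2]).
  // Position 0 is a head that is nobody's tail, so the full chain {0, 1, 2}
  // is followed from it below.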

  bool Changed = false;
  SmallPtrSet<Instruction *, 16> InstructionsProcessed;

  for (int Head : Heads) {
    if (InstructionsProcessed.count(Instrs[Head]))
      continue;
    bool LongerChainExists = false;
    for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
      if (Head == Tails[TIt] &&
          !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
        LongerChainExists = true;
        break;
      }
    if (LongerChainExists)
      continue;

    // We found an instr that starts a chain. Now follow the chain and try to
    // vectorize it.
    SmallVector<Instruction *, 16> Operands;
    int I = Head;
    while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
      if (InstructionsProcessed.count(Instrs[I]))
        break;

      Operands.push_back(Instrs[I]);
      I = ConsecutiveChain[I];
    }

    bool Vectorized = false;
    if (isa<LoadInst>(*Operands.begin()))
      Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
    else
      Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);

    Changed |= Vectorized;
  }

  return Changed;
}

bool Vectorizer::vectorizeStoreChain(
    ArrayRef<Instruction *> Chain,
    SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
  StoreInst *S0 = cast<StoreInst>(Chain[0]);

  // If the vector has an int element, default to int for the whole store.
  Type *StoreTy;
  for (Instruction *I : Chain) {
    StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
    if (StoreTy->isIntOrIntVectorTy())
      break;

    if (StoreTy->isPtrOrPtrVectorTy()) {
      StoreTy = Type::getIntNTy(F.getParent()->getContext(),
                                DL.getTypeSizeInBits(StoreTy));
      break;
    }
  }

  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
  unsigned AS = S0->getPointerAddressSpace();
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
  unsigned Alignment = getAlignment(S0);

  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }

  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
  if (NewChain.empty()) {
    // No vectorization possible.
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }
  if (NewChain.size() == 1) {
    // Failed after the first instruction. Discard it and try the smaller chain.
    InstructionsProcessed->insert(NewChain.front());
    return false;
  }

  // Update Chain to the valid vectorizable subchain.
  Chain = NewChain;
  ChainSize = Chain.size();

  // Check if it's legal to vectorize this chain. If not, split the chain and
  // try again.
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;
  if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
    auto Chains = splitOddVectorElts(Chain, Sz);
    return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
           vectorizeStoreChain(Chains.second, InstructionsProcessed);
  }

  VectorType *VecTy;
  VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
  if (VecStoreTy)
    VecTy = VectorType::get(StoreTy->getScalarType(),
                            Chain.size() * VecStoreTy->getNumElements());
  else
    VecTy = VectorType::get(StoreTy, Chain.size());

  // If it's more than the max vector size or the target has a better
  // vector factor, break it into two pieces.
  unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
    LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
                         " Creating two separate arrays.\n");
    return vectorizeStoreChain(Chain.slice(0, TargetVF),
                               InstructionsProcessed) |
           vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
  }

  LLVM_DEBUG({
    dbgs() << "LSV: Stores to vectorize:\n";
    for (Instruction *I : Chain)
      dbgs() << "  " << *I << "\n";
  });

  // We won't try again to vectorize the elements of the chain, regardless of
  // whether we succeed below.
  InstructionsProcessed->insert(Chain.begin(), Chain.end());

  // If the store is going to be misaligned, don't vectorize it.
  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
    if (S0->getPointerAddressSpace() != 0)
      return false;

    unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
                                                   StackAdjustedAlignment,
                                                   DL, S0, nullptr, &DT);
    if (NewAlign < StackAdjustedAlignment)
      return false;
  }

  BasicBlock::iterator First, Last;
  std::tie(First, Last) = getBoundaryInstrs(Chain);
  Builder.SetInsertPoint(&*Last);

  Value *Vec = UndefValue::get(VecTy);

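  // When the chain's element type is itself a vector (e.g., two stores of
  // <2 x i16> merged into a <4 x i16> store), the scalars are re-packed into
  // one flat vector using NewIdx = J + I * VecWidth below.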
  if (VecStoreTy) {
    unsigned VecWidth = VecStoreTy->getNumElements();
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      StoreInst *Store = cast<StoreInst>(Chain[I]);
      for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
        unsigned NewIdx = J + I * VecWidth;
        Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
                                                      Builder.getInt32(J));
        if (Extract->getType() != StoreTy->getScalarType())
          Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());

        Value *Insert =
            Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(NewIdx));
        Vec = Insert;
      }
    }
  } else {
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      StoreInst *Store = cast<StoreInst>(Chain[I]);
      Value *Extract = Store->getValueOperand();
      if (Extract->getType() != StoreTy->getScalarType())
        Extract =
            Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());

      Value *Insert =
          Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
      Vec = Insert;
    }
  }

  // This cast is safe because Builder.CreateStore() always creates a bona fide
  // StoreInst.
  StoreInst *SI = cast<StoreInst>(
      Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
                                                     VecTy->getPointerTo(AS))));
  propagateMetadata(SI, Chain);
  SI->setAlignment(Alignment);

  eraseInstructions(Chain);
  ++NumVectorInstructions;
  NumScalarsVectorized += Chain.size();
  return true;
}

bool Vectorizer::vectorizeLoadChain(
    ArrayRef<Instruction *> Chain,
    SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
  LoadInst *L0 = cast<LoadInst>(Chain[0]);

  // If the vector has an int element, default to int for the whole load.
  Type *LoadTy;
  for (const auto &V : Chain) {
    LoadTy = cast<LoadInst>(V)->getType();
    if (LoadTy->isIntOrIntVectorTy())
      break;

    if (LoadTy->isPtrOrPtrVectorTy()) {
      LoadTy = Type::getIntNTy(F.getParent()->getContext(),
                               DL.getTypeSizeInBits(LoadTy));
      break;
    }
  }

  unsigned Sz = DL.getTypeSizeInBits(LoadTy);
  unsigned AS = L0->getPointerAddressSpace();
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
  unsigned Alignment = getAlignment(L0);

  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }

  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
  if (NewChain.empty()) {
    // No vectorization possible.
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }
  if (NewChain.size() == 1) {
    // Failed after the first instruction. Discard it and try the smaller chain.
    InstructionsProcessed->insert(NewChain.front());
    return false;
  }

  // Update Chain to the valid vectorizable subchain.
  Chain = NewChain;
  ChainSize = Chain.size();

  // Check if it's legal to vectorize this chain. If not, split the chain and
  // try again.
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;
  if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
    auto Chains = splitOddVectorElts(Chain, Sz);
    return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
           vectorizeLoadChain(Chains.second, InstructionsProcessed);
  }

  VectorType *VecTy;
  VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
  if (VecLoadTy)
    VecTy = VectorType::get(LoadTy->getScalarType(),
                            Chain.size() * VecLoadTy->getNumElements());
  else
    VecTy = VectorType::get(LoadTy, Chain.size());

  // If it's more than the max vector size or the target has a better
  // vector factor, break it into two pieces.
  unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
    LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
                         " Creating two separate arrays.\n");
    return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
           vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
  }

  // We won't try again to vectorize the elements of the chain, regardless of
  // whether we succeed below.
  InstructionsProcessed->insert(Chain.begin(), Chain.end());

  // If the load is going to be misaligned, don't vectorize it.
  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
    if (L0->getPointerAddressSpace() != 0)
      return false;

    unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
                                                   StackAdjustedAlignment,
                                                   DL, L0, nullptr, &DT);
    if (NewAlign < StackAdjustedAlignment)
      return false;

    Alignment = NewAlign;
  }

  LLVM_DEBUG({
    dbgs() << "LSV: Loads to vectorize:\n";
    for (Instruction *I : Chain)
      I->dump();
  });

  // getVectorizablePrefix already computed getBoundaryInstrs. The value of
  // Last may have changed since then, but the value of First won't have. If it
  // matters, we could compute getBoundaryInstrs only once and reuse it here.
  BasicBlock::iterator First, Last;
  std::tie(First, Last) = getBoundaryInstrs(Chain);
  Builder.SetInsertPoint(&*First);

  Value *Bitcast =
      Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
  // This cast is safe because Builder.CreateLoad always creates a bona fide
  // LoadInst.
  LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
  propagateMetadata(LI, Chain);
  LI->setAlignment(Alignment);

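  // When the original loads were themselves vectors (e.g., two <2 x i16>
  // loads merged into one <4 x i16> load), each old extractelement index is
  // remapped into the flat result as NewIdx = Idx + I * VecWidth below.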
  if (VecLoadTy) {
    SmallVector<Instruction *, 16> InstrsToErase;

    unsigned VecWidth = VecLoadTy->getNumElements();
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      for (auto Use : Chain[I]->users()) {
        // All users of vector loads are ExtractElement instructions with
        // constant indices, otherwise we would have bailed before now.
        Instruction *UI = cast<Instruction>(Use);
        unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
        unsigned NewIdx = Idx + I * VecWidth;
        Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
                                                UI->getName());
        if (V->getType() != UI->getType())
          V = Builder.CreateBitCast(V, UI->getType());

        // Replace the old instruction.
        UI->replaceAllUsesWith(V);
        InstrsToErase.push_back(UI);
      }
    }

    // Bitcast might not be an Instruction, if the value being loaded is a
    // constant. In that case, no need to reorder anything.
    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
      reorder(BitcastInst);

    for (auto I : InstrsToErase)
      I->eraseFromParent();
  } else {
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      Value *CV = Chain[I];
      Value *V =
          Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
      if (V->getType() != CV->getType()) {
        V = Builder.CreateBitOrPointerCast(V, CV->getType());
      }

      // Replace the old instruction.
      CV->replaceAllUsesWith(V);
    }

    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
      reorder(BitcastInst);
  }

  eraseInstructions(Chain);

  ++NumVectorInstructions;
  NumScalarsVectorized += Chain.size();
  return true;
}

bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
                                    unsigned Alignment) {
  if (Alignment % SzInBytes == 0)
    return false;
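  // E.g., a 16-byte vector access whose alignment is a multiple of 16 bytes
  // is never misaligned; with only 4-byte alignment it counts as misaligned
  // unless the target reports such accesses as both allowed and fast.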

  bool Fast = false;
  bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
                                                   SzInBytes * 8, AddressSpace,
                                                   Alignment, &Fast);
  LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
                    << " and fast? " << Fast << "\n";);
  return !Allows || !Fast;
}