1 //===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass merges loads/stores to/from sequential memory addresses into vector
11 // loads/stores. Although there's nothing GPU-specific in here, this pass is
12 // motivated by the microarchitectural quirks of nVidia and AMD GPUs.
13 //
14 // (For simplicity below we talk about loads only, but everything also applies
15 // to stores.)
16 //
17 // This pass is intended to be run late in the pipeline, after other
18 // vectorization opportunities have been exploited. So the assumption here is
19 // that immediately following our new vector load we'll need to extract out the
20 // individual elements of the load, so we can operate on them individually.
21 //
22 // On CPUs this transformation is usually not beneficial, because extracting the
23 // elements of a vector register is expensive on most architectures. It's
24 // usually better just to load each element individually into its own scalar
25 // register.
26 //
27 // However, nVidia and AMD GPUs don't have proper vector registers. Instead, a
28 // "vector load" loads directly into a series of scalar registers. In effect,
29 // extracting the elements of the vector is free. It's therefore always
30 // beneficial to vectorize a sequence of loads on these architectures.
31 //
32 // Vectorizing (perhaps a better name might be "coalescing") loads can have
33 // large performance impacts on GPU kernels, and opportunities for vectorizing
34 // are common in GPU code. This pass tries very hard to find such
35 // opportunities; its runtime is quadratic in the number of loads in a BB.
36 //
37 // Some CPU architectures, such as ARM, have instructions that load into
38 // multiple scalar registers, similar to a GPU vectorized load. In theory ARM
39 // could use this pass (with some modifications), but currently it implements
40 // its own pass to do something similar to what we do here.
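//
// As a rough illustration (hand-written IR, not a test case from the tree),
// the pass tries to turn a pattern like
//
//   %p1 = getelementptr inbounds float, float* %p, i64 1
//   %x0 = load float, float* %p
//   %x1 = load float, float* %p1
//
// into a single wide access followed by cheap element extracts:
//
//   %vp = bitcast float* %p to <2 x float>*
//   %v  = load <2 x float>, <2 x float>* %vp
//   %x0 = extractelement <2 x float> %v, i32 0
//   %x1 = extractelement <2 x float> %v, i32 1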
41 
42 #include "llvm/ADT/APInt.h"
43 #include "llvm/ADT/ArrayRef.h"
44 #include "llvm/ADT/MapVector.h"
45 #include "llvm/ADT/PostOrderIterator.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallPtrSet.h"
48 #include "llvm/ADT/SmallVector.h"
49 #include "llvm/ADT/Statistic.h"
50 #include "llvm/Analysis/AliasAnalysis.h"
51 #include "llvm/Analysis/GlobalsModRef.h"
52 #include "llvm/Analysis/MemoryLocation.h"
53 #include "llvm/Analysis/OrderedBasicBlock.h"
54 #include "llvm/Analysis/ScalarEvolution.h"
55 #include "llvm/Analysis/TargetTransformInfo.h"
56 #include "llvm/Analysis/ValueTracking.h"
57 #include "llvm/Analysis/VectorUtils.h"
58 #include "llvm/IR/Attributes.h"
59 #include "llvm/IR/BasicBlock.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DerivedTypes.h"
63 #include "llvm/IR/Dominators.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/IRBuilder.h"
66 #include "llvm/IR/InstrTypes.h"
67 #include "llvm/IR/Instruction.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/User.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/Pass.h"
75 #include "llvm/Support/Casting.h"
76 #include "llvm/Support/Debug.h"
77 #include "llvm/Support/KnownBits.h"
78 #include "llvm/Support/MathExtras.h"
79 #include "llvm/Support/raw_ostream.h"
80 #include "llvm/Transforms/Utils/Local.h"
81 #include "llvm/Transforms/Vectorize.h"
82 #include <algorithm>
83 #include <cassert>
84 #include <cstdlib>
85 #include <tuple>
86 #include <utility>
87 
88 using namespace llvm;
89 
90 #define DEBUG_TYPE "load-store-vectorizer"
91 
92 STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
93 STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
94 
95 // FIXME: Assuming stack alignment of 4 is always good enough
96 static const unsigned StackAdjustedAlignment = 4;
97 
98 namespace {
99 
100 using InstrList = SmallVector<Instruction *, 8>;
101 using InstrListMap = MapVector<Value *, InstrList>;
102 
103 class Vectorizer {
104  Function &F;
105  AliasAnalysis &AA;
106  DominatorTree &DT;
107  ScalarEvolution &SE;
108  TargetTransformInfo &TTI;
109  const DataLayout &DL;
110  IRBuilder<> Builder;
111 
112 public:
113  Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
114  ScalarEvolution &SE, TargetTransformInfo &TTI)
115  : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
116  DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}
117 
118  bool run();
119 
120 private:
121  Value *getPointerOperand(Value *I) const;
122 
123  GetElementPtrInst *getSourceGEP(Value *Src) const;
124 
125  unsigned getPointerAddressSpace(Value *I);
126 
127  unsigned getAlignment(LoadInst *LI) const {
128  unsigned Align = LI->getAlignment();
129  if (Align != 0)
130  return Align;
131 
132  return DL.getABITypeAlignment(LI->getType());
133  }
134 
135  unsigned getAlignment(StoreInst *SI) const {
136  unsigned Align = SI->getAlignment();
137  if (Align != 0)
138  return Align;
139 
140  return DL.getABITypeAlignment(SI->getValueOperand()->getType());
141  }
142 
143  bool isConsecutiveAccess(Value *A, Value *B);
144 
145  /// After vectorization, reorder the instructions that I depends on
146  /// (the instructions defining its operands), to ensure they dominate I.
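 /// For example (illustrative scenario): the vectorized load is inserted at
 /// the position of the first scalar load in block order, but its pointer
 /// operand is taken from the lowest-address load in the chain, whose GEP may
 /// be defined further down the block; such defs are hoisted here.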
147  void reorder(Instruction *I);
148 
149  /// Returns the first and the last instructions in Chain.
150  std::pair<BasicBlock::iterator, BasicBlock::iterator>
151  getBoundaryInstrs(ArrayRef<Instruction *> Chain);
152 
153  /// Erases the original instructions after vectorizing.
154  void eraseInstructions(ArrayRef<Instruction *> Chain);
155 
156  /// "Legalize" the vector type that would be produced by combining \p
157  /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
158  /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
159  /// expected to have more than 4 elements.
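 /// For example (illustrative figures): a chain of seven 16-bit accesses
 /// (14 bytes in total) is split into a 6-element piece (12 bytes, a multiple
 /// of 4) and a 1-element remainder.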
160  std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
161  splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);
162 
163  /// Finds the largest prefix of Chain that's vectorizable, checking for
164  /// intervening instructions which may affect the memory accessed by the
165  /// instructions within Chain.
166  ///
167  /// The elements of \p Chain must be all loads or all stores and must be in
168  /// address order.
169  ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);
170 
171  /// Collects load and store instructions to vectorize.
172  std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);
173 
174  /// Processes the collected instructions in \p Map. The values of \p Map
175  /// should be all loads or all stores.
176  bool vectorizeChains(InstrListMap &Map);
177 
178  /// Finds loads and stores to consecutive memory addresses and vectorizes them.
179  bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);
180 
181  /// Vectorizes the load instructions in Chain.
182  bool
183  vectorizeLoadChain(ArrayRef<Instruction *> Chain,
184  SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
185 
186  /// Vectorizes the store instructions in Chain.
187  bool
188  vectorizeStoreChain(ArrayRef<Instruction *> Chain,
189  SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
190 
191  /// Checks whether this load/store access is misaligned.
192  bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
193  unsigned Alignment);
194 };
195 
196 class LoadStoreVectorizer : public FunctionPass {
197 public:
198  static char ID;
199 
200  LoadStoreVectorizer() : FunctionPass(ID) {
201  initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
202  }
203 
204  bool runOnFunction(Function &F) override;
205 
206  StringRef getPassName() const override {
207  return "GPU Load and Store Vectorizer";
208  }
209 
210  void getAnalysisUsage(AnalysisUsage &AU) const override {
211  AU.addRequired<AAResultsWrapperPass>();
212  AU.addRequired<ScalarEvolutionWrapperPass>();
213  AU.addRequired<DominatorTreeWrapperPass>();
214  AU.addRequired<TargetTransformInfoWrapperPass>();
215  AU.setPreservesCFG();
216  }
217 };
218 
219 } // end anonymous namespace
220 
221 char LoadStoreVectorizer::ID = 0;
222 
223 INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
224  "Vectorize load and Store instructions", false, false)
225 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
226 INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
227 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
228 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
229 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
230 INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
231  "Vectorize load and store instructions", false, false)
232 
233 Pass *llvm::createLoadStoreVectorizerPass() {
234  return new LoadStoreVectorizer();
235 }
236 
237 // The real propagateMetadata expects a SmallVector<Value*>, but we deal in
238 // vectors of Instructions.
239 static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
240  SmallVector<Value *, 8> VL(IL.begin(), IL.end());
241  propagateMetadata(I, VL);
242 }
243 
244 bool LoadStoreVectorizer::runOnFunction(Function &F) {
245  // Don't vectorize when the attribute NoImplicitFloat is used.
246  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
247  return false;
248 
249  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
250  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
251  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
252  TargetTransformInfo &TTI =
253  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
254 
255  Vectorizer V(F, AA, DT, SE, TTI);
256  return V.run();
257 }
258 
259 // Vectorizer Implementation
260 bool Vectorizer::run() {
261  bool Changed = false;
262 
263  // Scan the blocks in the function in post order.
264  for (BasicBlock *BB : post_order(&F)) {
265  InstrListMap LoadRefs, StoreRefs;
266  std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
267  Changed |= vectorizeChains(LoadRefs);
268  Changed |= vectorizeChains(StoreRefs);
269  }
270 
271  return Changed;
272 }
273 
274 Value *Vectorizer::getPointerOperand(Value *I) const {
275  if (LoadInst *LI = dyn_cast<LoadInst>(I))
276  return LI->getPointerOperand();
277  if (StoreInst *SI = dyn_cast<StoreInst>(I))
278  return SI->getPointerOperand();
279  return nullptr;
280 }
281 
282 unsigned Vectorizer::getPointerAddressSpace(Value *I) {
283  if (LoadInst *L = dyn_cast<LoadInst>(I))
284  return L->getPointerAddressSpace();
285  if (StoreInst *S = dyn_cast<StoreInst>(I))
286  return S->getPointerAddressSpace();
287  return -1;
288 }
289 
290 GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const {
291  // First strip pointer bitcasts. Make sure pointee size is the same with
292  // and without casts.
293  // TODO: a stride set by the add instruction below can match the difference
294  // in pointee type size here. Currently it will not be vectorized.
295  Value *SrcPtr = getPointerOperand(Src);
296  Value *SrcBase = SrcPtr->stripPointerCasts();
297  if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) ==
298  DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType()))
299  SrcPtr = SrcBase;
300  return dyn_cast<GetElementPtrInst>(SrcPtr);
301 }
302 
303 // FIXME: Merge with llvm::isConsecutiveAccess
304 bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
305  Value *PtrA = getPointerOperand(A);
306  Value *PtrB = getPointerOperand(B);
307  unsigned ASA = getPointerAddressSpace(A);
308  unsigned ASB = getPointerAddressSpace(B);
309 
310  // Check that the address spaces match and that the pointers are valid.
311  if (!PtrA || !PtrB || (ASA != ASB))
312  return false;
313 
314  // Make sure that A and B are different pointers of the same size type.
315  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
316  Type *PtrATy = PtrA->getType()->getPointerElementType();
317  Type *PtrBTy = PtrB->getType()->getPointerElementType();
318  if (PtrA == PtrB ||
319  DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
320  DL.getTypeStoreSize(PtrATy->getScalarType()) !=
321  DL.getTypeStoreSize(PtrBTy->getScalarType()))
322  return false;
323 
324  APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));
325 
326  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
327  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
328  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
329  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
330 
331  APInt OffsetDelta = OffsetB - OffsetA;
332 
333  // Check if they are based on the same pointer. That makes the offsets
334  // sufficient.
335  if (PtrA == PtrB)
336  return OffsetDelta == Size;
337 
338  // Compute the necessary base pointer delta to have the necessary final delta
339  // equal to the size.
340  APInt BaseDelta = Size - OffsetDelta;
341 
342  // Compute the distance with SCEV between the base pointers.
343  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
344  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
345  const SCEV *C = SE.getConstant(BaseDelta);
346  const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
347  if (X == PtrSCEVB)
348  return true;
349 
350  // Sometimes even this doesn't work, because SCEV can't always see through
351  // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
352  // things the hard way.
353 
354  // Look through GEPs after checking they're the same except for the last
355  // index.
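  // For instance (an illustrative shape, not a test from the tree), two
  // accesses whose GEPs differ only in a final index of the form
  //   (sext i32 %i)   versus   (sext i32 (add nsw i32 %i, 1))
  // can still be proven consecutive by the checks below, provided the
  // increment is known not to overflow.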
356  GetElementPtrInst *GEPA = getSourceGEP(A);
357  GetElementPtrInst *GEPB = getSourceGEP(B);
358  if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
359  return false;
360  unsigned FinalIndex = GEPA->getNumOperands() - 1;
361  for (unsigned i = 0; i < FinalIndex; i++)
362  if (GEPA->getOperand(i) != GEPB->getOperand(i))
363  return false;
364 
365  Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex));
366  Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex));
367  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
368  OpA->getType() != OpB->getType())
369  return false;
370 
371  // Only look through a ZExt/SExt.
372  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
373  return false;
374 
375  bool Signed = isa<SExtInst>(OpA);
376 
377  OpA = dyn_cast<Instruction>(OpA->getOperand(0));
378  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
379  if (!OpA || !OpB || OpA->getType() != OpB->getType())
380  return false;
381 
382  // Now we need to prove that adding 1 to OpA won't overflow.
383  bool Safe = false;
384  // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA,
385  // we're okay.
386  if (OpB->getOpcode() == Instruction::Add &&
387  isa<ConstantInt>(OpB->getOperand(1)) &&
388  cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) {
389  if (Signed)
390  Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap();
391  else
392  Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap();
393  }
394 
395  unsigned BitWidth = OpA->getType()->getScalarSizeInBits();
396 
397  // Second attempt:
398  // If any bits are known to be zero other than the sign bit in OpA, we can
399  // add 1 to it while guaranteeing no overflow of any sort.
400  if (!Safe) {
401  KnownBits Known(BitWidth);
402  computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
403  if (Known.countMaxTrailingOnes() < (BitWidth - 1))
404  Safe = true;
405  }
406 
407  if (!Safe)
408  return false;
409 
410  const SCEV *OffsetSCEVA = SE.getSCEV(OpA);
411  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
412  const SCEV *One = SE.getConstant(APInt(BitWidth, 1));
413  const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One);
414  return X2 == OffsetSCEVB;
415 }
416 
417 void Vectorizer::reorder(Instruction *I) {
418  OrderedBasicBlock OBB(I->getParent());
419  SmallPtrSet<Instruction *, 16> InstructionsToMove;
420  SmallVector<Instruction *, 16> Worklist;
421 
422  Worklist.push_back(I);
423  while (!Worklist.empty()) {
424  Instruction *IW = Worklist.pop_back_val();
425  int NumOperands = IW->getNumOperands();
426  for (int i = 0; i < NumOperands; i++) {
427  Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
428  if (!IM || IM->getOpcode() == Instruction::PHI)
429  continue;
430 
431  // If IM is in another BB, no need to move it, because this pass only
432  // vectorizes instructions within one BB.
433  if (IM->getParent() != I->getParent())
434  continue;
435 
436  if (!OBB.dominates(IM, I)) {
437  InstructionsToMove.insert(IM);
438  Worklist.push_back(IM);
439  }
440  }
441  }
442 
443  // All instructions to move should follow I. Start from I, not from begin().
444  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
445  ++BBI) {
446  if (!InstructionsToMove.count(&*BBI))
447  continue;
448  Instruction *IM = &*BBI;
449  --BBI;
450  IM->removeFromParent();
451  IM->insertBefore(I);
452  }
453 }
454 
455 std::pair<BasicBlock::iterator, BasicBlock::iterator>
456 Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
457  Instruction *C0 = Chain[0];
458  BasicBlock::iterator FirstInstr = C0->getIterator();
459  BasicBlock::iterator LastInstr = C0->getIterator();
460 
461  BasicBlock *BB = C0->getParent();
462  unsigned NumFound = 0;
463  for (Instruction &I : *BB) {
464  if (!is_contained(Chain, &I))
465  continue;
466 
467  ++NumFound;
468  if (NumFound == 1) {
469  FirstInstr = I.getIterator();
470  }
471  if (NumFound == Chain.size()) {
472  LastInstr = I.getIterator();
473  break;
474  }
475  }
476 
477  // Range is [first, last).
478  return std::make_pair(FirstInstr, ++LastInstr);
479 }
480 
481 void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
482  SmallVector<Instruction *, 16> Instrs;
483  for (Instruction *I : Chain) {
484  Value *PtrOperand = getPointerOperand(I);
485  assert(PtrOperand && "Instruction must have a pointer operand.");
486  Instrs.push_back(I);
487  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
488  Instrs.push_back(GEP);
489  }
490 
491  // Erase instructions.
492  for (Instruction *I : Instrs)
493  if (I->use_empty())
494  I->eraseFromParent();
495 }
496 
497 std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
498 Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
499  unsigned ElementSizeBits) {
500  unsigned ElementSizeBytes = ElementSizeBits / 8;
501  unsigned SizeBytes = ElementSizeBytes * Chain.size();
502  unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
503  if (NumLeft == Chain.size()) {
504  if ((NumLeft & 1) == 0)
505  NumLeft /= 2; // Split even in half
506  else
507  --NumLeft; // Split off last element
508  } else if (NumLeft == 0)
509  NumLeft = 1;
510  return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
511 }
512 
513 ArrayRef<Instruction *>
514 Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
515  // These are in BB order, unlike Chain, which is in address order.
516  SmallVector<Instruction *, 16> MemoryInstrs;
517  SmallVector<Instruction *, 16> ChainInstrs;
518 
519  bool IsLoadChain = isa<LoadInst>(Chain[0]);
520  DEBUG({
521  for (Instruction *I : Chain) {
522  if (IsLoadChain)
523  assert(isa<LoadInst>(I) &&
524  "All elements of Chain must be loads, or all must be stores.");
525  else
526  assert(isa<StoreInst>(I) &&
527  "All elements of Chain must be loads, or all must be stores.");
528  }
529  });
530 
531  for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
532  if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
533  if (!is_contained(Chain, &I))
534  MemoryInstrs.push_back(&I);
535  else
536  ChainInstrs.push_back(&I);
537  } else if (isa<IntrinsicInst>(&I) &&
538  cast<IntrinsicInst>(&I)->getIntrinsicID() ==
539  Intrinsic::sideeffect) {
540  // Ignore llvm.sideeffect calls.
541  } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
542  DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
543  break;
544  } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
545  DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
546  << '\n');
547  break;
548  }
549  }
550 
551  OrderedBasicBlock OBB(Chain[0]->getParent());
552 
553  // Loop until we find an instruction in ChainInstrs that we can't vectorize.
554  unsigned ChainInstrIdx = 0;
555  Instruction *BarrierMemoryInstr = nullptr;
556 
557  for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
558  Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];
559 
560  // If a barrier memory instruction was found, chain instructions that follow
561  // will not be added to the valid prefix.
562  if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
563  break;
564 
565  // Check (in BB order) if any instruction prevents ChainInstr from being
566  // vectorized. Find and store the first such "conflicting" instruction.
567  for (Instruction *MemInstr : MemoryInstrs) {
568  // If a barrier memory instruction was found, do not check past it.
569  if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
570  break;
571 
572  if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
573  continue;
574 
575  // We can ignore the alias as long as the load comes before the store,
576  // because that means we won't be moving the load past the store to
577  // vectorize it (the vectorized load is inserted at the location of the
578  // first load in the chain).
579  if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
580  OBB.dominates(ChainInstr, MemInstr))
581  continue;
582 
583  // Same case, but in reverse.
584  if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
585  OBB.dominates(MemInstr, ChainInstr))
586  continue;
587 
588  if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
589  MemoryLocation::get(ChainInstr))) {
590  DEBUG({
591  dbgs() << "LSV: Found alias:\n"
592  " Aliasing instruction and pointer:\n"
593  << " " << *MemInstr << '\n'
594  << " " << *getPointerOperand(MemInstr) << '\n'
595  << " Aliased instruction and pointer:\n"
596  << " " << *ChainInstr << '\n'
597  << " " << *getPointerOperand(ChainInstr) << '\n';
598  });
599  // Save this aliasing memory instruction as a barrier, but allow other
600  // instructions that precede the barrier to be vectorized with this one.
601  BarrierMemoryInstr = MemInstr;
602  break;
603  }
604  }
605  // Continue the search only for store chains, since vectorizing stores that
606  // precede an aliasing load is valid. Conversely, vectorizing loads is valid
607  // up to an aliasing store, but should not pull loads from further down in
608  // the basic block.
609  if (IsLoadChain && BarrierMemoryInstr) {
610  // The BarrierMemoryInstr is a store that precedes ChainInstr.
611  assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
612  break;
613  }
614  }
615 
616  // Find the largest prefix of Chain whose elements are all in
617  // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
618  // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
619  // order.)
620  SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
621  ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
622  unsigned ChainIdx = 0;
623  for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
624  if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
625  break;
626  }
627  return Chain.slice(0, ChainIdx);
628 }
629 
630 std::pair<InstrListMap, InstrListMap>
631 Vectorizer::collectInstructions(BasicBlock *BB) {
632  InstrListMap LoadRefs;
633  InstrListMap StoreRefs;
634 
635  for (Instruction &I : *BB) {
636  if (!I.mayReadOrWriteMemory())
637  continue;
638 
639  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
640  if (!LI->isSimple())
641  continue;
642 
643  // Skip if it's not legal.
644  if (!TTI.isLegalToVectorizeLoad(LI))
645  continue;
646 
647  Type *Ty = LI->getType();
648  if (!VectorType::isValidElementType(Ty->getScalarType()))
649  continue;
650 
651  // Skip weird non-byte sizes. They probably aren't worth the effort of
652  // handling correctly.
653  unsigned TySize = DL.getTypeSizeInBits(Ty);
654  if ((TySize % 8) != 0)
655  continue;
656 
657  // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
658  // functions currently use an integer type for the vectorized
659  // load/store, and do not support casting between the integer type and a
660  // vector of pointers (e.g. i64 to <2 x i16*>)
661  if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
662  continue;
663 
664  Value *Ptr = LI->getPointerOperand();
665  unsigned AS = Ptr->getType()->getPointerAddressSpace();
666  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
667 
668  // No point in looking at these if they're too big to vectorize.
669  if (TySize > VecRegSize / 2)
670  continue;
671 
672  // Make sure all the users of a vector are constant-index extracts.
673  if (isa<VectorType>(Ty) && !llvm::all_of(LI->users(), [](const User *U) {
674  const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
675  return EEI && isa<ConstantInt>(EEI->getOperand(1));
676  }))
677  continue;
678 
679  // Save the load locations.
680  Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
681  LoadRefs[ObjPtr].push_back(LI);
682  } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
683  if (!SI->isSimple())
684  continue;
685 
686  // Skip if it's not legal.
687  if (!TTI.isLegalToVectorizeStore(SI))
688  continue;
689 
690  Type *Ty = SI->getValueOperand()->getType();
691  if (!VectorType::isValidElementType(Ty->getScalarType()))
692  continue;
693 
694  // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
695  // functions currently use an integer type for the vectorized
696  // load/store, and do not support casting between the integer type and a
697  // vector of pointers (e.g. i64 to <2 x i16*>)
698  if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
699  continue;
700 
701  // Skip weird non-byte sizes. They probably aren't worth the effort of
702  // handling correctly.
703  unsigned TySize = DL.getTypeSizeInBits(Ty);
704  if ((TySize % 8) != 0)
705  continue;
706 
707  Value *Ptr = SI->getPointerOperand();
708  unsigned AS = Ptr->getType()->getPointerAddressSpace();
709  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
710 
711  // No point in looking at these if they're too big to vectorize.
712  if (TySize > VecRegSize / 2)
713  continue;
714 
715  if (isa<VectorType>(Ty) && !llvm::all_of(SI->users(), [](const User *U) {
716  const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
717  return EEI && isa<ConstantInt>(EEI->getOperand(1));
718  }))
719  continue;
720 
721  // Save store location.
722  Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
723  StoreRefs[ObjPtr].push_back(SI);
724  }
725  }
726 
727  return {LoadRefs, StoreRefs};
728 }
729 
730 bool Vectorizer::vectorizeChains(InstrListMap &Map) {
731  bool Changed = false;
732 
733  for (const std::pair<Value *, InstrList> &Chain : Map) {
734  unsigned Size = Chain.second.size();
735  if (Size < 2)
736  continue;
737 
738  DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");
739 
740  // Process the stores in chunks of 64.
741  for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
742  unsigned Len = std::min<unsigned>(CE - CI, 64);
743  ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
744  Changed |= vectorizeInstructions(Chunk);
745  }
746  }
747 
748  return Changed;
749 }
750 
751 bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
752  DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n");
753  SmallVector<int, 16> Heads, Tails;
754  int ConsecutiveChain[64];
755 
756  // Do a quadratic search on all of the given loads/stores and find all of the
757  // pairs of loads/stores that follow each other.
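  // Illustrative example: for Instrs = {A, B, C} with B consecutive to A and
  // C consecutive to B, the search records ConsecutiveChain[A] = B and
  // ConsecutiveChain[B] = C; A ends up only in Heads and C only in Tails, so
  // the walk further down starts at A and gathers the whole chain {A, B, C}.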
758  for (int i = 0, e = Instrs.size(); i < e; ++i) {
759  ConsecutiveChain[i] = -1;
760  for (int j = e - 1; j >= 0; --j) {
761  if (i == j)
762  continue;
763 
764  if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
765  if (ConsecutiveChain[i] != -1) {
766  int CurDistance = std::abs(ConsecutiveChain[i] - i);
767  int NewDistance = std::abs(ConsecutiveChain[i] - j);
768  if (j < i || NewDistance > CurDistance)
769  continue; // Should not insert.
770  }
771 
772  Tails.push_back(j);
773  Heads.push_back(i);
774  ConsecutiveChain[i] = j;
775  }
776  }
777  }
778 
779  bool Changed = false;
780  SmallPtrSet<Instruction *, 16> InstructionsProcessed;
781 
782  for (int Head : Heads) {
783  if (InstructionsProcessed.count(Instrs[Head]))
784  continue;
785  bool LongerChainExists = false;
786  for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
787  if (Head == Tails[TIt] &&
788  !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
789  LongerChainExists = true;
790  break;
791  }
792  if (LongerChainExists)
793  continue;
794 
795  // We found an instr that starts a chain. Now follow the chain and try to
796  // vectorize it.
797  SmallVector<Instruction *, 16> Operands;
798  int I = Head;
799  while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
800  if (InstructionsProcessed.count(Instrs[I]))
801  break;
802 
803  Operands.push_back(Instrs[I]);
804  I = ConsecutiveChain[I];
805  }
806 
807  bool Vectorized = false;
808  if (isa<LoadInst>(*Operands.begin()))
809  Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
810  else
811  Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
812 
813  Changed |= Vectorized;
814  }
815 
816  return Changed;
817 }
818 
819 bool Vectorizer::vectorizeStoreChain(
820  ArrayRef<Instruction *> Chain,
821  SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
822  StoreInst *S0 = cast<StoreInst>(Chain[0]);
823 
824  // If the vector has an int element, default to int for the whole store.
825  Type *StoreTy;
826  for (Instruction *I : Chain) {
827  StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
828  if (StoreTy->isIntOrIntVectorTy())
829  break;
830 
831  if (StoreTy->isPtrOrPtrVectorTy()) {
832  StoreTy = Type::getIntNTy(F.getParent()->getContext(),
833  DL.getTypeSizeInBits(StoreTy));
834  break;
835  }
836  }
837 
838  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
839  unsigned AS = S0->getPointerAddressSpace();
840  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
841  unsigned VF = VecRegSize / Sz;
842  unsigned ChainSize = Chain.size();
843  unsigned Alignment = getAlignment(S0);
844 
845  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
846  InstructionsProcessed->insert(Chain.begin(), Chain.end());
847  return false;
848  }
849 
850  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
851  if (NewChain.empty()) {
852  // No vectorization possible.
853  InstructionsProcessed->insert(Chain.begin(), Chain.end());
854  return false;
855  }
856  if (NewChain.size() == 1) {
857  // Failed after the first instruction. Discard it and try the smaller chain.
858  InstructionsProcessed->insert(NewChain.front());
859  return false;
860  }
861 
862  // Update Chain to the valid vectorizable subchain.
863  Chain = NewChain;
864  ChainSize = Chain.size();
865 
866  // Check if it's legal to vectorize this chain. If not, split the chain and
867  // try again.
868  unsigned EltSzInBytes = Sz / 8;
869  unsigned SzInBytes = EltSzInBytes * ChainSize;
870  if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
871  auto Chains = splitOddVectorElts(Chain, Sz);
872  return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
873  vectorizeStoreChain(Chains.second, InstructionsProcessed);
874  }
875 
876  VectorType *VecTy;
877  VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
878  if (VecStoreTy)
879  VecTy = VectorType::get(StoreTy->getScalarType(),
880  Chain.size() * VecStoreTy->getNumElements());
881  else
882  VecTy = VectorType::get(StoreTy, Chain.size());
883 
884  // If it's more than the max vector size or the target has a better
885  // vector factor, break it into two pieces.
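  // For instance (illustrative numbers): with a 128-bit vector register and
  // 32-bit elements, VF is 4, so a chain of 6 stores would be split into a
  // 4-element piece and a 2-element piece, assuming the target reports the
  // same factor here.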
886  unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
887  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
888  DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
889  " Creating two separate arrays.\n");
890  return vectorizeStoreChain(Chain.slice(0, TargetVF),
891  InstructionsProcessed) |
892  vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
893  }
894 
895  DEBUG({
896  dbgs() << "LSV: Stores to vectorize:\n";
897  for (Instruction *I : Chain)
898  dbgs() << " " << *I << "\n";
899  });
900 
901  // We won't try again to vectorize the elements of the chain, regardless of
902  // whether we succeed below.
903  InstructionsProcessed->insert(Chain.begin(), Chain.end());
904 
905  // If the store is going to be misaligned, don't vectorize it.
906  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
907  if (S0->getPointerAddressSpace() != 0)
908  return false;
909 
910  unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
911  StackAdjustedAlignment,
912  DL, S0, nullptr, &DT);
913  if (NewAlign < StackAdjustedAlignment)
914  return false;
915  }
916 
917  BasicBlock::iterator First, Last;
918  std::tie(First, Last) = getBoundaryInstrs(Chain);
919  Builder.SetInsertPoint(&*Last);
920 
921  Value *Vec = UndefValue::get(VecTy);
922 
923  if (VecStoreTy) {
924  unsigned VecWidth = VecStoreTy->getNumElements();
925  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
926  StoreInst *Store = cast<StoreInst>(Chain[I]);
927  for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
928  unsigned NewIdx = J + I * VecWidth;
929  Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
930  Builder.getInt32(J));
931  if (Extract->getType() != StoreTy->getScalarType())
932  Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());
933 
934  Value *Insert =
935  Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(NewIdx));
936  Vec = Insert;
937  }
938  }
939  } else {
940  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
941  StoreInst *Store = cast<StoreInst>(Chain[I]);
942  Value *Extract = Store->getValueOperand();
943  if (Extract->getType() != StoreTy->getScalarType())
944  Extract =
945  Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());
946 
947  Value *Insert =
948  Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
949  Vec = Insert;
950  }
951  }
952 
953  // This cast is safe because Builder.CreateStore() always creates a bona fide
954  // StoreInst.
955  StoreInst *SI = cast<StoreInst>(
956  Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
957  VecTy->getPointerTo(AS))));
958  propagateMetadata(SI, Chain);
959  SI->setAlignment(Alignment);
960 
961  eraseInstructions(Chain);
962  ++NumVectorInstructions;
963  NumScalarsVectorized += Chain.size();
964  return true;
965 }
966 
967 bool Vectorizer::vectorizeLoadChain(
968  ArrayRef<Instruction *> Chain,
969  SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
970  LoadInst *L0 = cast<LoadInst>(Chain[0]);
971 
972  // If the vector has an int element, default to int for the whole load.
973  Type *LoadTy;
974  for (const auto &V : Chain) {
975  LoadTy = cast<LoadInst>(V)->getType();
976  if (LoadTy->isIntOrIntVectorTy())
977  break;
978 
979  if (LoadTy->isPtrOrPtrVectorTy()) {
980  LoadTy = Type::getIntNTy(F.getParent()->getContext(),
981  DL.getTypeSizeInBits(LoadTy));
982  break;
983  }
984  }
985 
986  unsigned Sz = DL.getTypeSizeInBits(LoadTy);
987  unsigned AS = L0->getPointerAddressSpace();
988  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
989  unsigned VF = VecRegSize / Sz;
990  unsigned ChainSize = Chain.size();
991  unsigned Alignment = getAlignment(L0);
992 
993  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
994  InstructionsProcessed->insert(Chain.begin(), Chain.end());
995  return false;
996  }
997 
998  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
999  if (NewChain.empty()) {
1000  // No vectorization possible.
1001  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1002  return false;
1003  }
1004  if (NewChain.size() == 1) {
1005  // Failed after the first instruction. Discard it and try the smaller chain.
1006  InstructionsProcessed->insert(NewChain.front());
1007  return false;
1008  }
1009 
1010  // Update Chain to the valid vectorizable subchain.
1011  Chain = NewChain;
1012  ChainSize = Chain.size();
1013 
1014  // Check if it's legal to vectorize this chain. If not, split the chain and
1015  // try again.
1016  unsigned EltSzInBytes = Sz / 8;
1017  unsigned SzInBytes = EltSzInBytes * ChainSize;
1018  if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
1019  auto Chains = splitOddVectorElts(Chain, Sz);
1020  return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
1021  vectorizeLoadChain(Chains.second, InstructionsProcessed);
1022  }
1023 
1024  VectorType *VecTy;
1025  VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
1026  if (VecLoadTy)
1027  VecTy = VectorType::get(LoadTy->getScalarType(),
1028  Chain.size() * VecLoadTy->getNumElements());
1029  else
1030  VecTy = VectorType::get(LoadTy, Chain.size());
1031 
1032  // If it's more than the max vector size or the target has a better
1033  // vector factor, break it into two pieces.
1034  unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
1035  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
1036  DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
1037  " Creating two separate arrays.\n");
1038  return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
1039  vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
1040  }
1041 
1042  // We won't try again to vectorize the elements of the chain, regardless of
1043  // whether we succeed below.
1044  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1045 
1046  // If the load is going to be misaligned, don't vectorize it.
1047  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
1048  if (L0->getPointerAddressSpace() != 0)
1049  return false;
1050 
1051  unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
1052  StackAdjustedAlignment,
1053  DL, L0, nullptr, &DT);
1054  if (NewAlign < StackAdjustedAlignment)
1055  return false;
1056 
1057  Alignment = NewAlign;
1058  }
1059 
1060  DEBUG({
1061  dbgs() << "LSV: Loads to vectorize:\n";
1062  for (Instruction *I : Chain)
1063  I->dump();
1064  });
1065 
1066  // getVectorizablePrefix already computed getBoundaryInstrs. The value of
1067  // Last may have changed since then, but the value of First won't have. If it
1068  // matters, we could compute getBoundaryInstrs only once and reuse it here.
1069  BasicBlock::iterator First, Last;
1070  std::tie(First, Last) = getBoundaryInstrs(Chain);
1071  Builder.SetInsertPoint(&*First);
1072 
1073  Value *Bitcast =
1074  Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
1075  // This cast is safe because Builder.CreateLoad always creates a bona fide
1076  // LoadInst.
1077  LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
1078  propagateMetadata(LI, Chain);
1079  LI->setAlignment(Alignment);
1080 
1081  if (VecLoadTy) {
1082  SmallVector<Instruction *, 16> InstrsToErase;
1083 
1084  unsigned VecWidth = VecLoadTy->getNumElements();
1085  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1086  for (auto Use : Chain[I]->users()) {
1087  // All users of vector loads are ExtractElement instructions with
1088  // constant indices, otherwise we would have bailed before now.
1089  Instruction *UI = cast<Instruction>(Use);
1090  unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
1091  unsigned NewIdx = Idx + I * VecWidth;
1092  Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
1093  UI->getName());
1094  if (V->getType() != UI->getType())
1095  V = Builder.CreateBitCast(V, UI->getType());
1096 
1097  // Replace the old instruction.
1098  UI->replaceAllUsesWith(V);
1099  InstrsToErase.push_back(UI);
1100  }
1101  }
1102 
1103  // Bitcast might not be an Instruction, if the value being loaded is a
1104  // constant. In that case, no need to reorder anything.
1105  if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1106  reorder(BitcastInst);
1107 
1108  for (auto I : InstrsToErase)
1109  I->eraseFromParent();
1110  } else {
1111  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1112  Value *CV = Chain[I];
1113  Value *V =
1114  Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
1115  if (V->getType() != CV->getType()) {
1116  V = Builder.CreateBitOrPointerCast(V, CV->getType());
1117  }
1118 
1119  // Replace the old instruction.
1120  CV->replaceAllUsesWith(V);
1121  }
1122 
1123  if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1124  reorder(BitcastInst);
1125  }
1126 
1127  eraseInstructions(Chain);
1128 
1129  ++NumVectorInstructions;
1130  NumScalarsVectorized += Chain.size();
1131  return true;
1132 }
1133 
1134 bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
1135  unsigned Alignment) {
1136  if (Alignment % SzInBytes == 0)
1137  return false;
1138 
1139  bool Fast = false;
1140  bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
1141  SzInBytes * 8, AddressSpace,
1142  Alignment, &Fast);
1143  DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
1144  << " and fast? " << Fast << "\n";);
1145  return !Allows || !Fast;
1146 }