LLVM 6.0.0svn
LoadStoreVectorizer.cpp
//===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"

STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

// FIXME: Assuming stack alignment of 4 is always good enough
static const unsigned StackAdjustedAlignment = 4;

namespace {

using InstrList = SmallVector<Instruction *, 8>;
using InstrListMap = MapVector<Value *, InstrList>;

class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
             ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
        DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  Value *getPointerOperand(Value *I) const;

  GetElementPtrInst *getSourceGEP(Value *Src) const;

  unsigned getPointerAddressSpace(Value *I);

  unsigned getAlignment(LoadInst *LI) const {
    unsigned Align = LI->getAlignment();
    if (Align != 0)
      return Align;

    return DL.getABITypeAlignment(LI->getType());
  }

  unsigned getAlignment(StoreInst *SI) const {
    unsigned Align = SI->getAlignment();
    if (Align != 0)
      return Align;

    return DL.getABITypeAlignment(SI->getValueOperand()->getType());
  }

  bool isConsecutiveAccess(Value *A, Value *B);

  /// After vectorization, reorder the instructions that I depends on
  /// (the instructions defining its operands), to ensure they dominate I.
  void reorder(Instruction *I);

  /// Returns the first and the last instructions in Chain.
  std::pair<BasicBlock::iterator, BasicBlock::iterator>
  getBoundaryInstrs(ArrayRef<Instruction *> Chain);

  /// Erases the original instructions after vectorizing.
  void eraseInstructions(ArrayRef<Instruction *> Chain);

  /// "Legalize" the vector type that would be produced by combining \p
  /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
  /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
  /// expected to have more than 4 elements.
  std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
  splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);

  /// Finds the largest prefix of Chain that's vectorizable, checking for
  /// intervening instructions which may affect the memory accessed by the
  /// instructions within Chain.
  ///
  /// The elements of \p Chain must be all loads or all stores and must be in
  /// address order.
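  /// (Editor's illustration, not in the original source: for a load chain
  /// [A, B, C] laid out in both address and BB order, an aliasing store
  /// between B and C leaves [A, B] as the vectorizable prefix.)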
  ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);

  /// Collects load and store instructions to vectorize.
  std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);

  /// Processes the collected instructions in \p Map. The values of \p Map
  /// should be all loads or all stores.
  bool vectorizeChains(InstrListMap &Map);

  /// Finds loads/stores to consecutive memory addresses and vectorizes them.
  bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);

  /// Vectorizes the load instructions in Chain.
  bool
  vectorizeLoadChain(ArrayRef<Instruction *> Chain,
                     SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

  /// Vectorizes the store instructions in Chain.
  bool
  vectorizeStoreChain(ArrayRef<Instruction *> Chain,
                      SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

  /// Checks whether this load/store access is misaligned.
  bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
                          unsigned Alignment);
};

class LoadStoreVectorizer : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizer() : FunctionPass(ID) {
    initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};

} // end anonymous namespace

char LoadStoreVectorizer::ID = 0;

INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
                      "Vectorize load and store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizer();
}

// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
// vectors of Instructions.
static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
  SmallVector<Value *, 8> VL(IL.begin(), IL.end());
  propagateMetadata(I, VL);
}

bool LoadStoreVectorizer::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  Vectorizer V(F, AA, DT, SE, TTI);
  return V.run();
}

// Vectorizer Implementation
bool Vectorizer::run() {
  bool Changed = false;

  // Scan the blocks in the function in post order.
  for (BasicBlock *BB : post_order(&F)) {
    InstrListMap LoadRefs, StoreRefs;
    std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
    Changed |= vectorizeChains(LoadRefs);
    Changed |= vectorizeChains(StoreRefs);
  }

  return Changed;
}

Value *Vectorizer::getPointerOperand(Value *I) const {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperand();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand();
  return nullptr;
}

unsigned Vectorizer::getPointerAddressSpace(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I))
    return S->getPointerAddressSpace();
  return -1;
}

GetElementPtrInst *Vectorizer::getSourceGEP(Value *Src) const {
  // First strip pointer bitcasts. Make sure pointee size is the same with
  // and without casts.
  // TODO: a stride set by the add instruction below can match the difference
  // in pointee type size here. Currently it will not be vectorized.
  Value *SrcPtr = getPointerOperand(Src);
  Value *SrcBase = SrcPtr->stripPointerCasts();
  if (DL.getTypeStoreSize(SrcPtr->getType()->getPointerElementType()) ==
      DL.getTypeStoreSize(SrcBase->getType()->getPointerElementType()))
    SrcPtr = SrcBase;
  return dyn_cast<GetElementPtrInst>(SrcPtr);
}

// FIXME: Merge with llvm::isConsecutiveAccess
bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getPointerAddressSpace(A);
  unsigned ASB = getPointerAddressSpace(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB))
    return false;

  // Make sure that A and B are different pointers of the same size type.
  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
  Type *PtrATy = PtrA->getType()->getPointerElementType();
  Type *PtrBTy = PtrB->getType()->getPointerElementType();
  if (PtrA == PtrB ||
      DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
      DL.getTypeStoreSize(PtrATy->getScalarType()) !=
          DL.getTypeStoreSize(PtrBTy->getScalarType()))
    return false;

  APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));

  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  APInt OffsetDelta = OffsetB - OffsetA;

  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
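  // Illustrative example (editor's note): for two i32 accesses through
  // inbounds GEPs p+1 and p+2, OffsetA = 4 and OffsetB = 8, so
  // OffsetDelta == 4 == Size and the accesses are consecutive.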
  if (PtrA == PtrB)
    return OffsetDelta == Size;

  // Compute the necessary base pointer delta to have the necessary final delta
  // equal to the size.
  APInt BaseDelta = Size - OffsetDelta;

  // Compute the distance with SCEV between the base pointers.
  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
  const SCEV *C = SE.getConstant(BaseDelta);
  const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
  if (X == PtrSCEVB)
    return true;

  // Sometimes even this doesn't work, because SCEV can't always see through
  // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
  // things the hard way.
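  //
  // Illustrative example (editor's note): with A addressed by gep p, (sext i)
  // and B by gep p, (sext (add i, 1)) where the add carries no nsw flag, SCEV
  // cannot fold sext(i + 1) to sext(i) + 1, so we match the GEP structure and
  // prove separately that i + 1 cannot overflow.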

  // Look through GEPs after checking they're the same except for the last
  // index.
  GetElementPtrInst *GEPA = getSourceGEP(A);
  GetElementPtrInst *GEPB = getSourceGEP(B);
  if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
    return false;
  unsigned FinalIndex = GEPA->getNumOperands() - 1;
  for (unsigned i = 0; i < FinalIndex; i++)
    if (GEPA->getOperand(i) != GEPB->getOperand(i))
      return false;

  Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex));
  Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex));
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return false;

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return false;

  bool Signed = isa<SExtInst>(OpA);

  OpA = dyn_cast<Instruction>(OpA->getOperand(0));
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpA || !OpB || OpA->getType() != OpB->getType())
    return false;

  // Now we need to prove that adding 1 to OpA won't overflow.
  bool Safe = false;
  // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA,
  // we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) {
    if (Signed)
      Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap();
    else
      Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap();
  }

  unsigned BitWidth = OpA->getType()->getScalarSizeInBits();

  // Second attempt:
  // If any bits are known to be zero other than the sign bit in OpA, we can
  // add 1 to it while guaranteeing no overflow of any sort.
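  // For instance (editor's illustration): if OpA is an i32 whose high 16 bits
  // are known zero, its maximum value is 0xFFFF, so OpA + 1 overflows neither
  // as a signed nor as an unsigned number.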
  if (!Safe) {
    KnownBits Known(BitWidth);
    computeKnownBits(OpA, Known, DL, 0, nullptr, OpA, &DT);
    if (Known.countMaxTrailingOnes() < (BitWidth - 1))
      Safe = true;
  }

  if (!Safe)
    return false;

  const SCEV *OffsetSCEVA = SE.getSCEV(OpA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *One = SE.getConstant(APInt(BitWidth, 1));
  const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One);
  return X2 == OffsetSCEVB;
}

void Vectorizer::reorder(Instruction *I) {
  OrderedBasicBlock OBB(I->getParent());
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.push_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int i = 0; i < NumOperands; i++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      if (!OBB.dominates(IM, I)) {
        InstructionsToMove.insert(IM);
        Worklist.push_back(IM);
      }
    }
  }

  // All instructions to move should follow I. Start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
       ++BBI) {
    if (!InstructionsToMove.count(&*BBI))
      continue;
    Instruction *IM = &*BBI;
    --BBI;
    IM->removeFromParent();
    IM->insertBefore(I);
  }
}

std::pair<BasicBlock::iterator, BasicBlock::iterator>
Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
  Instruction *C0 = Chain[0];
  BasicBlock::iterator FirstInstr = C0->getIterator();
  BasicBlock::iterator LastInstr = C0->getIterator();

  BasicBlock *BB = C0->getParent();
  unsigned NumFound = 0;
  for (Instruction &I : *BB) {
    if (!is_contained(Chain, &I))
      continue;

    ++NumFound;
    if (NumFound == 1) {
      FirstInstr = I.getIterator();
    }
    if (NumFound == Chain.size()) {
      LastInstr = I.getIterator();
      break;
    }
  }

  // Range is [first, last).
  return std::make_pair(FirstInstr, ++LastInstr);
}

void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
  SmallVector<Instruction *, 16> Instrs;
  for (Instruction *I : Chain) {
    Value *PtrOperand = getPointerOperand(I);
    assert(PtrOperand && "Instruction must have a pointer operand.");
    Instrs.push_back(I);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
      Instrs.push_back(GEP);
  }

  // Erase instructions.
  for (Instruction *I : Instrs)
    if (I->use_empty())
      I->eraseFromParent();
}

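// Editor's illustration (not in the original source): a chain of five i32
// accesses is 20 bytes, so NumLeft starts at 5; that equals the chain length
// and is odd, so the chain splits into {4 elements, 1 element}. Three i16
// accesses (6 bytes) give NumLeft = (6 - 2) / 2 = 2, splitting into {2, 1}.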
std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
                               unsigned ElementSizeBits) {
  unsigned ElementSizeBytes = ElementSizeBits / 8;
  unsigned SizeBytes = ElementSizeBytes * Chain.size();
  unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
  if (NumLeft == Chain.size()) {
    if ((NumLeft & 1) == 0)
      NumLeft /= 2; // Split even in half
    else
      --NumLeft;    // Split off last element
  } else if (NumLeft == 0)
    NumLeft = 1;
  return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}

ArrayRef<Instruction *>
Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
  // These are in BB order, unlike Chain, which is in address order.
  SmallVector<Instruction *, 16> MemoryInstrs;
  SmallVector<Instruction *, 16> ChainInstrs;

  bool IsLoadChain = isa<LoadInst>(Chain[0]);
  DEBUG({
    for (Instruction *I : Chain) {
      if (IsLoadChain)
        assert(isa<LoadInst>(I) &&
               "All elements of Chain must be loads, or all must be stores.");
      else
        assert(isa<StoreInst>(I) &&
               "All elements of Chain must be loads, or all must be stores.");
    }
  });

  for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
    if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
      if (!is_contained(Chain, &I))
        MemoryInstrs.push_back(&I);
      else
        ChainInstrs.push_back(&I);
    } else if (isa<IntrinsicInst>(&I) &&
               cast<IntrinsicInst>(&I)->getIntrinsicID() ==
                   Intrinsic::sideeffect) {
      // Ignore llvm.sideeffect calls.
    } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
      DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
      break;
    } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
      DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
                   << '\n');
      break;
    }
  }

  OrderedBasicBlock OBB(Chain[0]->getParent());

  // Loop until we find an instruction in ChainInstrs that we can't vectorize.
  unsigned ChainInstrIdx = 0;
  Instruction *BarrierMemoryInstr = nullptr;

  for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
    Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];

    // If a barrier memory instruction was found, chain instructions that
    // follow will not be added to the valid prefix.
    if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
      break;

    // Check (in BB order) if any instruction prevents ChainInstr from being
    // vectorized. Find and store the first such "conflicting" instruction.
    for (Instruction *MemInstr : MemoryInstrs) {
      // If a barrier memory instruction was found, do not check past it.
      if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
        break;

      if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
        continue;

      // We can ignore the alias as long as the load comes before the store,
      // because that means we won't be moving the load past the store to
      // vectorize it (the vectorized load is inserted at the location of the
      // first load in the chain).
      if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
          OBB.dominates(ChainInstr, MemInstr))
        continue;

      // Same case, but in reverse.
      if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
          OBB.dominates(MemInstr, ChainInstr))
        continue;

      if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
                        MemoryLocation::get(ChainInstr))) {
        DEBUG({
          dbgs() << "LSV: Found alias:\n"
                    "  Aliasing instruction and pointer:\n"
                 << "  " << *MemInstr << '\n'
                 << "  " << *getPointerOperand(MemInstr) << '\n'
                 << "  Aliased instruction and pointer:\n"
                 << "  " << *ChainInstr << '\n'
                 << "  " << *getPointerOperand(ChainInstr) << '\n';
        });
        // Save this aliasing memory instruction as a barrier, but allow other
        // instructions that precede the barrier to be vectorized with this
        // one.
        BarrierMemoryInstr = MemInstr;
        break;
      }
    }
    // Continue the search only for store chains, since vectorizing stores that
    // precede an aliasing load is valid. Conversely, vectorizing loads is
    // valid up to an aliasing store, but should not pull loads from further
    // down in the basic block.
    if (IsLoadChain && BarrierMemoryInstr) {
      // The BarrierMemoryInstr is a store that precedes ChainInstr.
      assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
      break;
    }
  }

  // Find the largest prefix of Chain whose elements are all in
  // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
  // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
  // order.)
  SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
      ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
  unsigned ChainIdx = 0;
  for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
    if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
      break;
  }
  return Chain.slice(0, ChainIdx);
}

std::pair<InstrListMap, InstrListMap>
Vectorizer::collectInstructions(BasicBlock *BB) {
  InstrListMap LoadRefs;
  InstrListMap StoreRefs;

  for (Instruction &I : *BB) {
    if (!I.mayReadOrWriteMemory())
      continue;

    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
      if (!LI->isSimple())
        continue;

      // Skip if it's not legal.
      if (!TTI.isLegalToVectorizeLoad(LI))
        continue;

      Type *Ty = LI->getType();
      if (!VectorType::isValidElementType(Ty->getScalarType()))
        continue;

      // Skip weird non-byte sizes. They probably aren't worth the effort of
      // handling correctly.
      unsigned TySize = DL.getTypeSizeInBits(Ty);
      if ((TySize % 8) != 0)
        continue;

      // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
      // functions are currently using an integer type for the vectorized
      // load/store, and do not support casting between the integer type and a
      // vector of pointers (e.g. i64 to <2 x i16*>).
      if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
        continue;

      Value *Ptr = LI->getPointerOperand();
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

      // No point in looking at these if they're too big to vectorize.
      if (TySize > VecRegSize / 2)
        continue;

      // Make sure all the users of a vector are constant-index extracts.
      if (isa<VectorType>(Ty) && !llvm::all_of(LI->users(), [](const User *U) {
            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
            return EEI && isa<ConstantInt>(EEI->getOperand(1));
          }))
        continue;

      // Save the load locations.
      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
      LoadRefs[ObjPtr].push_back(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
      if (!SI->isSimple())
        continue;

      // Skip if it's not legal.
      if (!TTI.isLegalToVectorizeStore(SI))
        continue;

      Type *Ty = SI->getValueOperand()->getType();
      if (!VectorType::isValidElementType(Ty->getScalarType()))
        continue;

      // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
      // functions are currently using an integer type for the vectorized
      // load/store, and do not support casting between the integer type and a
      // vector of pointers (e.g. i64 to <2 x i16*>).
      if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
        continue;

      // Skip weird non-byte sizes. They probably aren't worth the effort of
      // handling correctly.
      unsigned TySize = DL.getTypeSizeInBits(Ty);
      if ((TySize % 8) != 0)
        continue;

      Value *Ptr = SI->getPointerOperand();
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

      // No point in looking at these if they're too big to vectorize.
      if (TySize > VecRegSize / 2)
        continue;

      if (isa<VectorType>(Ty) && !llvm::all_of(SI->users(), [](const User *U) {
            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
            return EEI && isa<ConstantInt>(EEI->getOperand(1));
          }))
        continue;

      // Save store location.
      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
      StoreRefs[ObjPtr].push_back(SI);
    }
  }

  return {LoadRefs, StoreRefs};
}

bool Vectorizer::vectorizeChains(InstrListMap &Map) {
  bool Changed = false;

  for (const std::pair<Value *, InstrList> &Chain : Map) {
    unsigned Size = Chain.second.size();
    if (Size < 2)
      continue;

    DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");

    // Process the stores in chunks of 64.
    for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
      unsigned Len = std::min<unsigned>(CE - CI, 64);
      ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
      Changed |= vectorizeInstructions(Chunk);
    }
  }

  return Changed;
}

bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
  DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n");
  SmallVector<int, 16> Heads, Tails;
  int ConsecutiveChain[64];

  // Do a quadratic search on all of the given loads/stores and find all of the
  // pairs of loads/stores that follow each other.
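  // Editor's illustration (not in the original source): ConsecutiveChain[i]
  // == j records that Instrs[j] accesses the memory immediately after
  // Instrs[i]. For four adjacent i32 loads in address order, ConsecutiveChain
  // becomes {1, 2, 3, -1}, Heads {0, 1, 2} and Tails {1, 2, 3}.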
  for (int i = 0, e = Instrs.size(); i < e; ++i) {
    ConsecutiveChain[i] = -1;
    for (int j = e - 1; j >= 0; --j) {
      if (i == j)
        continue;

      if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
        if (ConsecutiveChain[i] != -1) {
          int CurDistance = std::abs(ConsecutiveChain[i] - i);
          int NewDistance = std::abs(ConsecutiveChain[i] - j);
          if (j < i || NewDistance > CurDistance)
            continue; // Should not insert.
        }

        Tails.push_back(j);
        Heads.push_back(i);
        ConsecutiveChain[i] = j;
      }
    }
  }

  bool Changed = false;
  SmallPtrSet<Instruction *, 16> InstructionsProcessed;

  for (int Head : Heads) {
    if (InstructionsProcessed.count(Instrs[Head]))
      continue;
    bool LongerChainExists = false;
    for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
      if (Head == Tails[TIt] &&
          !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
        LongerChainExists = true;
        break;
      }
    if (LongerChainExists)
      continue;

    // We found an instr that starts a chain. Now follow the chain and try to
    // vectorize it.
    SmallVector<Instruction *, 16> Operands;
    int I = Head;
    while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
      if (InstructionsProcessed.count(Instrs[I]))
        break;

      Operands.push_back(Instrs[I]);
      I = ConsecutiveChain[I];
    }

    bool Vectorized = false;
    if (isa<LoadInst>(*Operands.begin()))
      Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
    else
      Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);

    Changed |= Vectorized;
  }

  return Changed;
}

bool Vectorizer::vectorizeStoreChain(
    ArrayRef<Instruction *> Chain,
    SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
  StoreInst *S0 = cast<StoreInst>(Chain[0]);

  // If the vector has an int element, default to int for the whole store.
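  // (Editor's illustration: a chain of i16* stores would be rewritten to use
  // i64 as StoreTy, assuming 64-bit pointers; the pointer values are then
  // moved into the integer vector via ptr-to-int casts below.)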
  Type *StoreTy;
  for (Instruction *I : Chain) {
    StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
    if (StoreTy->isIntOrIntVectorTy())
      break;

    if (StoreTy->isPtrOrPtrVectorTy()) {
      StoreTy = Type::getIntNTy(F.getParent()->getContext(),
                                DL.getTypeSizeInBits(StoreTy));
      break;
    }
  }

  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
  unsigned AS = S0->getPointerAddressSpace();
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
  unsigned Alignment = getAlignment(S0);

  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }

  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
  if (NewChain.empty()) {
    // No vectorization possible.
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }
  if (NewChain.size() == 1) {
    // Failed after the first instruction. Discard it and try the smaller chain.
    InstructionsProcessed->insert(NewChain.front());
    return false;
  }

  // Update Chain to the valid vectorizable subchain.
  Chain = NewChain;
  ChainSize = Chain.size();

  // Check if it's legal to vectorize this chain. If not, split the chain and
  // try again.
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;
  if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
    auto Chains = splitOddVectorElts(Chain, Sz);
    return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
           vectorizeStoreChain(Chains.second, InstructionsProcessed);
  }

  VectorType *VecTy;
  VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
  if (VecStoreTy)
    VecTy = VectorType::get(StoreTy->getScalarType(),
                            Chain.size() * VecStoreTy->getNumElements());
  else
    VecTy = VectorType::get(StoreTy, Chain.size());

  // If it's more than the max vector size or the target has a better
  // vector factor, break it into two pieces.
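  // (Editor's illustration: with 128-bit vector registers and i32 elements,
  // VF = 4; a chain of 6 stores would then be split at TargetVF, e.g. into
  // 4 + 2 if the target reports TargetVF == 4.)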
  unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
    DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
                    " Creating two separate arrays.\n");
    return vectorizeStoreChain(Chain.slice(0, TargetVF),
                               InstructionsProcessed) |
           vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
  }

  DEBUG({
    dbgs() << "LSV: Stores to vectorize:\n";
    for (Instruction *I : Chain)
      dbgs() << "  " << *I << "\n";
  });

  // We won't try again to vectorize the elements of the chain, regardless of
  // whether we succeed below.
  InstructionsProcessed->insert(Chain.begin(), Chain.end());

  // If the store is going to be misaligned, don't vectorize it.
  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
    if (S0->getPointerAddressSpace() != 0)
      return false;

    unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
                                                   StackAdjustedAlignment,
                                                   DL, S0, nullptr, &DT);
    if (NewAlign < StackAdjustedAlignment)
      return false;
  }

  BasicBlock::iterator First, Last;
  std::tie(First, Last) = getBoundaryInstrs(Chain);
  Builder.SetInsertPoint(&*Last);

  Value *Vec = UndefValue::get(VecTy);

  if (VecStoreTy) {
    unsigned VecWidth = VecStoreTy->getNumElements();
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      StoreInst *Store = cast<StoreInst>(Chain[I]);
      for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
        unsigned NewIdx = J + I * VecWidth;
        Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
                                                      Builder.getInt32(J));
        if (Extract->getType() != StoreTy->getScalarType())
          Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());

        Value *Insert =
            Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(NewIdx));
        Vec = Insert;
      }
    }
  } else {
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      StoreInst *Store = cast<StoreInst>(Chain[I]);
      Value *Extract = Store->getValueOperand();
      if (Extract->getType() != StoreTy->getScalarType())
        Extract =
            Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());

      Value *Insert =
          Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
      Vec = Insert;
    }
  }

  // This cast is safe because Builder.CreateStore() always creates a bona fide
  // StoreInst.
  StoreInst *SI = cast<StoreInst>(
      Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
                                                     VecTy->getPointerTo(AS))));
  propagateMetadata(SI, Chain);
  SI->setAlignment(Alignment);

  eraseInstructions(Chain);
  ++NumVectorInstructions;
  NumScalarsVectorized += Chain.size();
  return true;
}

bool Vectorizer::vectorizeLoadChain(
    ArrayRef<Instruction *> Chain,
    SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
  LoadInst *L0 = cast<LoadInst>(Chain[0]);

  // If the vector has an int element, default to int for the whole load.
  Type *LoadTy;
  for (const auto &V : Chain) {
    LoadTy = cast<LoadInst>(V)->getType();
    if (LoadTy->isIntOrIntVectorTy())
      break;

    if (LoadTy->isPtrOrPtrVectorTy()) {
      LoadTy = Type::getIntNTy(F.getParent()->getContext(),
                               DL.getTypeSizeInBits(LoadTy));
      break;
    }
  }

  unsigned Sz = DL.getTypeSizeInBits(LoadTy);
  unsigned AS = L0->getPointerAddressSpace();
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
  unsigned Alignment = getAlignment(L0);

  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }

  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
  if (NewChain.empty()) {
    // No vectorization possible.
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }
  if (NewChain.size() == 1) {
    // Failed after the first instruction. Discard it and try the smaller chain.
    InstructionsProcessed->insert(NewChain.front());
    return false;
  }

  // Update Chain to the valid vectorizable subchain.
  Chain = NewChain;
  ChainSize = Chain.size();

  // Check if it's legal to vectorize this chain. If not, split the chain and
  // try again.
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;
  if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
    auto Chains = splitOddVectorElts(Chain, Sz);
    return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
           vectorizeLoadChain(Chains.second, InstructionsProcessed);
  }

  VectorType *VecTy;
  VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
  if (VecLoadTy)
    VecTy = VectorType::get(LoadTy->getScalarType(),
                            Chain.size() * VecLoadTy->getNumElements());
  else
    VecTy = VectorType::get(LoadTy, Chain.size());

  // If it's more than the max vector size or the target has a better
  // vector factor, break it into two pieces.
  unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
    DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
                    " Creating two separate arrays.\n");
    return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
           vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
  }

  // We won't try again to vectorize the elements of the chain, regardless of
  // whether we succeed below.
  InstructionsProcessed->insert(Chain.begin(), Chain.end());

  // If the load is going to be misaligned, don't vectorize it.
  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
    if (L0->getPointerAddressSpace() != 0)
      return false;

    unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
                                                   StackAdjustedAlignment,
                                                   DL, L0, nullptr, &DT);
    if (NewAlign < StackAdjustedAlignment)
      return false;

    Alignment = NewAlign;
  }

  DEBUG({
    dbgs() << "LSV: Loads to vectorize:\n";
    for (Instruction *I : Chain)
      I->dump();
  });

  // getVectorizablePrefix already computed getBoundaryInstrs. The value of
  // Last may have changed since then, but the value of First won't have. If it
  // matters, we could compute getBoundaryInstrs only once and reuse it here.
  BasicBlock::iterator First, Last;
  std::tie(First, Last) = getBoundaryInstrs(Chain);
  Builder.SetInsertPoint(&*First);

  Value *Bitcast =
      Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
  // This cast is safe because Builder.CreateLoad always creates a bona fide
  // LoadInst.
  LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
  propagateMetadata(LI, Chain);
  LI->setAlignment(Alignment);

  if (VecLoadTy) {
    SmallVector<Instruction *, 16> InstrsToErase;

    unsigned VecWidth = VecLoadTy->getNumElements();
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      for (auto Use : Chain[I]->users()) {
        // All users of vector loads are ExtractElement instructions with
        // constant indices, otherwise we would have bailed before now.
        Instruction *UI = cast<Instruction>(Use);
        unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
        unsigned NewIdx = Idx + I * VecWidth;
        Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
                                                UI->getName());
        if (V->getType() != UI->getType())
          V = Builder.CreateBitCast(V, UI->getType());

        // Replace the old instruction.
        UI->replaceAllUsesWith(V);
        InstrsToErase.push_back(UI);
      }
    }

    // Bitcast might not be an Instruction, if the value being loaded is a
    // constant. In that case, no need to reorder anything.
    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
      reorder(BitcastInst);

    for (auto I : InstrsToErase)
      I->eraseFromParent();
  } else {
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      Value *CV = Chain[I];
      Value *V =
          Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
      if (V->getType() != CV->getType()) {
        V = Builder.CreateBitOrPointerCast(V, CV->getType());
      }

      // Replace the old instruction.
      CV->replaceAllUsesWith(V);
    }

    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
      reorder(BitcastInst);
  }

  eraseInstructions(Chain);

  ++NumVectorInstructions;
  NumScalarsVectorized += Chain.size();
  return true;
}

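// Editor's note (illustrative): a 16-byte access with 4-byte alignment is
// reported misaligned here unless TTI says the target both allows and is
// fast at such unaligned accesses.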
bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
                                    unsigned Alignment) {
  if (Alignment % SzInBytes == 0)
    return false;

  bool Fast = false;
  bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
                                                   SzInBytes * 8, AddressSpace,
                                                   Alignment, &Fast);
  DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
               << " and fast? " << Fast << "\n";);
  return !Allows || !Fast;
}