//===----- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer ----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;

#define DEBUG_TYPE "load-store-vectorizer"
STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");

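// Editorial note: a minimal sketch of what this pass does, assuming a target
// whose load/store vector width (TTI.getLoadStoreVecRegBitWidth) is at least
// 64 bits. Two adjacent i32 loads such as
//
//   %a = load i32, i32* %p
//   %q = getelementptr i32, i32* %p, i64 1
//   %b = load i32, i32* %q
//
// are rewritten, roughly, into one wide load plus extracts:
//
//   %vp = bitcast i32* %p to <2 x i32>*
//   %v  = load <2 x i32>, <2 x i32>* %vp
//   %a  = extractelement <2 x i32> %v, i32 0
//   %b  = extractelement <2 x i32> %v, i32 1
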
namespace {

// FIXME: Assuming stack alignment of 4 is always good enough
static const unsigned StackAdjustedAlignment = 4;
typedef SmallVector<Instruction *, 8> InstrList;
typedef MapVector<Value *, InstrList> InstrListMap;

class Vectorizer {
  Function &F;
  AliasAnalysis &AA;
  DominatorTree &DT;
  ScalarEvolution &SE;
  TargetTransformInfo &TTI;
  const DataLayout &DL;
  IRBuilder<> Builder;

public:
  Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
             ScalarEvolution &SE, TargetTransformInfo &TTI)
      : F(F), AA(AA), DT(DT), SE(SE), TTI(TTI),
        DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}

  bool run();

private:
  Value *getPointerOperand(Value *I);

  unsigned getPointerAddressSpace(Value *I);

  unsigned getAlignment(LoadInst *LI) const {
    unsigned Align = LI->getAlignment();
    if (Align != 0)
      return Align;

    return DL.getABITypeAlignment(LI->getType());
  }

  unsigned getAlignment(StoreInst *SI) const {
    unsigned Align = SI->getAlignment();
    if (Align != 0)
      return Align;

    return DL.getABITypeAlignment(SI->getValueOperand()->getType());
  }

  bool isConsecutiveAccess(Value *A, Value *B);

  /// After vectorization, reorder the instructions that I depends on
  /// (the instructions defining its operands), to ensure they dominate I.
  void reorder(Instruction *I);

  /// Returns the first and the last instructions in Chain.
  std::pair<BasicBlock::iterator, BasicBlock::iterator>
  getBoundaryInstrs(ArrayRef<Instruction *> Chain);

  /// Erases the original instructions after vectorizing.
  void eraseInstructions(ArrayRef<Instruction *> Chain);

  /// "Legalize" the vector type that would be produced by combining \p
  /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
  /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
  /// expected to have more than 4 elements.
  std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
  splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);

  /// Finds the largest prefix of Chain that's vectorizable, checking for
  /// intervening instructions which may affect the memory accessed by the
  /// instructions within Chain.
  ///
  /// The elements of \p Chain must be all loads or all stores and must be in
  /// address order.
  ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);

  /// Collects load and store instructions to vectorize.
  std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);

  /// Processes the collected instructions in \p Map. The values of \p Map
  /// should be all loads or all stores.
  bool vectorizeChains(InstrListMap &Map);

  /// Finds loads/stores to consecutive memory addresses and vectorizes them.
  bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);

  /// Vectorizes the load instructions in Chain.
  bool
  vectorizeLoadChain(ArrayRef<Instruction *> Chain,
                     SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

  /// Vectorizes the store instructions in Chain.
  bool
  vectorizeStoreChain(ArrayRef<Instruction *> Chain,
                      SmallPtrSet<Instruction *, 16> *InstructionsProcessed);

  /// Checks whether a load/store access of the given size and alignment is
  /// misaligned for the target.
  bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
                          unsigned Alignment);
};

class LoadStoreVectorizer : public FunctionPass {
public:
  static char ID;

  LoadStoreVectorizer() : FunctionPass(ID) {
    initializeLoadStoreVectorizerPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override {
    return "GPU Load and Store Vectorizer";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<ScalarEvolutionWrapperPass>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.setPreservesCFG();
  }
};
}

INITIALIZE_PASS_BEGIN(LoadStoreVectorizer, DEBUG_TYPE,
                      "Vectorize load and store instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoadStoreVectorizer, DEBUG_TYPE,
                    "Vectorize load and store instructions", false, false)

char LoadStoreVectorizer::ID = 0;

Pass *llvm::createLoadStoreVectorizerPass() {
  return new LoadStoreVectorizer();
}

// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
// vectors of Instructions.
static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
  SmallVector<Value *, 8> VL(IL.begin(), IL.end());
  propagateMetadata(I, VL);
}

bool LoadStoreVectorizer::runOnFunction(Function &F) {
  // Don't vectorize when the attribute NoImplicitFloat is used.
  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
    return false;

  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  Vectorizer V(F, AA, DT, SE, TTI);
  return V.run();
}

// Vectorizer Implementation
bool Vectorizer::run() {
  bool Changed = false;

  // Scan the blocks in the function in post order.
  for (BasicBlock *BB : post_order(&F)) {
    InstrListMap LoadRefs, StoreRefs;
    std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
    Changed |= vectorizeChains(LoadRefs);
    Changed |= vectorizeChains(StoreRefs);
  }

  return Changed;
}

Value *Vectorizer::getPointerOperand(Value *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I))
    return LI->getPointerOperand();
  if (StoreInst *SI = dyn_cast<StoreInst>(I))
    return SI->getPointerOperand();
  return nullptr;
}

unsigned Vectorizer::getPointerAddressSpace(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I))
    return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I))
    return S->getPointerAddressSpace();
  return -1;
}

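// Editorial note: isConsecutiveAccess below answers "does B's access start
// exactly where A's ends?". For two i32 loads at constant offsets 0 and 4
// from the same base, OffsetDelta is 4, which equals the i32 store size, so
// the accesses are consecutive. When the bases differ syntactically, SCEV is
// asked whether SCEV(PtrA) + (Size - OffsetDelta) == SCEV(PtrB), and failing
// that, a structural match over GEPs with sext/zext'ed final indices is
// attempted.
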
// FIXME: Merge with llvm::isConsecutiveAccess
bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getPointerAddressSpace(A);
  unsigned ASB = getPointerAddressSpace(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB))
    return false;

  // Make sure that A and B are different pointers to types of the same size.
  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
  Type *PtrATy = PtrA->getType()->getPointerElementType();
  Type *PtrBTy = PtrB->getType()->getPointerElementType();
  if (PtrA == PtrB ||
      DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
      DL.getTypeStoreSize(PtrATy->getScalarType()) !=
          DL.getTypeStoreSize(PtrBTy->getScalarType()))
    return false;

  APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));

  APInt OffsetA(PtrBitWidth, 0), OffsetB(PtrBitWidth, 0);
  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  APInt OffsetDelta = OffsetB - OffsetA;

  // Check if they are based on the same pointer. That makes the offsets
  // sufficient.
  if (PtrA == PtrB)
    return OffsetDelta == Size;

  // Compute the necessary base pointer delta to have the necessary final delta
  // equal to the size.
  APInt BaseDelta = Size - OffsetDelta;

  // Compute the distance with SCEV between the base pointers.
  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
  const SCEV *C = SE.getConstant(BaseDelta);
  const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
  if (X == PtrSCEVB)
    return true;

  // Sometimes even this doesn't work, because SCEV can't always see through
  // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
  // things the hard way.

  // Look through GEPs after checking they're the same except for the last
  // index.
  GetElementPtrInst *GEPA = dyn_cast<GetElementPtrInst>(getPointerOperand(A));
  GetElementPtrInst *GEPB = dyn_cast<GetElementPtrInst>(getPointerOperand(B));
  if (!GEPA || !GEPB || GEPA->getNumOperands() != GEPB->getNumOperands())
    return false;
  unsigned FinalIndex = GEPA->getNumOperands() - 1;
  for (unsigned i = 0; i < FinalIndex; i++)
    if (GEPA->getOperand(i) != GEPB->getOperand(i))
      return false;

  Instruction *OpA = dyn_cast<Instruction>(GEPA->getOperand(FinalIndex));
  Instruction *OpB = dyn_cast<Instruction>(GEPB->getOperand(FinalIndex));
  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
      OpA->getType() != OpB->getType())
    return false;

  // Only look through a ZExt/SExt.
  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
    return false;

  bool Signed = isa<SExtInst>(OpA);

  OpA = dyn_cast<Instruction>(OpA->getOperand(0));
  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
  if (!OpA || !OpB || OpA->getType() != OpB->getType())
    return false;

  // Now we need to prove that adding 1 to OpA won't overflow.
  bool Safe = false;
  // First attempt: if OpB is an add with NSW/NUW, and OpB is 1 added to OpA,
  // we're okay.
  if (OpB->getOpcode() == Instruction::Add &&
      isa<ConstantInt>(OpB->getOperand(1)) &&
      cast<ConstantInt>(OpB->getOperand(1))->getSExtValue() > 0) {
    if (Signed)
      Safe = cast<BinaryOperator>(OpB)->hasNoSignedWrap();
    else
      Safe = cast<BinaryOperator>(OpB)->hasNoUnsignedWrap();
  }

  unsigned BitWidth = OpA->getType()->getScalarSizeInBits();

  // Second attempt:
  // If any bits are known to be zero other than the sign bit in OpA, we can
  // add 1 to it while guaranteeing no overflow of any sort.
  if (!Safe) {
    APInt KnownZero(BitWidth, 0);
    APInt KnownOne(BitWidth, 0);
    computeKnownBits(OpA, KnownZero, KnownOne, DL, 0, nullptr, OpA, &DT);
    KnownZero &= ~APInt::getHighBitsSet(BitWidth, 1);
    if (KnownZero != 0)
      Safe = true;
  }

  if (!Safe)
    return false;

  const SCEV *OffsetSCEVA = SE.getSCEV(OpA);
  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
  const SCEV *One = SE.getConstant(APInt(BitWidth, 1));
  const SCEV *X2 = SE.getAddExpr(OffsetSCEVA, One);
  return X2 == OffsetSCEVB;
}

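// Editorial note: the vectorized access is inserted at the position of one
// end of the chain, so instructions that compute its operands may end up
// defined after the new use. reorder() walks I's operands transitively
// (within I's block) and hoists any non-dominating definitions to just
// before I, restoring SSA dominance.
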
void Vectorizer::reorder(Instruction *I) {
  OrderedBasicBlock OBB(I->getParent());
  SmallPtrSet<Instruction *, 16> InstructionsToMove;
  SmallVector<Instruction *, 16> Worklist;

  Worklist.push_back(I);
  while (!Worklist.empty()) {
    Instruction *IW = Worklist.pop_back_val();
    int NumOperands = IW->getNumOperands();
    for (int i = 0; i < NumOperands; i++) {
      Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
      if (!IM || IM->getOpcode() == Instruction::PHI)
        continue;

      // If IM is in another BB, no need to move it, because this pass only
      // vectorizes instructions within one BB.
      if (IM->getParent() != I->getParent())
        continue;

      if (!OBB.dominates(IM, I)) {
        InstructionsToMove.insert(IM);
        Worklist.push_back(IM);
      }
    }
  }

  // All instructions to move should follow I. Start from I, not from begin().
  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
       ++BBI) {
    if (!InstructionsToMove.count(&*BBI))
      continue;
    Instruction *IM = &*BBI;
    --BBI;
    IM->removeFromParent();
    IM->insertBefore(I);
  }
}

std::pair<BasicBlock::iterator, BasicBlock::iterator>
Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
  Instruction *C0 = Chain[0];
  BasicBlock::iterator FirstInstr = C0->getIterator();
  BasicBlock::iterator LastInstr = C0->getIterator();

  BasicBlock *BB = C0->getParent();
  unsigned NumFound = 0;
  for (Instruction &I : *BB) {
    if (!is_contained(Chain, &I))
      continue;

    ++NumFound;
    if (NumFound == 1) {
      FirstInstr = I.getIterator();
    }
    if (NumFound == Chain.size()) {
      LastInstr = I.getIterator();
      break;
    }
  }

  // Range is [first, last).
  return std::make_pair(FirstInstr, ++LastInstr);
}

void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
  SmallVector<Instruction *, 16> Instrs;
  for (Instruction *I : Chain) {
    Value *PtrOperand = getPointerOperand(I);
    assert(PtrOperand && "Instruction must have a pointer operand.");
    Instrs.push_back(I);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
      Instrs.push_back(GEP);
  }

  // Erase instructions.
  for (Instruction *I : Instrs)
    if (I->use_empty())
      I->eraseFromParent();
}

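// Editorial note: a worked example for splitOddVectorElts. For a chain of 7
// i16 accesses (ElementSizeBits = 16): SizeBytes = 2 * 7 = 14, so
// NumLeft = (14 - 14 % 4) / 2 = 6, splitting the chain into a 12-byte piece
// (a multiple of 4) and a 1-element, 2-byte remainder.
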
std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
                               unsigned ElementSizeBits) {
  unsigned ElementSizeBytes = ElementSizeBits / 8;
  unsigned SizeBytes = ElementSizeBytes * Chain.size();
  unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
  if (NumLeft == Chain.size())
    --NumLeft;
  else if (NumLeft == 0)
    NumLeft = 1;
  return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}

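// Editorial note: an example of the barrier logic in getVectorizablePrefix,
// for a load chain:
//
//   %x = load i32, i32* %a       ; Chain[0]
//   store i32 0, i32* %unknown   ; may alias %a.1 -- becomes the barrier
//   %y = load i32, i32* %a.1     ; Chain[1]
//
// The store is harmless for %x (the wide load is inserted at the first load,
// so %x is not moved past it), but %y would have to move up across the
// store, so the returned prefix is just [%x].
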
ArrayRef<Instruction *>
Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
  // These are in BB order, unlike Chain, which is in address order.
  SmallVector<Instruction *, 16> MemoryInstrs;
  SmallVector<Instruction *, 16> ChainInstrs;

  bool IsLoadChain = isa<LoadInst>(Chain[0]);
  DEBUG({
    for (Instruction *I : Chain) {
      if (IsLoadChain)
        assert(isa<LoadInst>(I) &&
               "All elements of Chain must be loads, or all must be stores.");
      else
        assert(isa<StoreInst>(I) &&
               "All elements of Chain must be loads, or all must be stores.");
    }
  });

  for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
    if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
      if (!is_contained(Chain, &I))
        MemoryInstrs.push_back(&I);
      else
        ChainInstrs.push_back(&I);
    } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
      DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
      break;
    } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
      DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
                   << '\n');
      break;
    }
  }

  OrderedBasicBlock OBB(Chain[0]->getParent());

  // Loop until we find an instruction in ChainInstrs that we can't vectorize.
  unsigned ChainInstrIdx = 0;
  Instruction *BarrierMemoryInstr = nullptr;

  for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
    Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];

    // If a barrier memory instruction was found, chain instructions that
    // follow will not be added to the valid prefix.
    if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
      break;

    // Check (in BB order) if any instruction prevents ChainInstr from being
    // vectorized. Find and store the first such "conflicting" instruction.
    for (Instruction *MemInstr : MemoryInstrs) {
      // If a barrier memory instruction was found, do not check past it.
      if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
        break;

      if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
        continue;

      // We can ignore the alias as long as the load comes before the store,
      // because that means we won't be moving the load past the store to
      // vectorize it (the vectorized load is inserted at the location of the
      // first load in the chain).
      if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
          OBB.dominates(ChainInstr, MemInstr))
        continue;

      // Same case, but in reverse.
      if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
          OBB.dominates(MemInstr, ChainInstr))
        continue;

      if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
                        MemoryLocation::get(ChainInstr))) {
        DEBUG({
          dbgs() << "LSV: Found alias:\n"
                    "  Aliasing instruction and pointer:\n"
                 << "  " << *MemInstr << '\n'
                 << "  " << *getPointerOperand(MemInstr) << '\n'
                 << "  Aliased instruction and pointer:\n"
                 << "  " << *ChainInstr << '\n'
                 << "  " << *getPointerOperand(ChainInstr) << '\n';
        });
        // Save this aliasing memory instruction as a barrier, but allow other
        // instructions that precede the barrier to be vectorized with this
        // one.
        BarrierMemoryInstr = MemInstr;
        break;
      }
    }
    // Continue the search only for store chains, since vectorizing stores that
    // precede an aliasing load is valid. Conversely, vectorizing loads is
    // valid up to an aliasing store, but should not pull loads from further
    // down in the basic block.
    if (IsLoadChain && BarrierMemoryInstr) {
      // The BarrierMemoryInstr is a store that precedes ChainInstr.
      assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
      break;
    }
  }

  // Find the largest prefix of Chain whose elements are all in
  // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
  // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
  // order.)
  SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
      ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
  unsigned ChainIdx = 0;
  for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
    if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
      break;
  }
  return Chain.slice(0, ChainIdx);
}

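// Editorial note: grouping by GetUnderlyingObject below means only accesses
// that could plausibly be adjacent are compared by the quadratic search in
// vectorizeInstructions. Only simple (non-atomic, non-volatile) accesses of
// byte-sized, target-legal types at most half a vector register wide survive
// this filter.
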
std::pair<InstrListMap, InstrListMap>
Vectorizer::collectInstructions(BasicBlock *BB) {
  InstrListMap LoadRefs;
  InstrListMap StoreRefs;

  for (Instruction &I : *BB) {
    if (!I.mayReadOrWriteMemory())
      continue;

    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
      if (!LI->isSimple())
        continue;

      // Skip if it's not legal.
      if (!TTI.isLegalToVectorizeLoad(LI))
        continue;

      Type *Ty = LI->getType();
      if (!VectorType::isValidElementType(Ty->getScalarType()))
        continue;

      // Skip weird non-byte sizes. They probably aren't worth the effort of
      // handling correctly.
      unsigned TySize = DL.getTypeSizeInBits(Ty);
      if (TySize < 8)
        continue;

      Value *Ptr = LI->getPointerOperand();
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);

      // No point in looking at these if they're too big to vectorize.
      if (TySize > VecRegSize / 2)
        continue;

      // Make sure all the users of a vector are constant-index extracts.
      if (isa<VectorType>(Ty) && !all_of(LI->users(), [LI](const User *U) {
            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
            return EEI && isa<ConstantInt>(EEI->getOperand(1));
          }))
        continue;

      // Save the load locations.
      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
      LoadRefs[ObjPtr].push_back(LI);

    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
      if (!SI->isSimple())
        continue;

      // Skip if it's not legal.
      if (!TTI.isLegalToVectorizeStore(SI))
        continue;

      Type *Ty = SI->getValueOperand()->getType();
      if (!VectorType::isValidElementType(Ty->getScalarType()))
        continue;

      // Skip weird non-byte sizes. They probably aren't worth the effort of
      // handling correctly.
      unsigned TySize = DL.getTypeSizeInBits(Ty);
      if (TySize < 8)
        continue;

      Value *Ptr = SI->getPointerOperand();
      unsigned AS = Ptr->getType()->getPointerAddressSpace();
      unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
      if (TySize > VecRegSize / 2)
        continue;

      if (isa<VectorType>(Ty) && !all_of(SI->users(), [SI](const User *U) {
            const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
            return EEI && isa<ConstantInt>(EEI->getOperand(1));
          }))
        continue;

      // Save store location.
      Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
      StoreRefs[ObjPtr].push_back(SI);
    }
  }

  return {LoadRefs, StoreRefs};
}

bool Vectorizer::vectorizeChains(InstrListMap &Map) {
  bool Changed = false;

  for (const std::pair<Value *, InstrList> &Chain : Map) {
    unsigned Size = Chain.second.size();
    if (Size < 2)
      continue;

    DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");

    // Process the accesses in chunks of 64.
    for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
      unsigned Len = std::min<unsigned>(CE - CI, 64);
      ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
      Changed |= vectorizeInstructions(Chunk);
    }
  }

  return Changed;
}

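// Editorial note: how the chain search below fits together. If accesses A, B,
// C are contiguous in memory, the quadratic search records ConsecutiveChain
// links A->B and B->C; B and C then appear in Tails, so only A is kept as a
// chain head, and the walk A->B->C forms one candidate chain. The fixed
// 64-entry ConsecutiveChain array is safe because vectorizeChains above only
// passes chunks of at most 64 instructions.
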
bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
  DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n");
  SmallVector<int, 16> Heads, Tails;
  int ConsecutiveChain[64];

  // Do a quadratic search on all of the given accesses and find all of the
  // pairs of accesses that follow each other.
  for (int i = 0, e = Instrs.size(); i < e; ++i) {
    ConsecutiveChain[i] = -1;
    for (int j = e - 1; j >= 0; --j) {
      if (i == j)
        continue;

      if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
        if (ConsecutiveChain[i] != -1) {
          int CurDistance = std::abs(ConsecutiveChain[i] - i);
          int NewDistance = std::abs(ConsecutiveChain[i] - j);
          if (j < i || NewDistance > CurDistance)
            continue; // Should not insert.
        }

        Tails.push_back(j);
        Heads.push_back(i);
        ConsecutiveChain[i] = j;
      }
    }
  }

  bool Changed = false;
  SmallPtrSet<Instruction *, 16> InstructionsProcessed;

  for (int Head : Heads) {
    if (InstructionsProcessed.count(Instrs[Head]))
      continue;
    bool LongerChainExists = false;
    for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
      if (Head == Tails[TIt] &&
          !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
        LongerChainExists = true;
        break;
      }
    if (LongerChainExists)
      continue;

    // We found an instr that starts a chain. Now follow the chain and try to
    // vectorize it.
    SmallVector<Instruction *, 16> Operands;
    int I = Head;
    while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
      if (InstructionsProcessed.count(Instrs[I]))
        break;

      Operands.push_back(Instrs[I]);
      I = ConsecutiveChain[I];
    }

    bool Vectorized = false;
    if (isa<LoadInst>(*Operands.begin()))
      Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
    else
      Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);

    Changed |= Vectorized;
  }

  return Changed;
}

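// Editorial note: a sketch of vectorizeStoreChain's output for four
// consecutive i32 stores, assuming a 128-bit vector register:
//
//   store i32 %a, i32* %p0            %v0 = insertelement undef, %a, 0
//   store i32 %b, i32* %p1    ==>     %v1 = insertelement %v0, %b, 1
//   store i32 %c, i32* %p2            %v2 = insertelement %v1, %c, 2
//   store i32 %d, i32* %p3            %v3 = insertelement %v2, %d, 3
//                                     store <4 x i32> %v3, <4 x i32>* %p
//
// (the pointer bitcast is omitted; the scalar stores are erased afterwards).
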
bool Vectorizer::vectorizeStoreChain(
    ArrayRef<Instruction *> Chain,
    SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
  StoreInst *S0 = cast<StoreInst>(Chain[0]);

  // If the vector has an int element, default to int for the whole store.
  Type *StoreTy;
  for (Instruction *I : Chain) {
    StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
    if (StoreTy->isIntOrIntVectorTy())
      break;

    if (StoreTy->isPtrOrPtrVectorTy()) {
      StoreTy = Type::getIntNTy(F.getParent()->getContext(),
                                DL.getTypeSizeInBits(StoreTy));
      break;
    }
  }

  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
  unsigned AS = S0->getPointerAddressSpace();
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
  unsigned Alignment = getAlignment(S0);

  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }

  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
  if (NewChain.empty()) {
    // No vectorization possible.
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }
  if (NewChain.size() == 1) {
    // Failed after the first instruction. Discard it and try the smaller
    // chain.
    InstructionsProcessed->insert(NewChain.front());
    return false;
  }

  // Update Chain to the valid vectorizable subchain.
  Chain = NewChain;
  ChainSize = Chain.size();

  // Check if it's legal to vectorize this chain. If not, split the chain and
  // try again.
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;
  if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
    auto Chains = splitOddVectorElts(Chain, Sz);
    return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
           vectorizeStoreChain(Chains.second, InstructionsProcessed);
  }

  VectorType *VecTy;
  VectorType *VecStoreTy = dyn_cast<VectorType>(StoreTy);
  if (VecStoreTy)
    VecTy = VectorType::get(StoreTy->getScalarType(),
                            Chain.size() * VecStoreTy->getNumElements());
  else
    VecTy = VectorType::get(StoreTy, Chain.size());

  // If it's more than the max vector size or the target has a better
  // vector factor, break it into two pieces.
  unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
    DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
                    " Creating two separate arrays.\n");
    return vectorizeStoreChain(Chain.slice(0, TargetVF),
                               InstructionsProcessed) |
           vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
  }

  DEBUG({
    dbgs() << "LSV: Stores to vectorize:\n";
    for (Instruction *I : Chain)
      dbgs() << "  " << *I << "\n";
  });

  // We won't try again to vectorize the elements of the chain, regardless of
  // whether we succeed below.
  InstructionsProcessed->insert(Chain.begin(), Chain.end());

  // If the store is going to be misaligned, don't vectorize it.
  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
    if (S0->getPointerAddressSpace() != 0)
      return false;

    unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
                                                   StackAdjustedAlignment,
                                                   DL, S0, nullptr, &DT);
    if (NewAlign < StackAdjustedAlignment)
      return false;
  }

  BasicBlock::iterator First, Last;
  std::tie(First, Last) = getBoundaryInstrs(Chain);
  Builder.SetInsertPoint(&*Last);

  Value *Vec = UndefValue::get(VecTy);

  if (VecStoreTy) {
    unsigned VecWidth = VecStoreTy->getNumElements();
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      StoreInst *Store = cast<StoreInst>(Chain[I]);
      for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
        unsigned NewIdx = J + I * VecWidth;
        Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
                                                      Builder.getInt32(J));
        if (Extract->getType() != StoreTy->getScalarType())
          Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());

        Value *Insert = Builder.CreateInsertElement(Vec, Extract,
                                                    Builder.getInt32(NewIdx));
        Vec = Insert;
      }
    }
  } else {
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      StoreInst *Store = cast<StoreInst>(Chain[I]);
      Value *Extract = Store->getValueOperand();
      if (Extract->getType() != StoreTy->getScalarType())
        Extract =
            Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());

      Value *Insert =
          Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
      Vec = Insert;
    }
  }

  // This cast is safe because Builder.CreateStore() always creates a bona fide
  // StoreInst.
  StoreInst *SI = cast<StoreInst>(
      Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
                                                     VecTy->getPointerTo(AS))));
  propagateMetadata(SI, Chain);
  SI->setAlignment(Alignment);

  eraseInstructions(Chain);
  ++NumVectorInstructions;
  NumScalarsVectorized += Chain.size();
  return true;
}

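// Editorial note: vectorizeLoadChain mirrors the store path, but inserts the
// wide load at the *first* instruction of the chain and rewrites each scalar
// load's uses as extractelements. When the chain elements are themselves
// vector loads (say two loads of <2 x i16> combined into <4 x i16>), each
// original extract index J on chain element I is remapped to J + I * VecWidth
// below.
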
bool Vectorizer::vectorizeLoadChain(
    ArrayRef<Instruction *> Chain,
    SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
  LoadInst *L0 = cast<LoadInst>(Chain[0]);

  // If the vector has an int element, default to int for the whole load.
  Type *LoadTy;
  for (const auto &V : Chain) {
    LoadTy = cast<LoadInst>(V)->getType();
    if (LoadTy->isIntOrIntVectorTy())
      break;

    if (LoadTy->isPtrOrPtrVectorTy()) {
      LoadTy = Type::getIntNTy(F.getParent()->getContext(),
                               DL.getTypeSizeInBits(LoadTy));
      break;
    }
  }

  unsigned Sz = DL.getTypeSizeInBits(LoadTy);
  unsigned AS = L0->getPointerAddressSpace();
  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
  unsigned VF = VecRegSize / Sz;
  unsigned ChainSize = Chain.size();
  unsigned Alignment = getAlignment(L0);

  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }

  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
  if (NewChain.empty()) {
    // No vectorization possible.
    InstructionsProcessed->insert(Chain.begin(), Chain.end());
    return false;
  }
  if (NewChain.size() == 1) {
    // Failed after the first instruction. Discard it and try the smaller
    // chain.
    InstructionsProcessed->insert(NewChain.front());
    return false;
  }

  // Update Chain to the valid vectorizable subchain.
  Chain = NewChain;
  ChainSize = Chain.size();

  // Check if it's legal to vectorize this chain. If not, split the chain and
  // try again.
  unsigned EltSzInBytes = Sz / 8;
  unsigned SzInBytes = EltSzInBytes * ChainSize;
  if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
    auto Chains = splitOddVectorElts(Chain, Sz);
    return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
           vectorizeLoadChain(Chains.second, InstructionsProcessed);
  }

  VectorType *VecTy;
  VectorType *VecLoadTy = dyn_cast<VectorType>(LoadTy);
  if (VecLoadTy)
    VecTy = VectorType::get(LoadTy->getScalarType(),
                            Chain.size() * VecLoadTy->getNumElements());
  else
    VecTy = VectorType::get(LoadTy, Chain.size());

  // If it's more than the max vector size or the target has a better
  // vector factor, break it into two pieces.
  unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
    DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
                    " Creating two separate arrays.\n");
    return vectorizeLoadChain(Chain.slice(0, TargetVF),
                              InstructionsProcessed) |
           vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
  }

  // We won't try again to vectorize the elements of the chain, regardless of
  // whether we succeed below.
  InstructionsProcessed->insert(Chain.begin(), Chain.end());

  // If the load is going to be misaligned, don't vectorize it.
  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
    if (L0->getPointerAddressSpace() != 0)
      return false;

    unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
                                                   StackAdjustedAlignment,
                                                   DL, L0, nullptr, &DT);
    if (NewAlign < StackAdjustedAlignment)
      return false;

    Alignment = NewAlign;
  }

  DEBUG({
    dbgs() << "LSV: Loads to vectorize:\n";
    for (Instruction *I : Chain)
      I->dump();
  });

  // getVectorizablePrefix already computed getBoundaryInstrs. The value of
  // Last may have changed since then, but the value of First won't have. If it
  // matters, we could compute getBoundaryInstrs only once and reuse it here.
  BasicBlock::iterator First, Last;
  std::tie(First, Last) = getBoundaryInstrs(Chain);
  Builder.SetInsertPoint(&*First);

  Value *Bitcast =
      Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
  // This cast is safe because Builder.CreateLoad always creates a bona fide
  // LoadInst.
  LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
  propagateMetadata(LI, Chain);
  LI->setAlignment(Alignment);

  if (VecLoadTy) {
    SmallVector<Instruction *, 16> InstrsToErase;

    unsigned VecWidth = VecLoadTy->getNumElements();
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      for (auto Use : Chain[I]->users()) {
        // All users of vector loads are ExtractElement instructions with
        // constant indices, otherwise we would have bailed before now.
        Instruction *UI = cast<Instruction>(Use);
        unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
        unsigned NewIdx = Idx + I * VecWidth;
        Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
                                                UI->getName());
        if (V->getType() != UI->getType())
          V = Builder.CreateBitCast(V, UI->getType());

        // Replace the old instruction.
        UI->replaceAllUsesWith(V);
        InstrsToErase.push_back(UI);
      }
    }

    // Bitcast might not be an Instruction, if the value being loaded is a
    // constant. In that case, no need to reorder anything.
    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
      reorder(BitcastInst);

    for (auto I : InstrsToErase)
      I->eraseFromParent();
  } else {
    for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
      Value *CV = Chain[I];
      Value *V =
          Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
      if (V->getType() != CV->getType()) {
        V = Builder.CreateBitOrPointerCast(V, CV->getType());
      }

      // Replace the old instruction.
      CV->replaceAllUsesWith(V);
    }

    if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
      reorder(BitcastInst);
  }

  eraseInstructions(Chain);

  ++NumVectorInstructions;
  NumScalarsVectorized += Chain.size();
  return true;
}

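// Editorial note: an access counts as acceptably aligned if its alignment is
// a multiple of its size (e.g. a 16-byte access at align 16), or if the
// target says the misaligned form is both allowed and fast. Otherwise the
// callers above bail out, except for address-space-0 stack objects, whose
// alignment they first try to raise to StackAdjustedAlignment.
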
bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
                                    unsigned Alignment) {
  if (Alignment % SzInBytes == 0)
    return false;

  bool Fast = false;
  bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
                                                   SzInBytes * 8, AddressSpace,
                                                   Alignment, &Fast);
  DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
               << " and fast? " << Fast << "\n";);
  return !Allows || !Fast;
}