LLVM  4.0.0
MemCpyOptimizer.cpp
1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs various transformations related to eliminating memcpy
11 // calls, or transforming sets of stores into memset's.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
16 #include "llvm/Transforms/Scalar.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/IR/DataLayout.h"
22 #include "llvm/IR/GetElementPtrTypeIterator.h"
23 #include "llvm/IR/GlobalVariable.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/Support/Debug.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Transforms/Utils/Local.h"
28 #include <algorithm>
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "memcpyopt"
32 
33 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
34 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
35 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
36 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
37 
38 static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
39  bool &VariableIdxFound,
40  const DataLayout &DL) {
41  // Skip over the first indices.
42  gep_type_iterator GTI = gep_type_begin(GEP);
43  for (unsigned i = 1; i != Idx; ++i, ++GTI)
44  /*skip along*/;
45 
46  // Compute the offset implied by the rest of the indices.
47  int64_t Offset = 0;
48  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
49  ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
50  if (!OpC)
51  return VariableIdxFound = true;
52  if (OpC->isZero()) continue; // No offset.
53 
54  // Handle struct indices, which add their field offset to the pointer.
55  if (StructType *STy = GTI.getStructTypeOrNull()) {
56  Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
57  continue;
58  }
59 
60  // Otherwise, we have a sequential type like an array or vector. Multiply
61  // the index by the ElementSize.
62  uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
63  Offset += Size*OpC->getSExtValue();
64  }
65 
66  return Offset;
67 }
68 
69 /// Return true if Ptr2 is provably equal to Ptr1 plus a constant offset, and
70 /// return that constant offset. For example, Ptr1 might be &A[42], and Ptr2
71 /// might be &A[40]. In this case the offset would be -8.
72 static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
73  const DataLayout &DL) {
74  Ptr1 = Ptr1->stripPointerCasts();
75  Ptr2 = Ptr2->stripPointerCasts();
76 
77  // Handle the trivial case first.
78  if (Ptr1 == Ptr2) {
79  Offset = 0;
80  return true;
81  }
82 
83  GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
84  GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
85 
86  bool VariableIdxFound = false;
87 
88  // If one pointer is a GEP and the other isn't, then see if the GEP is a
89  // constant offset from the base, as in "P" and "gep P, 1".
90  if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
91  Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
92  return !VariableIdxFound;
93  }
94 
95  if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
96  Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
97  return !VariableIdxFound;
98  }
99 
100  // Right now we handle the case where Ptr1/Ptr2 are both GEPs with an identical
101  // base. After that base, they may have some number of common (and
102  // potentially variable) indices. After those, each may add a constant
103  // offset, which determines their offset from each other. We handle no
104  // other cases at this point.
105  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
106  return false;
107 
108  // Skip any common indices and track the GEP types.
109  unsigned Idx = 1;
110  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
111  if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
112  break;
113 
114  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
115  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
116  if (VariableIdxFound) return false;
117 
118  Offset = Offset2-Offset1;
119  return true;
120 }
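// Illustrative sketch (not part of the original source; names are invented):
// given IR such as
//   %p42 = getelementptr inbounds [64 x i32], [64 x i32]* %A, i64 0, i64 42
//   %p40 = getelementptr inbounds [64 x i32], [64 x i32]* %A, i64 0, i64 40
// IsPointerOffset(%p42, %p40, Offset, DL) sees a common base %A and only
// constant trailing indices, so it succeeds with
//   Offset = Offset2 - Offset1 = 40*4 - 42*4 = -8.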
121 
122 
123 /// Represents a range of memset'd bytes with the ByteVal value.
124 /// This allows us to analyze stores like:
125 /// store 0 -> P+1
126 /// store 0 -> P+0
127 /// store 0 -> P+3
128 /// store 0 -> P+2
129 /// which sometimes happens with stores to arrays of structs etc. When we see
130 /// the first store, we make a range [1, 2). The second store extends the range
131 /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
132 /// two ranges into [0, 3) which is memset'able.
133 namespace {
134 struct MemsetRange {
135  // Start/End - A semi range that describes the span that this range covers.
136  // The range is closed at the start and open at the end: [Start, End).
137  int64_t Start, End;
138 
139  /// StartPtr - The getelementptr instruction that points to the start of the
140  /// range.
141  Value *StartPtr;
142 
143  /// Alignment - The known alignment of the first store.
144  unsigned Alignment;
145 
146  /// TheStores - The actual stores that make up this range.
147  SmallVector<Instruction*, 16> TheStores;
148 
149  bool isProfitableToUseMemset(const DataLayout &DL) const;
150 };
151 } // end anon namespace
152 
153 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
154  // If we found more than 4 stores to merge or 16 bytes, use memset.
155  if (TheStores.size() >= 4 || End-Start >= 16) return true;
156 
157  // If there is nothing to merge, don't do anything.
158  if (TheStores.size() < 2) return false;
159 
160  // If any of the stores are a memset, then it is always good to extend the
161  // memset.
162  for (Instruction *SI : TheStores)
163  if (!isa<StoreInst>(SI))
164  return true;
165 
166  // Assume that the code generator is capable of merging pairs of stores
167  // together if it wants to.
168  if (TheStores.size() == 2) return false;
169 
170  // If we have fewer than 8 stores, it can still be worthwhile to do this.
171  // For example, merging 4 i8 stores into an i32 store is useful almost always.
172  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
173  // memset will be split into 2 32-bit stores anyway) and doing so can
174  // pessimize the llvm optimizer.
175  //
176  // Since we don't have perfect knowledge here, make some assumptions: assume
177  // the maximum GPR width is the same size as the largest legal integer
178  // size. If so, check to see whether we will end up actually reducing the
179  // number of stores used.
180  unsigned Bytes = unsigned(End-Start);
181  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
182  if (MaxIntSize == 0)
183  MaxIntSize = 1;
184  unsigned NumPointerStores = Bytes / MaxIntSize;
185 
186  // Assume the remaining bytes if any are done a byte at a time.
187  unsigned NumByteStores = Bytes % MaxIntSize;
188 
189  // If we will reduce the # stores (according to this heuristic), do the
190  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
191  // etc.
192  return TheStores.size() > NumPointerStores+NumByteStores;
193 }
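// A hedged worked example of the heuristic above (not from the original
// source): on a target whose largest legal integer type is i64
// (MaxIntSize == 8), one i32 store plus two adjacent i16 stores cover
// Bytes == 8, giving NumPointerStores == 1 and NumByteStores == 0; since
// 3 > 1, the range is considered profitable to lower to a single memset.
// Three adjacent i32 stores, by contrast, cover Bytes == 12, giving
// 1 + 4 = 5 equivalent stores, and 3 > 5 fails, so they are left alone.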
194 
195 
196 namespace {
197 class MemsetRanges {
198  /// A sorted list of the memset ranges.
199  SmallVector<MemsetRange, 8> Ranges;
200  typedef SmallVectorImpl<MemsetRange>::iterator range_iterator;
201  const DataLayout &DL;
202 public:
203  MemsetRanges(const DataLayout &DL) : DL(DL) {}
204 
205  typedef SmallVectorImpl<MemsetRange>::const_iterator const_iterator;
206  const_iterator begin() const { return Ranges.begin(); }
207  const_iterator end() const { return Ranges.end(); }
208  bool empty() const { return Ranges.empty(); }
209 
210  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
211  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
212  addStore(OffsetFromFirst, SI);
213  else
214  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
215  }
216 
217  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
218  int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
219 
220  addRange(OffsetFromFirst, StoreSize,
221  SI->getPointerOperand(), SI->getAlignment(), SI);
222  }
223 
224  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
225  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
226  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
227  }
228 
229  void addRange(int64_t Start, int64_t Size, Value *Ptr,
230  unsigned Alignment, Instruction *Inst);
231 
232 };
233 
234 } // end anon namespace
235 
236 
237 /// Add a new store to the MemsetRanges data structure. This adds a
238 /// new range for the specified store at the specified offset, merging into
239 /// existing ranges as appropriate.
240 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
241  unsigned Alignment, Instruction *Inst) {
242  int64_t End = Start+Size;
243 
244  range_iterator I = std::lower_bound(Ranges.begin(), Ranges.end(), Start,
245  [](const MemsetRange &LHS, int64_t RHS) { return LHS.End < RHS; });
246 
247  // We now know that I == E, in which case we didn't find anything to merge
248  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
249  // to insert a new range. Handle this now.
250  if (I == Ranges.end() || End < I->Start) {
251  MemsetRange &R = *Ranges.insert(I, MemsetRange());
252  R.Start = Start;
253  R.End = End;
254  R.StartPtr = Ptr;
255  R.Alignment = Alignment;
256  R.TheStores.push_back(Inst);
257  return;
258  }
259 
260  // This store overlaps with I, add it.
261  I->TheStores.push_back(Inst);
262 
263  // At this point, we may have an interval that completely contains our store.
264  // If so, just add it to the interval and return.
265  if (I->Start <= Start && I->End >= End)
266  return;
267 
268  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
269  // but is not entirely contained within the range.
270 
271  // See if this store extends the start of the range. In this case, it couldn't
272  // possibly cause it to join the prior range, because otherwise we would have
273  // stopped on *it*.
274  if (Start < I->Start) {
275  I->Start = Start;
276  I->StartPtr = Ptr;
277  I->Alignment = Alignment;
278  }
279 
280  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
281  // is in or right at the end of I), and that End >= I->Start. Extend I out to
282  // End.
283  if (End > I->End) {
284  I->End = End;
285  range_iterator NextI = I;
286  while (++NextI != Ranges.end() && End >= NextI->Start) {
287  // Merge the range in.
288  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
289  if (NextI->End > I->End)
290  I->End = NextI->End;
291  Ranges.erase(NextI);
292  NextI = I;
293  }
294  }
295 }
296 
297 //===----------------------------------------------------------------------===//
298 // MemCpyOptLegacyPass Pass
299 //===----------------------------------------------------------------------===//
300 
301 namespace {
302  class MemCpyOptLegacyPass : public FunctionPass {
303  MemCpyOptPass Impl;
304  public:
305  static char ID; // Pass identification, replacement for typeid
306  MemCpyOptLegacyPass() : FunctionPass(ID) {
307  initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
308  }
309 
310  bool runOnFunction(Function &F) override;
311 
312  private:
313  // This transformation requires dominator info.
314  void getAnalysisUsage(AnalysisUsage &AU) const override {
315  AU.setPreservesCFG();
316  AU.addRequired<AssumptionCacheTracker>();
317  AU.addRequired<DominatorTreeWrapperPass>();
318  AU.addRequired<MemoryDependenceWrapperPass>();
319  AU.addRequired<AAResultsWrapperPass>();
320  AU.addRequired<TargetLibraryInfoWrapperPass>();
321  AU.addPreserved<GlobalsAAWrapperPass>();
322  AU.addPreserved<MemoryDependenceWrapperPass>();
323  }
324 
325  // Helper functions
326  bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
327  bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
328  bool processMemCpy(MemCpyInst *M);
329  bool processMemMove(MemMoveInst *M);
330  bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
331  uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
332  bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
333  bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
334  bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
335  bool processByValArgument(CallSite CS, unsigned ArgNo);
336  Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
337  Value *ByteVal);
338 
339  bool iterateOnFunction(Function &F);
340  };
341 
342  char MemCpyOptLegacyPass::ID = 0;
343 }
344 
345 /// The public interface to this file...
346 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
347 
348 INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
349  false, false)
350 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
351 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
352 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
353 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
354 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
355 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
356 INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
357  false, false)
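// Usage note (illustrative, not part of the original source): "memcpyopt" is
// the pass name registered above, so the legacy pass can be run in isolation
// with something like:
//   opt -memcpyopt -S input.ll -o output.ll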
358 
359 /// When scanning forward over instructions, we look for some other patterns to
360 /// fold away. In particular, this looks for stores to neighboring locations of
361 /// memory. If it sees enough consecutive ones, it attempts to merge them
362 /// together into a memcpy/memset.
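/// A hedged illustration of the memset case (example IR, not from this file):
/// \code
///   store i8 0, i8* %p0, align 1
///   store i8 0, i8* %p1, align 1   ; %p1 == %p0 + 1
///   store i8 0, i8* %p2, align 1   ; %p2 == %p0 + 2
///   store i8 0, i8* %p3, align 1   ; %p3 == %p0 + 3
/// \endcode
/// becomes
/// \code
///   call void @llvm.memset.p0i8.i64(i8* %p0, i8 0, i64 4, i32 1, i1 false)
/// \endcode
/// provided the stored value is byte-splattable and the offsets are constant.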
363 Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
364  Value *StartPtr,
365  Value *ByteVal) {
366  const DataLayout &DL = StartInst->getModule()->getDataLayout();
367 
368  // Okay, so we now have a single store of a splattable value. Scan to find
369  // all subsequent stores of the same value at offsets from the same pointer.
370  // Join these together into ranges, so we can decide whether contiguous blocks
371  // are stored.
372  MemsetRanges Ranges(DL);
373 
374  BasicBlock::iterator BI(StartInst);
375  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
376  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
377  // If the instruction is readnone, ignore it, otherwise bail out. We
378  // don't even allow readonly here because we don't want something like:
379  // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
380  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
381  break;
382  continue;
383  }
384 
385  if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
386  // If this is a store, see if we can merge it in.
387  if (!NextStore->isSimple()) break;
388 
389  // Check to see if this stored value is of the same byte-splattable value.
390  if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
391  break;
392 
393  // Check to see if this store is to a constant offset from the start ptr.
394  int64_t Offset;
395  if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
396  DL))
397  break;
398 
399  Ranges.addStore(Offset, NextStore);
400  } else {
401  MemSetInst *MSI = cast<MemSetInst>(BI);
402 
403  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
404  !isa<ConstantInt>(MSI->getLength()))
405  break;
406 
407  // Check to see if this store is to a constant offset from the start ptr.
408  int64_t Offset;
409  if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
410  break;
411 
412  Ranges.addMemSet(Offset, MSI);
413  }
414  }
415 
416  // If we have no ranges, then we just had a single store with nothing that
417  // could be merged in. This is a very common case of course.
418  if (Ranges.empty())
419  return nullptr;
420 
421  // If we had at least one store that could be merged in, add the starting
422  // store as well. As a small compile-time optimization, we avoid doing this
423  // unless there is at least something interesting to merge with.
424  Ranges.addInst(0, StartInst);
425 
426  // If we create any memsets, we put them right before the first instruction
427  // that isn't part of the memset block. This ensures that the memset is dominated
428  // by any addressing instruction needed by the start of the block.
429  IRBuilder<> Builder(&*BI);
430 
431  // Now that we have full information about ranges, loop over the ranges and
432  // emit memset's for anything big enough to be worthwhile.
433  Instruction *AMemSet = nullptr;
434  for (const MemsetRange &Range : Ranges) {
435 
436  if (Range.TheStores.size() == 1) continue;
437 
438  // If it is profitable to lower this range to memset, do so now.
439  if (!Range.isProfitableToUseMemset(DL))
440  continue;
441 
442  // Otherwise, we do want to transform this! Create a new memset.
443  // Get the starting pointer of the block.
444  StartPtr = Range.StartPtr;
445 
446  // Determine alignment
447  unsigned Alignment = Range.Alignment;
448  if (Alignment == 0) {
449  Type *EltType =
450  cast<PointerType>(StartPtr->getType())->getElementType();
451  Alignment = DL.getABITypeAlignment(EltType);
452  }
453 
454  AMemSet =
455  Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
456 
457  DEBUG(dbgs() << "Replace stores:\n";
458  for (Instruction *SI : Range.TheStores)
459  dbgs() << *SI << '\n';
460  dbgs() << "With: " << *AMemSet << '\n');
461 
462  if (!Range.TheStores.empty())
463  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
464 
465  // Zap all the stores.
466  for (Instruction *SI : Range.TheStores) {
467  MD->removeInstruction(SI);
468  SI->eraseFromParent();
469  }
470  ++NumMemSetInfer;
471  }
472 
473  return AMemSet;
474 }
475 
476 static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
477  const LoadInst *LI) {
478  unsigned StoreAlign = SI->getAlignment();
479  if (!StoreAlign)
480  StoreAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
481  unsigned LoadAlign = LI->getAlignment();
482  if (!LoadAlign)
483  LoadAlign = DL.getABITypeAlignment(LI->getType());
484 
485  return std::min(StoreAlign, LoadAlign);
486 }
487 
488 // This method tries to lift a store instruction before position P.
489 // It will lift the store and its operands, plus anything that
490 // may alias with these.
491 // The method returns true if it was successful.
492 static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
493  const LoadInst *LI) {
494  // If the store aliases this position, bail out early.
495  MemoryLocation StoreLoc = MemoryLocation::get(SI);
496  if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef)
497  return false;
498 
499  // Keep track of the arguments of all instructions we plan to lift
500  // so we can make sure to lift them as well if appropriate.
501  DenseSet<Instruction*> Args;
502  if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
503  if (Ptr->getParent() == SI->getParent())
504  Args.insert(Ptr);
505 
506  // Instructions to lift before P.
507  SmallVector<Instruction*, 8> ToLift;
508 
509  // Memory locations of lifted instructions.
510  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
511 
512  // Lifted callsites.
513  SmallVector<ImmutableCallSite, 8> CallSites;
514 
515  const MemoryLocation LoadLoc = MemoryLocation::get(LI);
516 
517  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
518  auto *C = &*I;
519 
520  bool MayAlias = AA.getModRefInfo(C) != MRI_NoModRef;
521 
522  bool NeedLift = false;
523  if (Args.erase(C))
524  NeedLift = true;
525  else if (MayAlias) {
526  NeedLift = any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
527  return AA.getModRefInfo(C, ML);
528  });
529 
530  if (!NeedLift)
531  NeedLift = any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
532  return AA.getModRefInfo(C, CS);
533  });
534  }
535 
536  if (!NeedLift)
537  continue;
538 
539  if (MayAlias) {
540  // Since LI is implicitly moved downwards past the lifted instructions,
541  // none of them may modify its source.
542  if (AA.getModRefInfo(C, LoadLoc) & MRI_Mod)
543  return false;
544  else if (auto CS = ImmutableCallSite(C)) {
545  // If we can't lift this before P, it's game over.
546  if (AA.getModRefInfo(P, CS) != MRI_NoModRef)
547  return false;
548 
549  CallSites.push_back(CS);
550  } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
551  // If we can't lift this before P, it's game over.
552  auto ML = MemoryLocation::get(C);
553  if (AA.getModRefInfo(P, ML) != MRI_NoModRef)
554  return false;
555 
556  MemLocs.push_back(ML);
557  } else
558  // We don't know how to lift this instruction.
559  return false;
560  }
561 
562  ToLift.push_back(C);
563  for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
564  if (auto *A = dyn_cast<Instruction>(C->getOperand(k)))
565  if (A->getParent() == SI->getParent())
566  Args.insert(A);
567  }
568 
569  // We made it; now perform the lifting.
570  for (auto *I : reverse(ToLift)) {
571  DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
572  I->moveBefore(P);
573  }
574 
575  return true;
576 }
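// A hedged sketch of the situation handled above (invented IR, not from this
// file): given
//   %v   = load %T, %T* %src
//   %gep = getelementptr %T, %T* %base, i64 1      ; feeds the store address
//   call void @may_write_src(%T* %src)             ; this is P
//   store %T %v, %T* %gep
// the store and its address computation %gep must be hoisted above the call
// before the load/store pair can be promoted to a memcpy at P; moveUp()
// verifies that doing so changes no observable memory behavior.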
577 
578 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
579  if (!SI->isSimple()) return false;
580 
581  // Avoid merging nontemporal stores since the resulting
582  // memcpy/memset would not be able to preserve the nontemporal hint.
583  // In theory we could teach this pass how to propagate the !nontemporal
584  // metadata to memset calls. However, that change would force the backend to
585  // conservatively expand !nontemporal memset calls back to sequences of
586  // store instructions (effectively undoing the merging).
587  if (SI->getMetadata(LLVMContext::MD_nontemporal))
588  return false;
589 
590  const DataLayout &DL = SI->getModule()->getDataLayout();
591 
592  // Load to store forwarding can be interpreted as memcpy.
593  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
594  if (LI->isSimple() && LI->hasOneUse() &&
595  LI->getParent() == SI->getParent()) {
596 
597  auto *T = LI->getType();
598  if (T->isAggregateType()) {
599  AliasAnalysis &AA = LookupAliasAnalysis();
600  MemoryLocation LoadLoc = MemoryLocation::get(LI);
601 
602  // We use alias analysis to check if an instruction may store to
603  // the memory we load from in between the load and the store. If
604  // such an instruction is found, we try to promote there instead
605  // of at the store position.
606  Instruction *P = SI;
607  for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
608  if (AA.getModRefInfo(&I, LoadLoc) & MRI_Mod) {
609  P = &I;
610  break;
611  }
612  }
613 
614  // We found an instruction that may write to the loaded memory.
615  // We can try to promote at this position instead of the store
616  // position if nothing aliases the store memory after this and the store
617  // destination is not in the range.
618  if (P && P != SI) {
619  if (!moveUp(AA, SI, P, LI))
620  P = nullptr;
621  }
622 
623  // If a valid insertion position is found, then we can promote
624  // the load/store pair to a memcpy.
625  if (P) {
626  // If we load from memory that may alias the memory we store to,
627  // memmove must be used to preserve semantics. If not, memcpy can
628  // be used.
629  bool UseMemMove = false;
630  if (!AA.isNoAlias(MemoryLocation::get(SI), LoadLoc))
631  UseMemMove = true;
632 
633  unsigned Align = findCommonAlignment(DL, SI, LI);
634  uint64_t Size = DL.getTypeStoreSize(T);
635 
636  IRBuilder<> Builder(P);
637  Instruction *M;
638  if (UseMemMove)
639  M = Builder.CreateMemMove(SI->getPointerOperand(),
640  LI->getPointerOperand(), Size,
641  Align, SI->isVolatile());
642  else
643  M = Builder.CreateMemCpy(SI->getPointerOperand(),
644  LI->getPointerOperand(), Size,
645  Align, SI->isVolatile());
646 
647  DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI
648  << " => " << *M << "\n");
649 
650  MD->removeInstruction(SI);
651  SI->eraseFromParent();
652  MD->removeInstruction(LI);
653  LI->eraseFromParent();
654  ++NumMemCpyInstr;
655 
656  // Make sure we do not invalidate the iterator.
657  BBI = M->getIterator();
658  return true;
659  }
660  }
661 
662  // Detect cases where we're performing call slot forwarding, but
663  // happen to be using a load-store pair to implement it, rather than
664  // a memcpy.
665  MemDepResult ldep = MD->getDependency(LI);
666  CallInst *C = nullptr;
667  if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
668  C = dyn_cast<CallInst>(ldep.getInst());
669 
670  if (C) {
671  // Check that nothing touches the dest of the "copy" between
672  // the call and the store.
673  Value *CpyDest = SI->getPointerOperand()->stripPointerCasts();
674  bool CpyDestIsLocal = isa<AllocaInst>(CpyDest);
675  AliasAnalysis &AA = LookupAliasAnalysis();
676  MemoryLocation StoreLoc = MemoryLocation::get(SI);
677  for (BasicBlock::iterator I = --SI->getIterator(), E = C->getIterator();
678  I != E; --I) {
679  if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
680  C = nullptr;
681  break;
682  }
683  // The store to dest may never happen if an exception can be thrown
684  // between the load and the store.
685  if (I->mayThrow() && !CpyDestIsLocal) {
686  C = nullptr;
687  break;
688  }
689  }
690  }
691 
692  if (C) {
693  bool changed = performCallSlotOptzn(
694  LI, SI->getPointerOperand()->stripPointerCasts(),
695  LI->getPointerOperand()->stripPointerCasts(),
696  DL.getTypeStoreSize(SI->getOperand(0)->getType()),
697  findCommonAlignment(DL, SI, LI), C);
698  if (changed) {
699  MD->removeInstruction(SI);
700  SI->eraseFromParent();
701  MD->removeInstruction(LI);
702  LI->eraseFromParent();
703  ++NumMemCpyInstr;
704  return true;
705  }
706  }
707  }
708  }
709 
710  // There are two cases that are interesting for this code to handle: memcpy
711  // and memset. Right now we only handle memset.
712 
713  // Ensure that the value being stored is something that can be memset'd a
714  // byte at a time, like "0" or "-1" of any width, as well as things like
715  // 0xA0A0A0A0 and 0.0.
716  auto *V = SI->getOperand(0);
717  if (Value *ByteVal = isBytewiseValue(V)) {
718  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
719  ByteVal)) {
720  BBI = I->getIterator(); // Don't invalidate iterator.
721  return true;
722  }
723 
724  // If we have an aggregate, we try to promote it to memset regardless
725  // of opportunity for merging as it can expose optimization opportunities
726  // in subsequent passes.
727  auto *T = V->getType();
728  if (T->isAggregateType()) {
729  uint64_t Size = DL.getTypeStoreSize(T);
730  unsigned Align = SI->getAlignment();
731  if (!Align)
732  Align = DL.getABITypeAlignment(T);
733  IRBuilder<> Builder(SI);
734  auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
735  Size, Align, SI->isVolatile());
736 
737  DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
738 
739  MD->removeInstruction(SI);
740  SI->eraseFromParent();
741  NumMemSetInfer++;
742 
743  // Make sure we do not invalidate the iterator.
744  BBI = M->getIterator();
745  return true;
746  }
747  }
748 
749  return false;
750 }
751 
752 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
753  // See if there is another memset or store neighboring this memset which
754  // allows us to widen out the memset to do a single larger store.
755  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
756  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
757  MSI->getValue())) {
758  BBI = I->getIterator(); // Don't invalidate iterator.
759  return true;
760  }
761  return false;
762 }
763 
764 
765 /// Takes a memcpy and a call that it depends on,
766 /// and checks for the possibility of a call slot optimization by having
767 /// the call write its result directly into the destination of the memcpy.
768 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpy, Value *cpyDest,
769  Value *cpySrc, uint64_t cpyLen,
770  unsigned cpyAlign, CallInst *C) {
771  // The general transformation to keep in mind is
772  //
773  // call @func(..., src, ...)
774  // memcpy(dest, src, ...)
775  //
776  // ->
777  //
778  // memcpy(dest, src, ...)
779  // call @func(..., dest, ...)
780  //
781  // Since moving the memcpy is technically awkward, we additionally check that
782  // src only holds uninitialized values at the moment of the call, meaning that
783  // the memcpy can be discarded rather than moved.
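  // A hedged IR-level illustration (names invented, not from this file):
  //   %tmp = alloca %T
  //   call void @produce(%T* sret %tmp)
  //   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst8, i8* %tmp8, i64 N, i32 A, i1 false)
  // can become, once every check below succeeds,
  //   call void @produce(%T* sret %dst)
  // with the memcpy erased, because %tmp held only undefined bytes when the
  // call executed (%dst8/%tmp8 stand for i8* casts of %dst/%tmp).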
784 
785  // Lifetime marks shouldn't be operated on.
786  if (Function *F = C->getCalledFunction())
787  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
788  return false;
789 
790  // Deliberately get the source and destination with bitcasts stripped away,
791  // because we'll need to do type comparisons based on the underlying type.
792  CallSite CS(C);
793 
794  // Require that src be an alloca. This simplifies the reasoning considerably.
795  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
796  if (!srcAlloca)
797  return false;
798 
799  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
800  if (!srcArraySize)
801  return false;
802 
803  const DataLayout &DL = cpy->getModule()->getDataLayout();
804  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
805  srcArraySize->getZExtValue();
806 
807  if (cpyLen < srcSize)
808  return false;
809 
810  // Check that accessing the first srcSize bytes of dest will not cause a
811  // trap. Otherwise the transform is invalid since it might cause a trap
812  // to occur earlier than it otherwise would.
813  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
814  // The destination is an alloca. Check it is larger than srcSize.
815  ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
816  if (!destArraySize)
817  return false;
818 
819  uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
820  destArraySize->getZExtValue();
821 
822  if (destSize < srcSize)
823  return false;
824  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
825  // The store to dest may never happen if the call can throw.
826  if (C->mayThrow())
827  return false;
828 
829  if (A->getDereferenceableBytes() < srcSize) {
830  // If the destination is an sret parameter then only accesses that are
831  // outside of the returned struct type can trap.
832  if (!A->hasStructRetAttr())
833  return false;
834 
835  Type *StructTy = cast<PointerType>(A->getType())->getElementType();
836  if (!StructTy->isSized()) {
837  // The call may never return and hence the copy-instruction may never
838  // be executed, and therefore it's not safe to say "the destination
839  // has at least <cpyLen> bytes, as implied by the copy-instruction",
840  return false;
841  }
842 
843  uint64_t destSize = DL.getTypeAllocSize(StructTy);
844  if (destSize < srcSize)
845  return false;
846  }
847  } else {
848  return false;
849  }
850 
851  // Check that dest points to memory that is at least as aligned as src.
852  unsigned srcAlign = srcAlloca->getAlignment();
853  if (!srcAlign)
854  srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
855  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
856  // If dest is not aligned enough and we can't increase its alignment then
857  // bail out.
858  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
859  return false;
860 
861  // Check that src is not accessed except via the call and the memcpy. This
862  // guarantees that it holds only undefined values when passed in (so the final
863  // memcpy can be dropped), that it is not read or written between the call and
864  // the memcpy, and that writing beyond the end of it is undefined.
865  SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
866  srcAlloca->user_end());
867  while (!srcUseList.empty()) {
868  User *U = srcUseList.pop_back_val();
869 
870  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
871  for (User *UU : U->users())
872  srcUseList.push_back(UU);
873  continue;
874  }
875  if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
876  if (!G->hasAllZeroIndices())
877  return false;
878 
879  for (User *UU : U->users())
880  srcUseList.push_back(UU);
881  continue;
882  }
883  if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
884  if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
885  IT->getIntrinsicID() == Intrinsic::lifetime_end)
886  continue;
887 
888  if (U != C && U != cpy)
889  return false;
890  }
891 
892  // Check that src isn't captured by the called function since the
893  // transformation can cause aliasing issues in that case.
894  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
895  if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
896  return false;
897 
898  // Since we're changing the parameter to the callsite, we need to make sure
899  // that what would be the new parameter dominates the callsite.
900  DominatorTree &DT = LookupDomTree();
901  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
902  if (!DT.dominates(cpyDestInst, C))
903  return false;
904 
905  // In addition to knowing that the call does not access src in some
906  // unexpected manner, for example via a global, which we deduce from
907  // the use analysis, we also need to know that it does not sneakily
908  // access dest. We rely on AA to figure this out for us.
909  AliasAnalysis &AA = LookupAliasAnalysis();
910  ModRefInfo MR = AA.getModRefInfo(C, cpyDest, srcSize);
911  // If necessary, perform additional analysis.
912  if (MR != MRI_NoModRef)
913  MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
914  if (MR != MRI_NoModRef)
915  return false;
916 
917  // All the checks have passed, so do the transformation.
918  bool changedArgument = false;
919  for (unsigned i = 0; i < CS.arg_size(); ++i)
920  if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
921  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
922  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
923  cpyDest->getName(), C);
924  changedArgument = true;
925  if (CS.getArgument(i)->getType() == Dest->getType())
926  CS.setArgument(i, Dest);
927  else
928  CS.setArgument(i, CastInst::CreatePointerCast(Dest,
929  CS.getArgument(i)->getType(), Dest->getName(), C));
930  }
931 
932  if (!changedArgument)
933  return false;
934 
935  // If the destination wasn't sufficiently aligned then increase its alignment.
936  if (!isDestSufficientlyAligned) {
937  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
938  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
939  }
940 
941  // Drop any cached information about the call, because we may have changed
942  // its dependence information by changing its parameter.
943  MD->removeInstruction(C);
944 
945  // Update AA metadata
946  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
947  // handled here, but combineMetadata doesn't support them yet
948  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
949  LLVMContext::MD_noalias,
950  LLVMContext::MD_invariant_group};
951  combineMetadata(C, cpy, KnownIDs);
952 
953  // Remove the memcpy.
954  MD->removeInstruction(cpy);
955  ++NumMemCpyInstr;
956 
957  return true;
958 }
959 
960 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
961 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
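/// A hedged illustration (example IR, not from this file):
/// \code
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 64, i32 1, i1 false)
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %c, i8* %b, i64 64, i32 1, i1 false)
/// \endcode
/// If %a is unchanged in between, the second call can copy straight from %a
/// (memcpy(c <- a)), which often lets DSE remove the first copy entirely.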
962 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
963  MemCpyInst *MDep) {
964  // We can only transform memcpys where the dest of one is the source of the
965  // other.
966  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
967  return false;
968 
969  // If the dep instruction is reading from our current input, then it is a noop
970  // transfer and substituting the input won't change this instruction. Just
971  // ignore the input and let someone else zap MDep. This handles cases like:
972  // memcpy(a <- a)
973  // memcpy(b <- a)
974  if (M->getSource() == MDep->getSource())
975  return false;
976 
977  // Second, the lengths of the memcpys must be the same, or the preceding one
978  // must be larger than the following one.
979  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
980  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
981  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
982  return false;
983 
984  AliasAnalysis &AA = LookupAliasAnalysis();
985 
986  // Verify that the copied-from memory doesn't change in between the two
987  // transfers. For example, in:
988  // memcpy(a <- b)
989  // *b = 42;
990  // memcpy(c <- a)
991  // It would be invalid to transform the second memcpy into memcpy(c <- b).
992  //
993  // TODO: If the code between M and MDep is transparent to the destination "c",
994  // then we could still perform the xform by moving M up to the first memcpy.
995  //
996  // NOTE: This is conservative, it will stop on any read from the source loc,
997  // not just the defining memcpy.
998  MemDepResult SourceDep =
999  MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
1000  M->getIterator(), M->getParent());
1001  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1002  return false;
1003 
1004  // If the dest of the second might alias the source of the first, then the
1005  // source and dest might overlap. We still want to eliminate the intermediate
1006  // value, but we have to generate a memmove instead of memcpy.
1007  bool UseMemMove = false;
1008  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
1009  MemoryLocation::getForSource(MDep)))
1010  UseMemMove = true;
1011 
1012  // If all checks passed, then we can transform M.
1013 
1014  // Make sure to use the lesser of the alignment of the source and the dest
1015  // since we're changing where we're reading from, but don't want to increase
1016  // the alignment past what can be read from or written to.
1017  // TODO: Is this worth it if we're creating a less aligned memcpy? For
1018  // example we could be moving from movaps -> movq on x86.
1019  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
1020 
1021  IRBuilder<> Builder(M);
1022  if (UseMemMove)
1023  Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
1024  Align, M->isVolatile());
1025  else
1026  Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
1027  Align, M->isVolatile());
1028 
1029  // Remove the instruction we're replacing.
1030  MD->removeInstruction(M);
1031  M->eraseFromParent();
1032  ++NumMemCpyInstr;
1033  return true;
1034 }
1035 
1036 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
1037 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1038 /// weren't copied over by \p MemCpy.
1039 ///
1040 /// In other words, transform:
1041 /// \code
1042 /// memset(dst, c, dst_size);
1043 /// memcpy(dst, src, src_size);
1044 /// \endcode
1045 /// into:
1046 /// \code
1047 /// memcpy(dst, src, src_size);
1048 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1049 /// \endcode
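/// For example (illustrative numbers only): with dst_size = 16 and
/// src_size = 8, the rewritten memset covers just bytes [8, 16) of dst; when
/// dst_size <= src_size the emitted select produces a zero-length memset,
/// which is trivially dead.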
1050 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1051  MemSetInst *MemSet) {
1052  // We can only transform memset/memcpy with the same destination.
1053  if (MemSet->getDest() != MemCpy->getDest())
1054  return false;
1055 
1056  // Check that there are no other dependencies on the memset destination.
1057  MemDepResult DstDepInfo =
1058  MD->getPointerDependencyFrom(MemoryLocation::getForDest(MemCpy), false,
1059  MemCpy->getIterator(), MemCpy->getParent());
1060  if (DstDepInfo.getInst() != MemSet)
1061  return false;
1062 
1063  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1064  Value *Dest = MemCpy->getRawDest();
1065  Value *DestSize = MemSet->getLength();
1066  Value *SrcSize = MemCpy->getLength();
1067 
1068  // By default, create an unaligned memset.
1069  unsigned Align = 1;
1070  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1071  // of the sum.
1072  const unsigned DestAlign =
1073  std::max(MemSet->getAlignment(), MemCpy->getAlignment());
1074  if (DestAlign > 1)
1075  if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1076  Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1077 
1078  IRBuilder<> Builder(MemCpy);
1079 
1080  // If the sizes have different types, zext the smaller one.
1081  if (DestSize->getType() != SrcSize->getType()) {
1082  if (DestSize->getType()->getIntegerBitWidth() >
1083  SrcSize->getType()->getIntegerBitWidth())
1084  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1085  else
1086  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1087  }
1088 
1089  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1090  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1091  Value *MemsetLen = Builder.CreateSelect(
1092  Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1093  Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
1094  MemsetLen, Align);
1095 
1096  MD->removeInstruction(MemSet);
1097  MemSet->eraseFromParent();
1098  return true;
1099 }
1100 
1101 /// Transform memcpy to memset when its source was just memset.
1102 /// In other words, turn:
1103 /// \code
1104 /// memset(dst1, c, dst1_size);
1105 /// memcpy(dst2, dst1, dst2_size);
1106 /// \endcode
1107 /// into:
1108 /// \code
1109 /// memset(dst1, c, dst1_size);
1110 /// memset(dst2, c, dst2_size);
1111 /// \endcode
1112 /// When dst2_size <= dst1_size.
1113 ///
1114 /// The \p MemCpy must have a Constant length.
1115 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1116  MemSetInst *MemSet) {
1117  AliasAnalysis &AA = LookupAliasAnalysis();
1118 
1119  // Make sure we have the pattern memcpy(..., memset(...), ...); that is, we are
1120  // memsetting and memcpying from the same address. Otherwise it is hard to reason about.
1121  if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1122  return false;
1123 
1124  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
1125  ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
1126  // Make sure the memcpy doesn't read any more than what the memset wrote.
1127  // Don't worry about sizes larger than i64.
1128  if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
1129  return false;
1130 
1131  IRBuilder<> Builder(MemCpy);
1132  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1133  CopySize, MemCpy->getAlignment());
1134  return true;
1135 }
1136 
1137 /// Perform simplification of memcpy's. If we have memcpy A
1138 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1139 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
1140 /// circumstances). This allows later passes to remove the first memcpy
1141 /// altogether.
1142 bool MemCpyOptPass::processMemCpy(MemCpyInst *M) {
1143  // We can only optimize non-volatile memcpy's.
1144  if (M->isVolatile()) return false;
1145 
1146  // If the source and destination of the memcpy are the same, then zap it.
1147  if (M->getSource() == M->getDest()) {
1148  MD->removeInstruction(M);
1149  M->eraseFromParent();
1150  return false;
1151  }
1152 
1153  // If copying from a constant, try to turn the memcpy into a memset.
1154  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
1155  if (GV->isConstant() && GV->hasDefinitiveInitializer())
1156  if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
1157  IRBuilder<> Builder(M);
1158  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1159  M->getAlignment(), false);
1160  MD->removeInstruction(M);
1161  M->eraseFromParent();
1162  ++NumCpyToSet;
1163  return true;
1164  }
1165 
1166  MemDepResult DepInfo = MD->getDependency(M);
1167 
1168  // Try to turn a partially redundant memset + memcpy into
1169  // memcpy + smaller memset. We don't need the memcpy size for this.
1170  if (DepInfo.isClobber())
1171  if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
1172  if (processMemSetMemCpyDependence(M, MDep))
1173  return true;
1174 
1175  // The optimizations after this point require the memcpy size.
1176  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
1177  if (!CopySize) return false;
1178 
1179  // There are four possible optimizations we can do for memcpy:
1180  // a) memcpy-memcpy xform which exposes redundance for DSE.
1181  // b) call-memcpy xform for return slot optimization.
1182  // c) memcpy from freshly alloca'd space or space that has just started its
1183  // lifetime copies undefined data, and we can therefore eliminate the
1184  // memcpy in favor of the data that was already at the destination.
1185  // d) memcpy from a just-memset'd source can be turned into memset.
1186  if (DepInfo.isClobber()) {
1187  if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
1188  if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
1189  CopySize->getZExtValue(), M->getAlignment(),
1190  C)) {
1191  MD->removeInstruction(M);
1192  M->eraseFromParent();
1193  return true;
1194  }
1195  }
1196  }
1197 
1198  MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
1199  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(
1200  SrcLoc, true, M->getIterator(), M->getParent());
1201 
1202  if (SrcDepInfo.isClobber()) {
1203  if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
1204  return processMemCpyMemCpyDependence(M, MDep);
1205  } else if (SrcDepInfo.isDef()) {
1206  Instruction *I = SrcDepInfo.getInst();
1207  bool hasUndefContents = false;
1208 
1209  if (isa<AllocaInst>(I)) {
1210  hasUndefContents = true;
1211  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1212  if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1213  if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1214  if (LTSize->getZExtValue() >= CopySize->getZExtValue())
1215  hasUndefContents = true;
1216  }
1217 
1218  if (hasUndefContents) {
1219  MD->removeInstruction(M);
1220  M->eraseFromParent();
1221  ++NumMemCpyInstr;
1222  return true;
1223  }
1224  }
1225 
1226  if (SrcDepInfo.isClobber())
1227  if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
1228  if (performMemCpyToMemSetOptzn(M, MDep)) {
1229  MD->removeInstruction(M);
1230  M->eraseFromParent();
1231  ++NumCpyToSet;
1232  return true;
1233  }
1234 
1235  return false;
1236 }
1237 
1238 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1239 /// not to alias.
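/// For instance (illustrative IR, not from this file), when %dst and %src are
/// provably NoAlias:
/// \code
///   call void @llvm.memmove.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 32, i32 1, i1 false)
/// \endcode
/// is rewritten in place to call @llvm.memcpy.p0i8.p0i8.i64 with the same
/// arguments, since no overlap handling is needed.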
1240 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1241  AliasAnalysis &AA = LookupAliasAnalysis();
1242 
1243  if (!TLI->has(LibFunc::memmove))
1244  return false;
1245 
1246  // See if the pointers alias.
1247  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
1248  MemoryLocation::getForSource(M)))
1249  return false;
1250 
1251  DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1252  << "\n");
1253 
1254  // If not, then we know we can transform this.
1255  Type *ArgTys[3] = { M->getRawDest()->getType(),
1256  M->getRawSource()->getType(),
1257  M->getLength()->getType() };
1258  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1259  Intrinsic::memcpy, ArgTys));
1260 
1261  // MemDep may have overly conservative information about this instruction; just
1262  // conservatively flush it from the cache.
1263  MD->removeInstruction(M);
1264 
1265  ++NumMoveToCpy;
1266  return true;
1267 }
1268 
1269 /// This is called on every byval argument in call sites.
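/// A hedged illustration (example IR, not from this file):
/// \code
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp8, i8* %src8, i64 N, i32 A, i1 false)
///   call void @foo(%T* byval align A %tmp)
/// \endcode
/// If %src is not modified between the two calls and is sufficiently aligned,
/// the byval argument can be fed directly from %src, leaving the memcpy dead:
/// \code
///   call void @foo(%T* byval align A %src)
/// \endcode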
1270 bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {
1271  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
1272  // Find out what feeds this byval argument.
1273  Value *ByValArg = CS.getArgument(ArgNo);
1274  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
1275  uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
1276  MemDepResult DepInfo = MD->getPointerDependencyFrom(
1277  MemoryLocation(ByValArg, ByValSize), true,
1278  CS.getInstruction()->getIterator(), CS.getInstruction()->getParent());
1279  if (!DepInfo.isClobber())
1280  return false;
1281 
1282  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1283  // a memcpy, see if we can byval from the source of the memcpy instead of the
1284  // result.
1285  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
1286  if (!MDep || MDep->isVolatile() ||
1287  ByValArg->stripPointerCasts() != MDep->getDest())
1288  return false;
1289 
1290  // The length of the memcpy must be larger than or equal to the size of the byval.
1291  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1292  if (!C1 || C1->getValue().getZExtValue() < ByValSize)
1293  return false;
1294 
1295  // Get the alignment of the byval. If the call doesn't specify the alignment,
1296  // then it is some target specific value that we can't know.
1297  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
1298  if (ByValAlign == 0) return false;
1299 
1300  // If it is greater than the memcpy, then we check to see if we can force the
1301  // source of the memcpy to the alignment we need. If we fail, we bail out.
1302  AssumptionCache &AC = LookupAssumptionCache();
1303  DominatorTree &DT = LookupDomTree();
1304  if (MDep->getAlignment() < ByValAlign &&
1305  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
1306  CS.getInstruction(), &AC, &DT) < ByValAlign)
1307  return false;
1308 
1309  // Verify that the copied-from memory doesn't change in between the memcpy and
1310  // the byval call.
1311  // memcpy(a <- b)
1312  // *b = 42;
1313  // foo(*a)
1314  // It would be invalid to transform the second memcpy into foo(*b).
1315  //
1316  // NOTE: This is conservative, it will stop on any read from the source loc,
1317  // not just the defining memcpy.
1318  MemDepResult SourceDep = MD->getPointerDependencyFrom(
1319  MemoryLocation::getForSource(MDep), false,
1320  CS.getInstruction()->getIterator(), MDep->getParent());
1321  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1322  return false;
1323 
1324  Value *TmpCast = MDep->getSource();
1325  if (MDep->getSource()->getType() != ByValArg->getType())
1326  TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1327  "tmpcast", CS.getInstruction());
1328 
1329  DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1330  << " " << *MDep << "\n"
1331  << " " << *CS.getInstruction() << "\n");
1332 
1333  // Otherwise we're good! Update the byval argument.
1334  CS.setArgument(ArgNo, TmpCast);
1335  ++NumMemCpyInstr;
1336  return true;
1337 }
1338 
1339 /// Executes one iteration of MemCpyOptPass.
1340 bool MemCpyOptPass::iterateOnFunction(Function &F) {
1341  bool MadeChange = false;
1342 
1343  // Walk all instructions in the function.
1344  for (BasicBlock &BB : F) {
1345  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1346  // Avoid invalidating the iterator.
1347  Instruction *I = &*BI++;
1348 
1349  bool RepeatInstruction = false;
1350 
1351  if (StoreInst *SI = dyn_cast<StoreInst>(I))
1352  MadeChange |= processStore(SI, BI);
1353  else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
1354  RepeatInstruction = processMemSet(M, BI);
1355  else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
1356  RepeatInstruction = processMemCpy(M);
1357  else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1358  RepeatInstruction = processMemMove(M);
1359  else if (auto CS = CallSite(I)) {
1360  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
1361  if (CS.isByValArgument(i))
1362  MadeChange |= processByValArgument(CS, i);
1363  }
1364 
1365  // Reprocess the instruction if desired.
1366  if (RepeatInstruction) {
1367  if (BI != BB.begin())
1368  --BI;
1369  MadeChange = true;
1370  }
1371  }
1372  }
1373 
1374  return MadeChange;
1375 }
1376 
1377 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1378 
1379  auto &MD = AM.getResult<MemoryDependenceAnalysis>(F);
1380  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1381 
1382  auto LookupAliasAnalysis = [&]() -> AliasAnalysis & {
1383  return AM.getResult<AAManager>(F);
1384  };
1385  auto LookupAssumptionCache = [&]() -> AssumptionCache & {
1386  return AM.getResult<AssumptionAnalysis>(F);
1387  };
1388  auto LookupDomTree = [&]() -> DominatorTree & {
1389  return AM.getResult<DominatorTreeAnalysis>(F);
1390  };
1391 
1392  bool MadeChange = runImpl(F, &MD, &TLI, LookupAliasAnalysis,
1393  LookupAssumptionCache, LookupDomTree);
1394  if (!MadeChange)
1395  return PreservedAnalyses::all();
1396  PreservedAnalyses PA;
1397  PA.preserve<GlobalsAA>();
1398  PA.preserve<MemoryDependenceAnalysis>();
1399  return PA;
1400 }
1401 
1402 bool MemCpyOptPass::runImpl(
1403  Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_,
1404  std::function<AliasAnalysis &()> LookupAliasAnalysis_,
1405  std::function<AssumptionCache &()> LookupAssumptionCache_,
1406  std::function<DominatorTree &()> LookupDomTree_) {
1407  bool MadeChange = false;
1408  MD = MD_;
1409  TLI = TLI_;
1410  LookupAliasAnalysis = std::move(LookupAliasAnalysis_);
1411  LookupAssumptionCache = std::move(LookupAssumptionCache_);
1412  LookupDomTree = std::move(LookupDomTree_);
1413 
1414  // If we don't have at least memset and memcpy, there is little point in doing
1415  // anything here. These are required by a freestanding implementation, so if
1416  // even they are disabled, there is no point in trying hard.
1417  if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
1418  return false;
1419 
1420  while (1) {
1421  if (!iterateOnFunction(F))
1422  break;
1423  MadeChange = true;
1424  }
1425 
1426  MD = nullptr;
1427  return MadeChange;
1428 }
1429 
1430 /// This is the main transformation entry point for a function.
1431 bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1432  if (skipFunction(F))
1433  return false;
1434 
1435  auto *MD = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
1436  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1437 
1438  auto LookupAliasAnalysis = [this]() -> AliasAnalysis & {
1439  return getAnalysis<AAResultsWrapperPass>().getAAResults();
1440  };
1441  auto LookupAssumptionCache = [this, &F]() -> AssumptionCache & {
1442  return getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1443  };
1444  auto LookupDomTree = [this]() -> DominatorTree & {
1445  return getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1446  };
1447 
1448  return Impl.runImpl(F, MD, TLI, LookupAliasAnalysis, LookupAssumptionCache,
1449  LookupDomTree);
1450 }
Definition: DataLayout.h:517
bool isSized(SmallPtrSetImpl< Type * > *Visited=nullptr) const
Return true if it makes sense to take the size of this type.
Definition: Type.h:254
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
Definition: MathExtras.h:589
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:830
The access neither references nor modifies the value stored in memory.
Definition: AliasAnalysis.h:96
#define P(N)
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
friend const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:241
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:348
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:107
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
Definition: Instruction.h:256
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs...ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:653
LLVM Basic Block Representation.
Definition: BasicBlock.h:51
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:45
A manager for alias analyses.
unsigned getAlignment() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:109
Value * getRawDest() const
Represent the analysis usage information of a pass.
uint16_t getParamAlignment(uint16_t i) const
Extract the alignment for a call or parameter (0=unknown).
Definition: CallSite.h:383
bool any_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:743
uint32_t Offset
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
Definition: CallSite.h:555
Analysis pass providing a never-invalidated alias analysis result.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,"Assign register bank of generic virtual registers", false, false) RegBankSelect
static const unsigned End
FunctionPass * createMemCpyOptPass()
The public interface to this file...
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
Value * getOperand(unsigned i) const
Definition: User.h:145
self_iterator getIterator()
Definition: ilist_node.h:81
unsigned getIntegerBitWidth() const
Definition: DerivedTypes.h:96
static CastInst * CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd)
Create a BitCast AddrSpaceCast, or a PtrToInt cast instruction.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:113
void initializeMemCpyOptLegacyPassPass(PassRegistry &)
A wrapper analysis pass for the legacy pass manager that exposes a MemoryDepnedenceResults instance...
A memory dependence query can return one of three different answers.
bool runImpl(Function &F, MemoryDependenceResults *MD_, TargetLibraryInfo *TLI_, std::function< AliasAnalysis &()> LookupAliasAnalysis_, std::function< AssumptionCache &()> LookupAssumptionCache_, std::function< DominatorTree &()> LookupDomTree_)
bool dominates(const Instruction *Def, const Use &U) const
Return true if Def dominates a use in User.
Definition: Dominators.cpp:218
Representation for a specific memory location.
A function analysis which provides an AssumptionCache.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:689
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst=nullptr, unsigned *Limit=nullptr)
Returns the instruction on which a memory location depends.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Definition: Constants.h:88
InstrTy * getInstruction() const
Definition: CallSite.h:93
uint64_t getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:408
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions that feed it, giving the original input.
friend const_iterator begin(StringRef path)
Get begin iterator over path.
Definition: Path.cpp:233
ValTy * getArgument(unsigned ArgNo) const
Definition: CallSite.h:178
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:58
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:230
Provides information about what library functions are available for the current target.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:175
const DataFlowGraph & G
Definition: RDFGraph.cpp:206
Value * getLength() const
Value * stripPointerCasts()
Strip off pointer casts, all-zero GEPs, and aliases.
Definition: Value.cpp:490
This class wraps the llvm.memcpy intrinsic.
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition: Constants.h:198
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:276
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT, OrderedBasicBlock *OBB=nullptr)
Return information about whether a particular call site modifies or reads the specified memory locati...
ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
iterator_range< user_iterator > users()
Definition: Value.h:370
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
A trivial helper function to check to see if the specified pointers are must-alias.
MemCpy Optimization
bool mayThrow() const
Return true if this instruction may throw an exception.
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it...
void setCalledFunction(Value *Fn)
Set the function called.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:384
Instruction * getInst() const
If this is a normal dependency, returns the instruction that is depended on.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:227
ImmutableCallSite - establish a view to a call site for examination.
Definition: CallSite.h:665
#define I(x, y, z)
Definition: MD5.cpp:54
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:120
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:391
Analysis pass providing the TargetLibraryInfo.
Value * getRawSource() const
Return the arguments to the instruction.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
user_iterator user_begin()
Definition: Value.h:346
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:537
LLVM Value Representation.
Definition: Value.h:71
void removeInstruction(Instruction *InstToRemove)
Removes an instruction from the dependence analysis, updating the dependence of instructions that pre...
const Value * getArraySize() const
Get the number of elements allocated.
Definition: Instructions.h:93
#define DEBUG(X)
Definition: Debug.h:100
print Print MemDeps of function
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:731
A container for analyses that lazily runs them and caches their results.
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:217
static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P, const LoadInst *LI)
static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI, const LoadInst *LI)
int * Ptr
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
Definition: Instructions.h:102
int64_t getSExtValue() const
Return the constant as a 64-bit integer value after it has been sign extended as appropriate for the ...
Definition: Constants.h:162
static GCRegistry::Add< ErlangGC > A("erlang","erlang-compatible garbage collector")
Value * getPointerOperand()
Definition: Instructions.h:394
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef< unsigned > KnownIDs)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:1682
const BasicBlock * getParent() const
Definition: Instruction.h:62
A wrapper class for inspecting calls to intrinsic functions.
Definition: IntrinsicInst.h:44
an instruction to allocate memory on the stack
Definition: Instructions.h:60
gep_type_iterator gep_type_begin(const User *GEP)
MemDepResult getDependency(Instruction *QueryInst)
Returns the instruction on which a memory operation depends.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
user_iterator user_end()
Definition: Value.h:354