LLVM 3.7.0
MemCpyOptimizer.cpp
1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass performs various transformations related to eliminating memcpy
11 // calls, or transforming sets of stores into memset's.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Scalar.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/Statistic.h"
18 #include "llvm/Analysis/AliasAnalysis.h"
19 #include "llvm/Analysis/AssumptionCache.h"
20 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
21 #include "llvm/Analysis/TargetLibraryInfo.h"
22 #include "llvm/Analysis/ValueTracking.h"
23 #include "llvm/IR/DataLayout.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/GetElementPtrTypeIterator.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/IRBuilder.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/IntrinsicInst.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Transforms/Utils/Local.h"
33 #include <list>
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "memcpyopt"
37 
38 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
39 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
40 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
41 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
42 
43 static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
44  bool &VariableIdxFound,
45  const DataLayout &DL) {
 46  // Skip over the first indices.
 47  gep_type_iterator GTI = gep_type_begin(GEP);
 48  for (unsigned i = 1; i != Idx; ++i, ++GTI)
49  /*skip along*/;
50 
51  // Compute the offset implied by the rest of the indices.
52  int64_t Offset = 0;
53  for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
54  ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i));
55  if (!OpC)
56  return VariableIdxFound = true;
57  if (OpC->isZero()) continue; // No offset.
58 
59  // Handle struct indices, which add their field offset to the pointer.
60  if (StructType *STy = dyn_cast<StructType>(*GTI)) {
61  Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
62  continue;
63  }
64 
65  // Otherwise, we have a sequential type like an array or vector. Multiply
66  // the index by the ElementSize.
67  uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
68  Offset += Size*OpC->getSExtValue();
69  }
70 
71  return Offset;
72 }
73 
 74 /// IsPointerOffset - Return true if Ptr2 is provably equal to Ptr1 plus a
 75 /// constant offset, and return that constant offset. For example, Ptr1 might
 76 /// be &A[42], and Ptr2 might be &A[40]; with 4-byte elements the Offset is -8.
77 static bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset,
78  const DataLayout &DL) {
79  Ptr1 = Ptr1->stripPointerCasts();
80  Ptr2 = Ptr2->stripPointerCasts();
81 
82  // Handle the trivial case first.
83  if (Ptr1 == Ptr2) {
84  Offset = 0;
85  return true;
86  }
87 
88  GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1);
89  GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2);
90 
91  bool VariableIdxFound = false;
92 
93  // If one pointer is a GEP and the other isn't, then see if the GEP is a
94  // constant offset from the base, as in "P" and "gep P, 1".
95  if (GEP1 && !GEP2 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) {
96  Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, DL);
97  return !VariableIdxFound;
98  }
99 
100  if (GEP2 && !GEP1 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) {
101  Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, DL);
102  return !VariableIdxFound;
103  }
104 
105  // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical
106  // base. After that base, they may have some number of common (and
 107  // potentially variable) indices. After that, each may add some constant
 108  // offset, which determines their offset from each other. At this point, we
109  // handle no other case.
110  if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0))
111  return false;
112 
113  // Skip any common indices and track the GEP types.
114  unsigned Idx = 1;
115  for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx)
116  if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx))
117  break;
118 
119  int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, DL);
120  int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, DL);
121  if (VariableIdxFound) return false;
122 
123  Offset = Offset2-Offset1;
124  return true;
125 }
126 
127 
128 /// MemsetRange - Represents a range of memset'd bytes with the ByteVal value.
129 /// This allows us to analyze stores like:
130 /// store 0 -> P+1
131 /// store 0 -> P+0
132 /// store 0 -> P+3
133 /// store 0 -> P+2
134 /// which sometimes happens with stores to arrays of structs etc. When we see
135 /// the first store, we make a range [1, 2). The second store extends the range
 136 /// to [0, 2). The third makes a new range [3, 4). The fourth store joins the
 137 /// two ranges into [0, 4), which is memset'able.
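A minimal sketch of the rewrite this merging enables (illustrative C++, not part of the original file; the function names are made up):

#include <cstring>

// The scattered byte stores described above ...
void scatteredStores(unsigned char *p) {
  p[1] = 0; p[0] = 0; p[3] = 0; p[2] = 0; // ranges [1,2), [0,2), [3,4), [0,4)
}
// ... and the single memset covering [0, 4) that the pass infers instead.
void inferredMemset(unsigned char *p) {
  std::memset(p, 0, 4);
}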
138 namespace {
139 struct MemsetRange {
140  // Start/End - A semi range that describes the span that this range covers.
141  // The range is closed at the start and open at the end: [Start, End).
142  int64_t Start, End;
143 
144  /// StartPtr - The getelementptr instruction that points to the start of the
145  /// range.
146  Value *StartPtr;
147 
148  /// Alignment - The known alignment of the first store.
149  unsigned Alignment;
150 
 151  /// TheStores - The actual stores that make up this range.
 152  SmallVector<Instruction*, 16> TheStores;
 153 
154  bool isProfitableToUseMemset(const DataLayout &DL) const;
155 };
156 } // end anon namespace
157 
158 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
 159  // If we found 4 or more stores to merge, or 16 or more bytes, use memset.
160  if (TheStores.size() >= 4 || End-Start >= 16) return true;
161 
162  // If there is nothing to merge, don't do anything.
163  if (TheStores.size() < 2) return false;
164 
165  // If any of the stores are a memset, then it is always good to extend the
166  // memset.
167  for (unsigned i = 0, e = TheStores.size(); i != e; ++i)
168  if (!isa<StoreInst>(TheStores[i]))
169  return true;
170 
171  // Assume that the code generator is capable of merging pairs of stores
172  // together if it wants to.
173  if (TheStores.size() == 2) return false;
174 
175  // If we have fewer than 8 stores, it can still be worthwhile to do this.
176  // For example, merging 4 i8 stores into an i32 store is useful almost always.
177  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
178  // memset will be split into 2 32-bit stores anyway) and doing so can
179  // pessimize the llvm optimizer.
180  //
181  // Since we don't have perfect knowledge here, make some assumptions: assume
182  // the maximum GPR width is the same size as the largest legal integer
183  // size. If so, check to see whether we will end up actually reducing the
184  // number of stores used.
185  unsigned Bytes = unsigned(End-Start);
186  unsigned MaxIntSize = DL.getLargestLegalIntTypeSize();
187  if (MaxIntSize == 0)
188  MaxIntSize = 1;
189  unsigned NumPointerStores = Bytes / MaxIntSize;
190 
 191  // Assume the remaining bytes, if any, are done a byte at a time.
192  unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize;
193 
194  // If we will reduce the # stores (according to this heuristic), do the
195  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
196  // etc.
197  return TheStores.size() > NumPointerStores+NumByteStores;
198 }
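As a worked instance of the heuristic above (assumed numbers, not taken from the source: a 7-byte range on a target whose largest legal integer type is 4 bytes):

// Illustrative arithmetic only; Bytes and MaxIntSize are assumed values.
unsigned storesNeededWithoutMemset() {
  unsigned Bytes = 7, MaxIntSize = 4;
  unsigned NumPointerStores = Bytes / MaxIntSize;                 // 7 / 4 == 1
  unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; // 7 - 4 == 3
  return NumPointerStores + NumByteStores;                        // == 4
}
// So a 7-byte range is only lowered to a memset if it currently takes more
// than 4 individual stores.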
199 
200 
201 namespace {
202 class MemsetRanges {
203  /// Ranges - A sorted list of the memset ranges. We use std::list here
204  /// because each element is relatively large and expensive to copy.
205  std::list<MemsetRange> Ranges;
206  typedef std::list<MemsetRange>::iterator range_iterator;
207  const DataLayout &DL;
208 public:
209  MemsetRanges(const DataLayout &DL) : DL(DL) {}
210 
211  typedef std::list<MemsetRange>::const_iterator const_iterator;
212  const_iterator begin() const { return Ranges.begin(); }
213  const_iterator end() const { return Ranges.end(); }
214  bool empty() const { return Ranges.empty(); }
215 
216  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
217  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
218  addStore(OffsetFromFirst, SI);
219  else
220  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
221  }
222 
223  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
224  int64_t StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
225 
226  addRange(OffsetFromFirst, StoreSize,
227  SI->getPointerOperand(), SI->getAlignment(), SI);
228  }
229 
230  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
231  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
232  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI);
233  }
234 
235  void addRange(int64_t Start, int64_t Size, Value *Ptr,
236  unsigned Alignment, Instruction *Inst);
237 
238 };
239 
240 } // end anon namespace
241 
242 
243 /// addRange - Add a new store to the MemsetRanges data structure. This adds a
244 /// new range for the specified store at the specified offset, merging into
245 /// existing ranges as appropriate.
246 ///
247 /// Do a linear search of the ranges to see if this can be joined and/or to
248 /// find the insertion point in the list. We keep the ranges sorted for
249 /// simplicity here. This is a linear search of a linked list, which is ugly,
 250  // but the number of ranges is limited, so this won't get crazy slow.
251 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
252  unsigned Alignment, Instruction *Inst) {
253  int64_t End = Start+Size;
254  range_iterator I = Ranges.begin(), E = Ranges.end();
255 
256  while (I != E && Start > I->End)
257  ++I;
258 
259  // We now know that I == E, in which case we didn't find anything to merge
260  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
261  // to insert a new range. Handle this now.
262  if (I == E || End < I->Start) {
263  MemsetRange &R = *Ranges.insert(I, MemsetRange());
264  R.Start = Start;
265  R.End = End;
266  R.StartPtr = Ptr;
267  R.Alignment = Alignment;
268  R.TheStores.push_back(Inst);
269  return;
270  }
271 
272  // This store overlaps with I, add it.
273  I->TheStores.push_back(Inst);
274 
275  // At this point, we may have an interval that completely contains our store.
276  // If so, just add it to the interval and return.
277  if (I->Start <= Start && I->End >= End)
278  return;
279 
280  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
281  // but is not entirely contained within the range.
282 
 283  // See if this store extends the start of the range. In this case, it couldn't
284  // possibly cause it to join the prior range, because otherwise we would have
285  // stopped on *it*.
286  if (Start < I->Start) {
287  I->Start = Start;
288  I->StartPtr = Ptr;
289  I->Alignment = Alignment;
290  }
291 
292  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
293  // is in or right at the end of I), and that End >= I->Start. Extend I out to
294  // End.
295  if (End > I->End) {
296  I->End = End;
297  range_iterator NextI = I;
298  while (++NextI != E && End >= NextI->Start) {
299  // Merge the range in.
300  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
301  if (NextI->End > I->End)
302  I->End = NextI->End;
303  Ranges.erase(NextI);
304  NextI = I;
305  }
306  }
307 }
308 
309 //===----------------------------------------------------------------------===//
310 // MemCpyOpt Pass
311 //===----------------------------------------------------------------------===//
312 
313 namespace {
 314  class MemCpyOpt : public FunctionPass {
 315  MemoryDependenceAnalysis *MD;
 316  TargetLibraryInfo *TLI;
317  public:
318  static char ID; // Pass identification, replacement for typeid
 319  MemCpyOpt() : FunctionPass(ID) {
 320  initializeMemCpyOptPass(*PassRegistry::getPassRegistry());
 321  MD = nullptr;
322  TLI = nullptr;
323  }
324 
325  bool runOnFunction(Function &F) override;
326 
327  private:
328  // This transformation requires dominator postdominator info
329  void getAnalysisUsage(AnalysisUsage &AU) const override {
 330  AU.setPreservesCFG();
 331  AU.addRequired<AssumptionCacheTracker>();
 332  AU.addRequired<DominatorTreeWrapperPass>();
 333  AU.addRequired<MemoryDependenceAnalysis>();
 334  AU.addRequired<AliasAnalysis>();
 335  AU.addRequired<TargetLibraryInfoWrapperPass>();
 336  AU.addPreserved<AliasAnalysis>();
 337  AU.addPreserved<MemoryDependenceAnalysis>();
 338  }
339 
340  // Helper functions
341  bool processStore(StoreInst *SI, BasicBlock::iterator &BBI);
342  bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI);
343  bool processMemCpy(MemCpyInst *M);
344  bool processMemMove(MemMoveInst *M);
345  bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc,
346  uint64_t cpyLen, unsigned cpyAlign, CallInst *C);
347  bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep);
348  bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep);
349  bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep);
350  bool processByValArgument(CallSite CS, unsigned ArgNo);
351  Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr,
352  Value *ByteVal);
353 
354  bool iterateOnFunction(Function &F);
355  };
356 
357  char MemCpyOpt::ID = 0;
358 }
359 
360 // createMemCpyOptPass - The public interface to this file...
361 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); }
362 
363 INITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
 364  false, false)
 365 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 366 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 367 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
 368 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 369 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
 370 INITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization",
 371  false, false)
372 
373 /// tryMergingIntoMemset - When scanning forward over instructions, we look for
374 /// some other patterns to fold away. In particular, this looks for stores to
375 /// neighboring locations of memory. If it sees enough consecutive ones, it
376 /// attempts to merge them together into a memcpy/memset.
377 Instruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst,
378  Value *StartPtr, Value *ByteVal) {
379  const DataLayout &DL = StartInst->getModule()->getDataLayout();
380 
 381  // Okay, so we now have a single store whose value is splatable. Scan to find
382  // all subsequent stores of the same value to offset from the same pointer.
383  // Join these together into ranges, so we can decide whether contiguous blocks
384  // are stored.
385  MemsetRanges Ranges(DL);
386 
387  BasicBlock::iterator BI = StartInst;
388  for (++BI; !isa<TerminatorInst>(BI); ++BI) {
389  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
390  // If the instruction is readnone, ignore it, otherwise bail out. We
391  // don't even allow readonly here because we don't want something like:
392  // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
393  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
394  break;
395  continue;
396  }
397 
398  if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
399  // If this is a store, see if we can merge it in.
400  if (!NextStore->isSimple()) break;
401 
402  // Check to see if this stored value is of the same byte-splattable value.
403  if (ByteVal != isBytewiseValue(NextStore->getOperand(0)))
404  break;
405 
406  // Check to see if this store is to a constant offset from the start ptr.
407  int64_t Offset;
408  if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), Offset,
409  DL))
410  break;
411 
412  Ranges.addStore(Offset, NextStore);
413  } else {
414  MemSetInst *MSI = cast<MemSetInst>(BI);
415 
416  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
417  !isa<ConstantInt>(MSI->getLength()))
418  break;
419 
420  // Check to see if this store is to a constant offset from the start ptr.
421  int64_t Offset;
422  if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, DL))
423  break;
424 
425  Ranges.addMemSet(Offset, MSI);
426  }
427  }
428 
429  // If we have no ranges, then we just had a single store with nothing that
430  // could be merged in. This is a very common case of course.
431  if (Ranges.empty())
432  return nullptr;
433 
434  // If we had at least one store that could be merged in, add the starting
435  // store as well. We try to avoid this unless there is at least something
436  // interesting as a small compile-time optimization.
437  Ranges.addInst(0, StartInst);
438 
 439  // If we create any memsets, we put them right before the first instruction that
 440  // isn't part of the memset block. This ensures that the memset is dominated
441  // by any addressing instruction needed by the start of the block.
442  IRBuilder<> Builder(BI);
443 
444  // Now that we have full information about ranges, loop over the ranges and
445  // emit memset's for anything big enough to be worthwhile.
446  Instruction *AMemSet = nullptr;
447  for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end();
448  I != E; ++I) {
449  const MemsetRange &Range = *I;
450 
451  if (Range.TheStores.size() == 1) continue;
452 
453  // If it is profitable to lower this range to memset, do so now.
454  if (!Range.isProfitableToUseMemset(DL))
455  continue;
456 
457  // Otherwise, we do want to transform this! Create a new memset.
458  // Get the starting pointer of the block.
459  StartPtr = Range.StartPtr;
460 
461  // Determine alignment
462  unsigned Alignment = Range.Alignment;
463  if (Alignment == 0) {
464  Type *EltType =
465  cast<PointerType>(StartPtr->getType())->getElementType();
466  Alignment = DL.getABITypeAlignment(EltType);
467  }
468 
469  AMemSet =
470  Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment);
471 
472  DEBUG(dbgs() << "Replace stores:\n";
473  for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i)
474  dbgs() << *Range.TheStores[i] << '\n';
475  dbgs() << "With: " << *AMemSet << '\n');
476 
477  if (!Range.TheStores.empty())
478  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
479 
 480  // Zap all the stores.
 481  for (SmallVectorImpl<Instruction *>::const_iterator
 482  SI = Range.TheStores.begin(),
483  SE = Range.TheStores.end(); SI != SE; ++SI) {
484  MD->removeInstruction(*SI);
485  (*SI)->eraseFromParent();
486  }
487  ++NumMemSetInfer;
488  }
489 
490  return AMemSet;
491 }
492 
493 
494 bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
495  if (!SI->isSimple()) return false;
496  const DataLayout &DL = SI->getModule()->getDataLayout();
497 
498  // Detect cases where we're performing call slot forwarding, but
499  // happen to be using a load-store pair to implement it, rather than
500  // a memcpy.
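  // For example, a frontend may lower "struct S tmp; f(&tmp); *dest = tmp;"
  // into a call that fills a local alloca followed by a load/store pair that
  // copies it out; that load/store pair is what the code below matches.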
501  if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
502  if (LI->isSimple() && LI->hasOneUse() &&
503  LI->getParent() == SI->getParent()) {
504  MemDepResult ldep = MD->getDependency(LI);
505  CallInst *C = nullptr;
506  if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
507  C = dyn_cast<CallInst>(ldep.getInst());
508 
509  if (C) {
510  // Check that nothing touches the dest of the "copy" between
511  // the call and the store.
512  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
 513  MemoryLocation StoreLoc = MemoryLocation::get(SI);
 514  for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
 515  E = C; I != E; --I) {
516  if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
517  C = nullptr;
518  break;
519  }
520  }
521  }
522 
523  if (C) {
524  unsigned storeAlign = SI->getAlignment();
525  if (!storeAlign)
526  storeAlign = DL.getABITypeAlignment(SI->getOperand(0)->getType());
527  unsigned loadAlign = LI->getAlignment();
528  if (!loadAlign)
529  loadAlign = DL.getABITypeAlignment(LI->getType());
530 
 531  bool changed = performCallSlotOptzn(
 532  LI, SI->getPointerOperand()->stripPointerCasts(),
 533  LI->getPointerOperand()->stripPointerCasts(),
534  DL.getTypeStoreSize(SI->getOperand(0)->getType()),
535  std::min(storeAlign, loadAlign), C);
536  if (changed) {
537  MD->removeInstruction(SI);
538  SI->eraseFromParent();
539  MD->removeInstruction(LI);
540  LI->eraseFromParent();
541  ++NumMemCpyInstr;
542  return true;
543  }
544  }
545  }
546  }
547 
548  // There are two cases that are interesting for this code to handle: memcpy
549  // and memset. Right now we only handle memset.
550 
 551  // Ensure that the value being stored is something that can be memset a
 552  // byte at a time, like "0" or "-1" of any width, as well as things like
553  // 0xA0A0A0A0 and 0.0.
554  if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
555  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
556  ByteVal)) {
557  BBI = I; // Don't invalidate iterator.
558  return true;
559  }
560 
561  return false;
562 }
563 
564 bool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
565  // See if there is another memset or store neighboring this memset which
566  // allows us to widen out the memset to do a single larger store.
567  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
568  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
569  MSI->getValue())) {
570  BBI = I; // Don't invalidate iterator.
571  return true;
572  }
573  return false;
574 }
575 
576 
577 /// performCallSlotOptzn - takes a memcpy and a call that it depends on,
578 /// and checks for the possibility of a call slot optimization by having
579 /// the call write its result directly into the destination of the memcpy.
580 bool MemCpyOpt::performCallSlotOptzn(Instruction *cpy,
581  Value *cpyDest, Value *cpySrc,
582  uint64_t cpyLen, unsigned cpyAlign,
583  CallInst *C) {
584  // The general transformation to keep in mind is
585  //
586  // call @func(..., src, ...)
587  // memcpy(dest, src, ...)
588  //
589  // ->
590  //
591  // memcpy(dest, src, ...)
592  // call @func(..., dest, ...)
593  //
594  // Since moving the memcpy is technically awkward, we additionally check that
595  // src only holds uninitialized values at the moment of the call, meaning that
596  // the memcpy can be discarded rather than moved.
597 
598  // Deliberately get the source and destination with bitcasts stripped away,
599  // because we'll need to do type comparisons based on the underlying type.
600  CallSite CS(C);
601 
602  // Require that src be an alloca. This simplifies the reasoning considerably.
603  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
604  if (!srcAlloca)
605  return false;
606 
607  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
608  if (!srcArraySize)
609  return false;
610 
611  const DataLayout &DL = cpy->getModule()->getDataLayout();
612  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
613  srcArraySize->getZExtValue();
614 
615  if (cpyLen < srcSize)
616  return false;
617 
618  // Check that accessing the first srcSize bytes of dest will not cause a
619  // trap. Otherwise the transform is invalid since it might cause a trap
620  // to occur earlier than it otherwise would.
621  if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) {
 622  // The destination is an alloca. Check that it is at least as large as srcSize.
623  ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize());
624  if (!destArraySize)
625  return false;
626 
627  uint64_t destSize = DL.getTypeAllocSize(A->getAllocatedType()) *
628  destArraySize->getZExtValue();
629 
630  if (destSize < srcSize)
631  return false;
632  } else if (Argument *A = dyn_cast<Argument>(cpyDest)) {
633  if (A->getDereferenceableBytes() < srcSize) {
634  // If the destination is an sret parameter then only accesses that are
635  // outside of the returned struct type can trap.
636  if (!A->hasStructRetAttr())
637  return false;
638 
639  Type *StructTy = cast<PointerType>(A->getType())->getElementType();
640  if (!StructTy->isSized()) {
641  // The call may never return and hence the copy-instruction may never
642  // be executed, and therefore it's not safe to say "the destination
643  // has at least <cpyLen> bytes, as implied by the copy-instruction",
644  return false;
645  }
646 
647  uint64_t destSize = DL.getTypeAllocSize(StructTy);
648  if (destSize < srcSize)
649  return false;
650  }
651  } else {
652  return false;
653  }
654 
655  // Check that dest points to memory that is at least as aligned as src.
656  unsigned srcAlign = srcAlloca->getAlignment();
657  if (!srcAlign)
658  srcAlign = DL.getABITypeAlignment(srcAlloca->getAllocatedType());
659  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
660  // If dest is not aligned enough and we can't increase its alignment then
661  // bail out.
662  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
663  return false;
664 
665  // Check that src is not accessed except via the call and the memcpy. This
666  // guarantees that it holds only undefined values when passed in (so the final
667  // memcpy can be dropped), that it is not read or written between the call and
668  // the memcpy, and that writing beyond the end of it is undefined.
669  SmallVector<User*, 8> srcUseList(srcAlloca->user_begin(),
670  srcAlloca->user_end());
671  while (!srcUseList.empty()) {
672  User *U = srcUseList.pop_back_val();
673 
674  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
675  for (User *UU : U->users())
676  srcUseList.push_back(UU);
677  continue;
678  }
679  if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
680  if (!G->hasAllZeroIndices())
681  return false;
682 
683  for (User *UU : U->users())
684  srcUseList.push_back(UU);
685  continue;
686  }
687  if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
688  if (IT->getIntrinsicID() == Intrinsic::lifetime_start ||
689  IT->getIntrinsicID() == Intrinsic::lifetime_end)
690  continue;
691 
692  if (U != C && U != cpy)
693  return false;
694  }
695 
696  // Check that src isn't captured by the called function since the
697  // transformation can cause aliasing issues in that case.
698  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
699  if (CS.getArgument(i) == cpySrc && !CS.doesNotCapture(i))
700  return false;
701 
702  // Since we're changing the parameter to the callsite, we need to make sure
703  // that what would be the new parameter dominates the callsite.
704  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
705  if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest))
706  if (!DT.dominates(cpyDestInst, C))
707  return false;
708 
709  // In addition to knowing that the call does not access src in some
710  // unexpected manner, for example via a global, which we deduce from
711  // the use analysis, we also need to know that it does not sneakily
712  // access dest. We rely on AA to figure this out for us.
713  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
714  AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize);
715  // If necessary, perform additional analysis.
716  if (MR != AliasAnalysis::NoModRef)
717  MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT);
718  if (MR != AliasAnalysis::NoModRef)
719  return false;
720 
721  // All the checks have passed, so do the transformation.
722  bool changedArgument = false;
723  for (unsigned i = 0; i < CS.arg_size(); ++i)
724  if (CS.getArgument(i)->stripPointerCasts() == cpySrc) {
725  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
726  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
727  cpyDest->getName(), C);
728  changedArgument = true;
729  if (CS.getArgument(i)->getType() == Dest->getType())
730  CS.setArgument(i, Dest);
731  else
732  CS.setArgument(i, CastInst::CreatePointerCast(Dest,
733  CS.getArgument(i)->getType(), Dest->getName(), C));
734  }
735 
736  if (!changedArgument)
737  return false;
738 
739  // If the destination wasn't sufficiently aligned then increase its alignment.
740  if (!isDestSufficientlyAligned) {
741  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
742  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
743  }
744 
745  // Drop any cached information about the call, because we may have changed
746  // its dependence information by changing its parameter.
747  MD->removeInstruction(C);
748 
749  // Update AA metadata
750  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
751  // handled here, but combineMetadata doesn't support them yet
 752  unsigned KnownIDs[] = {
 753  LLVMContext::MD_tbaa,
 754  LLVMContext::MD_alias_scope,
 755  LLVMContext::MD_noalias,
 756  };
757  combineMetadata(C, cpy, KnownIDs);
758 
759  // Remove the memcpy.
760  MD->removeInstruction(cpy);
761  ++NumMemCpyInstr;
762 
763  return true;
764 }
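A source-level sketch of the call slot optimization implemented above (illustrative C++, not part of the original file; producer is an assumed callee that fully initializes its argument and does not capture it):

#include <cstring>

struct S { char buf[64]; };
void producer(S *out); // assumed: writes all of *out, does not capture it

// Before: the callee fills a temporary, which is then memcpy'd to the real
// destination.
void beforeCallSlotOpt(S *dest) {
  S tmp;
  producer(&tmp);
  std::memcpy(dest, &tmp, sizeof(S));
}
// After: the callee writes straight into the destination, and the copy (and
// the temporary) disappear.
void afterCallSlotOpt(S *dest) {
  producer(dest);
}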
765 
766 /// processMemCpyMemCpyDependence - We've found that the (upward scanning)
767 /// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to
768 /// copy from MDep's input if we can.
769 ///
770 bool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep) {
 771  // We can only transform memcpy's where the dest of one is the source of the
772  // other.
773  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
774  return false;
775 
 776  // If the dep instruction is reading from our current input, then it is a noop
777  // transfer and substituting the input won't change this instruction. Just
778  // ignore the input and let someone else zap MDep. This handles cases like:
779  // memcpy(a <- a)
780  // memcpy(b <- a)
781  if (M->getSource() == MDep->getSource())
782  return false;
783 
784  // Second, the length of the memcpy's must be the same, or the preceding one
785  // must be larger than the following one.
786  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
787  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
788  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
789  return false;
790 
791  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
792 
793  // Verify that the copied-from memory doesn't change in between the two
794  // transfers. For example, in:
795  // memcpy(a <- b)
796  // *b = 42;
797  // memcpy(c <- a)
798  // It would be invalid to transform the second memcpy into memcpy(c <- b).
799  //
800  // TODO: If the code between M and MDep is transparent to the destination "c",
801  // then we could still perform the xform by moving M up to the first memcpy.
802  //
803  // NOTE: This is conservative, it will stop on any read from the source loc,
804  // not just the defining memcpy.
805  MemDepResult SourceDep = MD->getPointerDependencyFrom(
806  MemoryLocation::getForSource(MDep), false, M, M->getParent());
807  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
808  return false;
809 
810  // If the dest of the second might alias the source of the first, then the
811  // source and dest might overlap. We still want to eliminate the intermediate
812  // value, but we have to generate a memmove instead of memcpy.
 813  bool UseMemMove = false;
 814  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
 815  MemoryLocation::getForSource(MDep)))
 816  UseMemMove = true;
817 
818  // If all checks passed, then we can transform M.
819 
820  // Make sure to use the lesser of the alignment of the source and the dest
821  // since we're changing where we're reading from, but don't want to increase
822  // the alignment past what can be read from or written to.
823  // TODO: Is this worth it if we're creating a less aligned memcpy? For
824  // example we could be moving from movaps -> movq on x86.
825  unsigned Align = std::min(MDep->getAlignment(), M->getAlignment());
826 
827  IRBuilder<> Builder(M);
828  if (UseMemMove)
829  Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(),
830  Align, M->isVolatile());
831  else
832  Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(),
833  Align, M->isVolatile());
834 
835  // Remove the instruction we're replacing.
836  MD->removeInstruction(M);
837  M->eraseFromParent();
838  ++NumMemCpyInstr;
839  return true;
840 }
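A source-level sketch of the memcpy-memcpy forwarding performed above (illustrative C++, not part of the original file):

#include <cstddef>
#include <cstring>

// MDep copies a -> b and M copies b -> c; M is rewritten to read from a
// directly, which leaves the first memcpy exposed for later dead-store
// elimination.
void beforeForwarding(char *a, char *b, char *c, std::size_t n) {
  std::memcpy(b, a, n); // MDep
  std::memcpy(c, b, n); // M
}
void afterForwarding(char *a, char *b, char *c, std::size_t n) {
  std::memcpy(b, a, n); // unchanged; may later be deleted if b is otherwise dead
  std::memcpy(c, a, n); // M now copies from MDep's source
}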
841 
842 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
843 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
844 /// weren't copied over by \p MemCpy.
845 ///
846 /// In other words, transform:
847 /// \code
848 /// memset(dst, c, dst_size);
849 /// memcpy(dst, src, src_size);
850 /// \endcode
851 /// into:
852 /// \code
853 /// memcpy(dst, src, src_size);
854 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
855 /// \endcode
856 bool MemCpyOpt::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
857  MemSetInst *MemSet) {
858  // We can only transform memset/memcpy with the same destination.
859  if (MemSet->getDest() != MemCpy->getDest())
860  return false;
861 
862  // Check that there are no other dependencies on the memset destination.
863  MemDepResult DstDepInfo = MD->getPointerDependencyFrom(
864  MemoryLocation::getForDest(MemSet), false, MemCpy, MemCpy->getParent());
865  if (DstDepInfo.getInst() != MemSet)
866  return false;
867 
868  // Use the same i8* dest as the memcpy, killing the memset dest if different.
869  Value *Dest = MemCpy->getRawDest();
870  Value *DestSize = MemSet->getLength();
871  Value *SrcSize = MemCpy->getLength();
872 
873  // By default, create an unaligned memset.
874  unsigned Align = 1;
875  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
876  // of the sum.
877  const unsigned DestAlign =
878  std::max(MemSet->getAlignment(), MemCpy->getAlignment());
879  if (DestAlign > 1)
880  if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
881  Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
882 
883  IRBuilder<> Builder(MemCpy);
884 
885  // If the sizes have different types, zext the smaller one.
886  if (DestSize->getType() != SrcSize->getType()) {
887  if (DestSize->getType()->getIntegerBitWidth() >
888  SrcSize->getType()->getIntegerBitWidth())
889  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
890  else
891  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
892  }
893 
894  Value *MemsetLen =
895  Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
896  ConstantInt::getNullValue(DestSize->getType()),
897  Builder.CreateSub(DestSize, SrcSize));
898  Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
899  MemsetLen, Align);
900 
901  MD->removeInstruction(MemSet);
902  MemSet->eraseFromParent();
903  return true;
904 }
905 
906 /// Transform memcpy to memset when its source was just memset.
907 /// In other words, turn:
908 /// \code
909 /// memset(dst1, c, dst1_size);
910 /// memcpy(dst2, dst1, dst2_size);
911 /// \endcode
912 /// into:
913 /// \code
914 /// memset(dst1, c, dst1_size);
915 /// memset(dst2, c, dst2_size);
916 /// \endcode
917 /// When dst2_size <= dst1_size.
918 ///
919 /// The \p MemCpy must have a Constant length.
920 bool MemCpyOpt::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
921  MemSetInst *MemSet) {
922  // This only makes sense on memcpy(..., memset(...), ...).
923  if (MemSet->getRawDest() != MemCpy->getRawSource())
924  return false;
925 
926  ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
927  ConstantInt *MemSetSize = dyn_cast<ConstantInt>(MemSet->getLength());
928  // Make sure the memcpy doesn't read any more than what the memset wrote.
929  // Don't worry about sizes larger than i64.
930  if (!MemSetSize || CopySize->getZExtValue() > MemSetSize->getZExtValue())
931  return false;
932 
933  IRBuilder<> Builder(MemCpy);
934  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
935  CopySize, MemCpy->getAlignment());
936  return true;
937 }
938 
939 /// processMemCpy - perform simplification of memcpy's. If we have memcpy A
940 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
941 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
942 /// circumstances). This allows later passes to remove the first memcpy
943 /// altogether.
944 bool MemCpyOpt::processMemCpy(MemCpyInst *M) {
945  // We can only optimize non-volatile memcpy's.
946  if (M->isVolatile()) return false;
947 
948  // If the source and destination of the memcpy are the same, then zap it.
949  if (M->getSource() == M->getDest()) {
950  MD->removeInstruction(M);
951  M->eraseFromParent();
952  return false;
953  }
954 
955  // If copying from a constant, try to turn the memcpy into a memset.
956  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
957  if (GV->isConstant() && GV->hasDefinitiveInitializer())
958  if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) {
959  IRBuilder<> Builder(M);
960  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
961  M->getAlignment(), false);
962  MD->removeInstruction(M);
963  M->eraseFromParent();
964  ++NumCpyToSet;
965  return true;
966  }
967 
968  MemDepResult DepInfo = MD->getDependency(M);
969 
970  // Try to turn a partially redundant memset + memcpy into
971  // memcpy + smaller memset. We don't need the memcpy size for this.
972  if (DepInfo.isClobber())
973  if (MemSetInst *MDep = dyn_cast<MemSetInst>(DepInfo.getInst()))
974  if (processMemSetMemCpyDependence(M, MDep))
975  return true;
976 
977  // The optimizations after this point require the memcpy size.
978  ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength());
979  if (!CopySize) return false;
980 
981  // There are four possible optimizations we can do for memcpy:
982  // a) memcpy-memcpy xform which exposes redundance for DSE.
983  // b) call-memcpy xform for return slot optimization.
984  // c) memcpy from freshly alloca'd space or space that has just started its
985  // lifetime copies undefined data, and we can therefore eliminate the
986  // memcpy in favor of the data that was already at the destination.
987  // d) memcpy from a just-memset'd source can be turned into memset.
988  if (DepInfo.isClobber()) {
989  if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) {
990  if (performCallSlotOptzn(M, M->getDest(), M->getSource(),
991  CopySize->getZExtValue(), M->getAlignment(),
992  C)) {
993  MD->removeInstruction(M);
994  M->eraseFromParent();
995  return true;
996  }
997  }
998  }
 999 
 1000  MemoryLocation SrcLoc = MemoryLocation::getForSource(M);
 1001  MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true,
1002  M, M->getParent());
1003 
1004  if (SrcDepInfo.isClobber()) {
1005  if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst()))
1006  return processMemCpyMemCpyDependence(M, MDep);
1007  } else if (SrcDepInfo.isDef()) {
1008  Instruction *I = SrcDepInfo.getInst();
1009  bool hasUndefContents = false;
1010 
1011  if (isa<AllocaInst>(I)) {
1012  hasUndefContents = true;
1013  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
1014  if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1015  if (ConstantInt *LTSize = dyn_cast<ConstantInt>(II->getArgOperand(0)))
1016  if (LTSize->getZExtValue() >= CopySize->getZExtValue())
1017  hasUndefContents = true;
1018  }
1019 
1020  if (hasUndefContents) {
1021  MD->removeInstruction(M);
1022  M->eraseFromParent();
1023  ++NumMemCpyInstr;
1024  return true;
1025  }
1026  }
1027 
1028  if (SrcDepInfo.isClobber())
1029  if (MemSetInst *MDep = dyn_cast<MemSetInst>(SrcDepInfo.getInst()))
1030  if (performMemCpyToMemSetOptzn(M, MDep)) {
1031  MD->removeInstruction(M);
1032  M->eraseFromParent();
1033  ++NumCpyToSet;
1034  return true;
1035  }
1036 
1037  return false;
1038 }
1039 
1040 /// processMemMove - Transforms memmove calls to memcpy calls when the src/dst
1041 /// are guaranteed not to alias.
1042 bool MemCpyOpt::processMemMove(MemMoveInst *M) {
1043  AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
1044 
1045  if (!TLI->has(LibFunc::memmove))
1046  return false;
1047 
 1048  // See if the pointers alias.
 1049  if (!AA.isNoAlias(MemoryLocation::getForDest(M),
 1050  MemoryLocation::getForSource(M)))
 1051  return false;
1052 
1053  DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n");
1054 
1055  // If not, then we know we can transform this.
1056  Module *Mod = M->getParent()->getParent()->getParent();
1057  Type *ArgTys[3] = { M->getRawDest()->getType(),
1058  M->getRawSource()->getType(),
1059  M->getLength()->getType() };
1060  M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
1061  ArgTys));
1062 
 1063  // MemDep may have overly conservative information about this instruction; just
1064  // conservatively flush it from the cache.
1065  MD->removeInstruction(M);
1066 
1067  ++NumMoveToCpy;
1068  return true;
1069 }
1070 
1071 /// processByValArgument - This is called on every byval argument in call sites.
1072 bool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) {
1073  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout();
1074  // Find out what feeds this byval argument.
1075  Value *ByValArg = CS.getArgument(ArgNo);
1076  Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType();
1077  uint64_t ByValSize = DL.getTypeAllocSize(ByValTy);
1078  MemDepResult DepInfo = MD->getPointerDependencyFrom(
1079  MemoryLocation(ByValArg, ByValSize), true, CS.getInstruction(),
1080  CS.getInstruction()->getParent());
1081  if (!DepInfo.isClobber())
1082  return false;
1083 
1084  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1085  // a memcpy, see if we can byval from the source of the memcpy instead of the
1086  // result.
1087  MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst());
1088  if (!MDep || MDep->isVolatile() ||
1089  ByValArg->stripPointerCasts() != MDep->getDest())
1090  return false;
1091 
 1092  // The length of the memcpy must be larger than or equal to the size of the byval.
1093  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1094  if (!C1 || C1->getValue().getZExtValue() < ByValSize)
1095  return false;
1096 
1097  // Get the alignment of the byval. If the call doesn't specify the alignment,
1098  // then it is some target specific value that we can't know.
1099  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1);
1100  if (ByValAlign == 0) return false;
1101 
1102  // If it is greater than the memcpy, then we check to see if we can force the
1103  // source of the memcpy to the alignment we need. If we fail, we bail out.
1104  AssumptionCache &AC =
1105  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
1106  *CS->getParent()->getParent());
1107  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1108  if (MDep->getAlignment() < ByValAlign &&
1109  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL,
1110  CS.getInstruction(), &AC, &DT) < ByValAlign)
1111  return false;
1112 
1113  // Verify that the copied-from memory doesn't change in between the memcpy and
1114  // the byval call.
1115  // memcpy(a <- b)
1116  // *b = 42;
1117  // foo(*a)
1118  // It would be invalid to transform the second memcpy into foo(*b).
1119  //
1120  // NOTE: This is conservative, it will stop on any read from the source loc,
1121  // not just the defining memcpy.
1122  MemDepResult SourceDep =
1123  MD->getPointerDependencyFrom(MemoryLocation::getForSource(MDep), false,
1124  CS.getInstruction(), MDep->getParent());
1125  if (!SourceDep.isClobber() || SourceDep.getInst() != MDep)
1126  return false;
1127 
1128  Value *TmpCast = MDep->getSource();
1129  if (MDep->getSource()->getType() != ByValArg->getType())
1130  TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1131  "tmpcast", CS.getInstruction());
1132 
1133  DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n"
1134  << " " << *MDep << "\n"
1135  << " " << *CS.getInstruction() << "\n");
1136 
1137  // Otherwise we're good! Update the byval argument.
1138  CS.setArgument(ArgNo, TmpCast);
1139  ++NumMemCpyInstr;
1140  return true;
1141 }
1142 
1143 /// iterateOnFunction - Executes one iteration of MemCpyOpt.
1144 bool MemCpyOpt::iterateOnFunction(Function &F) {
1145  bool MadeChange = false;
1146 
 1147  // Walk all instructions in the function.
1148  for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) {
1149  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
1150  // Avoid invalidating the iterator.
1151  Instruction *I = BI++;
1152 
1153  bool RepeatInstruction = false;
1154 
1155  if (StoreInst *SI = dyn_cast<StoreInst>(I))
1156  MadeChange |= processStore(SI, BI);
1157  else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
1158  RepeatInstruction = processMemSet(M, BI);
1159  else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
1160  RepeatInstruction = processMemCpy(M);
1161  else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1162  RepeatInstruction = processMemMove(M);
1163  else if (auto CS = CallSite(I)) {
1164  for (unsigned i = 0, e = CS.arg_size(); i != e; ++i)
1165  if (CS.isByValArgument(i))
1166  MadeChange |= processByValArgument(CS, i);
1167  }
1168 
1169  // Reprocess the instruction if desired.
1170  if (RepeatInstruction) {
1171  if (BI != BB->begin()) --BI;
1172  MadeChange = true;
1173  }
1174  }
1175  }
1176 
1177  return MadeChange;
1178 }
1179 
1180 // MemCpyOpt::runOnFunction - This is the main transformation entry point for a
1181 // function.
1182 //
1183 bool MemCpyOpt::runOnFunction(Function &F) {
1184  if (skipOptnoneFunction(F))
1185  return false;
1186 
1187  bool MadeChange = false;
1188  MD = &getAnalysis<MemoryDependenceAnalysis>();
1189  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
1190 
 1191  // If we don't have at least memset and memcpy, there is little point in doing
1192  // anything here. These are required by a freestanding implementation, so if
1193  // even they are disabled, there is no point in trying hard.
1194  if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy))
1195  return false;
1196 
1197  while (1) {
1198  if (!iterateOnFunction(F))
1199  break;
1200  MadeChange = true;
1201  }
1202 
1203  MD = nullptr;
1204  return MadeChange;
1205 }