1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs various transformations related to eliminating memcpy
10 // calls, or transforming sets of stores into memset's.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/iterator_range.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/CaptureTracking.h"
24 #include "llvm/Analysis/GlobalsModRef.h"
25 #include "llvm/Analysis/Loads.h"
26 #include "llvm/Analysis/MemoryLocation.h"
27 #include "llvm/Analysis/MemorySSA.h"
28 #include "llvm/Analysis/MemorySSAUpdater.h"
29 #include "llvm/Analysis/TargetLibraryInfo.h"
30 #include "llvm/Analysis/ValueTracking.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalVariable.h"
38 #include "llvm/IR/IRBuilder.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/IntrinsicInst.h"
43 #include "llvm/IR/Intrinsics.h"
44 #include "llvm/IR/LLVMContext.h"
45 #include "llvm/IR/Module.h"
46 #include "llvm/IR/PassManager.h"
47 #include "llvm/IR/Type.h"
48 #include "llvm/IR/User.h"
49 #include "llvm/IR/Value.h"
50 #include "llvm/InitializePasses.h"
51 #include "llvm/Pass.h"
52 #include "llvm/Support/Casting.h"
53 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/MathExtras.h"
55 #include "llvm/Support/raw_ostream.h"
56 #include "llvm/Transforms/Scalar.h"
57 #include "llvm/Transforms/Utils/Local.h"
58 #include <algorithm>
59 #include <cassert>
60 #include <cstdint>
61 
62 using namespace llvm;
63 
64 #define DEBUG_TYPE "memcpyopt"
65 
66 static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
67     "enable-memcpyopt-without-libcalls", cl::Hidden,
68  cl::desc("Enable memcpyopt even when libcalls are disabled"));
69 
70 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
71 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
72 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
73 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
74 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
75 
76 namespace {
77 
78 /// Represents a range of memset'd bytes with the ByteVal value.
79 /// This allows us to analyze stores like:
80 /// store 0 -> P+1
81 /// store 0 -> P+0
82 /// store 0 -> P+3
83 /// store 0 -> P+2
84 /// which sometimes happens with stores to arrays of structs etc. When we see
85 /// the first store, we make a range [1, 2). The second store extends the range
86 /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
87 /// two ranges into [0, 3) which is memset'able.
88 struct MemsetRange {
89   // Start/End - A semi-open range that describes the span that this range covers.
90  // The range is closed at the start and open at the end: [Start, End).
91  int64_t Start, End;
92 
93  /// StartPtr - The getelementptr instruction that points to the start of the
94  /// range.
95  Value *StartPtr;
96 
97  /// Alignment - The known alignment of the first store.
98  MaybeAlign Alignment;
99 
100  /// TheStores - The actual stores that make up this range.
101   SmallVector<Instruction *, 16> TheStores;
102 
103  bool isProfitableToUseMemset(const DataLayout &DL) const;
104 };
105 
106 } // end anonymous namespace
107 
108 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
109   // If we found at least 4 stores to merge, or 16 or more bytes, use memset.
110  if (TheStores.size() >= 4 || End-Start >= 16) return true;
111 
112  // If there is nothing to merge, don't do anything.
113  if (TheStores.size() < 2) return false;
114 
115  // If any of the stores are a memset, then it is always good to extend the
116  // memset.
117  for (Instruction *SI : TheStores)
118  if (!isa<StoreInst>(SI))
119  return true;
120 
121  // Assume that the code generator is capable of merging pairs of stores
122  // together if it wants to.
123  if (TheStores.size() == 2) return false;
124 
125  // If we have fewer than 8 stores, it can still be worthwhile to do this.
126  // For example, merging 4 i8 stores into an i32 store is useful almost always.
127  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
128  // memset will be split into 2 32-bit stores anyway) and doing so can
129  // pessimize the llvm optimizer.
130  //
131  // Since we don't have perfect knowledge here, make some assumptions: assume
132  // the maximum GPR width is the same size as the largest legal integer
133  // size. If so, check to see whether we will end up actually reducing the
134  // number of stores used.
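  // Illustrative worked example (numbers are placeholders, not from the
  // original comments): with End-Start == 4 and a largest legal integer of
  // i32, Bytes == 4, MaxIntSize == 4, NumPointerStores == 1 and
  // NumByteStores == 0. Merging three small stores (say two i8 and one i16)
  // covering those 4 bytes therefore reduces the store count and is
  // considered profitable.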
135  unsigned Bytes = unsigned(End-Start);
136  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
137  if (MaxIntSize == 0)
138  MaxIntSize = 1;
139  unsigned NumPointerStores = Bytes / MaxIntSize;
140 
141  // Assume the remaining bytes if any are done a byte at a time.
142  unsigned NumByteStores = Bytes % MaxIntSize;
143 
144  // If we will reduce the # stores (according to this heuristic), do the
145  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
146  // etc.
147  return TheStores.size() > NumPointerStores+NumByteStores;
148 }
149 
150 namespace {
151 
152 class MemsetRanges {
153  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
154 
155  /// A sorted list of the memset ranges.
156   SmallVector<MemsetRange, 8> Ranges;
157 
158  const DataLayout &DL;
159 
160 public:
161  MemsetRanges(const DataLayout &DL) : DL(DL) {}
162 
163  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
164 
165  const_iterator begin() const { return Ranges.begin(); }
166  const_iterator end() const { return Ranges.end(); }
167  bool empty() const { return Ranges.empty(); }
168 
169  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
170  if (auto *SI = dyn_cast<StoreInst>(Inst))
171  addStore(OffsetFromFirst, SI);
172  else
173  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
174  }
175 
176  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
177  TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
178  assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
179  addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
180  SI->getAlign(), SI);
181  }
182 
183  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
184  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
185  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlign(), MSI);
186  }
187 
188  void addRange(int64_t Start, int64_t Size, Value *Ptr, MaybeAlign Alignment,
189  Instruction *Inst);
190 };
191 
192 } // end anonymous namespace
193 
194 /// Add a new store to the MemsetRanges data structure. This adds a
195 /// new range for the specified store at the specified offset, merging into
196 /// existing ranges as appropriate.
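/// For illustration (offsets are placeholders): given existing ranges [0, 4)
/// and [8, 12), adding a 4-byte store at offset 4 first extends the first
/// range to [0, 8), and the merge loop below then folds [8, 12) into it,
/// leaving the single memset'able range [0, 12).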
197 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
198  MaybeAlign Alignment, Instruction *Inst) {
199  int64_t End = Start+Size;
200 
201  range_iterator I = partition_point(
202  Ranges, [=](const MemsetRange &O) { return O.End < Start; });
203 
204  // We now know that I == E, in which case we didn't find anything to merge
205  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
206  // to insert a new range. Handle this now.
207  if (I == Ranges.end() || End < I->Start) {
208  MemsetRange &R = *Ranges.insert(I, MemsetRange());
209  R.Start = Start;
210  R.End = End;
211  R.StartPtr = Ptr;
212  R.Alignment = Alignment;
213  R.TheStores.push_back(Inst);
214  return;
215  }
216 
217  // This store overlaps with I, add it.
218  I->TheStores.push_back(Inst);
219 
220  // At this point, we may have an interval that completely contains our store.
221  // If so, just add it to the interval and return.
222  if (I->Start <= Start && I->End >= End)
223  return;
224 
225  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
226  // but is not entirely contained within the range.
227 
228   // See if the new store extends the start of the range. In this case, it couldn't
229  // possibly cause it to join the prior range, because otherwise we would have
230  // stopped on *it*.
231  if (Start < I->Start) {
232  I->Start = Start;
233  I->StartPtr = Ptr;
234  I->Alignment = Alignment;
235  }
236 
237  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
238  // is in or right at the end of I), and that End >= I->Start. Extend I out to
239  // End.
240  if (End > I->End) {
241  I->End = End;
242  range_iterator NextI = I;
243  while (++NextI != Ranges.end() && End >= NextI->Start) {
244  // Merge the range in.
245  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
246  if (NextI->End > I->End)
247  I->End = NextI->End;
248  Ranges.erase(NextI);
249  NextI = I;
250  }
251  }
252 }
253 
254 //===----------------------------------------------------------------------===//
255 // MemCpyOptLegacyPass Pass
256 //===----------------------------------------------------------------------===//
257 
258 namespace {
259 
260 class MemCpyOptLegacyPass : public FunctionPass {
261  MemCpyOptPass Impl;
262 
263 public:
264  static char ID; // Pass identification, replacement for typeid
265 
266  MemCpyOptLegacyPass() : FunctionPass(ID) {
267     initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
268   }
269 
270  bool runOnFunction(Function &F) override;
271 
272 private:
273   // This transformation requires dominator info.
274   void getAnalysisUsage(AnalysisUsage &AU) const override {
275     AU.setPreservesCFG();
276     AU.addRequired<AssumptionCacheTracker>();
277     AU.addRequired<DominatorTreeWrapperPass>();
278     AU.addPreserved<DominatorTreeWrapperPass>();
279     AU.addPreserved<GlobalsAAWrapperPass>();
280     AU.addRequired<TargetLibraryInfoWrapperPass>();
281     AU.addRequired<AAResultsWrapperPass>();
282     AU.addPreserved<AAResultsWrapperPass>();
283     AU.addRequired<MemorySSAWrapperPass>();
284     AU.addPreserved<MemorySSAWrapperPass>();
285   }
286 };
287 
288 } // end anonymous namespace
289 
290 char MemCpyOptLegacyPass::ID = 0;
291 
292 /// The public interface to this file...
293 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
294 
295 INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
296  false, false)
297 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
298 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
299 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
300 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
301 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
302 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
303 INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
304                     false, false)
305 
306 // Check that V is either not accessible by the caller, or unwinding cannot
307 // occur between Start and End.
308 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
309                                          Instruction *End) {
310  assert(Start->getParent() == End->getParent() && "Must be in same block");
311  // Function can't unwind, so it also can't be visible through unwinding.
312  if (Start->getFunction()->doesNotThrow())
313  return false;
314 
315  // Object is not visible on unwind.
316  // TODO: Support RequiresNoCaptureBeforeUnwind case.
317  bool RequiresNoCaptureBeforeUnwind;
318   if (isNotVisibleOnUnwind(getUnderlyingObject(V),
319                            RequiresNoCaptureBeforeUnwind) &&
320  !RequiresNoCaptureBeforeUnwind)
321  return false;
322 
323  // Check whether there are any unwinding instructions in the range.
324  return any_of(make_range(Start->getIterator(), End->getIterator()),
325  [](const Instruction &I) { return I.mayThrow(); });
326 }
327 
328 void MemCpyOptPass::eraseInstruction(Instruction *I) {
329  MSSAU->removeMemoryAccess(I);
330  I->eraseFromParent();
331 }
332 
333 // Check for mod or ref of Loc between Start and End, excluding both boundaries.
334 // Start and End must be in the same block
335 static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
336                             const MemoryUseOrDef *Start,
337  const MemoryUseOrDef *End) {
338  assert(Start->getBlock() == End->getBlock() && "Only local supported");
339  for (const MemoryAccess &MA :
340  make_range(++Start->getIterator(), End->getIterator())) {
341  if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
342  Loc)))
343  return true;
344  }
345  return false;
346 }
347 
348 // Check for mod of Loc between Start and End, excluding both boundaries.
349 // Start and End can be in different blocks.
350 static bool writtenBetween(MemorySSA *MSSA, AliasAnalysis &AA,
351                            MemoryLocation Loc, const MemoryUseOrDef *Start,
352  const MemoryUseOrDef *End) {
353  if (isa<MemoryUse>(End)) {
354  // For MemoryUses, getClobberingMemoryAccess may skip non-clobbering writes.
355  // Manually check read accesses between Start and End, if they are in the
356  // same block, for clobbers. Otherwise assume Loc is clobbered.
357  return Start->getBlock() != End->getBlock() ||
358  any_of(
359  make_range(std::next(Start->getIterator()), End->getIterator()),
360  [&AA, Loc](const MemoryAccess &Acc) {
361  if (isa<MemoryUse>(&Acc))
362  return false;
363  Instruction *AccInst =
364  cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
365  return isModSet(AA.getModRefInfo(AccInst, Loc));
366  });
367  }
368 
369  // TODO: Only walk until we hit Start.
370   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
371       End->getDefiningAccess(), Loc);
372  return !MSSA->dominates(Clobber, Start);
373 }
374 
375 /// When scanning forward over instructions, we look for some other patterns to
376 /// fold away. In particular, this looks for stores to neighboring locations of
377 /// memory. If it sees enough consecutive ones, it attempts to merge them
378 /// together into a memcpy/memset.
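///
/// For illustration, a sketch of the rewrite (names and types are
/// placeholders, not taken from the surrounding code):
///   store i8 0, i8* %p
///   store i8 0, i8* %p1      ; %p1 = getelementptr i8, i8* %p, i64 1
///   store i8 0, i8* %p2      ; %p2 = getelementptr i8, i8* %p, i64 2
///   store i8 0, i8* %p3      ; %p3 = getelementptr i8, i8* %p, i64 3
/// becomes, when the profitability heuristic allows it:
///   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i1 false)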
379 Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
380  Value *StartPtr,
381  Value *ByteVal) {
382  const DataLayout &DL = StartInst->getModule()->getDataLayout();
383 
384  // We can't track scalable types
385  if (auto *SI = dyn_cast<StoreInst>(StartInst))
386  if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
387  return nullptr;
388 
389   // Okay, so we now have a single store of a splattable value. Scan to find
390   // all subsequent stores of the same value at constant offsets from the same
391   // pointer. Join these together into ranges, so we can decide whether
392   // contiguous blocks are stored.
393  MemsetRanges Ranges(DL);
394 
395  BasicBlock::iterator BI(StartInst);
396 
397  // Keeps track of the last memory use or def before the insertion point for
398  // the new memset. The new MemoryDef for the inserted memsets will be inserted
399  // after MemInsertPoint. It points to either LastMemDef or to the last user
400  // before the insertion point of the memset, if there are any such users.
401  MemoryUseOrDef *MemInsertPoint = nullptr;
402  // Keeps track of the last MemoryDef between StartInst and the insertion point
403  // for the new memset. This will become the defining access of the inserted
404  // memsets.
405  MemoryDef *LastMemDef = nullptr;
406  for (++BI; !BI->isTerminator(); ++BI) {
407  auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
408  MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
409  if (CurrentAcc) {
410  MemInsertPoint = CurrentAcc;
411  if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
412  LastMemDef = CurrentDef;
413  }
414 
415  // Calls that only access inaccessible memory do not block merging
416  // accessible stores.
417  if (auto *CB = dyn_cast<CallBase>(BI)) {
418  if (CB->onlyAccessesInaccessibleMemory())
419  continue;
420  }
421 
422  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
423  // If the instruction is readnone, ignore it, otherwise bail out. We
424  // don't even allow readonly here because we don't want something like:
425  // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
426  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
427  break;
428  continue;
429  }
430 
431  if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
432  // If this is a store, see if we can merge it in.
433  if (!NextStore->isSimple()) break;
434 
435  Value *StoredVal = NextStore->getValueOperand();
436 
437  // Don't convert stores of non-integral pointer types to memsets (which
438  // stores integers).
439  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
440  break;
441 
442  // We can't track ranges involving scalable types.
443  if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
444  break;
445 
446  // Check to see if this stored value is of the same byte-splattable value.
447  Value *StoredByte = isBytewiseValue(StoredVal, DL);
448  if (isa<UndefValue>(ByteVal) && StoredByte)
449  ByteVal = StoredByte;
450  if (ByteVal != StoredByte)
451  break;
452 
453  // Check to see if this store is to a constant offset from the start ptr.
454       Optional<int64_t> Offset =
455           isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
456  if (!Offset)
457  break;
458 
459  Ranges.addStore(*Offset, NextStore);
460  } else {
461  auto *MSI = cast<MemSetInst>(BI);
462 
463  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
464  !isa<ConstantInt>(MSI->getLength()))
465  break;
466 
467  // Check to see if this store is to a constant offset from the start ptr.
468  Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
469  if (!Offset)
470  break;
471 
472  Ranges.addMemSet(*Offset, MSI);
473  }
474  }
475 
476  // If we have no ranges, then we just had a single store with nothing that
477  // could be merged in. This is a very common case of course.
478  if (Ranges.empty())
479  return nullptr;
480 
481  // If we had at least one store that could be merged in, add the starting
482  // store as well. We try to avoid this unless there is at least something
483  // interesting as a small compile-time optimization.
484  Ranges.addInst(0, StartInst);
485 
486  // If we create any memsets, we put it right before the first instruction that
487   // isn't part of the memset block. This ensures that the memset is dominated
488  // by any addressing instruction needed by the start of the block.
489  IRBuilder<> Builder(&*BI);
490 
491  // Now that we have full information about ranges, loop over the ranges and
492  // emit memset's for anything big enough to be worthwhile.
493  Instruction *AMemSet = nullptr;
494  for (const MemsetRange &Range : Ranges) {
495  if (Range.TheStores.size() == 1) continue;
496 
497  // If it is profitable to lower this range to memset, do so now.
498  if (!Range.isProfitableToUseMemset(DL))
499  continue;
500 
501  // Otherwise, we do want to transform this! Create a new memset.
502  // Get the starting pointer of the block.
503  StartPtr = Range.StartPtr;
504 
505  AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
506  Range.Alignment);
507  LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
508  : Range.TheStores) dbgs()
509  << *SI << '\n';
510  dbgs() << "With: " << *AMemSet << '\n');
511  if (!Range.TheStores.empty())
512  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
513 
514  assert(LastMemDef && MemInsertPoint &&
515  "Both LastMemDef and MemInsertPoint need to be set");
516  auto *NewDef =
517  cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
518  ? MSSAU->createMemoryAccessBefore(
519  AMemSet, LastMemDef, MemInsertPoint)
520  : MSSAU->createMemoryAccessAfter(
521  AMemSet, LastMemDef, MemInsertPoint));
522  MSSAU->insertDef(NewDef, /*RenameUses=*/true);
523  LastMemDef = NewDef;
524  MemInsertPoint = NewDef;
525 
526  // Zap all the stores.
527  for (Instruction *SI : Range.TheStores)
528  eraseInstruction(SI);
529 
530  ++NumMemSetInfer;
531  }
532 
533  return AMemSet;
534 }
535 
536 // This method tries to lift the store instruction SI to just before position
537 // P. It lifts the store, the instructions that compute its operands, and
538 // anything in between that may alias with them.
539 // The method returns true if it was successful.
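//
// For illustration (a sketch; names are placeholders):
//   %v = load i32, i32* %src
//   call void @f(i32* %src)        ; P: may overwrite *%src
//   store i32 %v, i32* %dst        ; SI
// The store is lifted above the call so that processStore can emit the
// memcpy/memmove at that earlier point, where *%src still holds %v.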
540 bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
541   // If the store aliases this position, early bail out.
542   MemoryLocation StoreLoc = MemoryLocation::get(SI);
543   if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
544  return false;
545 
546   // Keep track of the arguments of all instructions we plan to lift
547   // so we can make sure to lift them as well if appropriate.
548   DenseSet<Instruction *> Args;
549   if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
550  if (Ptr->getParent() == SI->getParent())
551  Args.insert(Ptr);
552 
553   // Instructions to lift before P.
554   SmallVector<Instruction *, 8> ToLift{SI};
555 
556  // Memory locations of lifted instructions.
557  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
558 
559  // Lifted calls.
560   SmallVector<const CallBase *, 8> Calls;
561 
562  const MemoryLocation LoadLoc = MemoryLocation::get(LI);
563 
564  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
565  auto *C = &*I;
566 
567  // Make sure hoisting does not perform a store that was not guaranteed to
568  // happen.
569     if (!isGuaranteedToTransferExecutionToSuccessor(C))
570       return false;
571 
572  bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
573 
574  bool NeedLift = false;
575  if (Args.erase(C))
576  NeedLift = true;
577  else if (MayAlias) {
578  NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
579  return isModOrRefSet(AA->getModRefInfo(C, ML));
580  });
581 
582  if (!NeedLift)
583  NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
584  return isModOrRefSet(AA->getModRefInfo(C, Call));
585  });
586  }
587 
588  if (!NeedLift)
589  continue;
590 
591  if (MayAlias) {
592  // Since LI is implicitly moved downwards past the lifted instructions,
593  // none of them may modify its source.
594  if (isModSet(AA->getModRefInfo(C, LoadLoc)))
595  return false;
596  else if (const auto *Call = dyn_cast<CallBase>(C)) {
597  // If we can't lift this before P, it's game over.
598  if (isModOrRefSet(AA->getModRefInfo(P, Call)))
599  return false;
600 
601  Calls.push_back(Call);
602  } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
603  // If we can't lift this before P, it's game over.
604  auto ML = MemoryLocation::get(C);
605  if (isModOrRefSet(AA->getModRefInfo(P, ML)))
606  return false;
607 
608  MemLocs.push_back(ML);
609  } else
610  // We don't know how to lift this instruction.
611  return false;
612  }
613 
614  ToLift.push_back(C);
615  for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
616  if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
617  if (A->getParent() == SI->getParent()) {
618  // Cannot hoist user of P above P
619  if(A == P) return false;
620  Args.insert(A);
621  }
622  }
623  }
624 
625  // Find MSSA insertion point. Normally P will always have a corresponding
626  // memory access before which we can insert. However, with non-standard AA
627  // pipelines, there may be a mismatch between AA and MSSA, in which case we
628  // will scan for a memory access before P. In either case, we know for sure
629  // that at least the load will have a memory access.
630  // TODO: Simplify this once P will be determined by MSSA, in which case the
631  // discrepancy can no longer occur.
632  MemoryUseOrDef *MemInsertPoint = nullptr;
633  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
634  MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
635  } else {
636  const Instruction *ConstP = P;
637  for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
638  ++LI->getReverseIterator())) {
639  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
640  MemInsertPoint = MA;
641  break;
642  }
643  }
644  }
645 
646  // We made it, we need to lift.
647  for (auto *I : llvm::reverse(ToLift)) {
648  LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
649  I->moveBefore(P);
650  assert(MemInsertPoint && "Must have found insert point");
651  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
652  MSSAU->moveAfter(MA, MemInsertPoint);
653  MemInsertPoint = MA;
654  }
655  }
656 
657  return true;
658 }
659 
660 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
661  if (!SI->isSimple()) return false;
662 
663  // Avoid merging nontemporal stores since the resulting
664  // memcpy/memset would not be able to preserve the nontemporal hint.
665   // In theory we could teach this pass how to propagate the !nontemporal
666   // metadata to memset calls. However, that change would force the backend to
667  // conservatively expand !nontemporal memset calls back to sequences of
668  // store instructions (effectively undoing the merging).
669  if (SI->getMetadata(LLVMContext::MD_nontemporal))
670  return false;
671 
672  const DataLayout &DL = SI->getModule()->getDataLayout();
673 
674  Value *StoredVal = SI->getValueOperand();
675 
676  // Not all the transforms below are correct for non-integral pointers, bail
677  // until we've audited the individual pieces.
678  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
679  return false;
680 
681  // Load to store forwarding can be interpreted as memcpy.
682  if (auto *LI = dyn_cast<LoadInst>(StoredVal)) {
683  if (LI->isSimple() && LI->hasOneUse() &&
684  LI->getParent() == SI->getParent()) {
685 
686  auto *T = LI->getType();
687  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
688  // the corresponding libcalls are not available.
689  // TODO: We should really distinguish between libcall availability and
690  // our ability to introduce intrinsics.
691  if (T->isAggregateType() &&
692           (EnableMemCpyOptWithoutLibcalls ||
693            (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
694  MemoryLocation LoadLoc = MemoryLocation::get(LI);
695 
696  // We use alias analysis to check if an instruction may store to
697  // the memory we load from in between the load and the store. If
698  // such an instruction is found, we try to promote there instead
699  // of at the store position.
700  // TODO: Can use MSSA for this.
701  Instruction *P = SI;
702  for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
703  if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
704  P = &I;
705  break;
706  }
707  }
708 
709  // We found an instruction that may write to the loaded memory.
710  // We can try to promote at this position instead of the store
711  // position if nothing aliases the store memory after this and the store
712  // destination is not in the range.
713  if (P && P != SI) {
714  if (!moveUp(SI, P, LI))
715  P = nullptr;
716  }
717 
718  // If a valid insertion position is found, then we can promote
719  // the load/store pair to a memcpy.
720  if (P) {
721  // If we load from memory that may alias the memory we store to,
722       // memmove must be used to preserve the semantics. If not, memcpy can
723  // be used. Also, if we load from constant memory, memcpy can be used
724  // as the constant memory won't be modified.
725  bool UseMemMove = false;
726  if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
727  UseMemMove = true;
728 
729  uint64_t Size = DL.getTypeStoreSize(T);
730 
731         IRBuilder<> Builder(P);
732         Instruction *M;
733  if (UseMemMove)
734  M = Builder.CreateMemMove(
735  SI->getPointerOperand(), SI->getAlign(),
736  LI->getPointerOperand(), LI->getAlign(), Size);
737  else
738  M = Builder.CreateMemCpy(
739  SI->getPointerOperand(), SI->getAlign(),
740  LI->getPointerOperand(), LI->getAlign(), Size);
741 
742  LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
743  << *M << "\n");
744 
745  auto *LastDef =
746  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
747  auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
748  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
749 
750  eraseInstruction(SI);
751  eraseInstruction(LI);
752  ++NumMemCpyInstr;
753 
754  // Make sure we do not invalidate the iterator.
755  BBI = M->getIterator();
756  return true;
757  }
758  }
759 
760  // Detect cases where we're performing call slot forwarding, but
761  // happen to be using a load-store pair to implement it, rather than
762  // a memcpy.
763  auto GetCall = [&]() -> CallInst * {
764  // We defer this expensive clobber walk until the cheap checks
765  // have been done on the source inside performCallSlotOptzn.
766  if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
767  MSSA->getWalker()->getClobberingMemoryAccess(LI)))
768  return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
769  return nullptr;
770  };
771 
772  bool changed = performCallSlotOptzn(
773  LI, SI, SI->getPointerOperand()->stripPointerCasts(),
774         LI->getPointerOperand()->stripPointerCasts(),
775         DL.getTypeStoreSize(SI->getOperand(0)->getType()),
776  commonAlignment(SI->getAlign(), LI->getAlign()), GetCall);
777  if (changed) {
778  eraseInstruction(SI);
779  eraseInstruction(LI);
780  ++NumMemCpyInstr;
781  return true;
782  }
783  }
784  }
785 
786  // The following code creates memset intrinsics out of thin air. Don't do
787  // this if the corresponding libfunc is not available.
788  // TODO: We should really distinguish between libcall availability and
789  // our ability to introduce intrinsics.
790  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
791  return false;
792 
793  // There are two cases that are interesting for this code to handle: memcpy
794  // and memset. Right now we only handle memset.
795 
796   // Ensure that the value being stored is something that can be memset a byte
797   // at a time, like "0" or "-1" of any width, as well as things like
798   // 0xA0A0A0A0 and 0.0.
799  auto *V = SI->getOperand(0);
800  if (Value *ByteVal = isBytewiseValue(V, DL)) {
801  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
802  ByteVal)) {
803  BBI = I->getIterator(); // Don't invalidate iterator.
804  return true;
805  }
806 
807  // If we have an aggregate, we try to promote it to memset regardless
808  // of opportunity for merging as it can expose optimization opportunities
809  // in subsequent passes.
810  auto *T = V->getType();
811  if (T->isAggregateType()) {
812  uint64_t Size = DL.getTypeStoreSize(T);
813       IRBuilder<> Builder(SI);
814       auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
815  SI->getAlign());
816 
817  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
818 
819  // The newly inserted memset is immediately overwritten by the original
820  // store, so we do not need to rename uses.
821  auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
822  auto *NewAccess = MSSAU->createMemoryAccessBefore(
823  M, StoreDef->getDefiningAccess(), StoreDef);
824  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
825 
826  eraseInstruction(SI);
827  NumMemSetInfer++;
828 
829  // Make sure we do not invalidate the iterator.
830  BBI = M->getIterator();
831  return true;
832  }
833  }
834 
835  return false;
836 }
837 
838 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
839  // See if there is another memset or store neighboring this memset which
840  // allows us to widen out the memset to do a single larger store.
841  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
842  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
843  MSI->getValue())) {
844  BBI = I->getIterator(); // Don't invalidate iterator.
845  return true;
846  }
847  return false;
848 }
849 
850 /// Takes a memcpy and a call that it depends on,
851 /// and checks for the possibility of a call slot optimization by having
852 /// the call write its result directly into the destination of the memcpy.
853 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
854  Instruction *cpyStore, Value *cpyDest,
855  Value *cpySrc, TypeSize cpySize,
856  Align cpyAlign,
857  std::function<CallInst *()> GetC) {
858  // The general transformation to keep in mind is
859  //
860  // call @func(..., src, ...)
861  // memcpy(dest, src, ...)
862  //
863  // ->
864  //
865  // memcpy(dest, src, ...)
866  // call @func(..., dest, ...)
867  //
868  // Since moving the memcpy is technically awkward, we additionally check that
869  // src only holds uninitialized values at the moment of the call, meaning that
870  // the memcpy can be discarded rather than moved.
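  //
  // For illustration (a sketch; names are placeholders):
  //   %tmp = alloca %T
  //   call void @produce(%T* sret(%T) %tmp)
  //   memcpy(%dst <- %tmp, sizeof(%T))
  // can become
  //   call void @produce(%T* sret(%T) %dst)
  // with the memcpy (and eventually %tmp) removed, provided the checks below
  // all hold.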
871 
872  // We can't optimize scalable types.
873  if (cpySize.isScalable())
874  return false;
875 
876  // Require that src be an alloca. This simplifies the reasoning considerably.
877  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
878  if (!srcAlloca)
879  return false;
880 
881  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
882  if (!srcArraySize)
883  return false;
884 
885  const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
886  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
887  srcArraySize->getZExtValue();
888 
889  if (cpySize < srcSize)
890  return false;
891 
892  CallInst *C = GetC();
893  if (!C)
894  return false;
895 
896  // Lifetime marks shouldn't be operated on.
897  if (Function *F = C->getCalledFunction())
898  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
899  return false;
900 
901 
902  if (C->getParent() != cpyStore->getParent()) {
903  LLVM_DEBUG(dbgs() << "Call Slot: block local restriction\n");
904  return false;
905  }
906 
907  MemoryLocation DestLoc = isa<StoreInst>(cpyStore) ?
908  MemoryLocation::get(cpyStore) :
909  MemoryLocation::getForDest(cast<MemCpyInst>(cpyStore));
910 
911  // Check that nothing touches the dest of the copy between
912  // the call and the store/memcpy.
913  if (accessedBetween(*AA, DestLoc, MSSA->getMemoryAccess(C),
914  MSSA->getMemoryAccess(cpyStore))) {
915  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer modified after call\n");
916  return false;
917  }
918 
919  // Check that accessing the first srcSize bytes of dest will not cause a
920  // trap. Otherwise the transform is invalid since it might cause a trap
921  // to occur earlier than it otherwise would.
922  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
923  DL, C, DT)) {
924  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
925  return false;
926  }
927 
928 
929  // Make sure that nothing can observe cpyDest being written early. There are
930  // a number of cases to consider:
931  // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
932  // the transform.
933  // 2. C itself may not access cpyDest (prior to the transform). This is
934  // checked further below.
935  // 3. If cpyDest is accessible to the caller of this function (potentially
936  // captured and not based on an alloca), we need to ensure that we cannot
937  // unwind between C and cpyStore. This is checked here.
938  // 4. If cpyDest is potentially captured, there may be accesses to it from
939  // another thread. In this case, we need to check that cpyStore is
940  // guaranteed to be executed if C is. As it is a non-atomic access, it
941  // renders accesses from other threads undefined.
942  // TODO: This is currently not checked.
943  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
944  LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding");
945  return false;
946  }
947 
948  // Check that dest points to memory that is at least as aligned as src.
949  Align srcAlign = srcAlloca->getAlign();
950  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
951  // If dest is not aligned enough and we can't increase its alignment then
952  // bail out.
953  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
954  return false;
955 
956  // Check that src is not accessed except via the call and the memcpy. This
957  // guarantees that it holds only undefined values when passed in (so the final
958  // memcpy can be dropped), that it is not read or written between the call and
959  // the memcpy, and that writing beyond the end of it is undefined.
960  SmallVector<User *, 8> srcUseList(srcAlloca->users());
961  while (!srcUseList.empty()) {
962  User *U = srcUseList.pop_back_val();
963 
964  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
965  append_range(srcUseList, U->users());
966  continue;
967  }
968  if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
969  if (!G->hasAllZeroIndices())
970  return false;
971 
972  append_range(srcUseList, U->users());
973  continue;
974  }
975  if (const auto *IT = dyn_cast<IntrinsicInst>(U))
976  if (IT->isLifetimeStartOrEnd())
977  continue;
978 
979  if (U != C && U != cpyLoad)
980  return false;
981  }
982 
983  // Check whether src is captured by the called function, in which case there
984  // may be further indirect uses of src.
985  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
986  return U->stripPointerCasts() == cpySrc &&
987  !C->doesNotCapture(C->getArgOperandNo(&U));
988  });
989 
990  // If src is captured, then check whether there are any potential uses of
991  // src through the captured pointer before the lifetime of src ends, either
992  // due to a lifetime.end or a return from the function.
993  if (SrcIsCaptured) {
994  // Check that dest is not captured before/at the call. We have already
995  // checked that src is not captured before it. If either had been captured,
996  // then the call might be comparing the argument against the captured dest
997  // or src pointer.
998  Value *DestObj = getUnderlyingObject(cpyDest);
999  if (!isIdentifiedFunctionLocal(DestObj) ||
1000  PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
1001  /* StoreCaptures */ true, C, DT,
1002  /* IncludeI */ true))
1003  return false;
1004 
1005  MemoryLocation SrcLoc =
1006  MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
1007  for (Instruction &I :
1008  make_range(++C->getIterator(), C->getParent()->end())) {
1009  // Lifetime of srcAlloca ends at lifetime.end.
1010  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
1011  if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1012  II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1013  cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1014  break;
1015  }
1016 
1017  // Lifetime of srcAlloca ends at return.
1018  if (isa<ReturnInst>(&I))
1019  break;
1020 
1021  // Ignore the direct read of src in the load.
1022  if (&I == cpyLoad)
1023  continue;
1024 
1025  // Check whether this instruction may mod/ref src through the captured
1026     // pointer (we have already handled any direct mod/refs in the loop above).
1027  // Also bail if we hit a terminator, as we don't want to scan into other
1028  // blocks.
1029  if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1030  return false;
1031  }
1032  }
1033 
1034  // Since we're changing the parameter to the callsite, we need to make sure
1035  // that what would be the new parameter dominates the callsite.
1036  if (!DT->dominates(cpyDest, C)) {
1037  // Support moving a constant index GEP before the call.
1038  auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1039  if (GEP && GEP->hasAllConstantIndices() &&
1040  DT->dominates(GEP->getPointerOperand(), C))
1041  GEP->moveBefore(C);
1042  else
1043  return false;
1044  }
1045 
1046  // In addition to knowing that the call does not access src in some
1047  // unexpected manner, for example via a global, which we deduce from
1048  // the use analysis, we also need to know that it does not sneakily
1049  // access dest. We rely on AA to figure this out for us.
1050  ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
1051  // If necessary, perform additional analysis.
1052  if (isModOrRefSet(MR))
1053  MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
1054  if (isModOrRefSet(MR))
1055  return false;
1056 
1057  // We can't create address space casts here because we don't know if they're
1058  // safe for the target.
1059  if (cpySrc->getType()->getPointerAddressSpace() !=
1060  cpyDest->getType()->getPointerAddressSpace())
1061  return false;
1062  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1063  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1064  cpySrc->getType()->getPointerAddressSpace() !=
1065  C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1066  return false;
1067 
1068  // All the checks have passed, so do the transformation.
1069  bool changedArgument = false;
1070  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1071  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1072  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1073  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1074  cpyDest->getName(), C);
1075  changedArgument = true;
1076  if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1077  C->setArgOperand(ArgI, Dest);
1078  else
1079  C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1080  Dest, C->getArgOperand(ArgI)->getType(),
1081  Dest->getName(), C));
1082  }
1083 
1084  if (!changedArgument)
1085  return false;
1086 
1087  // If the destination wasn't sufficiently aligned then increase its alignment.
1088  if (!isDestSufficientlyAligned) {
1089  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1090  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1091  }
1092 
1093  // Update AA metadata
1094  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1095  // handled here, but combineMetadata doesn't support them yet
1096  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1097  LLVMContext::MD_noalias,
1098  LLVMContext::MD_invariant_group,
1099  LLVMContext::MD_access_group};
1100  combineMetadata(C, cpyLoad, KnownIDs, true);
1101  if (cpyLoad != cpyStore)
1102  combineMetadata(C, cpyStore, KnownIDs, true);
1103 
1104  ++NumCallSlot;
1105  return true;
1106 }
1107 
1108 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1109 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
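///
/// For illustration (a sketch; names are placeholders):
///   memcpy(b <- a)
///   ... nothing writes to a in between ...
///   memcpy(c <- b)
/// is rewritten so the second copy reads from the original source:
///   memcpy(c <- a)   ; or a memmove, if c may alias a
/// leaving the first memcpy dead if b has no other uses.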
1110 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1111  MemCpyInst *MDep) {
1112   // We can only transform memcpys where the dest of one is the source of the
1113  // other.
1114  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1115  return false;
1116 
1117   // If the dep instruction is reading from our current input, then it is a no-op
1118  // transfer and substituting the input won't change this instruction. Just
1119  // ignore the input and let someone else zap MDep. This handles cases like:
1120  // memcpy(a <- a)
1121  // memcpy(b <- a)
1122  if (M->getSource() == MDep->getSource())
1123  return false;
1124 
1125  // Second, the length of the memcpy's must be the same, or the preceding one
1126  // must be larger than the following one.
1127  if (MDep->getLength() != M->getLength()) {
1128  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1129  auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1130  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1131  return false;
1132  }
1133 
1134  // Verify that the copied-from memory doesn't change in between the two
1135  // transfers. For example, in:
1136  // memcpy(a <- b)
1137  // *b = 42;
1138  // memcpy(c <- a)
1139  // It would be invalid to transform the second memcpy into memcpy(c <- b).
1140  //
1141  // TODO: If the code between M and MDep is transparent to the destination "c",
1142  // then we could still perform the xform by moving M up to the first memcpy.
1143  // TODO: It would be sufficient to check the MDep source up to the memcpy
1144  // size of M, rather than MDep.
1145   if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
1146                      MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1147  return false;
1148 
1149  // If the dest of the second might alias the source of the first, then the
1150  // source and dest might overlap. In addition, if the source of the first
1151  // points to constant memory, they won't overlap by definition. Otherwise, we
1152  // still want to eliminate the intermediate value, but we have to generate a
1153  // memmove instead of memcpy.
1154  bool UseMemMove = false;
1155  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1156  UseMemMove = true;
1157 
1158  // If all checks passed, then we can transform M.
1159  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1160  << *MDep << '\n' << *M << '\n');
1161 
1162  // TODO: Is this worth it if we're creating a less aligned memcpy? For
1163  // example we could be moving from movaps -> movq on x86.
1164  IRBuilder<> Builder(M);
1165  Instruction *NewM;
1166  if (UseMemMove)
1167  NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1168  MDep->getRawSource(), MDep->getSourceAlign(),
1169  M->getLength(), M->isVolatile());
1170  else if (isa<MemCpyInlineInst>(M)) {
1171  // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1172  // never allowed since that would allow the latter to be lowered as a call
1173  // to an external function.
1174  NewM = Builder.CreateMemCpyInline(
1175  M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1176  MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1177  } else
1178  NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1179  MDep->getRawSource(), MDep->getSourceAlign(),
1180  M->getLength(), M->isVolatile());
1181 
1182  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1183  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1184  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1185  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1186 
1187  // Remove the instruction we're replacing.
1188  eraseInstruction(M);
1189  ++NumMemCpyInstr;
1190  return true;
1191 }
1192 
1193 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
1194 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1195 /// weren't copied over by \p MemCpy.
1196 ///
1197 /// In other words, transform:
1198 /// \code
1199 /// memset(dst, c, dst_size);
1200 /// memcpy(dst, src, src_size);
1201 /// \endcode
1202 /// into:
1203 /// \code
1204 /// memcpy(dst, src, src_size);
1205 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1206 /// \endcode
1207 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1208  MemSetInst *MemSet) {
1209  // We can only transform memset/memcpy with the same destination.
1210  if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1211  return false;
1212 
1213  // Check that src and dst of the memcpy aren't the same. While memcpy
1214  // operands cannot partially overlap, exact equality is allowed.
1215  if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1216  return false;
1217 
1218  // We know that dst up to src_size is not written. We now need to make sure
1219  // that dst up to dst_size is not accessed. (If we did not move the memset,
1220  // checking for reads would be sufficient.)
1221   if (accessedBetween(*AA, MemoryLocation::getForDest(MemCpy),
1222                       MSSA->getMemoryAccess(MemSet),
1223  MSSA->getMemoryAccess(MemCpy)))
1224  return false;
1225 
1226  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1227  Value *Dest = MemCpy->getRawDest();
1228  Value *DestSize = MemSet->getLength();
1229  Value *SrcSize = MemCpy->getLength();
1230 
1231  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1232  return false;
1233 
1234  // If the sizes are the same, simply drop the memset instead of generating
1235  // a replacement with zero size.
1236  if (DestSize == SrcSize) {
1237  eraseInstruction(MemSet);
1238  return true;
1239  }
1240 
1241  // By default, create an unaligned memset.
1242  Align Alignment = Align(1);
1243  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1244  // of the sum.
1245  const Align DestAlign = std::max(MemSet->getDestAlign().valueOrOne(),
1246  MemCpy->getDestAlign().valueOrOne());
1247  if (DestAlign > 1)
1248  if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1249  Alignment = commonAlignment(DestAlign, SrcSizeC->getZExtValue());
1250 
1251  IRBuilder<> Builder(MemCpy);
1252 
1253  // If the sizes have different types, zext the smaller one.
1254  if (DestSize->getType() != SrcSize->getType()) {
1255  if (DestSize->getType()->getIntegerBitWidth() >
1256  SrcSize->getType()->getIntegerBitWidth())
1257  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1258  else
1259  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1260  }
1261 
1262  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1263  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1264  Value *MemsetLen = Builder.CreateSelect(
1265  Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1266  unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1267  Instruction *NewMemSet = Builder.CreateMemSet(
1268  Builder.CreateGEP(
1269  Builder.getInt8Ty(),
1270  Builder.CreatePointerCast(Dest, Builder.getInt8PtrTy(DestAS)),
1271  SrcSize),
1272  MemSet->getOperand(1), MemsetLen, Alignment);
1273 
1274  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1275  "MemCpy must be a MemoryDef");
1276  // The new memset is inserted after the memcpy, but it is known that its
1277  // defining access is the memset about to be removed which immediately
1278  // precedes the memcpy.
1279  auto *LastDef =
1280  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1281  auto *NewAccess = MSSAU->createMemoryAccessBefore(
1282  NewMemSet, LastDef->getDefiningAccess(), LastDef);
1283  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1284 
1285  eraseInstruction(MemSet);
1286  return true;
1287 }
1288 
1289 /// Determine whether the instruction has undefined content for the given Size,
1290 /// either because it was freshly alloca'd or started its lifetime.
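///
/// For illustration (a sketch; names are placeholders): if V is based on
/// %a = alloca [N x i8], and Def is either the live-on-entry definition or a
/// lifetime.start that covers at least Size bytes of %a, then the bytes a
/// memcpy would read from V are undef, so callers may drop or shrink the copy.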
1291 static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1292                              MemoryDef *Def, Value *Size) {
1293  if (MSSA->isLiveOnEntryDef(Def))
1294  return isa<AllocaInst>(getUnderlyingObject(V));
1295 
1296  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1297  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1298  auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1299 
1300  if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1301  if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1302  LTSize->getZExtValue() >= CSize->getZExtValue())
1303  return true;
1304  }
1305 
1306  // If the lifetime.start covers a whole alloca (as it almost always
1307  // does) and we're querying a pointer based on that alloca, then we know
1308  // the memory is definitely undef, regardless of how exactly we alias.
1309  // The size also doesn't matter, as an out-of-bounds access would be UB.
1310  if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1311  if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1312  const DataLayout &DL = Alloca->getModule()->getDataLayout();
1313  if (Optional<TypeSize> AllocaSize =
1314  Alloca->getAllocationSizeInBits(DL))
1315  if (*AllocaSize == LTSize->getValue() * 8)
1316  return true;
1317  }
1318  }
1319  }
1320  }
1321 
1322  return false;
1323 }
1324 
1325 /// Transform memcpy to memset when its source was just memset.
1326 /// In other words, turn:
1327 /// \code
1328 /// memset(dst1, c, dst1_size);
1329 /// memcpy(dst2, dst1, dst2_size);
1330 /// \endcode
1331 /// into:
1332 /// \code
1333 /// memset(dst1, c, dst1_size);
1334 /// memset(dst2, c, dst2_size);
1335 /// \endcode
1336 /// When dst2_size <= dst1_size.
1337 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1338  MemSetInst *MemSet) {
1339   // Make sure this is memcpy(..., memset(...), ...): i.e. the memcpy source is
1340   // exactly the memset destination. Otherwise it is hard to reason about.
1341  if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1342  return false;
1343 
1344  Value *MemSetSize = MemSet->getLength();
1345  Value *CopySize = MemCpy->getLength();
1346 
1347  if (MemSetSize != CopySize) {
1348  // Make sure the memcpy doesn't read any more than what the memset wrote.
1349  // Don't worry about sizes larger than i64.
1350 
1351  // A known memset size is required.
1352  auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1353  if (!CMemSetSize)
1354  return false;
1355 
1356  // A known memcpy size is also required.
1357  auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1358  if (!CCopySize)
1359  return false;
1360  if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1361  // If the memcpy is larger than the memset, but the memory was undef prior
1362  // to the memset, we can just ignore the tail. Technically we're only
1363  // interested in the bytes from MemSetSize..CopySize here, but as we can't
1364  // easily represent this location, we use the full 0..CopySize range.
1365  MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1366  bool CanReduceSize = false;
1367  MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1368  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1369  MemSetAccess->getDefiningAccess(), MemCpyLoc);
1370  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1371  if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1372  CanReduceSize = true;
1373 
1374  if (!CanReduceSize)
1375  return false;
1376  CopySize = MemSetSize;
1377  }
1378  }
1379 
1380  IRBuilder<> Builder(MemCpy);
1381  Instruction *NewM =
1382  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1383  CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1384  auto *LastDef =
1385  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1386  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1387  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1388 
1389  return true;
1390 }
1391 
1392 /// Perform simplification of memcpy's. If we have memcpy A
1393 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1394 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
1395 /// circumstances). This allows later passes to remove the first memcpy
1396 /// altogether.
1397 bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1398  // We can only optimize non-volatile memcpy's.
1399  if (M->isVolatile()) return false;
1400 
1401  // If the source and destination of the memcpy are the same, then zap it.
1402  if (M->getSource() == M->getDest()) {
1403  ++BBI;
1404  eraseInstruction(M);
1405  return true;
1406  }
1407 
1408  // If copying from a constant, try to turn the memcpy into a memset.
1409  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1410  if (GV->isConstant() && GV->hasDefinitiveInitializer())
1411  if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1412  M->getModule()->getDataLayout())) {
1413  IRBuilder<> Builder(M);
1414  Instruction *NewM =
1415  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1416  MaybeAlign(M->getDestAlignment()), false);
1417  auto *LastDef =
1418  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1419  auto *NewAccess =
1420  MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1421  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1422 
1423  eraseInstruction(M);
1424  ++NumCpyToSet;
1425  return true;
1426  }
1427 
1428  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1429  // FIXME: Not using getClobberingMemoryAccess() here due to PR54682.
1430  MemoryAccess *AnyClobber = MA->getDefiningAccess();
1431   MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1432   const MemoryAccess *DestClobber =
1433  MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1434 
1435  // Try to turn a partially redundant memset + memcpy into
1436  // memcpy + smaller memset. We don't need the memcpy size for this.
1437   // The memcpy must post-dominate the memset, so limit this to the same basic
1438  // block. A non-local generalization is likely not worthwhile.
1439  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1440  if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1441  if (DestClobber->getBlock() == M->getParent())
1442  if (processMemSetMemCpyDependence(M, MDep))
1443  return true;
1444 
1445  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1446  AnyClobber, MemoryLocation::getForSource(M));
1447 
1448  // There are four possible optimizations we can do for memcpy:
1449   // a) memcpy-memcpy xform which exposes redundancy for DSE.
1450  // b) call-memcpy xform for return slot optimization.
1451  // c) memcpy from freshly alloca'd space or space that has just started
1452  // its lifetime copies undefined data, and we can therefore eliminate
1453  // the memcpy in favor of the data that was already at the destination.
1454  // d) memcpy from a just-memset'd source can be turned into memset.
1455  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1456  if (Instruction *MI = MD->getMemoryInst()) {
1457  if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1458  if (auto *C = dyn_cast<CallInst>(MI)) {
1459  // FIXME: Can we pass in either of dest/src alignment here instead
1460  // of conservatively taking the minimum?
1461  Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1462  M->getSourceAlign().valueOrOne());
1463  if (performCallSlotOptzn(
1464  M, M, M->getDest(), M->getSource(),
1465  TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1466  [C]() -> CallInst * { return C; })) {
1467  LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1468  << " call: " << *C << "\n"
1469  << " memcpy: " << *M << "\n");
1470  eraseInstruction(M);
1471  ++NumMemCpyInstr;
1472  return true;
1473  }
1474  }
1475  }
1476  if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1477  return processMemCpyMemCpyDependence(M, MDep);
1478  if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1479  if (performMemCpyToMemSetOptzn(M, MDep)) {
1480  LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1481  eraseInstruction(M);
1482  ++NumCpyToSet;
1483  return true;
1484  }
1485  }
1486  }
1487 
1488  if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
1489  LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1490  eraseInstruction(M);
1491  ++NumMemCpyInstr;
1492  return true;
1493  }
1494  }
1495 
1496  return false;
1497 }
1498 
1499 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1500 /// not to alias.
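/// For example (illustrative): memmove(%d <- %s, 32), where alias analysis
/// proves the call cannot modify its own source (so the write to %d cannot
/// clobber the bytes still to be read from %s), is rewritten in place to
/// memcpy(%d <- %s, 32).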
1501 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1502  // See if the source could be modified by this memmove potentially.
1503  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1504  return false;
1505 
1506  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1507  << "\n");
1508 
1509  // If not, then we know we can transform this.
1510  Type *ArgTys[3] = { M->getRawDest()->getType(),
1511  M->getRawSource()->getType(),
1512  M->getLength()->getType() };
1513  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1514  Intrinsic::memcpy, ArgTys));
1515 
1516  // For MemorySSA nothing really changes (except that memcpy may imply stricter
1517  // aliasing guarantees).
1518 
1519  ++NumMoveToCpy;
1520  return true;
1521 }
1522 
1523 /// This is called on every byval argument in call sites.
1524 bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1525  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1526  // Find out what feeds this byval argument.
1527  Value *ByValArg = CB.getArgOperand(ArgNo);
1528  Type *ByValTy = CB.getParamByValType(ArgNo);
1529  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1530  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1531  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1532  if (!CallAccess)
1533  return false;
1534  MemCpyInst *MDep = nullptr;
1535  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1536  CallAccess->getDefiningAccess(), Loc);
1537  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1538  MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1539 
1540  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1541  // a memcpy, see if we can pass the source of the memcpy as the byval argument
1542  // instead of the copy.
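  // An illustrative sketch (hypothetical names):
  //   memcpy(%tmp <- %src, sizeof(%struct.S))
  //   call void @f(ptr byval(%struct.S) %tmp)
  // can be rewritten to pass the copy's source directly:
  //   call void @f(ptr byval(%struct.S) %src)
  // provided the size, alignment, address-space, and no-intervening-write
  // checks below all succeed.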
1543  if (!MDep || MDep->isVolatile() ||
1544  ByValArg->stripPointerCasts() != MDep->getDest())
1545  return false;
1546 
1547  // The length of the memcpy must be larger than or equal to the size of the byval.
1548  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1549  if (!C1 || !TypeSize::isKnownGE(
1550  TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1551  return false;
1552 
1553  // Get the alignment of the byval. If the call doesn't specify the alignment,
1554  // then it is some target specific value that we can't know.
1555  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1556  if (!ByValAlign) return false;
1557 
1558  // If the byval alignment is greater than the memcpy source alignment, check
1559  // whether we can force the memcpy source up to that alignment; if not, bail out.
1560  MaybeAlign MemDepAlign = MDep->getSourceAlign();
1561  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1562  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1563  DT) < *ByValAlign)
1564  return false;
1565 
1566  // The address space of the memcpy source must match the byval argument
1567  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1568  ByValArg->getType()->getPointerAddressSpace())
1569  return false;
1570 
1571  // Verify that the copied-from memory doesn't change in between the memcpy and
1572  // the byval call.
1573  // memcpy(a <- b)
1574  // *b = 42;
1575  // foo(*a)
1576  // It would be invalid to transform the second memcpy into foo(*b).
1577  if (writtenBetween(MSSA, *AA, MemoryLocation::getForSource(MDep),
1578  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1579  return false;
1580 
1581  Value *TmpCast = MDep->getSource();
1582  if (MDep->getSource()->getType() != ByValArg->getType()) {
1583  BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1584  "tmpcast", &CB);
1585  // Set the tmpcast's DebugLoc to MDep's
1586  TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1587  TmpCast = TmpBitCast;
1588  }
1589 
1590  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1591  << " " << *MDep << "\n"
1592  << " " << CB << "\n");
1593 
1594  // Otherwise we're good! Update the byval argument.
1595  CB.setArgOperand(ArgNo, TmpCast);
1596  ++NumMemCpyInstr;
1597  return true;
1598 }
1599 
1600 /// Executes one iteration of MemCpyOptPass.
1601 bool MemCpyOptPass::iterateOnFunction(Function &F) {
1602  bool MadeChange = false;
1603 
1604  // Walk all instructions in the function.
1605  for (BasicBlock &BB : F) {
1606  // Skip unreachable blocks. For example processStore assumes that an
1607  // instruction in a BB can't be dominated by a later instruction in the
1608  // same BB (which is a scenario that can happen for an unreachable BB that
1609  // has itself as a predecessor).
1610  if (!DT->isReachableFromEntry(&BB))
1611  continue;
1612 
1613  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1614  // Avoid invalidating the iterator.
1615  Instruction *I = &*BI++;
1616 
1617  bool RepeatInstruction = false;
1618 
1619  if (auto *SI = dyn_cast<StoreInst>(I))
1620  MadeChange |= processStore(SI, BI);
1621  else if (auto *M = dyn_cast<MemSetInst>(I))
1622  RepeatInstruction = processMemSet(M, BI);
1623  else if (auto *M = dyn_cast<MemCpyInst>(I))
1624  RepeatInstruction = processMemCpy(M, BI);
1625  else if (auto *M = dyn_cast<MemMoveInst>(I))
1626  RepeatInstruction = processMemMove(M);
1627  else if (auto *CB = dyn_cast<CallBase>(I)) {
1628  for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1629  if (CB->isByValArgument(i))
1630  MadeChange |= processByValArgument(*CB, i);
1631  }
1632 
1633  // Reprocess the instruction if desired.
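      // (Stepping the iterator back one slot means the next loop iteration
      // revisits whatever instruction now sits at this position, typically the
      // intrinsic the transformation just created, so it can be optimized
      // further before we move on.)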
1634  if (RepeatInstruction) {
1635  if (BI != BB.begin())
1636  --BI;
1637  MadeChange = true;
1638  }
1639  }
1640  }
1641 
1642  return MadeChange;
1643 }
1644 
1645 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1646  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1647  auto *AA = &AM.getResult<AAManager>(F);
1648  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1649  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1650  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1651 
1652  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1653  if (!MadeChange)
1654  return PreservedAnalyses::all();
1655 
1656  PreservedAnalyses PA;
1657  PA.preserveSet<CFGAnalyses>();
1658  PA.preserve<MemorySSAAnalysis>();
1659  return PA;
1660 }
1661 
1662 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1663  AliasAnalysis *AA_, AssumptionCache *AC_,
1664  DominatorTree *DT_, MemorySSA *MSSA_) {
1665  bool MadeChange = false;
1666  TLI = TLI_;
1667  AA = AA_;
1668  AC = AC_;
1669  DT = DT_;
1670  MSSA = MSSA_;
1671  MemorySSAUpdater MSSAU_(MSSA_);
1672  MSSAU = &MSSAU_;
1673 
1674  while (true) {
1675  if (!iterateOnFunction(F))
1676  break;
1677  MadeChange = true;
1678  }
1679 
1680  if (VerifyMemorySSA)
1681  MSSA_->verifyMemorySSA();
1682 
1683  return MadeChange;
1684 }
1685 
1686 /// This is the main transformation entry point for a function.
1687 bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1688  if (skipFunction(F))
1689  return false;
1690 
1691  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1692  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1693  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1694  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1695  auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1696 
1697  return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1698 }