1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs various transformations related to eliminating memcpy
10 // calls, or transforming sets of stores into memset's.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
25 #include "llvm/Analysis/Loads.h"
31 #include "llvm/IR/Argument.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/Constants.h"
34 #include "llvm/IR/DataLayout.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Dominators.h"
37 #include "llvm/IR/Function.h"
39 #include "llvm/IR/GlobalVariable.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/InstrTypes.h"
42 #include "llvm/IR/Instruction.h"
43 #include "llvm/IR/Instructions.h"
44 #include "llvm/IR/IntrinsicInst.h"
45 #include "llvm/IR/Intrinsics.h"
46 #include "llvm/IR/LLVMContext.h"
47 #include "llvm/IR/Module.h"
48 #include "llvm/IR/Operator.h"
49 #include "llvm/IR/PassManager.h"
50 #include "llvm/IR/Type.h"
51 #include "llvm/IR/User.h"
52 #include "llvm/IR/Value.h"
53 #include "llvm/InitializePasses.h"
54 #include "llvm/Pass.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/Debug.h"
59 #include "llvm/Transforms/Scalar.h"
61 #include <algorithm>
62 #include <cassert>
63 #include <cstdint>
64 #include <utility>
65 
66 using namespace llvm;
67 
68 #define DEBUG_TYPE "memcpyopt"
69 
70 static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
71     "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
73  cl::desc("Enable memcpyopt even when libcalls are disabled"));
74 
75 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
76 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
77 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
78 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
79 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
80 
81 namespace {
82 
83 /// Represents a range of memset'd bytes with the ByteVal value.
84 /// This allows us to analyze stores like:
85 /// store 0 -> P+1
86 /// store 0 -> P+0
87 /// store 0 -> P+3
88 /// store 0 -> P+2
89 /// which sometimes happens with stores to arrays of structs etc. When we see
90 /// the first store, we make a range [1, 2). The second store extends the range
91 /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
92 /// two ranges into [0, 3) which is memset'able.
93 struct MemsetRange {
94   // Start/End - A semi-open range that describes the span this range covers.
95  // The range is closed at the start and open at the end: [Start, End).
96  int64_t Start, End;
97 
98  /// StartPtr - The getelementptr instruction that points to the start of the
99  /// range.
100  Value *StartPtr;
101 
102  /// Alignment - The known alignment of the first store.
103  unsigned Alignment;
104 
105  /// TheStores - The actual stores that make up this range.
106   SmallVector<Instruction *, 16> TheStores;
107 
108  bool isProfitableToUseMemset(const DataLayout &DL) const;
109 };
110 
111 } // end anonymous namespace
112 
113 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
114   // If we found at least 4 stores to merge, or at least 16 bytes, use memset.
115  if (TheStores.size() >= 4 || End-Start >= 16) return true;
116 
117  // If there is nothing to merge, don't do anything.
118  if (TheStores.size() < 2) return false;
119 
120  // If any of the stores are a memset, then it is always good to extend the
121  // memset.
122  for (Instruction *SI : TheStores)
123  if (!isa<StoreInst>(SI))
124  return true;
125 
126  // Assume that the code generator is capable of merging pairs of stores
127  // together if it wants to.
128  if (TheStores.size() == 2) return false;
129 
130  // If we have fewer than 8 stores, it can still be worthwhile to do this.
131  // For example, merging 4 i8 stores into an i32 store is useful almost always.
132  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
133  // memset will be split into 2 32-bit stores anyway) and doing so can
134  // pessimize the llvm optimizer.
135  //
136  // Since we don't have perfect knowledge here, make some assumptions: assume
137  // the maximum GPR width is the same size as the largest legal integer
138  // size. If so, check to see whether we will end up actually reducing the
139  // number of stores used.
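  // Illustrative worked example (not from the original source): with six
  // one-byte stores covering six contiguous bytes on a target whose largest
  // legal integer type is i32, Bytes = 6 and MaxIntSize = 4, giving
  // NumPointerStores = 1 and NumByteStores = 2. Since 6 > 1 + 2, the merge is
  // considered profitable.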
140  unsigned Bytes = unsigned(End-Start);
141  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
142  if (MaxIntSize == 0)
143  MaxIntSize = 1;
144  unsigned NumPointerStores = Bytes / MaxIntSize;
145 
146  // Assume the remaining bytes if any are done a byte at a time.
147  unsigned NumByteStores = Bytes % MaxIntSize;
148 
149  // If we will reduce the # stores (according to this heuristic), do the
150  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
151  // etc.
152  return TheStores.size() > NumPointerStores+NumByteStores;
153 }
154 
155 namespace {
156 
157 class MemsetRanges {
158  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
159 
160  /// A sorted list of the memset ranges.
161   SmallVector<MemsetRange, 8> Ranges;
162 
163  const DataLayout &DL;
164 
165 public:
166  MemsetRanges(const DataLayout &DL) : DL(DL) {}
167 
168  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
169 
170  const_iterator begin() const { return Ranges.begin(); }
171  const_iterator end() const { return Ranges.end(); }
172  bool empty() const { return Ranges.empty(); }
173 
174  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
175  if (auto *SI = dyn_cast<StoreInst>(Inst))
176  addStore(OffsetFromFirst, SI);
177  else
178  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
179  }
180 
181  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
182  TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
183  assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
184  addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
185  SI->getAlign().value(), SI);
186  }
187 
188  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
189  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
190  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
191  }
192 
193  void addRange(int64_t Start, int64_t Size, Value *Ptr,
194  unsigned Alignment, Instruction *Inst);
195 };
196 
197 } // end anonymous namespace
198 
199 /// Add a new store to the MemsetRanges data structure. This adds a
200 /// new range for the specified store at the specified offset, merging into
201 /// existing ranges as appropriate.
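///
/// A hedged sketch of the behaviour: given existing ranges [0, 4) and [8, 12),
/// adding a 4-byte store at offset 4 first extends the first range to [0, 8)
/// and then folds the neighbouring [8, 12) range in, leaving a single [0, 12)
/// range whose TheStores list contains every contributing store.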
202 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
203  unsigned Alignment, Instruction *Inst) {
204  int64_t End = Start+Size;
205 
206  range_iterator I = partition_point(
207  Ranges, [=](const MemsetRange &O) { return O.End < Start; });
208 
209  // We now know that I == E, in which case we didn't find anything to merge
210  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
211  // to insert a new range. Handle this now.
212  if (I == Ranges.end() || End < I->Start) {
213  MemsetRange &R = *Ranges.insert(I, MemsetRange());
214  R.Start = Start;
215  R.End = End;
216  R.StartPtr = Ptr;
217  R.Alignment = Alignment;
218  R.TheStores.push_back(Inst);
219  return;
220  }
221 
222  // This store overlaps with I, add it.
223  I->TheStores.push_back(Inst);
224 
225  // At this point, we may have an interval that completely contains our store.
226  // If so, just add it to the interval and return.
227  if (I->Start <= Start && I->End >= End)
228  return;
229 
230  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
231  // but is not entirely contained within the range.
232 
233   // See if this store extends the start of the range. In this case, it couldn't
234  // possibly cause it to join the prior range, because otherwise we would have
235  // stopped on *it*.
236  if (Start < I->Start) {
237  I->Start = Start;
238  I->StartPtr = Ptr;
239  I->Alignment = Alignment;
240  }
241 
242  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
243  // is in or right at the end of I), and that End >= I->Start. Extend I out to
244  // End.
245  if (End > I->End) {
246  I->End = End;
247  range_iterator NextI = I;
248  while (++NextI != Ranges.end() && End >= NextI->Start) {
249  // Merge the range in.
250  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
251  if (NextI->End > I->End)
252  I->End = NextI->End;
253  Ranges.erase(NextI);
254  NextI = I;
255  }
256  }
257 }
258 
259 //===----------------------------------------------------------------------===//
260 // MemCpyOptLegacyPass Pass
261 //===----------------------------------------------------------------------===//
262 
263 namespace {
264 
265 class MemCpyOptLegacyPass : public FunctionPass {
266  MemCpyOptPass Impl;
267 
268 public:
269  static char ID; // Pass identification, replacement for typeid
270 
271  MemCpyOptLegacyPass() : FunctionPass(ID) {
272     initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
273   }
274 
275  bool runOnFunction(Function &F) override;
276 
277 private:
278  // This transformation requires dominator postdominator info
279  void getAnalysisUsage(AnalysisUsage &AU) const override {
280  AU.setPreservesCFG();
281     AU.addRequired<AssumptionCacheTracker>();
282     AU.addRequired<DominatorTreeWrapperPass>();
283     AU.addPreserved<DominatorTreeWrapperPass>();
284     AU.addPreserved<GlobalsAAWrapperPass>();
285     AU.addRequired<TargetLibraryInfoWrapperPass>();
286     AU.addRequired<AAResultsWrapperPass>();
287     AU.addPreserved<AAResultsWrapperPass>();
288     AU.addRequired<MemorySSAWrapperPass>();
289     AU.addPreserved<MemorySSAWrapperPass>();
290   }
291 };
292 
293 } // end anonymous namespace
294 
295 char MemCpyOptLegacyPass::ID = 0;
296 
297 /// The public interface to this file...
298 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
299 
300 INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
301  false, false)
308 INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
309                     false, false)
310 
311 // Check that V is either not accessible by the caller, or unwinding cannot
312 // occur between Start and End.
313 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
314                                          Instruction *End) {
315  assert(Start->getParent() == End->getParent() && "Must be in same block");
316  // Function can't unwind, so it also can't be visible through unwinding.
317  if (Start->getFunction()->doesNotThrow())
318  return false;
319 
320  // Object is not visible on unwind.
321  // TODO: Support RequiresNoCaptureBeforeUnwind case.
322  bool RequiresNoCaptureBeforeUnwind;
323   if (isNotVisibleOnUnwind(getUnderlyingObject(V),
324                            RequiresNoCaptureBeforeUnwind) &&
325  !RequiresNoCaptureBeforeUnwind)
326  return false;
327 
328  // Check whether there are any unwinding instructions in the range.
329  return any_of(make_range(Start->getIterator(), End->getIterator()),
330  [](const Instruction &I) { return I.mayThrow(); });
331 }
332 
333 void MemCpyOptPass::eraseInstruction(Instruction *I) {
334  MSSAU->removeMemoryAccess(I);
335  I->eraseFromParent();
336 }
337 
338 // Check for mod or ref of Loc between Start and End, excluding both boundaries.
339 // Start and End must be in the same block
340 static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
341                             const MemoryUseOrDef *Start,
342  const MemoryUseOrDef *End) {
343  assert(Start->getBlock() == End->getBlock() && "Only local supported");
344  for (const MemoryAccess &MA :
345  make_range(++Start->getIterator(), End->getIterator())) {
346  if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
347  Loc)))
348  return true;
349  }
350  return false;
351 }
352 
353 // Check for mod of Loc between Start and End, excluding both boundaries.
354 // Start and End can be in different blocks.
355 static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
356  const MemoryUseOrDef *Start,
357  const MemoryUseOrDef *End) {
358  // TODO: Only walk until we hit Start.
359   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
360       End->getDefiningAccess(), Loc);
361  return !MSSA->dominates(Clobber, Start);
362 }
363 
364 /// When scanning forward over instructions, we look for some other patterns to
365 /// fold away. In particular, this looks for stores to neighboring locations of
366 /// memory. If it sees enough consecutive ones, it attempts to merge them
367 /// together into a memcpy/memset.
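///
/// Illustrative IR sketch (hypothetical example, not taken from a test):
/// \code
///   store i8 0, i8* %p
///   %p1 = getelementptr i8, i8* %p, i64 1
///   store i8 0, i8* %p1
///   %p2 = getelementptr i8, i8* %p, i64 2
///   store i8 0, i8* %p2
///   %p3 = getelementptr i8, i8* %p, i64 3
///   store i8 0, i8* %p3
/// \endcode
/// may be merged into a single
/// \code
///   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i1 false)
/// \endcode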
368 Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
369  Value *StartPtr,
370  Value *ByteVal) {
371  const DataLayout &DL = StartInst->getModule()->getDataLayout();
372 
373  // We can't track scalable types
374  if (auto *SI = dyn_cast<StoreInst>(StartInst))
375  if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
376  return nullptr;
377 
378   // Okay, so we now have a single store of a splattable value. Scan to find
379  // all subsequent stores of the same value to offset from the same pointer.
380  // Join these together into ranges, so we can decide whether contiguous blocks
381  // are stored.
382  MemsetRanges Ranges(DL);
383 
384  BasicBlock::iterator BI(StartInst);
385 
386  // Keeps track of the last memory use or def before the insertion point for
387  // the new memset. The new MemoryDef for the inserted memsets will be inserted
388  // after MemInsertPoint. It points to either LastMemDef or to the last user
389  // before the insertion point of the memset, if there are any such users.
390  MemoryUseOrDef *MemInsertPoint = nullptr;
391  // Keeps track of the last MemoryDef between StartInst and the insertion point
392  // for the new memset. This will become the defining access of the inserted
393  // memsets.
394  MemoryDef *LastMemDef = nullptr;
395  for (++BI; !BI->isTerminator(); ++BI) {
396  auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
397  MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
398  if (CurrentAcc) {
399  MemInsertPoint = CurrentAcc;
400  if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
401  LastMemDef = CurrentDef;
402  }
403 
404  // Calls that only access inaccessible memory do not block merging
405  // accessible stores.
406  if (auto *CB = dyn_cast<CallBase>(BI)) {
407  if (CB->onlyAccessesInaccessibleMemory())
408  continue;
409  }
410 
411  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
412  // If the instruction is readnone, ignore it, otherwise bail out. We
413  // don't even allow readonly here because we don't want something like:
414  // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
415  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
416  break;
417  continue;
418  }
419 
420  if (auto *NextStore = dyn_cast<StoreInst>(BI)) {
421  // If this is a store, see if we can merge it in.
422  if (!NextStore->isSimple()) break;
423 
424  Value *StoredVal = NextStore->getValueOperand();
425 
426  // Don't convert stores of non-integral pointer types to memsets (which
427  // stores integers).
428  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
429  break;
430 
431  // We can't track ranges involving scalable types.
432  if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
433  break;
434 
435  // Check to see if this stored value is of the same byte-splattable value.
436  Value *StoredByte = isBytewiseValue(StoredVal, DL);
437  if (isa<UndefValue>(ByteVal) && StoredByte)
438  ByteVal = StoredByte;
439  if (ByteVal != StoredByte)
440  break;
441 
442  // Check to see if this store is to a constant offset from the start ptr.
443       Optional<int64_t> Offset =
444           isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
445  if (!Offset)
446  break;
447 
448  Ranges.addStore(*Offset, NextStore);
449  } else {
450  auto *MSI = cast<MemSetInst>(BI);
451 
452  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
453  !isa<ConstantInt>(MSI->getLength()))
454  break;
455 
456  // Check to see if this store is to a constant offset from the start ptr.
457  Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
458  if (!Offset)
459  break;
460 
461  Ranges.addMemSet(*Offset, MSI);
462  }
463  }
464 
465  // If we have no ranges, then we just had a single store with nothing that
466  // could be merged in. This is a very common case of course.
467  if (Ranges.empty())
468  return nullptr;
469 
470  // If we had at least one store that could be merged in, add the starting
471  // store as well. We try to avoid this unless there is at least something
472  // interesting as a small compile-time optimization.
473  Ranges.addInst(0, StartInst);
474 
475   // If we create any memsets, we put them right before the first instruction
476   // that isn't part of the memset block. This ensures that the memset is dominated
477  // by any addressing instruction needed by the start of the block.
478  IRBuilder<> Builder(&*BI);
479 
480  // Now that we have full information about ranges, loop over the ranges and
481  // emit memset's for anything big enough to be worthwhile.
482  Instruction *AMemSet = nullptr;
483  for (const MemsetRange &Range : Ranges) {
484  if (Range.TheStores.size() == 1) continue;
485 
486  // If it is profitable to lower this range to memset, do so now.
487  if (!Range.isProfitableToUseMemset(DL))
488  continue;
489 
490  // Otherwise, we do want to transform this! Create a new memset.
491  // Get the starting pointer of the block.
492  StartPtr = Range.StartPtr;
493 
494  AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
495  MaybeAlign(Range.Alignment));
496  LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
497  : Range.TheStores) dbgs()
498  << *SI << '\n';
499  dbgs() << "With: " << *AMemSet << '\n');
500  if (!Range.TheStores.empty())
501  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
502 
503  assert(LastMemDef && MemInsertPoint &&
504  "Both LastMemDef and MemInsertPoint need to be set");
505  auto *NewDef =
506  cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
507  ? MSSAU->createMemoryAccessBefore(
508  AMemSet, LastMemDef, MemInsertPoint)
509  : MSSAU->createMemoryAccessAfter(
510  AMemSet, LastMemDef, MemInsertPoint));
511  MSSAU->insertDef(NewDef, /*RenameUses=*/true);
512  LastMemDef = NewDef;
513  MemInsertPoint = NewDef;
514 
515  // Zap all the stores.
516  for (Instruction *SI : Range.TheStores)
517  eraseInstruction(SI);
518 
519  ++NumMemSetInfer;
520  }
521 
522  return AMemSet;
523 }
524 
525 // This method tries to lift a store instruction before position P.
526 // It will lift the store and its argument, plus anything that
527 // may alias with these.
528 // The method returns true if it was successful.
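// Illustrative example (hypothetical): for
//   call void @f()                           ; P, may clobber the load's source
//   %a = getelementptr i8, i8* %buf, i64 4   ; computes the store address
//   store i8 %v, i8* %a
// both %a and the store are lifted above the call to @f, so that the
// load/store pair can later be rewritten as a memcpy inserted before the call.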
529 bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
530   // If the store aliases this position, bail out early.
531   MemoryLocation StoreLoc = MemoryLocation::get(SI);
532   if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
533  return false;
534 
535   // Keep track of the arguments of all instructions we plan to lift
536   // so we can make sure to lift them as well if appropriate.
537   SmallPtrSet<Instruction *, 8> Args;
538   if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
539  if (Ptr->getParent() == SI->getParent())
540  Args.insert(Ptr);
541 
542   // Instructions to lift before P.
543   SmallVector<Instruction *, 8> ToLift;
544 
545  // Memory locations of lifted instructions.
546  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
547 
548  // Lifted calls.
549   SmallVector<const CallBase *, 8> Calls;
550 
551  const MemoryLocation LoadLoc = MemoryLocation::get(LI);
552 
553  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
554  auto *C = &*I;
555 
556  // Make sure hoisting does not perform a store that was not guaranteed to
557  // happen.
558     if (!isGuaranteedToTransferExecutionToSuccessor(C))
559       return false;
560 
561  bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
562 
563  bool NeedLift = false;
564  if (Args.erase(C))
565  NeedLift = true;
566  else if (MayAlias) {
567  NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
568  return isModOrRefSet(AA->getModRefInfo(C, ML));
569  });
570 
571  if (!NeedLift)
572  NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
573  return isModOrRefSet(AA->getModRefInfo(C, Call));
574  });
575  }
576 
577  if (!NeedLift)
578  continue;
579 
580  if (MayAlias) {
581  // Since LI is implicitly moved downwards past the lifted instructions,
582  // none of them may modify its source.
583  if (isModSet(AA->getModRefInfo(C, LoadLoc)))
584  return false;
585  else if (const auto *Call = dyn_cast<CallBase>(C)) {
586  // If we can't lift this before P, it's game over.
587  if (isModOrRefSet(AA->getModRefInfo(P, Call)))
588  return false;
589 
590  Calls.push_back(Call);
591  } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
592  // If we can't lift this before P, it's game over.
593  auto ML = MemoryLocation::get(C);
594  if (isModOrRefSet(AA->getModRefInfo(P, ML)))
595  return false;
596 
597  MemLocs.push_back(ML);
598  } else
599  // We don't know how to lift this instruction.
600  return false;
601  }
602 
603  ToLift.push_back(C);
604  for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
605  if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
606  if (A->getParent() == SI->getParent()) {
607  // Cannot hoist user of P above P
608         if (A == P) return false;
609  Args.insert(A);
610  }
611  }
612  }
613 
614  // Find MSSA insertion point. Normally P will always have a corresponding
615  // memory access before which we can insert. However, with non-standard AA
616  // pipelines, there may be a mismatch between AA and MSSA, in which case we
617  // will scan for a memory access before P. In either case, we know for sure
618  // that at least the load will have a memory access.
619  // TODO: Simplify this once P will be determined by MSSA, in which case the
620  // discrepancy can no longer occur.
621  MemoryUseOrDef *MemInsertPoint = nullptr;
622  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
623  MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
624  } else {
625  const Instruction *ConstP = P;
626  for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
627  ++LI->getReverseIterator())) {
628  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
629  MemInsertPoint = MA;
630  break;
631  }
632  }
633  }
634 
635  // We made it, we need to lift.
636  for (auto *I : llvm::reverse(ToLift)) {
637  LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
638  I->moveBefore(P);
639  assert(MemInsertPoint && "Must have found insert point");
640  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
641  MSSAU->moveAfter(MA, MemInsertPoint);
642  MemInsertPoint = MA;
643  }
644  }
645 
646  return true;
647 }
648 
649 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
650  if (!SI->isSimple()) return false;
651 
652  // Avoid merging nontemporal stores since the resulting
653  // memcpy/memset would not be able to preserve the nontemporal hint.
654  // In theory we could teach how to propagate the !nontemporal metadata to
655  // memset calls. However, that change would force the backend to
656  // conservatively expand !nontemporal memset calls back to sequences of
657  // store instructions (effectively undoing the merging).
658  if (SI->getMetadata(LLVMContext::MD_nontemporal))
659  return false;
660 
661  const DataLayout &DL = SI->getModule()->getDataLayout();
662 
663  Value *StoredVal = SI->getValueOperand();
664 
665  // Not all the transforms below are correct for non-integral pointers, bail
666  // until we've audited the individual pieces.
667  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
668  return false;
669 
670  // Load to store forwarding can be interpreted as memcpy.
671  if (auto *LI = dyn_cast<LoadInst>(StoredVal)) {
672  if (LI->isSimple() && LI->hasOneUse() &&
673  LI->getParent() == SI->getParent()) {
674 
675  auto *T = LI->getType();
676  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
677  // the corresponding libcalls are not available.
678  // TODO: We should really distinguish between libcall availability and
679  // our ability to introduce intrinsics.
680  if (T->isAggregateType() &&
681         (EnableMemCpyOptWithoutLibcalls ||
682          (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
683  MemoryLocation LoadLoc = MemoryLocation::get(LI);
684 
685  // We use alias analysis to check if an instruction may store to
686  // the memory we load from in between the load and the store. If
687  // such an instruction is found, we try to promote there instead
688  // of at the store position.
689  // TODO: Can use MSSA for this.
690  Instruction *P = SI;
691  for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
692  if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
693  P = &I;
694  break;
695  }
696  }
697 
698  // We found an instruction that may write to the loaded memory.
699  // We can try to promote at this position instead of the store
700  // position if nothing aliases the store memory after this and the store
701  // destination is not in the range.
702  if (P && P != SI) {
703  if (!moveUp(SI, P, LI))
704  P = nullptr;
705  }
706 
707  // If a valid insertion position is found, then we can promote
708  // the load/store pair to a memcpy.
709  if (P) {
710  // If we load from memory that may alias the memory we store to,
711       // memmove must be used to preserve semantics. If not, memcpy can
712  // be used. Also, if we load from constant memory, memcpy can be used
713  // as the constant memory won't be modified.
714  bool UseMemMove = false;
715  if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
716  UseMemMove = true;
717 
718  uint64_t Size = DL.getTypeStoreSize(T);
719 
720         IRBuilder<> Builder(SI);
721         Instruction *M;
722  if (UseMemMove)
723  M = Builder.CreateMemMove(
724  SI->getPointerOperand(), SI->getAlign(),
725  LI->getPointerOperand(), LI->getAlign(), Size);
726  else
727  M = Builder.CreateMemCpy(
728  SI->getPointerOperand(), SI->getAlign(),
729  LI->getPointerOperand(), LI->getAlign(), Size);
730 
731  LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
732  << *M << "\n");
733 
734  auto *LastDef =
735  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
736  auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
737  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
738 
739  eraseInstruction(SI);
740  eraseInstruction(LI);
741  ++NumMemCpyInstr;
742 
743  // Make sure we do not invalidate the iterator.
744  BBI = M->getIterator();
745  return true;
746  }
747  }
748 
749  // Detect cases where we're performing call slot forwarding, but
750  // happen to be using a load-store pair to implement it, rather than
751  // a memcpy.
752  CallInst *C = nullptr;
753  if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
754  MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
755       // The load must post-dominate the call. Limit to the same block for now.
756  // TODO: Support non-local call-slot optimization?
757  if (LoadClobber->getBlock() == SI->getParent())
758  C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
759  }
760 
761  if (C) {
762  // Check that nothing touches the dest of the "copy" between
763  // the call and the store.
764         MemoryLocation StoreLoc = MemoryLocation::get(SI);
765         if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
766  MSSA->getMemoryAccess(SI)))
767  C = nullptr;
768  }
769 
770  if (C) {
771  bool changed = performCallSlotOptzn(
772  LI, SI, SI->getPointerOperand()->stripPointerCasts(),
773             LI->getPointerOperand()->stripPointerCasts(),
774             DL.getTypeStoreSize(SI->getOperand(0)->getType()),
775  commonAlignment(SI->getAlign(), LI->getAlign()), C);
776  if (changed) {
777  eraseInstruction(SI);
778  eraseInstruction(LI);
779  ++NumMemCpyInstr;
780  return true;
781  }
782  }
783  }
784  }
785 
786  // The following code creates memset intrinsics out of thin air. Don't do
787  // this if the corresponding libfunc is not available.
788  // TODO: We should really distinguish between libcall availability and
789  // our ability to introduce intrinsics.
790  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
791  return false;
792 
793  // There are two cases that are interesting for this code to handle: memcpy
794  // and memset. Right now we only handle memset.
795 
796   // Ensure that the value being stored is something that can be memset a
797   // byte at a time, like "0" or "-1" of any width, as well as things like
798   // 0xA0A0A0A0 and 0.0.
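  // For example (illustrative): a store of i32 0xA0A0A0A0 splats to the byte
  // 0xA0, and a store of double 0.0 splats to the byte 0x00, so either store
  // can be lowered to a memset.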
799  auto *V = SI->getOperand(0);
800  if (Value *ByteVal = isBytewiseValue(V, DL)) {
801  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
802  ByteVal)) {
803  BBI = I->getIterator(); // Don't invalidate iterator.
804  return true;
805  }
806 
807  // If we have an aggregate, we try to promote it to memset regardless
808  // of opportunity for merging as it can expose optimization opportunities
809  // in subsequent passes.
810  auto *T = V->getType();
811  if (T->isAggregateType()) {
812  uint64_t Size = DL.getTypeStoreSize(T);
813       IRBuilder<> Builder(SI);
814       auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
815  SI->getAlign());
816 
817  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
818 
819  // The newly inserted memset is immediately overwritten by the original
820  // store, so we do not need to rename uses.
821  auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
822  auto *NewAccess = MSSAU->createMemoryAccessBefore(
823  M, StoreDef->getDefiningAccess(), StoreDef);
824  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
825 
826  eraseInstruction(SI);
827  NumMemSetInfer++;
828 
829  // Make sure we do not invalidate the iterator.
830  BBI = M->getIterator();
831  return true;
832  }
833  }
834 
835  return false;
836 }
837 
838 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
839  // See if there is another memset or store neighboring this memset which
840  // allows us to widen out the memset to do a single larger store.
841  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
842  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
843  MSI->getValue())) {
844  BBI = I->getIterator(); // Don't invalidate iterator.
845  return true;
846  }
847  return false;
848 }
849 
850 /// Takes a memcpy and a call that it depends on,
851 /// and checks for the possibility of a call slot optimization by having
852 /// the call write its result directly into the destination of the memcpy.
853 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
854  Instruction *cpyStore, Value *cpyDest,
855  Value *cpySrc, TypeSize cpySize,
856  Align cpyAlign, CallInst *C) {
857  // The general transformation to keep in mind is
858  //
859  // call @func(..., src, ...)
860  // memcpy(dest, src, ...)
861  //
862  // ->
863  //
864  // memcpy(dest, src, ...)
865  // call @func(..., dest, ...)
866  //
867  // Since moving the memcpy is technically awkward, we additionally check that
868  // src only holds uninitialized values at the moment of the call, meaning that
869  // the memcpy can be discarded rather than moved.
870 
871  // We can't optimize scalable types.
872  if (cpySize.isScalable())
873  return false;
874 
875  // Lifetime marks shouldn't be operated on.
876  if (Function *F = C->getCalledFunction())
877  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
878  return false;
879 
880  // Require that src be an alloca. This simplifies the reasoning considerably.
881  auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
882  if (!srcAlloca)
883  return false;
884 
885  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
886  if (!srcArraySize)
887  return false;
888 
889  const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
890  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
891  srcArraySize->getZExtValue();
892 
893  if (cpySize < srcSize)
894  return false;
895 
896  // Check that accessing the first srcSize bytes of dest will not cause a
897  // trap. Otherwise the transform is invalid since it might cause a trap
898  // to occur earlier than it otherwise would.
899  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
900  DL, C, DT)) {
901  LLVM_DEBUG(dbgs() << "Call Slot: Dest pointer not dereferenceable\n");
902  return false;
903  }
904 
905  // Make sure that nothing can observe cpyDest being written early. There are
906  // a number of cases to consider:
907  // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
908  // the transform.
909  // 2. C itself may not access cpyDest (prior to the transform). This is
910  // checked further below.
911  // 3. If cpyDest is accessible to the caller of this function (potentially
912  // captured and not based on an alloca), we need to ensure that we cannot
913  // unwind between C and cpyStore. This is checked here.
914  // 4. If cpyDest is potentially captured, there may be accesses to it from
915  // another thread. In this case, we need to check that cpyStore is
916  // guaranteed to be executed if C is. As it is a non-atomic access, it
917  // renders accesses from other threads undefined.
918  // TODO: This is currently not checked.
919  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore)) {
920  LLVM_DEBUG(dbgs() << "Call Slot: Dest may be visible through unwinding");
921  return false;
922  }
923 
924  // Check that dest points to memory that is at least as aligned as src.
925  Align srcAlign = srcAlloca->getAlign();
926  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
927  // If dest is not aligned enough and we can't increase its alignment then
928  // bail out.
929  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
930  return false;
931 
932  // Check that src is not accessed except via the call and the memcpy. This
933  // guarantees that it holds only undefined values when passed in (so the final
934  // memcpy can be dropped), that it is not read or written between the call and
935  // the memcpy, and that writing beyond the end of it is undefined.
936  SmallVector<User *, 8> srcUseList(srcAlloca->users());
937  while (!srcUseList.empty()) {
938  User *U = srcUseList.pop_back_val();
939 
940  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
941  append_range(srcUseList, U->users());
942  continue;
943  }
944  if (const auto *G = dyn_cast<GetElementPtrInst>(U)) {
945  if (!G->hasAllZeroIndices())
946  return false;
947 
948  append_range(srcUseList, U->users());
949  continue;
950  }
951  if (const auto *IT = dyn_cast<IntrinsicInst>(U))
952  if (IT->isLifetimeStartOrEnd())
953  continue;
954 
955  if (U != C && U != cpyLoad)
956  return false;
957  }
958 
959  // Check whether src is captured by the called function, in which case there
960  // may be further indirect uses of src.
961  bool SrcIsCaptured = any_of(C->args(), [&](Use &U) {
962  return U->stripPointerCasts() == cpySrc &&
963  !C->doesNotCapture(C->getArgOperandNo(&U));
964  });
965 
966  // If src is captured, then check whether there are any potential uses of
967  // src through the captured pointer before the lifetime of src ends, either
968  // due to a lifetime.end or a return from the function.
969  if (SrcIsCaptured) {
970  // Check that dest is not captured before/at the call. We have already
971  // checked that src is not captured before it. If either had been captured,
972  // then the call might be comparing the argument against the captured dest
973  // or src pointer.
974  Value *DestObj = getUnderlyingObject(cpyDest);
975  if (!isIdentifiedFunctionLocal(DestObj) ||
976  PointerMayBeCapturedBefore(DestObj, /* ReturnCaptures */ true,
977  /* StoreCaptures */ true, C, DT,
978  /* IncludeI */ true))
979  return false;
980 
981  MemoryLocation SrcLoc =
982  MemoryLocation(srcAlloca, LocationSize::precise(srcSize));
983  for (Instruction &I :
984  make_range(++C->getIterator(), C->getParent()->end())) {
985  // Lifetime of srcAlloca ends at lifetime.end.
986  if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
987  if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
988  II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
989  cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
990  break;
991  }
992 
993  // Lifetime of srcAlloca ends at return.
994  if (isa<ReturnInst>(&I))
995  break;
996 
997  // Ignore the direct read of src in the load.
998  if (&I == cpyLoad)
999  continue;
1000 
1001  // Check whether this instruction may mod/ref src through the captured
1002   // pointer (we have already handled any direct mod/refs in the loop above).
1003  // Also bail if we hit a terminator, as we don't want to scan into other
1004  // blocks.
1005  if (isModOrRefSet(AA->getModRefInfo(&I, SrcLoc)) || I.isTerminator())
1006  return false;
1007  }
1008  }
1009 
1010  // Since we're changing the parameter to the callsite, we need to make sure
1011  // that what would be the new parameter dominates the callsite.
1012  if (!DT->dominates(cpyDest, C)) {
1013  // Support moving a constant index GEP before the call.
1014  auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1015  if (GEP && GEP->hasAllConstantIndices() &&
1016  DT->dominates(GEP->getPointerOperand(), C))
1017  GEP->moveBefore(C);
1018  else
1019  return false;
1020  }
1021 
1022  // In addition to knowing that the call does not access src in some
1023  // unexpected manner, for example via a global, which we deduce from
1024  // the use analysis, we also need to know that it does not sneakily
1025  // access dest. We rely on AA to figure this out for us.
1026  ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
1027  // If necessary, perform additional analysis.
1028  if (isModOrRefSet(MR))
1029  MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
1030  if (isModOrRefSet(MR))
1031  return false;
1032 
1033  // We can't create address space casts here because we don't know if they're
1034  // safe for the target.
1035  if (cpySrc->getType()->getPointerAddressSpace() !=
1036  cpyDest->getType()->getPointerAddressSpace())
1037  return false;
1038  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1039  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1040  cpySrc->getType()->getPointerAddressSpace() !=
1041  C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
1042  return false;
1043 
1044  // All the checks have passed, so do the transformation.
1045  bool changedArgument = false;
1046  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
1047  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1048  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
1049  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
1050  cpyDest->getName(), C);
1051  changedArgument = true;
1052  if (C->getArgOperand(ArgI)->getType() == Dest->getType())
1053  C->setArgOperand(ArgI, Dest);
1054  else
1055  C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1056  Dest, C->getArgOperand(ArgI)->getType(),
1057  Dest->getName(), C));
1058  }
1059 
1060  if (!changedArgument)
1061  return false;
1062 
1063  // If the destination wasn't sufficiently aligned then increase its alignment.
1064  if (!isDestSufficientlyAligned) {
1065  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1066  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1067  }
1068 
1069  // Update AA metadata
1070  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1071  // handled here, but combineMetadata doesn't support them yet
1072  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1073  LLVMContext::MD_noalias,
1074  LLVMContext::MD_invariant_group,
1075  LLVMContext::MD_access_group};
1076  combineMetadata(C, cpyLoad, KnownIDs, true);
1077  if (cpyLoad != cpyStore)
1078  combineMetadata(C, cpyStore, KnownIDs, true);
1079 
1080  ++NumCallSlot;
1081  return true;
1082 }
1083 
1084 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1085 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
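///
/// Illustrative IR sketch (hypothetical example):
/// \code
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 64, i1 false)
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %c, i8* %b, i64 64, i1 false)
/// \endcode
/// becomes, provided %a is not written between the two copies,
/// \code
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 64, i1 false)
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %c, i8* %a, i64 64, i1 false)
/// \endcode
/// after which the first memcpy may become dead.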
1086 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1087  MemCpyInst *MDep) {
1088   // We can only transform memcpys where the dest of one is the source of the
1089  // other.
1090  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1091  return false;
1092 
1093  // If dep instruction is reading from our current input, then it is a noop
1094  // transfer and substituting the input won't change this instruction. Just
1095  // ignore the input and let someone else zap MDep. This handles cases like:
1096  // memcpy(a <- a)
1097  // memcpy(b <- a)
1098  if (M->getSource() == MDep->getSource())
1099  return false;
1100 
1101  // Second, the length of the memcpy's must be the same, or the preceding one
1102  // must be larger than the following one.
1103  if (MDep->getLength() != M->getLength()) {
1104  auto *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1105  auto *MLen = dyn_cast<ConstantInt>(M->getLength());
1106  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1107  return false;
1108  }
1109 
1110  // Verify that the copied-from memory doesn't change in between the two
1111  // transfers. For example, in:
1112  // memcpy(a <- b)
1113  // *b = 42;
1114  // memcpy(c <- a)
1115  // It would be invalid to transform the second memcpy into memcpy(c <- b).
1116  //
1117  // TODO: If the code between M and MDep is transparent to the destination "c",
1118  // then we could still perform the xform by moving M up to the first memcpy.
1119  // TODO: It would be sufficient to check the MDep source up to the memcpy
1120  // size of M, rather than MDep.
1121   if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1122                      MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1123  return false;
1124 
1125  // If the dest of the second might alias the source of the first, then the
1126  // source and dest might overlap. In addition, if the source of the first
1127  // points to constant memory, they won't overlap by definition. Otherwise, we
1128  // still want to eliminate the intermediate value, but we have to generate a
1129  // memmove instead of memcpy.
1130  bool UseMemMove = false;
1131   if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1132     UseMemMove = true;
1133 
1134  // If all checks passed, then we can transform M.
1135  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1136  << *MDep << '\n' << *M << '\n');
1137 
1138  // TODO: Is this worth it if we're creating a less aligned memcpy? For
1139  // example we could be moving from movaps -> movq on x86.
1140  IRBuilder<> Builder(M);
1141  Instruction *NewM;
1142  if (UseMemMove)
1143  NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1144  MDep->getRawSource(), MDep->getSourceAlign(),
1145  M->getLength(), M->isVolatile());
1146  else if (isa<MemCpyInlineInst>(M)) {
1147  // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1148  // never allowed since that would allow the latter to be lowered as a call
1149  // to an external function.
1150  NewM = Builder.CreateMemCpyInline(
1151  M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1152  MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1153  } else
1154  NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1155  MDep->getRawSource(), MDep->getSourceAlign(),
1156  M->getLength(), M->isVolatile());
1157 
1158  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1159  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1160  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1161  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1162 
1163  // Remove the instruction we're replacing.
1164  eraseInstruction(M);
1165  ++NumMemCpyInstr;
1166  return true;
1167 }
1168 
1169 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
1170 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1171 /// weren't copied over by \p MemCpy.
1172 ///
1173 /// In other words, transform:
1174 /// \code
1175 /// memset(dst, c, dst_size);
1176 /// memcpy(dst, src, src_size);
1177 /// \endcode
1178 /// into:
1179 /// \code
1180 /// memcpy(dst, src, src_size);
1181 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1182 /// \endcode
1183 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1184  MemSetInst *MemSet) {
1185  // We can only transform memset/memcpy with the same destination.
1186  if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1187  return false;
1188 
1189  // Check that src and dst of the memcpy aren't the same. While memcpy
1190  // operands cannot partially overlap, exact equality is allowed.
1191  if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1192  return false;
1193 
1194  // We know that dst up to src_size is not written. We now need to make sure
1195  // that dst up to dst_size is not accessed. (If we did not move the memset,
1196  // checking for reads would be sufficient.)
1197   if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
1198                       MSSA->getMemoryAccess(MemSet),
1199                       MSSA->getMemoryAccess(MemCpy)))
1200  return false;
1201 
1202  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1203  Value *Dest = MemCpy->getRawDest();
1204  Value *DestSize = MemSet->getLength();
1205  Value *SrcSize = MemCpy->getLength();
1206 
1207  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1208  return false;
1209 
1210  // If the sizes are the same, simply drop the memset instead of generating
1211  // a replacement with zero size.
1212  if (DestSize == SrcSize) {
1213  eraseInstruction(MemSet);
1214  return true;
1215  }
1216 
1217  // By default, create an unaligned memset.
1218  unsigned Align = 1;
1219  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1220  // of the sum.
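  // Worked example (illustrative): with DestAlign = 8 and a constant SrcSize
  // of 20, MinAlign(20, 8) == 4, since Dest + 20 is only guaranteed to be
  // 4-byte aligned.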
1221  const unsigned DestAlign =
1222  std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1223  if (DestAlign > 1)
1224  if (auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1225  Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1226 
1227  IRBuilder<> Builder(MemCpy);
1228 
1229  // If the sizes have different types, zext the smaller one.
1230  if (DestSize->getType() != SrcSize->getType()) {
1231  if (DestSize->getType()->getIntegerBitWidth() >
1232  SrcSize->getType()->getIntegerBitWidth())
1233  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1234  else
1235  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1236  }
1237 
1238  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1239  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1240  Value *MemsetLen = Builder.CreateSelect(
1241  Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1242  unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1243  Instruction *NewMemSet = Builder.CreateMemSet(
1244  Builder.CreateGEP(Builder.getInt8Ty(),
1245  Builder.CreatePointerCast(Dest,
1246  Builder.getInt8PtrTy(DestAS)),
1247  SrcSize),
1248  MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
1249 
1250  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1251  "MemCpy must be a MemoryDef");
1252  // The new memset is inserted after the memcpy, but it is known that its
1253  // defining access is the memset about to be removed which immediately
1254  // precedes the memcpy.
1255  auto *LastDef =
1256  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1257  auto *NewAccess = MSSAU->createMemoryAccessBefore(
1258  NewMemSet, LastDef->getDefiningAccess(), LastDef);
1259  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1260 
1261  eraseInstruction(MemSet);
1262  return true;
1263 }
1264 
1265 /// Determine whether the instruction has undefined content for the given Size,
1266 /// either because it was freshly alloca'd or started its lifetime.
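///
/// Illustrative sketch (hypothetical IR): after
/// \code
///   %a = alloca [16 x i8]
///   %p = bitcast [16 x i8]* %a to i8*
///   call void @llvm.lifetime.start.p0i8(i64 16, i8* %p)
/// \endcode
/// any copy of up to 16 bytes out of %p reads undefined contents.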
1267 static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1268  MemoryDef *Def, Value *Size) {
1269  if (MSSA->isLiveOnEntryDef(Def))
1270  return isa<AllocaInst>(getUnderlyingObject(V));
1271 
1272  if (auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1273  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1274  auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1275 
1276  if (auto *CSize = dyn_cast<ConstantInt>(Size)) {
1277  if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1278  LTSize->getZExtValue() >= CSize->getZExtValue())
1279  return true;
1280  }
1281 
1282  // If the lifetime.start covers a whole alloca (as it almost always
1283  // does) and we're querying a pointer based on that alloca, then we know
1284  // the memory is definitely undef, regardless of how exactly we alias.
1285  // The size also doesn't matter, as an out-of-bounds access would be UB.
1286  if (auto *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V))) {
1287  if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1288  const DataLayout &DL = Alloca->getModule()->getDataLayout();
1289  if (Optional<TypeSize> AllocaSize =
1290  Alloca->getAllocationSizeInBits(DL))
1291  if (*AllocaSize == LTSize->getValue() * 8)
1292  return true;
1293  }
1294  }
1295  }
1296  }
1297 
1298  return false;
1299 }
1300 
1301 /// Transform memcpy to memset when its source was just memset.
1302 /// In other words, turn:
1303 /// \code
1304 /// memset(dst1, c, dst1_size);
1305 /// memcpy(dst2, dst1, dst2_size);
1306 /// \endcode
1307 /// into:
1308 /// \code
1309 /// memset(dst1, c, dst1_size);
1310 /// memset(dst2, c, dst2_size);
1311 /// \endcode
1312 /// When dst2_size <= dst1_size.
1313 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1314  MemSetInst *MemSet) {
1315   // Make sure we have memcpy(..., memset(...), ...); that is, we are memsetting
1316   // and then memcpying from the same address. Otherwise it is hard to reason about.
1317  if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1318  return false;
1319 
1320  Value *MemSetSize = MemSet->getLength();
1321  Value *CopySize = MemCpy->getLength();
1322 
1323  if (MemSetSize != CopySize) {
1324  // Make sure the memcpy doesn't read any more than what the memset wrote.
1325  // Don't worry about sizes larger than i64.
1326 
1327  // A known memset size is required.
1328  auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1329  if (!CMemSetSize)
1330  return false;
1331 
1332  // A known memcpy size is also required.
1333  auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1334  if (!CCopySize)
1335  return false;
1336  if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1337  // If the memcpy is larger than the memset, but the memory was undef prior
1338  // to the memset, we can just ignore the tail. Technically we're only
1339  // interested in the bytes from MemSetSize..CopySize here, but as we can't
1340  // easily represent this location, we use the full 0..CopySize range.
1341  MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1342  bool CanReduceSize = false;
1343  MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1344  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1345  MemSetAccess->getDefiningAccess(), MemCpyLoc);
1346  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1347  if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1348  CanReduceSize = true;
1349 
1350  if (!CanReduceSize)
1351  return false;
1352  CopySize = MemSetSize;
1353  }
1354  }
1355 
1356  IRBuilder<> Builder(MemCpy);
1357  Instruction *NewM =
1358  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1359  CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1360  auto *LastDef =
1361  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1362  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1363  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1364 
1365  return true;
1366 }
1367 
1368 /// Perform simplification of memcpy's. If we have memcpy A
1369 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1370 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
1371 /// circumstances). This allows later passes to remove the first memcpy
1372 /// altogether.
1373 bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1374  // We can only optimize non-volatile memcpy's.
1375  if (M->isVolatile()) return false;
1376 
1377  // If the source and destination of the memcpy are the same, then zap it.
1378  if (M->getSource() == M->getDest()) {
1379  ++BBI;
1380  eraseInstruction(M);
1381  return true;
1382  }
1383 
1384  // If copying from a constant, try to turn the memcpy into a memset.
1385  if (auto *GV = dyn_cast<GlobalVariable>(M->getSource()))
1386  if (GV->isConstant() && GV->hasDefinitiveInitializer())
1387  if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1388  M->getModule()->getDataLayout())) {
1389  IRBuilder<> Builder(M);
1390  Instruction *NewM =
1391  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1392  MaybeAlign(M->getDestAlignment()), false);
1393  auto *LastDef =
1394  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1395  auto *NewAccess =
1396  MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1397  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1398 
1399  eraseInstruction(M);
1400  ++NumCpyToSet;
1401  return true;
1402  }
1403 
1404  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1405  MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1406   const MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1407   const MemoryAccess *DestClobber =
1408  MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1409 
1410  // Try to turn a partially redundant memset + memcpy into
1411  // memcpy + smaller memset. We don't need the memcpy size for this.
1412   // The memcpy must post-dominate the memset, so limit this to the same basic
1413  // block. A non-local generalization is likely not worthwhile.
1414  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1415  if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1416  if (DestClobber->getBlock() == M->getParent())
1417  if (processMemSetMemCpyDependence(M, MDep))
1418  return true;
1419 
1420  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1421  AnyClobber, MemoryLocation::getForSource(M));
1422 
1423  // There are four possible optimizations we can do for memcpy:
1424   // a) memcpy-memcpy xform which exposes redundancy for DSE.
1425  // b) call-memcpy xform for return slot optimization.
1426  // c) memcpy from freshly alloca'd space or space that has just started
1427  // its lifetime copies undefined data, and we can therefore eliminate
1428  // the memcpy in favor of the data that was already at the destination.
1429  // d) memcpy from a just-memset'd source can be turned into memset.
1430  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1431  if (Instruction *MI = MD->getMemoryInst()) {
1432  if (auto *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1433  if (auto *C = dyn_cast<CallInst>(MI)) {
1434  // The memcpy must post-dom the call. Limit to the same block for
1435  // now. Additionally, we need to ensure that there are no accesses
1436  // to dest between the call and the memcpy. Accesses to src will be
1437  // checked by performCallSlotOptzn().
1438  // TODO: Support non-local call-slot optimization?
1439  if (C->getParent() == M->getParent() &&
1440  !accessedBetween(*AA, DestLoc, MD, MA)) {
1441  // FIXME: Can we pass in either of dest/src alignment here instead
1442  // of conservatively taking the minimum?
1443  Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1444  M->getSourceAlign().valueOrOne());
1445  if (performCallSlotOptzn(
1446  M, M, M->getDest(), M->getSource(),
1447  TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1448  C)) {
1449  LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1450  << " call: " << *C << "\n"
1451  << " memcpy: " << *M << "\n");
1452  eraseInstruction(M);
1453  ++NumMemCpyInstr;
1454  return true;
1455  }
1456  }
1457  }
1458  }
1459  if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1460  return processMemCpyMemCpyDependence(M, MDep);
1461  if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1462  if (performMemCpyToMemSetOptzn(M, MDep)) {
1463  LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1464  eraseInstruction(M);
1465  ++NumCpyToSet;
1466  return true;
1467  }
1468  }
1469  }
1470 
1471  if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
1472  LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1473  eraseInstruction(M);
1474  ++NumMemCpyInstr;
1475  return true;
1476  }
1477  }
1478 
1479  return false;
1480 }
1481 
1482 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1483 /// not to alias.
1484 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1485  // See if the source could potentially be modified by this memmove.
1486  if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1487  return false;
1488 
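  // [Editorial illustration, not part of the original source; %a and %b are
  // hypothetical.] A memmove that provably does not write its own source,
  // e.g. when destination and source are two distinct allocas:
  //     memmove(%a, %b, 16)   ; %a and %b cannot overlap
  //   =>
  //     memcpy(%a, %b, 16)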
1489  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1490  << "\n");
1491 
1492  // If not, then we know we can transform this.
1493  Type *ArgTys[3] = { M->getRawDest()->getType(),
1494  M->getRawSource()->getType(),
1495  M->getLength()->getType() };
1496  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1497  Intrinsic::memcpy, ArgTys));
1498 
1499  // For MemorySSA nothing really changes (except that memcpy may imply stricter
1500  // aliasing guarantees).
1501 
1502  ++NumMoveToCpy;
1503  return true;
1504 }
1505 
1506 /// This is called on every byval argument in call sites.
1507 bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1508  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1509  // Find out what feeds this byval argument.
1510  Value *ByValArg = CB.getArgOperand(ArgNo);
1511  Type *ByValTy = CB.getParamByValType(ArgNo);
1512  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1513  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1514  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1515  if (!CallAccess)
1516  return false;
1517  MemCpyInst *MDep = nullptr;
1518  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1519  CallAccess->getDefiningAccess(), Loc);
1520  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1521  MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1522 
1523  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1524  // a memcpy, see if we can byval from the source of the memcpy instead of the
1525  // result.
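  // [Editorial illustration, not part of the original source; %tmp, %src, %T
  // and @f are hypothetical.] The forwarding attempted here is:
  //     memcpy(%tmp, %src, N)
  //     call void @f(%T* byval(%T) %tmp)
  //   =>
  //     memcpy(%tmp, %src, N)
  //     call void @f(%T* byval(%T) %src)
  // which is legal only if %src is large enough, sufficiently aligned, and not
  // written between the memcpy and the call; those conditions are checked below.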
1526  if (!MDep || MDep->isVolatile() ||
1527  ByValArg->stripPointerCasts() != MDep->getDest())
1528  return false;
1529 
1530  // The length of the memcpy must be greater than or equal to the size of the byval.
1531  auto *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1532  if (!C1 || !TypeSize::isKnownGE(
1533  TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1534  return false;
1535 
1536  // Get the alignment of the byval. If the call doesn't specify the alignment,
1537  // then it is some target-specific value that we can't know.
1538  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1539  if (!ByValAlign) return false;
1540 
1541  // If it is greater than the memcpy, then we check to see if we can force the
1542  // source of the memcpy to the alignment we need. If we fail, we bail out.
1543  MaybeAlign MemDepAlign = MDep->getSourceAlign();
1544  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1545  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1546  DT) < *ByValAlign)
1547  return false;
1548 
1549  // The address space of the memcpy source must match the byval argument
1550  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1551  ByValArg->getType()->getPointerAddressSpace())
1552  return false;
1553 
1554  // Verify that the copied-from memory doesn't change in between the memcpy and
1555  // the byval call.
1556  // memcpy(a <- b)
1557  // *b = 42;
1558  // foo(*a)
1559  // It would be invalid to rewrite the call as foo(*b) in this case.
1560  if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1561  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1562  return false;
1563 
1564  Value *TmpCast = MDep->getSource();
1565  if (MDep->getSource()->getType() != ByValArg->getType()) {
1566  BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1567  "tmpcast", &CB);
1568  // Set the tmpcast's DebugLoc to MDep's
1569  TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1570  TmpCast = TmpBitCast;
1571  }
1572 
1573  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1574  << " " << *MDep << "\n"
1575  << " " << CB << "\n");
1576 
1577  // Otherwise we're good! Update the byval argument.
1578  CB.setArgOperand(ArgNo, TmpCast);
1579  ++NumMemCpyInstr;
1580  return true;
1581 }
1582 
1583 /// Executes one iteration of MemCpyOptPass.
1584 bool MemCpyOptPass::iterateOnFunction(Function &F) {
1585  bool MadeChange = false;
1586 
1587  // Walk all instructions in the function.
1588  for (BasicBlock &BB : F) {
1589  // Skip unreachable blocks. For example processStore assumes that an
1590  // instruction in a BB can't be dominated by a later instruction in the
1591  // same BB (which is a scenario that can happen for an unreachable BB that
1592  // has itself as a predecessor).
1593  if (!DT->isReachableFromEntry(&BB))
1594  continue;
1595 
1596  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1597  // Avoid invalidating the iterator.
1598  Instruction *I = &*BI++;
1599 
1600  bool RepeatInstruction = false;
1601 
1602  if (auto *SI = dyn_cast<StoreInst>(I))
1603  MadeChange |= processStore(SI, BI);
1604  else if (auto *M = dyn_cast<MemSetInst>(I))
1605  RepeatInstruction = processMemSet(M, BI);
1606  else if (auto *M = dyn_cast<MemCpyInst>(I))
1607  RepeatInstruction = processMemCpy(M, BI);
1608  else if (auto *M = dyn_cast<MemMoveInst>(I))
1609  RepeatInstruction = processMemMove(M);
1610  else if (auto *CB = dyn_cast<CallBase>(I)) {
1611  for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1612  if (CB->isByValArgument(i))
1613  MadeChange |= processByValArgument(*CB, i);
1614  }
1615 
1616  // Reprocess the instruction if desired.
1617  if (RepeatInstruction) {
1618  if (BI != BB.begin())
1619  --BI;
1620  MadeChange = true;
1621  }
1622  }
1623  }
1624 
1625  return MadeChange;
1626 }
1627 
1628 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1629  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1630  auto *AA = &AM.getResult<AAManager>(F);
1631  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1632  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1633  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1634 
1635  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1636  if (!MadeChange)
1637  return PreservedAnalyses::all();
1638 
1639  PreservedAnalyses PA;
1640  PA.preserveSet<CFGAnalyses>();
1641  PA.preserve<MemorySSAAnalysis>();
1642  return PA;
1643 }
1644 
1645 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1646  AliasAnalysis *AA_, AssumptionCache *AC_,
1647  DominatorTree *DT_, MemorySSA *MSSA_) {
1648  bool MadeChange = false;
1649  TLI = TLI_;
1650  AA = AA_;
1651  AC = AC_;
1652  DT = DT_;
1653  MSSA = MSSA_;
1654  MemorySSAUpdater MSSAU_(MSSA_);
1655  MSSAU = &MSSAU_;
1656 
1657  while (true) {
1658  if (!iterateOnFunction(F))
1659  break;
1660  MadeChange = true;
1661  }
1662 
1663  if (VerifyMemorySSA)
1664  MSSA_->verifyMemorySSA();
1665 
1666  return MadeChange;
1667 }
1668 
1669 /// This is the main transformation entry point for a function.
1670 bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1671  if (skipFunction(F))
1672  return false;
1673 
1674  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1675  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1676  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1677  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1678  auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1679 
1680  return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1681 }