1 //===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass performs various transformations related to eliminating memcpy
10 // calls, or transforming sets of stores into memset's.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
15 #include "llvm/ADT/DenseSet.h"
16 #include "llvm/ADT/None.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/ADT/iterator_range.h"
21 #include "llvm/Analysis/AliasAnalysis.h"
22 #include "llvm/Analysis/AssumptionCache.h"
23 #include "llvm/Analysis/GlobalsModRef.h"
24 #include "llvm/Analysis/Loads.h"
25 #include "llvm/Analysis/MemoryLocation.h"
26 #include "llvm/Analysis/MemorySSA.h"
27 #include "llvm/Analysis/MemorySSAUpdater.h"
28 #include "llvm/Analysis/TargetLibraryInfo.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/IR/Argument.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/Constants.h"
33 #include "llvm/IR/DataLayout.h"
34 #include "llvm/IR/DerivedTypes.h"
35 #include "llvm/IR/Dominators.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GetElementPtrTypeIterator.h"
38 #include "llvm/IR/GlobalVariable.h"
39 #include "llvm/IR/IRBuilder.h"
40 #include "llvm/IR/InstrTypes.h"
41 #include "llvm/IR/Instruction.h"
42 #include "llvm/IR/Instructions.h"
43 #include "llvm/IR/IntrinsicInst.h"
44 #include "llvm/IR/Intrinsics.h"
45 #include "llvm/IR/LLVMContext.h"
46 #include "llvm/IR/Module.h"
47 #include "llvm/IR/Operator.h"
48 #include "llvm/IR/PassManager.h"
49 #include "llvm/IR/Type.h"
50 #include "llvm/IR/User.h"
51 #include "llvm/IR/Value.h"
52 #include "llvm/InitializePasses.h"
53 #include "llvm/Pass.h"
54 #include "llvm/Support/Casting.h"
55 #include "llvm/Support/Debug.h"
56 #include "llvm/Support/MathExtras.h"
57 #include "llvm/Support/raw_ostream.h"
58 #include "llvm/Transforms/Scalar.h"
59 #include "llvm/Transforms/Utils/Local.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <utility>
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "memcpyopt"
68 
69 static cl::opt<bool> EnableMemCpyOptWithoutLibcalls(
70     "enable-memcpyopt-without-libcalls", cl::init(false), cl::Hidden,
72  cl::desc("Enable memcpyopt even when libcalls are disabled"));
73 
74 STATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted");
75 STATISTIC(NumMemSetInfer, "Number of memsets inferred");
76 STATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy");
77 STATISTIC(NumCpyToSet, "Number of memcpys converted to memset");
78 STATISTIC(NumCallSlot, "Number of call slot optimizations performed");
79 
80 namespace {
81 
82 /// Represents a range of memset'd bytes with the ByteVal value.
83 /// This allows us to analyze stores like:
84 /// store 0 -> P+1
85 /// store 0 -> P+0
86 /// store 0 -> P+3
87 /// store 0 -> P+2
88 /// which sometimes happens with stores to arrays of structs etc. When we see
89 /// the first store, we make a range [1, 2). The second store extends the range
90 /// to [0, 2). The third makes a new range [2, 3). The fourth store joins the
91 /// two ranges into [0, 3) which is memset'able.
92 struct MemsetRange {
93  // Start/End - A semi range that describes the span that this range covers.
94  // The range is closed at the start and open at the end: [Start, End).
95  int64_t Start, End;
96 
97  /// StartPtr - The getelementptr instruction that points to the start of the
98  /// range.
99  Value *StartPtr;
100 
101  /// Alignment - The known alignment of the first store.
102  unsigned Alignment;
103 
104  /// TheStores - The actual stores that make up this range.
105   SmallVector<Instruction*, 16> TheStores;
106 
107  bool isProfitableToUseMemset(const DataLayout &DL) const;
108 };
109 
110 } // end anonymous namespace
111 
112 bool MemsetRange::isProfitableToUseMemset(const DataLayout &DL) const {
113  // If we found more than 4 stores to merge or 16 bytes, use memset.
114  if (TheStores.size() >= 4 || End-Start >= 16) return true;
115 
116  // If there is nothing to merge, don't do anything.
117  if (TheStores.size() < 2) return false;
118 
119  // If any of the stores are a memset, then it is always good to extend the
120  // memset.
121  for (Instruction *SI : TheStores)
122  if (!isa<StoreInst>(SI))
123  return true;
124 
125  // Assume that the code generator is capable of merging pairs of stores
126  // together if it wants to.
127  if (TheStores.size() == 2) return false;
128 
129  // If we have fewer than 8 stores, it can still be worthwhile to do this.
130  // For example, merging 4 i8 stores into an i32 store is useful almost always.
131  // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the
132  // memset will be split into 2 32-bit stores anyway) and doing so can
133  // pessimize the llvm optimizer.
134  //
135  // Since we don't have perfect knowledge here, make some assumptions: assume
136  // the maximum GPR width is the same size as the largest legal integer
137  // size. If so, check to see whether we will end up actually reducing the
138  // number of stores used.
139  unsigned Bytes = unsigned(End-Start);
140  unsigned MaxIntSize = DL.getLargestLegalIntTypeSizeInBits() / 8;
141  if (MaxIntSize == 0)
142  MaxIntSize = 1;
143  unsigned NumPointerStores = Bytes / MaxIntSize;
144 
145  // Assume the remaining bytes if any are done a byte at a time.
146  unsigned NumByteStores = Bytes % MaxIntSize;
147 
148  // If we will reduce the # stores (according to this heuristic), do the
149  // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32
150  // etc.
151  return TheStores.size() > NumPointerStores+NumByteStores;
152 }
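// Illustrative numbers for the heuristic above (added note, not from the
// original source): on a target whose largest legal integer type is 64 bits
// (MaxIntSize = 8), three stores (i32 + i16 + i16) covering 8 contiguous
// bytes give NumPointerStores = 1 and NumByteStores = 0, so 3 > 1 holds and
// the range is lowered to a memset. Three i8 stores covering 3 bytes give
// NumPointerStores = 0 and NumByteStores = 3, so 3 > 3 fails and the stores
// are left alone. Ranges with >= 4 stores or >= 16 bytes are accepted before
// this computation is ever reached.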
153 
154 namespace {
155 
156 class MemsetRanges {
157  using range_iterator = SmallVectorImpl<MemsetRange>::iterator;
158 
159  /// A sorted list of the memset ranges.
160   SmallVector<MemsetRange, 8> Ranges;
161 
162  const DataLayout &DL;
163 
164 public:
165  MemsetRanges(const DataLayout &DL) : DL(DL) {}
166 
167  using const_iterator = SmallVectorImpl<MemsetRange>::const_iterator;
168 
169  const_iterator begin() const { return Ranges.begin(); }
170  const_iterator end() const { return Ranges.end(); }
171  bool empty() const { return Ranges.empty(); }
172 
173  void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
174  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
175  addStore(OffsetFromFirst, SI);
176  else
177  addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
178  }
179 
180  void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
181  TypeSize StoreSize = DL.getTypeStoreSize(SI->getOperand(0)->getType());
182  assert(!StoreSize.isScalable() && "Can't track scalable-typed stores");
183  addRange(OffsetFromFirst, StoreSize.getFixedSize(), SI->getPointerOperand(),
184  SI->getAlign().value(), SI);
185  }
186 
187  void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
188  int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue();
189  addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getDestAlignment(), MSI);
190  }
191 
192  void addRange(int64_t Start, int64_t Size, Value *Ptr,
193  unsigned Alignment, Instruction *Inst);
194 };
195 
196 } // end anonymous namespace
197 
198 /// Add a new store to the MemsetRanges data structure. This adds a
199 /// new range for the specified store at the specified offset, merging into
200 /// existing ranges as appropriate.
201 void MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr,
202  unsigned Alignment, Instruction *Inst) {
203  int64_t End = Start+Size;
204 
205  range_iterator I = partition_point(
206  Ranges, [=](const MemsetRange &O) { return O.End < Start; });
207 
208  // We now know that I == E, in which case we didn't find anything to merge
209  // with, or that Start <= I->End. If End < I->Start or I == E, then we need
210  // to insert a new range. Handle this now.
211  if (I == Ranges.end() || End < I->Start) {
212  MemsetRange &R = *Ranges.insert(I, MemsetRange());
213  R.Start = Start;
214  R.End = End;
215  R.StartPtr = Ptr;
216  R.Alignment = Alignment;
217  R.TheStores.push_back(Inst);
218  return;
219  }
220 
221  // This store overlaps with I, add it.
222  I->TheStores.push_back(Inst);
223 
224  // At this point, we may have an interval that completely contains our store.
225  // If so, just add it to the interval and return.
226  if (I->Start <= Start && I->End >= End)
227  return;
228 
229  // Now we know that Start <= I->End and End >= I->Start so the range overlaps
230  // but is not entirely contained within the range.
231 
232  // See if the range extends the start of the range. In this case, it couldn't
233  // possibly cause it to join the prior range, because otherwise we would have
234  // stopped on *it*.
235  if (Start < I->Start) {
236  I->Start = Start;
237  I->StartPtr = Ptr;
238  I->Alignment = Alignment;
239  }
240 
241  // Now we know that Start <= I->End and Start >= I->Start (so the startpoint
242  // is in or right at the end of I), and that End >= I->Start. Extend I out to
243  // End.
244  if (End > I->End) {
245  I->End = End;
246  range_iterator NextI = I;
247  while (++NextI != Ranges.end() && End >= NextI->Start) {
248  // Merge the range in.
249  I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
250  if (NextI->End > I->End)
251  I->End = NextI->End;
252  Ranges.erase(NextI);
253  NextI = I;
254  }
255  }
256 }
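// Illustrative walk-through of addRange (added note, not from the original
// source): with existing ranges [0, 4) and [8, 12), adding a store covering
// [2, 10) selects the [0, 4) range via partition_point (its End is not below
// Start = 2), extends its End to 10, and the merge loop above then absorbs
// [8, 12), leaving a single range [0, 12) that owns all of the stores.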
257 
258 //===----------------------------------------------------------------------===//
259 // MemCpyOptLegacyPass Pass
260 //===----------------------------------------------------------------------===//
261 
262 namespace {
263 
264 class MemCpyOptLegacyPass : public FunctionPass {
265  MemCpyOptPass Impl;
266 
267 public:
268  static char ID; // Pass identification, replacement for typeid
269 
270  MemCpyOptLegacyPass() : FunctionPass(ID) {
271     initializeMemCpyOptLegacyPassPass(*PassRegistry::getPassRegistry());
272   }
273 
274  bool runOnFunction(Function &F) override;
275 
276 private:
277  // This transformation requires dominator postdominator info
278  void getAnalysisUsage(AnalysisUsage &AU) const override {
279  AU.setPreservesCFG();
280     AU.addRequired<AssumptionCacheTracker>();
281     AU.addRequired<DominatorTreeWrapperPass>();
282     AU.addPreserved<DominatorTreeWrapperPass>();
283     AU.addPreserved<GlobalsAAWrapperPass>();
284     AU.addRequired<TargetLibraryInfoWrapperPass>();
285     AU.addRequired<AAResultsWrapperPass>();
286     AU.addPreserved<AAResultsWrapperPass>();
287     AU.addRequired<MemorySSAWrapperPass>();
288     AU.addPreserved<MemorySSAWrapperPass>();
289   }
290 };
291 
292 } // end anonymous namespace
293 
294 char MemCpyOptLegacyPass::ID = 0;
295 
296 /// The public interface to this file...
297 FunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOptLegacyPass(); }
298 
299 INITIALIZE_PASS_BEGIN(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
300  false, false)
301 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
302 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
303 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
304 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
305 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
306 INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
307 INITIALIZE_PASS_END(MemCpyOptLegacyPass, "memcpyopt", "MemCpy Optimization",
308                     false, false)
309 
310 // Check that V is either not accessible by the caller, or unwinding cannot
311 // occur between Start and End.
312 static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start,
313                                          Instruction *End) {
314  assert(Start->getParent() == End->getParent() && "Must be in same block");
315  if (!Start->getFunction()->doesNotThrow() &&
316  !isa<AllocaInst>(getUnderlyingObject(V))) {
317  for (const Instruction &I :
318  make_range(Start->getIterator(), End->getIterator())) {
319  if (I.mayThrow())
320  return true;
321  }
322  }
323  return false;
324 }
325 
326 void MemCpyOptPass::eraseInstruction(Instruction *I) {
327  MSSAU->removeMemoryAccess(I);
328  I->eraseFromParent();
329 }
330 
331 // Check for mod or ref of Loc between Start and End, excluding both boundaries.
332 // Start and End must be in the same block
333 static bool accessedBetween(AliasAnalysis &AA, MemoryLocation Loc,
334                             const MemoryUseOrDef *Start,
335  const MemoryUseOrDef *End) {
336  assert(Start->getBlock() == End->getBlock() && "Only local supported");
337  for (const MemoryAccess &MA :
338  make_range(++Start->getIterator(), End->getIterator())) {
339  if (isModOrRefSet(AA.getModRefInfo(cast<MemoryUseOrDef>(MA).getMemoryInst(),
340  Loc)))
341  return true;
342  }
343  return false;
344 }
345 
346 // Check for mod of Loc between Start and End, excluding both boundaries.
347 // Start and End can be in different blocks.
348 static bool writtenBetween(MemorySSA *MSSA, MemoryLocation Loc,
349  const MemoryUseOrDef *Start,
350  const MemoryUseOrDef *End) {
351  // TODO: Only walk until we hit Start.
352   MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
353       End->getDefiningAccess(), Loc);
354  return !MSSA->dominates(Clobber, Start);
355 }
356 
357 /// When scanning forward over instructions, we look for some other patterns to
358 /// fold away. In particular, this looks for stores to neighboring locations of
359 /// memory. If it sees enough consecutive ones, it attempts to merge them
360 /// together into a memcpy/memset.
361 Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
362  Value *StartPtr,
363  Value *ByteVal) {
364  const DataLayout &DL = StartInst->getModule()->getDataLayout();
365 
366  // We can't track scalable types
367  if (StoreInst *SI = dyn_cast<StoreInst>(StartInst))
368  if (DL.getTypeStoreSize(SI->getOperand(0)->getType()).isScalable())
369  return nullptr;
370 
371   // Okay, so we now have a single store of a splattable value. Scan to find
372  // all subsequent stores of the same value to offset from the same pointer.
373  // Join these together into ranges, so we can decide whether contiguous blocks
374  // are stored.
375  MemsetRanges Ranges(DL);
376 
377  BasicBlock::iterator BI(StartInst);
378 
379  // Keeps track of the last memory use or def before the insertion point for
380  // the new memset. The new MemoryDef for the inserted memsets will be inserted
381  // after MemInsertPoint. It points to either LastMemDef or to the last user
382  // before the insertion point of the memset, if there are any such users.
383  MemoryUseOrDef *MemInsertPoint = nullptr;
384  // Keeps track of the last MemoryDef between StartInst and the insertion point
385  // for the new memset. This will become the defining access of the inserted
386  // memsets.
387  MemoryDef *LastMemDef = nullptr;
388  for (++BI; !BI->isTerminator(); ++BI) {
389  auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
390  MSSAU->getMemorySSA()->getMemoryAccess(&*BI));
391  if (CurrentAcc) {
392  MemInsertPoint = CurrentAcc;
393  if (auto *CurrentDef = dyn_cast<MemoryDef>(CurrentAcc))
394  LastMemDef = CurrentDef;
395  }
396 
397  // Calls that only access inaccessible memory do not block merging
398  // accessible stores.
399  if (auto *CB = dyn_cast<CallBase>(BI)) {
400  if (CB->onlyAccessesInaccessibleMemory())
401  continue;
402  }
403 
404  if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
405  // If the instruction is readnone, ignore it, otherwise bail out. We
406  // don't even allow readonly here because we don't want something like:
407  // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A).
408  if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
409  break;
410  continue;
411  }
412 
413  if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) {
414  // If this is a store, see if we can merge it in.
415  if (!NextStore->isSimple()) break;
416 
417  Value *StoredVal = NextStore->getValueOperand();
418 
419  // Don't convert stores of non-integral pointer types to memsets (which
420  // stores integers).
421  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
422  break;
423 
424  // We can't track ranges involving scalable types.
425  if (DL.getTypeStoreSize(StoredVal->getType()).isScalable())
426  break;
427 
428  // Check to see if this stored value is of the same byte-splattable value.
429  Value *StoredByte = isBytewiseValue(StoredVal, DL);
430  if (isa<UndefValue>(ByteVal) && StoredByte)
431  ByteVal = StoredByte;
432  if (ByteVal != StoredByte)
433  break;
434 
435  // Check to see if this store is to a constant offset from the start ptr.
436       Optional<int64_t> Offset =
437           isPointerOffset(StartPtr, NextStore->getPointerOperand(), DL);
438  if (!Offset)
439  break;
440 
441  Ranges.addStore(*Offset, NextStore);
442  } else {
443  MemSetInst *MSI = cast<MemSetInst>(BI);
444 
445  if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
446  !isa<ConstantInt>(MSI->getLength()))
447  break;
448 
449  // Check to see if this store is to a constant offset from the start ptr.
450  Optional<int64_t> Offset = isPointerOffset(StartPtr, MSI->getDest(), DL);
451  if (!Offset)
452  break;
453 
454  Ranges.addMemSet(*Offset, MSI);
455  }
456  }
457 
458  // If we have no ranges, then we just had a single store with nothing that
459  // could be merged in. This is a very common case of course.
460  if (Ranges.empty())
461  return nullptr;
462 
463  // If we had at least one store that could be merged in, add the starting
464  // store as well. We try to avoid this unless there is at least something
465  // interesting as a small compile-time optimization.
466  Ranges.addInst(0, StartInst);
467 
468  // If we create any memsets, we put it right before the first instruction that
469   // isn't part of the memset block. This ensures that the memset is dominated
470  // by any addressing instruction needed by the start of the block.
471  IRBuilder<> Builder(&*BI);
472 
473  // Now that we have full information about ranges, loop over the ranges and
474  // emit memset's for anything big enough to be worthwhile.
475  Instruction *AMemSet = nullptr;
476  for (const MemsetRange &Range : Ranges) {
477  if (Range.TheStores.size() == 1) continue;
478 
479  // If it is profitable to lower this range to memset, do so now.
480  if (!Range.isProfitableToUseMemset(DL))
481  continue;
482 
483  // Otherwise, we do want to transform this! Create a new memset.
484  // Get the starting pointer of the block.
485  StartPtr = Range.StartPtr;
486 
487  AMemSet = Builder.CreateMemSet(StartPtr, ByteVal, Range.End - Range.Start,
488  MaybeAlign(Range.Alignment));
489  LLVM_DEBUG(dbgs() << "Replace stores:\n"; for (Instruction *SI
490  : Range.TheStores) dbgs()
491  << *SI << '\n';
492  dbgs() << "With: " << *AMemSet << '\n');
493  if (!Range.TheStores.empty())
494  AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc());
495 
496  assert(LastMemDef && MemInsertPoint &&
497  "Both LastMemDef and MemInsertPoint need to be set");
498  auto *NewDef =
499  cast<MemoryDef>(MemInsertPoint->getMemoryInst() == &*BI
500  ? MSSAU->createMemoryAccessBefore(
501  AMemSet, LastMemDef, MemInsertPoint)
502  : MSSAU->createMemoryAccessAfter(
503  AMemSet, LastMemDef, MemInsertPoint));
504  MSSAU->insertDef(NewDef, /*RenameUses=*/true);
505  LastMemDef = NewDef;
506  MemInsertPoint = NewDef;
507 
508  // Zap all the stores.
509  for (Instruction *SI : Range.TheStores)
510  eraseInstruction(SI);
511 
512  ++NumMemSetInfer;
513  }
514 
515  return AMemSet;
516 }
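// Illustrative IR for the merging above (added example, not from the original
// source): four adjacent byte stores such as
//   store i8 0, i8* %p
//   store i8 0, i8* %p1      ; %p1 = getelementptr i8, i8* %p, i64 1
//   store i8 0, i8* %p2      ; %p2 = getelementptr i8, i8* %p, i64 2
//   store i8 0, i8* %p3      ; %p3 = getelementptr i8, i8* %p, i64 3
// are collected into one MemsetRange [0, 4) and replaced by a single
//   call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 4, i1 false)
// inserted before the first instruction that is not part of the run of stores.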
517 
518 // This method tries to lift a store instruction before position P.
519 // It will lift the store and its arguments, plus anything else that
520 // may alias with these.
521 // The method returns true if it was successful.
522 bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *P, const LoadInst *LI) {
523   // If the store aliases this position, bail out early.
524   MemoryLocation StoreLoc = MemoryLocation::get(SI);
525   if (isModOrRefSet(AA->getModRefInfo(P, StoreLoc)))
526  return false;
527 
528   // Keep track of the arguments of all instructions we plan to lift
529  // so we can make sure to lift them as well if appropriate.
530   SmallPtrSet<Instruction*, 8> Args;
531   if (auto *Ptr = dyn_cast<Instruction>(SI->getPointerOperand()))
532  if (Ptr->getParent() == SI->getParent())
533  Args.insert(Ptr);
534 
535   // Instructions to lift before P.
536   SmallVector<Instruction *, 8> ToLift;
537 
538  // Memory locations of lifted instructions.
539  SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
540 
541  // Lifted calls.
542   SmallVector<const CallBase *, 8> Calls;
543 
544  const MemoryLocation LoadLoc = MemoryLocation::get(LI);
545 
546  for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
547  auto *C = &*I;
548 
549  // Make sure hoisting does not perform a store that was not guaranteed to
550  // happen.
551     if (!isGuaranteedToTransferExecutionToSuccessor(C))
552       return false;
553 
554  bool MayAlias = isModOrRefSet(AA->getModRefInfo(C, None));
555 
556  bool NeedLift = false;
557  if (Args.erase(C))
558  NeedLift = true;
559  else if (MayAlias) {
560  NeedLift = llvm::any_of(MemLocs, [C, this](const MemoryLocation &ML) {
561  return isModOrRefSet(AA->getModRefInfo(C, ML));
562  });
563 
564  if (!NeedLift)
565  NeedLift = llvm::any_of(Calls, [C, this](const CallBase *Call) {
566  return isModOrRefSet(AA->getModRefInfo(C, Call));
567  });
568  }
569 
570  if (!NeedLift)
571  continue;
572 
573  if (MayAlias) {
574  // Since LI is implicitly moved downwards past the lifted instructions,
575  // none of them may modify its source.
576  if (isModSet(AA->getModRefInfo(C, LoadLoc)))
577  return false;
578  else if (const auto *Call = dyn_cast<CallBase>(C)) {
579  // If we can't lift this before P, it's game over.
580  if (isModOrRefSet(AA->getModRefInfo(P, Call)))
581  return false;
582 
583  Calls.push_back(Call);
584  } else if (isa<LoadInst>(C) || isa<StoreInst>(C) || isa<VAArgInst>(C)) {
585  // If we can't lift this before P, it's game over.
586  auto ML = MemoryLocation::get(C);
587  if (isModOrRefSet(AA->getModRefInfo(P, ML)))
588  return false;
589 
590  MemLocs.push_back(ML);
591  } else
592  // We don't know how to lift this instruction.
593  return false;
594  }
595 
596  ToLift.push_back(C);
597  for (unsigned k = 0, e = C->getNumOperands(); k != e; ++k)
598  if (auto *A = dyn_cast<Instruction>(C->getOperand(k))) {
599  if (A->getParent() == SI->getParent()) {
600  // Cannot hoist user of P above P
601  if(A == P) return false;
602  Args.insert(A);
603  }
604  }
605  }
606 
607  // Find MSSA insertion point. Normally P will always have a corresponding
608  // memory access before which we can insert. However, with non-standard AA
609  // pipelines, there may be a mismatch between AA and MSSA, in which case we
610  // will scan for a memory access before P. In either case, we know for sure
611  // that at least the load will have a memory access.
612  // TODO: Simplify this once P will be determined by MSSA, in which case the
613  // discrepancy can no longer occur.
614  MemoryUseOrDef *MemInsertPoint = nullptr;
615  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(P)) {
616  MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
617  } else {
618  const Instruction *ConstP = P;
619  for (const Instruction &I : make_range(++ConstP->getReverseIterator(),
620  ++LI->getReverseIterator())) {
621  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(&I)) {
622  MemInsertPoint = MA;
623  break;
624  }
625  }
626  }
627 
628  // We made it, we need to lift.
629  for (auto *I : llvm::reverse(ToLift)) {
630  LLVM_DEBUG(dbgs() << "Lifting " << *I << " before " << *P << "\n");
631  I->moveBefore(P);
632  assert(MemInsertPoint && "Must have found insert point");
633  if (MemoryUseOrDef *MA = MSSAU->getMemorySSA()->getMemoryAccess(I)) {
634  MSSAU->moveAfter(MA, MemInsertPoint);
635  MemInsertPoint = MA;
636  }
637  }
638 
639  return true;
640 }
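// Illustrative example of the lifting above (added note, not from the
// original source): given
//   %v = load i32, i32* %src
//   call void @f(...)        ; may write to %src, so promotion must happen
//                            ; here (P) rather than at the store
//   store i32 %v, i32* %dst
// the store, together with any of its operands defined between P and the
// store and anything that must stay ordered with them, is lifted to just
// before the call, so the memcpy created later is inserted at a point where
// the load's source is still unmodified.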
641 
642 bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
643  if (!SI->isSimple()) return false;
644 
645  // Avoid merging nontemporal stores since the resulting
646  // memcpy/memset would not be able to preserve the nontemporal hint.
647  // In theory we could teach how to propagate the !nontemporal metadata to
648  // memset calls. However, that change would force the backend to
649  // conservatively expand !nontemporal memset calls back to sequences of
650  // store instructions (effectively undoing the merging).
651  if (SI->getMetadata(LLVMContext::MD_nontemporal))
652  return false;
653 
654  const DataLayout &DL = SI->getModule()->getDataLayout();
655 
656  Value *StoredVal = SI->getValueOperand();
657 
658  // Not all the transforms below are correct for non-integral pointers, bail
659  // until we've audited the individual pieces.
660  if (DL.isNonIntegralPointerType(StoredVal->getType()->getScalarType()))
661  return false;
662 
663  // Load to store forwarding can be interpreted as memcpy.
664  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
665  if (LI->isSimple() && LI->hasOneUse() &&
666  LI->getParent() == SI->getParent()) {
667 
668  auto *T = LI->getType();
669  // Don't introduce calls to memcpy/memmove intrinsics out of thin air if
670  // the corresponding libcalls are not available.
671  // TODO: We should really distinguish between libcall availability and
672  // our ability to introduce intrinsics.
673  if (T->isAggregateType() &&
674           (EnableMemCpyOptWithoutLibcalls ||
675            (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
676  MemoryLocation LoadLoc = MemoryLocation::get(LI);
677 
678  // We use alias analysis to check if an instruction may store to
679  // the memory we load from in between the load and the store. If
680  // such an instruction is found, we try to promote there instead
681  // of at the store position.
682  // TODO: Can use MSSA for this.
683  Instruction *P = SI;
684  for (auto &I : make_range(++LI->getIterator(), SI->getIterator())) {
685  if (isModSet(AA->getModRefInfo(&I, LoadLoc))) {
686  P = &I;
687  break;
688  }
689  }
690 
691  // We found an instruction that may write to the loaded memory.
692  // We can try to promote at this position instead of the store
693  // position if nothing aliases the store memory after this and the store
694  // destination is not in the range.
695  if (P && P != SI) {
696  if (!moveUp(SI, P, LI))
697  P = nullptr;
698  }
699 
700  // If a valid insertion position is found, then we can promote
701  // the load/store pair to a memcpy.
702  if (P) {
703  // If we load from memory that may alias the memory we store to,
704   // memmove must be used to preserve semantics. If not, memcpy can
705  // be used. Also, if we load from constant memory, memcpy can be used
706  // as the constant memory won't be modified.
707  bool UseMemMove = false;
708  if (isModSet(AA->getModRefInfo(SI, LoadLoc)))
709  UseMemMove = true;
710 
711  uint64_t Size = DL.getTypeStoreSize(T);
712 
712 
713       IRBuilder<> Builder(P);
714       Instruction *M;
715  if (UseMemMove)
716  M = Builder.CreateMemMove(
717  SI->getPointerOperand(), SI->getAlign(),
718  LI->getPointerOperand(), LI->getAlign(), Size);
719  else
720  M = Builder.CreateMemCpy(
721  SI->getPointerOperand(), SI->getAlign(),
722  LI->getPointerOperand(), LI->getAlign(), Size);
723 
724  LLVM_DEBUG(dbgs() << "Promoting " << *LI << " to " << *SI << " => "
725  << *M << "\n");
726 
727  auto *LastDef =
728  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(SI));
729  auto *NewAccess = MSSAU->createMemoryAccessAfter(M, LastDef, LastDef);
730  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
731 
732  eraseInstruction(SI);
733  eraseInstruction(LI);
734  ++NumMemCpyInstr;
735 
736  // Make sure we do not invalidate the iterator.
737  BBI = M->getIterator();
738  return true;
739  }
740  }
741 
742  // Detect cases where we're performing call slot forwarding, but
743  // happen to be using a load-store pair to implement it, rather than
744  // a memcpy.
745  CallInst *C = nullptr;
746  if (auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
747  MSSA->getWalker()->getClobberingMemoryAccess(LI))) {
748  // The load most post-dom the call. Limit to the same block for now.
749  // TODO: Support non-local call-slot optimization?
750  if (LoadClobber->getBlock() == SI->getParent())
751  C = dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
752  }
753 
754  if (C) {
755  // Check that nothing touches the dest of the "copy" between
756  // the call and the store.
757         MemoryLocation StoreLoc = MemoryLocation::get(SI);
758         if (accessedBetween(*AA, StoreLoc, MSSA->getMemoryAccess(C),
759  MSSA->getMemoryAccess(SI)))
760  C = nullptr;
761  }
762 
763  if (C) {
764  bool changed = performCallSlotOptzn(
765  LI, SI, SI->getPointerOperand()->stripPointerCasts(),
766             LI->getPointerOperand()->stripPointerCasts(),
767             DL.getTypeStoreSize(SI->getOperand(0)->getType()),
768  commonAlignment(SI->getAlign(), LI->getAlign()), C);
769  if (changed) {
770  eraseInstruction(SI);
771  eraseInstruction(LI);
772  ++NumMemCpyInstr;
773  return true;
774  }
775  }
776  }
777  }
778 
779  // The following code creates memset intrinsics out of thin air. Don't do
780  // this if the corresponding libfunc is not available.
781  // TODO: We should really distinguish between libcall availability and
782  // our ability to introduce intrinsics.
783  if (!(TLI->has(LibFunc_memset) || EnableMemCpyOptWithoutLibcalls))
784  return false;
785 
786  // There are two cases that are interesting for this code to handle: memcpy
787  // and memset. Right now we only handle memset.
788 
789   // Ensure that the value being stored is something that can be memset a byte
790   // at a time, like "0" or "-1" of any width, as well as things like
791   // 0xA0A0A0A0 and 0.0.
792  auto *V = SI->getOperand(0);
793  if (Value *ByteVal = isBytewiseValue(V, DL)) {
794  if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
795  ByteVal)) {
796  BBI = I->getIterator(); // Don't invalidate iterator.
797  return true;
798  }
799 
800  // If we have an aggregate, we try to promote it to memset regardless
801  // of opportunity for merging as it can expose optimization opportunities
802  // in subsequent passes.
803  auto *T = V->getType();
804  if (T->isAggregateType()) {
805  uint64_t Size = DL.getTypeStoreSize(T);
806       IRBuilder<> Builder(SI);
807       auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal, Size,
808  SI->getAlign());
809 
810  LLVM_DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
811 
812  // The newly inserted memset is immediately overwritten by the original
813  // store, so we do not need to rename uses.
814  auto *StoreDef = cast<MemoryDef>(MSSA->getMemoryAccess(SI));
815  auto *NewAccess = MSSAU->createMemoryAccessBefore(
816  M, StoreDef->getDefiningAccess(), StoreDef);
817  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/false);
818 
819  eraseInstruction(SI);
820  NumMemSetInfer++;
821 
822  // Make sure we do not invalidate the iterator.
823  BBI = M->getIterator();
824  return true;
825  }
826  }
827 
828  return false;
829 }
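// Illustrative results of processStore above (added example, not from the
// original source): a zero-initializing aggregate store such as
//   store %struct.S zeroinitializer, %struct.S* %p
// is promoted to a @llvm.memset of sizeof(%struct.S) bytes of 0 over %p, and
// a simple load/store pair that copies an aggregate between two locations is
// promoted to a single @llvm.memcpy (or @llvm.memmove when the two locations
// may alias).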
830 
831 bool MemCpyOptPass::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) {
832  // See if there is another memset or store neighboring this memset which
833  // allows us to widen out the memset to do a single larger store.
834  if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile())
835  if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(),
836  MSI->getValue())) {
837  BBI = I->getIterator(); // Don't invalidate iterator.
838  return true;
839  }
840  return false;
841 }
842 
843 /// Takes a memcpy and a call that it depends on,
844 /// and checks for the possibility of a call slot optimization by having
845 /// the call write its result directly into the destination of the memcpy.
846 bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
847  Instruction *cpyStore, Value *cpyDest,
848  Value *cpySrc, TypeSize cpySize,
849  Align cpyAlign, CallInst *C) {
850  // The general transformation to keep in mind is
851  //
852  // call @func(..., src, ...)
853  // memcpy(dest, src, ...)
854  //
855  // ->
856  //
857  // memcpy(dest, src, ...)
858  // call @func(..., dest, ...)
859  //
860  // Since moving the memcpy is technically awkward, we additionally check that
861  // src only holds uninitialized values at the moment of the call, meaning that
862  // the memcpy can be discarded rather than moved.
863 
864  // We can't optimize scalable types.
865  if (cpySize.isScalable())
866  return false;
867 
868  // Lifetime marks shouldn't be operated on.
869  if (Function *F = C->getCalledFunction())
870  if (F->isIntrinsic() && F->getIntrinsicID() == Intrinsic::lifetime_start)
871  return false;
872 
873  // Require that src be an alloca. This simplifies the reasoning considerably.
874  AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
875  if (!srcAlloca)
876  return false;
877 
878  ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
879  if (!srcArraySize)
880  return false;
881 
882  const DataLayout &DL = cpyLoad->getModule()->getDataLayout();
883  uint64_t srcSize = DL.getTypeAllocSize(srcAlloca->getAllocatedType()) *
884  srcArraySize->getZExtValue();
885 
886  if (cpySize < srcSize)
887  return false;
888 
889  // Check that accessing the first srcSize bytes of dest will not cause a
890  // trap. Otherwise the transform is invalid since it might cause a trap
891  // to occur earlier than it otherwise would.
892  if (!isDereferenceableAndAlignedPointer(cpyDest, Align(1), APInt(64, cpySize),
893  DL, C, DT))
894  return false;
895 
896  // Make sure that nothing can observe cpyDest being written early. There are
897  // a number of cases to consider:
898  // 1. cpyDest cannot be accessed between C and cpyStore as a precondition of
899  // the transform.
900  // 2. C itself may not access cpyDest (prior to the transform). This is
901  // checked further below.
902  // 3. If cpyDest is accessible to the caller of this function (potentially
903  // captured and not based on an alloca), we need to ensure that we cannot
904  // unwind between C and cpyStore. This is checked here.
905  // 4. If cpyDest is potentially captured, there may be accesses to it from
906  // another thread. In this case, we need to check that cpyStore is
907  // guaranteed to be executed if C is. As it is a non-atomic access, it
908  // renders accesses from other threads undefined.
909  // TODO: This is currently not checked.
910  if (mayBeVisibleThroughUnwinding(cpyDest, C, cpyStore))
911  return false;
912 
913  // Check that dest points to memory that is at least as aligned as src.
914  Align srcAlign = srcAlloca->getAlign();
915  bool isDestSufficientlyAligned = srcAlign <= cpyAlign;
916  // If dest is not aligned enough and we can't increase its alignment then
917  // bail out.
918  if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest))
919  return false;
920 
921  // Check that src is not accessed except via the call and the memcpy. This
922  // guarantees that it holds only undefined values when passed in (so the final
923  // memcpy can be dropped), that it is not read or written between the call and
924  // the memcpy, and that writing beyond the end of it is undefined.
925  SmallVector<User *, 8> srcUseList(srcAlloca->users());
926  while (!srcUseList.empty()) {
927  User *U = srcUseList.pop_back_val();
928 
929  if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
930  append_range(srcUseList, U->users());
931  continue;
932  }
933  if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(U)) {
934  if (!G->hasAllZeroIndices())
935  return false;
936 
937  append_range(srcUseList, U->users());
938  continue;
939  }
940  if (const IntrinsicInst *IT = dyn_cast<IntrinsicInst>(U))
941  if (IT->isLifetimeStartOrEnd())
942  continue;
943 
944  if (U != C && U != cpyLoad)
945  return false;
946  }
947 
948  // Check that src isn't captured by the called function since the
949  // transformation can cause aliasing issues in that case.
950  for (unsigned ArgI = 0, E = C->arg_size(); ArgI != E; ++ArgI)
951  if (C->getArgOperand(ArgI) == cpySrc && !C->doesNotCapture(ArgI))
952  return false;
953 
954  // Since we're changing the parameter to the callsite, we need to make sure
955  // that what would be the new parameter dominates the callsite.
956  if (!DT->dominates(cpyDest, C)) {
957  // Support moving a constant index GEP before the call.
958  auto *GEP = dyn_cast<GetElementPtrInst>(cpyDest);
959  if (GEP && GEP->hasAllConstantIndices() &&
960  DT->dominates(GEP->getPointerOperand(), C))
961  GEP->moveBefore(C);
962  else
963  return false;
964  }
965 
966  // In addition to knowing that the call does not access src in some
967  // unexpected manner, for example via a global, which we deduce from
968  // the use analysis, we also need to know that it does not sneakily
969  // access dest. We rely on AA to figure this out for us.
970  ModRefInfo MR = AA->getModRefInfo(C, cpyDest, LocationSize::precise(srcSize));
971  // If necessary, perform additional analysis.
972  if (isModOrRefSet(MR))
973  MR = AA->callCapturesBefore(C, cpyDest, LocationSize::precise(srcSize), DT);
974  if (isModOrRefSet(MR))
975  return false;
976 
977  // We can't create address space casts here because we don't know if they're
978  // safe for the target.
979  if (cpySrc->getType()->getPointerAddressSpace() !=
980  cpyDest->getType()->getPointerAddressSpace())
981  return false;
982  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
983  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
984  cpySrc->getType()->getPointerAddressSpace() !=
985  C->getArgOperand(ArgI)->getType()->getPointerAddressSpace())
986  return false;
987 
988  // All the checks have passed, so do the transformation.
989  bool changedArgument = false;
990  for (unsigned ArgI = 0; ArgI < C->arg_size(); ++ArgI)
991  if (C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
992  Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest
993  : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(),
994  cpyDest->getName(), C);
995  changedArgument = true;
996  if (C->getArgOperand(ArgI)->getType() == Dest->getType())
997  C->setArgOperand(ArgI, Dest);
998  else
999  C->setArgOperand(ArgI, CastInst::CreatePointerCast(
1000  Dest, C->getArgOperand(ArgI)->getType(),
1001  Dest->getName(), C));
1002  }
1003 
1004  if (!changedArgument)
1005  return false;
1006 
1007  // If the destination wasn't sufficiently aligned then increase its alignment.
1008  if (!isDestSufficientlyAligned) {
1009  assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!");
1010  cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1011  }
1012 
1013  // Update AA metadata
1014  // FIXME: MD_tbaa_struct and MD_mem_parallel_loop_access should also be
1015  // handled here, but combineMetadata doesn't support them yet
1016  unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
1017  LLVMContext::MD_noalias,
1018  LLVMContext::MD_invariant_group,
1019  LLVMContext::MD_access_group};
1020  combineMetadata(C, cpyLoad, KnownIDs, true);
1021 
1022  ++NumCallSlot;
1023  return true;
1024 }
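// Illustrative shape of the call slot optimization above (added example, not
// from the original source):
//   %tmp = alloca %T
//   call void @compute(%T* sret(%T) %tmp)    ; writes its result into %tmp
//   ...                                      ; %tmp otherwise unused
//   memcpy(%dst <- %tmp, sizeof(%T))
// becomes, after rewriting the call's argument,
//   call void @compute(%T* sret(%T) %dst)
// and the now-redundant memcpy (or load/store pair) is erased by the caller
// once this function returns true.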
1025 
1026 /// We've found that the (upward scanning) memory dependence of memcpy 'M' is
1027 /// the memcpy 'MDep'. Try to simplify M to copy from MDep's input if we can.
1028 bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1029  MemCpyInst *MDep) {
1030  // We can only transforms memcpy's where the dest of one is the source of the
1031  // other.
1032  if (M->getSource() != MDep->getDest() || MDep->isVolatile())
1033  return false;
1034 
1035  // If dep instruction is reading from our current input, then it is a noop
1036  // transfer and substituting the input won't change this instruction. Just
1037  // ignore the input and let someone else zap MDep. This handles cases like:
1038  // memcpy(a <- a)
1039  // memcpy(b <- a)
1040  if (M->getSource() == MDep->getSource())
1041  return false;
1042 
1043   // Second, the lengths of the memcpys must be the same, or the preceding one
1044  // must be larger than the following one.
1045  if (MDep->getLength() != M->getLength()) {
1046  ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength());
1047  ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength());
1048  if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1049  return false;
1050  }
1051 
1052  // Verify that the copied-from memory doesn't change in between the two
1053  // transfers. For example, in:
1054  // memcpy(a <- b)
1055  // *b = 42;
1056  // memcpy(c <- a)
1057  // It would be invalid to transform the second memcpy into memcpy(c <- b).
1058  //
1059  // TODO: If the code between M and MDep is transparent to the destination "c",
1060  // then we could still perform the xform by moving M up to the first memcpy.
1061  // TODO: It would be sufficient to check the MDep source up to the memcpy
1062  // size of M, rather than MDep.
1063   if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1064                      MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(M)))
1065  return false;
1066 
1067  // If the dest of the second might alias the source of the first, then the
1068  // source and dest might overlap. In addition, if the source of the first
1069  // points to constant memory, they won't overlap by definition. Otherwise, we
1070  // still want to eliminate the intermediate value, but we have to generate a
1071  // memmove instead of memcpy.
1072  bool UseMemMove = false;
1073   if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(MDep))))
1074     UseMemMove = true;
1075 
1076  // If all checks passed, then we can transform M.
1077  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1078  << *MDep << '\n' << *M << '\n');
1079 
1080  // TODO: Is this worth it if we're creating a less aligned memcpy? For
1081  // example we could be moving from movaps -> movq on x86.
1082  IRBuilder<> Builder(M);
1083  Instruction *NewM;
1084  if (UseMemMove)
1085  NewM = Builder.CreateMemMove(M->getRawDest(), M->getDestAlign(),
1086  MDep->getRawSource(), MDep->getSourceAlign(),
1087  M->getLength(), M->isVolatile());
1088  else if (isa<MemCpyInlineInst>(M)) {
1089  // llvm.memcpy may be promoted to llvm.memcpy.inline, but the converse is
1090  // never allowed since that would allow the latter to be lowered as a call
1091  // to an external function.
1092  NewM = Builder.CreateMemCpyInline(
1093  M->getRawDest(), M->getDestAlign(), MDep->getRawSource(),
1094  MDep->getSourceAlign(), M->getLength(), M->isVolatile());
1095  } else
1096  NewM = Builder.CreateMemCpy(M->getRawDest(), M->getDestAlign(),
1097  MDep->getRawSource(), MDep->getSourceAlign(),
1098  M->getLength(), M->isVolatile());
1099 
1100  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M)));
1101  auto *LastDef = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1102  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1103  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1104 
1105  // Remove the instruction we're replacing.
1106  eraseInstruction(M);
1107  ++NumMemCpyInstr;
1108  return true;
1109 }
1110 
1111 /// We've found that the (upward scanning) memory dependence of \p MemCpy is
1112 /// \p MemSet. Try to simplify \p MemSet to only set the trailing bytes that
1113 /// weren't copied over by \p MemCpy.
1114 ///
1115 /// In other words, transform:
1116 /// \code
1117 /// memset(dst, c, dst_size);
1118 /// memcpy(dst, src, src_size);
1119 /// \endcode
1120 /// into:
1121 /// \code
1122 /// memcpy(dst, src, src_size);
1123 /// memset(dst + src_size, c, dst_size <= src_size ? 0 : dst_size - src_size);
1124 /// \endcode
1125 bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1126  MemSetInst *MemSet) {
1127  // We can only transform memset/memcpy with the same destination.
1128  if (!AA->isMustAlias(MemSet->getDest(), MemCpy->getDest()))
1129  return false;
1130 
1131  // Check that src and dst of the memcpy aren't the same. While memcpy
1132  // operands cannot partially overlap, exact equality is allowed.
1133  if (isModSet(AA->getModRefInfo(MemCpy, MemoryLocation::getForSource(MemCpy))))
1134  return false;
1135 
1136  // We know that dst up to src_size is not written. We now need to make sure
1137  // that dst up to dst_size is not accessed. (If we did not move the memset,
1138  // checking for reads would be sufficient.)
1139   if (accessedBetween(*AA, MemoryLocation::getForDest(MemSet),
1140                       MSSA->getMemoryAccess(MemSet),
1141  MSSA->getMemoryAccess(MemCpy)))
1142  return false;
1143 
1144  // Use the same i8* dest as the memcpy, killing the memset dest if different.
1145  Value *Dest = MemCpy->getRawDest();
1146  Value *DestSize = MemSet->getLength();
1147  Value *SrcSize = MemCpy->getLength();
1148 
1149  if (mayBeVisibleThroughUnwinding(Dest, MemSet, MemCpy))
1150  return false;
1151 
1152  // If the sizes are the same, simply drop the memset instead of generating
1153  // a replacement with zero size.
1154  if (DestSize == SrcSize) {
1155  eraseInstruction(MemSet);
1156  return true;
1157  }
1158 
1159  // By default, create an unaligned memset.
1160  unsigned Align = 1;
1161  // If Dest is aligned, and SrcSize is constant, use the minimum alignment
1162  // of the sum.
1163  const unsigned DestAlign =
1164  std::max(MemSet->getDestAlignment(), MemCpy->getDestAlignment());
1165  if (DestAlign > 1)
1166  if (ConstantInt *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1167  Align = MinAlign(SrcSizeC->getZExtValue(), DestAlign);
1168 
1169  IRBuilder<> Builder(MemCpy);
1170 
1171  // If the sizes have different types, zext the smaller one.
1172  if (DestSize->getType() != SrcSize->getType()) {
1173  if (DestSize->getType()->getIntegerBitWidth() >
1174  SrcSize->getType()->getIntegerBitWidth())
1175  SrcSize = Builder.CreateZExt(SrcSize, DestSize->getType());
1176  else
1177  DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
1178  }
1179 
1180  Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1181  Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1182  Value *MemsetLen = Builder.CreateSelect(
1183  Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
1184  unsigned DestAS = Dest->getType()->getPointerAddressSpace();
1185  Instruction *NewMemSet = Builder.CreateMemSet(
1186  Builder.CreateGEP(Builder.getInt8Ty(),
1187  Builder.CreatePointerCast(Dest,
1188  Builder.getInt8PtrTy(DestAS)),
1189  SrcSize),
1190  MemSet->getOperand(1), MemsetLen, MaybeAlign(Align));
1191 
1192  assert(isa<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy)) &&
1193  "MemCpy must be a MemoryDef");
1194  // The new memset is inserted after the memcpy, but it is known that its
1195  // defining access is the memset about to be removed which immediately
1196  // precedes the memcpy.
1197  auto *LastDef =
1198  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1199  auto *NewAccess = MSSAU->createMemoryAccessBefore(
1200  NewMemSet, LastDef->getDefiningAccess(), LastDef);
1201  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1202 
1203  eraseInstruction(MemSet);
1204  return true;
1205 }
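// Worked example for the rewrite above (added note, not from the original
// source): with dst_size = 16 and src_size = 10, MemsetLen is
// select(16 <= 10, 0, 16 - 10) = 6, so the new memset clears the 6 trailing
// bytes starting at dst + 10 while the memcpy still provides the first 10
// bytes. When dst_size < src_size the select yields 0 and the new memset
// writes nothing (the equal-size case is handled earlier by simply erasing
// the original memset).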
1206 
1207 /// Determine whether the instruction has undefined content for the given Size,
1208 /// either because it was freshly alloca'd or started its lifetime.
1209 static bool hasUndefContents(MemorySSA *MSSA, AliasAnalysis *AA, Value *V,
1210  MemoryDef *Def, Value *Size) {
1211  if (MSSA->isLiveOnEntryDef(Def))
1212  return isa<AllocaInst>(getUnderlyingObject(V));
1213 
1214  if (IntrinsicInst *II =
1215  dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1216  if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1217  ConstantInt *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1218 
1219  if (ConstantInt *CSize = dyn_cast<ConstantInt>(Size)) {
1220  if (AA->isMustAlias(V, II->getArgOperand(1)) &&
1221  LTSize->getZExtValue() >= CSize->getZExtValue())
1222  return true;
1223  }
1224 
1225  // If the lifetime.start covers a whole alloca (as it almost always
1226  // does) and we're querying a pointer based on that alloca, then we know
1227  // the memory is definitely undef, regardless of how exactly we alias.
1228  // The size also doesn't matter, as an out-of-bounds access would be UB.
1229  AllocaInst *Alloca = dyn_cast<AllocaInst>(getUnderlyingObject(V));
1230  if (getUnderlyingObject(II->getArgOperand(1)) == Alloca) {
1231  const DataLayout &DL = Alloca->getModule()->getDataLayout();
1232  if (Optional<TypeSize> AllocaSize = Alloca->getAllocationSizeInBits(DL))
1233  if (*AllocaSize == LTSize->getValue() * 8)
1234  return true;
1235  }
1236  }
1237  }
1238 
1239  return false;
1240 }
1241 
1242 /// Transform memcpy to memset when its source was just memset.
1243 /// In other words, turn:
1244 /// \code
1245 /// memset(dst1, c, dst1_size);
1246 /// memcpy(dst2, dst1, dst2_size);
1247 /// \endcode
1248 /// into:
1249 /// \code
1250 /// memset(dst1, c, dst1_size);
1251 /// memset(dst2, c, dst2_size);
1252 /// \endcode
1253 /// When dst2_size <= dst1_size.
1254 bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1255  MemSetInst *MemSet) {
1256  // Make sure that memcpy(..., memset(...), ...), that is we are memsetting and
1257  // memcpying from the same address. Otherwise it is hard to reason about.
1258  if (!AA->isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
1259  return false;
1260 
1261  Value *MemSetSize = MemSet->getLength();
1262  Value *CopySize = MemCpy->getLength();
1263 
1264  if (MemSetSize != CopySize) {
1265  // Make sure the memcpy doesn't read any more than what the memset wrote.
1266  // Don't worry about sizes larger than i64.
1267 
1268  // A known memset size is required.
1269  ConstantInt *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1270  if (!CMemSetSize)
1271  return false;
1272 
1273  // A known memcpy size is also required.
1274  ConstantInt *CCopySize = dyn_cast<ConstantInt>(CopySize);
1275  if (!CCopySize)
1276  return false;
1277  if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1278  // If the memcpy is larger than the memset, but the memory was undef prior
1279  // to the memset, we can just ignore the tail. Technically we're only
1280  // interested in the bytes from MemSetSize..CopySize here, but as we can't
1281  // easily represent this location, we use the full 0..CopySize range.
1282  MemoryLocation MemCpyLoc = MemoryLocation::getForSource(MemCpy);
1283  bool CanReduceSize = false;
1284  MemoryUseOrDef *MemSetAccess = MSSA->getMemoryAccess(MemSet);
1285  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1286  MemSetAccess->getDefiningAccess(), MemCpyLoc);
1287  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1288  if (hasUndefContents(MSSA, AA, MemCpy->getSource(), MD, CopySize))
1289  CanReduceSize = true;
1290 
1291  if (!CanReduceSize)
1292  return false;
1293  CopySize = MemSetSize;
1294  }
1295  }
1296 
1297  IRBuilder<> Builder(MemCpy);
1298  Instruction *NewM =
1299  Builder.CreateMemSet(MemCpy->getRawDest(), MemSet->getOperand(1),
1300  CopySize, MaybeAlign(MemCpy->getDestAlignment()));
1301  auto *LastDef =
1302  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(MemCpy));
1303  auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1304  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1305 
1306  return true;
1307 }
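// Illustrative sizes for the transform above (added note, not from the
// original source): for
//   memset(%a, 42, 16)
//   memcpy(%b, %a, 8)
// the copy reads only bytes the memset wrote, so the memcpy becomes
//   memset(%b, 42, 8)
// If the memcpy were larger than the memset, the rewrite is only performed
// when the extra source bytes are known to be undef (hasUndefContents), and
// the new memset is then shrunk to the memset's size.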
1308 
1309 /// Perform simplification of memcpy's. If we have memcpy A
1310 /// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite
1311 /// B to be a memcpy from X to Z (or potentially a memmove, depending on
1312 /// circumstances). This allows later passes to remove the first memcpy
1313 /// altogether.
1314 bool MemCpyOptPass::processMemCpy(MemCpyInst *M, BasicBlock::iterator &BBI) {
1315  // We can only optimize non-volatile memcpy's.
1316  if (M->isVolatile()) return false;
1317 
1318  // If the source and destination of the memcpy are the same, then zap it.
1319  if (M->getSource() == M->getDest()) {
1320  ++BBI;
1321  eraseInstruction(M);
1322  return true;
1323  }
1324 
1325  // If copying from a constant, try to turn the memcpy into a memset.
1326  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource()))
1327  if (GV->isConstant() && GV->hasDefinitiveInitializer())
1328  if (Value *ByteVal = isBytewiseValue(GV->getInitializer(),
1329  M->getModule()->getDataLayout())) {
1330  IRBuilder<> Builder(M);
1331  Instruction *NewM =
1332  Builder.CreateMemSet(M->getRawDest(), ByteVal, M->getLength(),
1333  MaybeAlign(M->getDestAlignment()), false);
1334  auto *LastDef =
1335  cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(M));
1336  auto *NewAccess =
1337  MSSAU->createMemoryAccessAfter(NewM, LastDef, LastDef);
1338  MSSAU->insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
1339 
1340  eraseInstruction(M);
1341  ++NumCpyToSet;
1342  return true;
1343  }
1344 
1345  MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1346  MemoryAccess *AnyClobber = MSSA->getWalker()->getClobberingMemoryAccess(MA);
1347   MemoryLocation DestLoc = MemoryLocation::getForDest(M);
1348   const MemoryAccess *DestClobber =
1349  MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc);
1350 
1351  // Try to turn a partially redundant memset + memcpy into
1352  // memcpy + smaller memset. We don't need the memcpy size for this.
1353   // The memcpy must post-dominate the memset, so limit this to the same basic
1354  // block. A non-local generalization is likely not worthwhile.
1355  if (auto *MD = dyn_cast<MemoryDef>(DestClobber))
1356  if (auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1357  if (DestClobber->getBlock() == M->getParent())
1358  if (processMemSetMemCpyDependence(M, MDep))
1359  return true;
1360 
1361  MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1362  AnyClobber, MemoryLocation::getForSource(M));
1363 
1364  // There are four possible optimizations we can do for memcpy:
1365  // a) memcpy-memcpy xform which exposes redundance for DSE.
1366  // b) call-memcpy xform for return slot optimization.
1367  // c) memcpy from freshly alloca'd space or space that has just started
1368  // its lifetime copies undefined data, and we can therefore eliminate
1369  // the memcpy in favor of the data that was already at the destination.
1370  // d) memcpy from a just-memset'd source can be turned into memset.
1371  if (auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1372  if (Instruction *MI = MD->getMemoryInst()) {
1373  if (ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength())) {
1374  if (auto *C = dyn_cast<CallInst>(MI)) {
1375  // The memcpy must post-dom the call. Limit to the same block for
1376  // now. Additionally, we need to ensure that there are no accesses
1377  // to dest between the call and the memcpy. Accesses to src will be
1378  // checked by performCallSlotOptzn().
1379  // TODO: Support non-local call-slot optimization?
1380  if (C->getParent() == M->getParent() &&
1381  !accessedBetween(*AA, DestLoc, MD, MA)) {
1382  // FIXME: Can we pass in either of dest/src alignment here instead
1383  // of conservatively taking the minimum?
1384  Align Alignment = std::min(M->getDestAlign().valueOrOne(),
1385  M->getSourceAlign().valueOrOne());
1386  if (performCallSlotOptzn(
1387  M, M, M->getDest(), M->getSource(),
1388  TypeSize::getFixed(CopySize->getZExtValue()), Alignment,
1389  C)) {
1390  LLVM_DEBUG(dbgs() << "Performed call slot optimization:\n"
1391  << " call: " << *C << "\n"
1392  << " memcpy: " << *M << "\n");
1393  eraseInstruction(M);
1394  ++NumMemCpyInstr;
1395  return true;
1396  }
1397  }
1398  }
1399  }
1400  if (auto *MDep = dyn_cast<MemCpyInst>(MI))
1401  return processMemCpyMemCpyDependence(M, MDep);
1402  if (auto *MDep = dyn_cast<MemSetInst>(MI)) {
1403  if (performMemCpyToMemSetOptzn(M, MDep)) {
1404  LLVM_DEBUG(dbgs() << "Converted memcpy to memset\n");
1405  eraseInstruction(M);
1406  ++NumCpyToSet;
1407  return true;
1408  }
1409  }
1410  }
1411 
1412  if (hasUndefContents(MSSA, AA, M->getSource(), MD, M->getLength())) {
1413  LLVM_DEBUG(dbgs() << "Removed memcpy from undef\n");
1414  eraseInstruction(M);
1415  ++NumMemCpyInstr;
1416  return true;
1417  }
1418  }
1419 
1420  return false;
1421 }
1422 
1423 /// Transforms memmove calls to memcpy calls when the src/dst are guaranteed
1424 /// not to alias.
1425 bool MemCpyOptPass::processMemMove(MemMoveInst *M) {
1426  // See if the source could be modified by this memmove potentially.
1427   if (isModSet(AA->getModRefInfo(M, MemoryLocation::getForSource(M))))
1428     return false;
1429 
1430  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1431  << "\n");
1432 
1433  // If not, then we know we can transform this.
1434  Type *ArgTys[3] = { M->getRawDest()->getType(),
1435  M->getRawSource()->getType(),
1436  M->getLength()->getType() };
1437  M->setCalledFunction(Intrinsic::getDeclaration(M->getModule(),
1438  Intrinsic::memcpy, ArgTys));
1439 
1440  // For MemorySSA nothing really changes (except that memcpy may imply stricter
1441  // aliasing guarantees).
1442 
1443  ++NumMoveToCpy;
1444  return true;
1445 }
1446 
1447 /// This is called on every byval argument in call sites.
1448 bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) {
1449  const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout();
1450  // Find out what feeds this byval argument.
1451  Value *ByValArg = CB.getArgOperand(ArgNo);
1452  Type *ByValTy = CB.getParamByValType(ArgNo);
1453  TypeSize ByValSize = DL.getTypeAllocSize(ByValTy);
1454  MemoryLocation Loc(ByValArg, LocationSize::precise(ByValSize));
1455  MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
1456  if (!CallAccess)
1457  return false;
1458  MemCpyInst *MDep = nullptr;
1459  MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
1460  CallAccess->getDefiningAccess(), Loc);
1461  if (auto *MD = dyn_cast<MemoryDef>(Clobber))
1462  MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1463 
1464  // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by
1465  // a memcpy, see if we can byval from the source of the memcpy instead of the
1466  // result.
1467  if (!MDep || MDep->isVolatile() ||
1468  ByValArg->stripPointerCasts() != MDep->getDest())
1469  return false;
1470 
1471  // The length of the memcpy must be larger or equal to the size of the byval.
1472  ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength());
1473  if (!C1 || !TypeSize::isKnownGE(
1474  TypeSize::getFixed(C1->getValue().getZExtValue()), ByValSize))
1475  return false;
1476 
1477  // Get the alignment of the byval. If the call doesn't specify the alignment,
1478  // then it is some target specific value that we can't know.
1479  MaybeAlign ByValAlign = CB.getParamAlign(ArgNo);
1480  if (!ByValAlign) return false;
1481 
1482  // If it is greater than the memcpy's source alignment, check whether we can
1483  // force the source of the memcpy to the alignment we need; if not, bail out.
1484  MaybeAlign MemDepAlign = MDep->getSourceAlign();
1485  if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1486  getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, &CB, AC,
1487  DT) < *ByValAlign)
1488  return false;
1489 
1490  // The address space of the memcpy source must match the byval argument
1491  if (MDep->getSource()->getType()->getPointerAddressSpace() !=
1492  ByValArg->getType()->getPointerAddressSpace())
1493  return false;
1494 
1495  // Verify that the copied-from memory doesn't change in between the memcpy and
1496  // the byval call.
1497  // memcpy(a <- b)
1498  // *b = 42;
1499  // foo(*a)
1500  // It would be invalid to transform the call into foo(*b).
1501  if (writtenBetween(MSSA, MemoryLocation::getForSource(MDep),
1502  MSSA->getMemoryAccess(MDep), MSSA->getMemoryAccess(&CB)))
1503  return false;
1504 
1505  Value *TmpCast = MDep->getSource();
1506  if (MDep->getSource()->getType() != ByValArg->getType()) {
1507  BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(),
1508  "tmpcast", &CB);
1509  // Set the tmpcast's DebugLoc to MDep's
1510  TmpBitCast->setDebugLoc(MDep->getDebugLoc());
1511  TmpCast = TmpBitCast;
1512  }
1513 
1514  LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n"
1515  << " " << *MDep << "\n"
1516  << " " << CB << "\n");
1517 
1518  // Otherwise we're good! Update the byval argument.
1519  CB.setArgOperand(ArgNo, TmpCast);
1520  ++NumMemCpyInstr;
1521  return true;
1522 }
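// Illustrative sketch (hypothetical names, assuming an ABI where the struct
// is passed byval): clang materializes a temporary for the by-value argument
// and memcpy's the original object into it. processByValArgument lets the
// call read the original object directly when nothing clobbers it in
// between, so the temporary copy becomes dead.
struct Config { int fields[16]; };
void apply(Config c); // lowered with a byval argument on such targets

void applyDefaults(const Config *defaults) {
  // Roughly, the IR before this pass:
  //   %byval.tmp = alloca %struct.Config
  //   call void @llvm.memcpy...(ptr %byval.tmp, ptr %defaults, i64 64, ...)
  //   call void @apply(ptr byval(%struct.Config) %byval.tmp)
  // Afterwards the byval operand is %defaults itself.
  apply(*defaults);
}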
1523 
1524 /// Executes one iteration of MemCpyOptPass.
1525 bool MemCpyOptPass::iterateOnFunction(Function &F) {
1526  bool MadeChange = false;
1527 
1528  // Walk all instructions in the function.
1529  for (BasicBlock &BB : F) {
1530  // Skip unreachable blocks. For example processStore assumes that an
1531  // instruction in a BB can't be dominated by a later instruction in the
1532  // same BB (which is a scenario that can happen for an unreachable BB that
1533  // has itself as a predecessor).
1534  if (!DT->isReachableFromEntry(&BB))
1535  continue;
1536 
1537  for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
1538  // Avoid invalidating the iterator.
1539  Instruction *I = &*BI++;
1540 
1541  bool RepeatInstruction = false;
1542 
1543  if (StoreInst *SI = dyn_cast<StoreInst>(I))
1544  MadeChange |= processStore(SI, BI);
1545  else if (MemSetInst *M = dyn_cast<MemSetInst>(I))
1546  RepeatInstruction = processMemSet(M, BI);
1547  else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I))
1548  RepeatInstruction = processMemCpy(M, BI);
1549  else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I))
1550  RepeatInstruction = processMemMove(M);
1551  else if (auto *CB = dyn_cast<CallBase>(I)) {
1552  for (unsigned i = 0, e = CB->arg_size(); i != e; ++i)
1553  if (CB->isByValArgument(i))
1554  MadeChange |= processByValArgument(*CB, i);
1555  }
1556 
1557  // Reprocess the instruction if desired.
1558  if (RepeatInstruction) {
1559  if (BI != BB.begin())
1560  --BI;
1561  MadeChange = true;
1562  }
1563  }
1564  }
1565 
1566  return MadeChange;
1567 }
1568 
1569 PreservedAnalyses MemCpyOptPass::run(Function &F, FunctionAnalysisManager &AM) {
1570  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1571  auto *AA = &AM.getResult<AAManager>(F);
1572  auto *AC = &AM.getResult<AssumptionAnalysis>(F);
1573  auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
1574  auto *MSSA = &AM.getResult<MemorySSAAnalysis>(F);
1575 
1576  bool MadeChange = runImpl(F, &TLI, AA, AC, DT, &MSSA->getMSSA());
1577  if (!MadeChange)
1578  return PreservedAnalyses::all();
1579 
1580  PreservedAnalyses PA;
1581  PA.preserveSet<CFGAnalyses>();
1582  PA.preserve<MemorySSAAnalysis>();
1583  return PA;
1584 }
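// Illustrative sketch (hypothetical helper): scheduling this pass through the
// new pass manager from a standalone tool. It assumes the
// FunctionAnalysisManager already has the required analyses registered (AA,
// AssumptionCache, DominatorTree, MemorySSA, TargetLibraryInfo), e.g. via
// PassBuilder::registerFunctionAnalyses.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"

static void runMemCpyOptOn(llvm::Function &F,
                           llvm::FunctionAnalysisManager &FAM) {
  llvm::FunctionPassManager FPM;
  FPM.addPass(llvm::MemCpyOptPass()); // the pass defined in this file
  FPM.run(F, FAM);
}
// Command-line equivalent: opt -passes=memcpyopt input.ll -S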
1585 
1586 bool MemCpyOptPass::runImpl(Function &F, TargetLibraryInfo *TLI_,
1587  AliasAnalysis *AA_, AssumptionCache *AC_,
1588  DominatorTree *DT_, MemorySSA *MSSA_) {
1589  bool MadeChange = false;
1590  TLI = TLI_;
1591  AA = AA_;
1592  AC = AC_;
1593  DT = DT_;
1594  MSSA = MSSA_;
1595  MemorySSAUpdater MSSAU_(MSSA_);
1596  MSSAU = &MSSAU_;
1597 
1598  while (true) {
1599  if (!iterateOnFunction(F))
1600  break;
1601  MadeChange = true;
1602  }
1603 
1604  if (VerifyMemorySSA)
1605  MSSA_->verifyMemorySSA();
1606 
1607  return MadeChange;
1608 }
1609 
1610 /// This is the main transformation entry point for a function.
1611 bool MemCpyOptLegacyPass::runOnFunction(Function &F) {
1612  if (skipFunction(F))
1613  return false;
1614 
1615  auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1616  auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
1617  auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
1618  auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1619  auto *MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
1620 
1621  return Impl.runImpl(F, TLI, AA, AC, DT, MSSA);
1622 }
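// Illustrative sketch (hypothetical helper): the legacy entry point above is
// reached when the pass is added through createMemCpyOptPass(), e.g. by an
// older client still driving the legacy pass manager.
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"

static void runLegacyMemCpyOpt(llvm::Module &Mod) {
  llvm::legacy::PassManager PM;
  PM.add(llvm::createMemCpyOptPass()); // schedules MemCpyOptLegacyPass
  PM.run(Mod);
}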