LLVM  3.7.0
SROA.cpp
1 //===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This transformation implements the well-known scalar replacement of
11 /// aggregates transformation. It tries to identify promotable elements of an
12 /// aggregate alloca, and promote them to registers. It will also try to
13 /// convert uses of an element (or set of elements) of an alloca into a vector
14 /// or bitfield-style integer scalar if appropriate.
15 ///
16 /// It tries to do this with minimal slicing of the alloca so that regions
17 /// which are merely transferred in and out of external memory remain unchanged
18 /// and are not decomposed to scalar code.
19 ///
20 /// Because this also performs alloca promotion, it can be thought of as also
21 /// serving the purpose of SSA formation. The algorithm iterates on the
22 /// function until all opportunities for promotion have been realized.
23 ///
24 //===----------------------------------------------------------------------===//
25 
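// As a rough illustration of the rewrite described above (the IR names below
// are made up for this sketch, not taken from any test): given IR along the
// lines of
//
//   %agg = alloca { i32, i32 }
//   %f0  = getelementptr inbounds { i32, i32 }, { i32, i32 }* %agg, i32 0, i32 0
//   store i32 1, i32* %f0
//   %v   = load i32, i32* %f0
//
// the pass would typically slice %agg into per-field allocas and then promote
// the surviving scalar allocas, so %v becomes the SSA value 1 and the alloca
// can be removed entirely.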
26 #include "llvm/Transforms/Scalar.h"
27 #include "llvm/ADT/STLExtras.h"
28 #include "llvm/ADT/SetVector.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/ADT/Statistic.h"
32 #include "llvm/Analysis/Loads.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DIBuilder.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DebugInfo.h"
39 #include "llvm/IR/DerivedTypes.h"
40 #include "llvm/IR/Dominators.h"
41 #include "llvm/IR/Function.h"
42 #include "llvm/IR/IRBuilder.h"
43 #include "llvm/IR/InstVisitor.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/LLVMContext.h"
47 #include "llvm/IR/Operator.h"
48 #include "llvm/Pass.h"
50 #include "llvm/Support/Compiler.h"
51 #include "llvm/Support/Debug.h"
54 #include "llvm/Support/TimeValue.h"
59 
60 #if __cplusplus >= 201103L && !defined(NDEBUG)
61 // We only use this for a debug check in C++11
62 #include <random>
63 #endif
64 
65 using namespace llvm;
66 
67 #define DEBUG_TYPE "sroa"
68 
69 STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
70 STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
71 STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
72 STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
73 STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
74 STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
75 STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
76 STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
77 STATISTIC(NumDeleted, "Number of instructions deleted");
78 STATISTIC(NumVectorized, "Number of vectorized aggregates");
79 
80 /// Hidden option to force the pass to not use DomTree and mem2reg, instead
81 /// forming SSA values through the SSAUpdater infrastructure.
82 static cl::opt<bool> ForceSSAUpdater("force-ssa-updater", cl::init(false),
83  cl::Hidden);
84 
85 /// Hidden option to enable randomly shuffling the slices to help uncover
86 /// instability in their order.
87 static cl::opt<bool> SROARandomShuffleSlices("sroa-random-shuffle-slices",
88  cl::init(false), cl::Hidden);
89 
90 /// Hidden option to experiment with completely strict handling of inbounds
91 /// GEPs.
92 static cl::opt<bool> SROAStrictInbounds("sroa-strict-inbounds", cl::init(false),
93  cl::Hidden);
94 
95 namespace {
96 /// \brief A custom IRBuilder inserter which prefixes all names if they are
97 /// preserved.
98 template <bool preserveNames = true>
99 class IRBuilderPrefixedInserter
100  : public IRBuilderDefaultInserter<preserveNames> {
101  std::string Prefix;
102 
103 public:
104  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
105 
106 protected:
107  void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
108  BasicBlock::iterator InsertPt) const {
109     IRBuilderDefaultInserter<preserveNames>::InsertHelper(
110         I, Name.isTriviallyEmpty() ? Name : Prefix + Name, BB, InsertPt);
111  }
112 };
113 
114 // Specialization for not preserving the name is trivial.
115 template <>
116 class IRBuilderPrefixedInserter<false>
117  : public IRBuilderDefaultInserter<false> {
118 public:
119  void SetNamePrefix(const Twine &P) {}
120 };
121 
122 /// \brief Provide a typedef for IRBuilder that drops names in release builds.
123 #ifndef NDEBUG
124 typedef llvm::IRBuilder<true, ConstantFolder, IRBuilderPrefixedInserter<true>>
125     IRBuilderTy;
126 #else
127 typedef llvm::IRBuilder<false, ConstantFolder, IRBuilderPrefixedInserter<false>>
128     IRBuilderTy;
129 #endif
130 }
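// A minimal usage sketch for the typedef above (hypothetical snippet, not a
// quote from later in this file): the rewriter sets a prefix once and every
// value created afterwards picks it up in asserts-enabled builds.
//
//   IRBuilderTy IRB(InsertionPoint);
//   IRB.SetNamePrefix(Twine(NewAI.getName()) + ".");
//   Value *V = IRB.CreateLoad(Ptr, "load");  // named "<new-alloca>.load"
//
// In release builds the <false> specialization makes SetNamePrefix a no-op and
// the IRBuilder drops names entirely.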
131 
132 namespace {
133 /// \brief A used slice of an alloca.
134 ///
135 /// This structure represents a slice of an alloca used by some instruction. It
136 /// stores both the begin and end offsets of this use, a pointer to the use
137 /// itself, and a flag indicating whether we can classify the use as splittable
138 /// or not when forming partitions of the alloca.
139 class Slice {
140  /// \brief The beginning offset of the range.
141  uint64_t BeginOffset;
142 
143  /// \brief The ending offset, not included in the range.
144  uint64_t EndOffset;
145 
146  /// \brief Storage for both the use of this slice and whether it can be
147  /// split.
148  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
149 
150 public:
151  Slice() : BeginOffset(), EndOffset() {}
152  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
153  : BeginOffset(BeginOffset), EndOffset(EndOffset),
154  UseAndIsSplittable(U, IsSplittable) {}
155 
156  uint64_t beginOffset() const { return BeginOffset; }
157  uint64_t endOffset() const { return EndOffset; }
158 
159  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
160  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
161 
162  Use *getUse() const { return UseAndIsSplittable.getPointer(); }
163 
164  bool isDead() const { return getUse() == nullptr; }
165  void kill() { UseAndIsSplittable.setPointer(nullptr); }
166 
167  /// \brief Support for ordering ranges.
168  ///
169  /// This provides an ordering over ranges such that start offsets are
170  /// always increasing, and within equal start offsets, the end offsets are
171  /// decreasing. Thus the spanning range comes first in a cluster with the
172  /// same start position.
173  bool operator<(const Slice &RHS) const {
174  if (beginOffset() < RHS.beginOffset())
175  return true;
176  if (beginOffset() > RHS.beginOffset())
177  return false;
178  if (isSplittable() != RHS.isSplittable())
179  return !isSplittable();
180  if (endOffset() > RHS.endOffset())
181  return true;
182  return false;
183  }
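  // Worked example of this ordering (offsets invented for illustration): three
  // equally-splittable slices [0,16), [0,8) and [8,12) sort as
  //   [0,16) < [0,8) < [8,12)
  // because equal begin offsets are broken by the *larger* end offset first,
  // so a spanning slice always precedes the slices it covers.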
184 
185  /// \brief Support comparison with a single offset to allow binary searches.
186  friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
187  uint64_t RHSOffset) {
188  return LHS.beginOffset() < RHSOffset;
189  }
190  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
191  const Slice &RHS) {
192  return LHSOffset < RHS.beginOffset();
193  }
194 
195  bool operator==(const Slice &RHS) const {
196  return isSplittable() == RHS.isSplittable() &&
197  beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
198  }
199  bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
200 };
201 } // end anonymous namespace
202 
203 namespace llvm {
204 template <typename T> struct isPodLike;
205 template <> struct isPodLike<Slice> { static const bool value = true; };
206 }
207 
208 namespace {
209 /// \brief Representation of the alloca slices.
210 ///
211 /// This class represents the slices of an alloca which are formed by its
212 /// various uses. If a pointer escapes, we can't fully build a representation
213 /// for the slices used and we reflect that in this structure. The uses are
214 /// stored, sorted by increasing beginning offset and with unsplittable slices
215 /// starting at a particular offset before splittable slices.
216 class AllocaSlices {
217 public:
218  /// \brief Construct the slices of a particular alloca.
219  AllocaSlices(const DataLayout &DL, AllocaInst &AI);
220 
221  /// \brief Test whether a pointer to the allocation escapes our analysis.
222  ///
223  /// If this is true, the slices are never fully built and should be
224  /// ignored.
225  bool isEscaped() const { return PointerEscapingInstr; }
226 
227  /// \brief Support for iterating over the slices.
228  /// @{
229  typedef SmallVectorImpl<Slice>::iterator iterator;
230  typedef iterator_range<iterator> range;
231  iterator begin() { return Slices.begin(); }
232  iterator end() { return Slices.end(); }
233 
234  typedef SmallVectorImpl<Slice>::const_iterator const_iterator;
235  typedef iterator_range<const_iterator> const_range;
236  const_iterator begin() const { return Slices.begin(); }
237  const_iterator end() const { return Slices.end(); }
238  /// @}
239 
240  /// \brief Erase a range of slices.
241  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
242 
243  /// \brief Insert new slices for this alloca.
244  ///
245  /// This moves the slices into the alloca's slices collection, and re-sorts
246  /// everything so that the usual ordering properties of the alloca's slices
247  /// hold.
248  void insert(ArrayRef<Slice> NewSlices) {
249  int OldSize = Slices.size();
250  Slices.append(NewSlices.begin(), NewSlices.end());
251  auto SliceI = Slices.begin() + OldSize;
252  std::sort(SliceI, Slices.end());
253  std::inplace_merge(Slices.begin(), SliceI, Slices.end());
254  }
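  // Roughly speaking, the pre-existing slices are already sorted, so sorting
  // just the appended tail and merging it back in costs about
  // O(k log k + n) for k new slices, rather than re-sorting all n + k slices.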
255 
256  // Forward declare an iterator to befriend it.
257  class partition_iterator;
258 
259  /// \brief A partition of the slices.
260  ///
261  /// An ephemeral representation for a range of slices which can be viewed as
262  /// a partition of the alloca. This range represents a span of the alloca's
263  /// memory which cannot be split, and provides access to all of the slices
264  /// overlapping some part of the partition.
265  ///
266  /// Objects of this type are produced by traversing the alloca's slices, but
267  /// are only ephemeral and not persistent.
268  class Partition {
269  private:
270  friend class AllocaSlices;
271  friend class AllocaSlices::partition_iterator;
272 
273     /// \brief The beginning and ending offsets of the alloca for this partition.
274  uint64_t BeginOffset, EndOffset;
275 
276     /// \brief The start and end iterators of this partition.
277  iterator SI, SJ;
278 
279  /// \brief A collection of split slice tails overlapping the partition.
280  SmallVector<Slice *, 4> SplitTails;
281 
282  /// \brief Raw constructor builds an empty partition starting and ending at
283  /// the given iterator.
284  Partition(iterator SI) : SI(SI), SJ(SI) {}
285 
286  public:
287  /// \brief The start offset of this partition.
288  ///
289  /// All of the contained slices start at or after this offset.
290  uint64_t beginOffset() const { return BeginOffset; }
291 
292  /// \brief The end offset of this partition.
293  ///
294  /// All of the contained slices end at or before this offset.
295  uint64_t endOffset() const { return EndOffset; }
296 
297  /// \brief The size of the partition.
298  ///
299  /// Note that this can never be zero.
300  uint64_t size() const {
301  assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
302  return EndOffset - BeginOffset;
303  }
304 
305  /// \brief Test whether this partition contains no slices, and merely spans
306  /// a region occupied by split slices.
307  bool empty() const { return SI == SJ; }
308 
309  /// \name Iterate slices that start within the partition.
310  /// These may be splittable or unsplittable. They have a begin offset >= the
311  /// partition begin offset.
312  /// @{
313  // FIXME: We should probably define a "concat_iterator" helper and use that
314  // to stitch together pointee_iterators over the split tails and the
315  // contiguous iterators of the partition. That would give a much nicer
316  // interface here. We could then additionally expose filtered iterators for
317     // split, unsplit, and unsplittable slices based on the usage patterns.
318  iterator begin() const { return SI; }
319  iterator end() const { return SJ; }
320  /// @}
321 
322  /// \brief Get the sequence of split slice tails.
323  ///
324  /// These tails are of slices which start before this partition but are
325  /// split and overlap into the partition. We accumulate these while forming
326  /// partitions.
327  ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
328  };
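  // An illustrative walk-through of how partitions are formed from slices
  // (example offsets invented for this sketch): suppose the sorted slices are
  //   [0,8)  unsplittable   (e.g. an i64 store)
  //   [4,12) splittable     (e.g. part of a memcpy)
  // The first partition spans [0,8) and contains both slices, since the
  // splittable slice overlaps it. Its tail [8,12) is then carried forward as a
  // "split tail", producing a second, empty partition spanning [8,12) that
  // contains no new slices but still reports the tail via splitSliceTails().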
329 
330  /// \brief An iterator over partitions of the alloca's slices.
331  ///
332  /// This iterator implements the core algorithm for partitioning the alloca's
333  /// slices. It is a forward iterator as we don't support backtracking for
334  /// efficiency reasons, and re-use a single storage area to maintain the
335  /// current set of split slices.
336  ///
337  /// It is templated on the slice iterator type to use so that it can operate
338  /// with either const or non-const slice iterators.
339  class partition_iterator
340  : public iterator_facade_base<partition_iterator,
341  std::forward_iterator_tag, Partition> {
342  friend class AllocaSlices;
343 
344  /// \brief Most of the state for walking the partitions is held in a class
345  /// with a nice interface for examining them.
346  Partition P;
347 
348  /// \brief We need to keep the end of the slices to know when to stop.
349     AllocaSlices::iterator SE;
350 
351  /// \brief We also need to keep track of the maximum split end offset seen.
352  /// FIXME: Do we really?
353  uint64_t MaxSplitSliceEndOffset;
354 
355  /// \brief Sets the partition to be empty at given iterator, and sets the
356  /// end iterator.
357  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
358  : P(SI), SE(SE), MaxSplitSliceEndOffset(0) {
359  // If not already at the end, advance our state to form the initial
360  // partition.
361  if (SI != SE)
362  advance();
363  }
364 
365  /// \brief Advance the iterator to the next partition.
366  ///
367  /// Requires that the iterator not be at the end of the slices.
368  void advance() {
369  assert((P.SI != SE || !P.SplitTails.empty()) &&
370  "Cannot advance past the end of the slices!");
371 
372  // Clear out any split uses which have ended.
373  if (!P.SplitTails.empty()) {
374  if (P.EndOffset >= MaxSplitSliceEndOffset) {
375  // If we've finished all splits, this is easy.
376  P.SplitTails.clear();
377  MaxSplitSliceEndOffset = 0;
378  } else {
379  // Remove the uses which have ended in the prior partition. This
380  // cannot change the max split slice end because we just checked that
381  // the prior partition ended prior to that max.
382  P.SplitTails.erase(
383  std::remove_if(
384  P.SplitTails.begin(), P.SplitTails.end(),
385  [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
386  P.SplitTails.end());
387  assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
388  [&](Slice *S) {
389  return S->endOffset() == MaxSplitSliceEndOffset;
390  }) &&
391  "Could not find the current max split slice offset!");
392  assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
393  [&](Slice *S) {
394  return S->endOffset() <= MaxSplitSliceEndOffset;
395  }) &&
396  "Max split slice end offset is not actually the max!");
397  }
398  }
399 
400  // If P.SI is already at the end, then we've cleared the split tail and
401  // now have an end iterator.
402  if (P.SI == SE) {
403  assert(P.SplitTails.empty() && "Failed to clear the split slices!");
404  return;
405  }
406 
407  // If we had a non-empty partition previously, set up the state for
408  // subsequent partitions.
409  if (P.SI != P.SJ) {
410  // Accumulate all the splittable slices which started in the old
411  // partition into the split list.
412  for (Slice &S : P)
413  if (S.isSplittable() && S.endOffset() > P.EndOffset) {
414  P.SplitTails.push_back(&S);
415  MaxSplitSliceEndOffset =
416  std::max(S.endOffset(), MaxSplitSliceEndOffset);
417  }
418 
419  // Start from the end of the previous partition.
420  P.SI = P.SJ;
421 
422  // If P.SI is now at the end, we at most have a tail of split slices.
423  if (P.SI == SE) {
424  P.BeginOffset = P.EndOffset;
425  P.EndOffset = MaxSplitSliceEndOffset;
426  return;
427  }
428 
429         // If we have split slices and the next slice comes after a gap and is
430         // not splittable, immediately form an empty partition for the split
431  // slices up until the next slice begins.
432  if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
433  !P.SI->isSplittable()) {
434  P.BeginOffset = P.EndOffset;
435  P.EndOffset = P.SI->beginOffset();
436  return;
437  }
438  }
439 
440  // OK, we need to consume new slices. Set the end offset based on the
441  // current slice, and step SJ past it. The beginning offset of the
442       // partition is the beginning offset of the next slice unless we have
443  // pre-existing split slices that are continuing, in which case we begin
444  // at the prior end offset.
445  P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
446  P.EndOffset = P.SI->endOffset();
447  ++P.SJ;
448 
449  // There are two strategies to form a partition based on whether the
450  // partition starts with an unsplittable slice or a splittable slice.
451  if (!P.SI->isSplittable()) {
452  // When we're forming an unsplittable region, it must always start at
453  // the first slice and will extend through its end.
454  assert(P.BeginOffset == P.SI->beginOffset());
455 
456  // Form a partition including all of the overlapping slices with this
457  // unsplittable slice.
458  while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
459  if (!P.SJ->isSplittable())
460  P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
461  ++P.SJ;
462  }
463 
464  // We have a partition across a set of overlapping unsplittable
465  // partitions.
466  return;
467  }
468 
469  // If we're starting with a splittable slice, then we need to form
470  // a synthetic partition spanning it and any other overlapping splittable
471       // slices.
472  assert(P.SI->isSplittable() && "Forming a splittable partition!");
473 
474  // Collect all of the overlapping splittable slices.
475  while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
476  P.SJ->isSplittable()) {
477  P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
478  ++P.SJ;
479  }
480 
481       // Back up P.EndOffset if we ended the span early when encountering an
482  // unsplittable slice. This synthesizes the early end offset of
483  // a partition spanning only splittable slices.
484  if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
485  assert(!P.SJ->isSplittable());
486  P.EndOffset = P.SJ->beginOffset();
487  }
488  }
489 
490  public:
491  bool operator==(const partition_iterator &RHS) const {
492  assert(SE == RHS.SE &&
493  "End iterators don't match between compared partition iterators!");
494 
495       // The observed positions of partitions are marked by the P.SI iterator and
496       // the emptiness of the split slices. The latter is only relevant when
497  // P.SI == SE, as the end iterator will additionally have an empty split
498  // slices list, but the prior may have the same P.SI and a tail of split
499  // slices.
500  if (P.SI == RHS.P.SI &&
501  P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
502  assert(P.SJ == RHS.P.SJ &&
503  "Same set of slices formed two different sized partitions!");
504  assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
505  "Same slice position with differently sized non-empty split "
506  "slice tails!");
507  return true;
508  }
509  return false;
510  }
511 
512  partition_iterator &operator++() {
513  advance();
514  return *this;
515  }
516 
517  Partition &operator*() { return P; }
518  };
519 
520  /// \brief A forward range over the partitions of the alloca's slices.
521  ///
522  /// This accesses an iterator range over the partitions of the alloca's
523  /// slices. It computes these partitions on the fly based on the overlapping
524  /// offsets of the slices and the ability to split them. It will visit "empty"
525  /// partitions to cover regions of the alloca only accessed via split
526  /// slices.
527   iterator_range<partition_iterator> partitions() {
528     return make_range(partition_iterator(begin(), end()),
529  partition_iterator(end(), end()));
530  }
531 
532  /// \brief Access the dead users for this alloca.
533  ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
534 
535  /// \brief Access the dead operands referring to this alloca.
536  ///
537   /// These are operands which cannot actually be used to refer to the
538  /// alloca as they are outside its range and the user doesn't correct for
539  /// that. These mostly consist of PHI node inputs and the like which we just
540  /// need to replace with undef.
541  ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
542 
543 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
544  void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
545  void printSlice(raw_ostream &OS, const_iterator I,
546  StringRef Indent = " ") const;
547  void printUse(raw_ostream &OS, const_iterator I,
548  StringRef Indent = " ") const;
549  void print(raw_ostream &OS) const;
550  void dump(const_iterator I) const;
551  void dump() const;
552 #endif
553 
554 private:
555  template <typename DerivedT, typename RetT = void> class BuilderBase;
556  class SliceBuilder;
557  friend class AllocaSlices::SliceBuilder;
558 
559 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
560  /// \brief Handle to alloca instruction to simplify method interfaces.
561  AllocaInst &AI;
562 #endif
563 
564  /// \brief The instruction responsible for this alloca not having a known set
565  /// of slices.
566  ///
567  /// When an instruction (potentially) escapes the pointer to the alloca, we
568  /// store a pointer to that here and abort trying to form slices of the
569  /// alloca. This will be null if the alloca slices are analyzed successfully.
570  Instruction *PointerEscapingInstr;
571 
572  /// \brief The slices of the alloca.
573  ///
574  /// We store a vector of the slices formed by uses of the alloca here. This
575  /// vector is sorted by increasing begin offset, and then the unsplittable
576  /// slices before the splittable ones. See the Slice inner class for more
577  /// details.
578  SmallVector<Slice, 8> Slices;
579 
580  /// \brief Instructions which will become dead if we rewrite the alloca.
581  ///
582  /// Note that these are not separated by slice. This is because we expect an
583  /// alloca to be completely rewritten or not rewritten at all. If rewritten,
584  /// all these instructions can simply be removed and replaced with undef as
585  /// they come from outside of the allocated space.
586   SmallVector<Instruction *, 8> DeadUsers;
587 
588  /// \brief Operands which will become dead if we rewrite the alloca.
589  ///
590  /// These are operands that in their particular use can be replaced with
591  /// undef when we rewrite the alloca. These show up in out-of-bounds inputs
592  /// to PHI nodes and the like. They aren't entirely dead (there might be
593   /// a GEP back into the bounds using it elsewhere) and neither is the PHI, but we
594  /// want to swap this particular input for undef to simplify the use lists of
595  /// the alloca.
596  SmallVector<Use *, 8> DeadOperands;
597 };
598 }
599 
600 static Value *foldSelectInst(SelectInst &SI) {
601   // If the condition being selected on is a constant or the same value is
602  // being selected between, fold the select. Yes this does (rarely) happen
603  // early on.
604  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
605  return SI.getOperand(1 + CI->isZero());
606  if (SI.getOperand(1) == SI.getOperand(2))
607  return SI.getOperand(1);
608 
609  return nullptr;
610 }
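// For example (illustrative): "select i1 true, i32* %a, i32* %b" folds to %a,
// and "select i1 %c, i32* %a, i32* %a" folds to %a regardless of %c; anything
// else returns null and is left for the slice builder to analyze.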
611 
612 /// \brief A helper that folds a PHI node or a select.
613 static Value *foldPHINodeOrSelectInst(Instruction &I) {
614   if (PHINode *PN = dyn_cast<PHINode>(&I)) {
615  // If PN merges together the same value, return that value.
616  return PN->hasConstantValue();
617  }
618  return foldSelectInst(cast<SelectInst>(I));
619 }
620 
621 /// \brief Builder for the alloca slices.
622 ///
623 /// This class builds a set of alloca slices by recursively visiting the uses
624 /// of an alloca and making a slice for each load and store at each offset.
625 class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
626   friend class PtrUseVisitor<SliceBuilder>;
627   friend class InstVisitor<SliceBuilder>;
628   typedef PtrUseVisitor<SliceBuilder> Base;
629 
630  const uint64_t AllocSize;
631  AllocaSlices &AS;
632 
633  SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
634   SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
635 
636  /// \brief Set to de-duplicate dead instructions found in the use walk.
637  SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
638 
639 public:
640  SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
641       : PtrUseVisitor<SliceBuilder>(DL),
642         AllocSize(DL.getTypeAllocSize(AI.getAllocatedType())), AS(AS) {}
643 
644 private:
645  void markAsDead(Instruction &I) {
646  if (VisitedDeadInsts.insert(&I).second)
647  AS.DeadUsers.push_back(&I);
648  }
649 
650  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
651  bool IsSplittable = false) {
652  // Completely skip uses which have a zero size or start either before or
653  // past the end of the allocation.
654  if (Size == 0 || Offset.uge(AllocSize)) {
655  DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
656  << " which has zero size or starts outside of the "
657  << AllocSize << " byte alloca:\n"
658  << " alloca: " << AS.AI << "\n"
659  << " use: " << I << "\n");
660  return markAsDead(I);
661  }
662 
663  uint64_t BeginOffset = Offset.getZExtValue();
664  uint64_t EndOffset = BeginOffset + Size;
665 
666  // Clamp the end offset to the end of the allocation. Note that this is
667  // formulated to handle even the case where "BeginOffset + Size" overflows.
668  // This may appear superficially to be something we could ignore entirely,
669  // but that is not so! There may be widened loads or PHI-node uses where
670  // some instructions are dead but not others. We can't completely ignore
671  // them, and so have to record at least the information here.
672  assert(AllocSize >= BeginOffset); // Established above.
673  if (Size > AllocSize - BeginOffset) {
674  DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
675  << " to remain within the " << AllocSize << " byte alloca:\n"
676  << " alloca: " << AS.AI << "\n"
677  << " use: " << I << "\n");
678  EndOffset = AllocSize;
679  }
680 
681  AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
682  }
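  // Worked example of the clamping above (numbers invented): for an 8-byte
  // alloca, a 16-byte use beginning at offset 4 is recorded as the slice
  // [4, 8) rather than dropped, while a use starting at offset 8 or beyond
  // (or with zero size) is marked dead instead.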
683 
684  void visitBitCastInst(BitCastInst &BC) {
685  if (BC.use_empty())
686  return markAsDead(BC);
687 
688  return Base::visitBitCastInst(BC);
689  }
690 
691  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
692  if (GEPI.use_empty())
693  return markAsDead(GEPI);
694 
695  if (SROAStrictInbounds && GEPI.isInBounds()) {
696  // FIXME: This is a manually un-factored variant of the basic code inside
697  // of GEPs with checking of the inbounds invariant specified in the
698  // langref in a very strict sense. If we ever want to enable
699  // SROAStrictInbounds, this code should be factored cleanly into
700  // PtrUseVisitor, but it is easier to experiment with SROAStrictInbounds
701     // by writing out the code here where we have the underlying allocation
702  // size readily available.
703  APInt GEPOffset = Offset;
704  const DataLayout &DL = GEPI.getModule()->getDataLayout();
705  for (gep_type_iterator GTI = gep_type_begin(GEPI),
706  GTE = gep_type_end(GEPI);
707  GTI != GTE; ++GTI) {
708  ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
709  if (!OpC)
710  break;
711 
712  // Handle a struct index, which adds its field offset to the pointer.
713  if (StructType *STy = dyn_cast<StructType>(*GTI)) {
714  unsigned ElementIdx = OpC->getZExtValue();
715  const StructLayout *SL = DL.getStructLayout(STy);
716  GEPOffset +=
717  APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
718  } else {
719  // For array or vector indices, scale the index by the size of the
720  // type.
721  APInt Index = OpC->getValue().sextOrTrunc(Offset.getBitWidth());
722  GEPOffset += Index * APInt(Offset.getBitWidth(),
723  DL.getTypeAllocSize(GTI.getIndexedType()));
724  }
725 
726  // If this index has computed an intermediate pointer which is not
727  // inbounds, then the result of the GEP is a poison value and we can
728  // delete it and all uses.
729  if (GEPOffset.ugt(AllocSize))
730  return markAsDead(GEPI);
731  }
732  }
733 
734  return Base::visitGetElementPtrInst(GEPI);
735  }
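  // A worked instance of the strict-inbounds check above (offsets invented):
  // starting from offset 0 of an 8-byte alloca, an inbounds GEP indexing
  // element 3 of a [4 x i32] view accumulates GEPOffset = 12, which is past
  // the allocation, so under -sroa-strict-inbounds the GEP and all of its
  // uses are marked dead.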
736 
737  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
738  uint64_t Size, bool IsVolatile) {
739  // We allow splitting of non-volatile loads and stores where the type is an
740  // integer type. These may be used to implement 'memcpy' or other "transfer
741  // of bits" patterns.
742  bool IsSplittable = Ty->isIntegerTy() && !IsVolatile;
743 
744  insertUse(I, Offset, Size, IsSplittable);
745  }
746 
747  void visitLoadInst(LoadInst &LI) {
748  assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
749  "All simple FCA loads should have been pre-split");
750 
751  if (!IsOffsetKnown)
752  return PI.setAborted(&LI);
753 
754  const DataLayout &DL = LI.getModule()->getDataLayout();
755  uint64_t Size = DL.getTypeStoreSize(LI.getType());
756  return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
757  }
758 
759  void visitStoreInst(StoreInst &SI) {
760  Value *ValOp = SI.getValueOperand();
761  if (ValOp == *U)
762  return PI.setEscapedAndAborted(&SI);
763  if (!IsOffsetKnown)
764  return PI.setAborted(&SI);
765 
766  const DataLayout &DL = SI.getModule()->getDataLayout();
767  uint64_t Size = DL.getTypeStoreSize(ValOp->getType());
768 
769  // If this memory access can be shown to *statically* extend outside the
770     // bounds of the allocation, its behavior is undefined, so simply
771  // ignore it. Note that this is more strict than the generic clamping
772  // behavior of insertUse. We also try to handle cases which might run the
773  // risk of overflow.
774  // FIXME: We should instead consider the pointer to have escaped if this
775  // function is being instrumented for addressing bugs or race conditions.
776  if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
777  DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @" << Offset
778  << " which extends past the end of the " << AllocSize
779  << " byte alloca:\n"
780  << " alloca: " << AS.AI << "\n"
781  << " use: " << SI << "\n");
782  return markAsDead(SI);
783  }
784 
785  assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
786  "All simple FCA stores should have been pre-split");
787  handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
788  }
789 
790  void visitMemSetInst(MemSetInst &II) {
791  assert(II.getRawDest() == *U && "Pointer use is not the destination?");
792  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
793  if ((Length && Length->getValue() == 0) ||
794  (IsOffsetKnown && Offset.uge(AllocSize)))
795  // Zero-length mem transfer intrinsics can be ignored entirely.
796  return markAsDead(II);
797 
798  if (!IsOffsetKnown)
799  return PI.setAborted(&II);
800 
801  insertUse(II, Offset, Length ? Length->getLimitedValue()
802  : AllocSize - Offset.getLimitedValue(),
803  (bool)Length);
804  }
805 
806  void visitMemTransferInst(MemTransferInst &II) {
807  ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
808  if (Length && Length->getValue() == 0)
809  // Zero-length mem transfer intrinsics can be ignored entirely.
810  return markAsDead(II);
811 
812  // Because we can visit these intrinsics twice, also check to see if the
813  // first time marked this instruction as dead. If so, skip it.
814  if (VisitedDeadInsts.count(&II))
815  return;
816 
817  if (!IsOffsetKnown)
818  return PI.setAborted(&II);
819 
820  // This side of the transfer is completely out-of-bounds, and so we can
821  // nuke the entire transfer. However, we also need to nuke the other side
822  // if already added to our partitions.
823  // FIXME: Yet another place we really should bypass this when
824  // instrumenting for ASan.
825  if (Offset.uge(AllocSize)) {
826       SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
827           MemTransferSliceMap.find(&II);
828  if (MTPI != MemTransferSliceMap.end())
829  AS.Slices[MTPI->second].kill();
830  return markAsDead(II);
831  }
832 
833  uint64_t RawOffset = Offset.getLimitedValue();
834  uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
835 
836  // Check for the special case where the same exact value is used for both
837  // source and dest.
838  if (*U == II.getRawDest() && *U == II.getRawSource()) {
839  // For non-volatile transfers this is a no-op.
840  if (!II.isVolatile())
841  return markAsDead(II);
842 
843  return insertUse(II, Offset, Size, /*IsSplittable=*/false);
844  }
845 
846  // If we have seen both source and destination for a mem transfer, then
847  // they both point to the same alloca.
848  bool Inserted;
849     SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
850     std::tie(MTPI, Inserted) =
851  MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
852  unsigned PrevIdx = MTPI->second;
853  if (!Inserted) {
854  Slice &PrevP = AS.Slices[PrevIdx];
855 
856  // Check if the begin offsets match and this is a non-volatile transfer.
857  // In that case, we can completely elide the transfer.
858  if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
859  PrevP.kill();
860  return markAsDead(II);
861  }
862 
863  // Otherwise we have an offset transfer within the same alloca. We can't
864  // split those.
865  PrevP.makeUnsplittable();
866  }
867 
868  // Insert the use now that we've fixed up the splittable nature.
869  insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
870 
871  // Check that we ended up with a valid index in the map.
872  assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
873  "Map index doesn't point back to a slice with this user.");
874  }
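  // Illustrative consequences of the logic above (not exhaustive): a
  // non-volatile "memcpy(%p, %p, n)" where both operands are the exact same
  // alloca pointer is simply dead, while a copy between two different offsets
  // of the same alloca keeps both slices but forces them to be unsplittable.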
875 
876  // Disable SRoA for any intrinsics except for lifetime invariants.
877  // FIXME: What about debug intrinsics? This matches old behavior, but
878  // doesn't make sense.
879  void visitIntrinsicInst(IntrinsicInst &II) {
880  if (!IsOffsetKnown)
881  return PI.setAborted(&II);
882 
883  if (II.getIntrinsicID() == Intrinsic::lifetime_start ||
884  II.getIntrinsicID() == Intrinsic::lifetime_end) {
885  ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0));
886  uint64_t Size = std::min(AllocSize - Offset.getLimitedValue(),
887  Length->getLimitedValue());
888  insertUse(II, Offset, Size, true);
889  return;
890  }
891 
892  Base::visitIntrinsicInst(II);
893  }
894 
895  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
896  // We consider any PHI or select that results in a direct load or store of
897  // the same offset to be a viable use for slicing purposes. These uses
898  // are considered unsplittable and the size is the maximum loaded or stored
899  // size.
900     SmallPtrSet<Instruction *, 4> Visited;
901     SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
902     Visited.insert(Root);
903  Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
904  const DataLayout &DL = Root->getModule()->getDataLayout();
905  // If there are no loads or stores, the access is dead. We mark that as
906  // a size zero access.
907  Size = 0;
908  do {
909  Instruction *I, *UsedI;
910  std::tie(UsedI, I) = Uses.pop_back_val();
911 
912  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
913  Size = std::max(Size, DL.getTypeStoreSize(LI->getType()));
914  continue;
915  }
916  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
917  Value *Op = SI->getOperand(0);
918  if (Op == UsedI)
919  return SI;
920  Size = std::max(Size, DL.getTypeStoreSize(Op->getType()));
921  continue;
922  }
923 
924  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
925  if (!GEP->hasAllZeroIndices())
926  return GEP;
927  } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
928  !isa<SelectInst>(I)) {
929  return I;
930  }
931 
932  for (User *U : I->users())
933  if (Visited.insert(cast<Instruction>(U)).second)
934  Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
935  } while (!Uses.empty());
936 
937  return nullptr;
938  }
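  // For instance (sketch): a PHI whose value is only ever loaded from, or
  // bit-cast / all-zero-GEP'ed and then loaded, is considered safe here and
  // Size becomes the widest such load or store; passing the PHI's value to a
  // call, or storing the pointer itself, is returned as the unsafe user.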
939 
940  void visitPHINodeOrSelectInst(Instruction &I) {
941  assert(isa<PHINode>(I) || isa<SelectInst>(I));
942  if (I.use_empty())
943  return markAsDead(I);
944 
945  // TODO: We could use SimplifyInstruction here to fold PHINodes and
946     // SelectInsts. However, doing so would require changing the current
947  // dead-operand-tracking mechanism. For instance, suppose neither loading
948  // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
949  // trap either. However, if we simply replace %U with undef using the
950  // current dead-operand-tracking mechanism, "load (select undef, undef,
951  // %other)" may trap because the select may return the first operand
952  // "undef".
953  if (Value *Result = foldPHINodeOrSelectInst(I)) {
954  if (Result == *U)
955  // If the result of the constant fold will be the pointer, recurse
956  // through the PHI/select as if we had RAUW'ed it.
957  enqueueUsers(I);
958  else
959  // Otherwise the operand to the PHI/select is dead, and we can replace
960  // it with undef.
961  AS.DeadOperands.push_back(U);
962 
963  return;
964  }
965 
966  if (!IsOffsetKnown)
967  return PI.setAborted(&I);
968 
969  // See if we already have computed info on this node.
970  uint64_t &Size = PHIOrSelectSizes[&I];
971  if (!Size) {
972  // This is a new PHI/Select, check for an unsafe use of it.
973  if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
974  return PI.setAborted(UnsafeI);
975  }
976 
977  // For PHI and select operands outside the alloca, we can't nuke the entire
978  // phi or select -- the other side might still be relevant, so we special
979  // case them here and use a separate structure to track the operands
980  // themselves which should be replaced with undef.
981  // FIXME: This should instead be escaped in the event we're instrumenting
982  // for address sanitization.
983  if (Offset.uge(AllocSize)) {
984  AS.DeadOperands.push_back(U);
985  return;
986  }
987 
988  insertUse(I, Offset, Size);
989  }
990 
991  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
992 
993  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
994 
995  /// \brief Disable SROA entirely if there are unhandled users of the alloca.
996  void visitInstruction(Instruction &I) { PI.setAborted(&I); }
997 };
998 
999 AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1000  :
1001 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1002  AI(AI),
1003 #endif
1004  PointerEscapingInstr(nullptr) {
1005  SliceBuilder PB(DL, AI, *this);
1006  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1007  if (PtrI.isEscaped() || PtrI.isAborted()) {
1008  // FIXME: We should sink the escape vs. abort info into the caller nicely,
1009  // possibly by just storing the PtrInfo in the AllocaSlices.
1010  PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1011  : PtrI.getAbortingInst();
1012  assert(PointerEscapingInstr && "Did not track a bad instruction");
1013  return;
1014  }
1015 
1016  Slices.erase(std::remove_if(Slices.begin(), Slices.end(),
1017  [](const Slice &S) {
1018  return S.isDead();
1019  }),
1020  Slices.end());
1021 
1022 #if __cplusplus >= 201103L && !defined(NDEBUG)
1023   if (SROARandomShuffleSlices) {
1024     std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
1025  std::shuffle(Slices.begin(), Slices.end(), MT);
1026  }
1027 #endif
1028 
1029  // Sort the uses. This arranges for the offsets to be in ascending order,
1030  // and the sizes to be in descending order.
1031  std::sort(Slices.begin(), Slices.end());
1032 }
1033 
1034 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1035 
1036 void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1037  StringRef Indent) const {
1038  printSlice(OS, I, Indent);
1039  OS << "\n";
1040  printUse(OS, I, Indent);
1041 }
1042 
1043 void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1044  StringRef Indent) const {
1045  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1046  << " slice #" << (I - begin())
1047  << (I->isSplittable() ? " (splittable)" : "");
1048 }
1049 
1050 void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1051  StringRef Indent) const {
1052  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1053 }
1054 
1055 void AllocaSlices::print(raw_ostream &OS) const {
1056  if (PointerEscapingInstr) {
1057  OS << "Can't analyze slices for alloca: " << AI << "\n"
1058  << " A pointer to this alloca escaped by:\n"
1059  << " " << *PointerEscapingInstr << "\n";
1060  return;
1061  }
1062 
1063  OS << "Slices of alloca: " << AI << "\n";
1064  for (const_iterator I = begin(), E = end(); I != E; ++I)
1065  print(OS, I);
1066 }
1067 
1068 LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1069  print(dbgs(), I);
1070 }
1071 LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1072 
1073 #endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1074 
1075 namespace {
1076 /// \brief Implementation of LoadAndStorePromoter for promoting allocas.
1077 ///
1078 /// This subclass of LoadAndStorePromoter adds overrides to handle promoting
1079 /// the loads and stores of an alloca instruction, as well as updating its
1080 /// debug information. This is used when a domtree is unavailable and thus
1081 /// mem2reg in its full form can't be used to handle promotion of allocas to
1082 /// scalar values.
1083 class AllocaPromoter : public LoadAndStorePromoter {
1084  AllocaInst &AI;
1085  DIBuilder &DIB;
1086 
1087   SmallVector<DbgDeclareInst *, 4> DDIs;
1088   SmallVector<DbgValueInst *, 4> DVIs;
1089 
1090 public:
1091  AllocaPromoter(ArrayRef<const Instruction *> Insts,
1092  SSAUpdater &S,
1093  AllocaInst &AI, DIBuilder &DIB)
1094  : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {}
1095 
1096  void run(const SmallVectorImpl<Instruction *> &Insts) {
1097  // Retain the debug information attached to the alloca for use when
1098  // rewriting loads and stores.
1099  if (auto *L = LocalAsMetadata::getIfExists(&AI)) {
1100  if (auto *DINode = MetadataAsValue::getIfExists(AI.getContext(), L)) {
1101  for (User *U : DINode->users())
1102  if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(U))
1103  DDIs.push_back(DDI);
1104  else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
1105  DVIs.push_back(DVI);
1106  }
1107  }
1108 
1109  LoadAndStorePromoter::run(Insts);
1110 
1111  // While we have the debug information, clear it off of the alloca. The
1112  // caller takes care of deleting the alloca.
1113  while (!DDIs.empty())
1114  DDIs.pop_back_val()->eraseFromParent();
1115  while (!DVIs.empty())
1116  DVIs.pop_back_val()->eraseFromParent();
1117  }
1118 
1119  bool
1120  isInstInList(Instruction *I,
1121  const SmallVectorImpl<Instruction *> &Insts) const override {
1122  Value *Ptr;
1123  if (LoadInst *LI = dyn_cast<LoadInst>(I))
1124  Ptr = LI->getOperand(0);
1125  else
1126  Ptr = cast<StoreInst>(I)->getPointerOperand();
1127 
1128  // Only used to detect cycles, which will be rare and quickly found as
1129  // we're walking up a chain of defs rather than down through uses.
1130  SmallPtrSet<Value *, 4> Visited;
1131 
1132  do {
1133  if (Ptr == &AI)
1134  return true;
1135 
1136  if (BitCastInst *BCI = dyn_cast<BitCastInst>(Ptr))
1137  Ptr = BCI->getOperand(0);
1138  else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Ptr))
1139  Ptr = GEPI->getPointerOperand();
1140  else
1141  return false;
1142 
1143  } while (Visited.insert(Ptr).second);
1144 
1145  return false;
1146  }
1147 
1148  void updateDebugInfo(Instruction *Inst) const override {
1149  for (DbgDeclareInst *DDI : DDIs)
1150  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
1151  ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
1152  else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
1153  ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
1154  for (DbgValueInst *DVI : DVIs) {
1155  Value *Arg = nullptr;
1156  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
1157         // If an argument is zero extended then use the argument directly. The ZExt
1158         // may be zapped by an optimization pass in the future.
1159  if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
1160  Arg = dyn_cast<Argument>(ZExt->getOperand(0));
1161  else if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
1162  Arg = dyn_cast<Argument>(SExt->getOperand(0));
1163  if (!Arg)
1164  Arg = SI->getValueOperand();
1165  } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
1166  Arg = LI->getPointerOperand();
1167  } else {
1168  continue;
1169  }
1170  DIB.insertDbgValueIntrinsic(Arg, 0, DVI->getVariable(),
1171  DVI->getExpression(), DVI->getDebugLoc(),
1172  Inst);
1173  }
1174  }
1175 };
1176 } // end anon namespace
1177 
1178 namespace {
1179 /// \brief An optimization pass providing Scalar Replacement of Aggregates.
1180 ///
1181 /// This pass takes allocations which can be completely analyzed (that is, they
1182 /// don't escape) and tries to turn them into scalar SSA values. There are
1183 /// a few steps to this process.
1184 ///
1185 /// 1) It takes allocations of aggregates and analyzes the ways in which they
1186 /// are used to try to split them into smaller allocations, ideally of
1187 /// a single scalar data type. It will split up memcpy and memset accesses
1188 /// as necessary and try to isolate individual scalar accesses.
1189 /// 2) It will transform accesses into forms which are suitable for SSA value
1190 /// promotion. This can be replacing a memset with a scalar store of an
1191 /// integer value, or it can involve speculating operations on a PHI or
1192 /// select to be a PHI or select of the results.
1193 /// 3) Finally, this will try to detect a pattern of accesses which map cleanly
1194 /// onto insert and extract operations on a vector value, and convert them to
1195 /// this form. By doing so, it will enable promotion of vector aggregates to
1196 /// SSA vector values.
1197 class SROA : public FunctionPass {
1198  const bool RequiresDomTree;
1199 
1200  LLVMContext *C;
1201  DominatorTree *DT;
1202  AssumptionCache *AC;
1203 
1204  /// \brief Worklist of alloca instructions to simplify.
1205  ///
1206  /// Each alloca in the function is added to this. Each new alloca formed gets
1207  /// added to it as well to recursively simplify unless that alloca can be
1208   /// directly promoted. Finally, each time we rewrite a use of an alloca other
1209   /// than the one being actively rewritten, we add it back onto the list if not
1210  /// already present to ensure it is re-visited.
1211   SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> Worklist;
1212 
1213  /// \brief A collection of instructions to delete.
1214  /// We try to batch deletions to simplify code and make things a bit more
1215  /// efficient.
1216   SetVector<Instruction *, SmallVector<Instruction *, 8>> DeadInsts;
1217 
1218  /// \brief Post-promotion worklist.
1219  ///
1220  /// Sometimes we discover an alloca which has a high probability of becoming
1221  /// viable for SROA after a round of promotion takes place. In those cases,
1222  /// the alloca is enqueued here for re-processing.
1223  ///
1224  /// Note that we have to be very careful to clear allocas out of this list in
1225  /// the event they are deleted.
1226   SetVector<AllocaInst *, SmallVector<AllocaInst *, 16>> PostPromotionWorklist;
1227 
1228  /// \brief A collection of alloca instructions we can directly promote.
1229  std::vector<AllocaInst *> PromotableAllocas;
1230 
1231  /// \brief A worklist of PHIs to speculate prior to promoting allocas.
1232  ///
1233  /// All of these PHIs have been checked for the safety of speculation and by
1234  /// being speculated will allow promoting allocas currently in the promotable
1235  /// queue.
1236   SmallSetVector<PHINode *, 8> SpeculatablePHIs;
1237 
1238  /// \brief A worklist of select instructions to speculate prior to promoting
1239  /// allocas.
1240  ///
1241  /// All of these select instructions have been checked for the safety of
1242  /// speculation and by being speculated will allow promoting allocas
1243  /// currently in the promotable queue.
1244   SmallSetVector<SelectInst *, 8> SpeculatableSelects;
1245 
1246 public:
1247  SROA(bool RequiresDomTree = true)
1248  : FunctionPass(ID), RequiresDomTree(RequiresDomTree), C(nullptr),
1249  DT(nullptr) {
1250  initializeSROAPass(*PassRegistry::getPassRegistry());
1251  }
1252  bool runOnFunction(Function &F) override;
1253  void getAnalysisUsage(AnalysisUsage &AU) const override;
1254 
1255  const char *getPassName() const override { return "SROA"; }
1256  static char ID;
1257 
1258 private:
1259  friend class PHIOrSelectSpeculator;
1260  friend class AllocaSliceRewriter;
1261 
1262  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
1263  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS,
1264  AllocaSlices::Partition &P);
1265  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
1266  bool runOnAlloca(AllocaInst &AI);
1267  void clobberUse(Use &U);
1268  void deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
1269  bool promoteAllocas(Function &F);
1270 };
1271 }
1272 
1273 char SROA::ID = 0;
1274 
1275 FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
1276  return new SROA(RequiresDomTree);
1277 }
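// Typical usage from a pass pipeline (a sketch, not quoted from this file):
// legacy pass managers simply add an instance, e.g.
//
//   legacy::PassManager PM;
//   PM.add(llvm::createSROAPass());
//
// or the pass is run from the command line via "opt -sroa".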
1278 
1279 INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
1280  false)
1281 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
1282 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1283 INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates", false,
1284  false)
1285 
1286 /// Walk the range of a partitioning looking for a common type to cover this
1287 /// sequence of slices.
1288 static Type *findCommonType(AllocaSlices::const_iterator B,
1289  AllocaSlices::const_iterator E,
1290  uint64_t EndOffset) {
1291  Type *Ty = nullptr;
1292  bool TyIsCommon = true;
1293  IntegerType *ITy = nullptr;
1294 
1295  // Note that we need to look at *every* alloca slice's Use to ensure we
1296  // always get consistent results regardless of the order of slices.
1297  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1298  Use *U = I->getUse();
1299  if (isa<IntrinsicInst>(*U->getUser()))
1300  continue;
1301  if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1302  continue;
1303 
1304  Type *UserTy = nullptr;
1305  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1306  UserTy = LI->getType();
1307  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1308  UserTy = SI->getValueOperand()->getType();
1309  }
1310 
1311  if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1312  // If the type is larger than the partition, skip it. We only encounter
1313  // this for split integer operations where we want to use the type of the
1314  // entity causing the split. Also skip if the type is not a byte width
1315  // multiple.
1316  if (UserITy->getBitWidth() % 8 != 0 ||
1317  UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1318  continue;
1319 
1320  // Track the largest bitwidth integer type used in this way in case there
1321  // is no common type.
1322  if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1323  ITy = UserITy;
1324  }
1325 
1326  // To avoid depending on the order of slices, Ty and TyIsCommon must not
1327  // depend on types skipped above.
1328  if (!UserTy || (Ty && Ty != UserTy))
1329  TyIsCommon = false; // Give up on anything but an iN type.
1330  else
1331  Ty = UserTy;
1332  }
1333 
1334  return TyIsCommon ? Ty : ITy;
1335 }
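// For example (illustrative): if one slice spanning the whole partition is
// loaded as float and another is loaded as i32, there is no single common
// type, so the function falls back to the widest byte-sized integer type it
// saw (i32 here); if every covering use agrees on float, float is returned.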
1336 
1337 /// PHI instructions that use an alloca and are subsequently loaded can be
1338 /// rewritten to load both input pointers in the pred blocks and then PHI the
1339 /// results, allowing the load of the alloca to be promoted.
1340 /// From this:
1341 /// %P2 = phi [i32* %Alloca, i32* %Other]
1342 /// %V = load i32* %P2
1343 /// to:
1344 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1345 /// ...
1346 /// %V2 = load i32* %Other
1347 /// ...
1348 /// %V = phi [i32 %V1, i32 %V2]
1349 ///
1350 /// We can do this to a select if its only uses are loads and if the operands
1351 /// to the select can be loaded unconditionally.
1352 ///
1353 /// FIXME: This should be hoisted into a generic utility, likely in
1354 /// Transforms/Util/Local.h
1355 static bool isSafePHIToSpeculate(PHINode &PN) {
1356  // For now, we can only do this promotion if the load is in the same block
1357  // as the PHI, and if there are no stores between the phi and load.
1358  // TODO: Allow recursive phi users.
1359  // TODO: Allow stores.
1360  BasicBlock *BB = PN.getParent();
1361  unsigned MaxAlign = 0;
1362  bool HaveLoad = false;
1363  for (User *U : PN.users()) {
1364  LoadInst *LI = dyn_cast<LoadInst>(U);
1365  if (!LI || !LI->isSimple())
1366  return false;
1367 
1368  // For now we only allow loads in the same block as the PHI. This is
1369  // a common case that happens when instcombine merges two loads through
1370  // a PHI.
1371  if (LI->getParent() != BB)
1372  return false;
1373 
1374  // Ensure that there are no instructions between the PHI and the load that
1375  // could store.
1376  for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI)
1377  if (BBI->mayWriteToMemory())
1378  return false;
1379 
1380  MaxAlign = std::max(MaxAlign, LI->getAlignment());
1381  HaveLoad = true;
1382  }
1383 
1384  if (!HaveLoad)
1385  return false;
1386 
1387  const DataLayout &DL = PN.getModule()->getDataLayout();
1388 
1389  // We can only transform this if it is safe to push the loads into the
1390  // predecessor blocks. The only thing to watch out for is that we can't put
1391  // a possibly trapping load in the predecessor if it is a critical edge.
1392  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1393     TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator();
1394     Value *InVal = PN.getIncomingValue(Idx);
1395 
1396  // If the value is produced by the terminator of the predecessor (an
1397  // invoke) or it has side-effects, there is no valid place to put a load
1398  // in the predecessor.
1399  if (TI == InVal || TI->mayHaveSideEffects())
1400  return false;
1401 
1402  // If the predecessor has a single successor, then the edge isn't
1403  // critical.
1404  if (TI->getNumSuccessors() == 1)
1405  continue;
1406 
1407  // If this pointer is always safe to load, or if we can prove that there
1408  // is already a load in the block, then we can move the load to the pred
1409  // block.
1410  if (isDereferenceablePointer(InVal, DL) ||
1411  isSafeToLoadUnconditionally(InVal, TI, MaxAlign))
1412  continue;
1413 
1414  return false;
1415  }
1416 
1417  return true;
1418 }
1419 
1420 static void speculatePHINodeLoads(PHINode &PN) {
1421  DEBUG(dbgs() << " original: " << PN << "\n");
1422 
1423  Type *LoadTy = cast<PointerType>(PN.getType())->getElementType();
1424  IRBuilderTy PHIBuilder(&PN);
1425  PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1426  PN.getName() + ".sroa.speculated");
1427 
1428  // Get the AA tags and alignment to use from one of the loads. It doesn't
1429  // matter which one we get and if any differ.
1430  LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1431 
1432  AAMDNodes AATags;
1433  SomeLoad->getAAMetadata(AATags);
1434  unsigned Align = SomeLoad->getAlignment();
1435 
1436  // Rewrite all loads of the PN to use the new PHI.
1437  while (!PN.use_empty()) {
1438  LoadInst *LI = cast<LoadInst>(PN.user_back());
1439  LI->replaceAllUsesWith(NewPN);
1440  LI->eraseFromParent();
1441  }
1442 
1443  // Inject loads into all of the pred blocks.
1444  for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1445  BasicBlock *Pred = PN.getIncomingBlock(Idx);
1446  TerminatorInst *TI = Pred->getTerminator();
1447  Value *InVal = PN.getIncomingValue(Idx);
1448  IRBuilderTy PredBuilder(TI);
1449 
1450  LoadInst *Load = PredBuilder.CreateLoad(
1451  InVal, (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1452  ++NumLoadsSpeculated;
1453  Load->setAlignment(Align);
1454  if (AATags)
1455  Load->setAAMetadata(AATags);
1456  NewPN->addIncoming(Load, Pred);
1457  }
1458 
1459  DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1460  PN.eraseFromParent();
1461 }
1462 
1463 /// Select instructions that use an alloca and are subsequently loaded can be
1464 /// rewritten to load both input pointers and then select between the result,
1465 /// allowing the load of the alloca to be promoted.
1466 /// From this:
1467 /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other
1468 /// %V = load i32* %P2
1469 /// to:
1470 /// %V1 = load i32* %Alloca -> will be mem2reg'd
1471 /// %V2 = load i32* %Other
1472 /// %V = select i1 %cond, i32 %V1, i32 %V2
1473 ///
1474 /// We can do this to a select if its only uses are loads and if the operand
1475 /// to the select can be loaded unconditionally.
1476 static bool isSafeSelectToSpeculate(SelectInst &SI) {
1477   Value *TValue = SI.getTrueValue();
1478  Value *FValue = SI.getFalseValue();
1479  const DataLayout &DL = SI.getModule()->getDataLayout();
1480  bool TDerefable = isDereferenceablePointer(TValue, DL);
1481  bool FDerefable = isDereferenceablePointer(FValue, DL);
1482 
1483  for (User *U : SI.users()) {
1484  LoadInst *LI = dyn_cast<LoadInst>(U);
1485  if (!LI || !LI->isSimple())
1486  return false;
1487 
1488  // Both operands to the select need to be dereferencable, either
1489  // absolutely (e.g. allocas) or at this point because we can see other
1490  // accesses to it.
1491  if (!TDerefable &&
1492  !isSafeToLoadUnconditionally(TValue, LI, LI->getAlignment()))
1493  return false;
1494  if (!FDerefable &&
1495  !isSafeToLoadUnconditionally(FValue, LI, LI->getAlignment()))
1496  return false;
1497  }
1498 
1499  return true;
1500 }
1501 
1502 static void speculateSelectInstLoads(SelectInst &SI) {
1503  DEBUG(dbgs() << " original: " << SI << "\n");
1504 
1505  IRBuilderTy IRB(&SI);
1506  Value *TV = SI.getTrueValue();
1507  Value *FV = SI.getFalseValue();
1508  // Replace the loads of the select with a select of two loads.
1509  while (!SI.use_empty()) {
1510  LoadInst *LI = cast<LoadInst>(SI.user_back());
1511  assert(LI->isSimple() && "We only speculate simple loads");
1512 
1513  IRB.SetInsertPoint(LI);
1514  LoadInst *TL =
1515  IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true");
1516  LoadInst *FL =
1517  IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false");
1518  NumLoadsSpeculated += 2;
1519 
1520  // Transfer alignment and AA info if present.
1521  TL->setAlignment(LI->getAlignment());
1522  FL->setAlignment(LI->getAlignment());
1523 
1524  AAMDNodes Tags;
1525  LI->getAAMetadata(Tags);
1526  if (Tags) {
1527  TL->setAAMetadata(Tags);
1528  FL->setAAMetadata(Tags);
1529  }
1530 
1531  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1532  LI->getName() + ".sroa.speculated");
1533 
1534  DEBUG(dbgs() << " speculated to: " << *V << "\n");
1535  LI->replaceAllUsesWith(V);
1536  LI->eraseFromParent();
1537  }
1538  SI.eraseFromParent();
1539 }
1540 
1541 /// \brief Build a GEP out of a base pointer and indices.
1542 ///
1543 /// This will return the BasePtr if that is valid, or build a new GEP
1544 /// instruction using the IRBuilder if GEP-ing is needed.
1545 static Value *buildGEP(IRBuilderTy &IRB, Value *BasePtr,
1546  SmallVectorImpl<Value *> &Indices, Twine NamePrefix) {
1547  if (Indices.empty())
1548  return BasePtr;
1549 
1550  // A single zero index is a no-op, so check for this and avoid building a GEP
1551  // in that case.
1552  if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())
1553  return BasePtr;
1554 
1555  return IRB.CreateInBoundsGEP(nullptr, BasePtr, Indices,
1556  NamePrefix + "sroa_idx");
1557 }
1558 
1559 /// \brief Get a natural GEP off of the BasePtr walking through Ty toward
1560 /// TargetTy without changing the offset of the pointer.
1561 ///
1562 /// This routine assumes we've already established a properly offset GEP with
1563 /// Indices, and arrived at the Ty type. The goal is to continue to GEP with
1564 /// zero-indices down through type layers until we find one the same as
1565 /// TargetTy. If we can't find one with the same type, we at least try to use
1566 /// one with the same size. If none of that works, we just produce the GEP as
1567 /// indicated by Indices to have the correct offset.
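/// For instance (an informal sketch): descending from a { [8 x i32] } type
/// toward TargetTy == i32 appends an i32 0 for the struct field and a
/// pointer-sized 0 for the array, so the resulting GEP gains two extra zero
/// indices and ends up pointing at the first i32 element.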
1568 static Value *getNaturalGEPWithType(IRBuilderTy &IRB, const DataLayout &DL,
1569  Value *BasePtr, Type *Ty, Type *TargetTy,
1570  SmallVectorImpl<Value *> &Indices,
1571  Twine NamePrefix) {
1572  if (Ty == TargetTy)
1573  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1574 
1575  // Pointer size to use for the indices.
1576  unsigned PtrSize = DL.getPointerTypeSizeInBits(BasePtr->getType());
1577 
1578  // See if we can descend into a struct and locate a field with the correct
1579  // type.
1580  unsigned NumLayers = 0;
1581  Type *ElementTy = Ty;
1582  do {
1583  if (ElementTy->isPointerTy())
1584  break;
1585 
1586  if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
1587  ElementTy = ArrayTy->getElementType();
1588  Indices.push_back(IRB.getIntN(PtrSize, 0));
1589  } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
1590  ElementTy = VectorTy->getElementType();
1591  Indices.push_back(IRB.getInt32(0));
1592  } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
1593  if (STy->element_begin() == STy->element_end())
1594  break; // Nothing left to descend into.
1595  ElementTy = *STy->element_begin();
1596  Indices.push_back(IRB.getInt32(0));
1597  } else {
1598  break;
1599  }
1600  ++NumLayers;
1601  } while (ElementTy != TargetTy);
1602  if (ElementTy != TargetTy)
1603  Indices.erase(Indices.end() - NumLayers, Indices.end());
1604 
1605  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
1606 }
1607 
1608 /// \brief Recursively compute indices for a natural GEP.
1609 ///
1610 /// This is the recursive step for getNaturalGEPWithOffset that walks down the
1611 /// element types adding appropriate indices for the GEP.
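/// As an informal example: with Ty == [10 x { i32, i32 }] and Offset == 12,
/// one 8-byte array element is skipped (index 1), the remaining 4 bytes fall
/// into struct field 1, and the recursion bottoms out at an i32 with the
/// indices 1 and 1 appended.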
1612 static Value *getNaturalGEPRecursively(IRBuilderTy &IRB, const DataLayout &DL,
1613  Value *Ptr, Type *Ty, APInt &Offset,
1614  Type *TargetTy,
1615  SmallVectorImpl<Value *> &Indices,
1616  Twine NamePrefix) {
1617  if (Offset == 0)
1618  return getNaturalGEPWithType(IRB, DL, Ptr, Ty, TargetTy, Indices,
1619  NamePrefix);
1620 
1621  // We can't recurse through pointer types.
1622  if (Ty->isPointerTy())
1623  return nullptr;
1624 
1625  // We try to analyze GEPs over vectors here, but note that these GEPs are
1626  // extremely poorly defined currently. The long-term goal is to remove GEPing
1627  // over a vector from the IR completely.
1628  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
1629  unsigned ElementSizeInBits = DL.getTypeSizeInBits(VecTy->getScalarType());
1630  if (ElementSizeInBits % 8 != 0) {
1631  // GEPs over non-multiple of 8 size vector elements are invalid.
1632  return nullptr;
1633  }
1634  APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
1635  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1636  if (NumSkippedElements.ugt(VecTy->getNumElements()))
1637  return nullptr;
1638  Offset -= NumSkippedElements * ElementSize;
1639  Indices.push_back(IRB.getInt(NumSkippedElements));
1640  return getNaturalGEPRecursively(IRB, DL, Ptr, VecTy->getElementType(),
1641  Offset, TargetTy, Indices, NamePrefix);
1642  }
1643 
1644  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
1645  Type *ElementTy = ArrTy->getElementType();
1646  APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
1647  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1648  if (NumSkippedElements.ugt(ArrTy->getNumElements()))
1649  return nullptr;
1650 
1651  Offset -= NumSkippedElements * ElementSize;
1652  Indices.push_back(IRB.getInt(NumSkippedElements));
1653  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1654  Indices, NamePrefix);
1655  }
1656 
1657  StructType *STy = dyn_cast<StructType>(Ty);
1658  if (!STy)
1659  return nullptr;
1660 
1661  const StructLayout *SL = DL.getStructLayout(STy);
1662  uint64_t StructOffset = Offset.getZExtValue();
1663  if (StructOffset >= SL->getSizeInBytes())
1664  return nullptr;
1665  unsigned Index = SL->getElementContainingOffset(StructOffset);
1666  Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index));
1667  Type *ElementTy = STy->getElementType(Index);
1668  if (Offset.uge(DL.getTypeAllocSize(ElementTy)))
1669  return nullptr; // The offset points into alignment padding.
1670 
1671  Indices.push_back(IRB.getInt32(Index));
1672  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1673  Indices, NamePrefix);
1674 }
1675 
1676 /// \brief Get a natural GEP from a base pointer to a particular offset and
1677 /// resulting in a particular type.
1678 ///
1679 /// The goal is to produce a "natural" looking GEP that works with the existing
1680 /// composite types to arrive at the appropriate offset and element type for
1681 /// a pointer. TargetTy is the element type the returned GEP should point-to if
1682 /// possible. We recurse by decreasing Offset, adding the appropriate index to
1683 /// Indices, and setting Ty to the result subtype.
1684 ///
1685 /// If no natural GEP can be constructed, this function returns null.
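/// For example (a sketch with a hypothetical %base): for a [4 x i32]* base
/// pointer, Offset == 8 and TargetTy == i32, this builds the equivalent of
/// "getelementptr inbounds [4 x i32]* %base, i64 0, i64 2".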
1686 static Value *getNaturalGEPWithOffset(IRBuilderTy &IRB, const DataLayout &DL,
1687  Value *Ptr, APInt Offset, Type *TargetTy,
1688  SmallVectorImpl<Value *> &Indices,
1689  Twine NamePrefix) {
1690  PointerType *Ty = cast<PointerType>(Ptr->getType());
1691 
1692  // Don't consider any GEPs through an i8* as natural unless the TargetTy is
1693  // an i8.
1694  if (Ty == IRB.getInt8PtrTy(Ty->getAddressSpace()) && TargetTy->isIntegerTy(8))
1695  return nullptr;
1696 
1697  Type *ElementTy = Ty->getElementType();
1698  if (!ElementTy->isSized())
1699  return nullptr; // We can't GEP through an unsized element.
1700  APInt ElementSize(Offset.getBitWidth(), DL.getTypeAllocSize(ElementTy));
1701  if (ElementSize == 0)
1702  return nullptr; // Zero-length arrays can't help us build a natural GEP.
1703  APInt NumSkippedElements = Offset.sdiv(ElementSize);
1704 
1705  Offset -= NumSkippedElements * ElementSize;
1706  Indices.push_back(IRB.getInt(NumSkippedElements));
1707  return getNaturalGEPRecursively(IRB, DL, Ptr, ElementTy, Offset, TargetTy,
1708  Indices, NamePrefix);
1709 }
1710 
1711 /// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
1712 /// resulting pointer has PointerTy.
1713 ///
1714 /// This tries very hard to compute a "natural" GEP which arrives at the offset
1715 /// and produces the pointer type desired. Where it cannot, it will try to use
1716 /// the natural GEP to arrive at the offset and bitcast to the type. Where that
1717 /// fails, it will try to use an existing i8* and GEP to the byte offset and
1718 /// bitcast to the type.
1719 ///
1720 /// The strategy for finding the more natural GEPs is to peel off layers of the
1721 /// pointer, walking back through bit casts and GEPs, searching for a base
1722 /// pointer from which we can compute a natural GEP with the desired
1723 /// properties. The algorithm tries to fold as many constant indices into
1724 /// a single GEP as possible, thus making each GEP more independent of the
1725 /// surrounding code.
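/// As a rough illustration with hypothetical values: if Ptr is
/// "bitcast { i32, i32 }* %a to i8*", Offset is 4 and PointerTy is i32*,
/// peeling the bitcast exposes %a and allows a natural GEP to field 1 to be
/// built instead of a raw byte-offset GEP over i8* followed by a bitcast.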
1726 static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1727  APInt Offset, Type *PointerTy, Twine NamePrefix) {
1728  // Even though we don't look through PHI nodes, we could be called on an
1729  // instruction in an unreachable block, which may be on a cycle.
1730  SmallPtrSet<Value *, 4> Visited;
1731  Visited.insert(Ptr);
1732  SmallVector<Value *, 4> Indices;
1733 
1734  // We may end up computing an offset pointer that has the wrong type. If we
1735  // never are able to compute one directly that has the correct type, we'll
1736  // fall back to it, so keep it and the base it was computed from around here.
1737  Value *OffsetPtr = nullptr;
1738  Value *OffsetBasePtr;
1739 
1740  // Remember any i8 pointer we come across to re-use if we need to do a raw
1741  // byte offset.
1742  Value *Int8Ptr = nullptr;
1743  APInt Int8PtrOffset(Offset.getBitWidth(), 0);
1744 
1745  Type *TargetTy = PointerTy->getPointerElementType();
1746 
1747  do {
1748  // First fold any existing GEPs into the offset.
1749  while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
1750  APInt GEPOffset(Offset.getBitWidth(), 0);
1751  if (!GEP->accumulateConstantOffset(DL, GEPOffset))
1752  break;
1753  Offset += GEPOffset;
1754  Ptr = GEP->getPointerOperand();
1755  if (!Visited.insert(Ptr).second)
1756  break;
1757  }
1758 
1759  // See if we can perform a natural GEP here.
1760  Indices.clear();
1761  if (Value *P = getNaturalGEPWithOffset(IRB, DL, Ptr, Offset, TargetTy,
1762  Indices, NamePrefix)) {
1763  // If we have a new natural pointer at the offset, clear out any old
1764  // offset pointer we computed. Unless it is the base pointer or
1765  // a non-instruction, we built a GEP we don't need. Zap it.
1766  if (OffsetPtr && OffsetPtr != OffsetBasePtr)
1767  if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
1768  assert(I->use_empty() && "Built a GEP with uses somehow!");
1769  I->eraseFromParent();
1770  }
1771  OffsetPtr = P;
1772  OffsetBasePtr = Ptr;
1773  // If we also found a pointer of the right type, we're done.
1774  if (P->getType() == PointerTy)
1775  return P;
1776  }
1777 
1778  // Stash this pointer if we've found an i8*.
1779  if (Ptr->getType()->isIntegerTy(8)) {
1780  Int8Ptr = Ptr;
1781  Int8PtrOffset = Offset;
1782  }
1783 
1784  // Peel off a layer of the pointer and update the offset appropriately.
1785  if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
1786  Ptr = cast<Operator>(Ptr)->getOperand(0);
1787  } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
1788  if (GA->mayBeOverridden())
1789  break;
1790  Ptr = GA->getAliasee();
1791  } else {
1792  break;
1793  }
1794  assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
1795  } while (Visited.insert(Ptr).second);
1796 
1797  if (!OffsetPtr) {
1798  if (!Int8Ptr) {
1799  Int8Ptr = IRB.CreateBitCast(
1800  Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
1801  NamePrefix + "sroa_raw_cast");
1802  Int8PtrOffset = Offset;
1803  }
1804 
1805  OffsetPtr = Int8PtrOffset == 0
1806  ? Int8Ptr
1807  : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
1808  IRB.getInt(Int8PtrOffset),
1809  NamePrefix + "sroa_raw_idx");
1810  }
1811  Ptr = OffsetPtr;
1812 
1813  // On the off chance we were targeting i8*, guard the bitcast here.
1814  if (Ptr->getType() != PointerTy)
1815  Ptr = IRB.CreateBitCast(Ptr, PointerTy, NamePrefix + "sroa_cast");
1816 
1817  return Ptr;
1818 }
1819 
1820 /// \brief Compute the adjusted alignment for a load or store from an offset.
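/// For example, a load carrying (explicit or ABI) alignment 8 that is
/// rewritten to begin 4 bytes into its original location can be given at
/// most MinAlign(8, 4) == 4 bytes of alignment.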
1821 static unsigned getAdjustedAlignment(Instruction *I, uint64_t Offset,
1822  const DataLayout &DL) {
1823  unsigned Alignment;
1824  Type *Ty;
1825  if (auto *LI = dyn_cast<LoadInst>(I)) {
1826  Alignment = LI->getAlignment();
1827  Ty = LI->getType();
1828  } else if (auto *SI = dyn_cast<StoreInst>(I)) {
1829  Alignment = SI->getAlignment();
1830  Ty = SI->getValueOperand()->getType();
1831  } else {
1832  llvm_unreachable("Only loads and stores are allowed!");
1833  }
1834 
1835  if (!Alignment)
1836  Alignment = DL.getABITypeAlignment(Ty);
1837 
1838  return MinAlign(Alignment, Offset);
1839 }
1840 
1841 /// \brief Test whether we can convert a value from the old to the new type.
1842 ///
1843 /// This predicate should be used to guard calls to convertValue in order to
1844 /// ensure that we only try to convert viable values. The strategy is that we
1845 /// will peel off single element struct and array wrappings to get to an
1846 /// underlying value, and convert that value.
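/// For instance, assuming a DataLayout with 64-bit pointers: i64 <-> double
/// and i8* <-> i64 are convertible (equal sizes, single value types), while
/// i32 <-> i64 is rejected because the integer bit-widths differ.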
1847 static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy) {
1848  if (OldTy == NewTy)
1849  return true;
1850 
1851  // For integer types, we can't handle any bit-width differences. This would
1852  // break both vector conversions with extension and introduce endianness
1853  // issues when used in conjunction with loads and stores.
1854  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1855  assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1856  cast<IntegerType>(NewTy)->getBitWidth() &&
1857  "We can't have the same bitwidth for different int types");
1858  return false;
1859  }
1860 
1861  if (DL.getTypeSizeInBits(NewTy) != DL.getTypeSizeInBits(OldTy))
1862  return false;
1863  if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
1864  return false;
1865 
1866  // We can convert pointers to integers and vice-versa. Same for vectors
1867  // of pointers and integers.
1868  OldTy = OldTy->getScalarType();
1869  NewTy = NewTy->getScalarType();
1870  if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
1871  if (NewTy->isPointerTy() && OldTy->isPointerTy())
1872  return true;
1873  if (NewTy->isIntegerTy() || OldTy->isIntegerTy())
1874  return true;
1875  return false;
1876  }
1877 
1878  return true;
1879 }
1880 
1881 /// \brief Generic routine to convert an SSA value to a value of a different
1882 /// type.
1883 ///
1884 /// This will try various different casting techniques, such as bitcasts,
1885 /// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
1886 /// two types for viability with this routine.
1887 static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
1888  Type *NewTy) {
1889  Type *OldTy = V->getType();
1890  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");
1891 
1892  if (OldTy == NewTy)
1893  return V;
1894 
1895  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1896  "Integer types must be the exact same to convert.");
1897 
1898  // See if we need inttoptr for this type pair. A cast involving both scalars
1899  // and vectors requires an additional bitcast.
1900  if (OldTy->getScalarType()->isIntegerTy() &&
1901  NewTy->getScalarType()->isPointerTy()) {
1902  // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
1903  if (OldTy->isVectorTy() && !NewTy->isVectorTy())
1904  return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
1905  NewTy);
1906 
1907  // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
1908  if (!OldTy->isVectorTy() && NewTy->isVectorTy())
1909  return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),
1910  NewTy);
1911 
1912  return IRB.CreateIntToPtr(V, NewTy);
1913  }
1914 
1915  // See if we need ptrtoint for this type pair. A cast involving both scalars
1916  // and vectors requires an additional bitcast.
1917  if (OldTy->getScalarType()->isPointerTy() &&
1918  NewTy->getScalarType()->isIntegerTy()) {
1919  // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
1920  if (OldTy->isVectorTy() && !NewTy->isVectorTy())
1921  return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1922  NewTy);
1923 
1924  // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
1925  if (!OldTy->isVectorTy() && NewTy->isVectorTy())
1926  return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
1927  NewTy);
1928 
1929  return IRB.CreatePtrToInt(V, NewTy);
1930  }
1931 
1932  return IRB.CreateBitCast(V, NewTy);
1933 }
1934 
1935 /// \brief Test whether the given slice use can be promoted to a vector.
1936 ///
1937 /// This function is called to test each entry in a partition which is slated
1938 /// for a single slice.
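/// For example (informally), with Ty == <4 x i32> and ElementSize == 4, an
/// i32 load at byte offset 8 within the partition maps cleanly onto element
/// index 2 and is viable, whereas an access that straddles an element
/// boundary is not.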
1939 static bool isVectorPromotionViableForSlice(AllocaSlices::Partition &P,
1940  const Slice &S, VectorType *Ty,
1941  uint64_t ElementSize,
1942  const DataLayout &DL) {
1943  // First validate the slice offsets.
1944  uint64_t BeginOffset =
1945  std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
1946  uint64_t BeginIndex = BeginOffset / ElementSize;
1947  if (BeginIndex * ElementSize != BeginOffset ||
1948  BeginIndex >= Ty->getNumElements())
1949  return false;
1950  uint64_t EndOffset =
1951  std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
1952  uint64_t EndIndex = EndOffset / ElementSize;
1953  if (EndIndex * ElementSize != EndOffset || EndIndex > Ty->getNumElements())
1954  return false;
1955 
1956  assert(EndIndex > BeginIndex && "Empty vector!");
1957  uint64_t NumElements = EndIndex - BeginIndex;
1958  Type *SliceTy = (NumElements == 1)
1959  ? Ty->getElementType()
1960  : VectorType::get(Ty->getElementType(), NumElements);
1961 
1962  Type *SplitIntTy =
1963  Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
1964 
1965  Use *U = S.getUse();
1966 
1967  if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
1968  if (MI->isVolatile())
1969  return false;
1970  if (!S.isSplittable())
1971  return false; // Skip any unsplittable intrinsics.
1972  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
1973  if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
1974  II->getIntrinsicID() != Intrinsic::lifetime_end)
1975  return false;
1976  } else if (U->get()->getType()->getPointerElementType()->isStructTy()) {
1977  // Disable vector promotion when there are loads or stores of an FCA.
1978  return false;
1979  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1980  if (LI->isVolatile())
1981  return false;
1982  Type *LTy = LI->getType();
1983  if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1984  assert(LTy->isIntegerTy());
1985  LTy = SplitIntTy;
1986  }
1987  if (!canConvertValue(DL, SliceTy, LTy))
1988  return false;
1989  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1990  if (SI->isVolatile())
1991  return false;
1992  Type *STy = SI->getValueOperand()->getType();
1993  if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
1994  assert(STy->isIntegerTy());
1995  STy = SplitIntTy;
1996  }
1997  if (!canConvertValue(DL, STy, SliceTy))
1998  return false;
1999  } else {
2000  return false;
2001  }
2002 
2003  return true;
2004 }
2005 
2006 /// \brief Test whether the given alloca partitioning and range of slices can be
2007 /// promoted to a vector.
2008 ///
2009 /// This is a quick test to check whether we can rewrite a particular alloca
2010 /// partition (and its newly formed alloca) into a vector alloca with only
2011 /// whole-vector loads and stores such that it could be promoted to a vector
2012 /// SSA value. We only can ensure this for a limited set of operations, and we
2013 /// don't want to do the rewrites unless we are confident that the result will
2014 /// be promotable, so we have an early test here.
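/// As a sketch: an alloca partition touched only by whole-vector <4 x i32>
/// loads and stores plus i32 accesses of individual elements is a good
/// candidate, and <4 x i32> would be the vector type returned here.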
2015 static VectorType *isVectorPromotionViable(AllocaSlices::Partition &P,
2016  const DataLayout &DL) {
2017  // Collect the candidate types for vector-based promotion. Also track whether
2018  // we have different element types.
2019  SmallVector<VectorType *, 4> CandidateTys;
2020  Type *CommonEltTy = nullptr;
2021  bool HaveCommonEltTy = true;
2022  auto CheckCandidateType = [&](Type *Ty) {
2023  if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2024  CandidateTys.push_back(VTy);
2025  if (!CommonEltTy)
2026  CommonEltTy = VTy->getElementType();
2027  else if (CommonEltTy != VTy->getElementType())
2028  HaveCommonEltTy = false;
2029  }
2030  };
2031  // Consider any loads or stores that are the exact size of the slice.
2032  for (const Slice &S : P)
2033  if (S.beginOffset() == P.beginOffset() &&
2034  S.endOffset() == P.endOffset()) {
2035  if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2036  CheckCandidateType(LI->getType());
2037  else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2038  CheckCandidateType(SI->getValueOperand()->getType());
2039  }
2040 
2041  // If we didn't find a vector type, nothing to do here.
2042  if (CandidateTys.empty())
2043  return nullptr;
2044 
2045  // Remove non-integer vector types if we had multiple common element types.
2046  // FIXME: It'd be nice to replace them with integer vector types, but we can't
2047  // do that until all the backends are known to produce good code for all
2048  // integer vector types.
2049  if (!HaveCommonEltTy) {
2050  CandidateTys.erase(std::remove_if(CandidateTys.begin(), CandidateTys.end(),
2051  [](VectorType *VTy) {
2052  return !VTy->getElementType()->isIntegerTy();
2053  }),
2054  CandidateTys.end());
2055 
2056  // If there were no integer vector types, give up.
2057  if (CandidateTys.empty())
2058  return nullptr;
2059 
2060  // Rank the remaining candidate vector types. This is easy because we know
2061  // they're all integer vectors. We sort by ascending number of elements.
2062  auto RankVectorTypes = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2063  assert(DL.getTypeSizeInBits(RHSTy) == DL.getTypeSizeInBits(LHSTy) &&
2064  "Cannot have vector types of different sizes!");
2065  assert(RHSTy->getElementType()->isIntegerTy() &&
2066  "All non-integer types eliminated!");
2067  assert(LHSTy->getElementType()->isIntegerTy() &&
2068  "All non-integer types eliminated!");
2069  return RHSTy->getNumElements() < LHSTy->getNumElements();
2070  };
2071  std::sort(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes);
2072  CandidateTys.erase(
2073  std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
2074  CandidateTys.end());
2075  } else {
2076 // The only way to have the same element type in every vector type is to
2077 // have the same vector type. Check that and remove all but one.
2078 #ifndef NDEBUG
2079  for (VectorType *VTy : CandidateTys) {
2080  assert(VTy->getElementType() == CommonEltTy &&
2081  "Unaccounted for element type!");
2082  assert(VTy == CandidateTys[0] &&
2083  "Different vector types with the same element type!");
2084  }
2085 #endif
2086  CandidateTys.resize(1);
2087  }
2088 
2089  // Try each vector type, and return the one which works.
2090  auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
2091  uint64_t ElementSize = DL.getTypeSizeInBits(VTy->getElementType());
2092 
2093  // While LLVM vectors are defined to be bit-packed, we don't support
2094  // element sizes that aren't a whole number of bytes.
2095  if (ElementSize % 8)
2096  return false;
2097  assert((DL.getTypeSizeInBits(VTy) % 8) == 0 &&
2098  "vector size not a multiple of element size?");
2099  ElementSize /= 8;
2100 
2101  for (const Slice &S : P)
2102  if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL))
2103  return false;
2104 
2105  for (const Slice *S : P.splitSliceTails())
2106  if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL))
2107  return false;
2108 
2109  return true;
2110  };
2111  for (VectorType *VTy : CandidateTys)
2112  if (CheckVectorTypeForPromotion(VTy))
2113  return VTy;
2114 
2115  return nullptr;
2116 }
2117 
2118 /// \brief Test whether a slice of an alloca is valid for integer widening.
2119 ///
2120 /// This implements the necessary checking for the \c isIntegerWideningViable
2121 /// test below on a single slice of the alloca.
2122 static bool isIntegerWideningViableForSlice(const Slice &S,
2123  uint64_t AllocBeginOffset,
2124  Type *AllocaTy,
2125  const DataLayout &DL,
2126  bool &WholeAllocaOp) {
2127  uint64_t Size = DL.getTypeStoreSize(AllocaTy);
2128 
2129  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2130  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2131 
2132  // We can't reasonably handle cases where the load or store extends past
2133  // the end of the alloca's type and into its padding.
2134  if (RelEnd > Size)
2135  return false;
2136 
2137  Use *U = S.getUse();
2138 
2139  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2140  if (LI->isVolatile())
2141  return false;
2142  // We can't handle loads that extend past the allocated memory.
2143  if (DL.getTypeStoreSize(LI->getType()) > Size)
2144  return false;
2145  // Note that we don't count vector loads or stores as whole-alloca
2146  // operations which enable integer widening because we would prefer to use
2147  // vector widening instead.
2148  if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2149  WholeAllocaOp = true;
2150  if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2151  if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
2152  return false;
2153  } else if (RelBegin != 0 || RelEnd != Size ||
2154  !canConvertValue(DL, AllocaTy, LI->getType())) {
2155  // Non-integer loads need to be convertible from the alloca type so that
2156  // they are promotable.
2157  return false;
2158  }
2159  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2160  Type *ValueTy = SI->getValueOperand()->getType();
2161  if (SI->isVolatile())
2162  return false;
2163  // We can't handle stores that extend past the allocated memory.
2164  if (DL.getTypeStoreSize(ValueTy) > Size)
2165  return false;
2166  // Note that we don't count vector loads or stores as whole-alloca
2167  // operations which enable integer widening because we would prefer to use
2168  // vector widening instead.
2169  if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2170  WholeAllocaOp = true;
2171  if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2172  if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy))
2173  return false;
2174  } else if (RelBegin != 0 || RelEnd != Size ||
2175  !canConvertValue(DL, ValueTy, AllocaTy)) {
2176  // Non-integer stores need to be convertible to the alloca type so that
2177  // they are promotable.
2178  return false;
2179  }
2180  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2181  if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2182  return false;
2183  if (!S.isSplittable())
2184  return false; // Skip any unsplittable intrinsics.
2185  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2186  if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
2187  II->getIntrinsicID() != Intrinsic::lifetime_end)
2188  return false;
2189  } else {
2190  return false;
2191  }
2192 
2193  return true;
2194 }
2195 
2196 /// \brief Test whether the given alloca partition's integer operations can be
2197 /// widened to promotable ones.
2198 ///
2199 /// This is a quick test to check whether we can rewrite the integer loads and
2200 /// stores to a particular alloca into wider loads and stores and be able to
2201 /// promote the resulting alloca.
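/// For example (informally), an 8-byte alloca covered by an i64 load and
/// also written through i32 stores at offsets 0 and 4 can have those
/// accesses widened to operations on a single i64, making it promotable.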
2202 static bool isIntegerWideningViable(AllocaSlices::Partition &P, Type *AllocaTy,
2203  const DataLayout &DL) {
2204  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy);
2205  // Don't create integer types larger than the maximum bitwidth.
2206  if (SizeInBits > IntegerType::MAX_INT_BITS)
2207  return false;
2208 
2209  // Don't try to handle allocas with bit-padding.
2210  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy))
2211  return false;
2212 
2213  // We need to ensure that an integer type with the appropriate bitwidth can
2214  // be converted to the alloca type, whatever that is. We don't want to force
2215  // the alloca itself to have an integer type if there is a more suitable one.
2216  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2217  if (!canConvertValue(DL, AllocaTy, IntTy) ||
2218  !canConvertValue(DL, IntTy, AllocaTy))
2219  return false;
2220 
2221  // While examining uses, we ensure that the alloca has a covering load or
2222  // store. We don't want to widen the integer operations only to fail to
2223  // promote due to some other unsplittable entry (which we may make splittable
2224  // later). However, if there are only splittable uses, go ahead and assume
2225  // that we cover the alloca.
2226  // FIXME: We shouldn't consider split slices that happen to start in the
2227  // partition here...
2228  bool WholeAllocaOp =
2229  P.begin() != P.end() ? false : DL.isLegalInteger(SizeInBits);
2230 
2231  for (const Slice &S : P)
2232  if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2233  WholeAllocaOp))
2234  return false;
2235 
2236  for (const Slice *S : P.splitSliceTails())
2237  if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2238  WholeAllocaOp))
2239  return false;
2240 
2241  return WholeAllocaOp;
2242 }
2243 
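// A rough illustration of the two helpers below, assuming a little-endian
// target: extracting an i16 at byte offset 2 from an i64 lowers to a lshr by
// 16 followed by a trunc to i16, while insertInteger performs the inverse
// zext, shl, mask and or sequence.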
2244 static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2245  IntegerType *Ty, uint64_t Offset,
2246  const Twine &Name) {
2247  DEBUG(dbgs() << " start: " << *V << "\n");
2248  IntegerType *IntTy = cast<IntegerType>(V->getType());
2249  assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
2250  "Element extends past full value");
2251  uint64_t ShAmt = 8 * Offset;
2252  if (DL.isBigEndian())
2253  ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2254  if (ShAmt) {
2255  V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2256  DEBUG(dbgs() << " shifted: " << *V << "\n");
2257  }
2258  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2259  "Cannot extract to a larger integer!");
2260  if (Ty != IntTy) {
2261  V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2262  DEBUG(dbgs() << " trunced: " << *V << "\n");
2263  }
2264  return V;
2265 }
2266 
2267 static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2268  Value *V, uint64_t Offset, const Twine &Name) {
2269  IntegerType *IntTy = cast<IntegerType>(Old->getType());
2270  IntegerType *Ty = cast<IntegerType>(V->getType());
2271  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2272  "Cannot insert a larger integer!");
2273  DEBUG(dbgs() << " start: " << *V << "\n");
2274  if (Ty != IntTy) {
2275  V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2276  DEBUG(dbgs() << " extended: " << *V << "\n");
2277  }
2278  assert(DL.getTypeStoreSize(Ty) + Offset <= DL.getTypeStoreSize(IntTy) &&
2279  "Element store outside of alloca store");
2280  uint64_t ShAmt = 8 * Offset;
2281  if (DL.isBigEndian())
2282  ShAmt = 8 * (DL.getTypeStoreSize(IntTy) - DL.getTypeStoreSize(Ty) - Offset);
2283  if (ShAmt) {
2284  V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2285  DEBUG(dbgs() << " shifted: " << *V << "\n");
2286  }
2287 
2288  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2289  APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2290  Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2291  DEBUG(dbgs() << " masked: " << *Old << "\n");
2292  V = IRB.CreateOr(Old, V, Name + ".insert");
2293  DEBUG(dbgs() << " inserted: " << *V << "\n");
2294  }
2295  return V;
2296 }
2297 
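// For example (informally), extracting elements [1, 3) of a <4 x i32> value
// becomes a shufflevector with the constant mask <i32 1, i32 2>, while a
// single-element extraction uses extractelement directly.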
2298 static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2299  unsigned EndIndex, const Twine &Name) {
2300  VectorType *VecTy = cast<VectorType>(V->getType());
2301  unsigned NumElements = EndIndex - BeginIndex;
2302  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2303 
2304  if (NumElements == VecTy->getNumElements())
2305  return V;
2306 
2307  if (NumElements == 1) {
2308  V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2309  Name + ".extract");
2310  DEBUG(dbgs() << " extract: " << *V << "\n");
2311  return V;
2312  }
2313 
2314  SmallVector<Constant *, 8> Mask;
2315  Mask.reserve(NumElements);
2316  for (unsigned i = BeginIndex; i != EndIndex; ++i)
2317  Mask.push_back(IRB.getInt32(i));
2318  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
2319  ConstantVector::get(Mask), Name + ".extract");
2320  DEBUG(dbgs() << " shuffle: " << *V << "\n");
2321  return V;
2322 }
2323 
2324 static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2325  unsigned BeginIndex, const Twine &Name) {
2326  VectorType *VecTy = cast<VectorType>(Old->getType());
2327  assert(VecTy && "Can only insert a vector into a vector");
2328 
2329  VectorType *Ty = dyn_cast<VectorType>(V->getType());
2330  if (!Ty) {
2331  // Single element to insert.
2332  V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2333  Name + ".insert");
2334  DEBUG(dbgs() << " insert: " << *V << "\n");
2335  return V;
2336  }
2337 
2338  assert(Ty->getNumElements() <= VecTy->getNumElements() &&
2339  "Too many elements!");
2340  if (Ty->getNumElements() == VecTy->getNumElements()) {
2341  assert(V->getType() == VecTy && "Vector type mismatch");
2342  return V;
2343  }
2344  unsigned EndIndex = BeginIndex + Ty->getNumElements();
2345 
2346  // When inserting a smaller vector into the larger one for the store, we
2347  // first use a shuffle vector to widen it with undef elements, and then
2348  // use a select with a constant mask to choose between the loaded vector
2349  // and the incoming vector for the elements being overwritten.
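  // For instance (a sketch): inserting a <2 x i32> at BeginIndex 1 into a
  // <4 x i32> first widens with the shuffle mask <undef, 0, 1, undef> and
  // then selects with the constant mask <0, 1, 1, 0>, so only elements 1
  // and 2 come from the incoming value.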
2350  SmallVector<Constant *, 8> Mask;
2351  Mask.reserve(VecTy->getNumElements());
2352  for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
2353  if (i >= BeginIndex && i < EndIndex)
2354  Mask.push_back(IRB.getInt32(i - BeginIndex));
2355  else
2356  Mask.push_back(UndefValue::get(IRB.getInt32Ty()));
2357  V = IRB.CreateShuffleVector(V, UndefValue::get(V->getType()),
2358  ConstantVector::get(Mask), Name + ".expand");
2359  DEBUG(dbgs() << " shuffle: " << *V << "\n");
2360 
2361  Mask.clear();
2362  for (unsigned i = 0; i != VecTy->getNumElements(); ++i)
2363  Mask.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2364 
2365  V = IRB.CreateSelect(ConstantVector::get(Mask), V, Old, Name + "blend");
2366 
2367  DEBUG(dbgs() << " blend: " << *V << "\n");
2368  return V;
2369 }
2370 
2371 namespace {
2372 /// \brief Visitor to rewrite instructions using a particular slice of an alloca
2373 /// to use a new alloca.
2374 ///
2375 /// Also implements the rewriting to vector-based accesses when the partition
2376 /// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2377 /// lives here.
2378 class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2379  // Befriend the base class so it can delegate to private visit methods.
2380  friend class llvm::InstVisitor<AllocaSliceRewriter, bool>;
2381  typedef llvm::InstVisitor<AllocaSliceRewriter, bool> Base;
2382 
2383  const DataLayout &DL;
2384  AllocaSlices &AS;
2385  SROA &Pass;
2386  AllocaInst &OldAI, &NewAI;
2387  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2388  Type *NewAllocaTy;
2389 
2390  // This is a convenience and flag variable that will be null unless the new
2391  // alloca's integer operations should be widened to this integer type due to
2392  // passing isIntegerWideningViable above. If it is non-null, the desired
2393  // integer type will be stored here for easy access during rewriting.
2394  IntegerType *IntTy;
2395 
2396  // If we are rewriting an alloca partition which can be written as pure
2397  // vector operations, we stash extra information here. When VecTy is
2398  // non-null, we have some strict guarantees about the rewritten alloca:
2399  // - The new alloca is exactly the size of the vector type here.
2400  // - The accesses all either map to the entire vector or to a single
2401  // element.
2402  // - The set of accessing instructions is only one of those handled above
2403  // in isVectorPromotionViable. Generally these are the same access kinds
2404  // which are promotable via mem2reg.
2405  VectorType *VecTy;
2406  Type *ElementTy;
2407  uint64_t ElementSize;
2408 
2409  // The original offset of the slice currently being rewritten relative to
2410  // the original alloca.
2411  uint64_t BeginOffset, EndOffset;
2412  // The new offsets of the slice currently being rewritten relative to the
2413  // original alloca.
2414  uint64_t NewBeginOffset, NewEndOffset;
2415 
2416  uint64_t SliceSize;
2417  bool IsSplittable;
2418  bool IsSplit;
2419  Use *OldUse;
2420  Instruction *OldPtr;
2421 
2422  // Track post-rewrite users which are PHI nodes and Selects.
2423  SmallPtrSetImpl<PHINode *> &PHIUsers;
2424  SmallPtrSetImpl<SelectInst *> &SelectUsers;
2425 
2426  // Utility IR builder, whose name prefix is setup for each visited use, and
2427  // the insertion point is set to point to the user.
2428  IRBuilderTy IRB;
2429 
2430 public:
2431  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2432  AllocaInst &OldAI, AllocaInst &NewAI,
2433  uint64_t NewAllocaBeginOffset,
2434  uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2435  VectorType *PromotableVecTy,
2436  SmallPtrSetImpl<PHINode *> &PHIUsers,
2437  SmallPtrSetImpl<SelectInst *> &SelectUsers)
2438  : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2439  NewAllocaBeginOffset(NewAllocaBeginOffset),
2440  NewAllocaEndOffset(NewAllocaEndOffset),
2441  NewAllocaTy(NewAI.getAllocatedType()),
2442  IntTy(IsIntegerPromotable
2443  ? Type::getIntNTy(
2444  NewAI.getContext(),
2445  DL.getTypeSizeInBits(NewAI.getAllocatedType()))
2446  : nullptr),
2447  VecTy(PromotableVecTy),
2448  ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2449  ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy) / 8 : 0),
2450  BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
2451  OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2452  IRB(NewAI.getContext(), ConstantFolder()) {
2453  if (VecTy) {
2454  assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
2455  "Only multiple-of-8 sized vector elements are viable");
2456  ++NumVectorized;
2457  }
2458  assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2459  }
2460 
2461  bool visit(AllocaSlices::const_iterator I) {
2462  bool CanSROA = true;
2463  BeginOffset = I->beginOffset();
2464  EndOffset = I->endOffset();
2465  IsSplittable = I->isSplittable();
2466  IsSplit =
2467  BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2468  DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2469  DEBUG(AS.printSlice(dbgs(), I, ""));
2470  DEBUG(dbgs() << "\n");
2471 
2472  // Compute the intersecting offset range.
2473  assert(BeginOffset < NewAllocaEndOffset);
2474  assert(EndOffset > NewAllocaBeginOffset);
2475  NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2476  NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2477 
2478  SliceSize = NewEndOffset - NewBeginOffset;
2479 
2480  OldUse = I->getUse();
2481  OldPtr = cast<Instruction>(OldUse->get());
2482 
2483  Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2484  IRB.SetInsertPoint(OldUserI);
2485  IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2486  IRB.SetNamePrefix(Twine(NewAI.getName()) + "." + Twine(BeginOffset) + ".");
2487 
2488  CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2489  if (VecTy || IntTy)
2490  assert(CanSROA);
2491  return CanSROA;
2492  }
2493 
2494 private:
2495  // Make sure the other visit overloads are visible.
2496  using Base::visit;
2497 
2498  // Every instruction which can end up as a user must have a rewrite rule.
2499  bool visitInstruction(Instruction &I) {
2500  DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
2501  llvm_unreachable("No rewrite rule for this instruction!");
2502  }
2503 
2504  Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
2505  // Note that the offset computation can use BeginOffset or NewBeginOffset
2506  // interchangeably for unsplit slices.
2507  assert(IsSplit || BeginOffset == NewBeginOffset);
2508  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2509 
2510 #ifndef NDEBUG
2511  StringRef OldName = OldPtr->getName();
2512  // Skip through the last '.sroa.' component of the name.
2513  size_t LastSROAPrefix = OldName.rfind(".sroa.");
2514  if (LastSROAPrefix != StringRef::npos) {
2515  OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
2516  // Look for an SROA slice index.
2517  size_t IndexEnd = OldName.find_first_not_of("0123456789");
2518  if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
2519  // Strip the index and look for the offset.
2520  OldName = OldName.substr(IndexEnd + 1);
2521  size_t OffsetEnd = OldName.find_first_not_of("0123456789");
2522  if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
2523  // Strip the offset.
2524  OldName = OldName.substr(OffsetEnd + 1);
2525  }
2526  }
2527  // Strip any SROA suffixes as well.
2528  OldName = OldName.substr(0, OldName.find(".sroa_"));
2529 #endif
2530 
2531  return getAdjustedPtr(IRB, DL, &NewAI,
2532  APInt(DL.getPointerSizeInBits(), Offset), PointerTy,
2533 #ifndef NDEBUG
2534  Twine(OldName) + "."
2535 #else
2536  Twine()
2537 #endif
2538  );
2539  }
2540 
2541  /// \brief Compute suitable alignment to access this slice of the *new*
2542  /// alloca.
2543  ///
2544  /// You can optionally pass a type to this routine and if that type's ABI
2545  /// alignment is itself suitable, this will return zero.
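  /// For example, if the new alloca is 8-byte aligned and this slice begins
  /// 4 bytes into it, the usable alignment is MinAlign(8, 4) == 4; if that
  /// happens to equal the queried type's ABI alignment, zero is returned so
  /// the access can simply use the default alignment.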
2546  unsigned getSliceAlign(Type *Ty = nullptr) {
2547  unsigned NewAIAlign = NewAI.getAlignment();
2548  if (!NewAIAlign)
2549  NewAIAlign = DL.getABITypeAlignment(NewAI.getAllocatedType());
2550  unsigned Align =
2551  MinAlign(NewAIAlign, NewBeginOffset - NewAllocaBeginOffset);
2552  return (Ty && Align == DL.getABITypeAlignment(Ty)) ? 0 : Align;
2553  }
2554 
2555  unsigned getIndex(uint64_t Offset) {
2556  assert(VecTy && "Can only call getIndex when rewriting a vector");
2557  uint64_t RelOffset = Offset - NewAllocaBeginOffset;
2558  assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
2559  uint32_t Index = RelOffset / ElementSize;
2560  assert(Index * ElementSize == RelOffset);
2561  return Index;
2562  }
2563 
2564  void deleteIfTriviallyDead(Value *V) {
2565  Instruction *I = cast<Instruction>(V);
2566  if (isInstructionTriviallyDead(I))
2567  Pass.DeadInsts.insert(I);
2568  }
2569 
2570  Value *rewriteVectorizedLoadInst() {
2571  unsigned BeginIndex = getIndex(NewBeginOffset);
2572  unsigned EndIndex = getIndex(NewEndOffset);
2573  assert(EndIndex > BeginIndex && "Empty vector!");
2574 
2575  Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2576  return extractVector(IRB, V, BeginIndex, EndIndex, "vec");
2577  }
2578 
2579  Value *rewriteIntegerLoad(LoadInst &LI) {
2580  assert(IntTy && "We cannot insert an integer to the alloca");
2581  assert(!LI.isVolatile());
2582  Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2583  V = convertValue(DL, IRB, V, IntTy);
2584  assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2585  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2586  if (Offset > 0 || NewEndOffset < NewAllocaEndOffset)
2587  V = extractInteger(DL, IRB, V, cast<IntegerType>(LI.getType()), Offset,
2588  "extract");
2589  return V;
2590  }
2591 
2592  bool visitLoadInst(LoadInst &LI) {
2593  DEBUG(dbgs() << " original: " << LI << "\n");
2594  Value *OldOp = LI.getOperand(0);
2595  assert(OldOp == OldPtr);
2596 
2597  Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
2598  : LI.getType();
2599  const bool IsLoadPastEnd = DL.getTypeStoreSize(TargetTy) > SliceSize;
2600  bool IsPtrAdjusted = false;
2601  Value *V;
2602  if (VecTy) {
2603  V = rewriteVectorizedLoadInst();
2604  } else if (IntTy && LI.getType()->isIntegerTy()) {
2605  V = rewriteIntegerLoad(LI);
2606  } else if (NewBeginOffset == NewAllocaBeginOffset &&
2607  NewEndOffset == NewAllocaEndOffset &&
2608  (canConvertValue(DL, NewAllocaTy, TargetTy) ||
2609  (IsLoadPastEnd && NewAllocaTy->isIntegerTy() &&
2610  TargetTy->isIntegerTy()))) {
2611  LoadInst *NewLI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(),
2612  LI.isVolatile(), LI.getName());
2613  if (LI.isVolatile())
2614  NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
2615  V = NewLI;
2616 
2617  // If this is an integer load past the end of the slice (which means the
2618  // bytes outside the slice are undef or this load is dead) just forcibly
2619  // fix the integer size with correct handling of endianness.
2620  if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2621  if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
2622  if (AITy->getBitWidth() < TITy->getBitWidth()) {
2623  V = IRB.CreateZExt(V, TITy, "load.ext");
2624  if (DL.isBigEndian())
2625  V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2626  "endian_shift");
2627  }
2628  } else {
2629  Type *LTy = TargetTy->getPointerTo();
2630  LoadInst *NewLI = IRB.CreateAlignedLoad(getNewAllocaSlicePtr(IRB, LTy),
2631  getSliceAlign(TargetTy),
2632  LI.isVolatile(), LI.getName());
2633  if (LI.isVolatile())
2634  NewLI->setAtomic(LI.getOrdering(), LI.getSynchScope());
2635 
2636  V = NewLI;
2637  IsPtrAdjusted = true;
2638  }
2639  V = convertValue(DL, IRB, V, TargetTy);
2640 
2641  if (IsSplit) {
2642  assert(!LI.isVolatile());
2643  assert(LI.getType()->isIntegerTy() &&
2644  "Only integer type loads and stores are split");
2645  assert(SliceSize < DL.getTypeStoreSize(LI.getType()) &&
2646  "Split load isn't smaller than original load");
2647  assert(LI.getType()->getIntegerBitWidth() ==
2648  DL.getTypeStoreSizeInBits(LI.getType()) &&
2649  "Non-byte-multiple bit width");
2650  // Move the insertion point just past the load so that we can refer to it.
2651  IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
2652  // Create a placeholder value with the same type as LI to use as the
2653  // basis for the new value. This allows us to replace the uses of LI with
2654  // the computed value, and then replace the placeholder with LI, leaving
2655  // LI only used for this computation.
2656  Value *Placeholder =
2657  new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
2658  V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
2659  "insert");
2660  LI.replaceAllUsesWith(V);
2661  Placeholder->replaceAllUsesWith(&LI);
2662  delete Placeholder;
2663  } else {
2664  LI.replaceAllUsesWith(V);
2665  }
2666 
2667  Pass.DeadInsts.insert(&LI);
2668  deleteIfTriviallyDead(OldOp);
2669  DEBUG(dbgs() << " to: " << *V << "\n");
2670  return !LI.isVolatile() && !IsPtrAdjusted;
2671  }
2672 
2673  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp) {
2674  if (V->getType() != VecTy) {
2675  unsigned BeginIndex = getIndex(NewBeginOffset);
2676  unsigned EndIndex = getIndex(NewEndOffset);
2677  assert(EndIndex > BeginIndex && "Empty vector!");
2678  unsigned NumElements = EndIndex - BeginIndex;
2679  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2680  Type *SliceTy = (NumElements == 1)
2681  ? ElementTy
2682  : VectorType::get(ElementTy, NumElements);
2683  if (V->getType() != SliceTy)
2684  V = convertValue(DL, IRB, V, SliceTy);
2685 
2686  // Mix in the existing elements.
2687  Value *Old = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
2688  V = insertVector(IRB, Old, V, BeginIndex, "vec");
2689  }
2690  StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2691  Pass.DeadInsts.insert(&SI);
2692 
2693  (void)Store;
2694  DEBUG(dbgs() << " to: " << *Store << "\n");
2695  return true;
2696  }
2697 
2698  bool rewriteIntegerStore(Value *V, StoreInst &SI) {
2699  assert(IntTy && "We cannot extract an integer from the alloca");
2700  assert(!SI.isVolatile());
2701  if (DL.getTypeSizeInBits(V->getType()) != IntTy->getBitWidth()) {
2702  Value *Old =
2703  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2704  Old = convertValue(DL, IRB, Old, IntTy);
2705  assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
2706  uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
2707  V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
2708  }
2709  V = convertValue(DL, IRB, V, NewAllocaTy);
2710  StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
2711  Pass.DeadInsts.insert(&SI);
2712  (void)Store;
2713  DEBUG(dbgs() << " to: " << *Store << "\n");
2714  return true;
2715  }
2716 
2717  bool visitStoreInst(StoreInst &SI) {
2718  DEBUG(dbgs() << " original: " << SI << "\n");
2719  Value *OldOp = SI.getOperand(1);
2720  assert(OldOp == OldPtr);
2721 
2722  Value *V = SI.getValueOperand();
2723 
2724  // Strip all inbounds GEPs and pointer casts to try to dig out any root
2725  // alloca that should be re-examined after promoting this alloca.
2726  if (V->getType()->isPointerTy())
2727  if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
2728  Pass.PostPromotionWorklist.insert(AI);
2729 
2730  if (SliceSize < DL.getTypeStoreSize(V->getType())) {
2731  assert(!SI.isVolatile());
2732  assert(V->getType()->isIntegerTy() &&
2733  "Only integer type loads and stores are split");
2734  assert(V->getType()->getIntegerBitWidth() ==
2735  DL.getTypeStoreSizeInBits(V->getType()) &&
2736  "Non-byte-multiple bit width");
2737  IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
2738  V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
2739  "extract");
2740  }
2741 
2742  if (VecTy)
2743  return rewriteVectorizedStoreInst(V, SI, OldOp);
2744  if (IntTy && V->getType()->isIntegerTy())
2745  return rewriteIntegerStore(V, SI);
2746 
2747  const bool IsStorePastEnd = DL.getTypeStoreSize(V->getType()) > SliceSize;
2748  StoreInst *NewSI;
2749  if (NewBeginOffset == NewAllocaBeginOffset &&
2750  NewEndOffset == NewAllocaEndOffset &&
2751  (canConvertValue(DL, V->getType(), NewAllocaTy) ||
2752  (IsStorePastEnd && NewAllocaTy->isIntegerTy() &&
2753  V->getType()->isIntegerTy()))) {
2754  // If this is an integer store past the end of the slice (and thus the bytes
2755  // past that point are irrelevant or this is unreachable), truncate the
2756  // value prior to storing.
2757  if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
2758  if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2759  if (VITy->getBitWidth() > AITy->getBitWidth()) {
2760  if (DL.isBigEndian())
2761  V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
2762  "endian_shift");
2763  V = IRB.CreateTrunc(V, AITy, "load.trunc");
2764  }
2765 
2766  V = convertValue(DL, IRB, V, NewAllocaTy);
2767  NewSI = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2768  SI.isVolatile());
2769  } else {
2770  Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo());
2771  NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
2772  SI.isVolatile());
2773  }
2774  if (SI.isVolatile())
2775  NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
2776  Pass.DeadInsts.insert(&SI);
2777  deleteIfTriviallyDead(OldOp);
2778 
2779  DEBUG(dbgs() << " to: " << *NewSI << "\n");
2780  return NewSI->getPointerOperand() == &NewAI && !SI.isVolatile();
2781  }
2782 
2783  /// \brief Compute an integer value from splatting an i8 across the given
2784  /// number of bytes.
2785  ///
2786  /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
2787  /// call this routine.
2788  /// FIXME: Heed the advice above.
2789  ///
2790  /// \param V The i8 value to splat.
2791  /// \param Size The number of bytes in the output (assuming i8 is one byte)
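  /// For example (informally), splatting the byte 0xAB across 4 bytes yields
  /// 0xABABABAB: the zero-extended byte is multiplied by 0x01010101, which is
  /// the all-ones i32 divided by the all-ones i8.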
2792  Value *getIntegerSplat(Value *V, unsigned Size) {
2793  assert(Size > 0 && "Expected a positive number of bytes.");
2794  IntegerType *VTy = cast<IntegerType>(V->getType());
2795  assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
2796  if (Size == 1)
2797  return V;
2798 
2799  Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
2800  V = IRB.CreateMul(
2801  IRB.CreateZExt(V, SplatIntTy, "zext"),
2802  ConstantExpr::getUDiv(
2803  Constant::getAllOnesValue(SplatIntTy),
2804  ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()),
2805  SplatIntTy)),
2806  "isplat");
2807  return V;
2808  }
2809 
2810  /// \brief Compute a vector splat for a given element value.
2811  Value *getVectorSplat(Value *V, unsigned NumElements) {
2812  V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
2813  DEBUG(dbgs() << " splat: " << *V << "\n");
2814  return V;
2815  }
2816 
2817  bool visitMemSetInst(MemSetInst &II) {
2818  DEBUG(dbgs() << " original: " << II << "\n");
2819  assert(II.getRawDest() == OldPtr);
2820 
2821  // If the memset has a variable size, it cannot be split, just adjust the
2822  // pointer to the new alloca.
2823  if (!isa<Constant>(II.getLength())) {
2824  assert(!IsSplit);
2825  assert(NewBeginOffset == BeginOffset);
2826  II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
2827  Type *CstTy = II.getAlignmentCst()->getType();
2828  II.setAlignment(ConstantInt::get(CstTy, getSliceAlign()));
2829 
2830  deleteIfTriviallyDead(OldPtr);
2831  return false;
2832  }
2833 
2834  // Record this instruction for deletion.
2835  Pass.DeadInsts.insert(&II);
2836 
2837  Type *AllocaTy = NewAI.getAllocatedType();
2838  Type *ScalarTy = AllocaTy->getScalarType();
2839 
2840  // If this doesn't map cleanly onto the alloca type, and that type isn't
2841  // a single value type, just emit a memset.
2842  if (!VecTy && !IntTy &&
2843  (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
2844  SliceSize != DL.getTypeStoreSize(AllocaTy) ||
2845  !AllocaTy->isSingleValueType() ||
2846  !DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy)) ||
2847  DL.getTypeSizeInBits(ScalarTy) % 8 != 0)) {
2848  Type *SizeTy = II.getLength()->getType();
2849  Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
2850  CallInst *New = IRB.CreateMemSet(
2851  getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
2852  getSliceAlign(), II.isVolatile());
2853  (void)New;
2854  DEBUG(dbgs() << " to: " << *New << "\n");
2855  return false;
2856  }
2857 
2858  // If we can represent this as a simple value, we have to build the actual
2859  // value to store, which requires expanding the byte present in memset to
2860  // a sensible representation for the alloca type. This is essentially
2861  // splatting the byte to a sufficiently wide integer, splatting it across
2862  // any desired vector width, and bitcasting to the final type.
2863  Value *V;
2864 
2865  if (VecTy) {
2866  // If this is a memset of a vectorized alloca, insert it.
2867  assert(ElementTy == ScalarTy);
2868 
2869  unsigned BeginIndex = getIndex(NewBeginOffset);
2870  unsigned EndIndex = getIndex(NewEndOffset);
2871  assert(EndIndex > BeginIndex && "Empty vector!");
2872  unsigned NumElements = EndIndex - BeginIndex;
2873  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2874 
2875  Value *Splat =
2876  getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ElementTy) / 8);
2877  Splat = convertValue(DL, IRB, Splat, ElementTy);
2878  if (NumElements > 1)
2879  Splat = getVectorSplat(Splat, NumElements);
2880 
2881  Value *Old =
2882  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2883  V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
2884  } else if (IntTy) {
2885  // If this is a memset on an alloca where we can widen stores, insert the
2886  // set integer.
2887  assert(!II.isVolatile());
2888 
2889  uint64_t Size = NewEndOffset - NewBeginOffset;
2890  V = getIntegerSplat(II.getValue(), Size);
2891 
2892  if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
2893  EndOffset != NewAllocaBeginOffset)) {
2894  Value *Old =
2895  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
2896  Old = convertValue(DL, IRB, Old, IntTy);
2897  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
2898  V = insertInteger(DL, IRB, Old, V, Offset, "insert");
2899  } else {
2900  assert(V->getType() == IntTy &&
2901  "Wrong type for an alloca wide integer!");
2902  }
2903  V = convertValue(DL, IRB, V, AllocaTy);
2904  } else {
2905  // Established these invariants above.
2906  assert(NewBeginOffset == NewAllocaBeginOffset);
2907  assert(NewEndOffset == NewAllocaEndOffset);
2908 
2909  V = getIntegerSplat(II.getValue(), DL.getTypeSizeInBits(ScalarTy) / 8);
2910  if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
2911  V = getVectorSplat(V, AllocaVecTy->getNumElements());
2912 
2913  V = convertValue(DL, IRB, V, AllocaTy);
2914  }
2915 
2916  Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(),
2917  II.isVolatile());
2918  (void)New;
2919  DEBUG(dbgs() << " to: " << *New << "\n");
2920  return !II.isVolatile();
2921  }
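  // Illustrative sketch (names assumed, not taken from this file): a memset
  // that exactly covers a slice rewritten onto a <4 x float> alloca ends up as
  // a splatted value stored directly rather than as an intrinsic call:
  //   call void @llvm.memset.p0i8.i64(i8* %slice, i8 0, i64 16, i32 4, i1 false)
  // becomes, roughly:
  //   store <4 x float> zeroinitializer, <4 x float>* %a.sroa.0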
2922 
2923  bool visitMemTransferInst(MemTransferInst &II) {
2924  // Rewriting of memory transfer instructions can be a bit tricky. We break
2925  // them into two categories: split intrinsics and unsplit intrinsics.
2926 
2927  DEBUG(dbgs() << " original: " << II << "\n");
2928 
2929  bool IsDest = &II.getRawDestUse() == OldUse;
2930  assert((IsDest && II.getRawDest() == OldPtr) ||
2931  (!IsDest && II.getRawSource() == OldPtr));
2932 
2933  unsigned SliceAlign = getSliceAlign();
2934 
2935  // For unsplit intrinsics, we simply modify the source and destination
2936  // pointers in place. This isn't just an optimization, it is a matter of
2937  // correctness. With unsplit intrinsics we may be dealing with transfers
2938  // within a single alloca before SROA ran, or with transfers that have
2939  // a variable length. We may also be dealing with memmove instead of
2940  // memcpy, and so simply updating the pointers is all that is necessary for
2941  // us to update both source and dest of a single call.
2942  if (!IsSplittable) {
2943  Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
2944  if (IsDest)
2945  II.setDest(AdjustedPtr);
2946  else
2947  II.setSource(AdjustedPtr);
2948 
2949  if (II.getAlignment() > SliceAlign) {
2950  Type *CstTy = II.getAlignmentCst()->getType();
2951  II.setAlignment(
2952  ConstantInt::get(CstTy, MinAlign(II.getAlignment(), SliceAlign)));
2953  }
2954 
2955  DEBUG(dbgs() << " to: " << II << "\n");
2956  deleteIfTriviallyDead(OldPtr);
2957  return false;
2958  }
2959  // For split transfer intrinsics we have an incredibly useful assurance:
2960  // the source and destination do not reside within the same alloca, and at
2961  // least one of them does not escape. This means that we can replace
2962  // memmove with memcpy, and we don't need to worry about all manner of
2963  // downsides to splitting and transforming the operations.
2964 
2965  // If this doesn't map cleanly onto the alloca type, and that type isn't
2966  // a single value type, just emit a memcpy.
2967  bool EmitMemCpy =
2968  !VecTy && !IntTy &&
2969  (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
2970  SliceSize != DL.getTypeStoreSize(NewAI.getAllocatedType()) ||
2971  !NewAI.getAllocatedType()->isSingleValueType());
2972 
2973  // If we're just going to emit a memcpy, the alloca hasn't changed, and the
2974  // size hasn't been shrunk based on analysis of the viable range, this is
2975  // a no-op.
2976  if (EmitMemCpy && &OldAI == &NewAI) {
2977  // Ensure the start lines up.
2978  assert(NewBeginOffset == BeginOffset);
2979 
2980  // Rewrite the size as needed.
2981  if (NewEndOffset != EndOffset)
2982  II.setLength(ConstantInt::get(II.getLength()->getType(),
2983  NewEndOffset - NewBeginOffset));
2984  return false;
2985  }
2986  // Record this instruction for deletion.
2987  Pass.DeadInsts.insert(&II);
2988 
2989  // Strip all inbounds GEPs and pointer casts to try to dig out any root
2990  // alloca that should be re-examined after rewriting this instruction.
2991  Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
2992  if (AllocaInst *AI =
2993  dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
2994  assert(AI != &OldAI && AI != &NewAI &&
2995  "Splittable transfers cannot reach the same alloca on both ends.");
2996  Pass.Worklist.insert(AI);
2997  }
2998 
2999  Type *OtherPtrTy = OtherPtr->getType();
3000  unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3001 
3002  // Compute the relative offset for the other pointer within the transfer.
3003  unsigned IntPtrWidth = DL.getPointerSizeInBits(OtherAS);
3004  APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
3005  unsigned OtherAlign = MinAlign(II.getAlignment() ? II.getAlignment() : 1,
3006  OtherOffset.zextOrTrunc(64).getZExtValue());
3007 
3008  if (EmitMemCpy) {
3009  // Compute the other pointer, folding as much as possible to produce
3010  // a single, simple GEP in most cases.
3011  OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3012  OtherPtr->getName() + ".");
3013 
3014  Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3015  Type *SizeTy = II.getLength()->getType();
3016  Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3017 
3018  CallInst *New = IRB.CreateMemCpy(
3019  IsDest ? OurPtr : OtherPtr, IsDest ? OtherPtr : OurPtr, Size,
3020  MinAlign(SliceAlign, OtherAlign), II.isVolatile());
3021  (void)New;
3022  DEBUG(dbgs() << " to: " << *New << "\n");
3023  return false;
3024  }
3025 
3026  bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3027  NewEndOffset == NewAllocaEndOffset;
3028  uint64_t Size = NewEndOffset - NewBeginOffset;
3029  unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3030  unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3031  unsigned NumElements = EndIndex - BeginIndex;
3032  IntegerType *SubIntTy =
3033  IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3034 
3035  // Reset the other pointer type to match the register type we're going to
3036  // use, but using the address space of the original other pointer.
3037  if (VecTy && !IsWholeAlloca) {
3038  if (NumElements == 1)
3039  OtherPtrTy = VecTy->getElementType();
3040  else
3041  OtherPtrTy = VectorType::get(VecTy->getElementType(), NumElements);
3042 
3043  OtherPtrTy = OtherPtrTy->getPointerTo(OtherAS);
3044  } else if (IntTy && !IsWholeAlloca) {
3045  OtherPtrTy = SubIntTy->getPointerTo(OtherAS);
3046  } else {
3047  OtherPtrTy = NewAllocaTy->getPointerTo(OtherAS);
3048  }
3049 
3050  Value *SrcPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3051  OtherPtr->getName() + ".");
3052  unsigned SrcAlign = OtherAlign;
3053  Value *DstPtr = &NewAI;
3054  unsigned DstAlign = SliceAlign;
3055  if (!IsDest) {
3056  std::swap(SrcPtr, DstPtr);
3057  std::swap(SrcAlign, DstAlign);
3058  }
3059 
3060  Value *Src;
3061  if (VecTy && !IsWholeAlloca && !IsDest) {
3062  Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
3063  Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3064  } else if (IntTy && !IsWholeAlloca && !IsDest) {
3065  Src = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "load");
3066  Src = convertValue(DL, IRB, Src, IntTy);
3067  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3068  Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3069  } else {
3070  Src =
3071  IRB.CreateAlignedLoad(SrcPtr, SrcAlign, II.isVolatile(), "copyload");
3072  }
3073 
3074  if (VecTy && !IsWholeAlloca && IsDest) {
3075  Value *Old =
3076  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
3077  Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3078  } else if (IntTy && !IsWholeAlloca && IsDest) {
3079  Value *Old =
3080  IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), "oldload");
3081  Old = convertValue(DL, IRB, Old, IntTy);
3082  uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3083  Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3084  Src = convertValue(DL, IRB, Src, NewAllocaTy);
3085  }
3086 
3087  StoreInst *Store = cast<StoreInst>(
3088  IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3089  (void)Store;
3090  DEBUG(dbgs() << " to: " << *Store << "\n");
3091  return !II.isVolatile();
3092  }
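  // Illustrative sketch (names assumed, not taken from this file): a split
  // memcpy whose destination slice maps onto a single-value new alloca is
  // rewritten as a plain load/store pair instead of a narrower memcpy:
  //   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i32 4, i1 false)
  // becomes, roughly:
  //   %copyload = load i32* %src.i32
  //   store i32 %copyload, i32* %a.sroa.0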
3093 
3094  bool visitIntrinsicInst(IntrinsicInst &II) {
3095  assert(II.getIntrinsicID() == Intrinsic::lifetime_start ||
3096  II.getIntrinsicID() == Intrinsic::lifetime_end);
3097  DEBUG(dbgs() << " original: " << II << "\n");
3098  assert(II.getArgOperand(1) == OldPtr);
3099 
3100  // Record this instruction for deletion.
3101  Pass.DeadInsts.insert(&II);
3102 
3103  ConstantInt *Size =
3104  ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
3105  NewEndOffset - NewBeginOffset);
3106  Value *Ptr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3107  Value *New;
3108  if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3109  New = IRB.CreateLifetimeStart(Ptr, Size);
3110  else
3111  New = IRB.CreateLifetimeEnd(Ptr, Size);
3112 
3113  (void)New;
3114  DEBUG(dbgs() << " to: " << *New << "\n");
3115  return true;
3116  }
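  // Illustrative sketch (names assumed, not taken from this file): a lifetime
  // marker covering the whole original alloca is narrowed to the slice that
  // the new alloca represents:
  //   call void @llvm.lifetime.start(i64 12, i8* %a.cast)
  // becomes, roughly:
  //   call void @llvm.lifetime.start(i64 4, i8* %a.sroa.0.cast)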
3117 
3118  bool visitPHINode(PHINode &PN) {
3119  DEBUG(dbgs() << " original: " << PN << "\n");
3120  assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3121  assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3122 
3123  // We would like to compute a new pointer in only one place, but have it be
3124  // as local as possible to the PHI. To do that, we re-use the location of
3125  // the old pointer, which necessarily must be in the right position to
3126  // dominate the PHI.
3127  IRBuilderTy PtrBuilder(IRB);
3128  if (isa<PHINode>(OldPtr))
3129  PtrBuilder.SetInsertPoint(OldPtr->getParent()->getFirstInsertionPt());
3130  else
3131  PtrBuilder.SetInsertPoint(OldPtr);
3132  PtrBuilder.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3133 
3134  Value *NewPtr = getNewAllocaSlicePtr(PtrBuilder, OldPtr->getType());
3135  // Replace the operands which were using the old pointer.
3136  std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
3137 
3138  DEBUG(dbgs() << " to: " << PN << "\n");
3139  deleteIfTriviallyDead(OldPtr);
3140 
3141  // PHIs can't be promoted on their own, but often can be speculated. We
3142  // check the speculation outside of the rewriter so that we see the
3143  // fully-rewritten alloca.
3144  PHIUsers.insert(&PN);
3145  return true;
3146  }
3147 
3148  bool visitSelectInst(SelectInst &SI) {
3149  DEBUG(dbgs() << " original: " << SI << "\n");
3150  assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
3151  "Pointer isn't an operand!");
3152  assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
3153  assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
3154 
3155  Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3156  // Replace the operands which were using the old pointer.
3157  if (SI.getOperand(1) == OldPtr)
3158  SI.setOperand(1, NewPtr);
3159  if (SI.getOperand(2) == OldPtr)
3160  SI.setOperand(2, NewPtr);
3161 
3162  DEBUG(dbgs() << " to: " << SI << "\n");
3163  deleteIfTriviallyDead(OldPtr);
3164 
3165  // Selects can't be promoted on their own, but often can be speculated. We
3166  // check the speculation outside of the rewriter so that we see the
3167  // fully-rewritten alloca.
3168  SelectUsers.insert(&SI);
3169  return true;
3170  }
3171 };
3172 }
3173 
3174 namespace {
3175 /// \brief Visitor to rewrite aggregate loads and stores as scalar.
3176 ///
3177 /// This pass aggressively rewrites all aggregate loads and stores on
3178 /// a particular pointer (or any pointer derived from it which we can identify)
3179 /// with scalar loads and stores.
3180 class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
3181  // Befriend the base class so it can delegate to private visit methods.
3182  friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>;
3183 
3184  const DataLayout &DL;
3185 
3186  /// Queue of pointer uses to analyze and potentially rewrite.
3187  SmallVector<Use *, 8> Queue;
3188 
3189  /// Set to prevent us from cycling with phi nodes and loops.
3190  SmallPtrSet<User *, 8> Visited;
3191 
3192  /// The current pointer use being rewritten. This is used to dig up the used
3193  /// value (as opposed to the user).
3194  Use *U;
3195 
3196 public:
3197  AggLoadStoreRewriter(const DataLayout &DL) : DL(DL) {}
3198 
3199  /// Rewrite loads and stores through a pointer and all pointers derived from
3200  /// it.
3201  bool rewrite(Instruction &I) {
3202  DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
3203  enqueueUsers(I);
3204  bool Changed = false;
3205  while (!Queue.empty()) {
3206  U = Queue.pop_back_val();
3207  Changed |= visit(cast<Instruction>(U->getUser()));
3208  }
3209  return Changed;
3210  }
3211 
3212 private:
3213  /// Enqueue all the users of the given instruction for further processing.
3214  /// This uses a set to de-duplicate users.
3215  void enqueueUsers(Instruction &I) {
3216  for (Use &U : I.uses())
3217  if (Visited.insert(U.getUser()).second)
3218  Queue.push_back(&U);
3219  }
3220 
3221  // Conservative default is to not rewrite anything.
3222  bool visitInstruction(Instruction &I) { return false; }
3223 
3224  /// \brief Generic recursive split emission class.
3225  template <typename Derived> class OpSplitter {
3226  protected:
3227  /// The builder used to form new instructions.
3228  IRBuilderTy IRB;
3229  /// The indices to be used with insert- or extractvalue to select the
3230  /// appropriate value within the aggregate.
3231  SmallVector<unsigned, 4> Indices;
3232  /// The indices to a GEP instruction which will move Ptr to the correct slot
3233  /// within the aggregate.
3234  SmallVector<Value *, 4> GEPIndices;
3235  /// The base pointer of the original op, used as a base for GEPing the
3236  /// split operations.
3237  Value *Ptr;
3238 
3239  /// Initialize the splitter with an insertion point, Ptr and start with a
3240  /// single zero GEP index.
3241  OpSplitter(Instruction *InsertionPoint, Value *Ptr)
3242  : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {}
3243 
3244  public:
3245  /// \brief Generic recursive split emission routine.
3246  ///
3247  /// This method recursively splits an aggregate op (load or store) into
3248  /// scalar or vector ops. It splits recursively until it hits a single value
3249  /// and emits that single value operation via the template argument.
3250  ///
3251  /// The logic of this routine relies on GEPs and insertvalue and
3252  /// extractvalue all operating with the same fundamental index list, merely
3253  /// formatted differently (GEPs need actual values).
3254  ///
3255  /// \param Ty The type being split recursively into smaller ops.
3256  /// \param Agg The aggregate value being built up or stored, depending on
3257  /// whether this is splitting a load or a store respectively.
3258  void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
3259  if (Ty->isSingleValueType())
3260  return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name);
3261 
3262  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3263  unsigned OldSize = Indices.size();
3264  (void)OldSize;
3265  for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
3266  ++Idx) {
3267  assert(Indices.size() == OldSize && "Did not return to the old size");
3268  Indices.push_back(Idx);
3269  GEPIndices.push_back(IRB.getInt32(Idx));
3270  emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
3271  GEPIndices.pop_back();
3272  Indices.pop_back();
3273  }
3274  return;
3275  }
3276 
3277  if (StructType *STy = dyn_cast<StructType>(Ty)) {
3278  unsigned OldSize = Indices.size();
3279  (void)OldSize;
3280  for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
3281  ++Idx) {
3282  assert(Indices.size() == OldSize && "Did not return to the old size");
3283  Indices.push_back(Idx);
3284  GEPIndices.push_back(IRB.getInt32(Idx));
3285  emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
3286  GEPIndices.pop_back();
3287  Indices.pop_back();
3288  }
3289  return;
3290  }
3291 
3292  llvm_unreachable("Only arrays and structs are aggregate loadable types");
3293  }
3294  };
3295 
3296  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
3297  LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr)
3298  : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {}
3299 
3300  /// Emit a leaf load of a single value. This is called at the leaves of the
3301  /// recursive emission to actually load values.
3302  void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
3303  assert(Ty->isSingleValueType());
3304  // Load the single value and insert it using the indices.
3305  Value *GEP =
3306  IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep");
3307  Value *Load = IRB.CreateLoad(GEP, Name + ".load");
3308  Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
3309  DEBUG(dbgs() << " to: " << *Load << "\n");
3310  }
3311  };
3312 
3313  bool visitLoadInst(LoadInst &LI) {
3314  assert(LI.getPointerOperand() == *U);
3315  if (!LI.isSimple() || LI.getType()->isSingleValueType())
3316  return false;
3317 
3318  // We have an aggregate being loaded, split it apart.
3319  DEBUG(dbgs() << " original: " << LI << "\n");
3320  LoadOpSplitter Splitter(&LI, *U);
3321  Value *V = UndefValue::get(LI.getType());
3322  Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
3323  LI.replaceAllUsesWith(V);
3324  LI.eraseFromParent();
3325  return true;
3326  }
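  // Illustrative sketch (names assumed, not taken from this file): a
  // first-class aggregate load such as
  //   %agg = load { i32, float }* %p
  // is rewritten by the splitter into per-element loads stitched together:
  //   %gep0 = getelementptr inbounds { i32, float }* %p, i32 0, i32 0
  //   %l0   = load i32* %gep0
  //   %v0   = insertvalue { i32, float } undef, i32 %l0, 0
  //   %gep1 = getelementptr inbounds { i32, float }* %p, i32 0, i32 1
  //   %l1   = load float* %gep1
  //   %agg2 = insertvalue { i32, float } %v0, float %l1, 1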
3327 
3328  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
3329  StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr)
3330  : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {}
3331 
3332  /// Emit a leaf store of a single value. This is called at the leaves of the
3333  /// recursive emission to actually produce stores.
3334  void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) {
3335  assert(Ty->isSingleValueType());
3336  // Extract the single value and store it using the indices.
3337  Value *Store = IRB.CreateStore(
3338  IRB.CreateExtractValue(Agg, Indices, Name + ".extract"),
3339  IRB.CreateInBoundsGEP(nullptr, Ptr, GEPIndices, Name + ".gep"));
3340  (void)Store;
3341  DEBUG(dbgs() << " to: " << *Store << "\n");
3342  }
3343  };
3344 
3345  bool visitStoreInst(StoreInst &SI) {
3346  if (!SI.isSimple() || SI.getPointerOperand() != *U)
3347  return false;
3348  Value *V = SI.getValueOperand();
3349  if (V->getType()->isSingleValueType())
3350  return false;
3351 
3352  // We have an aggregate being stored, split it apart.
3353  DEBUG(dbgs() << " original: " << SI << "\n");
3354  StoreOpSplitter Splitter(&SI, *U);
3355  Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
3356  SI.eraseFromParent();
3357  return true;
3358  }
3359 
3360  bool visitBitCastInst(BitCastInst &BC) {
3361  enqueueUsers(BC);
3362  return false;
3363  }
3364 
3365  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
3366  enqueueUsers(GEPI);
3367  return false;
3368  }
3369 
3370  bool visitPHINode(PHINode &PN) {
3371  enqueueUsers(PN);
3372  return false;
3373  }
3374 
3375  bool visitSelectInst(SelectInst &SI) {
3376  enqueueUsers(SI);
3377  return false;
3378  }
3379 };
3380 }
3381 
3382 /// \brief Strip aggregate type wrapping.
3383 ///
3384 /// This removes no-op aggregate types wrapping an underlying type. It will
3385 /// strip as many layers of types as it can without changing either the type
3386 /// size or the allocated size.
3387 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
3388  if (Ty->isSingleValueType())
3389  return Ty;
3390 
3391  uint64_t AllocSize = DL.getTypeAllocSize(Ty);
3392  uint64_t TypeSize = DL.getTypeSizeInBits(Ty);
3393 
3394  Type *InnerTy;
3395  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
3396  InnerTy = ArrTy->getElementType();
3397  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
3398  const StructLayout *SL = DL.getStructLayout(STy);
3399  unsigned Index = SL->getElementContainingOffset(0);
3400  InnerTy = STy->getElementType(Index);
3401  } else {
3402  return Ty;
3403  }
3404 
3405  if (AllocSize > DL.getTypeAllocSize(InnerTy) ||
3406  TypeSize > DL.getTypeSizeInBits(InnerTy))
3407  return Ty;
3408 
3409  return stripAggregateTypeWrapping(DL, InnerTy);
3410 }
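// Illustrative sketch (not taken from this file): wrapper layers that add no
// size are peeled down to the scalar they contain,
//   { [1 x i32] }  ->  [1 x i32]  ->  i32
// while a genuinely multi-element type such as { i32, i32 } is returned
// unchanged because stripping a layer would change its size.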
3411 
3412 /// \brief Try to find a partition of the aggregate type passed in for a given
3413 /// offset and size.
3414 ///
3415 /// This recurses through the aggregate type and tries to compute a subtype
3416 /// based on the offset and size. When the offset and size span a sub-section
3417 /// of an array, it will even compute a new array type for that sub-section,
3418 /// and the same for structs.
3419 ///
3420 /// Note that this routine is very strict and tries to find a partition of the
3421 /// type which produces the *exact* right offset and size. It is not forgiving
3422 /// when the size or offset causes either end of the type-based partition to be off.
3423 /// Also, this is a best-effort routine. It is reasonable to give up and not
3424 /// return a type if necessary.
3425 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
3426  uint64_t Size) {
3427  if (Offset == 0 && DL.getTypeAllocSize(Ty) == Size)
3428  return stripAggregateTypeWrapping(DL, Ty);
3429  if (Offset > DL.getTypeAllocSize(Ty) ||
3430  (DL.getTypeAllocSize(Ty) - Offset) < Size)
3431  return nullptr;
3432 
3433  if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
3434  // We can't partition pointers...
3435  if (SeqTy->isPointerTy())
3436  return nullptr;
3437 
3438  Type *ElementTy = SeqTy->getElementType();
3439  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
3440  uint64_t NumSkippedElements = Offset / ElementSize;
3441  if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
3442  if (NumSkippedElements >= ArrTy->getNumElements())
3443  return nullptr;
3444  } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
3445  if (NumSkippedElements >= VecTy->getNumElements())
3446  return nullptr;
3447  }
3448  Offset -= NumSkippedElements * ElementSize;
3449 
3450  // First check if we need to recurse.
3451  if (Offset > 0 || Size < ElementSize) {
3452  // Bail if the partition ends in a different array element.
3453  if ((Offset + Size) > ElementSize)
3454  return nullptr;
3455  // Recurse through the element type trying to peel off offset bytes.
3456  return getTypePartition(DL, ElementTy, Offset, Size);
3457  }
3458  assert(Offset == 0);
3459 
3460  if (Size == ElementSize)
3461  return stripAggregateTypeWrapping(DL, ElementTy);
3462  assert(Size > ElementSize);
3463  uint64_t NumElements = Size / ElementSize;
3464  if (NumElements * ElementSize != Size)
3465  return nullptr;
3466  return ArrayType::get(ElementTy, NumElements);
3467  }
3468 
3469  StructType *STy = dyn_cast<StructType>(Ty);
3470  if (!STy)
3471  return nullptr;
3472 
3473  const StructLayout *SL = DL.getStructLayout(STy);
3474  if (Offset >= SL->getSizeInBytes())
3475  return nullptr;
3476  uint64_t EndOffset = Offset + Size;
3477  if (EndOffset > SL->getSizeInBytes())
3478  return nullptr;
3479 
3480  unsigned Index = SL->getElementContainingOffset(Offset);
3481  Offset -= SL->getElementOffset(Index);
3482 
3483  Type *ElementTy = STy->getElementType(Index);
3484  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
3485  if (Offset >= ElementSize)
3486  return nullptr; // The offset points into alignment padding.
3487 
3488  // See if any partition must be contained by the element.
3489  if (Offset > 0 || Size < ElementSize) {
3490  if ((Offset + Size) > ElementSize)
3491  return nullptr;
3492  return getTypePartition(DL, ElementTy, Offset, Size);
3493  }
3494  assert(Offset == 0);
3495 
3496  if (Size == ElementSize)
3497  return stripAggregateTypeWrapping(DL, ElementTy);
3498 
3499  StructType::element_iterator EI = STy->element_begin() + Index,
3500  EE = STy->element_end();
3501  if (EndOffset < SL->getSizeInBytes()) {
3502  unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
3503  if (Index == EndIndex)
3504  return nullptr; // Within a single element and its padding.
3505 
3506  // Don't try to form "natural" types if the elements don't line up with the
3507  // expected size.
3508  // FIXME: We could potentially recurse down through the last element in the
3509  // sub-struct to find a natural end point.
3510  if (SL->getElementOffset(EndIndex) != EndOffset)
3511  return nullptr;
3512 
3513  assert(Index < EndIndex);
3514  EE = STy->element_begin() + EndIndex;
3515  }
3516 
3517  // Try to build up a sub-structure.
3518  StructType *SubTy =
3519  StructType::get(STy->getContext(), makeArrayRef(EI, EE), STy->isPacked());
3520  const StructLayout *SubSL = DL.getStructLayout(SubTy);
3521  if (Size != SubSL->getSizeInBytes())
3522  return nullptr; // The sub-struct doesn't have quite the size needed.
3523 
3524  return SubTy;
3525 }
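// Illustrative sketch (not taken from this file): for
//   %t = type { i32, { float, float }, i8 }
// a query with Offset=4 and Size=8 lands exactly on the inner { float, float }
// and returns it, whereas Offset=4 and Size=6 ends in the middle of the second
// float, so no natural sub-type exists and nullptr is returned.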
3526 
3527 /// \brief Pre-split loads and stores to simplify rewriting.
3528 ///
3529 /// We want to break up the splittable load+store pairs as much as
3530 /// possible. This is important to do as a preprocessing step, as once we
3531 /// start rewriting the accesses to partitions of the alloca we lose the
3532 /// necessary information to correctly split apart paired loads and stores
3533 /// which both point into this alloca. The case to consider is something like
3534 /// the following:
3535 ///
3536 /// %a = alloca [12 x i8]
3537 /// %gep1 = getelementptr [12 x i8]* %a, i32 0, i32 0
3538 /// %gep2 = getelementptr [12 x i8]* %a, i32 0, i32 4
3539 /// %gep3 = getelementptr [12 x i8]* %a, i32 0, i32 8
3540 /// %iptr1 = bitcast i8* %gep1 to i64*
3541 /// %iptr2 = bitcast i8* %gep2 to i64*
3542 /// %fptr1 = bitcast i8* %gep1 to float*
3543 /// %fptr2 = bitcast i8* %gep2 to float*
3544 /// %fptr3 = bitcast i8* %gep3 to float*
3545 /// store float 0.0, float* %fptr1
3546 /// store float 1.0, float* %fptr2
3547 /// %v = load i64* %iptr1
3548 /// store i64 %v, i64* %iptr2
3549 /// %f1 = load float* %fptr2
3550 /// %f2 = load float* %fptr3
3551 ///
3552 /// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
3553 /// promote everything so we recover the 2 SSA values that should have been
3554 /// there all along.
3555 ///
3556 /// \returns true if any changes are made.
3557 bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
3558  DEBUG(dbgs() << "Pre-splitting loads and stores\n");
3559 
3560  // Track the loads and stores which are candidates for pre-splitting here, in
3561  // the order they first appear during the partition scan. These give stable
3562  // iteration order and a basis for tracking which loads and stores we
3563  // actually split.
3564  SmallVector<LoadInst *, 4> Loads;
3565  SmallVector<StoreInst *, 4> Stores;
3566 
3567  // We need to accumulate the splits required of each load or store where we
3568  // can find them via a direct lookup. This is important to cross-check loads
3569  // and stores against each other. We also track the slice so that we can kill
3570  // all the slices that end up split.
3571  struct SplitOffsets {
3572  Slice *S;
3573  std::vector<uint64_t> Splits;
3574  };
3575  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
3576 
3577  // Track loads out of this alloca which cannot, for any reason, be pre-split.
3578  // This is important as we also cannot pre-split stores of those loads!
3579  // FIXME: This is all pretty gross. It means that we can be more aggressive
3580  // in pre-splitting when the load feeding the store happens to come from
3581  // a separate alloca. Put another way, the effectiveness of SROA would be
3582  // decreased by a frontend which just concatenated all of its local allocas
3583  // into one big flat alloca. But defeating such patterns is exactly the job
3584  // SROA is tasked with! Sadly, to not have this discrepancy we would have to
3585  // change store pre-splitting to actually force pre-splitting of the load
3586  // that feeds it *and all stores*. That makes pre-splitting much harder, but
3587  // maybe it would make it more principled?
3588  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
3589 
3590  DEBUG(dbgs() << " Searching for candidate loads and stores\n");
3591  for (auto &P : AS.partitions()) {
3592  for (Slice &S : P) {
3593  Instruction *I = cast<Instruction>(S.getUse()->getUser());
3594  if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
3595  // If this was a load we have to track that it can't participate in any
3596  // pre-splitting!
3597  if (auto *LI = dyn_cast<LoadInst>(I))
3598  UnsplittableLoads.insert(LI);
3599  continue;
3600  }
3601  assert(P.endOffset() > S.beginOffset() &&
3602  "Empty or backwards partition!");
3603 
3604  // Determine if this is a pre-splittable slice.
3605  if (auto *LI = dyn_cast<LoadInst>(I)) {
3606  assert(!LI->isVolatile() && "Cannot split volatile loads!");
3607 
3608  // The load must be used exclusively to store into other pointers for
3609  // us to be able to arbitrarily pre-split it. The stores must also be
3610  // simple to avoid changing semantics.
3611  auto IsLoadSimplyStored = [](LoadInst *LI) {
3612  for (User *LU : LI->users()) {
3613  auto *SI = dyn_cast<StoreInst>(LU);
3614  if (!SI || !SI->isSimple())
3615  return false;
3616  }
3617  return true;
3618  };
3619  if (!IsLoadSimplyStored(LI)) {
3620  UnsplittableLoads.insert(LI);
3621  continue;
3622  }
3623 
3624  Loads.push_back(LI);
3625  } else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) {
3626  if (!SI ||
3627  S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
3628  continue;
3629  auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
3630  if (!StoredLoad || !StoredLoad->isSimple())
3631  continue;
3632  assert(!SI->isVolatile() && "Cannot split volatile stores!");
3633 
3634  Stores.push_back(SI);
3635  } else {
3636  // Other uses cannot be pre-split.
3637  continue;
3638  }
3639 
3640  // Record the initial split.
3641  DEBUG(dbgs() << " Candidate: " << *I << "\n");
3642  auto &Offsets = SplitOffsetsMap[I];
3643  assert(Offsets.Splits.empty() &&
3644  "Should not have splits the first time we see an instruction!");
3645  Offsets.S = &S;
3646  Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
3647  }
3648 
3649  // Now scan the already split slices, and add a split for any of them which
3650  // we're going to pre-split.
3651  for (Slice *S : P.splitSliceTails()) {
3652  auto SplitOffsetsMapI =
3653  SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
3654  if (SplitOffsetsMapI == SplitOffsetsMap.end())
3655  continue;
3656  auto &Offsets = SplitOffsetsMapI->second;
3657 
3658  assert(Offsets.S == S && "Found a mismatched slice!");
3659  assert(!Offsets.Splits.empty() &&
3660  "Cannot have an empty set of splits on the second partition!");
3661  assert(Offsets.Splits.back() ==
3662  P.beginOffset() - Offsets.S->beginOffset() &&
3663  "Previous split does not end where this one begins!");
3664 
3665  // Record each split. The last partition's end isn't needed as the size
3666  // of the slice dictates that.
3667  if (S->endOffset() > P.endOffset())
3668  Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
3669  }
3670  }
3671 
3672  // We may have split loads where some of their stores are split stores. For
3673  // such loads and stores, we can only pre-split them if their splits exactly
3674  // match relative to their starting offset. We have to verify this prior to
3675  // any rewriting.
3676  Stores.erase(
3677  std::remove_if(Stores.begin(), Stores.end(),
3678  [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
3679  // Lookup the load we are storing in our map of split
3680  // offsets.
3681  auto *LI = cast<LoadInst>(SI->getValueOperand());
3682  // If it was completely unsplittable, then we're done,
3683  // and this store can't be pre-split.
3684  if (UnsplittableLoads.count(LI))
3685  return true;
3686 
3687  auto LoadOffsetsI = SplitOffsetsMap.find(LI);
3688  if (LoadOffsetsI == SplitOffsetsMap.end())
3689  return false; // Unrelated loads are definitely safe.
3690  auto &LoadOffsets = LoadOffsetsI->second;
3691 
3692  // Now lookup the store's offsets.
3693  auto &StoreOffsets = SplitOffsetsMap[SI];
3694 
3695  // If the relative offsets of each split in the load and
3696  // store match exactly, then we can split them and we
3697  // don't need to remove them here.
3698  if (LoadOffsets.Splits == StoreOffsets.Splits)
3699  return false;
3700 
3701  DEBUG(dbgs()
3702  << " Mismatched splits for load and store:\n"
3703  << " " << *LI << "\n"
3704  << " " << *SI << "\n");
3705 
3706  // We've found a store and load that we need to split
3707  // with mismatched relative splits. Just give up on them
3708  // and remove both instructions from our list of
3709  // candidates.
3710  UnsplittableLoads.insert(LI);
3711  return true;
3712  }),
3713  Stores.end());
3714  // Now we have to go *back* through all the stores, because a later store may
3715  // have caused an earlier store's load to become unsplittable and if it is
3716  // unsplittable for the later store, then we can't rely on it being split in
3717  // the earlier store either.
3718  Stores.erase(std::remove_if(Stores.begin(), Stores.end(),
3719  [&UnsplittableLoads](StoreInst *SI) {
3720  auto *LI =
3721  cast<LoadInst>(SI->getValueOperand());
3722  return UnsplittableLoads.count(LI);
3723  }),
3724  Stores.end());
3725  // Once we've established all the loads that can't be split for some reason,
3726  // filter any that made it into our list out.
3727  Loads.erase(std::remove_if(Loads.begin(), Loads.end(),
3728  [&UnsplittableLoads](LoadInst *LI) {
3729  return UnsplittableLoads.count(LI);
3730  }),
3731  Loads.end());
3732 
3733 
3734  // If no loads or stores are left, there is no pre-splitting to be done for
3735  // this alloca.
3736  if (Loads.empty() && Stores.empty())
3737  return false;
3738 
3739  // From here on, we can't fail and will be building new accesses, so rig up
3740  // an IR builder.
3741  IRBuilderTy IRB(&AI);
3742 
3743  // Collect the new slices which we will merge into the alloca slices.
3744  SmallVector<Slice, 4> NewSlices;
3745 
3746  // Track any allocas we end up splitting loads and stores for so we iterate
3747  // on them.
3748  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
3749 
3750  // At this point, we have collected all of the loads and stores we can
3751  // pre-split, and the specific splits needed for them. We actually do the
3752  // splitting in a specific order so that we can handle the case where one of
3753  // the loads is the value operand to one of the stores.
3754  //
3755  // First, we rewrite all of the split loads, and just accumulate each split
3756  // load in a parallel structure. We also build the slices for them and append
3757  // them to the alloca slices.
3758  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
3759  std::vector<LoadInst *> SplitLoads;
3760  const DataLayout &DL = AI.getModule()->getDataLayout();
3761  for (LoadInst *LI : Loads) {
3762  SplitLoads.clear();
3763 
3764  IntegerType *Ty = cast<IntegerType>(LI->getType());
3765  uint64_t LoadSize = Ty->getBitWidth() / 8;
3766  assert(LoadSize > 0 && "Cannot have a zero-sized integer load!");
3767 
3768  auto &Offsets = SplitOffsetsMap[LI];
3769  assert(LoadSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
3770  "Slice size should always match load size exactly!");
3771  uint64_t BaseOffset = Offsets.S->beginOffset();
3772  assert(BaseOffset + LoadSize > BaseOffset &&
3773  "Cannot represent alloca access size using 64-bit integers!");
3774 
3775  Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
3776  IRB.SetInsertPoint(BasicBlock::iterator(LI));
3777 
3778  DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
3779 
3780  uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
3781  int Idx = 0, Size = Offsets.Splits.size();
3782  for (;;) {
3783  auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
3784  auto *PartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
3785  LoadInst *PLoad = IRB.CreateAlignedLoad(
3786  getAdjustedPtr(IRB, DL, BasePtr,
3787  APInt(DL.getPointerSizeInBits(), PartOffset),
3788  PartPtrTy, BasePtr->getName() + "."),
3789  getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
3790  LI->getName());
3791 
3792  // Append this load onto the list of split loads so we can find it later
3793  // to rewrite the stores.
3794  SplitLoads.push_back(PLoad);
3795 
3796  // Now build a new slice for the alloca.
3797  NewSlices.push_back(
3798  Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
3799  &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
3800  /*IsSplittable*/ false));
3801  DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
3802  << ", " << NewSlices.back().endOffset() << "): " << *PLoad
3803  << "\n");
3804 
3805  // See if we've handled all the splits.
3806  if (Idx >= Size)
3807  break;
3808 
3809  // Setup the next partition.
3810  PartOffset = Offsets.Splits[Idx];
3811  ++Idx;
3812  PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset;
3813  }
3814 
3815  // Now that we have the split loads, do the slow walk over all uses of the
3816  // load and rewrite them as split stores, or save the split loads to use
3817  // below if the store is going to be split there anyway.
3818  bool DeferredStores = false;
3819  for (User *LU : LI->users()) {
3820  StoreInst *SI = cast<StoreInst>(LU);
3821  if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
3822  DeferredStores = true;
3823  DEBUG(dbgs() << " Deferred splitting of store: " << *SI << "\n");
3824  continue;
3825  }
3826 
3827  Value *StoreBasePtr = SI->getPointerOperand();
3828  IRB.SetInsertPoint(BasicBlock::iterator(SI));
3829 
3830  DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
3831 
3832  for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
3833  LoadInst *PLoad = SplitLoads[Idx];
3834  uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
3835  auto *PartPtrTy =
3836  PLoad->getType()->getPointerTo(SI->getPointerAddressSpace());
3837 
3838  StoreInst *PStore = IRB.CreateAlignedStore(
3839  PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
3840  APInt(DL.getPointerSizeInBits(), PartOffset),
3841  PartPtrTy, StoreBasePtr->getName() + "."),
3842  getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
3843  (void)PStore;
3844  DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
3845  }
3846 
3847  // We want to immediately iterate on any allocas impacted by splitting
3848  // this store, and we have to track any promotable alloca (indicated by
3849  // a direct store) as needing to be resplit because it is no longer
3850  // promotable.
3851  if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
3852  ResplitPromotableAllocas.insert(OtherAI);
3853  Worklist.insert(OtherAI);
3854  } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
3855  StoreBasePtr->stripInBoundsOffsets())) {
3856  Worklist.insert(OtherAI);
3857  }
3858 
3859  // Mark the original store as dead.
3860  DeadInsts.insert(SI);
3861  }
3862 
3863  // Save the split loads if there are deferred stores among the users.
3864  if (DeferredStores)
3865  SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
3866 
3867  // Mark the original load as dead and kill the original slice.
3868  DeadInsts.insert(LI);
3869  Offsets.S->kill();
3870  }
3871 
3872  // Second, we rewrite all of the split stores. At this point, we know that
3873  // all loads from this alloca have been split already. For stores of such
3874  // loads, we can simply look up the pre-existing split loads. For stores of
3875  // other loads, we split those loads first and then write split stores of
3876  // them.
3877  for (StoreInst *SI : Stores) {
3878  auto *LI = cast<LoadInst>(SI->getValueOperand());
3879  IntegerType *Ty = cast<IntegerType>(LI->getType());
3880  uint64_t StoreSize = Ty->getBitWidth() / 8;
3881  assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
3882 
3883  auto &Offsets = SplitOffsetsMap[SI];
3884  assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
3885  "Slice size should always match store size exactly!");
3886  uint64_t BaseOffset = Offsets.S->beginOffset();
3887  assert(BaseOffset + StoreSize > BaseOffset &&
3888  "Cannot represent alloca access size using 64-bit integers!");
3889 
3890  Value *LoadBasePtr = LI->getPointerOperand();
3891  Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
3892 
3893  DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
3894 
3895  // Check whether we have an already split load.
3896  auto SplitLoadsMapI = SplitLoadsMap.find(LI);
3897  std::vector<LoadInst *> *SplitLoads = nullptr;
3898  if (SplitLoadsMapI != SplitLoadsMap.end()) {
3899  SplitLoads = &SplitLoadsMapI->second;
3900  assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
3901  "Too few split loads for the number of splits in the store!");
3902  } else {
3903  DEBUG(dbgs() << " of load: " << *LI << "\n");
3904  }
3905 
3906  uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
3907  int Idx = 0, Size = Offsets.Splits.size();
3908  for (;;) {
3909  auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
3910  auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
3911 
3912  // Either lookup a split load or create one.
3913  LoadInst *PLoad;
3914  if (SplitLoads) {
3915  PLoad = (*SplitLoads)[Idx];
3916  } else {
3917  IRB.SetInsertPoint(BasicBlock::iterator(LI));
3918  PLoad = IRB.CreateAlignedLoad(
3919  getAdjustedPtr(IRB, DL, LoadBasePtr,
3920  APInt(DL.getPointerSizeInBits(), PartOffset),
3921  PartPtrTy, LoadBasePtr->getName() + "."),
3922  getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
3923  LI->getName());
3924  }
3925 
3926  // And store this partition.
3927  IRB.SetInsertPoint(BasicBlock::iterator(SI));
3928  StoreInst *PStore = IRB.CreateAlignedStore(
3929  PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
3930  APInt(DL.getPointerSizeInBits(), PartOffset),
3931  PartPtrTy, StoreBasePtr->getName() + "."),
3932  getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
3933 
3934  // Now build a new slice for the alloca.
3935  NewSlices.push_back(
3936  Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
3937  &PStore->getOperandUse(PStore->getPointerOperandIndex()),
3938  /*IsSplittable*/ false));
3939  DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
3940  << ", " << NewSlices.back().endOffset() << "): " << *PStore
3941  << "\n");
3942  if (!SplitLoads) {
3943  DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
3944  }
3945 
3946  // See if we've finished all the splits.
3947  if (Idx >= Size)
3948  break;
3949 
3950  // Setup the next partition.
3951  PartOffset = Offsets.Splits[Idx];
3952  ++Idx;
3953  PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
3954  }
3955 
3956  // We want to immediately iterate on any allocas impacted by splitting
3957  // this load, which is only relevant if it isn't a load of this alloca and
3958  // thus we didn't already split the loads above. We also have to keep track
3959  // of any promotable allocas we split loads on as they can no longer be
3960  // promoted.
3961  if (!SplitLoads) {
3962  if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
3963  assert(OtherAI != &AI && "We can't re-split our own alloca!");
3964  ResplitPromotableAllocas.insert(OtherAI);
3965  Worklist.insert(OtherAI);
3966  } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
3967  LoadBasePtr->stripInBoundsOffsets())) {
3968  assert(OtherAI != &AI && "We can't re-split our own alloca!");
3969  Worklist.insert(OtherAI);
3970  }
3971  }
3972 
3973  // Mark the original store as dead now that we've split it up and kill its
3974  // slice. Note that we leave the original load in place unless this store
3975  // was its only use. It may in turn be split up if it is an alloca load
3976  // for some other alloca, but it may be a normal load. This may introduce
3977  // redundant loads, but where those can be merged the rest of the optimizer
3978  // should handle the merging, and this uncovers SSA splits which is more
3979  // important. In practice, the original loads will almost always be fully
3980  // split and removed eventually, and the splits will be merged by any
3981  // trivial CSE, including instcombine.
3982  if (LI->hasOneUse()) {
3983  assert(*LI->user_begin() == SI && "Single use isn't this store!");
3984  DeadInsts.insert(LI);
3985  }
3986  DeadInsts.insert(SI);
3987  Offsets.S->kill();
3988  }
3989 
3990  // Remove the killed slices that have been pre-split.
3991  AS.erase(std::remove_if(AS.begin(), AS.end(), [](const Slice &S) {
3992  return S.isDead();
3993  }), AS.end());
3994 
3995  // Insert our new slices. This will sort and merge them into the sorted
3996  // sequence.
3997  AS.insert(NewSlices);
3998 
3999  DEBUG(dbgs() << " Pre-split slices:\n");
4000 #ifndef NDEBUG
4001  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
4002  DEBUG(AS.print(dbgs(), I, " "));
4003 #endif
4004 
4005  // Finally, don't try to promote any allocas that now require re-splitting.
4006  // They have already been added to the worklist above.
4007  PromotableAllocas.erase(
4008  std::remove_if(
4009  PromotableAllocas.begin(), PromotableAllocas.end(),
4010  [&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
4011  PromotableAllocas.end());
4012 
4013  return true;
4014 }
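// Illustrative sketch (value names assumed, not taken from this file): for the
// [12 x i8] example in the function comment above, the i64 load/store pair
// straddling the three 4-byte partitions is pre-split into two i32 halves:
//   %ld.0 = load i32* %iptr1.0          ; bytes [0,4) of %a
//   %ld.4 = load i32* %iptr1.4          ; bytes [4,8) of %a
//   store i32 %ld.0, i32* %iptr2.0      ; bytes [4,8) of %a
//   store i32 %ld.4, i32* %iptr2.4      ; bytes [8,12) of %a
// after which each 4-byte partition can be rewritten and promoted on its own.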
4015 
4016 /// \brief Rewrite an alloca partition's users.
4017 ///
4018 /// This routine drives both of the rewriting goals of the SROA pass. It tries
4019 /// to rewrite uses of an alloca partition to be conducive for SSA value
4020 /// promotion. If the partition needs a new, more refined alloca, this will
4021 /// build that new alloca, preserving as much type information as possible, and
4022 /// rewrite the uses of the old alloca to point at the new one and have the
4023 /// appropriate new offsets. It also evaluates how successful the rewrite was
4024 /// at enabling promotion and if it was successful queues the alloca to be
4025 /// promoted.
4026 AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
4027  AllocaSlices::Partition &P) {
4028  // Try to compute a friendly type for this partition of the alloca. This
4029  // won't always succeed, in which case we fall back to a legal integer type
4030  // or an i8 array of an appropriate size.
4031  Type *SliceTy = nullptr;
4032  const DataLayout &DL = AI.getModule()->getDataLayout();
4033  if (Type *CommonUseTy = findCommonType(P.begin(), P.end(), P.endOffset()))
4034  if (DL.getTypeAllocSize(CommonUseTy) >= P.size())
4035  SliceTy = CommonUseTy;
4036  if (!SliceTy)
4037  if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
4038  P.beginOffset(), P.size()))
4039  SliceTy = TypePartitionTy;
4040  if ((!SliceTy || (SliceTy->isArrayTy() &&
4041  SliceTy->getArrayElementType()->isIntegerTy())) &&
4042  DL.isLegalInteger(P.size() * 8))
4043  SliceTy = Type::getIntNTy(*C, P.size() * 8);
4044  if (!SliceTy)
4045  SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
4046  assert(DL.getTypeAllocSize(SliceTy) >= P.size());
4047 
4048  bool IsIntegerPromotable = isIntegerWideningViable(P, SliceTy, DL);
4049 
4050  VectorType *VecTy =
4051  IsIntegerPromotable ? nullptr : isVectorPromotionViable(P, DL);
4052  if (VecTy)
4053  SliceTy = VecTy;
4054 
4055  // Check for the case where we're going to rewrite to a new alloca of the
4056  // exact same type as the original, and with the same access offsets. In that
4057  // case, re-use the existing alloca, but still run through the rewriter to
4058  // perform phi and select speculation.
4059  AllocaInst *NewAI;
4060  if (SliceTy == AI.getAllocatedType()) {
4061  assert(P.beginOffset() == 0 &&
4062  "Non-zero begin offset but same alloca type");
4063  NewAI = &AI;
4064  // FIXME: We should be able to bail at this point with "nothing changed".
4065  // FIXME: We might want to defer PHI speculation until after here.
4066  // FIXME: return nullptr;
4067  } else {
4068  unsigned Alignment = AI.getAlignment();
4069  if (!Alignment) {
4070  // The minimum alignment which users can rely on when the explicit
4071  // alignment is omitted or zero is that required by the ABI for this
4072  // type.
4073  Alignment = DL.getABITypeAlignment(AI.getAllocatedType());
4074  }
4075  Alignment = MinAlign(Alignment, P.beginOffset());
4076  // If we will get at least this much alignment from the type alone, leave
4077  // the alloca's alignment unconstrained.
4078  if (Alignment <= DL.getABITypeAlignment(SliceTy))
4079  Alignment = 0;
4080  NewAI = new AllocaInst(
4081  SliceTy, nullptr, Alignment,
4082  AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()), &AI);
4083  ++NumNewAllocas;
4084  }
4085 
4086  DEBUG(dbgs() << "Rewriting alloca partition "
4087  << "[" << P.beginOffset() << "," << P.endOffset()
4088  << ") to: " << *NewAI << "\n");
4089 
4090  // Track the high watermark on the worklist as it is only relevant for
4091  // promoted allocas. We will reset it to this point if the alloca is not in
4092  // fact scheduled for promotion.
4093  unsigned PPWOldSize = PostPromotionWorklist.size();
4094  unsigned NumUses = 0;
4095  SmallPtrSet<PHINode *, 8> PHIUsers;
4096  SmallPtrSet<SelectInst *, 8> SelectUsers;
4097 
4098  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
4099  P.endOffset(), IsIntegerPromotable, VecTy,
4100  PHIUsers, SelectUsers);
4101  bool Promotable = true;
4102  for (Slice *S : P.splitSliceTails()) {
4103  Promotable &= Rewriter.visit(S);
4104  ++NumUses;
4105  }
4106  for (Slice &S : P) {
4107  Promotable &= Rewriter.visit(&S);
4108  ++NumUses;
4109  }
4110 
4111  NumAllocaPartitionUses += NumUses;
4112  MaxUsesPerAllocaPartition =
4113  std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
4114 
4115  // Now that we've processed all the slices in the new partition, check if any
4116  // PHIs or Selects would block promotion.
4117  for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
4118  E = PHIUsers.end();
4119  I != E; ++I)
4120  if (!isSafePHIToSpeculate(**I)) {
4121  Promotable = false;
4122  PHIUsers.clear();
4123  SelectUsers.clear();
4124  break;
4125  }
4126  for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
4127  E = SelectUsers.end();
4128  I != E; ++I)
4129  if (!isSafeSelectToSpeculate(**I)) {
4130  Promotable = false;
4131  PHIUsers.clear();
4132  SelectUsers.clear();
4133  break;
4134  }
4135 
4136  if (Promotable) {
4137  if (PHIUsers.empty() && SelectUsers.empty()) {
4138  // Promote the alloca.
4139  PromotableAllocas.push_back(NewAI);
4140  } else {
4141  // If we have either PHIs or Selects to speculate, add them to those
4142  // worklists and re-queue the new alloca so that we promote it on the
4143  // next iteration.
4144  for (PHINode *PHIUser : PHIUsers)
4145  SpeculatablePHIs.insert(PHIUser);
4146  for (SelectInst *SelectUser : SelectUsers)
4147  SpeculatableSelects.insert(SelectUser);
4148  Worklist.insert(NewAI);
4149  }
4150  } else {
4151  // If we can't promote the alloca, iterate on it to check for new
4152  // refinements exposed by splitting the current alloca. Don't iterate on an
4153  // alloca which didn't actually change and didn't get promoted.
4154  if (NewAI != &AI)
4155  Worklist.insert(NewAI);
4156 
4157  // Drop any post-promotion work items if promotion didn't happen.
4158  while (PostPromotionWorklist.size() > PPWOldSize)
4159  PostPromotionWorklist.pop_back();
4160  }
4161 
4162  return NewAI;
4163 }
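// Illustrative sketch (names assumed, not taken from this file): if bytes
// [0,4) of
//   %a = alloca [12 x i8]
// are only ever accessed as a float, this routine builds a refined alloca
//   %a.sroa.0 = alloca float
// rewrites the partition's loads and stores to target it, and queues it for
// promotion to an SSA value once nothing blocks mem2reg.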
4164 
4165 /// \brief Walks the slices of an alloca and forms partitions based on them,
4166 /// rewriting each of their uses.
4167 bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
4168  if (AS.begin() == AS.end())
4169  return false;
4170 
4171  unsigned NumPartitions = 0;
4172  bool Changed = false;
4173  const DataLayout &DL = AI.getModule()->getDataLayout();
4174 
4175  // First try to pre-split loads and stores.
4176  Changed |= presplitLoadsAndStores(AI, AS);
4177 
4178  // Now that we have identified any pre-splitting opportunities, mark any
4179  // splittable (non-whole-alloca) loads and stores as unsplittable. If we fail
4180  // to split these during pre-splitting, we want to force them to be
4181  // rewritten into a partition.
4182  bool IsSorted = true;
4183  for (Slice &S : AS) {
4184  if (!S.isSplittable())
4185  continue;
4186  // FIXME: We currently leave whole-alloca splittable loads and stores. These
4187  // used to be the only splittable loads and stores, and we need to be
4188  // confident that the above handling of splittable loads and stores is
4189  // completely sufficient before we forcibly disable the remaining handling.
4190  if (S.beginOffset() == 0 &&
4191  S.endOffset() >= DL.getTypeAllocSize(AI.getAllocatedType()))
4192  continue;
4193  if (isa<LoadInst>(S.getUse()->getUser()) ||
4194  isa<StoreInst>(S.getUse()->getUser())) {
4195  S.makeUnsplittable();
4196  IsSorted = false;
4197  }
4198  }
4199  if (!IsSorted)
4200  std::sort(AS.begin(), AS.end());
4201 
4202  /// \brief Describes the allocas introduced by rewritePartition
4203  /// in order to migrate the debug info.
4204  struct Piece {
4205  AllocaInst *Alloca;
4206  uint64_t Offset;
4207  uint64_t Size;
4208  Piece(AllocaInst *AI, uint64_t O, uint64_t S)
4209  : Alloca(AI), Offset(O), Size(S) {}
4210  };
4211  SmallVector<Piece, 4> Pieces;
4212 
4213  // Rewrite each partition.
4214  for (auto &P : AS.partitions()) {
4215  if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
4216  Changed = true;
4217  if (NewAI != &AI) {
4218  uint64_t SizeOfByte = 8;
4219  uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
4220  // Don't include any padding.
4221  uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
4222  Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
4223  }
4224  }
4225  ++NumPartitions;
4226  }
4227 
4228  NumAllocaPartitions += NumPartitions;
4229  MaxPartitionsPerAlloca =
4230  std::max<unsigned>(NumPartitions, MaxPartitionsPerAlloca);
4231 
4232  // Migrate debug information from the old alloca to the new alloca(s)
4233  // and the individual partitions.
4234  if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(&AI)) {
4235  auto *Var = DbgDecl->getVariable();
4236  auto *Expr = DbgDecl->getExpression();
4237  DIBuilder DIB(*AI.getParent()->getParent()->getParent(),
4238  /*AllowUnresolved*/ false);
4239  bool IsSplit = Pieces.size() > 1;
4240  for (auto Piece : Pieces) {
4241  // Create a piece expression describing the new partition or reuse AI's
4242  // expression if there is only one partition.
4243  auto *PieceExpr = Expr;
4244  if (IsSplit || Expr->isBitPiece()) {
4245  // If this alloca is already a scalar replacement of a larger aggregate,
4246  // Piece.Offset describes the offset inside the scalar.
4247  uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
4248  uint64_t Start = Offset + Piece.Offset;
4249  uint64_t Size = Piece.Size;
4250  if (Expr->isBitPiece()) {
4251  uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
4252  if (Start >= AbsEnd)
4253  // No need to describe a SROAed padding.
4254  continue;
4255  Size = std::min(Size, AbsEnd - Start);
4256  }
4257  PieceExpr = DIB.createBitPieceExpression(Start, Size);
4258  }
4259 
4260  // Remove any existing dbg.declare intrinsic describing the same alloca.
4261  if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
4262  OldDDI->eraseFromParent();
4263 
4264  DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
4265  &AI);
4266  }
4267  }
4268  return Changed;
4269 }
4270 
4271 /// \brief Clobber a use with undef, deleting the used value if it becomes dead.
4272 void SROA::clobberUse(Use &U) {
4273  Value *OldV = U;
4274  // Replace the use with an undef value.
4275  U = UndefValue::get(OldV->getType());
4276 
4277  // Check for this making an instruction dead. We have to garbage collect
4278  // all the dead instructions to ensure the uses of any alloca end up being
4279  // minimal.
4280  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
4281  if (isInstructionTriviallyDead(OldI)) {
4282  DeadInsts.insert(OldI);
4283  }
4284 }
4285 
4286 /// \brief Analyze an alloca for SROA.
4287 ///
4288 /// This analyzes the alloca to ensure we can reason about it, builds
4289 /// the slices of the alloca, and then hands it off to be split and
4290 /// rewritten as needed.
4291 bool SROA::runOnAlloca(AllocaInst &AI) {
4292  DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
4293  ++NumAllocasAnalyzed;
4294 
4295  // Special case dead allocas, as they're trivial.
4296  if (AI.use_empty()) {
4297  AI.eraseFromParent();
4298  return true;
4299  }
4300  const DataLayout &DL = AI.getModule()->getDataLayout();
4301 
4302  // Skip alloca forms that this analysis can't handle.
4303  if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
4304  DL.getTypeAllocSize(AI.getAllocatedType()) == 0)
4305  return false;
4306 
4307  bool Changed = false;
4308 
4309  // First, split any FCA loads and stores touching this alloca to promote
4310  // better splitting and promotion opportunities.
4311  AggLoadStoreRewriter AggRewriter(DL);
4312  Changed |= AggRewriter.rewrite(AI);
4313 
4314  // Build the slices using a recursive instruction-visiting builder.
4315  AllocaSlices AS(DL, AI);
4316  DEBUG(AS.print(dbgs()));
4317  if (AS.isEscaped())
4318  return Changed;
4319 
4320  // Delete all the dead users of this alloca before splitting and rewriting it.
4321  for (Instruction *DeadUser : AS.getDeadUsers()) {
4322  // Free up everything used by this instruction.
4323  for (Use &DeadOp : DeadUser->operands())
4324  clobberUse(DeadOp);
4325 
4326  // Now replace the uses of this instruction.
4327  DeadUser->replaceAllUsesWith(UndefValue::get(DeadUser->getType()));
4328 
4329  // And mark it for deletion.
4330  DeadInsts.insert(DeadUser);
4331  Changed = true;
4332  }
4333  for (Use *DeadOp : AS.getDeadOperands()) {
4334  clobberUse(*DeadOp);
4335  Changed = true;
4336  }
4337 
4338  // No slices to split. Leave the dead alloca for a later pass to clean up.
4339  if (AS.begin() == AS.end())
4340  return Changed;
4341 
4342  Changed |= splitAlloca(AI, AS);
4343 
4344  DEBUG(dbgs() << " Speculating PHIs\n");
4345  while (!SpeculatablePHIs.empty())
4346  speculatePHINodeLoads(*SpeculatablePHIs.pop_back_val());
4347 
4348  DEBUG(dbgs() << " Speculating Selects\n");
4349  while (!SpeculatableSelects.empty())
4350  speculateSelectInstLoads(*SpeculatableSelects.pop_back_val());
4351 
4352  return Changed;
4353 }
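
For orientation, a hedged source-level example of the kind of alloca runOnAlloca targets (illustrative input only, not part of this file): the aggregate local below is sliced into its two int elements, and each slice is then promoted to an SSA value, leaving no stack slot behind, assuming the usual frontend lowering to a single aggregate alloca.

struct Point {
  int x, y;
};

int manhattan(int a, int b) {
  Point p;          // single aggregate alloca before SROA
  p.x = a;          // store to slice [0, 4)
  p.y = b;          // store to slice [4, 8)
  return p.x + p.y; // both loads fold to the stored SSA values
}
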
4354 
4355 /// \brief Delete the dead instructions accumulated in this run.
4356 ///
4357 /// Recursively deletes the dead instructions we've accumulated. This is done
4358 /// at the very end to maximize locality of the recursive delete and to
4359 /// minimize the problems of invalidated instruction pointers as such pointers
4360 /// are used heavily in the intermediate stages of the algorithm.
4361 ///
4362 /// We also record the alloca instructions deleted here so that they aren't
4363 /// subsequently handed to mem2reg to promote.
4364 void SROA::deleteDeadInstructions(
4365  SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
4366  while (!DeadInsts.empty()) {
4367  Instruction *I = DeadInsts.pop_back_val();
4368  DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
4369 
4370  I->replaceAllUsesWith(UndefValue::get(I->getType()));
4371 
4372  for (Use &Operand : I->operands())
4373  if (Instruction *U = dyn_cast<Instruction>(Operand)) {
4374  // Zero out the operand and see if it becomes trivially dead.
4375  Operand = nullptr;
4376  if (isInstructionTriviallyDead(U))
4377  DeadInsts.insert(U);
4378  }
4379 
4380  if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
4381  DeletedAllocas.insert(AI);
4382  if (DbgDeclareInst *DbgDecl = FindAllocaDbgDeclare(AI))
4383  DbgDecl->eraseFromParent();
4384  }
4385 
4386  ++NumDeleted;
4387  I->eraseFromParent();
4388  }
4389 }
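
The cascade in deleteDeadInstructions is a standard worklist-driven dead-code sweep. A minimal standalone sketch with toy node types (nothing here is LLVM API) shows why erasing one instruction can queue its operands:

#include <cassert>
#include <vector>

struct Node {
  std::vector<Node *> Operands; // values this node uses
  unsigned NumUses = 0;         // how many nodes use this one
  bool Erased = false;
};

// Drain the dead list; erasing a node drops a use from each operand, and any
// operand whose use count hits zero becomes trivially dead and is queued too.
static void deleteDead(std::vector<Node *> DeadList) {
  while (!DeadList.empty()) {
    Node *N = DeadList.back();
    DeadList.pop_back();
    for (Node *Op : N->Operands)
      if (--Op->NumUses == 0)
        DeadList.push_back(Op);
    N->Erased = true; // stands in for eraseFromParent()
  }
}

int main() {
  Node A, B;           // A uses B; nothing else uses either of them
  A.Operands = {&B};
  B.NumUses = 1;
  deleteDead({&A});
  assert(A.Erased && B.Erased);
  return 0;
}
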
4390 
4391 static void enqueueUsersInWorklist(Instruction &I,
4392  SmallVectorImpl<Instruction *> &Worklist,
4393  SmallPtrSetImpl<Instruction *> &Visited) {
4394  for (User *U : I.users())
4395  if (Visited.insert(cast<Instruction>(U)).second)
4396  Worklist.push_back(cast<Instruction>(U));
4397 }
4398 
4399 /// \brief Promote the allocas, using the best available technique.
4400 ///
4401 /// This attempts to promote whatever allocas have been identified as viable in
4402 /// the PromotableAllocas list. If that list is empty, there is nothing to do.
4403 /// If there is a domtree available, we attempt to promote using the full power
4404 /// of mem2reg. Otherwise, we build and use the AllocaPromoter above which is
4405 /// based on the SSAUpdater utilities. This function returns whether any
4406 /// promotion occurred.
4407 bool SROA::promoteAllocas(Function &F) {
4408  if (PromotableAllocas.empty())
4409  return false;
4410 
4411  NumPromoted += PromotableAllocas.size();
4412 
4413  if (DT && !ForceSSAUpdater) {
4414  DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
4415  PromoteMemToReg(PromotableAllocas, *DT, nullptr, AC);
4416  PromotableAllocas.clear();
4417  return true;
4418  }
4419 
4420  DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
4421  SSAUpdater SSA;
4422  DIBuilder DIB(*F.getParent(), /*AllowUnresolved*/ false);
4423  SmallVector<Instruction *, 64> Insts;
4424 
4425  // We need a worklist to walk the uses of each alloca.
4426  SmallVector<Instruction *, 8> Worklist;
4427  SmallPtrSet<Instruction *, 8> Visited;
4428  SmallVector<Instruction *, 32> DeadInsts;
4429 
4430  for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
4431  AllocaInst *AI = PromotableAllocas[Idx];
4432  Insts.clear();
4433  Worklist.clear();
4434  Visited.clear();
4435 
4436  enqueueUsersInWorklist(*AI, Worklist, Visited);
4437 
4438  while (!Worklist.empty()) {
4439  Instruction *I = Worklist.pop_back_val();
4440 
4441  // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
4442  // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
4443  // leading to them) here. Eventually it should use them to optimize the
4444  // scalar values produced.
4445  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
4446  assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
4447  II->getIntrinsicID() == Intrinsic::lifetime_end);
4448  II->eraseFromParent();
4449  continue;
4450  }
4451 
4452  // Push the loads and stores we find onto the list. SROA will already
4453  // have validated that all loads and stores are viable candidates for
4454  // promotion.
4455  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
4456  assert(LI->getType() == AI->getAllocatedType());
4457  Insts.push_back(LI);
4458  continue;
4459  }
4460  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
4461  assert(SI->getValueOperand()->getType() == AI->getAllocatedType());
4462  Insts.push_back(SI);
4463  continue;
4464  }
4465 
4466  // For everything else, we know that only no-op bitcasts and GEPs will
4467  // make it this far, just recurse through them and recall them for later
4468  // removal.
4469  DeadInsts.push_back(I);
4470  enqueueUsersInWorklist(*I, Worklist, Visited);
4471  }
4472  AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
4473  while (!DeadInsts.empty())
4474  DeadInsts.pop_back_val()->eraseFromParent();
4475  AI->eraseFromParent();
4476  }
4477 
4478  PromotableAllocas.clear();
4479  return true;
4480 }
4481 
4482 bool SROA::runOnFunction(Function &F) {
4483  if (skipOptnoneFunction(F))
4484  return false;
4485 
4486  DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
4487  C = &F.getContext();
4488  DominatorTreeWrapperPass *DTWP =
4489  getAnalysisIfAvailable<DominatorTreeWrapperPass>();
4490  DT = DTWP ? &DTWP->getDomTree() : nullptr;
4491  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
4492 
4493  BasicBlock &EntryBB = F.getEntryBlock();
4494  for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
4495  I != E; ++I) {
4496  if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
4497  Worklist.insert(AI);
4498  }
4499 
4500  bool Changed = false;
4501  // A set of deleted alloca instruction pointers which should be removed from
4502  // the list of promotable allocas.
4503  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
4504 
4505  do {
4506  while (!Worklist.empty()) {
4507  Changed |= runOnAlloca(*Worklist.pop_back_val());
4508  deleteDeadInstructions(DeletedAllocas);
4509 
4510  // Remove the deleted allocas from various lists so that we don't try to
4511  // continue processing them.
4512  if (!DeletedAllocas.empty()) {
4513  auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
4514  Worklist.remove_if(IsInSet);
4515  PostPromotionWorklist.remove_if(IsInSet);
4516  PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
4517  PromotableAllocas.end(),
4518  IsInSet),
4519  PromotableAllocas.end());
4520  DeletedAllocas.clear();
4521  }
4522  }
4523 
4524  Changed |= promoteAllocas(F);
4525 
4526  Worklist = PostPromotionWorklist;
4527  PostPromotionWorklist.clear();
4528  } while (!Worklist.empty());
4529 
4530  return Changed;
4531 }
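
The do/while above is a fixed-point driver: each round drains Worklist, and any allocas queued onto PostPromotionWorklist during promotion seed the next round. A minimal standalone sketch of that shape (toy integer work items, not LLVM types):

#include <vector>

// Handling one item may uncover follow-up work; assumption: follow-up items
// strictly shrink, so the outer loop reaches a fixed point.
static std::vector<int> processOne(int Item) {
  return Item > 0 ? std::vector<int>{Item - 1} : std::vector<int>{};
}

int main() {
  std::vector<int> Worklist = {3};
  std::vector<int> PostPromotionWorklist;
  do {
    while (!Worklist.empty()) {
      int Item = Worklist.back();
      Worklist.pop_back();
      for (int Next : processOne(Item))
        PostPromotionWorklist.push_back(Next);
    }
    // Work discovered this round becomes next round's worklist.
    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());
  return 0;
}
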
4532 
4533 void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
4534  AU.addRequired<AssumptionCacheTracker>();
4535  if (RequiresDomTree)
4536  AU.addRequired<DominatorTreeWrapperPass>();
4537  AU.setPreservesCFG();
4538 }